ccmain/tfacepp.cpp - platform/external/tesseract - Git at Google

 /**********************************************************************
  * File:        tfacepp.cpp  (Formerly tface++.c)
  * Description: C++ side of the C/C++ Tess/Editor interface.
  * Author:                  Ray Smith
  * Created:                 Thu Apr 23 15:39:23 BST 1992
  *
  * (C) Copyright 1992, Hewlett-Packard Ltd.
  ** Licensed under the Apache License, Version 2.0 (the "License");
  ** you may not use this file except in compliance with the License.
  ** You may obtain a copy of the License at
  ** http://www.apache.org/licenses/LICENSE-2.0
  ** Unless required by applicable law or agreed to in writing, software
  ** distributed under the License is distributed on an "AS IS" BASIS,
  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  ** See the License for the specific language governing permissions and
  ** limitations under the License.
  *
  **********************************************************************/

 #include "mfcpch.h"
 #ifdef __UNIX__
 #include          <assert.h>
 #endif
 #include          "errcode.h"
 #include          "ratngs.h"
 #include          "reject.h"
 #include          "werd.h"
 #include          "tfacep.h"
 #include          "tstruct.h"
 #include          "tfacepp.h"
 #include          "tessvars.h"
 #include          "globals.h"
 #include          "reject.h"
 #include          "tesseractclass.h"

 // EXTERN is defined empty here, so it contributes nothing to the variable
 // declaration that follows.
 #define EXTERN

 // Flag read by recog_word: when set, a word choice whose permuter is not one
 // of the *_DAWG_PERM types has its permuter replaced by the type reported by
 // dict_word, provided the word contains at least one alpha character.
 EXTERN BOOL_VAR (tessedit_override_permuter, TRUE, "According to dict_word");


 // recog_word_recursive hands any word with more blobs than this to
 // split_and_recog_word instead of recognizing it in one piece.
 #define MAX_UNDIVIDED_LENGTH 24


 /**********************************************************************
  * recog_word
  *
  * Convert the word to tess form and pass it to the tess segmenter.
  * Convert the output back to editor form.
  **********************************************************************/
 namespace tesseract {
 WERD_CHOICE *Tesseract::recog_word(
     WERD *word,                            // word to recognize
     DENORM *denorm,                        // de-normaliser
     POLY_MATCHER matcher,                  // matcher function
     POLY_TESTER tester,                    // tester function
     POLY_TESTER trainer,                   // trainer function
     BOOL8 testing,                         // true if answer driven
     WERD_CHOICE *&raw_choice,              // raw result (set here)
     BLOB_CHOICE_LIST_CLIST *blob_choices,  // list of blob lists
     WERD *&outword                         // bln word output (set here)
     ) {
   // Entry point for recognizing one word.  Returns the best choice for the
   // word; raw_choice and outword are assigned as additional outputs.
   WERD_CHOICE *word_choice;

   if (!word->blob_list()->empty()) {
     word_choice = recog_word_recursive(word, denorm, matcher, tester, trainer,
                                        testing, raw_choice, blob_choices,
                                        outword);
   } else {
     // No blobs at all: fabricate empty best/raw choices and use a plain
     // poly copy of the input as the output word.
     word_choice = new WERD_CHOICE("", NULL, 10.0f, -1.0f,
                                   TOP_CHOICE_PERM, unicharset);
     raw_choice = new WERD_CHOICE("", NULL, 10.0f, -1.0f,
                                  TOP_CHOICE_PERM, unicharset);
     outword = word->poly_copy(denorm->row()->x_height());
   }

   // The choice, the output blobs and the blob choice lists must all have the
   // same length.  Print the details first so that they are visible when one
   // of the ASSERT_HOST checks below fails.
   if (!((word_choice->length() == outword->blob_list()->length()) &&
         (word_choice->length() == blob_choices->length()))) {
     tprintf(
         "recog_word ASSERT FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n",
         word_choice->debug_string(unicharset).string(),
         word_choice->length(),
         outword->blob_list()->length(),
         blob_choices->length());
   }
   ASSERT_HOST(word_choice->length() == outword->blob_list()->length());
   ASSERT_HOST(word_choice->length() == blob_choices->length());

   // Carry the reject blobs of the input word over to the output word.
   outword->rej_blob_list()->deep_copy(word->rej_blob_list(),
                                       &PBLOB::deep_copy);

   if (tessedit_override_permuter) {
     // Override the permuter type if a straight dictionary check disagrees.
     const uinT8 perm_type = word_choice->permuter();
     const bool already_dawg = (perm_type == SYSTEM_DAWG_PERM) ||
                               (perm_type == FREQ_DAWG_PERM) ||
                               (perm_type == USER_DAWG_PERM);
     if (!already_dawg) {
       const uinT8 real_dict_perm_type = dict_word(*word_choice);
       const bool found_in_dawg = (real_dict_perm_type == SYSTEM_DAWG_PERM) ||
                                  (real_dict_perm_type == FREQ_DAWG_PERM) ||
                                  (real_dict_perm_type == USER_DAWG_PERM);
       // Only switch to the dictionary permuter when the word has at least
       // one alpha character.
       if (found_in_dawg &&
           alpha_count(word_choice->unichar_string().string(),
                       word_choice->unichar_lengths().string()) > 0) {
         word_choice->set_permuter(real_dict_perm_type);
       }
     }
     if (tessedit_rejection_debug && perm_type != word_choice->permuter()) {
       tprintf("Permuter Type Flipped from %d to %d\n",
               perm_type, word_choice->permuter());
     }
   }

   assert ((word_choice == NULL) == (raw_choice == NULL));
   return word_choice;
 }


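 /**********************************************************************
  * recog_word_recursive
  *
  * Recognize one word. A word with more than MAX_UNDIVIDED_LENGTH blobs
  * is handed to split_and_recog_word. Otherwise the word is converted to
  * tess form, passed to the tess segmenter and converted back to editor
  * form, and the choices are padded or trimmed to match the number of
  * blobs in the output word.
  **********************************************************************/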
 WERD_CHOICE *
 Tesseract::recog_word_recursive(
     WERD *word,                            // word to do
     DENORM *denorm,                        // de-normaliser
     POLY_MATCHER matcher,                  // matcher function
     POLY_TESTER tester,                    // tester function
     POLY_TESTER trainer,                   // trainer function
     BOOL8 testing,                         // true if answer driven
     WERD_CHOICE *&raw_choice,              // raw result
     BLOB_CHOICE_LIST_CLIST *blob_choices,  // list of blob lists
     WERD *&outword                         // bln word output
     ) {
   // Returns the best choice for the word.  raw_choice and outword are
   // assigned here, and one BLOB_CHOICE_LIST per output blob is appended to
   // blob_choices.

   // Publish the callbacks and context through the tess_* variables.  The
   // tester and trainer are only installed when running answer driven.
   tess_matcher = matcher;
   tess_tester = testing ? tester : NULL;
   tess_trainer = testing ? trainer : NULL;
   tess_denorm = denorm;
   tess_word = word;

   // Words with too many blobs are split and recognized in two parts.
   if (word->blob_list()->length() > MAX_UNDIVIDED_LENGTH) {
     return split_and_recog_word(word, denorm, matcher, tester, trainer,
                                 testing, raw_choice, blob_choices,
                                 outword);
   }

   const UNICHAR_ID space_id = unicharset.unichar_to_id(" ");
   WERD_CHOICE *best_choice = new WERD_CHOICE();
   raw_choice = new WERD_CHOICE();
   // blob_choices may already hold entries (split_and_recog_word passes the
   // same list to both halves), so the length checks below are made relative
   // to the length on entry.
   const inT32 initial_blob_choice_len = blob_choices->length();

   TWERD *tessword = make_tess_word(word, NULL);
   BLOB_CHOICE_LIST_VECTOR *tess_ratings =
       cc_recog(tessword, best_choice, raw_choice,
                testing && tester != NULL,
                testing && trainer != NULL,
                word->flag(W_EOL));

   // Convert the result back to editor form, falling back to a plain copy of
   // the input when the conversion yields nothing.
   outword = make_ed_word(tessword, word);
   if (outword == NULL) {
     outword = word->poly_copy(denorm->row()->x_height());
   }
   delete_word(tessword);
   const inT32 word_length = outword->blob_list()->length();  // no of blobs

   // Put BLOB_CHOICE_LISTs from tess_ratings into blob_choices.
   BLOB_CHOICE_LIST_C_IT blob_choices_it;
   blob_choices_it.set_to_list(blob_choices);
   for (int i = 0; i < tess_ratings->length(); ++i) {
     blob_choices_it.add_to_end(tess_ratings->get(i));
   }
   delete tess_ratings;

   // Pad raw_choice with spaces if needed.
   if (raw_choice->length() < word_length) {
     while (raw_choice->length() < word_length) {
       raw_choice->append_unichar_id(space_id, 1, 0.0,
                                     raw_choice->certainty());
     }
     raw_choice->populate_unichars(unicharset);
   }

   // Do sanity checks and minor fixes on best_choice.
   if (best_choice->length() > word_length) {
     tprintf("recog_word: Discarded long string \"%s\""
             " (%d characters vs %d blobs)\n",
             best_choice->unichar_string().string (),
             best_choice->length(), word_length);
     best_choice->make_bad();  // should never happen
     tprintf("Word is at (%g,%g)\n",
             denorm->origin(),
             denorm->y(word->bounding_box().bottom(), 0.0));
   }

   // Make the number of choice lists added by this call equal to the number
   // of output blobs.  Note that the first number printed is the total list
   // length, including any entries that were present on entry.
   if (blob_choices->length() - initial_blob_choice_len != word_length) {
     best_choice->make_bad();  // force rejection
     tprintf ("recog_word: Choices list len:%d; blob lists len:%d\n",
       blob_choices->length(), word_length);
     blob_choices_it.set_to_list(blob_choices);  // list of lists
     while (blob_choices->length() - initial_blob_choice_len < word_length) {
       blob_choices_it.add_to_end(new BLOB_CHOICE_LIST());  // add a fake one
       tprintf("recog_word: Added dummy choice list\n");
     }
     while (blob_choices->length() - initial_blob_choice_len > word_length) {
       blob_choices_it.move_to_last();  // should never happen
       delete blob_choices_it.extract();
       tprintf("recog_word: Deleted choice list\n");
     }
   }

   // Pad best_choice with spaces if it is shorter than the output word.
   if (best_choice->length() < word_length) {
     while (best_choice->length() < word_length) {
       best_choice->append_unichar_id(space_id, 1, 0.0,
                                      best_choice->certainty());
     }
     best_choice->populate_unichars(unicharset);
   }

   return best_choice;
 }


 /**********************************************************************
  * split_and_recog_word
  *
  * Split the word in two at the widest horizontal gap between adjacent
  * blobs, recognize each part with recog_word_recursive, and join the
  * two sets of results into a single choice, raw choice and output word.
  **********************************************************************/

 WERD_CHOICE *
 Tesseract::split_and_recog_word(
     WERD *word,                            // word to do
     DENORM *denorm,                        // de-normaliser
     POLY_MATCHER matcher,                  // matcher function
     POLY_TESTER tester,                    // tester function
     POLY_TESTER trainer,                   // trainer function
     BOOL8 testing,                         // true if answer driven
     WERD_CHOICE *&raw_choice,              // raw result
     BLOB_CHOICE_LIST_CLIST *blob_choices,  // list of blob lists
     WERD *&outword                         // bln word output
     ) {
   PBLOB_LIST new_blobs;                    // blobs moved to the second word
   PBLOB_IT blob_it;                        // walks the copied word's blobs
   PBLOB_IT split_it = &new_blobs;          // first blob after the best gap

   // Work on a copy so the caller's word is left alone.
   WERD *first_word = word->poly_copy(denorm->row()->x_height());
   blob_it.set_to_list(first_word->blob_list());

   // Find the widest gap between neighbouring blobs.  The iterator is moved
   // on before the comparison, so split_it ends up on the blob that follows
   // the widest gap.
   float widest_gap = -MAX_INT32;
   while (!blob_it.at_last()) {
     const float gap_to_next =
         blob_it.data_relative(1)->bounding_box().left() -
         blob_it.data()->bounding_box().right();
     blob_it.forward();
     if (gap_to_next > widest_gap) {
       widest_gap = gap_to_next;
       split_it = blob_it;
     }
   }

   // Move everything from the split position to the last blob into new_blobs
   // and wrap it up as a second word.
   new_blobs.assign_to_sublist(&split_it, &blob_it);
   WERD *second_word = new WERD(&new_blobs, 1, NULL);
   ASSERT_HOST (word->blob_list ()->length () ==
     first_word->blob_list ()->length () +
     second_word->blob_list ()->length ());

   // Recognize the first part straight into the caller's outputs.
   WERD_CHOICE *first_choice =
       recog_word_recursive(first_word, denorm, matcher, tester, trainer,
                            testing, raw_choice, blob_choices, outword);
   delete first_word;

   // Recognize the second part into temporaries; blob_choices is shared so
   // its lists are appended after those of the first part.
   WERD_CHOICE *second_raw_choice;
   WERD *second_outword;
   WERD_CHOICE *second_choice =
       recog_word_recursive(second_word, denorm, matcher, tester, trainer,
                            testing, second_raw_choice, blob_choices,
                            second_outword);
   delete second_word;

   // Fold the second part into the first and release the temporaries.
   *first_choice += *second_choice;
   delete second_choice;
   *raw_choice += *second_raw_choice;
   delete second_raw_choice;
   outword->join_on(second_outword);
   delete second_outword;

   return first_choice;
 }

 }  // namespace tesseract

 /**********************************************************************
  * call_tester
  *
  * Called from Tess with a blob in tess form.
  * Convert the blob to editor form.
  * Call the tester setup by the segmenter in tess_tester.
  **********************************************************************/
 #if 0  // dead code
 void call_tester(                     //call a tester
                  const STRING& filename,
                  TBLOB *tessblob,     //blob to test
                  BOOL8 correct_blob,  //true if good
                  char *text,          //source text
                  inT32 count,         //chars in text
                  LIST result          //output of matcher
                 ) {
   PBLOB *blob;                   //converted blob
   BLOB_CHOICE_LIST ratings;      //matcher result

   blob = make_ed_blob (tessblob);//convert blob
   if (blob == NULL)
     return;
                                  //make it right type
   convert_choice_list(result, ratings);
   if (tess_tester != NULL)
     (*tess_tester) (filename, blob, tess_denorm, correct_blob, text, count, &ratings);
   delete blob;                   //don't need that now
 }
 #endif

 /**********************************************************************
  * call_train_tester
  *
  * Called from Tess with a blob in tess form.
  * Convert the blob to editor form.
  * Call the trainer setup by the segmenter in tess_trainer.
  **********************************************************************/
 #if 0  // dead code
 void call_train_tester(                     //call a tester
                        const STRING& filename,
                        TBLOB *tessblob,     //blob to test
                        BOOL8 correct_blob,  //true if good
                        char *text,          //source text
                        inT32 count,         //chars in text
                        LIST result          //output of matcher
                       ) {
   PBLOB *blob;                   //converted blob
   BLOB_CHOICE_LIST ratings;      //matcher result

   blob = make_ed_blob (tessblob);//convert blob
   if (blob == NULL)
     return;
                                  //make it right type
   convert_choice_list(result, ratings);
   if (tess_trainer != NULL)
     (*tess_trainer) (filename, blob, tess_denorm, correct_blob, text, count, &ratings);
   delete blob;                   //don't need that now
 }
 #endif
	/**********************************************************************
	* File: tfacepp.cpp (Formerly tface++.c)
	* Description: C++ side of the C/C++ Tess/Editor interface.
	* Author: Ray Smith
	* Created: Thu Apr 23 15:39:23 BST 1992
	*
	* (C) Copyright 1992, Hewlett-Packard Ltd.
	** Licensed under the Apache License, Version 2.0 (the "License");
	** you may not use this file except in compliance with the License.
	** You may obtain a copy of the License at
	** http://www.apache.org/licenses/LICENSE-2.0
	** Unless required by applicable law or agreed to in writing, software
	** distributed under the License is distributed on an "AS IS" BASIS,
	** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	** See the License for the specific language governing permissions and
	** limitations under the License.
	*
	**********************************************************************/

	#include "mfcpch.h"
	#ifdef __UNIX__
	#include <assert.h>
	#endif
	#include "errcode.h"
	#include "ratngs.h"
	#include "reject.h"
	#include "werd.h"
	#include "tfacep.h"
	#include "tstruct.h"
	#include "tfacepp.h"
	#include "tessvars.h"
	#include "globals.h"
	#include "reject.h"
	#include "tesseractclass.h"

	// EXTERN is defined empty here, so it contributes nothing to the variable
	// declaration that follows.
	#define EXTERN

	// Flag read by recog_word: when set, a word choice whose permuter is not one
	// of the *_DAWG_PERM types has its permuter replaced by the type reported by
	// dict_word, provided the word contains at least one alpha character.
	EXTERN BOOL_VAR (tessedit_override_permuter, TRUE, "According to dict_word");


	// recog_word_recursive hands any word with more blobs than this to
	// split_and_recog_word instead of recognizing it in one piece.
	#define MAX_UNDIVIDED_LENGTH 24



	/**********************************************************************
	* recog_word
	*
	* Convert the word to tess form and pass it to the tess segmenter.
	* Convert the output back to editor form.
	**********************************************************************/
	namespace tesseract {
	WERD_CHOICE *Tesseract::recog_word(
	    WERD *word,                            // word to recognize
	    DENORM *denorm,                        // de-normaliser
	    POLY_MATCHER matcher,                  // matcher function
	    POLY_TESTER tester,                    // tester function
	    POLY_TESTER trainer,                   // trainer function
	    BOOL8 testing,                         // true if answer driven
	    WERD_CHOICE *&raw_choice,              // raw result (set here)
	    BLOB_CHOICE_LIST_CLIST *blob_choices,  // list of blob lists
	    WERD *&outword                         // bln word output (set here)
	    ) {
	  // Entry point for recognizing one word.  Returns the best choice for the
	  // word; raw_choice and outword are assigned as additional outputs.
	  WERD_CHOICE *word_choice;
	  uinT8 perm_type;
	  uinT8 real_dict_perm_type;

	  if (word->blob_list()->empty()) {
	    // No blobs at all: fabricate empty best/raw choices and use a plain
	    // poly copy of the input as the output word.
	    word_choice = new WERD_CHOICE("", NULL, 10.0f, -1.0f,
	                                  TOP_CHOICE_PERM, unicharset);
	    raw_choice = new WERD_CHOICE("", NULL, 10.0f, -1.0f,
	                                 TOP_CHOICE_PERM, unicharset);
	    outword = word->poly_copy(denorm->row()->x_height());
	  } else {
	    word_choice = recog_word_recursive(word, denorm, matcher, tester,
	                                       trainer, testing, raw_choice,
	                                       blob_choices, outword);
	  }

	  // The choice, the output blobs and the blob choice lists must all have
	  // the same length.  Print the details first so that they are visible
	  // when one of the ASSERT_HOST checks below fails.
	  if ((word_choice->length() != outword->blob_list()->length()) ||
	      (word_choice->length() != blob_choices->length())) {
	    tprintf
	      ("recog_word ASSERT FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n",
	      word_choice->debug_string(unicharset).string(),
	      word_choice->length(), outword->blob_list()->length(),
	      blob_choices->length());
	  }
	  ASSERT_HOST(word_choice->length() == outword->blob_list()->length());
	  ASSERT_HOST(word_choice->length() == blob_choices->length());

	  /* Copy any reject blobs into the outword */
	  outword->rej_blob_list()->deep_copy(word->rej_blob_list(), &PBLOB::deep_copy);

	  if (tessedit_override_permuter) {
	    /* Override the permuter type if a straight dictionary check disagrees. */
	    perm_type = word_choice->permuter();
	    if ((perm_type != SYSTEM_DAWG_PERM) &&
	        (perm_type != FREQ_DAWG_PERM) && (perm_type != USER_DAWG_PERM)) {
	      real_dict_perm_type = dict_word(*word_choice);
	      // Only switch to the dictionary permuter when the word has at least
	      // one alpha character.
	      if (((real_dict_perm_type == SYSTEM_DAWG_PERM) ||
	           (real_dict_perm_type == FREQ_DAWG_PERM) ||
	           (real_dict_perm_type == USER_DAWG_PERM)) &&
	          (alpha_count(word_choice->unichar_string().string(),
	                       word_choice->unichar_lengths().string()) > 0)) {
	        word_choice->set_permuter (real_dict_perm_type);  // use dict perm
	      }
	    }
	    if (tessedit_rejection_debug && perm_type != word_choice->permuter ()) {
	      tprintf ("Permuter Type Flipped from %d to %d\n",
	        perm_type, word_choice->permuter ());
	    }
	  }
	  assert ((word_choice == NULL) == (raw_choice == NULL));
	  return word_choice;
	}


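	/**********************************************************************
	* recog_word_recursive
	*
	* Recognize one word. A word with more than MAX_UNDIVIDED_LENGTH blobs
	* is handed to split_and_recog_word. Otherwise the word is converted to
	* tess form, passed to the tess segmenter and converted back to editor
	* form, and the choices are padded or trimmed to match the number of
	* blobs in the output word.
	**********************************************************************/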
	WERD_CHOICE *
	Tesseract::recog_word_recursive(
	    WERD *word,                            // word to do
	    DENORM *denorm,                        // de-normaliser
	    POLY_MATCHER matcher,                  // matcher function
	    POLY_TESTER tester,                    // tester function
	    POLY_TESTER trainer,                   // trainer function
	    BOOL8 testing,                         // true if answer driven
	    WERD_CHOICE *&raw_choice,              // raw result
	    BLOB_CHOICE_LIST_CLIST *blob_choices,  // list of blob lists
	    WERD *&outword                         // bln word output
	    ) {
	  // Returns the best choice for the word.  raw_choice and outword are
	  // assigned here, and one BLOB_CHOICE_LIST per output blob is appended to
	  // blob_choices.

	  // Publish the callbacks and context through the tess_* variables.  The
	  // tester and trainer are only installed when running answer driven.
	  tess_matcher = matcher;
	  tess_tester = testing ? tester : NULL;
	  tess_trainer = testing ? trainer : NULL;
	  tess_denorm = denorm;
	  tess_word = word;

	  // Words with too many blobs are split and recognized in two parts.
	  if (word->blob_list()->length() > MAX_UNDIVIDED_LENGTH) {
	    return split_and_recog_word(word, denorm, matcher, tester, trainer,
	                                testing, raw_choice, blob_choices,
	                                outword);
	  }

	  const UNICHAR_ID space_id = unicharset.unichar_to_id(" ");
	  WERD_CHOICE *best_choice = new WERD_CHOICE();
	  raw_choice = new WERD_CHOICE();
	  // blob_choices may already hold entries (split_and_recog_word passes the
	  // same list to both halves), so the length checks below are made
	  // relative to the length on entry.
	  const inT32 initial_blob_choice_len = blob_choices->length();

	  TWERD *tessword = make_tess_word(word, NULL);
	  BLOB_CHOICE_LIST_VECTOR *tess_ratings =
	      cc_recog(tessword, best_choice, raw_choice,
	               testing && tester != NULL,
	               testing && trainer != NULL,
	               word->flag(W_EOL));

	  // Convert the result back to editor form, falling back to a plain copy
	  // of the input when the conversion yields nothing.
	  outword = make_ed_word(tessword, word);
	  if (outword == NULL) {
	    outword = word->poly_copy(denorm->row()->x_height());
	  }
	  delete_word(tessword);
	  const inT32 word_length = outword->blob_list()->length();  // no of blobs

	  // Put BLOB_CHOICE_LISTs from tess_ratings into blob_choices.
	  BLOB_CHOICE_LIST_C_IT blob_choices_it;
	  blob_choices_it.set_to_list(blob_choices);
	  for (int i = 0; i < tess_ratings->length(); ++i) {
	    blob_choices_it.add_to_end(tess_ratings->get(i));
	  }
	  delete tess_ratings;

	  // Pad raw_choice with spaces if needed.
	  if (raw_choice->length() < word_length) {
	    while (raw_choice->length() < word_length) {
	      raw_choice->append_unichar_id(space_id, 1, 0.0,
	                                    raw_choice->certainty());
	    }
	    raw_choice->populate_unichars(unicharset);
	  }

	  // Do sanity checks and minor fixes on best_choice.
	  if (best_choice->length() > word_length) {
	    tprintf("recog_word: Discarded long string \"%s\""
	            " (%d characters vs %d blobs)\n",
	            best_choice->unichar_string().string (),
	            best_choice->length(), word_length);
	    best_choice->make_bad();  // should never happen
	    tprintf("Word is at (%g,%g)\n",
	            denorm->origin(),
	            denorm->y(word->bounding_box().bottom(), 0.0));
	  }

	  // Make the number of choice lists added by this call equal to the number
	  // of output blobs.  Note that the first number printed is the total list
	  // length, including any entries that were present on entry.
	  if (blob_choices->length() - initial_blob_choice_len != word_length) {
	    best_choice->make_bad();  // force rejection
	    tprintf ("recog_word: Choices list len:%d; blob lists len:%d\n",
	      blob_choices->length(), word_length);
	    blob_choices_it.set_to_list(blob_choices);  // list of lists
	    while (blob_choices->length() - initial_blob_choice_len < word_length) {
	      blob_choices_it.add_to_end(new BLOB_CHOICE_LIST());  // add a fake one
	      tprintf("recog_word: Added dummy choice list\n");
	    }
	    while (blob_choices->length() - initial_blob_choice_len > word_length) {
	      blob_choices_it.move_to_last();  // should never happen
	      delete blob_choices_it.extract();
	      tprintf("recog_word: Deleted choice list\n");
	    }
	  }

	  // Pad best_choice with spaces if it is shorter than the output word.
	  if (best_choice->length() < word_length) {
	    while (best_choice->length() < word_length) {
	      best_choice->append_unichar_id(space_id, 1, 0.0,
	                                     best_choice->certainty());
	    }
	    best_choice->populate_unichars(unicharset);
	  }

	  return best_choice;
	}


	/**********************************************************************
	* split_and_recog_word
	*
	* Split the word in two at the widest horizontal gap between adjacent
	* blobs, recognize each part with recog_word_recursive, and join the
	* two sets of results into a single choice, raw choice and output word.
	**********************************************************************/

	WERD_CHOICE *
	Tesseract::split_and_recog_word(
	    WERD *word,                            // word to do
	    DENORM *denorm,                        // de-normaliser
	    POLY_MATCHER matcher,                  // matcher function
	    POLY_TESTER tester,                    // tester function
	    POLY_TESTER trainer,                   // trainer function
	    BOOL8 testing,                         // true if answer driven
	    WERD_CHOICE *&raw_choice,              // raw result
	    BLOB_CHOICE_LIST_CLIST *blob_choices,  // list of blob lists
	    WERD *&outword                         // bln word output
	    ) {
	  WERD *first_word;              // poly copy of word
	  WERD *second_word;             // fabricated word
	  WERD *outword2;                // 2nd output word
	  PBLOB *blob;
	  WERD_CHOICE *result;           // return value
	  WERD_CHOICE *result2;          // output of 2nd word
	  WERD_CHOICE *raw_choice2;      // raw version of 2nd
	  float gap;                     // blob gap
	  float bestgap;                 // biggest gap
	  PBLOB_LIST new_blobs;          // list of gathered blobs
	  PBLOB_IT blob_it;              // iterator
	  PBLOB_IT new_blob_it = &new_blobs;

	  // Work on a copy so the caller's word is left alone.
	  first_word = word->poly_copy (denorm->row ()->x_height ());
	  blob_it.set_to_list (first_word->blob_list ());

	  // Find the widest gap between neighbouring blobs.  The iterator is moved
	  // on before the comparison, so new_blob_it ends up on the blob that
	  // follows the widest gap.
	  bestgap = -MAX_INT32;
	  while (!blob_it.at_last ()) {
	    blob = blob_it.data ();
	    // gap to next
	    gap = blob_it.data_relative(1)->bounding_box().left() -
	        blob->bounding_box().right();
	    blob_it.forward ();
	    if (gap > bestgap) {
	      bestgap = gap;             // find biggest
	      new_blob_it = blob_it;     // save position
	    }
	  }
	  // take 2nd half
	  new_blobs.assign_to_sublist (&new_blob_it, &blob_it);
	  // make it a word
	  second_word = new WERD (&new_blobs, 1, NULL);
	  ASSERT_HOST (word->blob_list ()->length () ==
	    first_word->blob_list ()->length () +
	    second_word->blob_list ()->length ());

	  result = recog_word_recursive (first_word, denorm, matcher,
	    tester, trainer, testing, raw_choice,
	    blob_choices, outword);
	  delete first_word;             // done that one
	  result2 = recog_word_recursive (second_word, denorm, matcher,
	    tester, trainer, testing, raw_choice2,
	    blob_choices, outword2);
	  delete second_word;            // done that too

	  // Combine the WERD_CHOICE objects themselves: the pointers must be
	  // dereferenced, since adding one pointer to another is ill-formed.
	  *result += *result2;           // combine ratings
	  delete result2;
	  *raw_choice += *raw_choice2;
	  delete raw_choice2;            // finished with it
	  outword->join_on (outword2);   // join words
	  delete outword2;
	  return result;
	}

	} // namespace tesseract

	/**********************************************************************
	* call_tester
	*
	* Called from Tess with a blob in tess form.
	* Convert the blob to editor form.
	* Call the tester setup by the segmenter in tess_tester.
	**********************************************************************/
	#if 0 // dead code
	void call_tester( //call a tester
	const STRING& filename,
	TBLOB *tessblob, //blob to test
	BOOL8 correct_blob, //true if good
	char *text, //source text
	inT32 count, //chars in text
	LIST result //output of matcher
	) {
	PBLOB *blob; //converted blob
	BLOB_CHOICE_LIST ratings; //matcher result

	blob = make_ed_blob (tessblob);//convert blob
	if (blob == NULL)
	return;
	//make it right type
	convert_choice_list(result, ratings);
	if (tess_tester != NULL)
	(*tess_tester) (filename, blob, tess_denorm, correct_blob, text, count, &ratings);
	delete blob; //don't need that now
	}
	#endif

	/**********************************************************************
	* call_train_tester
	*
	* Called from Tess with a blob in tess form.
	* Convert the blob to editor form.
	* Call the trainer setup by the segmenter in tess_trainer.
	**********************************************************************/
	#if 0 // dead code
	void call_train_tester( //call a tester
	const STRING& filename,
	TBLOB *tessblob, //blob to test
	BOOL8 correct_blob, //true if good
	char *text, //source text
	inT32 count, //chars in text
	LIST result //output of matcher
	) {
	PBLOB *blob; //converted blob
	BLOB_CHOICE_LIST ratings; //matcher result

	blob = make_ed_blob (tessblob);//convert blob
	if (blob == NULL)
	return;
	//make it right type
	convert_choice_list(result, ratings);
	if (tess_trainer != NULL)
	(*tess_trainer) (filename, blob, tess_denorm, correct_blob, text, count, &ratings);
	delete blob; //don't need that now
	}
	#endif