// ccmain/tesseractclass.h - platform/external/tesseract - Git at Google

 ///////////////////////////////////////////////////////////////////////
 // File:        tesseractclass.h
 // Description: An instance of Tesseract. For thread safety, *every*
 //              global variable goes in here, directly, or indirectly.
 // Author:      Ray Smith
 // Created:     Fri Mar 07 08:17:01 PST 2008
 //
 // (C) Copyright 2008, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 // http://www.apache.org/licenses/LICENSE-2.0
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 ///////////////////////////////////////////////////////////////////////

 #ifndef TESSERACT_CCMAIN_TESSERACTCLASS_H__
 #define TESSERACT_CCMAIN_TESSERACTCLASS_H__

 #include "varable.h"
 #include "wordrec.h"
 #include "ocrclass.h"
 #include "control.h"
 #include "docqual.h"

 // Forward declarations, sorted by name. Every one of these types is used
 // by the Tesseract class below only through a pointer or a reference.
 class BLOB_CHOICE_LIST_CLIST;
 class BLOCK_LIST;
 class CHAR_SAMPLES_LIST;
 class CHAR_SAMPLE_LIST;
 class IMAGE;
 class PAGE_RES;
 class PAGE_RES_IT;
 struct Pix;
 class ROW;
 class SVMenuNode;
 class TBOX;
 class TO_BLOCK_LIST;
 class WERD;
 class WERD_CHOICE;
 class WERD_RES;


 // Top-level class for all tesseract global instance data.
 // This class either holds or points to all data used by an instance
 // of Tesseract, including the memory allocator. When this is
 // complete, Tesseract will be thread-safe. UNTIL THEN, IT IS NOT!
 //
 // NOTE to developers: Do not create cyclic dependencies through this class!
 // The directory dependency tree must remain a tree! To keep this clean,
 // lower-level code (eg in ccutil, the bottom level) must never need to
 // know about the content of a higher-level directory.
 // The following scheme will grant the easiest access to lower-level
 // global members without creating a cyclic dependency:
 // ccmain inherits wordrec, includes textord as a member
 // wordrec inherits classify
 // classify inherits ccstruct, includes dict as a member
 // ccstruct inherits c_util, includes image as a member
 // c_util inherits cc_util
 // textord has a pointer to ccstruct, but doesn't own it.
 // dict has a pointer to ccstruct, but doesn't own it.
 //
 // NOTE: each level contains members that correspond to global
 // data that is defined (and used) at that level, not necessarily where
 // the type is defined. So, for instance:
 // BOOL_VAR (textord_show_blobs, FALSE, "Display unsorted blobs");
 // goes inside the Textord class, not the cc_util class.

 namespace tesseract {

 // One instance of Tesseract. Derives from Wordrec and gathers, as member
 // functions, the entry points that are grouped below by the source file
 // they come from, followed by this level's configuration variables.
 class Tesseract : public Wordrec {
  public:
   Tesseract();
   ~Tesseract();

   void Clear();

   // ===== Simple accessors ================================================

   // Returns a const reference to reskew_.
   const FCOORD& reskew() const { return reskew_; }

   // Destroys any existing pix (by calling Clear()) and returns a pointer to
   // the pix_binary_ pointer.
   Pix** mutable_pix_binary() {
     Clear();
     return &pix_binary_;
   }

   // Returns the pix_binary_ pointer itself.
   Pix* pix_binary() const { return pix_binary_; }

   void SetBlackAndWhitelist();
   int SegmentPage(const STRING* input_file, IMAGE* image, BLOCK_LIST* blocks);
   int AutoPageSeg(int width, int height, int resolution, bool single_column,
                   IMAGE* image, BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);

   // ===== control.h =======================================================

   // Process words.
   //   page_res - page structure
   //   monitor  - progress monitor
   void recog_all_words(PAGE_RES* page_res,
                        volatile ETEXT_DESC* monitor,
                        TBOX* target_word_box = 0L,
                        inT16 dopasses = 0);

   // Recognize one word.
   //   word - word to do
   void classify_word_pass1(WERD_RES* word,
                            ROW* row,
                            BLOCK* block,
                            BOOL8 cluster_adapt,
                            CHAR_SAMPLES_LIST* char_clusters,
                            CHAR_SAMPLE_LIST* chars_waiting);

   // Recognize blobs.
   //   block_list - blocks to check
   void recog_pseudo_word(BLOCK_LIST* block_list, TBOX& selection_box);

   // This method returns all the blobs in the specified blocks.
   // It's the caller's responsibility to destroy the returned list.
   //   blocks - blocks to look at
   C_BLOB_LIST* get_blobs_from_blocks(BLOCK_LIST* blocks);

   // This method can be used to perform word-level training using box files.
   // TODO: this can be modified to perform training in general case too.
   //   box_file - file with boxes
   //   out_file - output file
   //   blocks   - blocks to use
   void train_word_level_with_boxes(const STRING& box_file,
                                    const STRING& out_file,
                                    BLOCK_LIST* blocks);

   void fix_rep_char(WERD_RES* word);

   // Make double quotes.
   //   choice       - choice to fix
   //   word         - word to do
   //   blob_choices - char choices
   void fix_quotes(WERD_CHOICE* choice,
                   WERD* word,
                   BLOB_CHOICE_LIST_CLIST* blob_choices);

   ACCEPTABLE_WERD_TYPE acceptable_word_string(const char* s,
                                               const char* lengths);

   // Recognize one word.
   //   word - word to do
   void match_word_pass2(WERD_RES* word,
                         ROW* row,
                         BLOCK* block,
                         float x_height);

   //   word - word to do
   void classify_word_pass2(WERD_RES* word, BLOCK* block, ROW* row);

   // Recognize blobs.
   //   block - block
   //   row   - row of word
   //   word  - word to recognize
   BOOL8 recog_interactive(BLOCK* block, ROW* row, WERD* word);

   // Crunch double hyphens.
   //   choice       - choice to fix
   //   word         - word to do
   //   blob_choices - char choices
   void fix_hyphens(WERD_CHOICE* choice,
                    WERD* word,
                    BLOB_CHOICE_LIST_CLIST* blob_choices);

   //   word         - word to adapt to
   //   blob_choices - detailed results
   void set_word_fonts(WERD_RES* word, BLOB_CHOICE_LIST_CLIST* blob_choices);

   // Good chars in word.
   void font_recognition_pass(PAGE_RES_IT& page_res_it);

   // ===== output.h ========================================================

   // Tess output pass.
   //   write_to_shm - send to api
   void output_pass(PAGE_RES_IT& page_res_it,
                    BOOL8 write_to_shm,
                    TBOX* target_word_box);

   // Open .map & .unlv file.
   FILE* open_outfile(const char* extension);

   // Output a word.
   //   page_res_it  - full info
   //   newline_type - type of newline
   //   force_eol    - override tilde crunch?
   //   write_to_shm - send to api
   void write_results(PAGE_RES_IT& page_res_it,
                      char newline_type,
                      BOOL8 force_eol,
                      BOOL8 write_to_shm);

   void set_unlv_suspects(WERD_RES* word);

   // What char is repeated?
   UNICHAR_ID get_rep_char(WERD_RES* word);

   BOOL8 acceptable_number_string(const char* s, const char* lengths);
   inT16 count_alphanums(const WERD_CHOICE& word);
   inT16 count_alphas(const WERD_CHOICE& word);

   // ===== tessedit.h ======================================================

   void read_config_file(const char* filename, bool global_only);

   int init_tesseract(const char* arg0,
                      const char* textbase,
                      const char* language,
                      char** configs,
                      int configs_size,
                      bool configs_global_only);

   int init_tesseract_lm(const char* arg0,
                         const char* textbase,
                         const char* language);

   // Initializes the tesseract classifier without loading language models.
   int init_tesseract_classifier(const char* arg0,
                                 const char* textbase,
                                 const char* language,
                                 char** configs,
                                 int configs_size,
                                 bool configs_global_only);

   void recognize_page(STRING& image_name);
   void end_tesseract();

   bool init_tesseract_lang_data(const char* arg0,
                                 const char* textbase,
                                 const char* language,
                                 char** configs,
                                 int configs_size,
                                 bool configs_global_only);

   // ===== pgedit.h ========================================================

   SVMenuNode* build_menu_new();
   void pgeditor_main(BLOCK_LIST* blocks);

   // Action in image win.
   void process_image_event(const SVEvent& event);

   //   filename - of serialised file
   //   blocks   - block list to add to
   void pgeditor_read_file(STRING& filename, BLOCK_LIST* blocks);

   // Serialise.
   void do_new_source();

   // UI command semantics.
   //   cmd_event - which menu item?
   //   new_value - any prompt data
   BOOL8 process_cmd_win_event(inT32 cmd_event, char* new_value);

   // ===== reject.h ========================================================

   const char* char_ambiguities(char c);

   // Make rej map for wd.
   //   blob_choices - detailed results
   //   pass         - 1st or 2nd?
   void make_reject_map(WERD_RES* word,
                        BLOB_CHOICE_LIST_CLIST* blob_choices,
                        ROW* row,
                        inT16 pass);

   BOOL8 one_ell_conflict(WERD_RES* word_res, BOOL8 update_map);
   inT16 first_alphanum_index(const char* word, const char* word_lengths);
   inT16 first_alphanum_offset(const char* word, const char* word_lengths);
   inT16 alpha_count(const char* word, const char* word_lengths);
   BOOL8 word_contains_non_1_digit(const char* word,
                                   const char* word_lengths);
   void dont_allow_1Il(WERD_RES* word);

   // How many alphanums.
   inT16 count_alphanums(WERD_RES* word);

   BOOL8 repeated_ch_string(const char* rep_ch_str, const char* lengths);
   void flip_0O(WERD_RES* word);
   BOOL8 non_0_digit(UNICHAR_ID unichar_id);
   BOOL8 non_O_upper(UNICHAR_ID unichar_id);
   BOOL8 repeated_nonalphanum_wd(WERD_RES* word, ROW* row);

   // Match a word.
   void nn_match_word(WERD_RES* word, ROW* row);

   void nn_recover_rejects(WERD_RES* word, ROW* row);

   // Test for ambiguity.
   BOOL8 test_ambig_word(WERD_RES* word);

   // Set done flag.
   void set_done(WERD_RES* word, inT16 pass);

   inT16 safe_dict_word(const WERD_CHOICE& word);
   void flip_hyphens(WERD_RES* word);

   // ===== adaptions.h =====================================================

   void adapt_to_good_ems(WERD_RES* word,
                          CHAR_SAMPLES_LIST* char_clusters,
                          CHAR_SAMPLE_LIST* chars_waiting);
   void adapt_to_good_samples(WERD_RES* word,
                              CHAR_SAMPLES_LIST* char_clusters,
                              CHAR_SAMPLE_LIST* chars_waiting);

   // Should we adapt?
   BOOL8 word_adaptable(WERD_RES* word, uinT16 mode);

   void reject_suspect_ems(WERD_RES* word);
   void collect_ems_for_adaption(WERD_RES* word,
                                 CHAR_SAMPLES_LIST* char_clusters,
                                 CHAR_SAMPLE_LIST* chars_waiting);
   void collect_characters_for_adaption(WERD_RES* word,
                                        CHAR_SAMPLES_LIST* char_clusters,
                                        CHAR_SAMPLE_LIST* chars_waiting);
   void check_wait_list(CHAR_SAMPLE_LIST* chars_waiting,
                        CHAR_SAMPLE* sample,
                        CHAR_SAMPLES* best_cluster);
   void cluster_sample(CHAR_SAMPLE* sample,
                       CHAR_SAMPLES_LIST* char_clusters,
                       CHAR_SAMPLE_LIST* chars_waiting);
   void complete_clustering(CHAR_SAMPLES_LIST* char_clusters,
                            CHAR_SAMPLE_LIST* chars_waiting);

   // ===== tfacepp.cpp =====================================================

   // Recognize one word.
   //   word         - word to do
   //   denorm       - de-normaliser
   //   matcher      - matcher function
   //   tester       - tester function
   //   trainer      - trainer function
   //   testing      - true if answer driven
   //   raw_choice   - raw result
   //   blob_choices - list of blob lists
   //   outword      - bln word output
   WERD_CHOICE* recog_word_recursive(WERD* word,
                                     DENORM* denorm,
                                     POLY_MATCHER matcher,
                                     POLY_TESTER tester,
                                     POLY_TESTER trainer,
                                     BOOL8 testing,
                                     WERD_CHOICE*& raw_choice,
                                     BLOB_CHOICE_LIST_CLIST* blob_choices,
                                     WERD*& outword);

   // Recognize one word.
   //   word         - word to do
   //   denorm       - de-normaliser
   //   matcher      - matcher function
   //   tester       - tester function
   //   trainer      - trainer function
   //   testing      - true if answer driven
   //   raw_choice   - raw result
   //   blob_choices - list of blob lists
   //   outword      - bln word output
   WERD_CHOICE* recog_word(WERD* word,
                           DENORM* denorm,
                           POLY_MATCHER matcher,
                           POLY_TESTER tester,
                           POLY_TESTER trainer,
                           BOOL8 testing,
                           WERD_CHOICE*& raw_choice,
                           BLOB_CHOICE_LIST_CLIST* blob_choices,
                           WERD*& outword);

   // Recognize one word.
   //   word         - word to do
   //   denorm       - de-normaliser
   //   matcher      - matcher function
   //   tester       - tester function
   //   trainer      - trainer function
   //   testing      - true if answer driven
   //   raw_choice   - raw result
   //   blob_choices - list of blob lists
   //   outword      - bln word output
   WERD_CHOICE* split_and_recog_word(WERD* word,
                                     DENORM* denorm,
                                     POLY_MATCHER matcher,
                                     POLY_TESTER tester,
                                     POLY_TESTER trainer,
                                     BOOL8 testing,
                                     WERD_CHOICE*& raw_choice,
                                     BLOB_CHOICE_LIST_CLIST* blob_choices,
                                     WERD*& outword);

   // ===== fixspace.cpp ====================================================

   BOOL8 digit_or_numeric_punct(WERD_RES* word, int char_position);
   inT16 eval_word_spacing(WERD_RES_LIST& word_res_list);
   void match_current_words(WERD_RES_LIST& words, ROW* row, BLOCK* block);
   inT16 fp_eval_word_spacing(WERD_RES_LIST& word_res_list);
   void fix_noisy_space_list(WERD_RES_LIST& best_perm, ROW* row, BLOCK* block);

   // Space explorer.
   void fix_fuzzy_space_list(WERD_RES_LIST& best_perm, ROW* row, BLOCK* block);

   void fix_sp_fp_word(WERD_RES_IT& word_res_it, ROW* row, BLOCK* block);

   // Find fuzzy words.
   //   monitor    - progress monitor
   //   word_count - count of words in doc
   void fix_fuzzy_spaces(volatile ETEXT_DESC* monitor,
                         inT32 word_count,
                         PAGE_RES* page_res);

   // ===== docqual.cpp =====================================================

   GARBAGE_LEVEL garbage_word(WERD_RES* word, BOOL8 ok_dict_word);
   BOOL8 potential_word_crunch(WERD_RES* word,
                               GARBAGE_LEVEL garbage_level,
                               BOOL8 ok_dict_word);
   void tilde_crunch(PAGE_RES_IT& page_res_it);

   // Unreject potential.
   void unrej_good_quality_words(PAGE_RES_IT& page_res_it);

   // Reject big chunks.
   void doc_and_block_rejection(PAGE_RES_IT& page_res_it,
                                BOOL8 good_quality_doc);

   void quality_based_rejection(PAGE_RES_IT& page_res_it,
                                BOOL8 good_quality_doc);
   void convert_bad_unlv_chs(WERD_RES* word_res);
   void merge_tess_fails(WERD_RES* word_res);
   void tilde_delete(PAGE_RES_IT& page_res_it);
   void insert_rej_cblobs(WERD_RES* word);

   // ===== pagewalk.cpp ====================================================

   //   block_list     - blocks to check
   //   word_processor - function to call
   void process_selected_words(
       BLOCK_LIST* block_list,
       TBOX& selection_box,
       BOOL8 (tesseract::Tesseract::*word_processor)(BLOCK*, ROW*, WERD*));

   // ===== tessbox.cpp =====================================================

   //   word_choice - after context
   void tess_add_doc_word(WERD_CHOICE* word_choice);

   // Adapt to word.
   //   word       - bln word
   //   denorm     - de-normalise
   //   choice     - string for word
   //   raw_choice - before context
   //   rejmap     - reject map
   void tess_adapter(WERD* word,
                     DENORM* denorm,
                     const WERD_CHOICE& choice,
                     const WERD_CHOICE& raw_choice,
                     const char* rejmap);

   // Recognize one word.
   //   word         - bln word to do
   //   denorm       - de-normaliser
   //   matcher      - matcher function
   //   tester       - tester function
   //   raw_choice   - raw result
   //   blob_choices - list of blob lists
   //   outword      - bln word output
   WERD_CHOICE* test_segment_pass2(WERD* word,
                                   DENORM* denorm,
                                   POLY_MATCHER matcher,
                                   POLY_TESTER tester,
                                   WERD_CHOICE*& raw_choice,
                                   BLOB_CHOICE_LIST_CLIST* blob_choices,
                                   WERD*& outword);

   // Recognize one word.
   //   word         - bln word to do
   //   denorm       - de-normaliser
   //   matcher      - matcher function
   //   raw_choice   - raw result
   //   blob_choices - list of blob lists
   //   outword      - bln word output
   WERD_CHOICE* tess_segment_pass1(WERD* word,
                                   DENORM* denorm,
                                   POLY_MATCHER matcher,
                                   WERD_CHOICE*& raw_choice,
                                   BLOB_CHOICE_LIST_CLIST* blob_choices,
                                   WERD*& outword);

   // Recognize one word.
   //   word         - bln word to do
   //   denorm       - de-normaliser
   //   matcher      - matcher function
   //   raw_choice   - raw result
   //   blob_choices - list of blob lists
   //   outword      - bln word output
   WERD_CHOICE* tess_segment_pass2(WERD* word,
                                   DENORM* denorm,
                                   POLY_MATCHER matcher,
                                   WERD_CHOICE*& raw_choice,
                                   BLOB_CHOICE_LIST_CLIST* blob_choices,
                                   WERD*& outword);

   // Recognize one word.
   //   word         - bln word to do
   //   denorm       - de-normaliser
   //   matcher      - matcher function
   //   tester       - tester function
   //   raw_choice   - raw result
   //   blob_choices - list of blob lists
   //   outword      - bln word output
   WERD_CHOICE* correct_segment_pass2(WERD* word,
                                      DENORM* denorm,
                                      POLY_MATCHER matcher,
                                      POLY_TESTER tester,
                                      WERD_CHOICE*& raw_choice,
                                      BLOB_CHOICE_LIST_CLIST* blob_choices,
                                      WERD*& outword);

   // Call tess.
   //   pblob   - previous blob
   //   blob    - blob to match
   //   nblob   - next blob
   //   word    - word it came from
   //   denorm  - de-normaliser
   //   ratings - list of results
   void tess_default_matcher(PBLOB* pblob,
                             PBLOB* blob,
                             PBLOB* nblob,
                             WERD* word,
                             DENORM* denorm,
                             BLOB_CHOICE_LIST* ratings,
                             const char* script);

   // Call tess.
   //   pblob   - previous blob
   //   blob    - blob to match
   //   nblob   - next blob
   //   word    - word it came from
   //   denorm  - de-normaliser
   //   ratings - list of results
   void tess_bn_matcher(PBLOB* pblob,
                        PBLOB* blob,
                        PBLOB* nblob,
                        WERD* word,
                        DENORM* denorm,
                        BLOB_CHOICE_LIST* ratings);

   // Call tess.
   //   pblob     - previous blob
   //   blob      - blob to match
   //   nblob     - next blob
   //   word      - word it came from
   //   denorm    - de-normaliser
   //   ratings   - list of results
   //   cpresults - sorted array of CP_RESULT_STRUCT from class pruner
   void tess_cn_matcher(PBLOB* pblob,
                        PBLOB* blob,
                        PBLOB* nblob,
                        WERD* word,
                        DENORM* denorm,
                        BLOB_CHOICE_LIST* ratings,
                        CLASS_PRUNER_RESULTS cpresults);

   // Test adaptability.
   //   word        - word to test
   //   word_choice - after context
   //   raw_choice  - before context
   BOOL8 tess_adaptable_word(WERD* word,
                             WERD_CHOICE* word_choice,
                             WERD_CHOICE* raw_choice);

   // Test acceptability.
   //   word_choice - after context
   //   raw_choice  - before context
   BOOL8 tess_acceptable_word(WERD_CHOICE* word_choice,
                              WERD_CHOICE* raw_choice);

   // ===== applybox.cpp ====================================================

   void apply_box_testing(BLOCK_LIST* block_list);

   //   block_list - real blocks
   void apply_boxes(const STRING& fname, BLOCK_LIST* block_list);

   // Converts an array of boxes to a block list.
   int Boxes2BlockList(int box_cnt, TBOX* boxes, BLOCK_LIST* block_list,
                       bool right2left);

   // ===== blobcmp.cpp =====================================================

   float compare_tess_blobs(TBLOB* blob1, TEXTROW* row1,
                            TBLOB* blob2, TEXTROW* row2);

   // ===== paircmp.cpp =====================================================

   // Match 2 blobs.
   //   blob1 - first blob
   //   blob2 - other blob
   float compare_bln_blobs(PBLOB* blob1, DENORM* denorm1,
                           PBLOB* blob2, DENORM* denorm2);

   // Match 2 blobs.
   //   blob1 - first blob
   //   row1  - row it came from
   //   blob2 - other blob
   float compare_blobs(PBLOB* blob1, ROW* row1, PBLOB* blob2, ROW* row2);

   // Blob processor.
   //   row  - row it came from
   //   blob - blob to compare
   BOOL8 compare_blob_pairs(BLOCK*, ROW* row, WERD*, PBLOB* blob);

   // ===== fixxht.cpp ======================================================

   void check_block_occ(WERD_RES* word_res);

   // ===== Data members ====================================================

   BOOL_VAR_H(tessedit_resegment_from_boxes, false,
              "Take segmentation and labeling from box file");
   BOOL_VAR_H(tessedit_train_from_boxes, false,
              "Generate training data from boxed chars");
   BOOL_VAR_H(tessedit_dump_pageseg_images, false,
              "Dump itermediate images made during page segmentation");
   INT_VAR_H(tessedit_pageseg_mode, 2,
             "Page seg mode: 0=auto, 1=col, 2=block, 3=line, 4=word, 6=char"
             " (Values from PageSegMode enum in baseapi.h)");
   INT_VAR_H(tessedit_accuracyvspeed, 0,
             "Accuracy V Speed tradeoff: 0 fastest, 100 most accurate"
             " (Values from AccuracyVSpeed enum in baseapi.h)");
   BOOL_VAR_H(tessedit_train_from_boxes_word_level, false,
              "Generate training data from boxed chars at word level.");
   STRING_VAR_H(tessedit_char_blacklist, "",
                "Blacklist of chars not to recognize");
   STRING_VAR_H(tessedit_char_whitelist, "",
                "Whitelist of chars to recognize");
   BOOL_VAR_H(global_tessedit_ambigs_training, false,
              "Perform training for ambiguities");

   // ===== ambigsrecog.cpp =================================================

   FILE* init_ambigs_training(const STRING& fname);
   void ambigs_training_segmented(const STRING& fname,
                                  PAGE_RES* page_res,
                                  volatile ETEXT_DESC* monitor,
                                  FILE* output_file);
   void ambigs_classify_and_output(PAGE_RES_IT* page_res_it,
                                   const char* label,
                                   FILE* output_file);

  private:
   Pix* pix_binary_;   // exposed through pix_binary()/mutable_pix_binary()
   FCOORD deskew_;
   FCOORD reskew_;     // exposed through reskew()
   bool hindi_image_;
 };

 }  // namespace tesseract


 #endif  // TESSERACT_CCMAIN_TESSERACTCLASS_H__
	///////////////////////////////////////////////////////////////////////
	// File: tesseractclass.h
	// Description: An instance of Tesseract. For thread safety, every
	// global variable goes in here, directly, or indirectly.
	// Author: Ray Smith
	// Created: Fri Mar 07 08:17:01 PST 2008
	//
	// (C) Copyright 2008, Google Inc.
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	// http://www.apache.org/licenses/LICENSE-2.0
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.
	//
	///////////////////////////////////////////////////////////////////////

	#ifndef TESSERACT_CCMAIN_TESSERACTCLASS_H__
	#define TESSERACT_CCMAIN_TESSERACTCLASS_H__

	#include "varable.h"
	#include "wordrec.h"
	#include "ocrclass.h"
	#include "control.h"
	#include "docqual.h"

	// Forward declarations, sorted by name.
	class BLOB_CHOICE_LIST_CLIST;
	class BLOCK_LIST;
	class CHAR_SAMPLES_LIST;
	class CHAR_SAMPLE_LIST;
	class IMAGE;
	class PAGE_RES;
	class PAGE_RES_IT;
	struct Pix;
	class ROW;
	class SVMenuNode;
	class TBOX;
	class TO_BLOCK_LIST;
	class WERD;
	class WERD_CHOICE;
	class WERD_RES;


	// Top-level class for all tesseract global instance data.
	// This class either holds or points to all data used by an instance
	// of Tesseract, including the memory allocator. When this is
	// complete, Tesseract will be thread-safe. UNTIL THEN, IT IS NOT!
	//
	// NOTE to developers: Do not create cyclic dependencies through this class!
	// The directory dependency tree must remain a tree! To keep this clean,
	// lower-level code (eg in ccutil, the bottom level) must never need to
	// know about the content of a higher-level directory.
	// The following scheme will grant the easiest access to lower-level
	// global members without creating a cyclic dependency:
	// ccmain inherits wordrec, includes textord as a member
	// wordrec inherits classify
	// classify inherits ccstruct, includes dict as a member
	// ccstruct inherits c_util, includes image as a member
	// c_util inherits cc_util
	// textord has a pointer to ccstruct, but doesn't own it.
	// dict has a pointer to ccstruct, but doesn't own it.
	//
	// NOTE: each level contains members that correspond to global
	// data that is defined (and used) at that level, not necessarily where
	// the type is defined. So, for instance:
	// BOOL_VAR (textord_show_blobs, FALSE, "Display unsorted blobs");
	// goes inside the Textord class, not the cc_util class.

	namespace tesseract {

	// One instance of the Tesseract engine at the ccmain level. Derives from
	// Wordrec and declares the page-level operations, grouped under banners that
	// name the header or source file each group is associated with.
	//
	// Convention used throughout this class: ROW, BLOCK, WERD, WERD_RES and the
	// list types are always handed over by pointer or reference, never by value.
	class Tesseract : public Wordrec {
	 public:
	  Tesseract();
	  ~Tesseract();

	  void Clear();

	  // Simple accessors.
	  const FCOORD& reskew() const {
	    return reskew_;
	  }
	  // Destroy any existing pix and return a pointer to the pointer.
	  // Clear() runs before the address of pix_binary_ is handed out.
	  Pix** mutable_pix_binary() {
	    Clear();
	    return &pix_binary_;
	  }
	  Pix* pix_binary() const {
	    return pix_binary_;
	  }

	  void SetBlackAndWhitelist();
	  int SegmentPage(const STRING* input_file,
	                  IMAGE* image, BLOCK_LIST* blocks);
	  int AutoPageSeg(int width, int height, int resolution,
	                  bool single_column, IMAGE* image,
	                  BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);

	  //// control.h /////////////////////////////////////////////////////////
	  void recog_all_words(               // process words
	      PAGE_RES *page_res,             // page structure
	      volatile ETEXT_DESC *monitor,   // progress monitor
	      TBOX *target_word_box=0L,
	      inT16 dopasses=0
	      );
	  void classify_word_pass1(           // recog one word
	      WERD_RES *word,                 // word to do
	      ROW *row,
	      BLOCK* block,
	      BOOL8 cluster_adapt,
	      CHAR_SAMPLES_LIST *char_clusters,
	      CHAR_SAMPLE_LIST *chars_waiting);
	  void recog_pseudo_word(             // recognize blobs
	      BLOCK_LIST *block_list,         // blocks to check
	      TBOX &selection_box);

	  // This method returns all the blobs in the specified blocks.
	  // It's the caller's responsibility to destroy the returned list.
	  C_BLOB_LIST* get_blobs_from_blocks(BLOCK_LIST* blocks  // blocks to look at.
	                                     );

	  // This method can be used to perform word-level training using box files.
	  // TODO: this can be modified to perform training in general case too.
	  void train_word_level_with_boxes(
	      const STRING& box_file,         // File with boxes.
	      const STRING& out_file,         // Output file.
	      BLOCK_LIST* blocks              // Blocks to use.
	      );
	  void fix_rep_char(WERD_RES *word);
	  void fix_quotes(                    // make double quotes
	      WERD_CHOICE *choice,            // choice to fix
	      WERD *word,                     // word to do
	      BLOB_CHOICE_LIST_CLIST *blob_choices);  // char choices
	  ACCEPTABLE_WERD_TYPE acceptable_word_string(const char *s,
	                                              const char *lengths);
	  void match_word_pass2(              // recog one word
	      WERD_RES *word,                 // word to do
	      ROW *row,
	      BLOCK* block,
	      float x_height);
	  void classify_word_pass2(           // word to do
	      WERD_RES *word,
	      BLOCK* block,
	      ROW *row);
	  BOOL8 recog_interactive(            // recognize blobs
	      BLOCK *block,                   // block
	      ROW *row,                       // row of word
	      WERD *word                      // word to recognize
	      );
	  void fix_hyphens(                   // crunch double hyphens
	      WERD_CHOICE *choice,            // choice to fix
	      WERD *word,                     // word to do
	      BLOB_CHOICE_LIST_CLIST *blob_choices);  // char choices
	  void set_word_fonts(
	      WERD_RES *word,                 // word to adapt to
	      BLOB_CHOICE_LIST_CLIST *blob_choices);  // detailed results
	  void font_recognition_pass(         // good chars in word
	      PAGE_RES_IT &page_res_it);

	  //// output.h //////////////////////////////////////////////////////////

	  void output_pass(                   // Tess output pass
	      PAGE_RES_IT &page_res_it,
	      BOOL8 write_to_shm,             // send to api
	      TBOX *target_word_box);
	  FILE *open_outfile(                 // open .map & .unlv file
	      const char *extension);
	  void write_results(                 // output a word
	      PAGE_RES_IT &page_res_it,       // full info
	      char newline_type,              // type of newline
	      BOOL8 force_eol,                // override tilde crunch?
	      BOOL8 write_to_shm              // send to api
	      );
	  void set_unlv_suspects(WERD_RES *word);
	  UNICHAR_ID get_rep_char(WERD_RES *word);  // what char is repeated?
	  BOOL8 acceptable_number_string(const char *s,
	                                 const char *lengths);
	  inT16 count_alphanums(const WERD_CHOICE &word);
	  inT16 count_alphas(const WERD_CHOICE &word);

	  //// tessedit.h ////////////////////////////////////////////////////////
	  void read_config_file(const char *filename, bool global_only);
	  int init_tesseract(const char *arg0,
	                     const char *textbase,
	                     const char *language,
	                     char **configs,
	                     int configs_size,
	                     bool configs_global_only);

	  int init_tesseract_lm(const char *arg0,
	                        const char *textbase,
	                        const char *language);

	  // Initializes the tesseract classifier without loading language models.
	  int init_tesseract_classifier(const char *arg0,
	                                const char *textbase,
	                                const char *language,
	                                char **configs,
	                                int configs_size,
	                                bool configs_global_only);

	  void recognize_page(STRING& image_name);
	  void end_tesseract();

	  bool init_tesseract_lang_data(const char *arg0,
	                                const char *textbase,
	                                const char *language,
	                                char **configs,
	                                int configs_size,
	                                bool configs_global_only);

	  //// pgedit.h //////////////////////////////////////////////////////////
	  SVMenuNode *build_menu_new();
	  void pgeditor_main(BLOCK_LIST *blocks);
	  void process_image_event(           // action in image win
	      const SVEvent &event);
	  void pgeditor_read_file(            // of serialised file
	      STRING &filename,
	      BLOCK_LIST *blocks              // block list to add to
	      );
	  void do_new_source(                 // serialise
	      );
	  BOOL8 process_cmd_win_event(        // UI command semantics
	      inT32 cmd_event,                // which menu item?
	      char *new_value                 // any prompt data
	      );

	  //// reject.h //////////////////////////////////////////////////////////
	  const char *char_ambiguities(char c);
	  void make_reject_map(               // make rej map for wd
	      WERD_RES *word,
	      BLOB_CHOICE_LIST_CLIST *blob_choices,  // detailed results
	      ROW *row,
	      inT16 pass                      // 1st or 2nd?
	      );
	  BOOL8 one_ell_conflict(WERD_RES *word_res, BOOL8 update_map);
	  inT16 first_alphanum_index(const char *word,
	                             const char *word_lengths);
	  inT16 first_alphanum_offset(const char *word,
	                              const char *word_lengths);
	  inT16 alpha_count(const char *word,
	                    const char *word_lengths);
	  BOOL8 word_contains_non_1_digit(const char *word,
	                                  const char *word_lengths);
	  void dont_allow_1Il(WERD_RES *word);
	  inT16 count_alphanums(              // how many alphanums
	      WERD_RES *word);
	  BOOL8 repeated_ch_string(const char *rep_ch_str,
	                           const char *lengths);
	  void flip_0O(WERD_RES *word);
	  BOOL8 non_0_digit(UNICHAR_ID unichar_id);
	  BOOL8 non_O_upper(UNICHAR_ID unichar_id);
	  // Word and row are passed by pointer, matching nn_match_word() below.
	  BOOL8 repeated_nonalphanum_wd(WERD_RES *word, ROW *row);
	  void nn_match_word(                 // Match a word
	      WERD_RES *word,
	      ROW *row);
	  // Word and row are passed by pointer, matching nn_match_word() above.
	  void nn_recover_rejects(WERD_RES *word, ROW *row);
	  BOOL8 test_ambig_word(              // test for ambiguity
	      WERD_RES *word);
	  void set_done(                      // set done flag
	      WERD_RES *word,
	      inT16 pass);
	  inT16 safe_dict_word(const WERD_CHOICE &word);
	  void flip_hyphens(WERD_RES *word);

	  //// adaptions.h ///////////////////////////////////////////////////////
	  void adapt_to_good_ems(WERD_RES *word,
	                         CHAR_SAMPLES_LIST *char_clusters,
	                         CHAR_SAMPLE_LIST *chars_waiting);
	  void adapt_to_good_samples(WERD_RES *word,
	                             CHAR_SAMPLES_LIST *char_clusters,
	                             CHAR_SAMPLE_LIST *chars_waiting);
	  BOOL8 word_adaptable(               // should we adapt?
	      WERD_RES *word,
	      uinT16 mode);
	  void reject_suspect_ems(WERD_RES *word);
	  void collect_ems_for_adaption(WERD_RES *word,
	                                CHAR_SAMPLES_LIST *char_clusters,
	                                CHAR_SAMPLE_LIST *chars_waiting);
	  void collect_characters_for_adaption(WERD_RES *word,
	                                       CHAR_SAMPLES_LIST *char_clusters,
	                                       CHAR_SAMPLE_LIST *chars_waiting);
	  void check_wait_list(CHAR_SAMPLE_LIST *chars_waiting,
	                       CHAR_SAMPLE *sample,
	                       CHAR_SAMPLES *best_cluster);
	  void cluster_sample(CHAR_SAMPLE *sample,
	                      CHAR_SAMPLES_LIST *char_clusters,
	                      CHAR_SAMPLE_LIST *chars_waiting);
	  void complete_clustering(CHAR_SAMPLES_LIST *char_clusters,
	                           CHAR_SAMPLE_LIST *chars_waiting);

	  //// tfacepp.cpp ///////////////////////////////////////////////////////
	  WERD_CHOICE *recog_word_recursive(  // recog one word
	      WERD *word,                     // word to do
	      DENORM *denorm,                 // de-normaliser
	      POLY_MATCHER matcher,           // matcher function
	      POLY_TESTER tester,             // tester function
	      POLY_TESTER trainer,            // trainer function
	      BOOL8 testing,                  // true if answer driven
	      WERD_CHOICE *&raw_choice,       // raw result
	      BLOB_CHOICE_LIST_CLIST *blob_choices,  // list of blob lists
	      WERD *&outword                  // bln word output
	      );
	  WERD_CHOICE *recog_word(            // recog one word
	      WERD *word,                     // word to do
	      DENORM *denorm,                 // de-normaliser
	      POLY_MATCHER matcher,           // matcher function
	      POLY_TESTER tester,             // tester function
	      POLY_TESTER trainer,            // trainer function
	      BOOL8 testing,                  // true if answer driven
	      WERD_CHOICE *&raw_choice,       // raw result
	      BLOB_CHOICE_LIST_CLIST *blob_choices,  // list of blob lists
	      WERD *&outword                  // bln word output
	      );
	  WERD_CHOICE *split_and_recog_word(  // recog one word
	      WERD *word,                     // word to do
	      DENORM *denorm,                 // de-normaliser
	      POLY_MATCHER matcher,           // matcher function
	      POLY_TESTER tester,             // tester function
	      POLY_TESTER trainer,            // trainer function
	      BOOL8 testing,                  // true if answer driven
	      WERD_CHOICE *&raw_choice,       // raw result
	      BLOB_CHOICE_LIST_CLIST *blob_choices,  // list of blob lists
	      WERD *&outword                  // bln word output
	      );

	  //// fixspace.cpp ///////////////////////////////////////////////////////
	  BOOL8 digit_or_numeric_punct(WERD_RES *word, int char_position);
	  inT16 eval_word_spacing(WERD_RES_LIST &word_res_list);
	  // Row and block are passed by pointer, as in fix_fuzzy_space_list().
	  void match_current_words(WERD_RES_LIST &words, ROW *row, BLOCK* block);
	  inT16 fp_eval_word_spacing(WERD_RES_LIST &word_res_list);
	  // Row and block are passed by pointer, as in fix_fuzzy_space_list().
	  void fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK* block);
	  void fix_fuzzy_space_list(          // space explorer
	      WERD_RES_LIST &best_perm,
	      ROW *row,
	      BLOCK* block);
	  // Row and block are passed by pointer, as in fix_fuzzy_space_list().
	  void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK* block);
	  void fix_fuzzy_spaces(              // find fuzzy words
	      volatile ETEXT_DESC *monitor,   // progress monitor
	      inT32 word_count,               // count of words in doc
	      PAGE_RES *page_res);

	  //// docqual.cpp ////////////////////////////////////////////////////////
	  GARBAGE_LEVEL garbage_word(WERD_RES *word, BOOL8 ok_dict_word);
	  BOOL8 potential_word_crunch(WERD_RES *word,
	                              GARBAGE_LEVEL garbage_level,
	                              BOOL8 ok_dict_word);
	  void tilde_crunch(PAGE_RES_IT &page_res_it);
	  void unrej_good_quality_words(      // unreject potential
	      PAGE_RES_IT &page_res_it);
	  void doc_and_block_rejection(       // reject big chunks
	      PAGE_RES_IT &page_res_it,
	      BOOL8 good_quality_doc);
	  void quality_based_rejection(PAGE_RES_IT &page_res_it,
	                               BOOL8 good_quality_doc);
	  void convert_bad_unlv_chs(WERD_RES *word_res);
	  void merge_tess_fails(WERD_RES *word_res);
	  void tilde_delete(PAGE_RES_IT &page_res_it);
	  void insert_rej_cblobs(WERD_RES *word);

	  //// pagewalk.cpp ///////////////////////////////////////////////////////
	  // word_processor is a pointer to a Tesseract member function taking
	  // (BLOCK*, ROW*, WERD*) and returning BOOL8, e.g. recog_interactive().
	  void
	  process_selected_words (
	      BLOCK_LIST * block_list,        // blocks to check
	      TBOX & selection_box,
	      // function to call
	      BOOL8 (tesseract::Tesseract::*word_processor) (
	          BLOCK *,
	          ROW *,
	          WERD *));

	  //// tessbox.cpp ///////////////////////////////////////////////////////
	  void tess_add_doc_word(             // test acceptability
	      WERD_CHOICE *word_choice        // after context
	      );
	  void tess_adapter(                  // adapt to word
	      WERD *word,                     // bln word
	      DENORM *denorm,                 // de-normalise
	      const WERD_CHOICE& choice,      // string for word
	      const WERD_CHOICE& raw_choice,  // before context
	      const char *rejmap              // reject map
	      );
	  WERD_CHOICE *test_segment_pass2(    // recog one word
	      WERD *word,                     // bln word to do
	      DENORM *denorm,                 // de-normaliser
	      POLY_MATCHER matcher,           // matcher function
	      POLY_TESTER tester,             // tester function
	      WERD_CHOICE *&raw_choice,       // raw result
	      BLOB_CHOICE_LIST_CLIST *blob_choices,  // list of blob lists
	      WERD *&outword                  // bln word output
	      );
	  WERD_CHOICE *tess_segment_pass1(    // recog one word
	      WERD *word,                     // bln word to do
	      DENORM *denorm,                 // de-normaliser
	      POLY_MATCHER matcher,           // matcher function
	      WERD_CHOICE *&raw_choice,       // raw result
	      BLOB_CHOICE_LIST_CLIST *blob_choices,  // list of blob lists
	      WERD *&outword                  // bln word output
	      );
	  WERD_CHOICE *tess_segment_pass2(    // recog one word
	      WERD *word,                     // bln word to do
	      DENORM *denorm,                 // de-normaliser
	      POLY_MATCHER matcher,           // matcher function
	      WERD_CHOICE *&raw_choice,       // raw result
	      BLOB_CHOICE_LIST_CLIST *blob_choices,  // list of blob lists
	      WERD *&outword                  // bln word output
	      );
	  WERD_CHOICE *correct_segment_pass2( // recog one word
	      WERD *word,                     // bln word to do
	      DENORM *denorm,                 // de-normaliser
	      POLY_MATCHER matcher,           // matcher function
	      POLY_TESTER tester,             // tester function
	      WERD_CHOICE *&raw_choice,       // raw result
	      BLOB_CHOICE_LIST_CLIST *blob_choices,  // list of blob lists
	      WERD *&outword                  // bln word output
	      );
	  void tess_default_matcher(          // call tess
	      PBLOB *pblob,                   // previous blob
	      PBLOB *blob,                    // blob to match
	      PBLOB *nblob,                   // next blob
	      WERD *word,                     // word it came from
	      DENORM *denorm,                 // de-normaliser
	      BLOB_CHOICE_LIST *ratings,      // list of results
	      const char* script
	      );
	  void tess_bn_matcher(               // call tess
	      PBLOB *pblob,                   // previous blob
	      PBLOB *blob,                    // blob to match
	      PBLOB *nblob,                   // next blob
	      WERD *word,                     // word it came from
	      DENORM *denorm,                 // de-normaliser
	      BLOB_CHOICE_LIST *ratings       // list of results
	      );
	  void tess_cn_matcher(               // call tess
	      PBLOB *pblob,                   // previous blob
	      PBLOB *blob,                    // blob to match
	      PBLOB *nblob,                   // next blob
	      WERD *word,                     // word it came from
	      DENORM *denorm,                 // de-normaliser
	      BLOB_CHOICE_LIST *ratings,      // list of results
	      // Sorted array of CP_RESULT_STRUCT from class pruner.
	      CLASS_PRUNER_RESULTS cpresults
	      );
	  BOOL8 tess_adaptable_word(          // test adaptability
	      WERD *word,                     // word to test
	      WERD_CHOICE *word_choice,       // after context
	      WERD_CHOICE *raw_choice         // before context
	      );
	  BOOL8 tess_acceptable_word(         // test acceptability
	      WERD_CHOICE *word_choice,       // after context
	      WERD_CHOICE *raw_choice         // before context
	      );

	  //// applybox.cpp //////////////////////////////////////////////////////
	  void apply_box_testing(BLOCK_LIST *block_list);
	  void apply_boxes(const STRING& fname,
	                   BLOCK_LIST *block_list  // real blocks
	                   );
	  // converts an array of boxes to a block list
	  // boxes points at box_cnt entries; block_list is the list handed in by
	  // the caller to receive the result, so both are passed by pointer.
	  int Boxes2BlockList(int box_cnt, TBOX *boxes, BLOCK_LIST *block_list,
	                      bool right2left);

	  //// blobcmp.cpp ///////////////////////////////////////////////////////
	  float compare_tess_blobs(TBLOB *blob1,
	                           TEXTROW *row1,
	                           TBLOB *blob2,
	                           TEXTROW *row2);

	  //// paircmp.cpp ///////////////////////////////////////////////////////
	  float compare_bln_blobs(            // match 2 blobs
	      PBLOB *blob1,                   // first blob
	      DENORM *denorm1,
	      PBLOB *blob2,                   // other blob
	      DENORM *denorm2);
	  float compare_blobs(                // match 2 blobs
	      PBLOB *blob1,                   // first blob
	      ROW *row1,                      // row it came from
	      PBLOB *blob2,                   // other blob
	      ROW *row2);
	  BOOL8 compare_blob_pairs(           // blob processor
	      BLOCK *,
	      ROW *row,                       // row it came from
	      WERD *,
	      PBLOB *blob                     // blob to compare
	      );

	  //// fixxht.cpp ///////////////////////////////////////////////////////
	  void check_block_occ(WERD_RES *word_res);

	  //// Data members ///////////////////////////////////////////////////////
	  // Each *_VAR_H line declares one configurable variable: name, default
	  // value, and help text.
	  BOOL_VAR_H(tessedit_resegment_from_boxes, false,
	             "Take segmentation and labeling from box file");
	  BOOL_VAR_H(tessedit_train_from_boxes, false,
	             "Generate training data from boxed chars");
	  BOOL_VAR_H(tessedit_dump_pageseg_images, false,
	             "Dump itermediate images made during page segmentation");
	  INT_VAR_H(tessedit_pageseg_mode, 2,
	            "Page seg mode: 0=auto, 1=col, 2=block, 3=line, 4=word, 6=char"
	            " (Values from PageSegMode enum in baseapi.h)");
	  INT_VAR_H(tessedit_accuracyvspeed, 0,
	            "Accuracy V Speed tradeoff: 0 fastest, 100 most accurate"
	            " (Values from AccuracyVSpeed enum in baseapi.h)");
	  BOOL_VAR_H(tessedit_train_from_boxes_word_level, false,
	             "Generate training data from boxed chars at word level.");
	  STRING_VAR_H(tessedit_char_blacklist, "",
	               "Blacklist of chars not to recognize");
	  STRING_VAR_H(tessedit_char_whitelist, "",
	               "Whitelist of chars to recognize");
	  BOOL_VAR_H(global_tessedit_ambigs_training, false,
	             "Perform training for ambiguities");

	  //// ambigsrecog.cpp /////////////////////////////////////////////////////////
	  FILE *init_ambigs_training(const STRING &fname);
	  void ambigs_training_segmented(const STRING &fname,
	                                 PAGE_RES *page_res,
	                                 volatile ETEXT_DESC *monitor,
	                                 FILE *output_file);
	  void ambigs_classify_and_output(PAGE_RES_IT *page_res_it,
	                                  const char *label,
	                                  FILE *output_file);

	 private:
	  Pix* pix_binary_;     // exposed through pix_binary()/mutable_pix_binary()
	  FCOORD deskew_;
	  FCOORD reskew_;       // exposed read-only through reskew()
	  bool hindi_image_;
	};

	} // namespace tesseract


	#endif // TESSERACT_CCMAIN_TESSERACTCLASS_H__