| /********************************************************************** |
| * File: tessbox.cpp (Formerly tessbox.c) |
| * Description: Black boxed Tess for developing a resaljet. |
| * Author: Ray Smith |
| * Created: Thu Apr 23 11:03:36 BST 1992 |
| * |
| * (C) Copyright 1992, Hewlett-Packard Ltd. |
| ** Licensed under the Apache License, Version 2.0 (the "License"); |
| ** you may not use this file except in compliance with the License. |
| ** You may obtain a copy of the License at |
| ** http://www.apache.org/licenses/LICENSE-2.0 |
| ** Unless required by applicable law or agreed to in writing, software |
| ** distributed under the License is distributed on an "AS IS" BASIS, |
| ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| ** See the License for the specific language governing permissions and |
| ** limitations under the License. |
| * |
| **********************************************************************/ |
| |
| #include "mfcpch.h" |
| #include "tfacep.h" |
| #include "tfacepp.h" |
| #include "tessbox.h" |
| #include "mfoutline.h" |
| #include "tesseractclass.h" |
| |
| #define EXTERN |
| |
| /********************************************************************** |
| * tess_segment_pass1 |
| * |
| * Segment a word using the pass1 conditions of the tess segmenter. |
| **********************************************************************/ |
| |
| namespace tesseract { |
| WERD_CHOICE *Tesseract::tess_segment_pass1( //recog one word |
| WERD *word, //bln word to do |
| DENORM *denorm, //de-normaliser |
| //matcher function |
| POLY_MATCHER matcher, |
| //raw result |
| WERD_CHOICE *&raw_choice, |
| //list of blob lists |
| BLOB_CHOICE_LIST_CLIST *blob_choices, |
| WERD *&outword //bln word output |
| ) { |
| WERD_CHOICE *result; //return value |
| int saved_enable_assoc = 0; |
| int saved_chop_enable = 0; |
| |
| if (word->flag (W_DONT_CHOP)) { |
| saved_enable_assoc = wordrec_enable_assoc; |
| saved_chop_enable = chop_enable; |
| wordrec_enable_assoc.set_value(0); |
| chop_enable.set_value(0); |
| if (word->flag (W_REP_CHAR)) |
| permute_only_top = 1; |
| } |
| set_pass1(); |
| // tprintf("pass1 chop on=%d, seg=%d, onlytop=%d",chop_enable,enable_assoc,permute_only_top); |
| result = recog_word (word, denorm, matcher, NULL, NULL, FALSE, |
| raw_choice, blob_choices, outword); |
| if (word->flag (W_DONT_CHOP)) { |
| wordrec_enable_assoc.set_value(saved_enable_assoc); |
| chop_enable.set_value(saved_chop_enable); |
| permute_only_top = 0; |
| } |
| return result; |
| } |
| |
| |
| /********************************************************************** |
| * tess_segment_pass2 |
| * |
| * Segment a word using the pass2 conditions of the tess segmenter. |
| **********************************************************************/ |
| |
| WERD_CHOICE *Tesseract::tess_segment_pass2( //recog one word |
| WERD *word, //bln word to do |
| DENORM *denorm, //de-normaliser |
| //matcher function |
| POLY_MATCHER matcher, |
| //raw result |
| WERD_CHOICE *&raw_choice, |
| //list of blob lists |
| BLOB_CHOICE_LIST_CLIST *blob_choices, |
| WERD *&outword //bln word output |
| ) { |
| WERD_CHOICE *result; //return value |
| int saved_enable_assoc = 0; |
| int saved_chop_enable = 0; |
| |
| if (word->flag (W_DONT_CHOP)) { |
| saved_enable_assoc = wordrec_enable_assoc; |
| saved_chop_enable = chop_enable; |
| wordrec_enable_assoc.set_value(0); |
| chop_enable.set_value(0); |
| if (word->flag (W_REP_CHAR)) |
| permute_only_top = 1; |
| } |
| set_pass2(); |
| result = recog_word (word, denorm, matcher, NULL, NULL, FALSE, |
| raw_choice, blob_choices, outword); |
| if (word->flag (W_DONT_CHOP)) { |
| wordrec_enable_assoc.set_value(saved_enable_assoc); |
| chop_enable.set_value(saved_chop_enable); |
| permute_only_top = 0; |
| } |
| return result; |
| } |
| |
| |
| /********************************************************************** |
| * correct_segment_pass2 |
| * |
| * Segment a word correctly using the pass2 conditions of the tess segmenter. |
| * Then call the tester with all the correctly segmented blobs. |
| * If the correct segmentation cannot be found, the tester is called |
| * with the segmentation found by tess and all the correct flags set to |
| * false and all strings are NULL. |
| **********************************************************************/ |
| |
| WERD_CHOICE *Tesseract::correct_segment_pass2( //recog one word |
| WERD *word, //bln word to do |
| //de-normaliser |
| DENORM *denorm, |
| //matcher function |
| POLY_MATCHER matcher, |
| //tester function |
| POLY_TESTER tester, |
| //raw result |
| WERD_CHOICE *&raw_choice, |
| //list of blob lists |
| BLOB_CHOICE_LIST_CLIST *blob_choices, |
| //bln word output |
| WERD *&outword |
| ) { |
| set_pass2(); |
| return recog_word (word, denorm, matcher, NULL, tester, TRUE, |
| raw_choice, blob_choices, outword); |
| } |
| |
| |
| /********************************************************************** |
| * test_segment_pass2 |
| * |
| * Segment a word correctly using the pass2 conditions of the tess segmenter. |
| * Then call the tester on all words used by tess in its search. |
| * Do this only on words where the correct segmentation could be found. |
| **********************************************************************/ |
| WERD_CHOICE *Tesseract::test_segment_pass2( //recog one word |
| WERD *word, //bln word to do |
| //de-normaliser |
| DENORM *denorm, |
| //matcher function |
| POLY_MATCHER matcher, |
| //tester function |
| POLY_TESTER tester, |
| //raw result |
| WERD_CHOICE *&raw_choice, |
| //list of blob lists |
| BLOB_CHOICE_LIST_CLIST *blob_choices, |
| //bln word output |
| WERD *&outword |
| ) { |
| set_pass2(); |
| return recog_word (word, denorm, matcher, tester, NULL, TRUE, |
| raw_choice, blob_choices, outword); |
| } |
| |
| |
| /********************************************************************** |
| * tess_acceptable_word |
| * |
| * Return true if the word is regarded as "good enough". |
| **********************************************************************/ |
| BOOL8 Tesseract::tess_acceptable_word( |
| WERD_CHOICE *word_choice, // after context |
| WERD_CHOICE *raw_choice) { // before context |
| return getDict().AcceptableResult(*word_choice, *raw_choice); |
| } |
| |
| |
| /********************************************************************** |
| * tess_adaptable_word |
| * |
| * Return true if the word is regarded as "good enough". |
| **********************************************************************/ |
| BOOL8 Tesseract::tess_adaptable_word( // test adaptability |
| WERD *word, // word to test |
| WERD_CHOICE *best_choice, // after context |
| WERD_CHOICE *raw_choice // before context |
| ) { |
| TWERD *tessword = make_tess_word(word, NULL); |
| int result = (tessword && best_choice && raw_choice && |
| AdaptableWord(tessword, *best_choice, *raw_choice)); |
| delete_word(tessword); |
| return result != 0; |
| } |
| |
| |
| /********************************************************************** |
| * tess_cn_matcher |
| * |
| * Match a blob using the Tess Char Normalized (non-adaptive) matcher |
| * only. |
| **********************************************************************/ |
| |
| void Tesseract::tess_cn_matcher( //call tess |
| PBLOB *pblob, //previous blob |
| PBLOB *blob, //blob to match |
| PBLOB *nblob, //next blob |
| WERD *word, //word it came from |
| DENORM *denorm, //de-normaliser |
| BLOB_CHOICE_LIST *ratings, //list of results |
| CLASS_PRUNER_RESULTS cpresults // may be null. |
| ) { |
| TBLOB *tessblob; //converted blob |
| TEXTROW tessrow; //dummy row |
| |
| tess_cn_matching.set_value(true); //turn it on |
| tess_bn_matching.set_value(false); |
| //convert blob |
| tessblob = make_rotated_tess_blob(denorm, blob, true); |
| //make dummy row |
| make_tess_row(denorm, &tessrow); |
| //classify |
| AdaptiveClassifier(tessblob, NULL, &tessrow, ratings, cpresults); |
| free_blob(tessblob); |
| } |
| |
| |
| /********************************************************************** |
| * tess_bn_matcher |
| * |
| * Match a blob using the Tess Baseline Normalized (adaptive) matcher |
| * only. |
| **********************************************************************/ |
| |
| void Tesseract::tess_bn_matcher( //call tess |
| PBLOB *pblob, //previous blob |
| PBLOB *blob, //blob to match |
| PBLOB *nblob, //next blob |
| WERD *word, //word it came from |
| DENORM *denorm, //de-normaliser |
| BLOB_CHOICE_LIST *ratings //list of results |
| ) { |
| TBLOB *tessblob; //converted blob |
| TEXTROW tessrow; //dummy row |
| |
| tess_bn_matching.set_value(true); //turn it on |
| tess_cn_matching.set_value(false); |
| //convert blob |
| tessblob = make_rotated_tess_blob(denorm, blob, true); |
| //make dummy row |
| make_tess_row(denorm, &tessrow); |
| //classify |
| AdaptiveClassifier(tessblob, NULL, &tessrow, ratings, NULL); |
| free_blob(tessblob); |
| } |
| |
| |
| /********************************************************************** |
| * tess_default_matcher |
| * |
| * Match a blob using the default functionality of the Tess matcher. |
| **********************************************************************/ |
| |
| void Tesseract::tess_default_matcher( //call tess |
| PBLOB *pblob, //previous blob |
| PBLOB *blob, //blob to match |
| PBLOB *nblob, //next blob |
| WERD *word, //word it came from |
| DENORM *denorm, //de-normaliser |
| //list of results |
| BLOB_CHOICE_LIST *ratings, |
| const char* script |
| ) { |
| assert(ratings != NULL); |
| TBLOB *tessblob; //converted blob |
| TEXTROW tessrow; //dummy row |
| |
| tess_bn_matching.set_value(false); //turn it off |
| tess_cn_matching.set_value(false); |
| //convert blob |
| tessblob = make_rotated_tess_blob(denorm, blob, true); |
| //make dummy row |
| make_tess_row(denorm, &tessrow); |
| //classify |
| AdaptiveClassifier (tessblob, NULL, &tessrow, ratings, NULL); |
| free_blob(tessblob); |
| } |
| } // namespace tesseract |
| |
| |
| /********************************************************************** |
| * tess_training_tester |
| * |
| * Matcher tester function which actually trains tess. |
| **********************************************************************/ |
| |
| void tess_training_tester( //call tess |
| const STRING& filename, //filename to output |
| PBLOB *blob, //blob to match |
| DENORM *denorm, //de-normaliser |
| BOOL8 correct, //ly segmented |
| char *text, //correct text |
| inT32 count, //chars in text |
| BLOB_CHOICE_LIST *ratings //list of results |
| ) { |
| TBLOB *tessblob; //converted blob |
| TEXTROW tessrow; //dummy row |
| |
| if (correct) { |
| classify_norm_method.set_value(character); // force char norm spc 30/11/93 |
| tess_bn_matching.set_value(false); //turn it off |
| tess_cn_matching.set_value(false); |
| //convert blob |
| tessblob = make_tess_blob (blob, TRUE); |
| //make dummy row |
| make_tess_row(denorm, &tessrow); |
| //learn it |
| LearnBlob(filename, tessblob, &tessrow, text); |
| free_blob(tessblob); |
| } |
| } |
| |
| |
| /********************************************************************** |
| * tess_adapter |
| * |
| * Adapt to the word using the Tesseract mechanism. |
| **********************************************************************/ |
| |
| namespace tesseract { |
| void Tesseract::tess_adapter( //adapt to word |
| WERD *word, //bln word |
| DENORM *denorm, //de-normalise |
| const WERD_CHOICE& choice, //string for word |
| const WERD_CHOICE& raw_choice, //before context |
| const char *rejmap //reject map |
| ) { |
| TWERD *tessword; //converted word |
| static TEXTROW tessrow; //dummy row |
| |
| //make dummy row |
| make_tess_row(denorm, &tessrow); |
| //make a word |
| tessword = make_tess_word (word, &tessrow); |
| AdaptToWord(tessword, &tessrow, choice, raw_choice, rejmap); |
| //adapt to it |
| delete_word(tessword); //free it |
| } |
| |
| |
| /********************************************************************** |
| * tess_add_doc_word |
| * |
| * Add the given word to the document dictionary |
| **********************************************************************/ |
| void Tesseract::tess_add_doc_word(WERD_CHOICE *word_choice) { |
| getDict().add_document_word(*word_choice); |
| } |
| } // namespace tesseract |