| /********************************************************************** |
| * File: ratngs.cpp (Formerly ratings.c) |
| * Description: Code to manipulate the BLOB_CHOICE and WERD_CHOICE classes. |
| * Author: Ray Smith |
| * Created: Thu Apr 23 13:23:29 BST 1992 |
| * |
| * (C) Copyright 1992, Hewlett-Packard Ltd. |
| ** Licensed under the Apache License, Version 2.0 (the "License"); |
| ** you may not use this file except in compliance with the License. |
| ** You may obtain a copy of the License at |
| ** http://www.apache.org/licenses/LICENSE-2.0 |
| ** Unless required by applicable law or agreed to in writing, software |
| ** distributed under the License is distributed on an "AS IS" BASIS, |
| ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| ** See the License for the specific language governing permissions and |
| ** limitations under the License. |
| * |
| **********************************************************************/ |
| |
| #include "ratngs.h" |
| |
| #include "callcpp.h" |
| #include "genericvector.h" |
| #include "mfcpch.h" |
| #include "unicharset.h" |
| |
| extern FILE *matcher_fp; |
| |
| ELISTIZE (BLOB_CHOICE) CLISTIZE (BLOB_CHOICE_LIST) CLISTIZE (WERD_CHOICE) |
| //extern FILE* matcher_fp; |
| |
| /********************************************************************** |
| * BLOB_CHOICE::BLOB_CHOICE |
| * |
| * Constructor to build a BLOB_CHOICE from a char, rating and certainty. |
| **********************************************************************/ |
| BLOB_CHOICE::BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id |
| float src_rating, // rating |
| float src_cert, // certainty |
| inT8 src_config, // config (font) |
| int src_script_id // script |
| ) { |
| unichar_id_ = src_unichar_id; |
| rating_ = src_rating; |
| certainty_ = src_cert; |
| config_ = src_config; |
| script_id_ = src_script_id; |
| } |
| |
| /********************************************************************** |
| * BLOB_CHOICE::BLOB_CHOICE |
| * |
| * Constructor to build a BLOB_CHOICE from another BLOB_CHOICE. |
| **********************************************************************/ |
| BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) { |
| unichar_id_ = other.unichar_id(); |
| rating_ = other.rating(); |
| certainty_ = other.certainty(); |
| config_ = other.config(); |
| script_id_ = other.script_id(); |
| } |
| |
| /********************************************************************** |
| * WERD_CHOICE::WERD_CHOICE |
| * |
| * Constructor to build a WERD_CHOICE from the given string, |
| * fragment lengths, rating, certainty and permuter. |
| * |
| * The function assumes that src_string is not NULL. |
| * src_lengths argument could be NULL, in which case the unichars |
| * in src_string are assumed to all be of length 1. |
| **********************************************************************/ |
| WERD_CHOICE::WERD_CHOICE( |
| const char *src_string, |
| const char *src_lengths, |
| float src_rating, |
| float src_certainty, |
| uinT8 src_permuter, |
| const UNICHARSET ¤t_unicharset) { |
| int src_string_len = strlen(src_string); |
| if (src_string_len == 0) { |
| this->init(8); |
| } else { |
| this->init(src_lengths ? strlen(src_lengths): src_string_len); |
| length_ = reserved_; |
| int offset = 0; |
| for (int i = 0; i < length_; ++i) { |
| int unichar_length = src_lengths ? src_lengths[i] : 1; |
| unichar_ids_[i] = |
| current_unicharset.unichar_to_id(src_string+offset, unichar_length); |
| fragment_lengths_[i] = 1; |
| offset += unichar_length; |
| } |
| } |
| rating_ = src_rating; |
| certainty_ = src_certainty; |
| permuter_ = src_permuter; |
| } |
| |
| /********************************************************************** |
| * WERD_CHOICE::~WERD_CHOICE |
| **********************************************************************/ |
| WERD_CHOICE::~WERD_CHOICE() { |
| delete[] unichar_ids_; |
| delete[] fragment_lengths_; |
| delete_blob_choices(); |
| } |
| |
| |
| /********************************************************************** |
| * WERD_CHOICE::set_blob_choices |
| * |
| * Delete current blob_choices. Set the blob_choices to the given new |
| * list. |
| **********************************************************************/ |
| void WERD_CHOICE::set_blob_choices(BLOB_CHOICE_LIST_CLIST *blob_choices) { |
| if (blob_choices_ != blob_choices) { |
| delete_blob_choices(); |
| blob_choices = blob_choices; |
| } |
| } |
| |
| |
| /********************************************************************** |
| * contains_unichar_id |
| * |
| * Returns true if unichar_ids_ contain the given unichar_id, false otherwise. |
| ************************************************************************/ |
| bool WERD_CHOICE::contains_unichar_id(UNICHAR_ID unichar_id) const { |
| for (int i = 0; i < length_; ++i) { |
| if (unichar_ids_[i] == unichar_id) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /********************************************************************** |
| * remove_unichar_id |
| * |
| * Removes unichar id at the given index from unichar_ids_ and updates |
| * length_, unichar_ids_ and fragment_lengths_ to reflect this change. |
| * Note: this function does not modify rating_ and certainty_. |
| ***********************************************************************/ |
| void WERD_CHOICE::remove_unichar_id(int index) { |
| ASSERT_HOST(index < length_); |
| for (int i = index; i < length_-1; ++i) { |
| unichar_ids_[i] = unichar_ids_[i+1]; |
| fragment_lengths_[i] = fragment_lengths_[i+1]; |
| } |
| length_--; |
| } |
| |
| /********************************************************************** |
| * string_and_lengths |
| * |
| * Populates the given word_str with unichars from unichar_ids and |
| * word_lengths_str with the corresponding unichar lengths. |
| * Uses current_unicharset to make unichar id -> unichar conversions. |
| **********************************************************************/ |
| void WERD_CHOICE::string_and_lengths(const UNICHARSET ¤t_unicharset, |
| STRING *word_str, |
| STRING *word_lengths_str) const { |
| *word_str = ""; |
| *word_lengths_str = ""; |
| for (int i = 0; i < length_; ++i) { |
| const char *ch = current_unicharset.id_to_unichar(unichar_ids_[i]); |
| *word_str += ch; |
| if (word_lengths_str != NULL) { |
| *word_lengths_str += strlen(ch); |
| } |
| } |
| } |
| |
| /********************************************************************** |
| * append_unichar_id |
| * |
| * Make sure there is enough space in the word for the new unichar id |
| * and call append_unichar_id_space_allocated(). |
| **********************************************************************/ |
| void WERD_CHOICE::append_unichar_id( |
| UNICHAR_ID unichar_id, char fragment_length, |
| float rating, float certainty) { |
| if (length_ == reserved_) { |
| this->double_the_size(); |
| } |
| this->append_unichar_id_space_allocated(unichar_id, fragment_length, |
| rating, certainty); |
| } |
| |
| /********************************************************************** |
| * WERD_CHOICE::operator+= |
| * |
| * Cat a second word rating on the end of this current one. |
| * The ratings are added and the confidence is the min. |
| * If the permuters are NOT the same the permuter is set to COMPOUND_PERM |
| **********************************************************************/ |
| WERD_CHOICE & WERD_CHOICE::operator+= (const WERD_CHOICE & second) { |
| // TODO(daria): find out why the choice was cleared this way if any |
| // of the pieces are empty. Add the description of this behavior |
| // to the comments. |
| // if (word_string.length () == 0 || second.word_string.length () == 0) { |
| // word_string = NULL; //make it empty |
| // word_lengths = NULL; |
| // delete_blob_choices(); |
| // } else { |
| while (reserved_ < length_ + second.length()) { |
| this->double_the_size(); |
| } |
| const UNICHAR_ID *other_unichar_ids = second.unichar_ids(); |
| const char *other_fragment_lengths = second.fragment_lengths(); |
| for (int i = 0; i < second.length(); ++i) { |
| unichar_ids_[length_ + i] = other_unichar_ids[i]; |
| fragment_lengths_[length_ + i] = other_fragment_lengths[i]; |
| } |
| length_ += second.length(); |
| rating_ += second.rating(); // add ratings |
| if (second.certainty() < certainty_) // take min |
| certainty_ = second.certainty(); |
| if (permuter_ == NO_PERM) { |
| permuter_ = second.permuter(); |
| } else if (second.permuter() != NO_PERM && |
| second.permuter() != permuter_) { |
| permuter_ = COMPOUND_PERM; |
| } |
| unichar_string_ += second.unichar_string(); |
| unichar_lengths_ += second.unichar_lengths(); |
| |
| // Append a deep copy of second blob_choices if it exists. |
| if (second.blob_choices_ != NULL) { |
| if (this->blob_choices_ == NULL) |
| this->blob_choices_ = new BLOB_CHOICE_LIST_CLIST; |
| |
| BLOB_CHOICE_LIST_C_IT this_blob_choices_it; |
| BLOB_CHOICE_LIST_C_IT second_blob_choices_it; |
| |
| this_blob_choices_it.set_to_list(this->blob_choices_); |
| this_blob_choices_it.move_to_last(); |
| |
| second_blob_choices_it.set_to_list(second.blob_choices_); |
| |
| for (second_blob_choices_it.mark_cycle_pt(); |
| !second_blob_choices_it.cycled_list(); |
| second_blob_choices_it.forward()) { |
| |
| BLOB_CHOICE_LIST* blob_choices_copy = new BLOB_CHOICE_LIST(); |
| blob_choices_copy->deep_copy(second_blob_choices_it.data()); |
| |
| this_blob_choices_it.add_after_then_move(blob_choices_copy); |
| } |
| } |
| return *this; |
| } |
| |
| |
| /********************************************************************** |
| * WERD_CHOICE::operator= |
| * |
| * Allocate enough memory to hold a copy of source and copy over |
| * all the information from source to this WERD_CHOICE. |
| **********************************************************************/ |
| WERD_CHOICE& WERD_CHOICE::operator=(const WERD_CHOICE& source) { |
| while (reserved_ < source.length()) { |
| this->double_the_size(); |
| } |
| |
| const UNICHAR_ID *other_unichar_ids = source.unichar_ids(); |
| const char *other_fragment_lengths = source.fragment_lengths(); |
| for (int i = 0; i < source.length(); ++i) { |
| unichar_ids_[i] = other_unichar_ids[i]; |
| fragment_lengths_[i] = other_fragment_lengths[i]; |
| } |
| length_ = source.length(); |
| rating_ = source.rating(); |
| certainty_ = source.certainty(); |
| permuter_ = source.permuter(); |
| fragment_mark_ = source.fragment_mark(); |
| unichar_string_ = source.unichar_string(); |
| unichar_lengths_ = source.unichar_lengths(); |
| |
| // Delete existing blob_choices |
| this->delete_blob_choices(); |
| |
| // Deep copy blob_choices of source |
| if (source.blob_choices_ != NULL) { |
| BLOB_CHOICE_LIST_C_IT this_blob_choices_it; |
| BLOB_CHOICE_LIST_C_IT source_blob_choices_it; |
| |
| this->blob_choices_ = new BLOB_CHOICE_LIST_CLIST(); |
| |
| this_blob_choices_it.set_to_list(this->blob_choices_); |
| source_blob_choices_it.set_to_list(source.blob_choices_); |
| |
| for (source_blob_choices_it.mark_cycle_pt(); |
| !source_blob_choices_it.cycled_list(); |
| source_blob_choices_it.forward()) { |
| |
| BLOB_CHOICE_LIST* blob_choices_copy = new BLOB_CHOICE_LIST(); |
| blob_choices_copy->deep_copy(source_blob_choices_it.data()); |
| |
| this_blob_choices_it.add_after_then_move(blob_choices_copy); |
| } |
| } |
| return *this; |
| } |
| |
| /********************************************************************** |
| * WERD_CHOICE::delete_blob_choices |
| * |
| * Clear the blob_choices list, delete it and set it to NULL. |
| **********************************************************************/ |
| void WERD_CHOICE::delete_blob_choices() { |
| if (blob_choices_ != NULL) { |
| blob_choices_->deep_clear(); |
| delete blob_choices_; |
| blob_choices_ = NULL; |
| } |
| } |
| |
| /********************************************************************** |
| * WERD_CHOICE::print |
| * |
| * Print WERD_CHOICE to stdout. |
| **********************************************************************/ |
| const void WERD_CHOICE::print() const { |
| tprintf("WERD_CHOICE:\n"); |
| tprintf("length_ %d reserved_ %d permuter_ %d\n", |
| length_, reserved_, permuter_); |
| tprintf("rating_ %.4f certainty_ %.4f", rating_, certainty_); |
| if (fragment_mark_) { |
| tprintf(" fragment_mark_ true"); |
| } |
| tprintf("\n"); |
| if (unichar_string_.length() > 0) { |
| tprintf("unichar_string_ %s unichar_lengths_ %s\n", |
| unichar_string_.string(), unichar_lengths_.string()); |
| } |
| tprintf("unichar_ids: "); |
| int i; |
| for (i = 0; i < length_; ++i) { |
| tprintf("%d ", unichar_ids_[i]); |
| } |
| tprintf("\nfragment_lengths_: "); |
| for (i = 0; i < length_; ++i) { |
| tprintf("%d ", fragment_lengths_[i]); |
| } |
| tprintf("\n"); |
| fflush(stdout); |
| } |
| |
| /********************************************************************** |
| * print_ratings_list |
| * |
| * Send all the ratings out to the logfile. |
| **********************************************************************/ |
| void print_ratings_list( |
| const char *msg, // intro message |
| BLOB_CHOICE_LIST *ratings, // list of results |
| const UNICHARSET ¤t_unicharset // unicharset that can be used |
| // for id-to-unichar conversion |
| ) { |
| if (ratings->length() == 0) { |
| tprintf("%s:<none>\n", msg); |
| return; |
| } |
| if (*msg != '\0') { |
| tprintf("%s\n", msg); |
| } |
| BLOB_CHOICE_IT c_it; |
| c_it.set_to_list(ratings); |
| for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { |
| tprintf("r%.2f c%.2f : %d %s", |
| c_it.data()->rating(), c_it.data()->certainty(), |
| c_it.data()->unichar_id(), |
| current_unicharset.debug_str(c_it.data()->unichar_id()).string()); |
| if (!c_it.at_last()) { |
| tprintf("\n"); |
| } |
| } |
| tprintf("\n"); |
| fflush(stdout); |
| } |
| |
| /********************************************************************** |
| * print_ratings_list |
| * |
| * Print ratings list (unichar ids only). |
| **********************************************************************/ |
| void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings) { |
| if (ratings->length() == 0) { |
| tprintf("%s:<none>\n", msg); |
| return; |
| } |
| if (*msg != '\0') { |
| tprintf("%s\n", msg); |
| } |
| BLOB_CHOICE_IT c_it; |
| c_it.set_to_list(ratings); |
| for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { |
| tprintf("r%.2f c%.2f : %d", c_it.data()->rating(), |
| c_it.data()->certainty(), c_it.data()->unichar_id()); |
| if (!c_it.at_last()) { |
| tprintf("\n"); |
| } |
| } |
| tprintf("\n"); |
| fflush(stdout); |
| } |
| |
| /********************************************************************** |
| * print_ratings_info |
| * |
| * Send all the ratings out to the logfile. |
| **********************************************************************/ |
| void print_ratings_info( |
| FILE *fp, // file to use |
| BLOB_CHOICE_LIST *ratings, // list of results |
| const UNICHARSET ¤t_unicharset // unicharset that can be used |
| // for id-to-unichar conversion |
| ) { |
| inT32 index; // to list |
| inT32 best_index; // to list |
| FLOAT32 best_rat; // rating |
| FLOAT32 best_cert; // certainty |
| const char* first_char = NULL; // character |
| FLOAT32 first_rat; // rating |
| FLOAT32 first_cert; // certainty |
| const char* sec_char = NULL; // character |
| FLOAT32 sec_rat = 0.0f; // rating |
| FLOAT32 sec_cert = 0.0f; // certainty |
| BLOB_CHOICE_IT c_it = ratings; // iterator |
| |
| index = ratings->length(); |
| if (index > 0) { |
| first_char = current_unicharset.id_to_unichar(c_it.data()->unichar_id()); |
| first_rat = c_it.data()->rating(); |
| first_cert = -c_it.data()->certainty(); |
| if (index > 1) { |
| sec_char = current_unicharset.id_to_unichar( |
| c_it.data_relative(1)->unichar_id()); |
| sec_rat = c_it.data_relative(1)->rating(); |
| sec_cert = -c_it.data_relative(1)->certainty(); |
| } else { |
| sec_char = NULL; |
| sec_rat = -1; |
| sec_cert = -1; |
| } |
| } else { |
| first_char = NULL; |
| first_rat = -1; |
| first_cert = -1; |
| } |
| best_index = -1; |
| best_rat = -1; |
| best_cert = -1; |
| for (index = 0, c_it.mark_cycle_pt(); !c_it.cycled_list(); |
| c_it.forward(), index++) { |
| if (strcmp(current_unicharset.id_to_unichar(c_it.data()->unichar_id()), |
| blob_answer) == 0) { |
| best_index = index; |
| best_rat = c_it.data()->rating(); |
| best_cert = -c_it.data()->certainty(); |
| } |
| } |
| if (first_char != NULL && (*first_char == '\0' || *first_char == ' ')) |
| first_char = NULL; |
| if (sec_char != NULL && (*sec_char == '\0' || *sec_char == ' ')) |
| sec_char = NULL; |
| fprintf(matcher_fp, |
| " " INT32FORMAT " " INT32FORMAT " %g %g %s %g %g %s %g %g\n", |
| ratings->length(), best_index, best_rat, best_cert, |
| first_char != NULL ? first_char : "~", |
| first_rat, first_cert, sec_char != NULL ? sec_char : "~", |
| sec_rat, sec_cert); |
| } |