/**********************************************************************
 * File: ratngs.h (Formerly ratings.h)
 * Description: Definition of the WERD_CHOICE and BLOB_CHOICE classes.
 * Author: Ray Smith
 * Created: Thu Apr 23 11:40:38 BST 1992
 *
 * (C) Copyright 1992, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/
| |
| #ifndef RATNGS_H |
| #define RATNGS_H |
| |
| #include <assert.h> |
| |
| #include "clst.h" |
| #include "genericvector.h" |
| #include "notdll.h" |
| #include "unichar.h" |
| #include "unicharset.h" |
| #include "werd.h" |
| |
| class BLOB_CHOICE: public ELIST_LINK |
| { |
| public: |
| BLOB_CHOICE() { |
| unichar_id_ = INVALID_UNICHAR_ID; |
| config_ = '\0'; |
| rating_ = MAX_FLOAT32; |
| certainty_ = -MAX_FLOAT32; |
| script_id_ = -1; |
| } |
| BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id |
| float src_rating, // rating |
| float src_cert, // certainty |
| inT8 src_config, // config (font) |
| int script_id); // script |
| BLOB_CHOICE(const BLOB_CHOICE &other); |
| ~BLOB_CHOICE() {} |
| UNICHAR_ID unichar_id() const { |
| return unichar_id_; |
| } |
| float rating() const { |
| return rating_; |
| } |
| float certainty() const { |
| return certainty_; |
| } |
| inT8 config() const { |
| return config_; |
| } |
| int script_id() const { |
| return script_id_; |
| } |
| |
| void set_unichar_id(UNICHAR_ID newunichar_id) { |
| unichar_id_ = newunichar_id; |
| } |
| void set_rating(float newrat) { |
| rating_ = newrat; |
| } |
| void set_certainty(float newrat) { |
| certainty_ = newrat; |
| } |
| void set_config(inT8 newfont) { |
| config_ = newfont; |
| } |
| void set_script(int newscript_id) { |
| script_id_ = newscript_id; |
| } |
| |
| NEWDELETE private: |
| UNICHAR_ID unichar_id_; // unichar id |
| char config_; // char config (font) |
| inT16 junk2_; |
| float rating_; // size related |
| float certainty_; // absolute |
| int script_id_; |
| }; |
| |
| // Make BLOB_CHOICE listable. |
| ELISTIZEH (BLOB_CHOICE) CLISTIZEH (BLOB_CHOICE_LIST) |
/* Permuter codes used in WERD_CHOICEs. */
#define NO_PERM 0
#define TOP_CHOICE_PERM 1
#define LOWER_CASE_PERM 2
#define UPPER_CASE_PERM 3
#define NUMBER_PERM 4
#define SYSTEM_DAWG_PERM 5
#define DOC_DAWG_PERM 6
#define USER_DAWG_PERM 7
#define FREQ_DAWG_PERM 8
#define COMPOUND_PERM 9
/* Smallest and largest codes other than NO_PERM. They repeat the values of
 * TOP_CHOICE_PERM and COMPOUND_PERM, so update them when codes are added. */
#define MIN_PERM 1
#define MAX_PERM 9
| |
| class WERD_CHOICE { |
| public: |
| WERD_CHOICE() { this->init(8); } |
| WERD_CHOICE(int reserved) { this->init(reserved); } |
| WERD_CHOICE(const char *src_string, |
| const char *src_lengths, |
| float src_rating, |
| float src_certainty, |
| uinT8 src_permuter, |
| const UNICHARSET ¤t_unicharset); |
| WERD_CHOICE(const WERD_CHOICE &word) { |
| this->init(word.length()); |
| this->operator=(word); |
| } |
| ~WERD_CHOICE(); |
| |
| inline int length() const { |
| return length_; |
| } |
| inline const UNICHAR_ID *unichar_ids() const { |
| return unichar_ids_; |
| } |
| inline const UNICHAR_ID unichar_id(int index) const { |
| assert(index < length_); |
| return unichar_ids_[index]; |
| } |
| inline const char *fragment_lengths() const { |
| return fragment_lengths_; |
| } |
| inline const char fragment_length(int index) const { |
| assert(index < length_); |
| return fragment_lengths_[index]; |
| } |
| inline float rating() const { |
| return rating_; |
| } |
| inline float certainty() const { |
| return certainty_; |
| } |
| inline uinT8 permuter() const { |
| return permuter_; |
| } |
| inline bool fragment_mark() const { |
| return fragment_mark_; |
| } |
| inline BLOB_CHOICE_LIST_CLIST* blob_choices() { |
| return blob_choices_; |
| } |
| inline void set_unichar_id(UNICHAR_ID unichar_id, int index) { |
| assert(index < length_); |
| unichar_ids_[index] = unichar_id; |
| } |
| inline void set_rating(float new_val) { |
| rating_ = new_val; |
| } |
| inline void set_certainty(float new_val) { |
| certainty_ = new_val; |
| } |
| inline void set_permuter(uinT8 perm) { |
| permuter_ = perm; |
| } |
| inline void set_fragment_mark(bool new_fragment_mark) { |
| fragment_mark_ = new_fragment_mark; |
| } |
| void set_blob_choices(BLOB_CHOICE_LIST_CLIST *blob_choices); |
| |
| // Make more space in unichar_id_ and fragment_lengths_ arrays. |
| inline void double_the_size() { |
| unichar_ids_ = GenericVector<UNICHAR_ID>::double_the_size_memcpy( |
| reserved_, unichar_ids_); |
| fragment_lengths_ = GenericVector<char>::double_the_size_memcpy( |
| reserved_, fragment_lengths_); |
| reserved_ *= 2; |
| } |
| |
| // Initializes WERD_CHOICE - reseves length slots in unichar_ids_ and |
| // fragment_length_ arrays. Sets other values to default (blank) values. |
| inline void init(int reserved) { |
| reserved_ = reserved; |
| unichar_ids_ = new UNICHAR_ID[reserved]; |
| fragment_lengths_ = new char[reserved]; |
| length_ = 0; |
| rating_ = 0.0; |
| certainty_ = MAX_FLOAT32; |
| permuter_ = NO_PERM; |
| fragment_mark_ = false; |
| blob_choices_ = NULL; |
| unichar_string_ = ""; |
| unichar_lengths_ = ""; |
| } |
| |
| // Set the fields in this choice to be default (bad) values. |
| inline void make_bad() { |
| length_ = 0; |
| rating_ = MAX_FLOAT32; |
| certainty_ = -MAX_FLOAT32; |
| fragment_mark_ = false; |
| unichar_string_ = ""; |
| unichar_lengths_ = ""; |
| } |
| |
| // This function assumes that there is enough space reserved |
| // in the WERD_CHOICE for adding another unichar. |
| // This is an efficient alternative to append_unichar_id(). |
| inline void append_unichar_id_space_allocated( |
| UNICHAR_ID unichar_id, char fragment_length, |
| float rating, float certainty) { |
| assert(reserved_ > length_); |
| length_++; |
| this->set_unichar_id(unichar_id, fragment_length, |
| rating, certainty, length_-1); |
| } |
| |
| void append_unichar_id(UNICHAR_ID unichar_id, char fragment_length, |
| float rating, float certainty); |
| |
| inline void set_unichar_id(UNICHAR_ID unichar_id, char fragment_length, |
| float rating, float certainty, int index) { |
| assert(index < length_); |
| unichar_ids_[index] = unichar_id; |
| fragment_lengths_[index] = fragment_length; |
| rating_ += rating; |
| if (certainty < certainty_) { |
| certainty_ = certainty; |
| } |
| } |
| |
| bool contains_unichar_id(UNICHAR_ID unichar_id) const; |
| void remove_unichar_id(int index); |
| void string_and_lengths(const UNICHARSET ¤t_unicharset, |
| STRING *word_str, STRING *word_lengths_str) const; |
| const STRING debug_string(const UNICHARSET ¤t_unicharset) const { |
| STRING word_str; |
| for (int i = 0; i < length_; ++i) { |
| word_str += current_unicharset.debug_str(unichar_ids_[i]); |
| word_str += " "; |
| } |
| return word_str; |
| } |
| // Since this function walks over the whole word to convert unichar ids |
| // to unichars, it is best to call it once, e.g. after all changes to |
| // unichar_ids_ in WERD_CHOICE are finished. |
| void populate_unichars(const UNICHARSET ¤t_unicharset) { |
| this->string_and_lengths(current_unicharset, &unichar_string_, |
| &unichar_lengths_); |
| } |
| // This function should only be called if populate_unichars() |
| // was called and WERD_CHOICE did not change since then. |
| const STRING &unichar_string() const { |
| assert(unichar_string_.length() <= 0 || |
| unichar_string_.length() >= length_); // sanity check |
| return unichar_string_; |
| } |
| // This function should only be called if populate_unichars() |
| // was called and WERD_CHOICE did not change since then. |
| const STRING &unichar_lengths() const { |
| assert(unichar_lengths_.length() <= 0 || |
| unichar_lengths_.length() == length_); // sanity check |
| return unichar_lengths_; |
| } |
| const void print() const; |
| |
| WERD_CHOICE& operator+= ( // concatanate |
| const WERD_CHOICE & second);// second on first |
| |
| WERD_CHOICE& operator= (const WERD_CHOICE& source); |
| |
| NEWDELETE private: |
| UNICHAR_ID *unichar_ids_; // unichar ids that represent the text of the word |
| char *fragment_lengths_; // number of fragments in each unichar |
| int reserved_; // size of the above arrays |
| int length_; // word length |
| float rating_; // size related |
| float certainty_; // absolute |
| uinT8 permuter_; // permuter code |
| bool fragment_mark_; // if true, indicates that this choice |
| // was chosen over a better one that |
| // contained a fragment |
| BLOB_CHOICE_LIST_CLIST *blob_choices_; // best choices for each blob |
| |
| // The following variables are only populated by calling populate_unichars(). |
| // They are not synchronized with the values in unichar_ids otherwise. |
| STRING unichar_string_; |
| STRING unichar_lengths_; |
| bool unichar_info_present; |
| |
| private: |
| void delete_blob_choices(); |
| }; |
| |
| // Make WERD_CHOICE listable. |
| ELISTIZEH (WERD_CHOICE) |
| void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings); |
| void print_ratings_list( |
| const char *msg, // intro message |
| BLOB_CHOICE_LIST *ratings, // list of results |
| const UNICHARSET ¤t_unicharset // unicharset that can be used |
| // for id-to-unichar conversion |
| ); |
| void print_ratings_info( |
| FILE *fp, // file to use |
| BLOB_CHOICE_LIST *ratings, // list of results |
| const UNICHARSET ¤t_unicharset // unicharset that can be used |
| // for id-to-unichar conversion |
| ); |
| |
| |
| typedef GenericVector<BLOB_CHOICE_LIST *> BLOB_CHOICE_LIST_VECTOR; |
| typedef GenericVector<WERD_CHOICE_LIST *> WERD_CHOICE_LIST_VECTOR; |
| |
| typedef void (*POLY_TESTER) |
| (const STRING&, PBLOB *, DENORM *, BOOL8, char *, inT32, |
| BLOB_CHOICE_LIST *); |
| |
| #endif |