// blob: 99eb1cad879347fd88e0fa18ffd6bfca0a6fa72c [file] [log] [blame]
// Copyright 2008 Google Inc. All Rights Reserved.
#ifndef THIRD_PARTY_TESSERACT_DICT_WORDFST_H_
#define THIRD_PARTY_TESSERACT_DICT_WORDFST_H_
#include <string>
#include <map>
#include <list>
#include "nlp/fst/lib/fstlib-inl.h"
#include "third_party/libidn/stringprep.h"
#include "util/utf8/unicodeprops.h"
#include "third_party/icu/current/unicode/uchar.h"
#include "i18n/utf8/char_properties.h"
// A string of unicode (utf32/ucs4) characters.
typedef basic_string<uint32> String;
// A Comparison functor for stl maps.
struct ltstr {
bool operator()(const String& s1, const String& s2) const {
return s1 < s2;
}
};
// Character categories. The numeric values are consecutive, starting at 0,
// in the order listed below.
// NOTE(review): nothing in this header uses the enum; the meaning of each
// category is implied by its name only -- confirm against the code that
// assigns these values.
enum ECharType {
  LOWER_CASE     = 0,
  UPPER_CASE     = 1,
  DIGIT          = 2,
  END_SENTENCE   = 3,
  START_SENTENCE = 4,
  OPEN_EXPR      = 5,
  CLOSE_EXPR     = 6,
  OPEN_QUOTE     = 7,
  CLOSE_QUOTE    = 8,
  CURRENCY       = 9,
  DASH           = 10,
  OTHER          = 11,
  EMPTY          = 12,
  UNKNOWN        = 13
};
// Names from nlp_fst made available unqualified to this header and to every
// file that includes it. Each name is listed exactly once.
using nlp_fst::StdVectorFst;
using nlp_fst::StdFst;
using nlp_fst::StdArc;
using nlp_fst::ArcIterator;
using nlp_fst::StateIterator;
using nlp_fst::kNoStateId;
using nlp_fst::StdOLabelCompare;
using nlp_fst::StdILabelCompare;
class WordFst {
public :
WordFst();
~WordFst();
bool AddWord(const String& word);
void Output(FILE* filename);
void LoadFromFile(FILE* filename);
bool Matches(const String& word);
StdVectorFst* fst();
void SetFst(StdVectorFst);
void WordsMatched(list<String>* results,
String prefix,
int current_node);
private :
unsigned int next_node_id_;
nlp_fst::StdVectorFst fst_;
};
bool CompareWordsFsts(WordFst *a, WordFst *b);
bool MyCompareWordsFsts(WordFst *a, WordFst *b, list<String>*, int);
#endif // THIRD_PARTY_TESSERACT_DICT_WORDFST_H_