| // far.h |
| |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| // Copyright 2005-2010 Google, Inc. |
| // Author: riley@google.com (Michael Riley) |
| // |
| // \file |
| // Finite-State Transducer (FST) archive classes. |
| // |
| |
| #ifndef FST_EXTENSIONS_FAR_FAR_H__ |
| #define FST_EXTENSIONS_FAR_FAR_H__ |
| |
| #include <fst/extensions/far/stlist.h> |
| #include <fst/extensions/far/sttable.h> |
| #include <fst/fst.h> |
| #include <fst/vector-fst.h> |
| |
| namespace fst { |
| |
| enum FarEntryType { FET_LINE, FET_FILE }; |
| enum FarTokenType { FTT_SYMBOL, FTT_BYTE, FTT_UTF8 }; |
| |
| // FST archive header class |
| class FarHeader { |
| public: |
| const string &FarType() const { return fartype_; } |
| const string &ArcType() const { return arctype_; } |
| |
| bool Read(const string &filename) { |
| FstHeader fsthdr; |
| if (filename.empty()) { // Header reading unsupported on stdin. |
| return false; |
| } else if (IsSTTable(filename)) { // Check if STTable |
| ReadSTTableHeader(filename, &fsthdr); |
| fartype_ = "sttable"; |
| arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType(); |
| return true; |
| } else if (IsSTList(filename)) { // Check if STList |
| ReadSTListHeader(filename, &fsthdr); |
| fartype_ = "sttable"; |
| arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType(); |
| return true; |
| } |
| return false; |
| } |
| |
| private: |
| string fartype_; |
| string arctype_; |
| }; |
| |
| enum FarType { FAR_DEFAULT = 0, FAR_STTABLE = 1, FAR_STLIST = 2, |
| FAR_SSTABLE = 3 }; |
| |
| // This class creates an archive of FSTs. |
| template <class A> |
| class FarWriter { |
| public: |
| typedef A Arc; |
| |
| // Creates a new (empty) FST archive; returns NULL on error. |
| static FarWriter *Create(const string &filename, FarType type = FAR_DEFAULT); |
| |
| // Adds an FST to the end of an archive. Keys must be non-empty and |
| // in lexicographic order. FSTs must have a suitable write method. |
| virtual void Add(const string &key, const Fst<A> &fst) = 0; |
| |
| virtual FarType Type() const = 0; |
| |
| virtual bool Error() const = 0; |
| |
| virtual ~FarWriter() {} |
| |
| protected: |
| FarWriter() {} |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(FarWriter); |
| }; |
| |
| |
| // This class iterates through an existing archive of FSTs. |
| template <class A> |
| class FarReader { |
| public: |
| typedef A Arc; |
| |
| // Opens an existing FST archive in a single file; returns NULL on error. |
| // Sets current position to the beginning of the achive. |
| static FarReader *Open(const string &filename); |
| |
| // Opens an existing FST archive in multiple files; returns NULL on error. |
| // Sets current position to the beginning of the achive. |
| static FarReader *Open(const vector<string> &filenames); |
| |
| // Resets current posision to beginning of archive. |
| virtual void Reset() = 0; |
| |
| // Sets current position to first entry >= key. Returns true if a match. |
| virtual bool Find(const string &key) = 0; |
| |
| // Current position at end of archive? |
| virtual bool Done() const = 0; |
| |
| // Move current position to next FST. |
| virtual void Next() = 0; |
| |
| // Returns key at the current position. This reference is invalidated if |
| // the current position in the archive is changed. |
| virtual const string &GetKey() const = 0; |
| |
| // Returns FST at the current position. This reference is invalidated if |
| // the current position in the archive is changed. |
| virtual const Fst<A> &GetFst() const = 0; |
| |
| virtual FarType Type() const = 0; |
| |
| virtual bool Error() const = 0; |
| |
| virtual ~FarReader() {} |
| |
| protected: |
| FarReader() {} |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(FarReader); |
| }; |
| |
| |
| template <class A> |
| class FstWriter { |
| public: |
| void operator()(ostream &strm, const Fst<A> &fst) const { |
| fst.Write(strm, FstWriteOptions()); |
| } |
| }; |
| |
| |
| template <class A> |
| class STTableFarWriter : public FarWriter<A> { |
| public: |
| typedef A Arc; |
| |
| static STTableFarWriter *Create(const string filename) { |
| STTableWriter<Fst<A>, FstWriter<A> > *writer = |
| STTableWriter<Fst<A>, FstWriter<A> >::Create(filename); |
| return new STTableFarWriter(writer); |
| } |
| |
| void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); } |
| |
| FarType Type() const { return FAR_STTABLE; } |
| |
| bool Error() const { return writer_->Error(); } |
| |
| ~STTableFarWriter() { delete writer_; } |
| |
| private: |
| explicit STTableFarWriter(STTableWriter<Fst<A>, FstWriter<A> > *writer) |
| : writer_(writer) {} |
| |
| private: |
| STTableWriter<Fst<A>, FstWriter<A> > *writer_; |
| |
| DISALLOW_COPY_AND_ASSIGN(STTableFarWriter); |
| }; |
| |
| |
| template <class A> |
| class STListFarWriter : public FarWriter<A> { |
| public: |
| typedef A Arc; |
| |
| static STListFarWriter *Create(const string filename) { |
| STListWriter<Fst<A>, FstWriter<A> > *writer = |
| STListWriter<Fst<A>, FstWriter<A> >::Create(filename); |
| return new STListFarWriter(writer); |
| } |
| |
| void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); } |
| |
| FarType Type() const { return FAR_STLIST; } |
| |
| bool Error() const { return writer_->Error(); } |
| |
| ~STListFarWriter() { delete writer_; } |
| |
| private: |
| explicit STListFarWriter(STListWriter<Fst<A>, FstWriter<A> > *writer) |
| : writer_(writer) {} |
| |
| private: |
| STListWriter<Fst<A>, FstWriter<A> > *writer_; |
| |
| DISALLOW_COPY_AND_ASSIGN(STListFarWriter); |
| }; |
| |
| |
| template <class A> |
| FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) { |
| switch(type) { |
| case FAR_DEFAULT: |
| if (filename.empty()) |
| return STListFarWriter<A>::Create(filename); |
| case FAR_STTABLE: |
| return STTableFarWriter<A>::Create(filename); |
| break; |
| case FAR_STLIST: |
| return STListFarWriter<A>::Create(filename); |
| break; |
| default: |
| LOG(ERROR) << "FarWriter::Create: unknown far type"; |
| return 0; |
| } |
| } |
| |
| |
| template <class A> |
| class FstReader { |
| public: |
| Fst<A> *operator()(istream &strm) const { |
| return Fst<A>::Read(strm, FstReadOptions()); |
| } |
| }; |
| |
| |
| template <class A> |
| class STTableFarReader : public FarReader<A> { |
| public: |
| typedef A Arc; |
| |
| static STTableFarReader *Open(const string &filename) { |
| STTableReader<Fst<A>, FstReader<A> > *reader = |
| STTableReader<Fst<A>, FstReader<A> >::Open(filename); |
| // TODO: error check |
| return new STTableFarReader(reader); |
| } |
| |
| static STTableFarReader *Open(const vector<string> &filenames) { |
| STTableReader<Fst<A>, FstReader<A> > *reader = |
| STTableReader<Fst<A>, FstReader<A> >::Open(filenames); |
| // TODO: error check |
| return new STTableFarReader(reader); |
| } |
| |
| void Reset() { reader_->Reset(); } |
| |
| bool Find(const string &key) { return reader_->Find(key); } |
| |
| bool Done() const { return reader_->Done(); } |
| |
| void Next() { return reader_->Next(); } |
| |
| const string &GetKey() const { return reader_->GetKey(); } |
| |
| const Fst<A> &GetFst() const { return reader_->GetEntry(); } |
| |
| FarType Type() const { return FAR_STTABLE; } |
| |
| bool Error() const { return reader_->Error(); } |
| |
| ~STTableFarReader() { delete reader_; } |
| |
| private: |
| explicit STTableFarReader(STTableReader<Fst<A>, FstReader<A> > *reader) |
| : reader_(reader) {} |
| |
| private: |
| STTableReader<Fst<A>, FstReader<A> > *reader_; |
| |
| DISALLOW_COPY_AND_ASSIGN(STTableFarReader); |
| }; |
| |
| |
| template <class A> |
| class STListFarReader : public FarReader<A> { |
| public: |
| typedef A Arc; |
| |
| static STListFarReader *Open(const string &filename) { |
| STListReader<Fst<A>, FstReader<A> > *reader = |
| STListReader<Fst<A>, FstReader<A> >::Open(filename); |
| // TODO: error check |
| return new STListFarReader(reader); |
| } |
| |
| static STListFarReader *Open(const vector<string> &filenames) { |
| STListReader<Fst<A>, FstReader<A> > *reader = |
| STListReader<Fst<A>, FstReader<A> >::Open(filenames); |
| // TODO: error check |
| return new STListFarReader(reader); |
| } |
| |
| void Reset() { reader_->Reset(); } |
| |
| bool Find(const string &key) { return reader_->Find(key); } |
| |
| bool Done() const { return reader_->Done(); } |
| |
| void Next() { return reader_->Next(); } |
| |
| const string &GetKey() const { return reader_->GetKey(); } |
| |
| const Fst<A> &GetFst() const { return reader_->GetEntry(); } |
| |
| FarType Type() const { return FAR_STLIST; } |
| |
| bool Error() const { return reader_->Error(); } |
| |
| ~STListFarReader() { delete reader_; } |
| |
| private: |
| explicit STListFarReader(STListReader<Fst<A>, FstReader<A> > *reader) |
| : reader_(reader) {} |
| |
| private: |
| STListReader<Fst<A>, FstReader<A> > *reader_; |
| |
| DISALLOW_COPY_AND_ASSIGN(STListFarReader); |
| }; |
| |
| |
| template <class A> |
| FarReader<A> *FarReader<A>::Open(const string &filename) { |
| if (filename.empty()) |
| return STListFarReader<A>::Open(filename); |
| else if (IsSTTable(filename)) |
| return STTableFarReader<A>::Open(filename); |
| else if (IsSTList(filename)) |
| return STListFarReader<A>::Open(filename); |
| return 0; |
| } |
| |
| |
| template <class A> |
| FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) { |
| if (!filenames.empty() && filenames[0].empty()) |
| return STListFarReader<A>::Open(filenames); |
| else if (!filenames.empty() && IsSTTable(filenames[0])) |
| return STTableFarReader<A>::Open(filenames); |
| else if (!filenames.empty() && IsSTList(filenames[0])) |
| return STListFarReader<A>::Open(filenames); |
| return 0; |
| } |
| |
| } // namespace fst |
| |
| #endif // FST_EXTENSIONS_FAR_FAR_H__ |