| |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| // Copyright 2005-2010 Google, Inc. |
| // Author: allauzen@google.com (Cyril Allauzen) |
| // |
| // \file |
| // A generic (string,type) list file format. |
| // |
| // This is a stripped-down version of STTable that does |
| // not support the Find() operation but that does support |
| // reading/writing from standard in/out. |
| |
| #ifndef FST_EXTENSIONS_FAR_STLIST_H_ |
| #define FST_EXTENSIONS_FAR_STLIST_H_ |
| |
| #include <iostream> |
| #include <fstream> |
| #include <fst/util.h> |
| |
| #include <algorithm> |
| #include <functional> |
| #include <queue> |
| #include <string> |
| #include <utility> |
| using std::pair; using std::make_pair; |
| #include <vector> |
| using std::vector; |
| |
| namespace fst { |
| |
| static const int32 kSTListMagicNumber = 5656924; /* first value written to every STList stream; readers reject a stream whose value differs */ |
| static const int32 kSTListFileVersion = 1; /* format version written right after the magic number; readers reject any other value */ |
| |
| // String-type list writing class for object of type 'T' using functor 'W' |
| // to write an object of type 'T' from a stream. 'W' must conform to the |
| // following interface: |
| // |
| // struct Writer { |
| // void operator()(ostream &, const T &) const; |
| // }; |
| // |
| template <class T, class W> |
| class STListWriter { |
| public: |
| typedef T EntryType; |
| typedef W EntryWriter; |
| |
| explicit STListWriter(const string filename) |
| : stream_( |
| filename.empty() ? &std::cout : |
| new ofstream(filename.c_str(), ofstream::out | ofstream::binary)), |
| error_(false) { |
| WriteType(*stream_, kSTListMagicNumber); |
| WriteType(*stream_, kSTListFileVersion); |
| if (!stream_) { |
| FSTERROR() << "STListWriter::STListWriter: error writing to file: " |
| << filename; |
| error_ = true; |
| } |
| } |
| |
| static STListWriter<T, W> *Create(const string &filename) { |
| return new STListWriter<T, W>(filename); |
| } |
| |
| void Add(const string &key, const T &t) { |
| if (key == "") { |
| FSTERROR() << "STListWriter::Add: key empty: " << key; |
| error_ = true; |
| } else if (key < last_key_) { |
| FSTERROR() << "STListWriter::Add: key disorder: " << key; |
| error_ = true; |
| } |
| if (error_) return; |
| last_key_ = key; |
| WriteType(*stream_, key); |
| entry_writer_(*stream_, t); |
| } |
| |
| bool Error() const { return error_; } |
| |
| ~STListWriter() { |
| WriteType(*stream_, string()); |
| if (stream_ != &std::cout) |
| delete stream_; |
| } |
| |
| private: |
| EntryWriter entry_writer_; // Write functor for 'EntryType' |
| ostream *stream_; // Output stream |
| string last_key_; // Last key |
| bool error_; |
| |
| DISALLOW_COPY_AND_ASSIGN(STListWriter); |
| }; |
| |
| |
| // String-type list reading class for object of type 'T' using functor 'R' |
| // to read an object of type 'T' form a stream. 'R' must conform to the |
| // following interface: |
| // |
| // struct Reader { |
| // T *operator()(istream &) const; |
| // }; |
| // |
| template <class T, class R> |
| class STListReader { |
| public: |
| typedef T EntryType; |
| typedef R EntryReader; |
| |
| explicit STListReader(const vector<string> &filenames) |
| : sources_(filenames), entry_(0), error_(false) { |
| streams_.resize(filenames.size(), 0); |
| bool has_stdin = false; |
| for (size_t i = 0; i < filenames.size(); ++i) { |
| if (filenames[i].empty()) { |
| if (!has_stdin) { |
| streams_[i] = &std::cin; |
| sources_[i] = "stdin"; |
| has_stdin = true; |
| } else { |
| FSTERROR() << "STListReader::STListReader: stdin should only " |
| << "appear once in the input file list."; |
| error_ = true; |
| return; |
| } |
| } else { |
| streams_[i] = new ifstream( |
| filenames[i].c_str(), ifstream::in | ifstream::binary); |
| } |
| int32 magic_number = 0, file_version = 0; |
| ReadType(*streams_[i], &magic_number); |
| ReadType(*streams_[i], &file_version); |
| if (magic_number != kSTListMagicNumber) { |
| FSTERROR() << "STListReader::STTableReader: wrong file type: " |
| << filenames[i]; |
| error_ = true; |
| return; |
| } |
| if (file_version != kSTListFileVersion) { |
| FSTERROR() << "STListReader::STTableReader: wrong file version: " |
| << filenames[i]; |
| error_ = true; |
| return; |
| } |
| string key; |
| ReadType(*streams_[i], &key); |
| if (!key.empty()) |
| heap_.push(make_pair(key, i)); |
| if (!*streams_[i]) { |
| FSTERROR() << "STTableReader: error reading file: " << sources_[i]; |
| error_ = true; |
| return; |
| } |
| } |
| if (heap_.empty()) return; |
| size_t current = heap_.top().second; |
| entry_ = entry_reader_(*streams_[current]); |
| if (!entry_ || !*streams_[current]) { |
| FSTERROR() << "STTableReader: error reading entry for key: " |
| << heap_.top().first << ", file: " << sources_[current]; |
| error_ = true; |
| } |
| } |
| |
| ~STListReader() { |
| for (size_t i = 0; i < streams_.size(); ++i) { |
| if (streams_[i] != &std::cin) |
| delete streams_[i]; |
| } |
| if (entry_) |
| delete entry_; |
| } |
| |
| static STListReader<T, R> *Open(const string &filename) { |
| vector<string> filenames; |
| filenames.push_back(filename); |
| return new STListReader<T, R>(filenames); |
| } |
| |
| static STListReader<T, R> *Open(const vector<string> &filenames) { |
| return new STListReader<T, R>(filenames); |
| } |
| |
| void Reset() { |
| FSTERROR() |
| << "STListReader::Reset: stlist does not support reset operation"; |
| error_ = true; |
| } |
| |
| bool Find(const string &key) { |
| FSTERROR() |
| << "STListReader::Find: stlist does not support find operation"; |
| error_ = true; |
| return false; |
| } |
| |
| bool Done() const { |
| return error_ || heap_.empty(); |
| } |
| |
| void Next() { |
| if (error_) return; |
| size_t current = heap_.top().second; |
| string key; |
| heap_.pop(); |
| ReadType(*(streams_[current]), &key); |
| if (!*streams_[current]) { |
| FSTERROR() << "STTableReader: error reading file: " |
| << sources_[current]; |
| error_ = true; |
| return; |
| } |
| if (!key.empty()) |
| heap_.push(make_pair(key, current)); |
| |
| if(!heap_.empty()) { |
| current = heap_.top().second; |
| if (entry_) |
| delete entry_; |
| entry_ = entry_reader_(*streams_[current]); |
| if (!entry_ || !*streams_[current]) { |
| FSTERROR() << "STTableReader: error reading entry for key: " |
| << heap_.top().first << ", file: " << sources_[current]; |
| error_ = true; |
| } |
| } |
| } |
| |
| const string &GetKey() const { |
| return heap_.top().first; |
| } |
| |
| const EntryType &GetEntry() const { |
| return *entry_; |
| } |
| |
| bool Error() const { return error_; } |
| |
| private: |
| EntryReader entry_reader_; // Read functor for 'EntryType' |
| vector<istream*> streams_; // Input streams |
| vector<string> sources_; // and corresponding file names |
| priority_queue< |
| pair<string, size_t>, vector<pair<string, size_t> >, |
| greater<pair<string, size_t> > > heap_; // (Key, stream id) heap |
| mutable EntryType *entry_; // Pointer to the currently read entry |
| bool error_; |
| |
| DISALLOW_COPY_AND_ASSIGN(STListReader); |
| }; |
| |
| |
| // String-type list header reading function template on the entry header |
| // type 'H' having a member function: |
| // Read(istream &strm, const string &filename); |
| // Checks that 'filename' is an STTable and call the H::Read() on the last |
| // entry in the STTable. |
| // Does not support reading from stdin. |
| template <class H> |
| bool ReadSTListHeader(const string &filename, H *header) { |
| if (filename.empty()) { |
| LOG(ERROR) << "ReadSTListHeader: reading header not supported on stdin"; |
| return false; |
| } |
| ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); |
| int32 magic_number = 0, file_version = 0; |
| ReadType(strm, &magic_number); |
| ReadType(strm, &file_version); |
| if (magic_number != kSTListMagicNumber) { |
| LOG(ERROR) << "ReadSTTableHeader: wrong file type: " << filename; |
| return false; |
| } |
| if (file_version != kSTListFileVersion) { |
| LOG(ERROR) << "ReadSTTableHeader: wrong file version: " << filename; |
| return false; |
| } |
| string key; |
| ReadType(strm, &key); |
| header->Read(strm, filename + ":" + key); |
| if (!strm) { |
| LOG(ERROR) << "ReadSTTableHeader: error reading file: " << filename; |
| return false; |
| } |
| return true; |
| } |
| |
| bool IsSTList(const string &filename); /* declaration only; the definition lives outside this header. Presumably reports whether 'filename' holds an STList -- TODO confirm against the definition */ |
| |
| } // namespace fst |
| |
| #endif // FST_EXTENSIONS_FAR_STLIST_H_ |