// blob: 4738181e8843ec457598fd0b55786afe9e45f8bc [file] [log] [blame]
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: allauzen@google.com (Cyril Allauzen)
//
// \file
// A generic (string,type) list file format.
//
// This is a stripped-down version of STTable that does
// not support the Find() operation but that does support
// reading/writing from standard in/out.
#ifndef FST_EXTENSIONS_FAR_STLIST_H_
#define FST_EXTENSIONS_FAR_STLIST_H_
#include <iostream>
#include <fstream>
#include <fst/util.h>
#include <algorithm>
#include <functional>
#include <queue>
#include <string>
#include <utility>
using std::pair; using std::make_pair;
#include <vector>
using std::vector;
namespace fst {
// Magic number stored first in every STList file: the writer emits it and
// the readers reject a file whose leading int32 differs from it.
static const int32 kSTListMagicNumber = 5656924;
// File format version stored right after the magic number; the readers
// reject a file whose version differs from it.
static const int32 kSTListFileVersion = 1;
// String-type list writing class for object of type 'T' using functor 'W'
// to write an object of type 'T' from a stream. 'W' must conform to the
// following interface:
//
// struct Writer {
// void operator()(ostream &, const T &) const;
// };
//
template <class T, class W>
class STListWriter {
public:
typedef T EntryType;
typedef W EntryWriter;
explicit STListWriter(const string filename)
: stream_(
filename.empty() ? &std::cout :
new ofstream(filename.c_str(), ofstream::out | ofstream::binary)),
error_(false) {
WriteType(*stream_, kSTListMagicNumber);
WriteType(*stream_, kSTListFileVersion);
if (!stream_) {
FSTERROR() << "STListWriter::STListWriter: error writing to file: "
<< filename;
error_ = true;
}
}
static STListWriter<T, W> *Create(const string &filename) {
return new STListWriter<T, W>(filename);
}
void Add(const string &key, const T &t) {
if (key == "") {
FSTERROR() << "STListWriter::Add: key empty: " << key;
error_ = true;
} else if (key < last_key_) {
FSTERROR() << "STListWriter::Add: key disorder: " << key;
error_ = true;
}
if (error_) return;
last_key_ = key;
WriteType(*stream_, key);
entry_writer_(*stream_, t);
}
bool Error() const { return error_; }
~STListWriter() {
WriteType(*stream_, string());
if (stream_ != &std::cout)
delete stream_;
}
private:
EntryWriter entry_writer_; // Write functor for 'EntryType'
ostream *stream_; // Output stream
string last_key_; // Last key
bool error_;
DISALLOW_COPY_AND_ASSIGN(STListWriter);
};
// String-type list reading class for object of type 'T' using functor 'R'
// to read an object of type 'T' form a stream. 'R' must conform to the
// following interface:
//
// struct Reader {
// T *operator()(istream &) const;
// };
//
template <class T, class R>
class STListReader {
public:
typedef T EntryType;
typedef R EntryReader;
explicit STListReader(const vector<string> &filenames)
: sources_(filenames), entry_(0), error_(false) {
streams_.resize(filenames.size(), 0);
bool has_stdin = false;
for (size_t i = 0; i < filenames.size(); ++i) {
if (filenames[i].empty()) {
if (!has_stdin) {
streams_[i] = &std::cin;
sources_[i] = "stdin";
has_stdin = true;
} else {
FSTERROR() << "STListReader::STListReader: stdin should only "
<< "appear once in the input file list.";
error_ = true;
return;
}
} else {
streams_[i] = new ifstream(
filenames[i].c_str(), ifstream::in | ifstream::binary);
}
int32 magic_number = 0, file_version = 0;
ReadType(*streams_[i], &magic_number);
ReadType(*streams_[i], &file_version);
if (magic_number != kSTListMagicNumber) {
FSTERROR() << "STListReader::STTableReader: wrong file type: "
<< filenames[i];
error_ = true;
return;
}
if (file_version != kSTListFileVersion) {
FSTERROR() << "STListReader::STTableReader: wrong file version: "
<< filenames[i];
error_ = true;
return;
}
string key;
ReadType(*streams_[i], &key);
if (!key.empty())
heap_.push(make_pair(key, i));
if (!*streams_[i]) {
FSTERROR() << "STTableReader: error reading file: " << sources_[i];
error_ = true;
return;
}
}
if (heap_.empty()) return;
size_t current = heap_.top().second;
entry_ = entry_reader_(*streams_[current]);
if (!entry_ || !*streams_[current]) {
FSTERROR() << "STTableReader: error reading entry for key: "
<< heap_.top().first << ", file: " << sources_[current];
error_ = true;
}
}
~STListReader() {
for (size_t i = 0; i < streams_.size(); ++i) {
if (streams_[i] != &std::cin)
delete streams_[i];
}
if (entry_)
delete entry_;
}
static STListReader<T, R> *Open(const string &filename) {
vector<string> filenames;
filenames.push_back(filename);
return new STListReader<T, R>(filenames);
}
static STListReader<T, R> *Open(const vector<string> &filenames) {
return new STListReader<T, R>(filenames);
}
void Reset() {
FSTERROR()
<< "STListReader::Reset: stlist does not support reset operation";
error_ = true;
}
bool Find(const string &key) {
FSTERROR()
<< "STListReader::Find: stlist does not support find operation";
error_ = true;
return false;
}
bool Done() const {
return error_ || heap_.empty();
}
void Next() {
if (error_) return;
size_t current = heap_.top().second;
string key;
heap_.pop();
ReadType(*(streams_[current]), &key);
if (!*streams_[current]) {
FSTERROR() << "STTableReader: error reading file: "
<< sources_[current];
error_ = true;
return;
}
if (!key.empty())
heap_.push(make_pair(key, current));
if(!heap_.empty()) {
current = heap_.top().second;
if (entry_)
delete entry_;
entry_ = entry_reader_(*streams_[current]);
if (!entry_ || !*streams_[current]) {
FSTERROR() << "STTableReader: error reading entry for key: "
<< heap_.top().first << ", file: " << sources_[current];
error_ = true;
}
}
}
const string &GetKey() const {
return heap_.top().first;
}
const EntryType &GetEntry() const {
return *entry_;
}
bool Error() const { return error_; }
private:
EntryReader entry_reader_; // Read functor for 'EntryType'
vector<istream*> streams_; // Input streams
vector<string> sources_; // and corresponding file names
priority_queue<
pair<string, size_t>, vector<pair<string, size_t> >,
greater<pair<string, size_t> > > heap_; // (Key, stream id) heap
mutable EntryType *entry_; // Pointer to the currently read entry
bool error_;
DISALLOW_COPY_AND_ASSIGN(STListReader);
};
// String-type list header reading function template on the entry header
// type 'H' having a member function:
// Read(istream &strm, const string &filename);
// Checks that 'filename' is an STTable and call the H::Read() on the last
// entry in the STTable.
// Does not support reading from stdin.
template <class H>
bool ReadSTListHeader(const string &filename, H *header) {
if (filename.empty()) {
LOG(ERROR) << "ReadSTListHeader: reading header not supported on stdin";
return false;
}
ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
int32 magic_number = 0, file_version = 0;
ReadType(strm, &magic_number);
ReadType(strm, &file_version);
if (magic_number != kSTListMagicNumber) {
LOG(ERROR) << "ReadSTTableHeader: wrong file type: " << filename;
return false;
}
if (file_version != kSTListFileVersion) {
LOG(ERROR) << "ReadSTTableHeader: wrong file version: " << filename;
return false;
}
string key;
ReadType(strm, &key);
header->Read(strm, filename + ":" + key);
if (!strm) {
LOG(ERROR) << "ReadSTTableHeader: error reading file: " << filename;
return false;
}
return true;
}
// Declaration only; the definition is not part of this header.
// NOTE(review): judging by the name, presumably reports whether 'filename'
// is a file in the STList format -- confirm against the definition.
bool IsSTList(const string &filename);
} // namespace fst
#endif // FST_EXTENSIONS_FAR_STLIST_H_