| // symbol-table.cc |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| // |
| // \file |
| // Classes to provide symbol-to-integer and integer-to-symbol mappings. |
| |
#include "fst/lib/symbol-table.h"
#include "fst/lib/util.h"

#include <errno.h>
#include <string.h>

#include <limits>
| |
// Flag definition via DEFINE_bool (a macro declared outside this file): the
// flag is named fst_compat_symbols and defaults to true. Nothing in the
// visible part of this file reads it.
| DEFINE_bool(fst_compat_symbols, true, |
| "Require symbol tables to match when appropriate"); |
| |
| namespace fst { |
| |
| // Maximum line length, in bytes including the terminating NUL, of a textual symbols file. |
| const int kLineLen = 8096; |
| |
| // Magic number written first by Write and checked by Read; identifies stream data as a symbol table (and its endianness). |
| static const int32 kSymbolTableMagicNumber = 2125658996; |
| |
| SymbolTableImpl* SymbolTableImpl::ReadText(const string &filename) { |
| ifstream strm(filename.c_str()); |
| if (!strm) { |
| LOG(ERROR) << "SymbolTable::ReadText: Can't open symbol file: " |
| << filename; |
| return 0; |
| } |
| |
| SymbolTableImpl* impl = new SymbolTableImpl(filename); |
| |
| int64 nline = 0; |
| char line[kLineLen]; |
| while (strm.getline(line, kLineLen)) { |
| ++nline; |
| vector<char *> col; |
| SplitToVector(line, "\n\t ", &col, true); |
| if (col.size() == 0) // empty line |
| continue; |
| if (col.size() != 2) { |
| LOG(ERROR) << "SymbolTable::ReadText: Bad number of columns (skipping), " |
| << "file = " << filename << ", line = " << nline; |
| continue; |
| } |
| const char *symbol = col[0]; |
| const char *value = col[1]; |
| char *p; |
| int64 key = strtoll(value, &p, 10); |
| if (p < value + strlen(value) || key < 0) { |
| LOG(ERROR) << "SymbolTable::ReadText: Bad non-negative integer \"" |
| << value << "\" (skipping), " |
| << "file = " << filename << ", line = " << nline; |
| continue; |
| } |
| impl->AddSymbol(symbol, key); |
| } |
| |
| return impl; |
| } |
| |
| void SymbolTableImpl::RecomputeCheckSum() const { |
| check_sum_.Reset(); |
| for (size_t i = 0; i < symbols_.size(); ++i) { |
| check_sum_.Update(symbols_[i], strlen(symbols_[i])+1); |
| } |
| check_sum_finalized_ = true; |
| } |
| |
| int64 SymbolTableImpl::AddSymbol(const string& symbol, int64 key) { |
| std::unordered_map<string, int64>::const_iterator it = |
| symbol_map_.find(symbol); |
| if (it == symbol_map_.end()) { // only add if not in table |
| check_sum_finalized_ = false; |
| |
| char *csymbol = new char[symbol.size() + 1]; |
| strcpy(csymbol, symbol.c_str()); |
| symbols_.push_back(csymbol); |
| key_map_[key] = csymbol; |
| symbol_map_[csymbol] = key; |
| |
| if (key >= available_key_) { |
| available_key_ = key + 1; |
| } |
| } |
| |
| return key; |
| } |
| |
| SymbolTableImpl* SymbolTableImpl::Read(istream &strm, |
| const string &source) { |
| int32 magic_number = 0; |
| ReadType(strm, &magic_number); |
| if (magic_number != kSymbolTableMagicNumber) { |
| LOG(ERROR) << "SymbolTable::Read: read failed"; |
| return 0; |
| } |
| string name; |
| ReadType(strm, &name); |
| SymbolTableImpl* impl = new SymbolTableImpl(name); |
| ReadType(strm, &impl->available_key_); |
| int64 size; |
| ReadType(strm, &size); |
| string symbol; |
| int64 key = 0; |
| for (size_t i = 0; i < size; ++i) { |
| ReadType(strm, &symbol); |
| ReadType(strm, &key); |
| impl->AddSymbol(symbol, key); |
| } |
| if (!strm) |
| LOG(ERROR) << "SymbolTable::Read: read failed"; |
| return impl; |
| } |
| |
| bool SymbolTableImpl::Write(ostream &strm) const { |
| WriteType(strm, kSymbolTableMagicNumber); |
| WriteType(strm, name_); |
| WriteType(strm, available_key_); |
| int64 size = symbols_.size(); |
| WriteType(strm, size); |
| for (size_t i = 0; i < symbols_.size(); ++i) { |
| const string symbol = symbols_[i]; |
| WriteType(strm, symbol); |
| std::unordered_map<string, int64>::const_iterator it = symbol_map_.find(symbol); |
| WriteType(strm, it->second); |
| } |
| strm.flush(); |
| if (!strm) { |
| LOG(ERROR) << "SymbolTable::Write: write failed"; |
| return false; |
| } |
| return true; |
| } |
| |
| bool SymbolTableImpl::WriteText(ostream &strm) const { |
| for (size_t i = 0; i < symbols_.size(); ++i) { |
| char line[kLineLen]; |
| snprintf(line, kLineLen, "%s\t%lld\n", symbols_[i], Find(symbols_[i])); |
| strm.write(line, strlen(line)); |
| } |
| strm.flush(); |
| if (!strm) { |
| LOG(ERROR) << "SymbolTable::WriteText: write failed"; |
| return false; |
| } |
| return true; |
| } |
| |
| } // namespace fst |