| // sparse-tuple-weight.h |
| |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| // Copyright 2005-2010 Google, Inc. |
| // Author: krr@google.com (Kasturi Rangan Raghavan) |
| // Inspiration: allauzen@google.com (Cyril Allauzen) |
| // \file |
| // Sparse version of tuple-weight, based on tuple-weight.h |
| // Internally stores sparse key, value pairs in linked list |
| // Default value elemnt is the assumed value of unset keys |
| // Internal singleton implementation that stores first key, |
| // value pair as a initialized member variable to avoide |
| // unnecessary allocation on heap. |
| // Use SparseTupleWeightIterator to iterate through the key,value pairs |
| // Note: this does NOT iterate through the default value. |
| // |
| // Sparse tuple weight set operation definitions. |
| |
| #ifndef FST_LIB_SPARSE_TUPLE_WEIGHT_H__ |
| #define FST_LIB_SPARSE_TUPLE_WEIGHT_H__ |
| |
| #include<string> |
| #include<list> |
| #include<stack> |
| #include<tr1/unordered_map> |
| using std::tr1::unordered_map; |
| using std::tr1::unordered_multimap; |
| |
| #include <fst/weight.h> |
| |
| |
| DECLARE_string(fst_weight_parentheses); |
| DECLARE_string(fst_weight_separator); |
| |
| namespace fst { |
| |
| template <class W, class K> class SparseTupleWeight; |
| |
| template<class W, class K> |
| class SparseTupleWeightIterator; |
| |
| template <class W, class K> |
| istream &operator>>(istream &strm, SparseTupleWeight<W, K> &w); |
| |
| // Arbitrary dimension tuple weight, stored as a sorted linked-list |
| // W is any weight class, |
| // K is the key value type. kNoKey(-1) is reserved for internal use |
| template <class W, class K = int> |
| class SparseTupleWeight { |
| public: |
| typedef pair<K, W> Pair; |
| typedef SparseTupleWeight<typename W::ReverseWeight, K> ReverseWeight; |
| |
| const static K kNoKey = -1; |
| SparseTupleWeight() { |
| Init(); |
| } |
| |
| template <class Iterator> |
| SparseTupleWeight(Iterator begin, Iterator end) { |
| Init(); |
| // Assumes input iterator is sorted |
| for (Iterator it = begin; it != end; ++it) |
| Push(*it); |
| } |
| |
| |
| SparseTupleWeight(const K& key, const W &w) { |
| Init(); |
| Push(key, w); |
| } |
| |
| SparseTupleWeight(const W &w) { |
| Init(w); |
| } |
| |
| SparseTupleWeight(const SparseTupleWeight<W, K> &w) { |
| Init(w.DefaultValue()); |
| SetDefaultValue(w.DefaultValue()); |
| for (SparseTupleWeightIterator<W, K> it(w); !it.Done(); it.Next()) { |
| Push(it.Value()); |
| } |
| } |
| |
| static const SparseTupleWeight<W, K> &Zero() { |
| static SparseTupleWeight<W, K> zero; |
| return zero; |
| } |
| |
| static const SparseTupleWeight<W, K> &One() { |
| static SparseTupleWeight<W, K> one(W::One()); |
| return one; |
| } |
| |
| static const SparseTupleWeight<W, K> &NoWeight() { |
| static SparseTupleWeight<W, K> no_weight(W::NoWeight()); |
| return no_weight; |
| } |
| |
| istream &Read(istream &strm) { |
| ReadType(strm, &default_); |
| ReadType(strm, &first_); |
| return ReadType(strm, &rest_); |
| } |
| |
| ostream &Write(ostream &strm) const { |
| WriteType(strm, default_); |
| WriteType(strm, first_); |
| return WriteType(strm, rest_); |
| } |
| |
| SparseTupleWeight<W, K> &operator=(const SparseTupleWeight<W, K> &w) { |
| if (this == &w) return *this; // check for w = w |
| Init(w.DefaultValue()); |
| for (SparseTupleWeightIterator<W, K> it(w); !it.Done(); it.Next()) { |
| Push(it.Value()); |
| } |
| return *this; |
| } |
| |
| bool Member() const { |
| if (!DefaultValue().Member()) return false; |
| for (SparseTupleWeightIterator<W, K> it(*this); !it.Done(); it.Next()) { |
| if (!it.Value().second.Member()) return false; |
| } |
| return true; |
| } |
| |
| // Assumes H() function exists for the hash of the key value |
| size_t Hash() const { |
| uint64 h = 0; |
| std::tr1::hash<K> H; |
| for (SparseTupleWeightIterator<W, K> it(*this); !it.Done(); it.Next()) { |
| h = 5 * h + H(it.Value().first); |
| h = 13 * h + it.Value().second.Hash(); |
| } |
| return size_t(h); |
| } |
| |
| SparseTupleWeight<W, K> Quantize(float delta = kDelta) const { |
| SparseTupleWeight<W, K> w; |
| for (SparseTupleWeightIterator<W, K> it(*this); !it.Done(); it.Next()) { |
| w.Push(it.Value().first, it.Value().second.Quantize(delta)); |
| } |
| return w; |
| } |
| |
| ReverseWeight Reverse() const { |
| SparseTupleWeight<W, K> w; |
| for (SparseTupleWeightIterator<W, K> it(*this); !it.Done(); it.Next()) { |
| w.Push(it.Value().first, it.Value().second.Reverse()); |
| } |
| return w; |
| } |
| |
| // Common initializer among constructors. |
| void Init() { |
| Init(W::Zero()); |
| } |
| |
| void Init(const W& default_value) { |
| first_.first = kNoKey; |
| /* initialized to the reserved key value */ |
| default_ = default_value; |
| rest_.clear(); |
| } |
| |
| size_t Size() const { |
| if (first_.first == kNoKey) |
| return 0; |
| else |
| return rest_.size() + 1; |
| } |
| |
| inline void Push(const K &k, const W &w, bool default_value_check = true) { |
| Push(make_pair(k, w), default_value_check); |
| } |
| |
| inline void Push(const Pair &p, bool default_value_check = true) { |
| if (default_value_check && p.second == default_) return; |
| if (first_.first == kNoKey) { |
| first_ = p; |
| } else { |
| rest_.push_back(p); |
| } |
| } |
| |
| void SetDefaultValue(const W& val) { default_ = val; } |
| |
| const W& DefaultValue() const { return default_; } |
| |
| protected: |
| static istream& ReadNoParen( |
| istream&, SparseTupleWeight<W, K>&, char separator); |
| |
| static istream& ReadWithParen( |
| istream&, SparseTupleWeight<W, K>&, |
| char separator, char open_paren, char close_paren); |
| |
| private: |
| // Assumed default value of uninitialized keys, by default W::Zero() |
| W default_; |
| |
| // Key values pairs are first stored in first_, then fill rest_ |
| // this way we can avoid dynamic allocation in the common case |
| // where the weight is a single key,val pair. |
| Pair first_; |
| list<Pair> rest_; |
| |
| friend istream &operator>><W, K>(istream&, SparseTupleWeight<W, K>&); |
| friend class SparseTupleWeightIterator<W, K>; |
| }; |
| |
| template<class W, class K> |
| class SparseTupleWeightIterator { |
| public: |
| typedef typename SparseTupleWeight<W, K>::Pair Pair; |
| typedef typename list<Pair>::const_iterator const_iterator; |
| typedef typename list<Pair>::iterator iterator; |
| |
| explicit SparseTupleWeightIterator(const SparseTupleWeight<W, K>& w) |
| : first_(w.first_), rest_(w.rest_), init_(true), |
| iter_(rest_.begin()) {} |
| |
| bool Done() const { |
| if (init_) |
| return first_.first == SparseTupleWeight<W, K>::kNoKey; |
| else |
| return iter_ == rest_.end(); |
| } |
| |
| const Pair& Value() const { return init_ ? first_ : *iter_; } |
| |
| void Next() { |
| if (init_) |
| init_ = false; |
| else |
| ++iter_; |
| } |
| |
| void Reset() { |
| init_ = true; |
| iter_ = rest_.begin(); |
| } |
| |
| private: |
| const Pair &first_; |
| const list<Pair> & rest_; |
| bool init_; // in the initialized state? |
| typename list<Pair>::const_iterator iter_; |
| |
| DISALLOW_COPY_AND_ASSIGN(SparseTupleWeightIterator); |
| }; |
| |
| template<class W, class K, class M> |
| inline void SparseTupleWeightMap( |
| SparseTupleWeight<W, K>* ret, |
| const SparseTupleWeight<W, K>& w1, |
| const SparseTupleWeight<W, K>& w2, |
| const M& operator_mapper) { |
| SparseTupleWeightIterator<W, K> w1_it(w1); |
| SparseTupleWeightIterator<W, K> w2_it(w2); |
| const W& v1_def = w1.DefaultValue(); |
| const W& v2_def = w2.DefaultValue(); |
| ret->SetDefaultValue(operator_mapper.Map(0, v1_def, v2_def)); |
| while (!w1_it.Done() || !w2_it.Done()) { |
| const K& k1 = (w1_it.Done()) ? w2_it.Value().first : w1_it.Value().first; |
| const K& k2 = (w2_it.Done()) ? w1_it.Value().first : w2_it.Value().first; |
| const W& v1 = (w1_it.Done()) ? v1_def : w1_it.Value().second; |
| const W& v2 = (w2_it.Done()) ? v2_def : w2_it.Value().second; |
| if (k1 == k2) { |
| ret->Push(k1, operator_mapper.Map(k1, v1, v2)); |
| if (!w1_it.Done()) w1_it.Next(); |
| if (!w2_it.Done()) w2_it.Next(); |
| } else if (k1 < k2) { |
| ret->Push(k1, operator_mapper.Map(k1, v1, v2_def)); |
| w1_it.Next(); |
| } else { |
| ret->Push(k2, operator_mapper.Map(k2, v1_def, v2)); |
| w2_it.Next(); |
| } |
| } |
| } |
| |
| template <class W, class K> |
| inline bool operator==(const SparseTupleWeight<W, K> &w1, |
| const SparseTupleWeight<W, K> &w2) { |
| const W& v1_def = w1.DefaultValue(); |
| const W& v2_def = w2.DefaultValue(); |
| if (v1_def != v2_def) return false; |
| |
| SparseTupleWeightIterator<W, K> w1_it(w1); |
| SparseTupleWeightIterator<W, K> w2_it(w2); |
| while (!w1_it.Done() || !w2_it.Done()) { |
| const K& k1 = (w1_it.Done()) ? w2_it.Value().first : w1_it.Value().first; |
| const K& k2 = (w2_it.Done()) ? w1_it.Value().first : w2_it.Value().first; |
| const W& v1 = (w1_it.Done()) ? v1_def : w1_it.Value().second; |
| const W& v2 = (w2_it.Done()) ? v2_def : w2_it.Value().second; |
| if (k1 == k2) { |
| if (v1 != v2) return false; |
| if (!w1_it.Done()) w1_it.Next(); |
| if (!w2_it.Done()) w2_it.Next(); |
| } else if (k1 < k2) { |
| if (v1 != v2_def) return false; |
| w1_it.Next(); |
| } else { |
| if (v1_def != v2) return false; |
| w2_it.Next(); |
| } |
| } |
| return true; |
| } |
| |
| template <class W, class K> |
| inline bool operator!=(const SparseTupleWeight<W, K> &w1, |
| const SparseTupleWeight<W, K> &w2) { |
| return !(w1 == w2); |
| } |
| |
| template <class W, class K> |
| inline ostream &operator<<(ostream &strm, const SparseTupleWeight<W, K> &w) { |
| if(FLAGS_fst_weight_separator.size() != 1) { |
| FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1"; |
| strm.clear(std::ios::badbit); |
| return strm; |
| } |
| char separator = FLAGS_fst_weight_separator[0]; |
| bool write_parens = false; |
| if (!FLAGS_fst_weight_parentheses.empty()) { |
| if (FLAGS_fst_weight_parentheses.size() != 2) { |
| FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2"; |
| strm.clear(std::ios::badbit); |
| return strm; |
| } |
| write_parens = true; |
| } |
| |
| if (write_parens) |
| strm << FLAGS_fst_weight_parentheses[0]; |
| |
| strm << w.DefaultValue(); |
| strm << separator; |
| |
| size_t n = w.Size(); |
| strm << n; |
| strm << separator; |
| |
| for (SparseTupleWeightIterator<W, K> it(w); !it.Done(); it.Next()) { |
| strm << it.Value().first; |
| strm << separator; |
| strm << it.Value().second; |
| strm << separator; |
| } |
| |
| if (write_parens) |
| strm << FLAGS_fst_weight_parentheses[1]; |
| |
| return strm; |
| } |
| |
| template <class W, class K> |
| inline istream &operator>>(istream &strm, SparseTupleWeight<W, K> &w) { |
| if(FLAGS_fst_weight_separator.size() != 1) { |
| FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1"; |
| strm.clear(std::ios::badbit); |
| return strm; |
| } |
| char separator = FLAGS_fst_weight_separator[0]; |
| |
| if (!FLAGS_fst_weight_parentheses.empty()) { |
| if (FLAGS_fst_weight_parentheses.size() != 2) { |
| FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2"; |
| strm.clear(std::ios::badbit); |
| return strm; |
| } |
| return SparseTupleWeight<W, K>::ReadWithParen( |
| strm, w, separator, FLAGS_fst_weight_parentheses[0], |
| FLAGS_fst_weight_parentheses[1]); |
| } else { |
| return SparseTupleWeight<W, K>::ReadNoParen(strm, w, separator); |
| } |
| } |
| |
| // Reads SparseTupleWeight when there are no parentheses around tuple terms |
| template <class W, class K> |
| inline istream& SparseTupleWeight<W, K>::ReadNoParen( |
| istream &strm, |
| SparseTupleWeight<W, K> &w, |
| char separator) { |
| int c; |
| size_t n; |
| |
| do { |
| c = strm.get(); |
| } while (isspace(c)); |
| |
| |
| { // Read default weight |
| W default_value; |
| string s; |
| while (c != separator) { |
| if (c == EOF) { |
| strm.clear(std::ios::badbit); |
| return strm; |
| } |
| s += c; |
| c = strm.get(); |
| } |
| istringstream sstrm(s); |
| sstrm >> default_value; |
| w.SetDefaultValue(default_value); |
| } |
| |
| c = strm.get(); |
| |
| { // Read n |
| string s; |
| while (c != separator) { |
| if (c == EOF) { |
| strm.clear(std::ios::badbit); |
| return strm; |
| } |
| s += c; |
| c = strm.get(); |
| } |
| istringstream sstrm(s); |
| sstrm >> n; |
| } |
| |
| // Read n elements |
| for (size_t i = 0; i < n; ++i) { |
| // discard separator |
| c = strm.get(); |
| K p; |
| W r; |
| |
| { // read key |
| string s; |
| while (c != separator) { |
| if (c == EOF) { |
| strm.clear(std::ios::badbit); |
| return strm; |
| } |
| s += c; |
| c = strm.get(); |
| } |
| istringstream sstrm(s); |
| sstrm >> p; |
| } |
| |
| c = strm.get(); |
| |
| { // read weight |
| string s; |
| while (c != separator) { |
| if (c == EOF) { |
| strm.clear(std::ios::badbit); |
| return strm; |
| } |
| s += c; |
| c = strm.get(); |
| } |
| istringstream sstrm(s); |
| sstrm >> r; |
| } |
| |
| w.Push(p, r); |
| } |
| |
| c = strm.get(); |
| if (c != separator) { |
| strm.clear(std::ios::badbit); |
| } |
| |
| return strm; |
| } |
| |
| // Reads SparseTupleWeight when there are parentheses around tuple terms |
| template <class W, class K> |
| inline istream& SparseTupleWeight<W, K>::ReadWithParen( |
| istream &strm, |
| SparseTupleWeight<W, K> &w, |
| char separator, |
| char open_paren, |
| char close_paren) { |
| int c; |
| size_t n; |
| |
| do { |
| c = strm.get(); |
| } while (isspace(c)); |
| |
| if (c != open_paren) { |
| FSTERROR() << "is fst_weight_parentheses flag set correcty? "; |
| strm.clear(std::ios::badbit); |
| return strm; |
| } |
| |
| c = strm.get(); |
| |
| { // Read weight |
| W default_value; |
| stack<int> parens; |
| string s; |
| while (c != separator || !parens.empty()) { |
| if (c == EOF) { |
| strm.clear(std::ios::badbit); |
| return strm; |
| } |
| s += c; |
| // If parens encountered before separator, they must be matched |
| if (c == open_paren) { |
| parens.push(1); |
| } else if (c == close_paren) { |
| // Fail for mismatched parens |
| if (parens.empty()) { |
| strm.clear(std::ios::failbit); |
| return strm; |
| } |
| parens.pop(); |
| } |
| c = strm.get(); |
| } |
| istringstream sstrm(s); |
| sstrm >> default_value; |
| w.SetDefaultValue(default_value); |
| } |
| |
| c = strm.get(); |
| |
| { // Read n |
| string s; |
| while (c != separator) { |
| if (c == EOF) { |
| strm.clear(std::ios::badbit); |
| return strm; |
| } |
| s += c; |
| c = strm.get(); |
| } |
| istringstream sstrm(s); |
| sstrm >> n; |
| } |
| |
| // Read n elements |
| for (size_t i = 0; i < n; ++i) { |
| // discard separator |
| c = strm.get(); |
| K p; |
| W r; |
| |
| { // Read key |
| stack<int> parens; |
| string s; |
| while (c != separator || !parens.empty()) { |
| if (c == EOF) { |
| strm.clear(std::ios::badbit); |
| return strm; |
| } |
| s += c; |
| // If parens encountered before separator, they must be matched |
| if (c == open_paren) { |
| parens.push(1); |
| } else if (c == close_paren) { |
| // Fail for mismatched parens |
| if (parens.empty()) { |
| strm.clear(std::ios::failbit); |
| return strm; |
| } |
| parens.pop(); |
| } |
| c = strm.get(); |
| } |
| istringstream sstrm(s); |
| sstrm >> p; |
| } |
| |
| c = strm.get(); |
| |
| { // Read weight |
| stack<int> parens; |
| string s; |
| while (c != separator || !parens.empty()) { |
| if (c == EOF) { |
| strm.clear(std::ios::badbit); |
| return strm; |
| } |
| s += c; |
| // If parens encountered before separator, they must be matched |
| if (c == open_paren) { |
| parens.push(1); |
| } else if (c == close_paren) { |
| // Fail for mismatched parens |
| if (parens.empty()) { |
| strm.clear(std::ios::failbit); |
| return strm; |
| } |
| parens.pop(); |
| } |
| c = strm.get(); |
| } |
| istringstream sstrm(s); |
| sstrm >> r; |
| } |
| |
| w.Push(p, r); |
| } |
| |
| if (c != separator) { |
| FSTERROR() << " separator expected, not found! "; |
| strm.clear(std::ios::badbit); |
| return strm; |
| } |
| |
| c = strm.get(); |
| if (c != close_paren) { |
| FSTERROR() << " is fst_weight_parentheses flag set correcty? "; |
| strm.clear(std::ios::badbit); |
| return strm; |
| } |
| |
| return strm; |
| } |
| |
| |
| |
| } // namespace fst |
| |
| #endif // FST_LIB_SPARSE_TUPLE_WEIGHT_H__ |