blob: 7d863a04813c651f9d2a148908538e710f8ed385 [file] [log] [blame]
// state-table.h
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: riley@google.com (Michael Riley)
//
// \file
// Classes for representing the mapping between state tuples and state Ids.
#ifndef FST_LIB_STATE_TABLE_H__
#define FST_LIB_STATE_TABLE_H__
#include <deque>
#include <vector>
using std::vector;
#include <fst/bi-table.h>
#include <fst/expanded-fst.h>
namespace fst {
// STATE TABLES - these determine the bijective mapping between state
// tuples (e.g. in composition triples of two FST states and a
// composition filter state) and their corresponding state IDs.
// They are classes, templated on state tuples, of the form:
//
// template <class T>
// class StateTable {
// public:
// typedef typename T StateTuple;
//
// // Required constructors.
// StateTable();
//
// // Lookup state ID by tuple. If it doesn't exist, then add it.
// StateId FindState(const StateTuple &);
// // Lookup state tuple by state ID.
// const StateTuple<StateId> &Tuple(StateId) const;
// // # of stored tuples.
// StateId Size() const;
// };
//
// A state tuple has the form:
//
// template <class S>
// struct StateTuple {
// typedef typename S StateId;
//
// // Required constructor.
// StateTuple();
// };
// An implementation using a hash map for the tuple to state ID mapping.
// The state tuple T must have == defined and the default constructor
// must produce a tuple that will never be seen. H is the hash function.
template <class T, class H>
class HashStateTable : public HashBiTable<typename T::StateId, T, H> {
public:
typedef T StateTuple;
typedef typename StateTuple::StateId StateId;
using HashBiTable<StateId, T, H>::FindId;
using HashBiTable<StateId, T, H>::FindEntry;
using HashBiTable<StateId, T, H>::Size;
HashStateTable() : HashBiTable<StateId, T, H>() {}
StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
};
// An implementation using a hash set for the tuple to state ID
// mapping. The state tuple T must have == defined and the default
// constructor must produce a tuple that will never be seen. H is the
// hash function.
template <class T, class H>
class CompactHashStateTable
: public CompactHashBiTable<typename T::StateId, T, H> {
public:
typedef T StateTuple;
typedef typename StateTuple::StateId StateId;
using CompactHashBiTable<StateId, T, H>::FindId;
using CompactHashBiTable<StateId, T, H>::FindEntry;
using CompactHashBiTable<StateId, T, H>::Size;
CompactHashStateTable() : CompactHashBiTable<StateId, T, H>() {}
// Reserves space for table_size elements.
explicit CompactHashStateTable(size_t table_size)
: CompactHashBiTable<StateId, T, H>(table_size) {}
StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
};
// An implementation using a vector for the tuple to state mapping.
// It is passed a function object FP that should fingerprint tuples
// uniquely to an integer that can used as a vector index. Normally,
// VectorStateTable constructs the FP object. The user can instead
// pass in this object; in that case, VectorStateTable takes its
// ownership.
template <class T, class FP>
class VectorStateTable
: public VectorBiTable<typename T::StateId, T, FP> {
public:
typedef T StateTuple;
typedef typename StateTuple::StateId StateId;
using VectorBiTable<StateId, T, FP>::FindId;
using VectorBiTable<StateId, T, FP>::FindEntry;
using VectorBiTable<StateId, T, FP>::Size;
using VectorBiTable<StateId, T, FP>::Fingerprint;
explicit VectorStateTable(FP *fp = 0) : VectorBiTable<StateId, T, FP>(fp) {}
StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
};
// An implementation using a vector and a compact hash table. The
// selecting functor S returns true for tuples to be hashed in the
// vector. The fingerprinting functor FP returns a unique fingerprint
// for each tuple to be hashed in the vector (these need to be
// suitable for indexing in a vector). The hash functor H is used when
// hashing tuple into the compact hash table.
template <class T, class S, class FP, class H>
class VectorHashStateTable
: public VectorHashBiTable<typename T::StateId, T, S, FP, H> {
public:
typedef T StateTuple;
typedef typename StateTuple::StateId StateId;
using VectorHashBiTable<StateId, T, S, FP, H>::FindId;
using VectorHashBiTable<StateId, T, S, FP, H>::FindEntry;
using VectorHashBiTable<StateId, T, S, FP, H>::Size;
using VectorHashBiTable<StateId, T, S, FP, H>::Selector;
using VectorHashBiTable<StateId, T, S, FP, H>::Fingerprint;
using VectorHashBiTable<StateId, T, S, FP, H>::Hash;
VectorHashStateTable(S *s, FP *fp, H *h,
size_t vector_size = 0,
size_t tuple_size = 0)
: VectorHashBiTable<StateId, T, S, FP, H>(
s, fp, h, vector_size, tuple_size) {}
StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
};
// An implementation using a hash map for the tuple to state ID
// mapping. This version permits erasing of states. The state tuple T
// must have == defined and its default constructor must produce a
// tuple that will never be seen. F is the hash function.
template <class T, class F>
class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, F> {
public:
typedef T StateTuple;
typedef typename StateTuple::StateId StateId;
using ErasableBiTable<StateId, T, F>::FindId;
using ErasableBiTable<StateId, T, F>::FindEntry;
using ErasableBiTable<StateId, T, F>::Size;
using ErasableBiTable<StateId, T, F>::Erase;
ErasableStateTable() : ErasableBiTable<StateId, T, F>() {}
StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
};
//
// COMPOSITION STATE TUPLES AND TABLES
//
// The composition state table has the form:
//
// template <class A, class F>
// class ComposeStateTable {
// public:
// typedef A Arc;
// typedef F FilterState;
// typedef typename A::StateId StateId;
// typedef ComposeStateTuple<StateId> StateTuple;
//
// // Required constructors. Copy constructor does not copy state.
// ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2);
// ComposeStateTable(const ComposeStateTable<A, F> &table);
// // Lookup state ID by tuple. If it doesn't exist, then add it.
// StateId FindState(const StateTuple &);
// // Lookup state tuple by state ID.
// const StateTuple<StateId> &Tuple(StateId) const;
// // # of stored tuples.
// StateId Size() const;
// // Return true if error encountered
// bool Error() const;
// };
// Represents the composition state.
template <typename S, typename F>
struct ComposeStateTuple {
typedef S StateId;
typedef F FilterState;
ComposeStateTuple()
: state_id1(kNoStateId), state_id2(kNoStateId),
filter_state(FilterState::NoState()) {}
ComposeStateTuple(StateId s1, StateId s2, const FilterState &f)
: state_id1(s1), state_id2(s2), filter_state(f) {}
StateId state_id1; // State Id on fst1
StateId state_id2; // State Id on fst2
FilterState filter_state; // State of composition filter
};
// Equality of composition state tuples.
template <typename S, typename F>
inline bool operator==(const ComposeStateTuple<S, F>& x,
const ComposeStateTuple<S, F>& y) {
if (&x == &y)
return true;
return x.state_id1 == y.state_id1 &&
x.state_id2 == y.state_id2 &&
x.filter_state == y.filter_state;
}
// Hashing of composition state tuples.
template <typename S, typename F>
class ComposeHash {
public:
size_t operator()(const ComposeStateTuple<S, F>& t) const {
return t.state_id1 + t.state_id2 * kPrime0 +
t.filter_state.Hash() * kPrime1;
}
private:
static const size_t kPrime0;
static const size_t kPrime1;
};
template <typename S, typename F>
const size_t ComposeHash<S, F>::kPrime0 = 7853;
template <typename S, typename F>
const size_t ComposeHash<S, F>::kPrime1 = 7867;
// A HashStateTable over composition tuples.
template <typename A,
typename F,
typename H =
CompactHashStateTable<ComposeStateTuple<typename A::StateId, F>,
ComposeHash<typename A::StateId, F> > >
class GenericComposeStateTable : public H {
public:
typedef A Arc;
typedef typename A::StateId StateId;
typedef F FilterState;
typedef ComposeStateTuple<StateId, F> StateTuple;
GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {}
GenericComposeStateTable(const GenericComposeStateTable<A, F> &table) {}
bool Error() const { return false; }
private:
void operator=(const GenericComposeStateTable<A, F> &table); // disallow
};
// Fingerprint for general composition tuples.
template <typename S, typename F>
class ComposeFingerprint {
public:
typedef S StateId;
typedef F FilterState;
typedef ComposeStateTuple<S, F> StateTuple;
// Required but suboptimal constructor.
ComposeFingerprint() : mult1_(8192), mult2_(8192) {
LOG(WARNING) << "TupleFingerprint: # of FST states should be provided.";
}
// Constructor is provided the sizes of the input FSTs
ComposeFingerprint(StateId nstates1, StateId nstates2)
: mult1_(nstates1), mult2_(nstates1 * nstates2) { }
size_t operator()(const StateTuple &tuple) {
return tuple.state_id1 + tuple.state_id2 * mult1_ +
tuple.filter_state.Hash() * mult2_;
}
private:
ssize_t mult1_;
ssize_t mult2_;
};
// Useful when the first composition state determines the tuple.
template <typename S, typename F>
class ComposeState1Fingerprint {
public:
typedef S StateId;
typedef F FilterState;
typedef ComposeStateTuple<S, F> StateTuple;
size_t operator()(const StateTuple &tuple) { return tuple.state_id1; }
};
// Useful when the second composition state determines the tuple.
template <typename S, typename F>
class ComposeState2Fingerprint {
public:
typedef S StateId;
typedef F FilterState;
typedef ComposeStateTuple<S, F> StateTuple;
size_t operator()(const StateTuple &tuple) { return tuple.state_id2; }
};
// A VectorStateTable over composition tuples. This can be used when
// the product of number of states in FST1 and FST2 (and the
// composition filter state hash) is manageable. If the FSTs are not
// expanded Fsts, they will first have their states counted.
template <typename A, typename F>
class ProductComposeStateTable : public
VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
ComposeFingerprint<typename A::StateId, F> > {
public:
typedef A Arc;
typedef typename A::StateId StateId;
typedef F FilterState;
typedef ComposeStateTuple<StateId, F> StateTuple;
ProductComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
: VectorStateTable<ComposeStateTuple<StateId, F>,
ComposeFingerprint<StateId, F> >
(new ComposeFingerprint<StateId, F>(CountStates(fst1),
CountStates(fst2))) { }
ProductComposeStateTable(const ProductComposeStateTable<A, F> &table)
: VectorStateTable<ComposeStateTuple<StateId, F>,
ComposeFingerprint<StateId, F> >
(new ComposeFingerprint<StateId, F>(table.Fingerprint())) {}
bool Error() const { return false; }
private:
void operator=(const ProductComposeStateTable<A, F> &table); // disallow
};
// A VectorStateTable over composition tuples. This can be used when
// FST1 is a string (satisfies kStringProperties) and FST2 is
// epsilon-free and deterministic. It should be used with a
// composition filter that creates at most one filter state per tuple
// under these conditions (e.g. SequenceComposeFilter or
// MatchComposeFilter).
template <typename A, typename F>
class StringDetComposeStateTable : public
VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
ComposeState1Fingerprint<typename A::StateId, F> > {
public:
typedef A Arc;
typedef typename A::StateId StateId;
typedef F FilterState;
typedef ComposeStateTuple<StateId, F> StateTuple;
StringDetComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
: error_(false) {
uint64 props1 = kString;
uint64 props2 = kIDeterministic | kNoIEpsilons;
if (fst1.Properties(props1, true) != props1 ||
fst2.Properties(props2, true) != props2) {
FSTERROR() << "StringDetComposeStateTable: fst1 not a string or"
<< " fst2 not input deterministic and epsilon-free";
error_ = true;
}
}
StringDetComposeStateTable(const StringDetComposeStateTable<A, F> &table)
: error_(table.error_) {}
bool Error() const { return error_; }
private:
bool error_;
void operator=(const StringDetComposeStateTable<A, F> &table); // disallow
};
// A VectorStateTable over composition tuples. This can be used when
// FST2 is a string (satisfies kStringProperties) and FST1 is
// epsilon-free and deterministic. It should be used with a
// composition filter that creates at most one filter state per tuple
// under these conditions (e.g. SequenceComposeFilter or
// MatchComposeFilter).
template <typename A, typename F>
class DetStringComposeStateTable : public
VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
ComposeState1Fingerprint<typename A::StateId, F> > {
public:
typedef A Arc;
typedef typename A::StateId StateId;
typedef F FilterState;
typedef ComposeStateTuple<StateId, F> StateTuple;
DetStringComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
:error_(false) {
uint64 props1 = kODeterministic | kNoOEpsilons;
uint64 props2 = kString;
if (fst1.Properties(props1, true) != props1 ||
fst2.Properties(props2, true) != props2) {
FSTERROR() << "StringDetComposeStateTable: fst2 not a string or"
<< " fst1 not output deterministic and epsilon-free";
error_ = true;
}
}
DetStringComposeStateTable(const DetStringComposeStateTable<A, F> &table)
: error_(table.error_) {}
bool Error() const { return error_; }
private:
bool error_;
void operator=(const DetStringComposeStateTable<A, F> &table); // disallow
};
// An ErasableStateTable over composition tuples. The Erase(StateId) method
// can be called if the user either is sure that composition will never return
// to that tuple or doesn't care that if it does, it is assigned a new
// state ID.
template <typename A, typename F>
class ErasableComposeStateTable : public
ErasableStateTable<ComposeStateTuple<typename A::StateId, F>,
ComposeHash<typename A::StateId, F> > {
public:
typedef A Arc;
typedef typename A::StateId StateId;
typedef F FilterState;
typedef ComposeStateTuple<StateId, F> StateTuple;
ErasableComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {}
ErasableComposeStateTable(const ErasableComposeStateTable<A, F> &table) {}
bool Error() const { return false; }
private:
void operator=(const ErasableComposeStateTable<A, F> &table); // disallow
};
} // namespace fst
#endif // FST_LIB_STATE_TABLE_H__