| // info.h |
| |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| // Copyright 2005-2010 Google, Inc. |
| // Author: riley@google.com (Michael Riley) |
| // |
| // \file |
| // Class to compute various information about FSTs, helper class for fstinfo.cc |
| |
| #ifndef FST_SCRIPT_INFO_IMPL_H_ |
| #define FST_SCRIPT_INFO_IMPL_H_ |
| |
| #include <string> |
| #include <vector> |
| using std::vector; |
| |
| #include <fst/connect.h> |
| #include <fst/dfs-visit.h> |
| #include <fst/fst.h> |
| #include <fst/lookahead-matcher.h> |
| #include <fst/matcher.h> |
| #include <fst/queue.h> |
| #include <fst/test-properties.h> |
| #include <fst/verify.h> |
| #include <fst/visit.h> |
| |
| namespace fst { |
| |
| // Compute various information about FSTs, helper class for fstinfo.cc. |
| // WARNING: Stand-alone use of this class is not recommended, most code |
| // should call directly the relevant library functions: Fst<A>::NumStates, |
| // Fst<A>::NumArcs, TestProperties, ... |
| template <class A> class FstInfo { |
| public: |
| typedef A Arc; |
| typedef typename A::StateId StateId; |
| typedef typename A::Label Label; |
| typedef typename A::Weight Weight; |
| |
| // When info_type is "short" (or "auto" and not an ExpandedFst) |
| // then only minimal info is computed and can be requested. |
| FstInfo(const Fst<A> &fst, bool test_properties, |
| const string &arc_filter_type = "any", |
| string info_type = "auto", bool verify = true) |
| : fst_type_(fst.Type()), |
| input_symbols_(fst.InputSymbols() ? |
| fst.InputSymbols()->Name() : "none"), |
| output_symbols_(fst.OutputSymbols() ? |
| fst.OutputSymbols()->Name() : "none"), |
| nstates_(0), narcs_(0), start_(kNoStateId), nfinal_(0), |
| nepsilons_(0), niepsilons_(0), noepsilons_(0), |
| naccess_(0), ncoaccess_(0), nconnect_(0), ncc_(0), nscc_(0), |
| input_match_type_(MATCH_NONE), output_match_type_(MATCH_NONE), |
| input_lookahead_(false), output_lookahead_(false), |
| properties_(0), arc_filter_type_(arc_filter_type), long_info_(true) { |
| if (info_type == "long") { |
| long_info_ = true; |
| } else if (info_type == "short") { |
| long_info_ = false; |
| } else if (info_type == "auto") { |
| long_info_ = fst.Properties(kExpanded, false); |
| } else { |
| FSTERROR() << "Bad info type: " << info_type; |
| return; |
| } |
| |
| if (!long_info_) |
| return; |
| |
| // If the FST is not sane, we return. |
| if (verify && !Verify(fst)) { |
| FSTERROR() << "FstInfo: Verify: FST not well-formed."; |
| return; |
| } |
| |
| start_ = fst.Start(); |
| properties_ = fst.Properties(kFstProperties, test_properties); |
| |
| for (StateIterator< Fst<A> > siter(fst); |
| !siter.Done(); |
| siter.Next()) { |
| ++nstates_; |
| StateId s = siter.Value(); |
| if (fst.Final(s) != Weight::Zero()) |
| ++nfinal_; |
| for (ArcIterator< Fst<A> > aiter(fst, s); |
| !aiter.Done(); |
| aiter.Next()) { |
| const A &arc = aiter.Value(); |
| ++narcs_; |
| if (arc.ilabel == 0 && arc.olabel == 0) |
| ++nepsilons_; |
| if (arc.ilabel == 0) |
| ++niepsilons_; |
| if (arc.olabel == 0) |
| ++noepsilons_; |
| } |
| } |
| |
| { |
| vector<StateId> cc; |
| CcVisitor<Arc> cc_visitor(&cc); |
| FifoQueue<StateId> fifo_queue; |
| if (arc_filter_type == "any") { |
| Visit(fst, &cc_visitor, &fifo_queue); |
| } else if (arc_filter_type == "epsilon") { |
| Visit(fst, &cc_visitor, &fifo_queue, EpsilonArcFilter<Arc>()); |
| } else if (arc_filter_type == "iepsilon") { |
| Visit(fst, &cc_visitor, &fifo_queue, InputEpsilonArcFilter<Arc>()); |
| } else if (arc_filter_type == "oepsilon") { |
| Visit(fst, &cc_visitor, &fifo_queue, OutputEpsilonArcFilter<Arc>()); |
| } else { |
| FSTERROR() << "Bad arc filter type: " << arc_filter_type; |
| return; |
| } |
| |
| for (StateId s = 0; s < cc.size(); ++s) { |
| if (cc[s] >= ncc_) |
| ncc_ = cc[s] + 1; |
| } |
| } |
| |
| { |
| vector<StateId> scc; |
| vector<bool> access, coaccess; |
| uint64 props = 0; |
| SccVisitor<Arc> scc_visitor(&scc, &access, &coaccess, &props); |
| if (arc_filter_type == "any") { |
| DfsVisit(fst, &scc_visitor); |
| } else if (arc_filter_type == "epsilon") { |
| DfsVisit(fst, &scc_visitor, EpsilonArcFilter<Arc>()); |
| } else if (arc_filter_type == "iepsilon") { |
| DfsVisit(fst, &scc_visitor, InputEpsilonArcFilter<Arc>()); |
| } else if (arc_filter_type == "oepsilon") { |
| DfsVisit(fst, &scc_visitor, OutputEpsilonArcFilter<Arc>()); |
| } else { |
| FSTERROR() << "Bad arc filter type: " << arc_filter_type; |
| return; |
| } |
| |
| for (StateId s = 0; s < scc.size(); ++s) { |
| if (access[s]) |
| ++naccess_; |
| if (coaccess[s]) |
| ++ncoaccess_; |
| if (access[s] && coaccess[s]) |
| ++nconnect_; |
| if (scc[s] >= nscc_) |
| nscc_ = scc[s] + 1; |
| } |
| } |
| |
| LookAheadMatcher< Fst<A> > imatcher(fst, MATCH_INPUT); |
| input_match_type_ = imatcher.Type(test_properties); |
| input_lookahead_ = imatcher.Flags() & kInputLookAheadMatcher; |
| |
| LookAheadMatcher< Fst<A> > omatcher(fst, MATCH_OUTPUT); |
| output_match_type_ = omatcher.Type(test_properties); |
| output_lookahead_ = omatcher.Flags() & kOutputLookAheadMatcher; |
| } |
| |
| // Short info |
| const string& FstType() const { return fst_type_; } |
| const string& ArcType() const { return A::Type(); } |
| const string& InputSymbols() const { return input_symbols_; } |
| const string& OutputSymbols() const { return output_symbols_; } |
| const bool LongInfo() const { return long_info_; } |
| const string& ArcFilterType() const { return arc_filter_type_; } |
| |
| // Long info |
| MatchType InputMatchType() const { CheckLong(); return input_match_type_; } |
| MatchType OutputMatchType() const { CheckLong(); return output_match_type_; } |
| bool InputLookAhead() const { CheckLong(); return input_lookahead_; } |
| bool OutputLookAhead() const { CheckLong(); return output_lookahead_; } |
| int64 NumStates() const { CheckLong(); return nstates_; } |
| int64 NumArcs() const { CheckLong(); return narcs_; } |
| int64 Start() const { CheckLong(); return start_; } |
| int64 NumFinal() const { CheckLong(); return nfinal_; } |
| int64 NumEpsilons() const { CheckLong(); return nepsilons_; } |
| int64 NumInputEpsilons() const { CheckLong(); return niepsilons_; } |
| int64 NumOutputEpsilons() const { CheckLong(); return noepsilons_; } |
| int64 NumAccessible() const { CheckLong(); return naccess_; } |
| int64 NumCoAccessible() const { CheckLong(); return ncoaccess_; } |
| int64 NumConnected() const { CheckLong(); return nconnect_; } |
| int64 NumCc() const { CheckLong(); return ncc_; } |
| int64 NumScc() const { CheckLong(); return nscc_; } |
| uint64 Properties() const { CheckLong(); return properties_; } |
| |
| private: |
| void CheckLong() const { |
| if (!long_info_) |
| FSTERROR() << "FstInfo: method only available with long info version"; |
| } |
| |
| string fst_type_; |
| string input_symbols_; |
| string output_symbols_; |
| int64 nstates_; |
| int64 narcs_; |
| int64 start_; |
| int64 nfinal_; |
| int64 nepsilons_; |
| int64 niepsilons_; |
| int64 noepsilons_; |
| int64 naccess_; |
| int64 ncoaccess_; |
| int64 nconnect_; |
| int64 ncc_; |
| int64 nscc_; |
| MatchType input_match_type_; |
| MatchType output_match_type_; |
| bool input_lookahead_; |
| bool output_lookahead_; |
| uint64 properties_; |
| string arc_filter_type_; |
| bool long_info_; |
| DISALLOW_COPY_AND_ASSIGN(FstInfo); |
| }; |
| |
| template <class A> |
| void PrintFstInfo(const FstInfo<A> &fstinfo, bool pipe = false) { |
| ostream &os = pipe ? cerr : cout; |
| |
| ios_base::fmtflags old = os.setf(ios::left); |
| os.width(50); |
| os << "fst type" << fstinfo.FstType() << endl; |
| os.width(50); |
| os << "arc type" << fstinfo.ArcType() << endl; |
| os.width(50); |
| os << "input symbol table" << fstinfo.InputSymbols() << endl; |
| os.width(50); |
| os << "output symbol table" << fstinfo.OutputSymbols() << endl; |
| |
| if (!fstinfo.LongInfo()) { |
| os.setf(old); |
| return; |
| } |
| |
| os.width(50); |
| os << "# of states" << fstinfo.NumStates() << endl; |
| os.width(50); |
| os << "# of arcs" << fstinfo.NumArcs() << endl; |
| os.width(50); |
| os << "initial state" << fstinfo.Start() << endl; |
| os.width(50); |
| os << "# of final states" << fstinfo.NumFinal() << endl; |
| os.width(50); |
| os << "# of input/output epsilons" << fstinfo.NumEpsilons() << endl; |
| os.width(50); |
| os << "# of input epsilons" << fstinfo.NumInputEpsilons() << endl; |
| os.width(50); |
| os << "# of output epsilons" << fstinfo.NumOutputEpsilons() << endl; |
| os.width(50); |
| |
| string arc_type = ""; |
| if (fstinfo.ArcFilterType() == "epsilon") |
| arc_type = "epsilon "; |
| else if (fstinfo.ArcFilterType() == "iepsilon") |
| arc_type = "input-epsilon "; |
| else if (fstinfo.ArcFilterType() == "oepsilon") |
| arc_type = "output-epsilon "; |
| |
| string accessible_label = "# of " + arc_type + "accessible states"; |
| os.width(50); |
| os << accessible_label << fstinfo.NumAccessible() << endl; |
| string coaccessible_label = "# of " + arc_type + "coaccessible states"; |
| os.width(50); |
| os << coaccessible_label << fstinfo.NumCoAccessible() << endl; |
| string connected_label = "# of " + arc_type + "connected states"; |
| os.width(50); |
| os << connected_label << fstinfo.NumConnected() << endl; |
| string numcc_label = "# of " + arc_type + "connected components"; |
| os.width(50); |
| os << numcc_label << fstinfo.NumCc() << endl; |
| string numscc_label = "# of " + arc_type + "strongly conn components"; |
| os.width(50); |
| os << numscc_label << fstinfo.NumScc() << endl; |
| |
| os.width(50); |
| os << "input matcher" |
| << (fstinfo.InputMatchType() == MATCH_INPUT ? 'y' : |
| fstinfo.InputMatchType() == MATCH_NONE ? 'n' : '?') << endl; |
| os.width(50); |
| os << "output matcher" |
| << (fstinfo.OutputMatchType() == MATCH_OUTPUT ? 'y' : |
| fstinfo.OutputMatchType() == MATCH_NONE ? 'n' : '?') << endl; |
| os.width(50); |
| os << "input lookahead" |
| << (fstinfo.InputLookAhead() ? 'y' : 'n') << endl; |
| os.width(50); |
| os << "output lookahead" |
| << (fstinfo.OutputLookAhead() ? 'y' : 'n') << endl; |
| |
| uint64 prop = 1; |
| for (int i = 0; i < 64; ++i, prop <<= 1) { |
| if (prop & kBinaryProperties) { |
| char value = 'n'; |
| if (fstinfo.Properties() & prop) value = 'y'; |
| os.width(50); |
| os << PropertyNames[i] << value << endl; |
| } else if (prop & kPosTrinaryProperties) { |
| char value = '?'; |
| if (fstinfo.Properties() & prop) value = 'y'; |
| else if (fstinfo.Properties() & prop << 1) value = 'n'; |
| os.width(50); |
| os << PropertyNames[i] << value << endl; |
| } |
| } |
| os.setf(old); |
| } |
| |
| } // namespace fst |
| |
| #endif // FST_SCRIPT_INFO_IMPL_H_ |