//===--- GrammarBNF.cpp - build grammar from BNF files ----------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang-pseudo/grammar/Grammar.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/FormatVariadic.h"
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>
namespace clang {
namespace pseudo {
namespace {
static const llvm::StringRef OptSuffix = "_opt";
static const llvm::StringRef StartSymbol = "_";
// Builds grammar from BNF files.
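// The accepted syntax, as implemented by parse() and parseLine() below, is
// one rule per line. The rules in this sketch are illustrative only, not
// taken from the shipped grammar:
//
//   # A '#' starts a comment running to the end of the line.
//   expression := expression + term
//   parameter := type identifier_opt     # _opt marks the symbol as optional
//   declarator := ( declarator [guard] ) # [key] or [key=value] attaches an
//                                        # attribute to the preceding symbol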
class GrammarBuilder {
public:
GrammarBuilder(std::vector<std::string> &Diagnostics)
: Diagnostics(Diagnostics) {}
Grammar build(llvm::StringRef BNF) {
auto Specs = eliminateOptional(parse(BNF));
assert(llvm::all_of(Specs,
[](const RuleSpec &R) {
if (R.Target.ends_with(OptSuffix))
return false;
return llvm::all_of(
R.Sequence, [](const RuleSpec::Element &E) {
return !E.Symbol.ends_with(OptSuffix);
});
}) &&
"Optional symbols should be eliminated!");
auto T = std::make_unique<GrammarTable>();
// Assemble the name->ID and ID->nonterminal name maps.
llvm::DenseSet<llvm::StringRef> UniqueNonterminals;
llvm::DenseMap<llvm::StringRef, SymbolID> SymbolIds;
llvm::DenseSet<llvm::StringRef> UniqueAttributeValues;
for (uint16_t I = 0; I < NumTerminals; ++I)
SymbolIds.try_emplace(T->Terminals[I], tokenSymbol(tok::TokenKind(I)));
auto Consider = [&](llvm::StringRef Name) {
if (!SymbolIds.count(Name))
UniqueNonterminals.insert(Name);
};
for (const auto &Spec : Specs) {
Consider(Spec.Target);
for (const RuleSpec::Element &Elt : Spec.Sequence) {
Consider(Elt.Symbol);
for (const auto& KV : Elt.Attributes)
UniqueAttributeValues.insert(KV.second);
}
}
for (llvm::StringRef Name : UniqueNonterminals) {
T->Nonterminals.emplace_back();
T->Nonterminals.back().Name = Name.str();
}
assert(T->Nonterminals.size() < (1 << (SymbolBits - 1)) &&
"Too many nonterminals to fit in SymbolID bits!");
llvm::sort(T->Nonterminals, [](const GrammarTable::Nonterminal &L,
const GrammarTable::Nonterminal &R) {
return L.Name < R.Name;
});
    // Add the empty string as the sentinel value corresponding to an unset
    // attribute.
T->AttributeValues.push_back("");
UniqueAttributeValues.erase("");
for (llvm::StringRef Name : UniqueAttributeValues) {
T->AttributeValues.emplace_back();
T->AttributeValues.back() = Name.str();
}
llvm::sort(T->AttributeValues);
assert(T->AttributeValues.front() == "");
// Build name -> ID maps for nonterminals.
for (SymbolID SID = 0; SID < T->Nonterminals.size(); ++SID)
SymbolIds.try_emplace(T->Nonterminals[SID].Name, SID);
// Convert the rules.
T->Rules.reserve(Specs.size());
std::vector<SymbolID> Symbols;
auto Lookup = [SymbolIds](llvm::StringRef Name) {
auto It = SymbolIds.find(Name);
assert(It != SymbolIds.end() && "Didn't find the symbol in SymbolIds!");
return It->second;
};
for (const auto &Spec : Specs) {
assert(Spec.Sequence.size() <= Rule::MaxElements);
Symbols.clear();
for (const RuleSpec::Element &Elt : Spec.Sequence)
Symbols.push_back(Lookup(Elt.Symbol));
T->Rules.push_back(Rule(Lookup(Spec.Target), Symbols));
applyAttributes(Spec, *T, T->Rules.back());
}
assert(T->Rules.size() < (1 << RuleBits) &&
"Too many rules to fit in RuleID bits!");
const auto &SymbolOrder = getTopologicalOrder(T.get());
llvm::stable_sort(
T->Rules, [&SymbolOrder](const Rule &Left, const Rule &Right) {
// Sorted by the topological order of the nonterminal Target.
return SymbolOrder[Left.Target] < SymbolOrder[Right.Target];
});
for (SymbolID SID = 0; SID < T->Nonterminals.size(); ++SID) {
auto StartIt = llvm::partition_point(T->Rules, [&](const Rule &R) {
return SymbolOrder[R.Target] < SymbolOrder[SID];
});
RuleID Start = StartIt - T->Rules.begin();
RuleID End = Start;
while (End < T->Rules.size() && T->Rules[End].Target == SID)
++End;
T->Nonterminals[SID].RuleRange = {Start, End};
}
Grammar G(std::move(T));
diagnoseGrammar(G);
return G;
}
// Gets topological order for nonterminal symbols.
//
  // The topological order is defined as: if a nonterminal B produces a
  // nonterminal A through a chain of single-element rules (in the simplest
  // case, there is a production rule B := A), then A is less than B.
  //
  // It returns the sort key for each symbol; the returned array is indexed by
  // SymbolID.
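  //
  // Illustrative sketch (the rules here are not from the shipped grammar):
  // given the single-element rules `ptr-expr := expr` and `expr := id-expr`,
  // the computed keys satisfy id-expr < expr < ptr-expr, so rules for id-expr
  // are laid out before rules for expr. A dependency cycle (e.g. `a := b` and
  // `b := a`) is reported as a diagnostic and the offending edge is simply
  // not followed.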
std::vector<unsigned> getTopologicalOrder(GrammarTable *T) {
std::vector<std::pair<SymbolID, SymbolID>> Dependencies;
for (const auto &Rule : T->Rules) {
// if A := B, A depends on B.
if (Rule.Size == 1 && pseudo::isNonterminal(Rule.Sequence[0]))
Dependencies.push_back({Rule.Target, Rule.Sequence[0]});
}
llvm::sort(Dependencies);
std::vector<SymbolID> Order;
    // Each nonterminal's visit state flows: NotVisited -> Visiting -> Visited.
enum State {
NotVisited,
Visiting,
Visited,
};
std::vector<State> VisitStates(T->Nonterminals.size(), NotVisited);
std::function<void(SymbolID)> DFS = [&](SymbolID SID) -> void {
if (VisitStates[SID] == Visited)
return;
if (VisitStates[SID] == Visiting) {
Diagnostics.push_back(
llvm::formatv("The grammar contains a cycle involving symbol {0}",
T->Nonterminals[SID].Name));
return;
}
VisitStates[SID] = Visiting;
for (auto It = llvm::lower_bound(Dependencies,
std::pair<SymbolID, SymbolID>{SID, 0});
It != Dependencies.end() && It->first == SID; ++It)
DFS(It->second);
VisitStates[SID] = Visited;
Order.push_back(SID);
};
for (SymbolID ID = 0; ID != T->Nonterminals.size(); ++ID)
DFS(ID);
std::vector<unsigned> Result(T->Nonterminals.size(), 0);
for (size_t I = 0; I < Order.size(); ++I)
Result[Order[I]] = I;
return Result;
}
private:
// Text representation of a BNF grammar rule.
struct RuleSpec {
llvm::StringRef Target;
struct Element {
llvm::StringRef Symbol; // Name of the symbol
      // Attributes associated with this sequence symbol or with the rule.
std::vector<std::pair<llvm::StringRef/*Key*/, llvm::StringRef/*Value*/>>
Attributes;
};
std::vector<Element> Sequence;
std::string toString() const {
std::vector<llvm::StringRef> Body;
for (const auto &E : Sequence)
Body.push_back(E.Symbol);
return llvm::formatv("{0} := {1}", Target, llvm::join(Body, " "));
}
};
std::vector<RuleSpec> parse(llvm::StringRef Lines) {
std::vector<RuleSpec> Specs;
for (llvm::StringRef Line : llvm::split(Lines, '\n')) {
Line = Line.trim();
// Strip anything coming after the '#' (comment).
Line = Line.take_while([](char C) { return C != '#'; });
if (Line.empty())
continue;
RuleSpec Rule;
if (parseLine(Line, Rule))
Specs.push_back(std::move(Rule));
}
return Specs;
}
bool parseLine(llvm::StringRef Line, RuleSpec &Out) {
auto Parts = Line.split(":=");
if (Parts.first == Line) { // no separator in Line
Diagnostics.push_back(
llvm::formatv("Failed to parse '{0}': no separator :=", Line).str());
return false;
}
Out.Target = Parts.first.trim();
Out.Sequence.clear();
for (llvm::StringRef Chunk : llvm::split(Parts.second, ' ')) {
Chunk = Chunk.trim();
if (Chunk.empty())
continue; // skip empty
if (Chunk.starts_with("[") && Chunk.ends_with("]")) {
if (Out.Sequence.empty())
continue;
parseAttributes(Chunk, Out.Sequence.back().Attributes);
continue;
}
Out.Sequence.push_back({Chunk, /*Attributes=*/{}});
}
return true;
}
bool parseAttributes(
llvm::StringRef Content,
std::vector<std::pair<llvm::StringRef, llvm::StringRef>> &Out) {
assert(Content.starts_with("[") && Content.ends_with("]"));
auto KV = Content.drop_front().drop_back().split('=');
Out.push_back({KV.first, KV.second.trim()});
return true;
}
// Apply the parsed extensions (stored in RuleSpec) to the grammar Rule.
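  //
  // For example (an illustrative spec, not necessarily from the shipped
  // grammar), given
  //   compound-statement := { statement-seq [recover=Brackets] }
  // the "recover" attribute attaches to statement-seq (element index 1), so
  // R.RecoveryIndex becomes 1 and R.Recovery stores the ID of the "Brackets"
  // attribute value; a [guard] attribute anywhere in the sequence simply sets
  // R.Guarded.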
void applyAttributes(const RuleSpec& Spec, const GrammarTable& T, Rule& R) {
auto LookupExtensionID = [&T](llvm::StringRef Name) {
const auto It = llvm::partition_point(
T.AttributeValues, [&](llvm::StringRef X) { return X < Name; });
assert(It != T.AttributeValues.end() && *It == Name &&
"Didn't find the attribute in AttrValues!");
return It - T.AttributeValues.begin();
};
for (unsigned I = 0; I < Spec.Sequence.size(); ++I) {
for (const auto &KV : Spec.Sequence[I].Attributes) {
if (KV.first == "guard") {
R.Guarded = true;
} else if (KV.first == "recover") {
R.Recovery = LookupExtensionID(KV.second);
R.RecoveryIndex = I;
} else {
Diagnostics.push_back(
llvm::formatv("Unknown attribute '{0}'", KV.first).str());
}
}
}
}
// Inlines all _opt symbols.
  // For example, for a rule E := id +_opt id, elimination yields two
  // equivalent rules:
// 1) E := id + id
// 2) E := id id
std::vector<RuleSpec> eliminateOptional(llvm::ArrayRef<RuleSpec> Input) {
std::vector<RuleSpec> Results;
std::vector<RuleSpec::Element> Storage;
for (const auto &R : Input) {
eliminateOptionalTail(
R.Sequence, Storage, [&Results, &Storage, &R, this]() {
if (Storage.empty()) {
Diagnostics.push_back(
llvm::formatv("Rule '{0}' has a nullable RHS", R.toString()));
return;
}
Results.push_back({R.Target, Storage});
});
assert(Storage.empty());
}
return Results;
}
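  // Recursively enumerates every way of keeping or dropping the remaining
  // _opt symbols in Elements, accumulating the current choice in Result and
  // invoking CB once for each fully expanded sequence.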
void eliminateOptionalTail(llvm::ArrayRef<RuleSpec::Element> Elements,
std::vector<RuleSpec::Element> &Result,
llvm::function_ref<void()> CB) {
if (Elements.empty())
return CB();
auto Front = Elements.front();
if (!Front.Symbol.ends_with(OptSuffix)) {
Result.push_back(std::move(Front));
eliminateOptionalTail(Elements.drop_front(1), Result, CB);
Result.pop_back();
return;
}
// Enumerate two options: skip the opt symbol, or inline the symbol.
eliminateOptionalTail(Elements.drop_front(1), Result, CB); // skip
Front.Symbol = Front.Symbol.drop_back(OptSuffix.size()); // drop "_opt"
Result.push_back(std::move(Front));
eliminateOptionalTail(Elements.drop_front(1), Result, CB);
Result.pop_back();
}
  // Diagnoses the grammar and emits warnings, if any.
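  //
  // Current checks: nonterminals with no rules, nonterminal names made up
  // solely of uppercase letters (they look like terminals), duplicate rules,
  // and nonterminals (other than the start symbol "_") that are never
  // referenced by any rule.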
void diagnoseGrammar(const Grammar &G) {
const auto &T = G.table();
for (SymbolID SID = 0; SID < T.Nonterminals.size(); ++SID) {
auto Range = T.Nonterminals[SID].RuleRange;
if (Range.Start == Range.End)
Diagnostics.push_back(
llvm::formatv("No rules for nonterminal: {0}", G.symbolName(SID)));
llvm::StringRef NameRef = T.Nonterminals[SID].Name;
if (llvm::all_of(NameRef, llvm::isAlpha) && NameRef.upper() == NameRef) {
Diagnostics.push_back(llvm::formatv(
"Token-like name {0} is used as a nonterminal", G.symbolName(SID)));
}
}
llvm::DenseSet<llvm::hash_code> VisitedRules;
for (RuleID RID = 0; RID < T.Rules.size(); ++RID) {
const auto &R = T.Rules[RID];
auto Code = llvm::hash_combine(
R.Target, llvm::hash_combine_range(R.seq().begin(), R.seq().end()));
auto [_, New] = VisitedRules.insert(Code);
if (!New)
Diagnostics.push_back(
llvm::formatv("Duplicate rule: `{0}`", G.dumpRule(RID)));
}
    // Nonterminal symbol-id -> use count.
std::vector<unsigned> UseCounts(T.Nonterminals.size(), 0);
for (const Rule &R : T.Rules)
for (SymbolID SID : R.seq())
if (isNonterminal(SID))
++UseCounts[SID];
for (SymbolID SID = 0; SID < UseCounts.size(); ++SID)
if (UseCounts[SID] == 0 && T.Nonterminals[SID].Name != StartSymbol)
Diagnostics.push_back(
llvm::formatv("Nonterminal never used: {0}", G.symbolName(SID)));
}
std::vector<std::string> &Diagnostics;
};
} // namespace
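// Typical usage (an illustrative sketch; BNFText is a hypothetical variable
// holding the grammar source):
//   std::vector<std::string> Diags;
//   Grammar G = Grammar::parseBNF(BNFText, Diags);
//   if (!Diags.empty())
//     ...; // report warnings/errors gathered while building the grammar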
Grammar Grammar::parseBNF(llvm::StringRef BNF,
std::vector<std::string> &Diagnostics) {
Diagnostics.clear();
return GrammarBuilder(Diagnostics).build(BNF);
}
} // namespace pseudo
} // namespace clang