| //===-- Relooper.cpp - Top-level interface for WebAssembly ----*- C++ -*-===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===---------------------------------------------------------------------===// |
| /// |
| /// \file |
| /// \brief This implements the Relooper algorithm. This implementation includes |
| /// optimizations added since the original academic paper [1] was published. |
| /// |
| /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In |
| /// Proceedings of the ACM international conference companion on Object |
| /// oriented programming systems languages and applications companion |
| /// (SPLASH '11). ACM, New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224 |
| /// http://doi.acm.org/10.1145/2048147.2048224 |
| /// |
| //===-------------------------------------------------------------------===// |
| |
| #include "Relooper.h" |
| #include "WebAssembly.h" |
| |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/IR/CFG.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/Pass.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/raw_ostream.h" |
| |
| #include <cstring> |
| #include <cstdlib> |
| #include <functional> |
| #include <list> |
| #include <stack> |
| #include <string> |
| |
| #define DEBUG_TYPE "relooper" |
| |
| using namespace llvm; |
| using namespace Relooper; |
| |
| static cl::opt<int> RelooperSplittingFactor( |
| "relooper-splitting-factor", |
| cl::desc( |
| "How much to discount code size when deciding whether to split a node"), |
| cl::init(5)); |
| |
| static cl::opt<unsigned> RelooperMultipleSwitchThreshold( |
| "relooper-multiple-switch-threshold", |
| cl::desc( |
| "How many entries to allow in a multiple before we use a switch"), |
| cl::init(10)); |
| |
| static cl::opt<unsigned> RelooperNestingLimit( |
| "relooper-nesting-limit", |
| cl::desc( |
| "How much nesting is acceptable"), |
| cl::init(20)); |
| |
| |
| namespace { |
| /// |
| /// Implements the relooper algorithm for a function's blocks. |
| /// |
| /// Implementation details: The Relooper instance has |
| /// ownership of the blocks and shapes, and frees them when done. |
| /// |
| struct RelooperAlgorithm { |
| std::deque<Block *> Blocks; |
| std::deque<Shape *> Shapes; |
| Shape *Root; |
| bool MinSize; |
| int BlockIdCounter; |
| int ShapeIdCounter; |
| |
| RelooperAlgorithm(); |
| ~RelooperAlgorithm(); |
| |
| void AddBlock(Block *New, int Id = -1); |
| |
| // Calculates the shapes |
| void Calculate(Block *Entry); |
| |
| // Sets us to try to minimize size |
| void SetMinSize(bool MinSize_) { MinSize = MinSize_; } |
| }; |
| |
| struct RelooperAnalysis final : public FunctionPass { |
| static char ID; |
| RelooperAnalysis() : FunctionPass(ID) {} |
| const char *getPassName() const override { return "relooper"; } |
| void getAnalysisUsage(AnalysisUsage &AU) const override { |
| AU.setPreservesAll(); |
| } |
| bool runOnFunction(Function &F) override; |
| }; |
| } |
| |
| // RelooperAnalysis |
| |
| char RelooperAnalysis::ID = 0; |
| FunctionPass *llvm::createWebAssemblyRelooper() { |
| return new RelooperAnalysis(); |
| } |
| |
| bool RelooperAnalysis::runOnFunction(Function &F) { |
| DEBUG(dbgs() << "Relooping function '" << F.getName() << "'\n"); |
| RelooperAlgorithm R; |
| // FIXME: remove duplication between relooper's and LLVM's BBs. |
| std::map<const BasicBlock *, Block *> BB2B; |
| std::map<const Block *, const BasicBlock *> B2BB; |
| for (const BasicBlock &BB : F) { |
| // FIXME: getName is wrong here, Code is meant to represent amount of code. |
| // FIXME: use BranchVarInit for switch. |
| Block *B = new Block(BB.getName().str().data(), /*BranchVarInit=*/nullptr); |
| R.AddBlock(B); |
| assert(BB2B.find(&BB) == BB2B.end() && "Inserting the same block twice"); |
| assert(B2BB.find(B) == B2BB.end() && "Inserting the same block twice"); |
| BB2B[&BB] = B; |
| B2BB[B] = &BB; |
| } |
| for (Block *B : R.Blocks) { |
| const BasicBlock *BB = B2BB[B]; |
| for (const BasicBlock *Successor : successors(BB)) |
| // FIXME: add branch's Condition and Code below. |
| B->AddBranchTo(BB2B[Successor], /*Condition=*/nullptr, /*Code=*/nullptr); |
| } |
| R.Calculate(BB2B[&F.getEntryBlock()]); |
| return false; // Analysis passes don't modify anything. |
| } |
| |
| // Helpers |
| |
| typedef MapVector<Block *, BlockSet> BlockBlockSetMap; |
| typedef std::list<Block *> BlockList; |
| |
| template <class T, class U> |
| static bool contains(const T &container, const U &contained) { |
| return container.count(contained); |
| } |
| |
| |
| // Branch |
| |
| Branch::Branch(const char *ConditionInit, const char *CodeInit) |
| : Ancestor(nullptr), Labeled(true) { |
| // FIXME: move from char* to LLVM data structures |
| Condition = ConditionInit ? strdup(ConditionInit) : nullptr; |
| Code = CodeInit ? strdup(CodeInit) : nullptr; |
| } |
| |
| Branch::~Branch() { |
| // FIXME: move from char* to LLVM data structures |
| free(static_cast<void *>(const_cast<char *>(Condition))); |
| free(static_cast<void *>(const_cast<char *>(Code))); |
| } |
| |
| // Block |
| |
| Block::Block(const char *CodeInit, const char *BranchVarInit) |
| : Parent(nullptr), Id(-1), IsCheckedMultipleEntry(false) { |
| // FIXME: move from char* to LLVM data structures |
| Code = strdup(CodeInit); |
| BranchVar = BranchVarInit ? strdup(BranchVarInit) : nullptr; |
| } |
| |
| Block::~Block() { |
| // FIXME: move from char* to LLVM data structures |
| free(static_cast<void *>(const_cast<char *>(Code))); |
| free(static_cast<void *>(const_cast<char *>(BranchVar))); |
| } |
| |
| void Block::AddBranchTo(Block *Target, const char *Condition, |
| const char *Code) { |
| assert(!contains(BranchesOut, Target) && |
| "cannot add more than one branch to the same target"); |
| BranchesOut[Target] = make_unique<Branch>(Condition, Code); |
| } |
| |
| // Relooper |
| |
| RelooperAlgorithm::RelooperAlgorithm() |
| : Root(nullptr), MinSize(false), BlockIdCounter(1), |
| ShapeIdCounter(0) { // block ID 0 is reserved for clearings |
| } |
| |
| RelooperAlgorithm::~RelooperAlgorithm() { |
| for (auto Curr : Blocks) |
| delete Curr; |
| for (auto Curr : Shapes) |
| delete Curr; |
| } |
| |
| void RelooperAlgorithm::AddBlock(Block *New, int Id) { |
| New->Id = Id == -1 ? BlockIdCounter++ : Id; |
| Blocks.push_back(New); |
| } |
| |
| struct RelooperRecursor { |
| RelooperAlgorithm *Parent; |
| RelooperRecursor(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {} |
| }; |
| |
| void RelooperAlgorithm::Calculate(Block *Entry) { |
| // Scan and optimize the input |
| struct PreOptimizer : public RelooperRecursor { |
| PreOptimizer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {} |
| BlockSet Live; |
| |
| void FindLive(Block *Root) { |
| BlockList ToInvestigate; |
| ToInvestigate.push_back(Root); |
| while (!ToInvestigate.empty()) { |
| Block *Curr = ToInvestigate.front(); |
| ToInvestigate.pop_front(); |
| if (contains(Live, Curr)) |
| continue; |
| Live.insert(Curr); |
| for (const auto &iter : Curr->BranchesOut) |
| ToInvestigate.push_back(iter.first); |
| } |
| } |
| |
| // If a block has multiple entries but no exits, and it is small enough, it |
| // is useful to split it. A common example is a C++ function where |
| // everything ends up at a final exit block and does some RAII cleanup. |
| // Without splitting, we will be forced to introduce labelled loops to |
| // allow reaching the final block |
| void SplitDeadEnds() { |
| unsigned TotalCodeSize = 0; |
| for (const auto &Curr : Live) { |
| TotalCodeSize += strlen(Curr->Code); |
| } |
| BlockSet Splits; |
| BlockSet Removed; |
| for (const auto &Original : Live) { |
| if (Original->BranchesIn.size() <= 1 || |
| !Original->BranchesOut.empty()) |
| continue; // only dead ends, for now |
| if (contains(Original->BranchesOut, Original)) |
| continue; // cannot split a looping node |
| if (strlen(Original->Code) * (Original->BranchesIn.size() - 1) > |
| TotalCodeSize / RelooperSplittingFactor) |
| continue; // if splitting increases raw code size by a significant |
| // amount, abort |
| // Split the node (for simplicity, we replace all the blocks, even |
| // though we could have reused the original) |
| DEBUG(dbgs() << " Splitting '" << Original->Code << "'\n"); |
| for (const auto &Prior : Original->BranchesIn) { |
| Block *Split = new Block(Original->Code, Original->BranchVar); |
| Parent->AddBlock(Split, Original->Id); |
| Split->BranchesIn.insert(Prior); |
| std::unique_ptr<Branch> Details; |
| Details.swap(Prior->BranchesOut[Original]); |
| Prior->BranchesOut[Split] = make_unique<Branch>(Details->Condition, |
| Details->Code); |
| for (const auto &iter : Original->BranchesOut) { |
| Block *Post = iter.first; |
| Branch *Details = iter.second.get(); |
| Split->BranchesOut[Post] = make_unique<Branch>(Details->Condition, |
| Details->Code); |
| Post->BranchesIn.insert(Split); |
| } |
| Splits.insert(Split); |
| Removed.insert(Original); |
| } |
| for (const auto &iter : Original->BranchesOut) { |
| Block *Post = iter.first; |
| Post->BranchesIn.remove(Original); |
| } |
| } |
| for (const auto &iter : Splits) |
| Live.insert(iter); |
| for (const auto &iter : Removed) |
| Live.remove(iter); |
| } |
| }; |
| PreOptimizer Pre(this); |
| Pre.FindLive(Entry); |
| |
| // Add incoming branches from live blocks, ignoring dead code |
| for (unsigned i = 0; i < Blocks.size(); i++) { |
| Block *Curr = Blocks[i]; |
| if (!contains(Pre.Live, Curr)) |
| continue; |
| for (const auto &iter : Curr->BranchesOut) |
| iter.first->BranchesIn.insert(Curr); |
| } |
| |
| if (!MinSize) |
| Pre.SplitDeadEnds(); |
| |
| // Recursively process the graph |
| |
| struct Analyzer : public RelooperRecursor { |
| Analyzer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {} |
| |
| // Add a shape to the list of shapes in this Relooper calculation |
| void Notice(Shape *New) { |
| New->Id = Parent->ShapeIdCounter++; |
| Parent->Shapes.push_back(New); |
| } |
| |
| // Create a list of entries from a block. If LimitTo is provided, only |
| // results in that set will appear |
| void GetBlocksOut(Block *Source, BlockSet &Entries, |
| BlockSet *LimitTo = nullptr) { |
| for (const auto &iter : Source->BranchesOut) |
| if (!LimitTo || contains(*LimitTo, iter.first)) |
| Entries.insert(iter.first); |
| } |
| |
| // Converts/processes all branchings to a specific target |
| void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor, |
| BlockSet &From) { |
| DEBUG(dbgs() << " Solipsize '" << Target->Code << "' type " << Type |
| << "\n"); |
| for (auto iter = Target->BranchesIn.begin(); |
| iter != Target->BranchesIn.end();) { |
| Block *Prior = *iter; |
| if (!contains(From, Prior)) { |
| iter++; |
| continue; |
| } |
| std::unique_ptr<Branch> PriorOut; |
| PriorOut.swap(Prior->BranchesOut[Target]); |
| PriorOut->Ancestor = Ancestor; |
| PriorOut->Type = Type; |
| if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Ancestor)) |
| Multiple->Breaks++; // We are breaking out of this Multiple, so need a |
| // loop |
| iter++; // carefully increment iter before erasing |
| Target->BranchesIn.remove(Prior); |
| Target->ProcessedBranchesIn.insert(Prior); |
| Prior->ProcessedBranchesOut[Target].swap(PriorOut); |
| } |
| } |
| |
| Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) { |
| DEBUG(dbgs() << " MakeSimple inner block '" << Inner->Code << "'\n"); |
| SimpleShape *Simple = new SimpleShape; |
| Notice(Simple); |
| Simple->Inner = Inner; |
| Inner->Parent = Simple; |
| if (Blocks.size() > 1) { |
| Blocks.remove(Inner); |
| GetBlocksOut(Inner, NextEntries, &Blocks); |
| BlockSet JustInner; |
| JustInner.insert(Inner); |
| for (const auto &iter : NextEntries) |
| Solipsize(iter, Branch::Direct, Simple, JustInner); |
| } |
| return Simple; |
| } |
| |
| Shape *MakeLoop(BlockSet &Blocks, BlockSet &Entries, |
| BlockSet &NextEntries) { |
| // Find the inner blocks in this loop. Proceed backwards from the entries |
| // until |
| // you reach a seen block, collecting as you go. |
| BlockSet InnerBlocks; |
| BlockSet Queue = Entries; |
| while (!Queue.empty()) { |
| Block *Curr = *(Queue.begin()); |
| Queue.remove(*Queue.begin()); |
| if (!contains(InnerBlocks, Curr)) { |
| // This element is new, mark it as inner and remove from outer |
| InnerBlocks.insert(Curr); |
| Blocks.remove(Curr); |
| // Add the elements prior to it |
| for (const auto &iter : Curr->BranchesIn) |
| Queue.insert(iter); |
| } |
| } |
| assert(!InnerBlocks.empty()); |
| |
| for (const auto &Curr : InnerBlocks) { |
| for (const auto &iter : Curr->BranchesOut) { |
| Block *Possible = iter.first; |
| if (!contains(InnerBlocks, Possible)) |
| NextEntries.insert(Possible); |
| } |
| } |
| |
| LoopShape *Loop = new LoopShape(); |
| Notice(Loop); |
| |
| // Solipsize the loop, replacing with break/continue and marking branches |
| // as Processed (will not affect later calculations) |
| // A. Branches to the loop entries become a continue to this shape |
| for (const auto &iter : Entries) |
| Solipsize(iter, Branch::Continue, Loop, InnerBlocks); |
| // B. Branches to outside the loop (a next entry) become breaks on this |
| // shape |
| for (const auto &iter : NextEntries) |
| Solipsize(iter, Branch::Break, Loop, InnerBlocks); |
| // Finish up |
| Shape *Inner = Process(InnerBlocks, Entries, nullptr); |
| Loop->Inner = Inner; |
| return Loop; |
| } |
| |
| // For each entry, find the independent group reachable by it. The |
| // independent group is the entry itself, plus all the blocks it can |
| // reach that cannot be directly reached by another entry. Note that we |
| // ignore directly reaching the entry itself by another entry. |
| // @param Ignore - previous blocks that are irrelevant |
| void FindIndependentGroups(BlockSet &Entries, |
| BlockBlockSetMap &IndependentGroups, |
| BlockSet *Ignore = nullptr) { |
| typedef std::map<Block *, Block *> BlockBlockMap; |
| |
| struct HelperClass { |
| BlockBlockSetMap &IndependentGroups; |
| BlockBlockMap Ownership; // For each block, which entry it belongs to. |
| // We have reached it from there. |
| |
| HelperClass(BlockBlockSetMap &IndependentGroupsInit) |
| : IndependentGroups(IndependentGroupsInit) {} |
| void InvalidateWithChildren(Block *New) { |
| // Being in the list means you need to be invalidated |
| BlockList ToInvalidate; |
| ToInvalidate.push_back(New); |
| while (!ToInvalidate.empty()) { |
| Block *Invalidatee = ToInvalidate.front(); |
| ToInvalidate.pop_front(); |
| Block *Owner = Ownership[Invalidatee]; |
| // Owner may have been invalidated, do not add to |
| // IndependentGroups! |
| if (contains(IndependentGroups, Owner)) |
| IndependentGroups[Owner].remove(Invalidatee); |
| if (Ownership[Invalidatee]) { // may have been seen before and |
| // invalidated already |
| Ownership[Invalidatee] = nullptr; |
| for (const auto &iter : Invalidatee->BranchesOut) { |
| Block *Target = iter.first; |
| BlockBlockMap::iterator Known = Ownership.find(Target); |
| if (Known != Ownership.end()) { |
| Block *TargetOwner = Known->second; |
| if (TargetOwner) |
| ToInvalidate.push_back(Target); |
| } |
| } |
| } |
| } |
| } |
| }; |
| HelperClass Helper(IndependentGroups); |
| |
| // We flow out from each of the entries, simultaneously. |
| // When we reach a new block, we add it as belonging to the one we got to |
| // it from. |
| // If we reach a new block that is already marked as belonging to someone, |
| // it is reachable by two entries and is not valid for any of them. |
| // Remove it and all it can reach that have been visited. |
| |
| // Being in the queue means we just added this item, and |
| // we need to add its children |
| BlockList Queue; |
| for (const auto &Entry : Entries) { |
| Helper.Ownership[Entry] = Entry; |
| IndependentGroups[Entry].insert(Entry); |
| Queue.push_back(Entry); |
| } |
| while (!Queue.empty()) { |
| Block *Curr = Queue.front(); |
| Queue.pop_front(); |
| Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership |
| // map if we are in the queue |
| if (!Owner) |
| continue; // we have been invalidated meanwhile after being reached |
| // from two entries |
| // Add all children |
| for (const auto &iter : Curr->BranchesOut) { |
| Block *New = iter.first; |
| BlockBlockMap::iterator Known = Helper.Ownership.find(New); |
| if (Known == Helper.Ownership.end()) { |
| // New node. Add it, and put it in the queue |
| Helper.Ownership[New] = Owner; |
| IndependentGroups[Owner].insert(New); |
| Queue.push_back(New); |
| continue; |
| } |
| Block *NewOwner = Known->second; |
| if (!NewOwner) |
| continue; // We reached an invalidated node |
| if (NewOwner != Owner) |
| // Invalidate this and all reachable that we have seen - we reached |
| // this from two locations |
| Helper.InvalidateWithChildren(New); |
| // otherwise, we have the same owner, so do nothing |
| } |
| } |
| |
| // Having processed all the interesting blocks, we remain with just one |
| // potential issue: |
| // If a->b, and a was invalidated, but then b was later reached by |
| // someone else, we must invalidate b. To check for this, we go over all |
| // elements in the independent groups, if an element has a parent which |
| // does *not* have the same owner, we/ must remove it and all its |
| // children. |
| |
| for (const auto &iter : Entries) { |
| BlockSet &CurrGroup = IndependentGroups[iter]; |
| BlockList ToInvalidate; |
| for (const auto &iter : CurrGroup) { |
| Block *Child = iter; |
| for (const auto &iter : Child->BranchesIn) { |
| Block *Parent = iter; |
| if (Ignore && contains(*Ignore, Parent)) |
| continue; |
| if (Helper.Ownership[Parent] != Helper.Ownership[Child]) |
| ToInvalidate.push_back(Child); |
| } |
| } |
| while (!ToInvalidate.empty()) { |
| Block *Invalidatee = ToInvalidate.front(); |
| ToInvalidate.pop_front(); |
| Helper.InvalidateWithChildren(Invalidatee); |
| } |
| } |
| |
| // Remove empty groups |
| for (const auto &iter : Entries) |
| if (IndependentGroups[iter].empty()) |
| IndependentGroups.erase(iter); |
| } |
| |
| Shape *MakeMultiple(BlockSet &Blocks, BlockSet &Entries, |
| BlockBlockSetMap &IndependentGroups, Shape *Prev, |
| BlockSet &NextEntries) { |
| bool Fused = isa<SimpleShape>(Prev); |
| MultipleShape *Multiple = new MultipleShape(); |
| Notice(Multiple); |
| BlockSet CurrEntries; |
| for (auto &iter : IndependentGroups) { |
| Block *CurrEntry = iter.first; |
| BlockSet &CurrBlocks = iter.second; |
| // Create inner block |
| CurrEntries.clear(); |
| CurrEntries.insert(CurrEntry); |
| for (const auto &CurrInner : CurrBlocks) { |
| // Remove the block from the remaining blocks |
| Blocks.remove(CurrInner); |
| // Find new next entries and fix branches to them |
| for (auto iter = CurrInner->BranchesOut.begin(); |
| iter != CurrInner->BranchesOut.end();) { |
| Block *CurrTarget = iter->first; |
| auto Next = iter; |
| Next++; |
| if (!contains(CurrBlocks, CurrTarget)) { |
| NextEntries.insert(CurrTarget); |
| Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks); |
| } |
| iter = Next; // increment carefully because Solipsize can remove us |
| } |
| } |
| Multiple->InnerMap[CurrEntry->Id] = |
| Process(CurrBlocks, CurrEntries, nullptr); |
| // If we are not fused, then our entries will actually be checked |
| if (!Fused) |
| CurrEntry->IsCheckedMultipleEntry = true; |
| } |
| // Add entries not handled as next entries, they are deferred |
| for (const auto &Entry : Entries) |
| if (!contains(IndependentGroups, Entry)) |
| NextEntries.insert(Entry); |
| // The multiple has been created, we can decide how to implement it |
| if (Multiple->InnerMap.size() >= RelooperMultipleSwitchThreshold) { |
| Multiple->UseSwitch = true; |
| Multiple->Breaks++; // switch captures breaks |
| } |
| return Multiple; |
| } |
| |
| // Main function. |
| // Process a set of blocks with specified entries, returns a shape |
| // The Make* functions receive a NextEntries. If they fill it with data, |
| // those are the entries for the ->Next block on them, and the blocks |
| // are what remains in Blocks (which Make* modify). In this way |
| // we avoid recursing on Next (imagine a long chain of Simples, if we |
| // recursed we could blow the stack). |
| Shape *Process(BlockSet &Blocks, BlockSet &InitialEntries, Shape *Prev) { |
| BlockSet *Entries = &InitialEntries; |
| BlockSet TempEntries[2]; |
| int CurrTempIndex = 0; |
| BlockSet *NextEntries; |
| Shape *Ret = nullptr; |
| |
| auto Make = [&](Shape *Temp) { |
| if (Prev) |
| Prev->Next = Temp; |
| if (!Ret) |
| Ret = Temp; |
| Prev = Temp; |
| Entries = NextEntries; |
| }; |
| |
| while (1) { |
| CurrTempIndex = 1 - CurrTempIndex; |
| NextEntries = &TempEntries[CurrTempIndex]; |
| NextEntries->clear(); |
| |
| if (Entries->empty()) |
| return Ret; |
| if (Entries->size() == 1) { |
| Block *Curr = *(Entries->begin()); |
| if (Curr->BranchesIn.empty()) { |
| // One entry, no looping ==> Simple |
| Make(MakeSimple(Blocks, Curr, *NextEntries)); |
| if (NextEntries->empty()) |
| return Ret; |
| continue; |
| } |
| // One entry, looping ==> Loop |
| Make(MakeLoop(Blocks, *Entries, *NextEntries)); |
| if (NextEntries->empty()) |
| return Ret; |
| continue; |
| } |
| |
| // More than one entry, try to eliminate through a Multiple groups of |
| // independent blocks from an entry/ies. It is important to remove |
| // through multiples as opposed to looping since the former is more |
| // performant. |
| BlockBlockSetMap IndependentGroups; |
| FindIndependentGroups(*Entries, IndependentGroups); |
| |
| if (!IndependentGroups.empty()) { |
| // We can handle a group in a multiple if its entry cannot be reached |
| // by another group. |
| // Note that it might be reachable by itself - a loop. But that is |
| // fine, we will create a loop inside the multiple block (which |
| // is the performant order to do it). |
| for (auto iter = IndependentGroups.begin(); |
| iter != IndependentGroups.end();) { |
| Block *Entry = iter->first; |
| BlockSet &Group = iter->second; |
| auto curr = iter++; // iterate carefully, we may delete |
| for (BlockSet::iterator iterBranch = Entry->BranchesIn.begin(); |
| iterBranch != Entry->BranchesIn.end(); iterBranch++) { |
| Block *Origin = *iterBranch; |
| if (!contains(Group, Origin)) { |
| // Reached from outside the group, so we cannot handle this |
| IndependentGroups.erase(curr); |
| break; |
| } |
| } |
| } |
| |
| // As an optimization, if we have 2 independent groups, and one is a |
| // small dead end, we can handle only that dead end. |
| // The other then becomes a Next - without nesting in the code and |
| // recursion in the analysis. |
| // TODO: if the larger is the only dead end, handle that too |
| // TODO: handle >2 groups |
| // TODO: handle not just dead ends, but also that do not branch to the |
| // NextEntries. However, must be careful there since we create a |
| // Next, and that Next can prevent eliminating a break (since we no |
| // longer naturally reach the same place), which may necessitate a |
| // one-time loop, which makes the unnesting pointless. |
| if (IndependentGroups.size() == 2) { |
| // Find the smaller one |
| auto iter = IndependentGroups.begin(); |
| Block *SmallEntry = iter->first; |
| auto SmallSize = iter->second.size(); |
| iter++; |
| Block *LargeEntry = iter->first; |
| auto LargeSize = iter->second.size(); |
| if (SmallSize != LargeSize) { // ignore the case where they are |
| // identical - keep things symmetrical |
| // there |
| if (SmallSize > LargeSize) { |
| Block *Temp = SmallEntry; |
| SmallEntry = LargeEntry; |
| LargeEntry = Temp; // Note: we did not flip the Sizes too, they |
| // are now invalid. TODO: use the smaller |
| // size as a limit? |
| } |
| // Check if dead end |
| bool DeadEnd = true; |
| BlockSet &SmallGroup = IndependentGroups[SmallEntry]; |
| for (const auto &Curr : SmallGroup) { |
| for (const auto &iter : Curr->BranchesOut) { |
| Block *Target = iter.first; |
| if (!contains(SmallGroup, Target)) { |
| DeadEnd = false; |
| break; |
| } |
| } |
| if (!DeadEnd) |
| break; |
| } |
| if (DeadEnd) |
| IndependentGroups.erase(LargeEntry); |
| } |
| } |
| |
| if (!IndependentGroups.empty()) |
| // Some groups removable ==> Multiple |
| Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev, |
| *NextEntries)); |
| if (NextEntries->empty()) |
| return Ret; |
| continue; |
| } |
| // No independent groups, must be loopable ==> Loop |
| Make(MakeLoop(Blocks, *Entries, *NextEntries)); |
| if (NextEntries->empty()) |
| return Ret; |
| continue; |
| } |
| } |
| }; |
| |
| // Main |
| |
| BlockSet AllBlocks; |
| for (const auto &Curr : Pre.Live) { |
| AllBlocks.insert(Curr); |
| } |
| |
| BlockSet Entries; |
| Entries.insert(Entry); |
| Root = Analyzer(this).Process(AllBlocks, Entries, nullptr); |
| assert(Root); |
| |
| /// |
| /// Relooper post-optimizer |
| /// |
| struct PostOptimizer { |
| RelooperAlgorithm *Parent; |
| std::stack<Shape *> LoopStack; |
| |
| PostOptimizer(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {} |
| |
| void ShapeSwitch(Shape* var, |
| std::function<void (SimpleShape*)> simple, |
| std::function<void (MultipleShape*)> multiple, |
| std::function<void (LoopShape*)> loop) { |
| switch (var->getKind()) { |
| case Shape::SK_Simple: { |
| simple(cast<SimpleShape>(var)); |
| break; |
| } |
| case Shape::SK_Multiple: { |
| multiple(cast<MultipleShape>(var)); |
| break; |
| } |
| case Shape::SK_Loop: { |
| loop(cast<LoopShape>(var)); |
| break; |
| } |
| } |
| } |
| |
| // Find the blocks that natural control flow can get us directly to, or |
| // through a multiple that we ignore |
| void FollowNaturalFlow(Shape *S, BlockSet &Out) { |
| ShapeSwitch(S, [&](SimpleShape* Simple) { |
| Out.insert(Simple->Inner); |
| }, [&](MultipleShape* Multiple) { |
| for (const auto &iter : Multiple->InnerMap) { |
| FollowNaturalFlow(iter.second, Out); |
| } |
| FollowNaturalFlow(Multiple->Next, Out); |
| }, [&](LoopShape* Loop) { |
| FollowNaturalFlow(Loop->Inner, Out); |
| }); |
| } |
| |
| void FindNaturals(Shape *Root, Shape *Otherwise = nullptr) { |
| if (Root->Next) { |
| Root->Natural = Root->Next; |
| FindNaturals(Root->Next, Otherwise); |
| } else { |
| Root->Natural = Otherwise; |
| } |
| |
| ShapeSwitch(Root, [](SimpleShape* Simple) { |
| }, [&](MultipleShape* Multiple) { |
| for (const auto &iter : Multiple->InnerMap) { |
| FindNaturals(iter.second, Root->Natural); |
| } |
| }, [&](LoopShape* Loop){ |
| FindNaturals(Loop->Inner, Loop->Inner); |
| }); |
| } |
| |
| // Remove unneeded breaks and continues. |
| // A flow operation is trivially unneeded if the shape we naturally get to |
| // by normal code execution is the same as the flow forces us to. |
| void RemoveUnneededFlows(Shape *Root, Shape *Natural = nullptr, |
| LoopShape *LastLoop = nullptr, |
| unsigned Depth = 0) { |
| BlockSet NaturalBlocks; |
| FollowNaturalFlow(Natural, NaturalBlocks); |
| Shape *Next = Root; |
| while (Next) { |
| Root = Next; |
| Next = nullptr; |
| ShapeSwitch( |
| Root, |
| [&](SimpleShape* Simple) { |
| if (Simple->Inner->BranchVar) |
| LastLoop = |
| nullptr; // a switch clears out the loop (TODO: only for |
| // breaks, not continue) |
| |
| if (Simple->Next) { |
| if (!Simple->Inner->BranchVar && |
| Simple->Inner->ProcessedBranchesOut.size() == 2 && |
| Depth < RelooperNestingLimit) { |
| // If there is a next block, we already know at Simple |
| // creation time to make direct branches, and we can do |
| // nothing more in general. But, we try to optimize the |
| // case of a break and a direct: This would normally be |
| // if (break?) { break; } .. |
| // but if we make sure to nest the else, we can save the |
| // break, |
| // if (!break?) { .. } |
| // This is also better because the more canonical nested |
| // form is easier to further optimize later. The |
| // downside is more nesting, which adds to size in builds with |
| // whitespace. |
| // Note that we avoid switches, as it complicates control flow |
| // and is not relevant for the common case we optimize here. |
| bool Found = false; |
| bool Abort = false; |
| for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { |
| Block *Target = iter.first; |
| Branch *Details = iter.second.get(); |
| if (Details->Type == Branch::Break) { |
| Found = true; |
| if (!contains(NaturalBlocks, Target)) |
| Abort = true; |
| } else if (Details->Type != Branch::Direct) |
| Abort = true; |
| } |
| if (Found && !Abort) { |
| for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { |
| Branch *Details = iter.second.get(); |
| if (Details->Type == Branch::Break) { |
| Details->Type = Branch::Direct; |
| if (MultipleShape *Multiple = |
| dyn_cast<MultipleShape>(Details->Ancestor)) |
| Multiple->Breaks--; |
| } else { |
| assert(Details->Type == Branch::Direct); |
| Details->Type = Branch::Nested; |
| } |
| } |
| } |
| Depth++; // this optimization increases depth, for us and all |
| // our next chain (i.e., until this call returns) |
| } |
| Next = Simple->Next; |
| } else { |
| // If there is no next then Natural is where we will |
| // go to by doing nothing, so we can potentially optimize some |
| // branches to direct. |
| for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { |
| Block *Target = iter.first; |
| Branch *Details = iter.second.get(); |
| if (Details->Type != Branch::Direct && |
| contains(NaturalBlocks, |
| Target)) { // note: cannot handle split blocks |
| Details->Type = Branch::Direct; |
| if (MultipleShape *Multiple = |
| dyn_cast<MultipleShape>(Details->Ancestor)) |
| Multiple->Breaks--; |
| } else if (Details->Type == Branch::Break && LastLoop && |
| LastLoop->Natural == Details->Ancestor->Natural) { |
| // it is important to simplify breaks, as simpler breaks |
| // enable other optimizations |
| Details->Labeled = false; |
| if (MultipleShape *Multiple = |
| dyn_cast<MultipleShape>(Details->Ancestor)) |
| Multiple->Breaks--; |
| } |
| } |
| } |
| }, [&](MultipleShape* Multiple) |
| { |
| for (const auto &iter : Multiple->InnerMap) { |
| RemoveUnneededFlows(iter.second, Multiple->Next, |
| Multiple->Breaks ? nullptr : LastLoop, |
| Depth + 1); |
| } |
| Next = Multiple->Next; |
| }, [&](LoopShape* Loop) |
| { |
| RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop, Depth + 1); |
| Next = Loop->Next; |
| }); |
| } |
| } |
| |
| // After we know which loops exist, we can calculate which need to be |
| // labeled |
| void FindLabeledLoops(Shape *Root) { |
| Shape *Next = Root; |
| while (Next) { |
| Root = Next; |
| Next = nullptr; |
| |
| ShapeSwitch( |
| Root, |
| [&](SimpleShape *Simple) { |
| MultipleShape *Fused = dyn_cast<MultipleShape>(Root->Next); |
| // If we are fusing a Multiple with a loop into this Simple, then |
| // visit it now |
| if (Fused && Fused->Breaks) |
| LoopStack.push(Fused); |
| if (Simple->Inner->BranchVar) |
| LoopStack.push(nullptr); // a switch means breaks are now useless, |
| // push a dummy |
| if (Fused) { |
| if (Fused->UseSwitch) |
| LoopStack.push(nullptr); // a switch means breaks are now |
| // useless, push a dummy |
| for (const auto &iter : Fused->InnerMap) { |
| FindLabeledLoops(iter.second); |
| } |
| } |
| for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { |
| Branch *Details = iter.second.get(); |
| if (Details->Type == Branch::Break || |
| Details->Type == Branch::Continue) { |
| assert(!LoopStack.empty()); |
| if (Details->Ancestor != LoopStack.top() && Details->Labeled) { |
| if (MultipleShape *Multiple = |
| dyn_cast<MultipleShape>(Details->Ancestor)) { |
| Multiple->Labeled = true; |
| } else { |
| LoopShape *Loop = cast<LoopShape>(Details->Ancestor); |
| Loop->Labeled = true; |
| } |
| } else { |
| Details->Labeled = false; |
| } |
| } |
| if (Fused && Fused->UseSwitch) |
| LoopStack.pop(); |
| if (Simple->Inner->BranchVar) |
| LoopStack.pop(); |
| if (Fused && Fused->Breaks) |
| LoopStack.pop(); |
| if (Fused) |
| Next = Fused->Next; |
| else |
| Next = Root->Next; |
| } |
| } |
| , [&](MultipleShape* Multiple) { |
| if (Multiple->Breaks) |
| LoopStack.push(Multiple); |
| for (const auto &iter : Multiple->InnerMap) |
| FindLabeledLoops(iter.second); |
| if (Multiple->Breaks) |
| LoopStack.pop(); |
| Next = Root->Next; |
| } |
| , [&](LoopShape* Loop) { |
| LoopStack.push(Loop); |
| FindLabeledLoops(Loop->Inner); |
| LoopStack.pop(); |
| Next = Root->Next; |
| }); |
| } |
| } |
| |
| void Process(Shape * Root) { |
| FindNaturals(Root); |
| RemoveUnneededFlows(Root); |
| FindLabeledLoops(Root); |
| } |
| }; |
| |
| PostOptimizer(this).Process(Root); |
| } |