lib/Target/WebAssembly/Relooper.cpp - platform/external/llvm - Git at Google

 //===-- Relooper.cpp - Top-level interface for WebAssembly  ----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===---------------------------------------------------------------------===//
 ///
 /// \file
 /// \brief This implements the Relooper algorithm. This implementation includes
 /// optimizations added since the original academic paper [1] was published.
 ///
 /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
 /// Proceedings of the ACM international conference companion on Object
 /// oriented programming systems languages and applications companion
 /// (SPLASH '11). ACM, New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
 /// http://doi.acm.org/10.1145/2048147.2048224
 ///
 //===-------------------------------------------------------------------===//

 #include "Relooper.h"
 #include "WebAssembly.h"

 #include "llvm/ADT/STLExtras.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Function.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"

 #include <cstring>
 #include <cstdlib>
 #include <functional>
 #include <list>
 #include <stack>
 #include <string>

 #define DEBUG_TYPE "relooper"

 using namespace llvm;
 using namespace Relooper;

 // Tunable heuristics for the relooper, exposed as command-line options.

 // Divisor applied to the total live code size to bound how much code may be
 // duplicated when a dead-end block is split (see SplitDeadEnds).
 static cl::opt<int> RelooperSplittingFactor(
     "relooper-splitting-factor", cl::init(5),
     cl::desc(
         "How much to discount code size when deciding whether to split a node"));

 // Number of inner shapes at which a MultipleShape is flagged to use a switch.
 static cl::opt<unsigned> RelooperMultipleSwitchThreshold(
     "relooper-multiple-switch-threshold", cl::init(10),
     cl::desc("How many entries to allow in a multiple before we use a switch"));

 // Upper bound on the nesting depth up to which the post-optimizer may turn a
 // break/direct pair into a nested branch.
 static cl::opt<unsigned> RelooperNestingLimit(
     "relooper-nesting-limit", cl::init(20),
     cl::desc("How much nesting is acceptable"));


 namespace {
 ///
 /// Implements the relooper algorithm for a function's blocks.
 ///
 /// Implementation details: The Relooper instance has
 /// ownership of the blocks and shapes, and frees them when done.
 /// That ownership is held through raw pointers, so instances are
 /// non-copyable: a copy would delete every block and shape a second time.
 ///
 struct RelooperAlgorithm {
   std::deque<Block *> Blocks; // Every block passed to AddBlock (owned).
   std::deque<Shape *> Shapes; // Every shape created during Calculate (owned).
   Shape *Root;                // Top-level shape; null until Calculate runs.
   bool MinSize;               // When set, Calculate skips dead-end splitting.
   int BlockIdCounter;         // Next id AddBlock hands out automatically.
   int ShapeIdCounter;         // Next id handed to a newly created shape.

   RelooperAlgorithm();
   ~RelooperAlgorithm();

   // The destructor deletes the raw pointers above; forbid copying so two
   // instances can never free the same objects.
   RelooperAlgorithm(const RelooperAlgorithm &) = delete;
   RelooperAlgorithm &operator=(const RelooperAlgorithm &) = delete;

   // Registers New with this instance. If Id is -1 a fresh id is assigned,
   // otherwise the given id is used as-is.
   void AddBlock(Block *New, int Id = -1);

   // Calculates the shapes
   void Calculate(Block *Entry);

   // Sets us to try to minimize size
   void SetMinSize(bool MinSize_) { MinSize = MinSize_; }
 };

 /// Function pass that runs the relooper over each function it is given. It
 /// declares that it preserves all analyses.
 struct RelooperAnalysis final : public FunctionPass {
   static char ID;

   RelooperAnalysis() : FunctionPass(ID) {}

   bool runOnFunction(Function &F) override;

   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesAll();
   }

   const char *getPassName() const override { return "relooper"; }
 };
 }

 // RelooperAnalysis

 // Storage for the static pass ID that the RelooperAnalysis constructor hands
 // to FunctionPass.
 char RelooperAnalysis::ID = 0;

 // Factory for the relooper pass; returns a newly allocated RelooperAnalysis.
 FunctionPass *llvm::createWebAssemblyRelooper() {
   FunctionPass *Pass = new RelooperAnalysis();
   return Pass;
 }

 /// Mirrors the CFG of \p F into relooper Blocks (one per basic block, one
 /// branch per distinct successor) and runs the relooper calculation starting
 /// at the function's entry block.
 ///
 /// \returns false always: nothing in \p F is modified.
 bool RelooperAnalysis::runOnFunction(Function &F) {
   DEBUG(dbgs() << "Relooping function '" << F.getName() << "'\n");
   RelooperAlgorithm R;
   // FIXME: remove duplication between relooper's and LLVM's BBs.
   std::map<const BasicBlock *, Block *> BB2B;
   for (const BasicBlock &BB : F) {
     // FIXME: getName is wrong here, Code is meant to represent amount of code.
     // FIXME: use BranchVarInit for switch.
     // The Block constructor copies the string, so the temporary is fine.
     Block *B =
         new Block(BB.getName().str().c_str(), /*BranchVarInit=*/nullptr);
     R.AddBlock(B);
     assert(BB2B.find(&BB) == BB2B.end() && "Inserting the same block twice");
     BB2B[&BB] = B;
   }
   // Blocks were added in function order, so walking the function again visits
   // them in the same order as R.Blocks without needing a reverse map.
   for (const BasicBlock &BB : F) {
     Block *B = BB2B[&BB];
     for (const BasicBlock *Successor : successors(&BB)) {
       Block *Target = BB2B[Successor];
       // A terminator may list the same successor more than once (e.g. several
       // switch cases sharing a destination). AddBranchTo allows only one
       // branch per target, so keep the first and skip the repeats.
       if (B->BranchesOut.count(Target))
         continue;
       // FIXME: add branch's Condition and Code below.
       B->AddBranchTo(Target, /*Condition=*/nullptr, /*Code=*/nullptr);
     }
   }
   R.Calculate(BB2B[&F.getEntryBlock()]);
   return false; // Analysis passes don't modify anything.
 }

 // Helpers

 // Associates a block with a set of blocks (used for the independent groups
 // keyed by their entry block).
 using BlockBlockSetMap = MapVector<Block *, BlockSet>;
 // List of blocks, used in this file as a FIFO work list.
 using BlockList = std::list<Block *>;

 // Membership test for any container that provides count(): true when
 // `contained` occurs at least once in `container`.
 template <class T, class U>
 static bool contains(const T &container, const U &contained) {
   return container.count(contained) != 0;
 }


 // Branch

 // Creates a branch with no ancestor shape, marked as labeled. Each non-null
 // input string is duplicated; a null input is stored as null.
 Branch::Branch(const char *ConditionInit, const char *CodeInit)
     : Ancestor(nullptr), Labeled(true) {
   // FIXME: move from char* to LLVM data structures
   Condition = nullptr;
   if (ConditionInit)
     Condition = strdup(ConditionInit);
   Code = nullptr;
   if (CodeInit)
     Code = strdup(CodeInit);
 }

 // Releases the strings duplicated by the constructor. Either may be null,
 // which free() accepts.
 Branch::~Branch() {
   // FIXME: move from char* to LLVM data structures
   char *OwnedCondition = const_cast<char *>(Condition);
   char *OwnedCode = const_cast<char *>(Code);
   free(static_cast<void *>(OwnedCondition));
   free(static_cast<void *>(OwnedCode));
 }

 // Block

 // Creates a block with no parent shape, id -1 and a private copy of CodeInit.
 // BranchVarInit is optional: when null, BranchVar stays null.
 Block::Block(const char *CodeInit, const char *BranchVarInit)
     : Parent(nullptr), Id(-1), IsCheckedMultipleEntry(false) {
   // FIXME: move from char* to LLVM data structures
   // Code is read unconditionally elsewhere in this file (strlen, debug
   // output) and strdup(nullptr) is undefined, so a null CodeInit is a caller
   // bug; catch it here rather than at a distant use.
   assert(CodeInit && "Block requires non-null code");
   Code = strdup(CodeInit);
   BranchVar = BranchVarInit ? strdup(BranchVarInit) : nullptr;
 }

 // Releases the strings duplicated by the constructor. BranchVar may be null,
 // which free() accepts.
 Block::~Block() {
   // FIXME: move from char* to LLVM data structures
   char *OwnedCode = const_cast<char *>(Code);
   char *OwnedBranchVar = const_cast<char *>(BranchVar);
   free(static_cast<void *>(OwnedCode));
   free(static_cast<void *>(OwnedBranchVar));
 }

 // Records an outgoing branch from this block to Target, carrying the given
 // condition and code strings (either may be null). At most one branch per
 // target is allowed; a second one trips the assertion.
 void Block::AddBranchTo(Block *Target, const char *Condition,
                         const char *Code) {
   const bool AlreadyPresent = contains(BranchesOut, Target);
   assert(!AlreadyPresent &&
          "cannot add more than one branch to the same target");
   (void)AlreadyPresent; // only read by the assert
   auto NewBranch = make_unique<Branch>(Condition, Code);
   BranchesOut[Target].swap(NewBranch);
 }

 // Relooper

 // Starts with no root shape, size minimization off, and fresh id counters.
 RelooperAlgorithm::RelooperAlgorithm()
     : Root(nullptr), MinSize(false),
       BlockIdCounter(1), // block ID 0 is reserved for clearings
       ShapeIdCounter(0) {}

 // Frees every block and shape registered with this instance.
 RelooperAlgorithm::~RelooperAlgorithm() {
   for (Block *OwnedBlock : Blocks)
     delete OwnedBlock;
   for (Shape *OwnedShape : Shapes)
     delete OwnedShape;
 }

 // Appends New to the block list. An Id of -1 requests the next automatic id;
 // any other value is assigned to the block unchanged.
 void RelooperAlgorithm::AddBlock(Block *New, int Id) {
   if (Id == -1)
     Id = BlockIdCounter++;
   New->Id = Id;
   Blocks.push_back(New);
 }

 // Base for the helper structs used inside Calculate: it only keeps a pointer
 // back to the RelooperAlgorithm they operate on.
 struct RelooperRecursor {
   RelooperAlgorithm *Parent;

   RelooperRecursor(RelooperAlgorithm *Owner) : Parent(Owner) {}
 };

 void RelooperAlgorithm::Calculate(Block *Entry) {
   // Scan and optimize the input
   struct PreOptimizer : public RelooperRecursor {
     PreOptimizer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {}
     BlockSet Live;

     void FindLive(Block *Root) {
       BlockList ToInvestigate;
       ToInvestigate.push_back(Root);
       while (!ToInvestigate.empty()) {
         Block *Curr = ToInvestigate.front();
         ToInvestigate.pop_front();
         if (contains(Live, Curr))
           continue;
         Live.insert(Curr);
         for (const auto &iter : Curr->BranchesOut)
           ToInvestigate.push_back(iter.first);
       }
     }

     // If a block has multiple entries but no exits, and it is small enough, it
     // is useful to split it. A common example is a C++ function where
     // everything ends up at a final exit block and does some RAII cleanup.
     // Without splitting, we will be forced to introduce labelled loops to
     // allow reaching the final block
     void SplitDeadEnds() {
       unsigned TotalCodeSize = 0;
       for (const auto &Curr : Live) {
         TotalCodeSize += strlen(Curr->Code);
       }
       BlockSet Splits;
       BlockSet Removed;
       for (const auto &Original : Live) {
         if (Original->BranchesIn.size() <= 1 ||
             !Original->BranchesOut.empty())
           continue; // only dead ends, for now
         if (contains(Original->BranchesOut, Original))
           continue; // cannot split a looping node
         if (strlen(Original->Code) * (Original->BranchesIn.size() - 1) >
             TotalCodeSize / RelooperSplittingFactor)
           continue; // if splitting increases raw code size by a significant
                     // amount, abort
         // Split the node (for simplicity, we replace all the blocks, even
         // though we could have reused the original)
         DEBUG(dbgs() << "  Splitting '" << Original->Code << "'\n");
         for (const auto &Prior : Original->BranchesIn) {
           Block *Split = new Block(Original->Code, Original->BranchVar);
           Parent->AddBlock(Split, Original->Id);
           Split->BranchesIn.insert(Prior);
           std::unique_ptr<Branch> Details;
           Details.swap(Prior->BranchesOut[Original]);
           Prior->BranchesOut[Split] = make_unique<Branch>(Details->Condition,
                                                           Details->Code);
           for (const auto &iter : Original->BranchesOut) {
             Block *Post = iter.first;
             Branch *Details = iter.second.get();
             Split->BranchesOut[Post] = make_unique<Branch>(Details->Condition,
                                                            Details->Code);
             Post->BranchesIn.insert(Split);
           }
           Splits.insert(Split);
           Removed.insert(Original);
         }
         for (const auto &iter : Original->BranchesOut) {
           Block *Post = iter.first;
           Post->BranchesIn.remove(Original);
         }
       }
       for (const auto &iter : Splits)
         Live.insert(iter);
       for (const auto &iter : Removed)
         Live.remove(iter);
     }
   };
   PreOptimizer Pre(this);
   Pre.FindLive(Entry);

   // Add incoming branches from live blocks, ignoring dead code
   for (unsigned i = 0; i < Blocks.size(); i++) {
     Block *Curr = Blocks[i];
     if (!contains(Pre.Live, Curr))
       continue;
     for (const auto &iter : Curr->BranchesOut)
       iter.first->BranchesIn.insert(Curr);
   }

   if (!MinSize)
     Pre.SplitDeadEnds();

   // Recursively process the graph

   struct Analyzer : public RelooperRecursor {
     // Forwards the owning RelooperAlgorithm to the RelooperRecursor base.
     Analyzer(RelooperAlgorithm *ParentInit) : RelooperRecursor(ParentInit) {}

     // Registers a freshly created shape with the owning RelooperAlgorithm:
     // gives it the next shape id and appends it to the Shapes list.
     void Notice(Shape *New) {
       const int AssignedId = Parent->ShapeIdCounter;
       Parent->ShapeIdCounter = AssignedId + 1;
       New->Id = AssignedId;
       Parent->Shapes.push_back(New);
     }

     // Create a list of entries from a block. If LimitTo is provided, only
     // results in that set will appear.
     //
     // Solipsize moves a processed branch out of BranchesOut by swapping,
     // which leaves a null Branch under the target's key. Such entries are not
     // live edges any more and are skipped.
     void GetBlocksOut(Block *Source, BlockSet &Entries,
                       BlockSet *LimitTo = nullptr) {
       for (const auto &iter : Source->BranchesOut) {
         if (!iter.second)
           continue; // already processed
         if (!LimitTo || contains(*LimitTo, iter.first))
           Entries.insert(iter.first);
       }
     }

     // Converts/processes all branchings to a specific target.
     //
     // For every predecessor of Target that is in From, the branch is tagged
     // with the given flow Type and Ancestor shape, and moved from the
     // unprocessed maps (BranchesIn / BranchesOut) to the processed ones. The
     // predecessor's BranchesOut keeps the Target key, now holding a null
     // Branch.
     void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor,
                    BlockSet &From) {
       DEBUG(dbgs() << "  Solipsize '" << Target->Code << "' type " << Type
                    << "\n");
       // Collect the matching predecessors first. Removing from
       // Target->BranchesIn while iterating over it would invalidate the
       // iterator when the set is vector-backed, so the mutation happens in a
       // second pass over this snapshot.
       BlockList Priors;
       for (Block *Prior : Target->BranchesIn)
         if (contains(From, Prior))
           Priors.push_back(Prior);

       for (Block *Prior : Priors) {
         std::unique_ptr<Branch> PriorOut;
         PriorOut.swap(Prior->BranchesOut[Target]);
         assert(PriorOut && "incoming edge without a matching outgoing branch");
         PriorOut->Ancestor = Ancestor;
         PriorOut->Type = Type;
         if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Ancestor))
           Multiple->Breaks++; // We are breaking out of this Multiple, so need a
                               // loop
         Target->BranchesIn.remove(Prior);
         Target->ProcessedBranchesIn.insert(Prior);
         Prior->ProcessedBranchesOut[Target].swap(PriorOut);
       }
     }

     // Wraps Inner in a new SimpleShape. When other blocks remain, Inner is
     // taken out of Blocks, its successors within Blocks become NextEntries,
     // and Inner's branches to them are marked Direct.
     Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) {
       DEBUG(dbgs() << "  MakeSimple inner block '" << Inner->Code << "'\n");
       SimpleShape *Simple = new SimpleShape;
       Notice(Simple);
       Simple->Inner = Inner;
       Inner->Parent = Simple;
       if (Blocks.size() <= 1)
         return Simple; // Inner is the only block left; nothing follows
       Blocks.remove(Inner);
       GetBlocksOut(Inner, NextEntries, &Blocks);
       BlockSet OnlyInner;
       OnlyInner.insert(Inner);
       for (Block *NextEntry : NextEntries)
         Solipsize(NextEntry, Branch::Direct, Simple, OnlyInner);
       return Simple;
     }

     // Builds a LoopShape around everything that can reach Entries through
     // unprocessed incoming branches. Those blocks are removed from Blocks;
     // the targets they still branch to outside the loop are returned in
     // NextEntries.
     Shape *MakeLoop(BlockSet &Blocks, BlockSet &Entries,
                     BlockSet &NextEntries) {
       // Find the inner blocks in this loop. Proceed backwards from the entries
       // until you reach a seen block, collecting as you go.
       BlockSet InnerBlocks;
       BlockSet Queue = Entries;
       while (!Queue.empty()) {
         Block *Curr = *(Queue.begin());
         Queue.remove(Curr);
         if (!contains(InnerBlocks, Curr)) {
           // This element is new, mark it as inner and remove from outer
           InnerBlocks.insert(Curr);
           Blocks.remove(Curr);
           // Add the elements prior to it
           for (const auto &iter : Curr->BranchesIn)
             Queue.insert(iter);
         }
       }
       assert(!InnerBlocks.empty());

       for (const auto &Curr : InnerBlocks) {
         for (const auto &iter : Curr->BranchesOut) {
           // A null Branch marks an edge that an enclosing shape has already
           // processed (Solipsize swaps the Branch out but keeps the key).
           // Its target is not an exit of this loop and must not be turned
           // into a next entry.
           if (!iter.second)
             continue;
           Block *Possible = iter.first;
           if (!contains(InnerBlocks, Possible))
             NextEntries.insert(Possible);
         }
       }

       LoopShape *Loop = new LoopShape();
       Notice(Loop);

       // Solipsize the loop, replacing with break/continue and marking branches
       // as Processed (will not affect later calculations)
       // A. Branches to the loop entries become a continue to this shape
       for (const auto &iter : Entries)
         Solipsize(iter, Branch::Continue, Loop, InnerBlocks);
       // B. Branches to outside the loop (a next entry) become breaks on this
       // shape
       for (const auto &iter : NextEntries)
         Solipsize(iter, Branch::Break, Loop, InnerBlocks);
       // Finish up
       Shape *Inner = Process(InnerBlocks, Entries, nullptr);
       Loop->Inner = Inner;
       return Loop;
     }

     // For each entry, find the independent group reachable by it. The
     // independent group is the entry itself, plus all the blocks it can
     // reach that cannot be directly reached by another entry. Note that we
     // ignore directly reaching the entry itself by another entry.
     //
     // Only unprocessed branches are followed: a BranchesOut entry whose
     // Branch is null has already been processed by Solipsize and is skipped,
     // matching BranchesIn, from which processed edges are removed.
     //   @param Entries - the entry blocks to flow out from
     //   @param IndependentGroups - output: entry -> its group; entries whose
     //          group ends up empty are not present
     //   @param Ignore - previous blocks that are irrelevant
     void FindIndependentGroups(BlockSet &Entries,
                                BlockBlockSetMap &IndependentGroups,
                                BlockSet *Ignore = nullptr) {
       typedef std::map<Block *, Block *> BlockBlockMap;

       struct HelperClass {
         BlockBlockSetMap &IndependentGroups;
         BlockBlockMap Ownership; // For each block, which entry it belongs to.
                                  // We have reached it from there. A null
                                  // owner means the block was invalidated.

         HelperClass(BlockBlockSetMap &IndependentGroupsInit)
             : IndependentGroups(IndependentGroupsInit) {}

         // Removes New, and every already-owned block reachable from it, from
         // the groups and marks them as ownerless.
         void InvalidateWithChildren(Block *New) {
           // Being in the list means you need to be invalidated
           BlockList ToInvalidate;
           ToInvalidate.push_back(New);
           while (!ToInvalidate.empty()) {
             Block *Invalidatee = ToInvalidate.front();
             ToInvalidate.pop_front();
             Block *Owner = Ownership[Invalidatee];
             // Owner may have been invalidated, do not add to
             // IndependentGroups!
             if (contains(IndependentGroups, Owner))
               IndependentGroups[Owner].remove(Invalidatee);
             if (Ownership[Invalidatee]) { // may have been seen before and
                                           // invalidated already
               Ownership[Invalidatee] = nullptr;
               for (const auto &iter : Invalidatee->BranchesOut) {
                 if (!iter.second)
                   continue; // processed branch, not a live edge
                 Block *Target = iter.first;
                 BlockBlockMap::iterator Known = Ownership.find(Target);
                 if (Known != Ownership.end()) {
                   Block *TargetOwner = Known->second;
                   if (TargetOwner)
                     ToInvalidate.push_back(Target);
                 }
               }
             }
           }
         }
       };
       HelperClass Helper(IndependentGroups);

       // We flow out from each of the entries, simultaneously.
       // When we reach a new block, we add it as belonging to the one we got to
       // it from.
       // If we reach a new block that is already marked as belonging to someone,
       // it is reachable by two entries and is not valid for any of them.
       // Remove it and all it can reach that have been visited.

       // Being in the queue means we just added this item, and
       // we need to add its children
       BlockList Queue;
       for (const auto &Entry : Entries) {
         Helper.Ownership[Entry] = Entry;
         IndependentGroups[Entry].insert(Entry);
         Queue.push_back(Entry);
       }
       while (!Queue.empty()) {
         Block *Curr = Queue.front();
         Queue.pop_front();
         Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership
                                                // map if we are in the queue
         if (!Owner)
           continue; // we have been invalidated meanwhile after being reached
                     // from two entries
         // Add all children
         for (const auto &iter : Curr->BranchesOut) {
           if (!iter.second)
             continue; // processed branch, not a live edge
           Block *New = iter.first;
           BlockBlockMap::iterator Known = Helper.Ownership.find(New);
           if (Known == Helper.Ownership.end()) {
             // New node. Add it, and put it in the queue
             Helper.Ownership[New] = Owner;
             IndependentGroups[Owner].insert(New);
             Queue.push_back(New);
             continue;
           }
           Block *NewOwner = Known->second;
           if (!NewOwner)
             continue; // We reached an invalidated node
           if (NewOwner != Owner)
             // Invalidate this and all reachable that we have seen - we reached
             // this from two locations
             Helper.InvalidateWithChildren(New);
           // otherwise, we have the same owner, so do nothing
         }
       }

       // Having processed all the interesting blocks, we remain with just one
       // potential issue:
       // If a->b, and a was invalidated, but then b was later reached by
       // someone else, we must invalidate b. To check for this, we go over all
       // elements in the independent groups, if an element has a parent which
       // does *not* have the same owner, we must remove it and all its
       // children.

       for (const auto &Entry : Entries) {
         BlockSet &CurrGroup = IndependentGroups[Entry];
         // Collect first, invalidate afterwards: invalidation removes blocks
         // from the group being iterated.
         BlockList ToInvalidate;
         for (const auto &GroupMember : CurrGroup) {
           Block *Child = GroupMember;
           for (const auto &Incoming : Child->BranchesIn) {
             Block *Parent = Incoming;
             if (Ignore && contains(*Ignore, Parent))
               continue;
             if (Helper.Ownership[Parent] != Helper.Ownership[Child])
               ToInvalidate.push_back(Child);
           }
         }
         while (!ToInvalidate.empty()) {
           Block *Invalidatee = ToInvalidate.front();
           ToInvalidate.pop_front();
           Helper.InvalidateWithChildren(Invalidatee);
         }
       }

       // Remove empty groups
       for (const auto &Entry : Entries)
         if (IndependentGroups[Entry].empty())
           IndependentGroups.erase(Entry);
     }

     // Builds a MultipleShape with one inner shape per independent group.
     // Blocks of each group are removed from Blocks; branches leaving a group
     // become breaks on the new shape and their targets become NextEntries,
     // as do the entries that have no group.
     //   @param Prev - the shape preceding this one, or null if there is none
     Shape *MakeMultiple(BlockSet &Blocks, BlockSet &Entries,
                         BlockBlockSetMap &IndependentGroups, Shape *Prev,
                         BlockSet &NextEntries) {
       // Prev is null when this is the first shape produced by a Process call,
       // and isa<> must not be applied to a null pointer.
       bool Fused = Prev && isa<SimpleShape>(Prev);
       MultipleShape *Multiple = new MultipleShape();
       Notice(Multiple);
       BlockSet CurrEntries;
       for (auto &iter : IndependentGroups) {
         Block *CurrEntry = iter.first;
         BlockSet &CurrBlocks = iter.second;
         // Create inner block
         CurrEntries.clear();
         CurrEntries.insert(CurrEntry);
         for (const auto &CurrInner : CurrBlocks) {
           // Remove the block from the remaining blocks
           Blocks.remove(CurrInner);
           // Find new next entries and fix branches to them. The targets are
           // collected before any Solipsize call so that this loop never
           // iterates BranchesOut while it is being changed. Entries holding a
           // null Branch were processed earlier and are not exits of this
           // group.
           BlockList ExitTargets;
           for (const auto &Out : CurrInner->BranchesOut)
             if (Out.second && !contains(CurrBlocks, Out.first))
               ExitTargets.push_back(Out.first);
           for (Block *CurrTarget : ExitTargets) {
             NextEntries.insert(CurrTarget);
             Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks);
           }
         }
         Multiple->InnerMap[CurrEntry->Id] =
             Process(CurrBlocks, CurrEntries, nullptr);
         // If we are not fused, then our entries will actually be checked
         if (!Fused)
           CurrEntry->IsCheckedMultipleEntry = true;
       }
       // Add entries not handled as next entries, they are deferred
       for (const auto &Entry : Entries)
         if (!contains(IndependentGroups, Entry))
           NextEntries.insert(Entry);
       // The multiple has been created, we can decide how to implement it
       if (Multiple->InnerMap.size() >= RelooperMultipleSwitchThreshold) {
         Multiple->UseSwitch = true;
         Multiple->Breaks++; // switch captures breaks
       }
       return Multiple;
     }

     // Main function.
     // Process a set of blocks with specified entries, returns a shape
     // The Make* functions receive a NextEntries. If they fill it with data,
     // those are the entries for the ->Next block on them, and the blocks
     // are what remains in Blocks (which Make* modify). In this way
     // we avoid recursing on Next (imagine a long chain of Simples, if we
     // recursed we could blow the stack).
     //   @param Prev - shape to chain the first created shape after, or null
     //   @return the first shape created, or null if there were no entries
     Shape *Process(BlockSet &Blocks, BlockSet &InitialEntries, Shape *Prev) {
       BlockSet *Entries = &InitialEntries;
       // Two buffers used alternately: one holds the current entries while
       // the other receives the next ones.
       BlockSet TempEntries[2];
       int CurrTempIndex = 0;
       BlockSet *NextEntries;
       Shape *Ret = nullptr;

       // Links a newly made shape after the previous one, remembers the first
       // shape as the result, and advances to the next entries.
       auto Make = [&](Shape *Temp) {
         if (Prev)
           Prev->Next = Temp;
         if (!Ret)
           Ret = Temp;
         Prev = Temp;
         Entries = NextEntries;
       };

       while (1) {
         CurrTempIndex = 1 - CurrTempIndex;
         NextEntries = &TempEntries[CurrTempIndex];
         NextEntries->clear();

         if (Entries->empty())
           return Ret;
         if (Entries->size() == 1) {
           Block *Curr = *(Entries->begin());
           if (Curr->BranchesIn.empty()) {
             // One entry, no looping ==> Simple
             Make(MakeSimple(Blocks, Curr, *NextEntries));
             if (NextEntries->empty())
               return Ret;
             continue;
           }
           // One entry, looping ==> Loop
           Make(MakeLoop(Blocks, *Entries, *NextEntries));
           if (NextEntries->empty())
             return Ret;
           continue;
         }

         // More than one entry, try to eliminate through a Multiple groups of
         // independent blocks from an entry/ies. It is important to remove
         // through multiples as opposed to looping since the former is more
         // performant.
         BlockBlockSetMap IndependentGroups;
         FindIndependentGroups(*Entries, IndependentGroups);

         if (!IndependentGroups.empty()) {
           // We can handle a group in a multiple if its entry cannot be reached
           // by another group.
           // Note that it might be reachable by itself - a loop. But that is
           // fine, we will create a loop inside the multiple block (which
           // is the performant order to do it).
           for (auto iter = IndependentGroups.begin();
                iter != IndependentGroups.end();) {
             Block *Entry = iter->first;
             BlockSet &Group = iter->second;
             bool ReachedFromOutside = false;
             for (Block *Origin : Entry->BranchesIn) {
               if (!contains(Group, Origin)) {
                 ReachedFromOutside = true;
                 break;
               }
             }
             // Erasing from a MapVector invalidates iterators at and after
             // the erased element, so continue from the iterator that erase
             // returns instead of one advanced beforehand.
             if (ReachedFromOutside)
               iter = IndependentGroups.erase(iter);
             else
               ++iter;
           }

           // As an optimization, if we have 2 independent groups, and one is a
           // small dead end, we can handle only that dead end.
           // The other then becomes a Next - without nesting in the code and
           // recursion in the analysis.
           // TODO: if the larger is the only dead end, handle that too
           // TODO: handle >2 groups
           // TODO: handle not just dead ends, but also that do not branch to the
           // NextEntries. However, must be careful there since we create a
           // Next, and that Next can prevent eliminating a break (since we no
           // longer naturally reach the same place), which may necessitate a
           // one-time loop, which makes the unnesting pointless.
           if (IndependentGroups.size() == 2) {
             // Find the smaller one
             auto iter = IndependentGroups.begin();
             Block *SmallEntry = iter->first;
             auto SmallSize = iter->second.size();
             iter++;
             Block *LargeEntry = iter->first;
             auto LargeSize = iter->second.size();
             if (SmallSize != LargeSize) { // ignore the case where they are
                                           // identical - keep things symmetrical
                                           // there
               if (SmallSize > LargeSize) {
                 Block *Temp = SmallEntry;
                 SmallEntry = LargeEntry;
                 LargeEntry = Temp; // Note: we did not flip the Sizes too, they
                                    // are now invalid. TODO: use the smaller
                                    // size as a limit?
               }
               // Check if dead end. Branches already processed by an enclosing
               // shape (null Branch left in BranchesOut) do not count as exits.
               bool DeadEnd = true;
               BlockSet &SmallGroup = IndependentGroups[SmallEntry];
               for (const auto &Curr : SmallGroup) {
                 for (const auto &Out : Curr->BranchesOut) {
                   if (!Out.second)
                     continue;
                   Block *Target = Out.first;
                   if (!contains(SmallGroup, Target)) {
                     DeadEnd = false;
                     break;
                   }
                 }
                 if (!DeadEnd)
                   break;
               }
               if (DeadEnd)
                 IndependentGroups.erase(LargeEntry);
             }
           }

           if (!IndependentGroups.empty()) {
             // Some groups removable ==> Multiple
             Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev,
                               *NextEntries));
             if (NextEntries->empty())
               return Ret;
             continue;
           }
           // Every group was pruned above: fall through to the loop case
           // rather than returning without having made a shape.
         }
         // No independent groups, must be loopable ==> Loop
         Make(MakeLoop(Blocks, *Entries, *NextEntries));
         if (NextEntries->empty())
           return Ret;
         continue;
       }
     }
   };

   // Main

   BlockSet AllBlocks;
   for (const auto &Curr : Pre.Live) {
     AllBlocks.insert(Curr);
   }

   BlockSet Entries;
   Entries.insert(Entry);
   Root = Analyzer(this).Process(AllBlocks, Entries, nullptr);
   assert(Root);

   ///
   /// Relooper post-optimizer
   ///
   struct PostOptimizer {
     RelooperAlgorithm *Parent;
     std::stack<Shape *> LoopStack;

     PostOptimizer(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {}

     // Dispatches on the dynamic kind of `var`, calling exactly one of the
     // three callbacks with the shape cast to its concrete type. `var` is
     // dereferenced, so it must not be null.
     void ShapeSwitch(Shape* var,
                      std::function<void (SimpleShape*)> simple,
                      std::function<void (MultipleShape*)> multiple,
                      std::function<void (LoopShape*)> loop) {
       const auto Kind = var->getKind();
       if (Kind == Shape::SK_Simple)
         simple(cast<SimpleShape>(var));
       else if (Kind == Shape::SK_Multiple)
         multiple(cast<MultipleShape>(var));
       else if (Kind == Shape::SK_Loop)
         loop(cast<LoopShape>(var));
     }

     // Find the blocks that natural control flow can get us directly to, or
     // through a multiple that we ignore. The blocks found are added to Out.
     // A null shape contributes nothing: callers pass the (possibly absent)
     // natural successor, and a multiple's Next may be null as well.
     void FollowNaturalFlow(Shape *S, BlockSet &Out) {
       if (!S)
         return; // ShapeSwitch dereferences its argument
       ShapeSwitch(S, [&](SimpleShape* Simple) {
         Out.insert(Simple->Inner);
       }, [&](MultipleShape* Multiple) {
         for (const auto &iter : Multiple->InnerMap) {
           FollowNaturalFlow(iter.second, Out);
         }
         FollowNaturalFlow(Multiple->Next, Out);
       }, [&](LoopShape* Loop) {
         FollowNaturalFlow(Loop->Inner, Out);
       });
     }

     // Sets the Natural field of Root and of every shape reachable from it
     // through Next, multiple inner shapes and loop bodies. A shape's Natural
     // is its Next when it has one, otherwise the Otherwise argument. Shapes
     // inside a multiple inherit the multiple's Natural; a loop body gets the
     // body itself.
     void FindNaturals(Shape *Root, Shape *Otherwise = nullptr) {
       Shape *Following = Root->Next;
       if (!Following) {
         Root->Natural = Otherwise;
       } else {
         Root->Natural = Following;
         FindNaturals(Following, Otherwise);
       }

       auto OnSimple = [](SimpleShape *) {};
       auto OnMultiple = [&](MultipleShape *Multiple) {
         for (const auto &Entry : Multiple->InnerMap)
           FindNaturals(Entry.second, Root->Natural);
       };
       auto OnLoop = [&](LoopShape *Loop) {
         FindNaturals(Loop->Inner, Loop->Inner);
       };
       ShapeSwitch(Root, OnSimple, OnMultiple, OnLoop);
     }

     // Remove unneeded breaks and continues.
     // A flow operation is trivially unneeded if the shape we naturally get to
     // by normal code execution is the same as the flow forces us to.
     //
     // Root is the head of the shape chain to clean up. Natural is the shape
     // that control reaches by doing nothing once the chain ends; the blocks it
     // leads to are gathered into NaturalBlocks by FollowNaturalFlow. LastLoop
     // is the enclosing loop that is still reachable by an unlabeled break, or
     // null when there is none (a switch or a Multiple with breaks resets it).
     // Depth is the nesting level so far; it is checked against
     // RelooperNestingLimit before more nesting is introduced.
     void RemoveUnneededFlows(Shape *Root, Shape *Natural = nullptr,
                              LoopShape *LastLoop = nullptr,
                              unsigned Depth = 0) {
       BlockSet NaturalBlocks;
       FollowNaturalFlow(Natural, NaturalBlocks);
       Shape *Next = Root;

       // A break that has been relaxed (made direct, or made unlabeled) no
       // longer counts against the Multiple it targeted, if its ancestor is a
       // Multiple at all.
       auto ReleaseBreak = [](Branch *Details) {
         if (MultipleShape *Owner = dyn_cast<MultipleShape>(Details->Ancestor))
           Owner->Breaks--;
       };

       auto VisitSimple = [&](SimpleShape *Simple) {
         auto &BranchesOut = Simple->Inner->ProcessedBranchesOut;

         // a switch clears out the loop (TODO: only for breaks, not continue)
         if (Simple->Inner->BranchVar)
           LastLoop = nullptr;

         if (!Simple->Next) {
           // With nothing after this Simple, Natural is where we end up by
           // doing nothing, so branches heading there can become direct.
           for (const auto &Entry : BranchesOut) {
             Block *Dest = Entry.first;
             Branch *Details = Entry.second.get();
             if (Details->Type != Branch::Direct &&
                 contains(NaturalBlocks,
                          Dest)) { // note: cannot handle split blocks
               Details->Type = Branch::Direct;
               ReleaseBreak(Details);
             } else if (Details->Type == Branch::Break && LastLoop &&
                        LastLoop->Natural == Details->Ancestor->Natural) {
               // simpler (unlabeled) breaks enable other optimizations
               Details->Labeled = false;
               ReleaseBreak(Details);
             }
           }
           return;
         }

         // There is a next shape: direct branches were already chosen when
         // the Simple was created, and the walk simply continues there.
         Next = Simple->Next;

         // The only extra case handled is "one break plus one direct" with no
         // switch involved (switches complicate control flow and are not the
         // common case), and only while the nesting budget allows it.
         if (Simple->Inner->BranchVar || BranchesOut.size() != 2 ||
             Depth >= RelooperNestingLimit)
           return;

         // Normally this would be emitted as
         //   if (break?) { break; } ..
         // but nesting the other arm saves the break:
         //   if (!break?) { .. }
         // The nested form is more canonical and easier to optimize later, at
         // the price of deeper nesting (larger output when whitespace is kept).
         bool SawBreak = false;
         bool Blocked = false;
         for (const auto &Entry : BranchesOut) {
           Block *Dest = Entry.first;
           Branch *Details = Entry.second.get();
           if (Details->Type == Branch::Break) {
             SawBreak = true;
             if (!contains(NaturalBlocks, Dest))
               Blocked = true;
           } else if (Details->Type != Branch::Direct) {
             Blocked = true;
           }
         }

         if (SawBreak && !Blocked) {
           for (const auto &Entry : BranchesOut) {
             Branch *Details = Entry.second.get();
             if (Details->Type == Branch::Break) {
               Details->Type = Branch::Direct;
               ReleaseBreak(Details);
             } else {
               assert(Details->Type == Branch::Direct);
               Details->Type = Branch::Nested;
             }
           }
         }

         // this optimization increases depth, for us and all our next chain
         // (i.e., until this call returns)
         Depth++;
       };

       auto VisitMultiple = [&](MultipleShape *Multiple) {
         // The LastLoop argument is re-evaluated for every inner shape on
         // purpose: recursing may lower Multiple->Breaks.
         for (const auto &Entry : Multiple->InnerMap)
           RemoveUnneededFlows(Entry.second, Multiple->Next,
                               Multiple->Breaks ? nullptr : LastLoop,
                               Depth + 1);
         Next = Multiple->Next;
       };

       auto VisitLoop = [&](LoopShape *Loop) {
         // Inside a loop, the natural flow is back to the loop body itself.
         RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop, Depth + 1);
         Next = Loop->Next;
       };

       while (Shape *Current = Next) {
         Next = nullptr;
         ShapeSwitch(Current, VisitSimple, VisitMultiple, VisitLoop);
       }
     }

     // After we know which loops exist, we can calculate which need to be
     // labeled
     //
     // Walks the chain of shapes starting at Root, recursing into nested
     // shapes. While walking, LoopStack holds the breakable constructs that
     // currently enclose the position: loops, Multiples that have breaks, and
     // a null dummy entry for every switch (inside a switch a plain break is
     // useless). For each break/continue leaving a Simple: if it is still
     // marked Labeled and does not target the top of LoopStack, its ancestor
     // is marked Labeled; otherwise the branch itself is marked unlabeled.
     void FindLabeledLoops(Shape *Root) {
       Shape *Next = Root;
       while (Next) {
         Root = Next;
         Next = nullptr;

         ShapeSwitch(
             Root,
             [&](SimpleShape *Simple) {
               // A Simple at the end of a chain has no Next, and dyn_cast<>
               // must not be handed a null pointer, so guard the cast.
               MultipleShape *Fused =
                   Root->Next ? dyn_cast<MultipleShape>(Root->Next) : nullptr;
               // If we are fusing a Multiple with a loop into this Simple,
               // then visit it now
               if (Fused && Fused->Breaks)
                 LoopStack.push(Fused);
               if (Simple->Inner->BranchVar)
                 LoopStack.push(nullptr); // a switch means breaks are now
                                          // useless, push a dummy
               if (Fused) {
                 if (Fused->UseSwitch)
                   LoopStack.push(nullptr); // a switch means breaks are now
                                            // useless, push a dummy
                 for (const auto &iter : Fused->InnerMap) {
                   FindLabeledLoops(iter.second);
                 }
               }
               for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
                 Branch *Details = iter.second.get();
                 if (Details->Type == Branch::Break ||
                     Details->Type == Branch::Continue) {
                   assert(!LoopStack.empty());
                   if (Details->Ancestor != LoopStack.top() &&
                       Details->Labeled) {
                     // The target is not the innermost breakable construct,
                     // so it needs a label to be addressed.
                     if (MultipleShape *Multiple =
                             dyn_cast<MultipleShape>(Details->Ancestor)) {
                       Multiple->Labeled = true;
                     } else {
                       LoopShape *Loop = cast<LoopShape>(Details->Ancestor);
                       Loop->Labeled = true;
                     }
                   } else {
                     Details->Labeled = false;
                   }
                 }
               }
               // Undo the pushes made above, once each and in reverse order.
               // This has to happen after the branch loop, not inside it:
               // otherwise the stack would be popped once per outgoing branch
               // and never when there are no outgoing branches.
               if (Fused && Fused->UseSwitch)
                 LoopStack.pop();
               if (Simple->Inner->BranchVar)
                 LoopStack.pop();
               if (Fused && Fused->Breaks)
                 LoopStack.pop();
               // A fused Multiple was fully handled here, so step over it.
               if (Fused)
                 Next = Fused->Next;
               else
                 Next = Root->Next;
             },
             [&](MultipleShape *Multiple) {
               if (Multiple->Breaks)
                 LoopStack.push(Multiple);
               for (const auto &iter : Multiple->InnerMap)
                 FindLabeledLoops(iter.second);
               if (Multiple->Breaks)
                 LoopStack.pop();
               Next = Root->Next;
             },
             [&](LoopShape *Loop) {
               LoopStack.push(Loop);
               FindLabeledLoops(Loop->Inner);
               LoopStack.pop();
               Next = Root->Next;
             });
       }
     }

     // Runs the post-optimization passes over the shape chain headed by Root.
     // The passes are order-dependent: RemoveUnneededFlows can clear the
     // Labeled flag on breaks, and FindLabeledLoops reads that flag afterwards.
     // RemoveUnneededFlows also compares the Natural fields of shapes, which
     // are presumably filled in by FindNaturals (defined elsewhere; confirm).
     void Process(Shape *Root) {
       FindNaturals(Root);

       RemoveUnneededFlows(Root);

       FindLabeledLoops(Root);
     }
   };

   PostOptimizer(this).Process(Root);
 }