Index: lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp +++ lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp @@ -7,39 +7,40 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file implements a pass that transforms irreducible control flow into -/// reducible control flow. Irreducible control flow means multiple-entry -/// loops; they appear as CFG cycles that are not recorded in MachineLoopInfo -/// due to being unnatural. +/// This file implements a pass that removes irreducible control flow. +/// Irreducible control flow means multiple-entry loops, which this pass +/// transforms to have a single entry. /// /// Note that LLVM has a generic pass that lowers irreducible control flow, but /// it linearizes control flow, turning diamonds into two triangles, which is /// both unnecessary and undesirable for WebAssembly. /// -/// The big picture: Ignoring natural loops (seeing them monolithically), we -/// find all the blocks which can return to themselves ("loopers"). Loopers -/// reachable from the non-loopers are loop entries: if there are 2 or more, -/// then we have irreducible control flow. We fix that as follows: a new block -/// is created that can dispatch to each of the loop entries, based on the -/// value of a label "helper" variable, and we replace direct branches to the -/// entries with assignments to the label variable and a branch to the dispatch -/// block. Then the dispatch block is the single entry in a new natural loop. +/// The big picture: We recursively process each "region", defined as a group +/// of blocks with a single entry and no branches back to that entry. A region +/// may be the entire function body, or the inner part of a loop, i.e., the +/// loop's body without branches back to the loop entry. 
In each region we fix +/// up multi-entry loops by adding a new block that can dispatch to each of the +/// loop entries, based on the value of a label "helper" variable, and we +/// replace direct branches to the entries with assignments to the label +/// variable and a branch to the dispatch block. Then the dispatch block is the +/// single entry in the loop containing the previous multiple entries. After +/// ensuring all the loops in a region are reducible, we recurse into them. The +/// total time complexity of this pass is: +/// O(NumBlocks * NumNestedLoops * NumIrreducibleLoops + +/// NumLoops * NumLoops) /// -/// This is similar to what the Relooper [1] does, both identify looping code -/// that requires multiple entries, and resolve it in a similar way. In -/// Relooper terminology, we implement a Multiple shape in a Loop shape. Note +/// This pass is similar to what the Relooper [1] does. Both identify looping +/// code that requires multiple entries, and resolve it in a similar way (in +/// Relooper terminology, we implement a Multiple shape in a Loop shape). Note /// also that like the Relooper, we implement a "minimal" intervention: we only /// use the "label" helper for the blocks we absolutely must and no others. We -/// also prioritize code size and do not perform node splitting (i.e. we don't -/// duplicate code in order to resolve irreducibility). -/// -/// The difference between this code and the Relooper is that the Relooper also -/// generates ifs and loops and works in a recursive manner, knowing at each -/// point what the entries are, and recursively breaks down the problem. Here -/// we just want to resolve irreducible control flow, and we also want to use -/// as much LLVM infrastructure as possible. So we use the MachineLoopInfo to -/// identify natural loops, etc., and we start with the whole CFG and must -/// identify both the looping code and its entries. 
+/// also prioritize code size and do not duplicate code in order to resolve +/// irreducibility. The graph algorithms for finding loops and entries and so +/// forth are also similar to the Relooper. The main differences between this +/// pass and the Relooper are: +/// * We just care about irreducibility, so we just look at loops. +/// * The Relooper emits structured control flow (with ifs etc.), while we +/// emit a CFG. /// /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In /// Proceedings of the ACM international conference companion on Object oriented @@ -70,326 +71,354 @@ namespace { -class LoopFixer { -public: - LoopFixer(MachineFunction &MF, MachineLoopInfo &MLI, MachineLoop *Loop) - : MF(MF), MLI(MLI), Loop(Loop) {} - - // Run the fixer on the given inputs. Returns whether changes were made. - bool run(); - -private: - MachineFunction &MF; - MachineLoopInfo &MLI; - MachineLoop *Loop; +using BlockVector = SmallVector; +using BlockSet = SmallPtrSet; - MachineBasicBlock *Header; - SmallPtrSet LoopBlocks; - - using BlockSet = SmallPtrSet; - DenseMap Reachable; - - // The worklist contains pairs of recent additions, (a, b), where we just - // added a link a => b. - using BlockPair = std::pair; - SmallVector WorkList; - - // Get a canonical block to represent a block or a loop: the block, or if in - // an inner loop, the loop header, of it in an outer loop scope, we can - // ignore it. We need to call this on all blocks we work on. - MachineBasicBlock *canonicalize(MachineBasicBlock *MBB) { - MachineLoop *InnerLoop = MLI.getLoopFor(MBB); - if (InnerLoop == Loop) { - return MBB; - } else { - // This is either in an outer or an inner loop, and not in ours. - if (!LoopBlocks.count(MBB)) { - // It's in outer code, ignore it. - return nullptr; +// Calculates reachability in a region. Ignores branches to blocks outside of +// the region, and ignores branches to the region entry (for the case where +// the region is the inner part of a loop). 
+class ReachabilityGraph { +public: + ReachabilityGraph(MachineBasicBlock *Entry, const BlockSet &Blocks) + : Entry(Entry), Blocks(Blocks) { +#ifndef NDEBUG + // The region must have a single entry. + for (auto *MBB : Blocks) { + if (MBB != Entry) { + for (auto *Pred : MBB->predecessors()) { + assert(inRegion(Pred)); + } } - assert(InnerLoop); - // It's in an inner loop, canonicalize it to the header of that loop. - return InnerLoop->getHeader(); } +#endif + calculate(); } - // For a successor we can additionally ignore it if it's a branch back to a - // natural loop top, as when we are in the scope of a loop, we just care - // about internal irreducibility, and can ignore the loop we are in. We need - // to call this on all blocks in a context where they are a successor. - MachineBasicBlock *canonicalizeSuccessor(MachineBasicBlock *MBB) { - if (Loop && MBB == Loop->getHeader()) { - // Ignore branches going to the loop's natural header. - return nullptr; - } - return canonicalize(MBB); + bool canReach(MachineBasicBlock *From, MachineBasicBlock *To) { + assert(inRegion(From) && inRegion(To)); + return Reachable[From].count(To); } - // Potentially insert a new reachable edge, and if so, note it as further - // work. - void maybeInsert(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { - assert(MBB == canonicalize(MBB)); - assert(Succ); - // Succ may not be interesting as a sucessor. - Succ = canonicalizeSuccessor(Succ); - if (!Succ) - return; - if (Reachable[MBB].insert(Succ).second) { - // For there to be further work, it means that we have - // X => MBB => Succ - // for some other X, and in that case X => Succ would be a new edge for - // us to discover later. However, if we don't care about MBB as a - // successor, then we don't care about that anyhow. - if (canonicalizeSuccessor(MBB)) { - WorkList.emplace_back(MBB, Succ); - } - } - } -}; + // "Loopers" are blocks that are in a loop. We detect these by finding blocks + // that can reach themselves. 
+ const BlockSet &getLoopers() { return Loopers; } -bool LoopFixer::run() { - Header = Loop ? Loop->getHeader() : &*MF.begin(); + // Get all blocks that are loop entries. + const BlockSet &getLoopEntries() { return LoopEntries; } - // Identify all the blocks in this loop scope. - if (Loop) { - for (auto *MBB : Loop->getBlocks()) { - LoopBlocks.insert(MBB); - } - } else { - for (auto &MBB : MF) { - LoopBlocks.insert(&MBB); - } + // Get all blocks that enter a particular loop from outside. + const BlockSet &getLoopEnterers(MachineBasicBlock *LoopEntry) { + assert(inRegion(LoopEntry)); + return LoopEnterers[LoopEntry]; } - // Compute which (canonicalized) blocks each block can reach. +private: + MachineBasicBlock *Entry; + const BlockSet &Blocks; + + BlockSet Loopers, LoopEntries; + DenseMap LoopEnterers; - // Add all the initial work. - for (auto *MBB : LoopBlocks) { - MachineLoop *InnerLoop = MLI.getLoopFor(MBB); + bool inRegion(MachineBasicBlock *MBB) { return Blocks.count(MBB); } + + // Maps a block to all the other blocks it can reach. + DenseMap Reachable; - if (InnerLoop == Loop) { + void calculate() { + // Reachability computation work list. Contains pairs of recent additions + // (A, B) where we just added a link A => B. + using BlockPair = std::pair; + SmallVector WorkList; + + // Add all relevant direct branches. + for (auto *MBB : Blocks) { for (auto *Succ : MBB->successors()) { - maybeInsert(MBB, Succ); - } - } else { - // It can't be in an outer loop - we loop on LoopBlocks - and so it must - // be an inner loop. - assert(InnerLoop); - // Check if we are the canonical block for this loop. - if (canonicalize(MBB) != MBB) { - continue; - } - // The successors are those of the loop. 
- SmallVector ExitBlocks; - InnerLoop->getExitBlocks(ExitBlocks); - for (auto *Succ : ExitBlocks) { - maybeInsert(MBB, Succ); + if (Succ != Entry && inRegion(Succ)) { + Reachable[MBB].insert(Succ); + WorkList.emplace_back(MBB, Succ); + } } } - } - // Do work until we are all done. - while (!WorkList.empty()) { - MachineBasicBlock *MBB; - MachineBasicBlock *Succ; - std::tie(MBB, Succ) = WorkList.pop_back_val(); - // The worklist item is an edge we just added, so it must have valid blocks - // (and not something canonicalized to nullptr). - assert(MBB); - assert(Succ); - // The successor in that pair must also be a valid successor. - assert(MBB == canonicalizeSuccessor(MBB)); - // We recently added MBB => Succ, and that means we may have enabled - // Pred => MBB => Succ. Check all the predecessors. Note that our loop here - // is correct for both a block and a block representing a loop, as the loop - // is natural and so the predecessors are all predecessors of the loop - // header, which is the block we have here. - for (auto *Pred : MBB->predecessors()) { - // Canonicalize, make sure it's relevant, and check it's not the same - // block (an update to the block itself doesn't help compute that same - // block). - Pred = canonicalize(Pred); - if (Pred && Pred != MBB) { - maybeInsert(Pred, Succ); + while (!WorkList.empty()) { + MachineBasicBlock *MBB, *Succ; + std::tie(MBB, Succ) = WorkList.pop_back_val(); + assert(inRegion(MBB) && Succ != Entry && inRegion(Succ)); + if (MBB != Entry) { + // We recently added MBB => Succ, and that means we may have enabled + // Pred => MBB => Succ. + for (auto *Pred : MBB->predecessors()) { + if (Reachable[Pred].insert(Succ).second) { + WorkList.emplace_back(Pred, Succ); + } + } } } - } - // It's now trivial to identify the loopers. - SmallPtrSet Loopers; - for (auto MBB : LoopBlocks) { - if (Reachable[MBB].count(MBB)) { - Loopers.insert(MBB); + // Blocks that can return to themselves are in a loop. 
+ for (auto *MBB : Blocks) { + if (canReach(MBB, MBB)) { + Loopers.insert(MBB); + } } - } - // The header cannot be a looper. At the toplevel, LLVM does not allow the - // entry to be in a loop, and in a natural loop we should ignore the header. - assert(Loopers.count(Header) == 0); - - // Find the entries, loopers reachable from non-loopers. - SmallPtrSet Entries; - SmallVector SortedEntries; - for (auto *Looper : Loopers) { - for (auto *Pred : Looper->predecessors()) { - Pred = canonicalize(Pred); - if (Pred && !Loopers.count(Pred)) { - Entries.insert(Looper); - SortedEntries.push_back(Looper); - break; + assert(!Loopers.count(Entry)); + + // Find the loop entries - loopers reachable from blocks not in that loop - + // and those outside blocks that reach them, the "loop enterers". + for (auto *Looper : Loopers) { + for (auto *Pred : Looper->predecessors()) { + // Pred can reach Looper. If Looper can reach Pred, it is in the loop; + // otherwise, it is a block that enters into the loop. + if (!canReach(Looper, Pred)) { + LoopEntries.insert(Looper); + LoopEnterers[Looper].insert(Pred); + } } } } +}; - // Check if we found irreducible control flow. - if (LLVM_LIKELY(Entries.size() <= 1)) - return false; +// Finds the blocks in a single-entry loop, given the loop entry and the +// list of blocks that enter the loop. +class LoopBlocks { +public: + LoopBlocks(MachineBasicBlock *Entry, const BlockSet &Enterers) + : Entry(Entry), Enterers(Enterers) { + calculate(); + } - // Sort the entries to ensure a deterministic build. 
- llvm::sort(SortedEntries, - [&](const MachineBasicBlock *A, const MachineBasicBlock *B) { - auto ANum = A->getNumber(); - auto BNum = B->getNumber(); - return ANum < BNum; - }); + BlockSet &getBlocks() { return Blocks; } -#ifndef NDEBUG - for (auto Block : SortedEntries) - assert(Block->getNumber() != -1); - if (SortedEntries.size() > 1) { - for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; - I != E; ++I) { - auto ANum = (*I)->getNumber(); - auto BNum = (*(std::next(I)))->getNumber(); - assert(ANum != BNum); +private: + MachineBasicBlock *Entry; + const BlockSet &Enterers; + + BlockSet Blocks; + + void calculate() { + // Going backwards from the loop entry, if we ignore the blocks entering + // from outside, we will traverse all the blocks in the loop. + BlockSet WorkList; + Blocks.insert(Entry); + for (auto *Pred : Entry->predecessors()) { + if (!Enterers.count(Pred)) { + WorkList.insert(Pred); + } } - } -#endif - // Create a dispatch block which will contain a jump table to the entries. - MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock(); - MF.insert(MF.end(), Dispatch); - MLI.changeLoopFor(Dispatch, Loop); - - // Add the jump table. - const auto &TII = *MF.getSubtarget().getInstrInfo(); - MachineInstrBuilder MIB = BuildMI(*Dispatch, Dispatch->end(), DebugLoc(), - TII.get(WebAssembly::BR_TABLE_I32)); - - // Add the register which will be used to tell the jump table which block to - // jump to. - MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); - MIB.addReg(Reg); - - // Compute the indices in the superheader, one for each bad block, and - // add them as successors. 
- DenseMap Indices; - for (auto *MBB : SortedEntries) { - auto Pair = Indices.insert(std::make_pair(MBB, 0)); - if (!Pair.second) { - continue; + while (!WorkList.empty()) { + auto *MBB = *WorkList.begin(); + WorkList.erase(MBB); + assert(!Enterers.count(MBB)); + if (Blocks.insert(MBB).second) { + for (auto *Pred : MBB->predecessors()) { + WorkList.insert(Pred); + } + } } + } +}; - unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1; - Pair.first->second = Index; - - MIB.addMBB(MBB); - Dispatch->addSuccessor(MBB); +class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass { + StringRef getPassName() const override { + return "WebAssembly Fix Irreducible Control Flow"; } - // Rewrite the problematic successors for every block that wants to reach the - // bad blocks. For simplicity, we just introduce a new block for every edge - // we need to rewrite. (Fancier things are possible.) + bool runOnMachineFunction(MachineFunction &MF) override; - SmallVector AllPreds; - for (auto *MBB : SortedEntries) { - for (auto *Pred : MBB->predecessors()) { - if (Pred != Dispatch) { - AllPreds.push_back(Pred); + bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks, + MachineFunction &MF) { + bool Changed = false; + + // Remove irreducibility before processing child loops, which may take + // multiple iterations. + while (true) { + ReachabilityGraph Graph(Entry, Blocks); + + bool FoundIrreducibility = false; + + for (auto *LoopEntry : Graph.getLoopEntries()) { + // Find mutual entries - other entries which can reach this one, and + // are reached by it. Such mutual entries must be in the same loop, and + // so indicate irreducible control flow. + // + // Note that irreducibility may involve inner loops, e.g. imagine A + // starts one loop, and it has B inside it which starts an inner loop. + // If we add a branch from all the way on the outside to B, then in a + // sense B is no longer an "inner" loop, semantically speaking. 
We will + // fix that irreducibility by adding a block that dispatches to + // either A or B, so B will no longer be an inner loop in our output. + // (A fancier approach might try to keep it as such.) + // + // Note that we still need to recurse into inner loops later, to handle + // the case where the irreducibility is entirely nested - we would not + // be able to identify that at this point, since the enclosing loop is + // a group of blocks all of which can reach each other. (We'll see the + // irreducibility after removing branches to the top of that enclosing + // loop.) + BlockSet MutualLoopEntries; + for (auto *OtherLoopEntry : Graph.getLoopEntries()) { + if (OtherLoopEntry != LoopEntry && + Graph.canReach(LoopEntry, OtherLoopEntry) && + Graph.canReach(OtherLoopEntry, LoopEntry)) { + MutualLoopEntries.insert(OtherLoopEntry); + } + } + + if (!MutualLoopEntries.empty()) { + auto AllLoopEntries = std::move(MutualLoopEntries); + AllLoopEntries.insert(LoopEntry); + makeSingleEntryLoop(AllLoopEntries, Blocks, MF); + FoundIrreducibility = true; + Changed = true; + break; + } } - } - } - - for (MachineBasicBlock *MBB : AllPreds) { - DenseMap Map; - for (auto *Succ : MBB->successors()) { - if (!Entries.count(Succ)) { + // Only go on to actually process the inner loops when we are done + // removing irreducible control flow and changing the graph. Modifying + // the graph as we go is possible, and that might let us avoid looking at + // the already-fixed loops again if we are careful, but all that is + // complex and bug-prone. Since irreducible loops are rare, just starting + // another iteration is best. + if (FoundIrreducibility) { continue; } - // This is a successor we need to rewrite. - MachineBasicBlock *Split = MF.CreateMachineBasicBlock(); - MF.insert(MBB->isLayoutSuccessor(Succ) ? 
MachineFunction::iterator(Succ) - : MF.end(), - Split); - MLI.changeLoopFor(Split, Loop); - - // Set the jump table's register of the index of the block we wish to - // jump to, and jump to the jump table. - BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::CONST_I32), - Reg) - .addImm(Indices[Succ]); - BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::BR)) - .addMBB(Dispatch); - Split->addSuccessor(Dispatch); - Map[Succ] = Split; + for (auto *LoopEntry : Graph.getLoopEntries()) { + LoopBlocks InnerBlocks(LoopEntry, Graph.getLoopEnterers(LoopEntry)); + // Each of these calls to processRegion may change the graph, but are + // guaranteed not to interfere with each other. The only changes we make + // to the graph are to add blocks on the way to a loop entry. As the + // loops are disjoint, that means we may only alter branches exiting + // another loop, which are ignored when recursing into that other loop + // anyhow. + if (processRegion(LoopEntry, InnerBlocks.getBlocks(), MF)) { + Changed = true; + } + } + + return Changed; } - // Remap the terminator operands and the successor list. - for (MachineInstr &Term : MBB->terminators()) - for (auto &Op : Term.explicit_uses()) - if (Op.isMBB() && Indices.count(Op.getMBB())) - Op.setMBB(Map[Op.getMBB()]); - for (auto Rewrite : Map) - MBB->replaceSuccessor(Rewrite.first, Rewrite.second); } - // Create a fake default label, because br_table requires one. - MIB.addMBB(MIB.getInstr() - ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1) - .getMBB()); + // Given a set of entries to a single loop, create a single entry for that + // loop by creating a dispatch block for them, routing control flow using + // a helper variable. Also updates Blocks with any new blocks created, so + // that we properly track all the blocks in the region. 
+ void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks, + MachineFunction &MF) { + assert(Entries.size() >= 2); + + // Sort the entries to ensure a deterministic build. + BlockVector SortedEntries(Entries.begin(), Entries.end()); + llvm::sort(SortedEntries, + [&](const MachineBasicBlock *A, const MachineBasicBlock *B) { + auto ANum = A->getNumber(); + auto BNum = B->getNumber(); + return ANum < BNum; + }); - return true; -} - -class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass { - StringRef getPassName() const override { - return "WebAssembly Fix Irreducible Control Flow"; - } +#ifndef NDEBUG + for (auto Block : SortedEntries) + assert(Block->getNumber() != -1); + if (SortedEntries.size() > 1) { + for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E; + ++I) { + auto ANum = (*I)->getNumber(); + auto BNum = (*(std::next(I)))->getNumber(); + assert(ANum != BNum); + } + } +#endif - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - MachineFunctionPass::getAnalysisUsage(AU); - } + // Create a dispatch block which will contain a jump table to the entries. + MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock(); + MF.insert(MF.end(), Dispatch); + Blocks.insert(Dispatch); + + // Add the jump table. + const auto &TII = *MF.getSubtarget().getInstrInfo(); + MachineInstrBuilder MIB = BuildMI(*Dispatch, Dispatch->end(), DebugLoc(), + TII.get(WebAssembly::BR_TABLE_I32)); + + // Add the register which will be used to tell the jump table which block to + // jump to. + MachineRegisterInfo &MRI = MF.getRegInfo(); + unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + MIB.addReg(Reg); + + // Compute the indices in the superheader, one for each bad block, and + // add them as successors. 
+ DenseMap Indices; + for (auto *Entry : SortedEntries) { + auto Pair = Indices.insert(std::make_pair(Entry, 0)); + assert(Pair.second); + + unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1; + Pair.first->second = Index; + + MIB.addMBB(Entry); + Dispatch->addSuccessor(Entry); + } - bool runOnMachineFunction(MachineFunction &MF) override; + // Rewrite the problematic successors for every block that wants to reach + // the bad blocks. For simplicity, we just introduce a new block for every + // edge we need to rewrite. (Fancier things are possible.) - bool runIteration(MachineFunction &MF, MachineLoopInfo &MLI) { - // Visit the function body, which is identified as a null loop. - if (LoopFixer(MF, MLI, nullptr).run()) { - return true; + BlockVector AllPreds; + for (auto *Entry : SortedEntries) { + for (auto *Pred : Entry->predecessors()) { + if (Pred != Dispatch) { + AllPreds.push_back(Pred); + } + } } - // Visit all the loops. - SmallVector Worklist(MLI.begin(), MLI.end()); - while (!Worklist.empty()) { - MachineLoop *Loop = Worklist.pop_back_val(); - Worklist.append(Loop->begin(), Loop->end()); - if (LoopFixer(MF, MLI, Loop).run()) { - return true; + for (MachineBasicBlock *Pred : AllPreds) { + DenseMap Map; + for (auto *Entry : Pred->successors()) { + if (!Entries.count(Entry)) { + continue; + } + + // This is a successor we need to rewrite. + MachineBasicBlock *Split = MF.CreateMachineBasicBlock(); + MF.insert(Pred->isLayoutSuccessor(Entry) + ? MachineFunction::iterator(Entry) + : MF.end(), + Split); + Blocks.insert(Split); + + // Set the jump table's register of the index of the block we wish to + // jump to, and jump to the jump table. + BuildMI(*Split, Split->end(), DebugLoc(), + TII.get(WebAssembly::CONST_I32), Reg) + .addImm(Indices[Entry]); + BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::BR)) + .addMBB(Dispatch); + Split->addSuccessor(Dispatch); + Map[Entry] = Split; } + // Remap the terminator operands and the successor list. 
+ for (MachineInstr &Term : Pred->terminators()) + for (auto &Op : Term.explicit_uses()) + if (Op.isMBB() && Indices.count(Op.getMBB())) + Op.setMBB(Map[Op.getMBB()]); + for (auto Rewrite : Map) + Pred->replaceSuccessor(Rewrite.first, Rewrite.second); } - return false; + // Create a fake default label, because br_table requires one. + MIB.addMBB(MIB.getInstr() + ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1) + .getMBB()); } public: static char ID; // Pass identification, replacement for typeid WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {} }; + } // end anonymous namespace char WebAssemblyFixIrreducibleControlFlow::ID = 0; @@ -406,23 +435,18 @@ "********** Function: " << MF.getName() << '\n'); - bool Changed = false; - auto &MLI = getAnalysis(); - - // When we modify something, bail out and recompute MLI, then start again, as - // we create a new natural loop when we resolve irreducible control flow, and - // other loops may become nested in it, etc. In practice this is not an issue - // because irreducible control flow is rare, only very few cycles are needed - // here. - while (LLVM_UNLIKELY(runIteration(MF, MLI))) { - // We rewrote part of the function; recompute MLI and start again. - LLVM_DEBUG(dbgs() << "Recomputing loops.\n"); + // Start the recursive process on the entire function body. + BlockSet AllBlocks; + for (auto &MBB : MF) { + AllBlocks.insert(&MBB); + } + + if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) { + // We rewrote part of the function; recompute relevant things. 
MF.getRegInfo().invalidateLiveness(); MF.RenumberBlocks(); - getAnalysis().runOnMachineFunction(MF); - MLI.runOnMachineFunction(MF); - Changed = true; + return true; } - return Changed; + return false; } Index: test/CodeGen/WebAssembly/irreducible-cfg.ll =================================================================== --- test/CodeGen/WebAssembly/irreducible-cfg.ll +++ test/CodeGen/WebAssembly/irreducible-cfg.ll @@ -217,3 +217,37 @@ ret void } +; Complex control flow without irreducibility. This requires that we identify +; the blocks entering each nested loop properly (in particular, even if they +; are the entry to a parent loop). +; CHECK-NOT: br_table +define hidden void @ps_hints_apply() { +entry: + br label %psh + +psh: ; preds = %entry + br i1 undef, label %for.cond, label %for.body + +for.body: ; preds = %psh + br label %do.body + +do.body: ; preds = %do.cond, %for.body + %cmp118 = icmp eq i32* undef, undef + br i1 %cmp118, label %Skip, label %do.cond + +do.cond: ; preds = %do.body + br label %do.body + +for.cond: ; preds = %Skip, %psh + br label %for.body39 + +for.body39: ; preds = %for.cond + br i1 undef, label %Skip, label %do.body45 + +do.body45: ; preds = %for.body39 + unreachable + +Skip: ; preds = %for.body39, %do.body + br label %for.cond +} + Index: test/CodeGen/WebAssembly/non-irreducible-cfg.ll =================================================================== --- /dev/null +++ test/CodeGen/WebAssembly/non-irreducible-cfg.ll @@ -0,0 +1,43 @@ +; RUN: llc < %s -O0 -asm-verbose=false -verify-machineinstrs -disable-block-placement -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s + +; Test that reducible control flow is not treated as irreducible. + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +; A simple sequence of loops with blocks in between, that should not be +; misinterpreted as irreducible control flow. 
+ +; CHECK-NOT: br_table +define hidden i32 @_Z15fannkuch_workerPv(i8* %_arg) #0 { +for.cond: ; preds = %entry + br label %do.body + +do.body: ; preds = %do.cond, %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.body, %do.body + br i1 1, label %for.cond1, label %for.end + +for.end: ; preds = %for.cond1 + br label %do.cond + +do.cond: ; preds = %for.end + br i1 1, label %do.body, label %do.end + +do.end: ; preds = %do.cond + br label %for.cond2 + +for.cond2: ; preds = %for.end6, %do.end + br label %for.cond3 + +for.cond3: ; preds = %for.body5, %for.cond2 + br i1 1, label %for.cond3, label %for.end6 + +for.end6: ; preds = %for.cond3 + br label %for.cond2 + +return: ; No predecessors! + ret i32 1 +} +