Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file implements a pass that transforms irreducible control flow -/// into reducible control flow. Irreducible control flow means multiple-entry +/// This file implements a pass that transforms irreducible control flow into +/// reducible control flow. Irreducible control flow means multiple-entry /// loops; they appear as CFG cycles that are not recorded in MachineLoopInfo /// due to being unnatural. /// @@ -17,12 +17,36 @@ /// it linearizes control flow, turning diamonds into two triangles, which is /// both unnecessary and undesirable for WebAssembly. /// -/// TODO: The transformation implemented here handles all irreducible control -/// flow, without exponential code-size expansion, though it does so by creating -/// inefficient code in many cases. Ideally, we should add other -/// transformations, including code-duplicating cases, which can be more -/// efficient in common cases, and they can fall back to this conservative -/// implementation as needed. +/// The big picture: Ignoring natural loops (seeing them monolithically), we +/// find all the blocks which can return to themselves ("loopers"). Loopers +/// reachable from the non-loopers are loop entries: if there are 2 or more, +/// then we have irreducible control flow. We fix that as follows: a new block +/// is created that can dispatch to each of the loop entries, based on the +/// value of a label "helper" variable, and we replace direct branches to the +/// entries with assignments to the label variable and a branch to the dispatch +/// block. Then the dispatch block is the single entry in a new natural loop. +/// +/// This is similar to what the Relooper [1] does, both identify looping code +/// that requires multiple entries, and resolve it in a similar way. In +/// Relooper terminology, we implement a Multiple shape in a Loop shape. Note +/// also that like the Relooper, we implement a "minimal" intervention: we only +/// use the "label" helper for the blocks we absolutely must and no others. We +/// also prioritize code size and do not perform node splitting (i.e. we don't +/// duplicate code in order to resolve irreducibility). +/// +/// The difference between this code and the Relooper is that the Relooper also +/// generates ifs and loops and works in a recursive manner, knowing at each +/// point what the entries are, and recursively breaks down the problem. Here +/// we just want to resolve irreducible control flow, and we also want to use +/// as much LLVM infrastructure as possible. So we use the MachineLoopInfo to +/// identify natural loops, etc., and we start with the whole CFG and must +/// identify both the looping code and its entries. +/// +/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In +/// Proceedings of the ACM international conference companion on Object oriented +/// programming systems languages and applications companion (SPLASH '11). ACM, +/// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224 +/// http://doi.acm.org/10.1145/2048147.2048224 /// //===----------------------------------------------------------------------===// @@ -46,141 +70,192 @@ #define DEBUG_TYPE "wasm-fix-irreducible-control-flow" namespace { -class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass { - StringRef getPassName() const override { - return "WebAssembly Fix Irreducible Control Flow"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - bool runOnMachineFunction(MachineFunction &MF) override; - - bool VisitLoop(MachineFunction &MF, MachineLoopInfo &MLI, MachineLoop *Loop); +class LoopFixer { public: - static char ID; // Pass identification, replacement for typeid - WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {} -}; -} // end anonymous namespace + LoopFixer(MachineFunction &MF, MachineLoopInfo &MLI, MachineLoop *Loop) + : MF(MF), MLI(MLI), Loop(Loop) {} -char WebAssemblyFixIrreducibleControlFlow::ID = 0; -INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE, - "Removes irreducible control flow", false, false) + // Run the fixer on the given inputs. Returns whether changes were made. + bool run(); -FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() { - return new WebAssemblyFixIrreducibleControlFlow(); -} - -namespace { - -/// A utility for walking the blocks of a loop, handling a nested inner -/// loop as a monolithic conceptual block. -class MetaBlock { - MachineBasicBlock *Block; - SmallVector Preds; - SmallVector Succs; - -public: - explicit MetaBlock(MachineBasicBlock *MBB) - : Block(MBB), Preds(MBB->pred_begin(), MBB->pred_end()), - Succs(MBB->succ_begin(), MBB->succ_end()) {} - - explicit MetaBlock(MachineLoop *Loop) : Block(Loop->getHeader()) { - Loop->getExitBlocks(Succs); - for (MachineBasicBlock *Pred : Block->predecessors()) - if (!Loop->contains(Pred)) - Preds.push_back(Pred); +private: + MachineFunction &MF; + MachineLoopInfo &MLI; + MachineLoop *Loop; + + MachineBasicBlock *Header; + SmallPtrSet LoopBlocks; + + using BlockSet = SmallPtrSet; + DenseMap Reachable; + + // The worklist contains pairs of recent additions, (a, b), where we just + // added a link a => b. + using BlockPair = std::pair; + SmallVector WorkList; + + // Get a canonical block to represent a block or a loop: the block, or if in + // an inner loop, the loop header, of it in an outer loop scope, we can + // ignore it. We need to call this on all blocks we work on. + MachineBasicBlock *canonicalize(MachineBasicBlock *MBB) { + MachineLoop *InnerLoop = MLI.getLoopFor(MBB); + if (InnerLoop == Loop) { + return MBB; + } else { + // This is either in an outer or an inner loop, and not in ours. + if (!LoopBlocks.count(MBB)) { + // It's in outer code, ignore it. + return nullptr; + } + assert(InnerLoop); + // It's in an inner loop, canonicalize it to the header of that loop. + return InnerLoop->getHeader(); + } } - MachineBasicBlock *getBlock() const { return Block; } - - const SmallVectorImpl &predecessors() const { - return Preds; - } - const SmallVectorImpl &successors() const { - return Succs; + // For a successor we can additionally ignore it if it's a branch back to a + // natural loop top, as when we are in the scope of a loop, we just care + // about internal irreducibility, and can ignore the loop we are in. We need + // to call this on all blocks in a context where they are a successor. + MachineBasicBlock *canonicalizeSuccessor(MachineBasicBlock *MBB) { + if (Loop && MBB == Loop->getHeader()) { + // Ignore branches going to the loop's natural header. + return nullptr; + } + return canonicalize(MBB); } - bool operator==(const MetaBlock &MBB) { return Block == MBB.Block; } - bool operator!=(const MetaBlock &MBB) { return Block != MBB.Block; } + // Potentially insert a new reachable edge, and if so, note it as further + // work. + void maybeInsert(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { + assert(MBB == canonicalize(MBB)); + assert(Succ); + // Succ may not be interesting as a sucessor. + Succ = canonicalizeSuccessor(Succ); + if (!Succ) + return; + if (Reachable[MBB].insert(Succ).second) { + // For there to be further work, it means that we have + // X => MBB => Succ + // for some other X, and in that case X => Succ would be a new edge for + // us to discover later. However, if we don't care about MBB as a + // successor, then we don't care about that anyhow. + if (canonicalizeSuccessor(MBB)) { + WorkList.emplace_back(MBB, Succ); + } + } + } }; -class SuccessorList final : public MetaBlock { - size_t Index; - size_t Num; - -public: - explicit SuccessorList(MachineBasicBlock *MBB) - : MetaBlock(MBB), Index(0), Num(successors().size()) {} +bool LoopFixer::run() { + Header = Loop ? Loop->getHeader() : &*MF.begin(); - explicit SuccessorList(MachineLoop *Loop) - : MetaBlock(Loop), Index(0), Num(successors().size()) {} + // Identify all the blocks in this loop scope. + if (Loop) { + for (auto *MBB : Loop->getBlocks()) { + LoopBlocks.insert(MBB); + } + } else { + for (auto &MBB : MF) { + LoopBlocks.insert(&MBB); + } + } - bool HasNext() const { return Index != Num; } + // Compute which (canonicalized) blocks each block can reach. - MachineBasicBlock *Next() { - assert(HasNext()); - return successors()[Index++]; + // Add all the initial work. + for (auto *MBB : LoopBlocks) { + MachineLoop *InnerLoop = MLI.getLoopFor(MBB); + + if (InnerLoop == Loop) { + for (auto *Succ : MBB->successors()) { + maybeInsert(MBB, Succ); + } + } else { + // It can't be in an outer loop - we loop on LoopBlocks - and so it must + // be an inner loop. + assert(InnerLoop); + // Check if we are the canonical block for this loop. + if (canonicalize(MBB) != MBB) { + continue; + } + // The successors are those of the loop. + SmallVector ExitBlocks; + InnerLoop->getExitBlocks(ExitBlocks); + for (auto *Succ : ExitBlocks) { + maybeInsert(MBB, Succ); + } + } } -}; -} // end anonymous namespace + // Do work until we are all done. + while (!WorkList.empty()) { + MachineBasicBlock *MBB; + MachineBasicBlock *Succ; + std::tie(MBB, Succ) = WorkList.pop_back_val(); + // The worklist item is an edge we just added, so it must have valid blocks + // (and not something canonicalized to nullptr). + assert(MBB); + assert(Succ); + // The successor in that pair must also be a valid successor. + assert(MBB == canonicalizeSuccessor(MBB)); + // We recently added MBB => Succ, and that means we may have enabled + // Pred => MBB => Succ. Check all the predecessors. Note that our loop here + // is correct for both a block and a block representing a loop, as the loop + // is natural and so the predecessors are all predecessors of the loop + // header, which is the block we have here. + for (auto *Pred : MBB->predecessors()) { + // Canonicalize, make sure it's relevant, and check it's not the same + // block (an update to the block itself doesn't help compute that same + // block). + Pred = canonicalize(Pred); + if (Pred && Pred != MBB) { + maybeInsert(Pred, Succ); + } + } + } -bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF, - MachineLoopInfo &MLI, - MachineLoop *Loop) { - MachineBasicBlock *Header = Loop ? Loop->getHeader() : &*MF.begin(); - SetVector RewriteSuccs; - - // DFS through Loop's body, looking for irreducible control flow. Loop is - // natural, and we stay in its body, and we treat any nested loops - // monolithically, so any cycles we encounter indicate irreducibility. - SmallPtrSet OnStack; - SmallPtrSet Visited; - SmallVector LoopWorklist; - LoopWorklist.push_back(SuccessorList(Header)); - OnStack.insert(Header); - Visited.insert(Header); - while (!LoopWorklist.empty()) { - SuccessorList &Top = LoopWorklist.back(); - if (Top.HasNext()) { - MachineBasicBlock *Next = Top.Next(); - if (Next == Header || (Loop && !Loop->contains(Next))) - continue; - if (LLVM_LIKELY(OnStack.insert(Next).second)) { - if (!Visited.insert(Next).second) { - OnStack.erase(Next); - continue; - } - MachineLoop *InnerLoop = MLI.getLoopFor(Next); - if (InnerLoop != Loop) - LoopWorklist.push_back(SuccessorList(InnerLoop)); - else - LoopWorklist.push_back(SuccessorList(Next)); - } else { - RewriteSuccs.insert(Top.getBlock()); + // It's now trivial to identify the loopers. + SmallPtrSet Loopers; + for (auto MBB : LoopBlocks) { + if (Reachable[MBB].count(MBB)) { + Loopers.insert(MBB); + } + } + // The header cannot be a looper. At the toplevel, LLVM does not allow the + // entry to be in a loop, and in a natural loop we should ignore the header. + assert(Loopers.count(Header) == 0); + + // Find the entries, loopers reachable from non-loopers. + SmallPtrSet Entries; + SmallVector SortedEntries; + for (auto *Looper : Loopers) { + for (auto *Pred : Looper->predecessors()) { + Pred = canonicalize(Pred); + if (Pred && !Loopers.count(Pred)) { + Entries.insert(Looper); + SortedEntries.push_back(Looper); + break; } - continue; } - OnStack.erase(Top.getBlock()); - LoopWorklist.pop_back(); } - // Most likely, we didn't find any irreducible control flow. - if (LLVM_LIKELY(RewriteSuccs.empty())) + // Check if we found irreducible control flow. + if (LLVM_LIKELY(Entries.size() <= 1)) return false; - LLVM_DEBUG(dbgs() << "Irreducible control flow detected!\n"); + // Sort the entries to ensure a deterministic build. + llvm::sort(SortedEntries, + [&](const MachineBasicBlock *A, const MachineBasicBlock *B) { + auto ANum = A->getNumber(); + auto BNum = B->getNumber(); + assert(ANum != -1 && BNum != -1); + assert(ANum != BNum); + return ANum < BNum; + }); - // Ok. We have irreducible control flow! Create a dispatch block which will - // contains a jump table to any block in the problematic set of blocks. + // Create a dispatch block which will contain a jump table to the entries. MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock(); MF.insert(MF.end(), Dispatch); MLI.changeLoopFor(Dispatch, Loop); @@ -196,43 +271,43 @@ unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); MIB.addReg(Reg); - // Collect all the blocks which need to have their successors rewritten, - // add the successors to the jump table, and remember their index. + // Compute the indices in the superheader, one for each bad block, and + // add them as successors. DenseMap Indices; - SmallVector SuccWorklist(RewriteSuccs.begin(), - RewriteSuccs.end()); - while (!SuccWorklist.empty()) { - MachineBasicBlock *MBB = SuccWorklist.pop_back_val(); + for (auto *MBB : SortedEntries) { auto Pair = Indices.insert(std::make_pair(MBB, 0)); - if (!Pair.second) + if (!Pair.second) { continue; + } unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1; - LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has index " << Index - << "\n"); - Pair.first->second = Index; - for (auto Pred : MBB->predecessors()) - RewriteSuccs.insert(Pred); MIB.addMBB(MBB); Dispatch->addSuccessor(MBB); + } - MetaBlock Meta(MBB); - for (auto *Succ : Meta.successors()) - if (Succ != Header && (!Loop || Loop->contains(Succ))) - SuccWorklist.push_back(Succ); + // Rewrite the problematic successors for every block that wants to reach the + // bad blocks. For simplicity, we just introduce a new block for every edge + // we need to rewrite. (Fancier things are possible.) + + SmallVector AllPreds; + for (auto *MBB : SortedEntries) { + for (auto *Pred : MBB->predecessors()) { + if (Pred != Dispatch) { + AllPreds.push_back(Pred); + } + } } - // Rewrite the problematic successors for every block in RewriteSuccs. - // For simplicity, we just introduce a new block for every edge we need to - // rewrite. Fancier things are possible. - for (MachineBasicBlock *MBB : RewriteSuccs) { + for (MachineBasicBlock *MBB : AllPreds) { DenseMap Map; for (auto *Succ : MBB->successors()) { - if (!Indices.count(Succ)) + if (!Entries.count(Succ)) { continue; + } + // This is a successor we need to rewrite. MachineBasicBlock *Split = MF.CreateMachineBasicBlock(); MF.insert(MBB->isLayoutSuccessor(Succ) ? MachineFunction::iterator(Succ) : MF.end(), @@ -266,6 +341,55 @@ return true; } +class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass { + StringRef getPassName() const override { + return "WebAssembly Fix Irreducible Control Flow"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + bool runIteration(MachineFunction &MF, MachineLoopInfo &MLI) { + // Visit the function body, which is identified as a null loop. + if (LoopFixer(MF, MLI, nullptr).run()) { + return true; + } + + // Visit all the loops. + SmallVector Worklist(MLI.begin(), MLI.end()); + while (!Worklist.empty()) { + MachineLoop *Loop = Worklist.pop_back_val(); + Worklist.append(Loop->begin(), Loop->end()); + if (LoopFixer(MF, MLI, Loop).run()) { + return true; + } + } + + return false; + } + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyFixIrreducibleControlFlow::ID = 0; +INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE, + "Removes irreducible control flow", false, false) + +FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() { + return new WebAssemblyFixIrreducibleControlFlow(); +} + bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction( MachineFunction &MF) { LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n" @@ -275,24 +399,19 @@ bool Changed = false; auto &MLI = getAnalysis(); - // Visit the function body, which is identified as a null loop. - Changed |= VisitLoop(MF, MLI, nullptr); - - // Visit all the loops. - SmallVector Worklist(MLI.begin(), MLI.end()); - while (!Worklist.empty()) { - MachineLoop *CurLoop = Worklist.pop_back_val(); - Worklist.append(CurLoop->begin(), CurLoop->end()); - Changed |= VisitLoop(MF, MLI, CurLoop); - } - - // If we made any changes, completely recompute everything. - if (LLVM_UNLIKELY(Changed)) { - LLVM_DEBUG(dbgs() << "Recomputing dominators and loops.\n"); + // When we modify something, bail out and recompute MLI, then start again, as + // we create a new natural loop when we resolve irreducible control flow, and + // other loops may become nested in it, etc. In practice this is not an issue + // because irreducible control flow is rare, only very few cycles are needed + // here. + while (LLVM_UNLIKELY(runIteration(MF, MLI))) { + // We rewrote part of the function; recompute MLI and start again. + LLVM_DEBUG(dbgs() << "Recomputing loops.\n"); MF.getRegInfo().invalidateLiveness(); MF.RenumberBlocks(); getAnalysis().runOnMachineFunction(MF); MLI.runOnMachineFunction(MF); + Changed = true; } return Changed; Index: llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg-exceptions.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg-exceptions.ll +++ llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg-exceptions.ll @@ -0,0 +1,108 @@ +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-block-placement -wasm-disable-explicit-locals -wasm-keep-registers -enable-emscripten-cxx-exceptions | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +declare i32 @__gxx_personality_v0(...) + +; Check an interesting case of complex control flow due to exceptions CFG rewriting. +; There should *not* be any irreducible control flow here. + +; CHECK-LABEL: crashy: +; CHECK-NOT: br_table + +; Function Attrs: minsize noinline optsize +define void @crashy() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + invoke void undef() + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %entry + invoke void undef() + to label %invoke.cont4 unwind label %lpad3 + +invoke.cont4: ; preds = %invoke.cont + %call.i82 = invoke i8* undef() + to label %invoke.cont6 unwind label %lpad3 + +invoke.cont6: ; preds = %invoke.cont4 + invoke void undef() + to label %invoke.cont13 unwind label %lpad12 + +invoke.cont13: ; preds = %invoke.cont6 + br label %for.cond + +for.cond: ; preds = %for.cond.backedge, %invoke.cont13 + br i1 undef, label %exit2, label %land.lhs + +land.lhs: ; preds = %for.cond + %call.i.i.i.i92 = invoke i32 undef() + to label %exit1 unwind label %lpad16.loopexit + +exit1: ; preds = %land.lhs + br label %exit2 + +exit2: ; preds = %exit1, %for.cond + %call.i.i12.i.i93 = invoke i32 undef() + to label %exit3 unwind label %lpad16.loopexit + +exit3: ; preds = %exit2 + invoke void undef() + to label %invoke.cont23 unwind label %lpad22 + +invoke.cont23: ; preds = %exit3 + invoke void undef() + to label %invoke.cont25 unwind label %lpad22 + +invoke.cont25: ; preds = %invoke.cont23 + %call.i.i137 = invoke i32 undef() + to label %invoke.cont29 unwind label %lpad16.loopexit + +lpad: ; preds = %entry + %0 = landingpad { i8*, i32 } + cleanup + unreachable + +lpad3: ; preds = %invoke.cont4, %invoke.cont + %1 = landingpad { i8*, i32 } + cleanup + unreachable + +lpad12: ; preds = %invoke.cont6 + %2 = landingpad { i8*, i32 } + cleanup + resume { i8*, i32 } undef + +lpad16.loopexit: ; preds = %if.then, %invoke.cont29, %invoke.cont25, %exit2, %land.lhs + %lpad.loopexit = landingpad { i8*, i32 } + cleanup + unreachable + +lpad22: ; preds = %invoke.cont23, %exit3 + %3 = landingpad { i8*, i32 } + cleanup + unreachable + +invoke.cont29: ; preds = %invoke.cont25 + invoke void undef() + to label %invoke.cont33 unwind label %lpad16.loopexit + +invoke.cont33: ; preds = %invoke.cont29 + br label %for.inc + +for.inc: ; preds = %invoke.cont33 + %cmp.i.i141 = icmp eq i8* undef, undef + br i1 %cmp.i.i141, label %if.then, label %if.end.i.i146 + +if.then: ; preds = %for.inc + %call.i.i148 = invoke i32 undef() + to label %for.cond.backedge unwind label %lpad16.loopexit + +for.cond.backedge: ; preds = %if.end.i.i146, %if.then + br label %for.cond + +if.end.i.i146: ; preds = %for.inc + call void undef() + br label %for.cond.backedge +} + Index: llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg-nested.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg-nested.ll +++ llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg-nested.ll @@ -0,0 +1,63 @@ +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-block-placement -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + + +; Test an interesting pattern of nested irreducibility. +; Just check we resolve all the irreducibility here (if not we'd crash). + +; CHECK-LABEL: tre_parse: + +define void @tre_parse() { +entry: + br label %for.cond.outer + +for.cond.outer: ; preds = %do.body14, %entry + br label %for.cond + +for.cond: ; preds = %for.cond.backedge, %for.cond.outer + %nbranch.0 = phi i32* [ null, %for.cond.outer ], [ %call188, %for.cond.backedge ] + switch i8 undef, label %if.else [ + i8 40, label %do.body14 + i8 41, label %if.then63 + ] + +do.body14: ; preds = %for.cond + br label %for.cond.outer + +if.then63: ; preds = %for.cond + unreachable + +if.else: ; preds = %for.cond + switch i8 undef, label %if.then84 [ + i8 92, label %if.end101 + i8 42, label %if.end101 + ] + +if.then84: ; preds = %if.else + switch i8 undef, label %cleanup.thread [ + i8 43, label %if.end101 + i8 63, label %if.end101 + i8 123, label %if.end101 + ] + +if.end101: ; preds = %if.then84, %if.then84, %if.then84, %if.else, %if.else + unreachable + +cleanup.thread: ; preds = %if.then84 + %call188 = tail call i32* undef(i32* %nbranch.0) + switch i8 undef, label %for.cond.backedge [ + i8 92, label %land.lhs.true208 + i8 0, label %if.else252 + ] + +land.lhs.true208: ; preds = %cleanup.thread + unreachable + +for.cond.backedge: ; preds = %cleanup.thread + br label %for.cond + +if.else252: ; preds = %cleanup.thread + unreachable +} Index: llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg-nested2.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg-nested2.ll +++ llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg-nested2.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-block-placement -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +; Test an interesting pattern of nested irreducibility. +; Just check we resolve all the irreducibility here (if not we'd crash). + +; CHECK-LABEL: func_2: + +; Function Attrs: noinline nounwind optnone +define void @func_2() { +entry: + br i1 undef, label %lbl_937, label %if.else787 + +lbl_937: ; preds = %for.body978, %entry + br label %if.end965 + +if.else787: ; preds = %entry + br label %if.end965 + +if.end965: ; preds = %if.else787, %lbl_937 + br label %for.cond967 + +for.cond967: ; preds = %for.end1035, %if.end965 + br label %for.cond975 + +for.cond975: ; preds = %if.end984, %for.cond967 + br i1 undef, label %for.body978, label %for.end1035 + +for.body978: ; preds = %for.cond975 + br i1 undef, label %lbl_937, label %if.end984 + +if.end984: ; preds = %for.body978 + br label %for.cond975 + +for.end1035: ; preds = %for.cond975 + br label %for.cond967 +} Index: llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg.ll +++ llvm/trunk/test/CodeGen/WebAssembly/irreducible-cfg.ll @@ -9,7 +9,7 @@ ; CHECK-LABEL: test0: ; CHECK: f64.load -; CHECK: i32.const $[[REG:[^,]+]]=, 0{{$}} +; CHECK: i32.const $[[REG:[^,]+]]= ; CHECK: br_table $[[REG]], define void @test0(double* %arg, i32 %arg1, i32 %arg2, i32 %arg3) { bb: @@ -50,7 +50,7 @@ ; CHECK-LABEL: test1: ; CHECK: f64.load -; CHECK: i32.const $[[REG:[^,]+]]=, 0{{$}} +; CHECK: i32.const $[[REG:[^,]+]]= ; CHECK: br_table $[[REG]], define void @test1(double* %arg, i32 %arg1, i32 %arg2, i32 %arg3) { bb: @@ -92,3 +92,128 @@ bb19: ret void } + +; A simple loop 2 blocks that are both entries. + +; CHECK-LABEL: test2: +; CHECK: br_if +; CHECK: i32.const $[[REG:[^,]+]]= +; CHECK: br_table $[[REG]], +define internal i32 @test2(i32) noinline { +entry: + br label %A0 + +A0: + %a0a = tail call i32 @test2(i32 1) + %a0b = icmp eq i32 %a0a, 0 + br i1 %a0b, label %A1, label %A2 + +A1: + %a1a = tail call i32 @test2(i32 2) + %a1b = icmp eq i32 %a1a, 0 + br i1 %a1b, label %A1, label %A2 + +A2: + %a2a = tail call i32 @test2(i32 3) + %a2b = icmp eq i32 %a2a, 0 + br i1 %a2b, label %A1, label %A2 +} + +; An interesting loop with inner loop and if-else structure too. + +; CHECK-LABEL: test3: +; CHECK: br_if +define void @test3(i32 %ws) { +entry: + %ws.addr = alloca i32, align 4 + store volatile i32 %ws, i32* %ws.addr, align 4 + %0 = load volatile i32, i32* %ws.addr, align 4 + %tobool = icmp ne i32 %0, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + br label %wynn + +if.end: ; preds = %entry + %1 = load volatile i32, i32* %ws.addr, align 4 + %tobool1 = icmp ne i32 %1, 0 + br i1 %tobool1, label %if.end9, label %if.then2 + +if.then2: ; preds = %if.end + br label %for.cond + +for.cond: ; preds = %wynn, %if.then7, %if.then2 + %2 = load volatile i32, i32* %ws.addr, align 4 + %tobool3 = icmp ne i32 %2, 0 + br i1 %tobool3, label %if.then4, label %if.end5 + +if.then4: ; preds = %for.cond + br label %if.end5 + +if.end5: ; preds = %if.then4, %for.cond + %3 = load volatile i32, i32* %ws.addr, align 4 + %tobool6 = icmp ne i32 %3, 0 + br i1 %tobool6, label %if.then7, label %if.end8 + +if.then7: ; preds = %if.end5 + br label %for.cond + +if.end8: ; preds = %if.end5 + br label %wynn + +wynn: ; preds = %if.end8, %if.then + br label %for.cond + +if.end9: ; preds = %if.end + ret void +} + +; Multi-level irreducibility, after reducing in the main scope we must then +; reduce in the inner loop that we just created. +; CHECK: br_table +; CHECK: br_table +define void @pi_next() { +entry: + br i1 undef, label %sw.bb5, label %return + +sw.bb5: ; preds = %entry + br i1 undef, label %if.then.i49, label %if.else.i52 + +if.then.i49: ; preds = %sw.bb5 + br label %for.inc197.i + +if.else.i52: ; preds = %sw.bb5 + br label %for.cond57.i + +for.cond57.i: ; preds = %for.inc205.i, %if.else.i52 + store i32 0, i32* undef, align 4 + br label %for.cond65.i + +for.cond65.i: ; preds = %for.inc201.i, %for.cond57.i + br i1 undef, label %for.body70.i, label %for.inc205.i + +for.body70.i: ; preds = %for.cond65.i + br label %for.cond76.i + +for.cond76.i: ; preds = %for.inc197.i, %for.body70.i + %0 = phi i32 [ %inc199.i, %for.inc197.i ], [ 0, %for.body70.i ] + %cmp81.i = icmp slt i32 %0, 0 + br i1 %cmp81.i, label %for.body82.i, label %for.inc201.i + +for.body82.i: ; preds = %for.cond76.i + br label %for.inc197.i + +for.inc197.i: ; preds = %for.body82.i, %if.then.i49 + %inc199.i = add nsw i32 undef, 1 + br label %for.cond76.i + +for.inc201.i: ; preds = %for.cond76.i + br label %for.cond65.i + +for.inc205.i: ; preds = %for.cond65.i + br label %for.cond57.i + +return: ; preds = %entry + ret void +} +