Index: lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp +++ lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp @@ -17,15 +17,43 @@ /// it linearizes control flow, turning diamonds into two triangles, which is /// both unnecessary and undesirable for WebAssembly. /// -/// TODO: The transformation implemented here handles all irreducible control -/// flow, without exponential code-size expansion, though it does so by creating -/// inefficient code in many cases. Ideally, we should add other -/// transformations, including code-duplicating cases, which can be more -/// efficient in common cases, and they can fall back to this conservative -/// implementation as needed. +/// The big picture: Ignoring natural loops (seeing them monolithically), +/// we find all the blocks which can return to themselves ("loopers"). +/// Loopers reachable from the non-loopers are loop entries: if there are 2 +/// or more, then we have irreducible control flow. We fix that as follows: +/// in each block reaching an entry we assign to a "label" helper variable +/// with an id of the block we wish to reach, and branch to a new block that +/// dispatches to all the options. That block is then the single entry in +/// a new natural loop. +/// +/// This is similar to what the Relooper [1] does, both identify looping +/// code that requires multiple entries, and resolve it in a similar way. +/// In Relooper terminology, we implement a Multiple shape in a Loop +/// shape. Note also that like the Relooper, we implement a "minimal" +/// intervention: we only use the "label" helper for the blocks we +/// absolutely must and no others. We also prioritize code size and do not +/// perform node splitting (i.e. we don't duplicate code in order to resolve +/// irreducibility). +/// +/// The difference between this code and the Relooper is that the Relooper also +/// generates ifs and loops and works in a recursive manner, knowing at each +/// point what the entries are, and recursively breaks down the problem. Here +/// we just want to resolve irreducible control flow, and we also want to use +/// as much LLVM infrastructure as possible. So we use the MachineLoopInfo +/// to identify natural loops, etc., and we start with the whole CFG and must +/// identify both the looping code and its entries. +/// +/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In +/// Proceedings of the ACM international conference companion on Object oriented +/// programming systems languages and applications companion (SPLASH '11). ACM, +/// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224 +/// http://doi.acm.org/10.1145/2048147.2048224 /// //===----------------------------------------------------------------------===// +#include +#include + #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" @@ -78,109 +106,171 @@ return new WebAssemblyFixIrreducibleControlFlow(); } -namespace { - -/// A utility for walking the blocks of a loop, handling a nested inner -/// loop as a monolithic conceptual block. -class MetaBlock { - MachineBasicBlock *Block; - SmallVector Preds; - SmallVector Succs; - -public: - explicit MetaBlock(MachineBasicBlock *MBB) - : Block(MBB), Preds(MBB->pred_begin(), MBB->pred_end()), - Succs(MBB->succ_begin(), MBB->succ_end()) {} - - explicit MetaBlock(MachineLoop *Loop) : Block(Loop->getHeader()) { - Loop->getExitBlocks(Succs); - for (MachineBasicBlock *Pred : Block->predecessors()) - if (!Loop->contains(Pred)) - Preds.push_back(Pred); - } - - MachineBasicBlock *getBlock() const { return Block; } +bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF, + MachineLoopInfo &MLI, + MachineLoop *Loop) { + MachineBasicBlock *Header = Loop ? Loop->getHeader() : &*MF.begin(); - const SmallVectorImpl &predecessors() const { - return Preds; - } - const SmallVectorImpl &successors() const { - return Succs; + // Identify all the blocks in this loop scope. + std::set LoopBlocks; + if (Loop) { + for (auto *MBB : Loop->getBlocks()) { + LoopBlocks.insert(MBB); + } + } else { + for (auto &MBB : MF) { + LoopBlocks.insert(&MBB); + } } - bool operator==(const MetaBlock &MBB) { return Block == MBB.Block; } - bool operator!=(const MetaBlock &MBB) { return Block != MBB.Block; } -}; - -class SuccessorList final : public MetaBlock { - size_t Index; - size_t Num; - -public: - explicit SuccessorList(MachineBasicBlock *MBB) - : MetaBlock(MBB), Index(0), Num(successors().size()) {} - - explicit SuccessorList(MachineLoop *Loop) - : MetaBlock(Loop), Index(0), Num(successors().size()) {} + // Get a canonical block to represent a block or a loop: the block, + // or if in a loop, the loop header, of it in an outer loop scope, + // we can ignore it. + auto Canonicalize = [&](MachineBasicBlock *MBB) -> MachineBasicBlock * { + MachineLoop *InnerLoop = MLI.getLoopFor(MBB); + if (InnerLoop == Loop) { + return MBB; + } else { + // This is either in an outer or an inner loop, and not in ours. + if (!LoopBlocks.count(MBB)) { + // It's in outer code, ignore it. + return nullptr; + } + assert(InnerLoop); + // It's in an inner loop, canonicalize it to the header of that loop. + return InnerLoop->getHeader(); + } + }; + + // For a successor we can additionally ignore it if it's a branch back + // to a natural loop top, as when we are in the scope of a loop, we + // just care about internal irreducibility, and can ignore the loop + // we are in. + auto CanonicalizeSuccessor = + [&](MachineBasicBlock *MBB) -> MachineBasicBlock * { + if (Loop && MBB == Loop->getHeader()) { + // Ignore branches going to the loop's natural header. + return nullptr; + } + return Canonicalize(MBB); + }; + + // Compute which (canonicalized) blocks each block can reach. + typedef std::unordered_set BlockSet; + std::unordered_map Reachable; + + // The worklist contains pairs of recent additions, (a, b), where we just + // added a link a => b. + typedef std::pair BlockPair; + std::vector WorkList; + + // Potentially insert a new reachable edge, and if so, note it as + // further work. + auto MaybeInsert = [&](MachineBasicBlock *MBB, MachineBasicBlock *Succ) { + assert(MBB == Canonicalize(MBB)); + assert(Succ); + Succ = CanonicalizeSuccessor(Succ); + if (!Succ) + return; + if (Reachable[MBB].insert(Succ).second) { + // There may be no work, if we don't care about MBB as a successor, + // when considering something else => MBB => Succ. + if (CanonicalizeSuccessor(MBB)) { + WorkList.push_back(BlockPair(MBB, Succ)); + } + } + }; - bool HasNext() const { return Index != Num; } + // Add all the initial work. + for (auto *MBB : LoopBlocks) { + MachineLoop *InnerLoop = MLI.getLoopFor(MBB); - MachineBasicBlock *Next() { - assert(HasNext()); - return successors()[Index++]; + if (InnerLoop == Loop) { + for (auto *Succ : MBB->successors()) { + MaybeInsert(MBB, Succ); + } + } else { + // It can't be in an outer loop - we loop on LoopBlocks - and so it must + // be an inner loop. + assert(InnerLoop); + // Check if we are the canonical block for this loop. + if (Canonicalize(MBB) != MBB) { + continue; + } + // The successors are those of the loop. + SmallVector ExitBlocks; + InnerLoop->getExitBlocks(ExitBlocks); + for (auto *Succ : ExitBlocks) { + MaybeInsert(MBB, Succ); + } + } } -}; -} // end anonymous namespace + // Do work until we are all done. + while (!WorkList.empty()) { + BlockPair Pair = WorkList.back(); + WorkList.pop_back(); + auto *MBB = Pair.first; + auto *Succ = Pair.second; + assert(MBB); + assert(Succ); + assert(MBB == CanonicalizeSuccessor(MBB)); + // We recently added MBB => Succ, and that means we may have enabled + // Pred => MBB => Succ. Check all the predecessors. Note that our loop + // here is correct for both a block and a block representing a loop, as + // the loop is natural and so the predecessors are all predecessors of + // the loop header, which is the block we have here. + for (auto *Pred : MBB->predecessors()) { + // Canonicalize, make sure it's relevant, and check it's not the + // same block (an update to the block itself doesn't help compute + // that same block). + Pred = Canonicalize(Pred); + if (Pred && Pred != MBB) { + MaybeInsert(Pred, Succ); + } + } + } -bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF, - MachineLoopInfo &MLI, - MachineLoop *Loop) { - MachineBasicBlock *Header = Loop ? Loop->getHeader() : &*MF.begin(); - SetVector RewriteSuccs; - - // DFS through Loop's body, looking for irreducible control flow. Loop is - // natural, and we stay in its body, and we treat any nested loops - // monolithically, so any cycles we encounter indicate irreducibility. - SmallPtrSet OnStack; - SmallPtrSet Visited; - SmallVector LoopWorklist; - LoopWorklist.push_back(SuccessorList(Header)); - OnStack.insert(Header); - Visited.insert(Header); - while (!LoopWorklist.empty()) { - SuccessorList &Top = LoopWorklist.back(); - if (Top.HasNext()) { - MachineBasicBlock *Next = Top.Next(); - if (Next == Header || (Loop && !Loop->contains(Next))) - continue; - if (LLVM_LIKELY(OnStack.insert(Next).second)) { - if (!Visited.insert(Next).second) { - OnStack.erase(Next); - continue; - } - MachineLoop *InnerLoop = MLI.getLoopFor(Next); - if (InnerLoop != Loop) - LoopWorklist.push_back(SuccessorList(InnerLoop)); - else - LoopWorklist.push_back(SuccessorList(Next)); - } else { - RewriteSuccs.insert(Top.getBlock()); + // It's now trivial to identify the loopers. + SetVector Loopers; + for (auto MBB : LoopBlocks) { + if (Reachable[MBB].count(MBB)) { + Loopers.insert(MBB); + } + } + // The header cannot be a looper. At the toplevel, LLVM does not allow the + // entry to be in a loop, and in a natural loop we should ignore the header. + assert(Loopers.count(Header) == 0); + + // Find the entries, loopers reachable from non-loopers. + SmallPtrSet Entries; + SmallVector SortedEntries; + for (auto *Looper : Loopers) { + for (auto *Pred : Looper->predecessors()) { + Pred = Canonicalize(Pred); + if (Pred && !Loopers.count(Pred)) { + Entries.insert(Looper); + SortedEntries.push_back(Looper); + break; } - continue; } - OnStack.erase(Top.getBlock()); - LoopWorklist.pop_back(); } - // Most likely, we didn't find any irreducible control flow. - if (LLVM_LIKELY(RewriteSuccs.empty())) + // Check if we found irreducible control flow. + if (Entries.size() <= 1) return false; - LLVM_DEBUG(dbgs() << "Irreducible control flow detected!\n"); - - // Ok. We have irreducible control flow! Create a dispatch block which will - // contains a jump table to any block in the problematic set of blocks. + // Sort the entries to ensure a deterministic build. + llvm::sort(SortedEntries, + [&](const MachineBasicBlock *A, const MachineBasicBlock *B) { + auto ANum = A->getNumber(); + auto BNum = B->getNumber(); + assert(ANum != -1 && BNum != -1); + assert(ANum != BNum); + return ANum < BNum; + }); + + // Create a dispatch block which will contain a jump table to the entries. MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock(); MF.insert(MF.end(), Dispatch); MLI.changeLoopFor(Dispatch, Loop); @@ -196,43 +286,43 @@ unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); MIB.addReg(Reg); - // Collect all the blocks which need to have their successors rewritten, - // add the successors to the jump table, and remember their index. + // Compute the indices in the superheader, one for each bad block, and + // add them as successors. DenseMap Indices; - SmallVector SuccWorklist(RewriteSuccs.begin(), - RewriteSuccs.end()); - while (!SuccWorklist.empty()) { - MachineBasicBlock *MBB = SuccWorklist.pop_back_val(); + for (auto *MBB : SortedEntries) { auto Pair = Indices.insert(std::make_pair(MBB, 0)); - if (!Pair.second) + if (!Pair.second) { continue; + } unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1; - LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has index " << Index - << "\n"); - Pair.first->second = Index; - for (auto Pred : MBB->predecessors()) - RewriteSuccs.insert(Pred); MIB.addMBB(MBB); Dispatch->addSuccessor(MBB); + } + + // Rewrite the problematic successors for every that wants to reach the + // bad blocks. For simplicity, we just introduce a new block for every + // edge we need to rewrite. (Fancier things are possible.) - MetaBlock Meta(MBB); - for (auto *Succ : Meta.successors()) - if (Succ != Header && (!Loop || Loop->contains(Succ))) - SuccWorklist.push_back(Succ); + SetVector AllPreds; + for (auto *MBB : SortedEntries) { + for (auto *Pred : MBB->predecessors()) { + if (Pred != Dispatch) { + AllPreds.insert(Pred); + } + } } - // Rewrite the problematic successors for every block in RewriteSuccs. - // For simplicity, we just introduce a new block for every edge we need to - // rewrite. Fancier things are possible. - for (MachineBasicBlock *MBB : RewriteSuccs) { + for (MachineBasicBlock *MBB : AllPreds) { DenseMap Map; for (auto *Succ : MBB->successors()) { - if (!Indices.count(Succ)) + if (!Entries.count(Succ)) { continue; + } + // This is a successor we need to rewrite. MachineBasicBlock *Split = MF.CreateMachineBasicBlock(); MF.insert(MBB->isLayoutSuccessor(Succ) ? MachineFunction::iterator(Succ) : MF.end(), @@ -275,24 +365,44 @@ bool Changed = false; auto &MLI = getAnalysis(); - // Visit the function body, which is identified as a null loop. - Changed |= VisitLoop(MF, MLI, nullptr); + // When we modify something, bail out and recompute MLI, then start again, as + // we create a new natural loop when we resolve irreducible control flow, and + // other loops may become nested in it, etc. In practice this is not an issue + // because irreducible control flow is rare, only very few cycles are needed + // here. + + auto Iteration = [&]() { + auto DoVisitLoop = [&](MachineFunction &MF, MachineLoopInfo &MLI, + MachineLoop *Loop) { + if (VisitLoop(MF, MLI, Loop)) { + // We rewrote part of the function; recompute MLI and start again. + LLVM_DEBUG(dbgs() << "Recomputing dominators and loops.\n"); + MF.getRegInfo().invalidateLiveness(); + MF.RenumberBlocks(); + getAnalysis().runOnMachineFunction(MF); + MLI.runOnMachineFunction(MF); + return Changed = true; + } + return false; + }; + + // Visit the function body, which is identified as a null loop. + if (DoVisitLoop(MF, MLI, nullptr)) + return true; + + // Visit all the loops. + SmallVector Worklist(MLI.begin(), MLI.end()); + while (!Worklist.empty()) { + MachineLoop *Loop = Worklist.pop_back_val(); + Worklist.append(Loop->begin(), Loop->end()); + if (DoVisitLoop(MF, MLI, Loop)) + return true; + } - // Visit all the loops. - SmallVector Worklist(MLI.begin(), MLI.end()); - while (!Worklist.empty()) { - MachineLoop *CurLoop = Worklist.pop_back_val(); - Worklist.append(CurLoop->begin(), CurLoop->end()); - Changed |= VisitLoop(MF, MLI, CurLoop); - } + return false; + }; - // If we made any changes, completely recompute everything. - if (LLVM_UNLIKELY(Changed)) { - LLVM_DEBUG(dbgs() << "Recomputing dominators and loops.\n"); - MF.getRegInfo().invalidateLiveness(); - MF.RenumberBlocks(); - getAnalysis().runOnMachineFunction(MF); - MLI.runOnMachineFunction(MF); + while (Iteration()) { } return Changed; Index: test/CodeGen/WebAssembly/irreducible-cfg-exceptions.ll =================================================================== --- /dev/null +++ test/CodeGen/WebAssembly/irreducible-cfg-exceptions.ll @@ -0,0 +1,117 @@ +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-block-placement -wasm-disable-explicit-locals -wasm-keep-registers -enable-emscripten-cxx-exceptions | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown-wasm" + +$crashy = comdat any + +declare i32 @__gxx_personality_v0(...) + +; Check an interesting case of complex control flow due to exceptions CFG rewriting. +; There should *not* be any irreducible control flow here. + +; CHECK-LABEL: crashy: +; CHECK-NOT: br_table + +; Function Attrs: minsize noinline optsize +define hidden void @crashy() unnamed_addr #0 comdat personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + invoke void undef() #1 + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %entry + invoke void undef() #1 + to label %invoke.cont4 unwind label %lpad3 + +invoke.cont4: ; preds = %invoke.cont + %call.i82 = invoke i8* undef() #1 + to label %invoke.cont6 unwind label %lpad3 + +invoke.cont6: ; preds = %invoke.cont4 + invoke void undef() #1 + to label %invoke.cont13 unwind label %lpad12 + +invoke.cont13: ; preds = %invoke.cont6 + br label %for.cond + +for.cond: ; preds = %for.cond.backedge, %invoke.cont13 + br i1 undef, label %_ZNKSt3__219istreambuf_iteratorIcNS_11char_traitsIcEEE14__test_for_eofEv.exit.i.i, label %land.lhs.true.i.i.i + +land.lhs.true.i.i.i: ; preds = %for.cond + %call.i.i.i.i92 = invoke i32 undef() #1 + to label %_ZNSt3__215basic_streambufIcNS_11char_traitsIcEEE5sgetcEv.exit.i.i.i unwind label %lpad16.loopexit + +_ZNSt3__215basic_streambufIcNS_11char_traitsIcEEE5sgetcEv.exit.i.i.i: ; preds = %land.lhs.true.i.i.i + br label %_ZNKSt3__219istreambuf_iteratorIcNS_11char_traitsIcEEE14__test_for_eofEv.exit.i.i + +_ZNKSt3__219istreambuf_iteratorIcNS_11char_traitsIcEEE14__test_for_eofEv.exit.i.i: ; preds = %_ZNSt3__215basic_streambufIcNS_11char_traitsIcEEE5sgetcEv.exit.i.i.i, %for.cond + %call.i.i12.i.i93 = invoke i32 undef() #1 + to label %_ZNSt3__215basic_streambufIcNS_11char_traitsIcEEE5sgetcEv.exit.i19.i.i unwind label %lpad16.loopexit + +_ZNSt3__215basic_streambufIcNS_11char_traitsIcEEE5sgetcEv.exit.i19.i.i: ; preds = %_ZNKSt3__219istreambuf_iteratorIcNS_11char_traitsIcEEE14__test_for_eofEv.exit.i.i + invoke void undef() #1 + to label %invoke.cont23 unwind label %lpad22 + +invoke.cont23: ; preds = %_ZNSt3__215basic_streambufIcNS_11char_traitsIcEEE5sgetcEv.exit.i19.i.i + invoke void undef() #1 + to label %invoke.cont25 unwind label %lpad22 + +invoke.cont25: ; preds = %invoke.cont23 + %call.i.i137 = invoke i32 undef() #1 + to label %invoke.cont29 unwind label %lpad16.loopexit + +lpad: ; preds = %entry + %0 = landingpad { i8*, i32 } + cleanup + unreachable + +lpad3: ; preds = %invoke.cont4, %invoke.cont + %1 = landingpad { i8*, i32 } + cleanup + unreachable + +lpad12: ; preds = %invoke.cont6 + %2 = landingpad { i8*, i32 } + cleanup + resume { i8*, i32 } undef + +lpad16.loopexit: ; preds = %if.then.i.i144, %invoke.cont29, %invoke.cont25, %_ZNKSt3__219istreambuf_iteratorIcNS_11char_traitsIcEEE14__test_for_eofEv.exit.i.i, %land.lhs.true.i.i.i + %lpad.loopexit = landingpad { i8*, i32 } + cleanup + unreachable + +lpad22: ; preds = %invoke.cont23, %_ZNSt3__215basic_streambufIcNS_11char_traitsIcEEE5sgetcEv.exit.i19.i.i + %3 = landingpad { i8*, i32 } + cleanup + unreachable + +invoke.cont29: ; preds = %invoke.cont25 + invoke void undef() #1 + to label %invoke.cont33 unwind label %lpad16.loopexit + +invoke.cont33: ; preds = %invoke.cont29 + br label %for.inc + +for.inc: ; preds = %invoke.cont33 + %cmp.i.i141 = icmp eq i8* undef, undef + br i1 %cmp.i.i141, label %if.then.i.i144, label %if.end.i.i146 + +if.then.i.i144: ; preds = %for.inc + %call.i.i148 = invoke i32 undef() #1 + to label %for.cond.backedge unwind label %lpad16.loopexit + +for.cond.backedge: ; preds = %if.end.i.i146, %if.then.i.i144 + br label %for.cond + +if.end.i.i146: ; preds = %for.inc + call void undef() #2 + br label %for.cond.backedge +} + +attributes #0 = { minsize noinline optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { minsize optsize } +attributes #2 = { minsize nounwind optsize } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 8.0.0 (git@github.com:llvm-mirror/clang.git f8ec7c38feebd5cccae31acc7a50182b5474bfa9) (git@github.com:llvm-mirror/llvm.git eb54373b783a878c7397bc4503e93563c322e19a)"} Index: test/CodeGen/WebAssembly/irreducible-cfg-nested.ll =================================================================== --- /dev/null +++ test/CodeGen/WebAssembly/irreducible-cfg-nested.ll @@ -0,0 +1,62 @@ +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-block-placement -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown-wasm" + +; Test an interesting pattern of nested irreducibility. +; Just check we resolve all the irreducibility here (if not we'd crash). + +; CHECK-LABEL: tre_parse: + +define dso_local fastcc void @tre_parse() unnamed_addr { +entry: + br label %for.cond.outer + +for.cond.outer: ; preds = %do.body14, %entry + br label %for.cond + +for.cond: ; preds = %for.cond.backedge, %for.cond.outer + %nbranch.0 = phi i32* [ null, %for.cond.outer ], [ %call188, %for.cond.backedge ] + switch i8 undef, label %if.else [ + i8 40, label %do.body14 + i8 41, label %if.then63 + ] + +do.body14: ; preds = %for.cond + br label %for.cond.outer + +if.then63: ; preds = %for.cond + unreachable + +if.else: ; preds = %for.cond + switch i8 undef, label %if.then84 [ + i8 92, label %if.end101 + i8 42, label %if.end101 + ] + +if.then84: ; preds = %if.else + switch i8 undef, label %cleanup.thread [ + i8 43, label %if.end101 + i8 63, label %if.end101 + i8 123, label %if.end101 + ] + +if.end101: ; preds = %if.then84, %if.then84, %if.then84, %if.else, %if.else + unreachable + +cleanup.thread: ; preds = %if.then84 + %call188 = tail call fastcc i32* undef(i32* %nbranch.0) + switch i8 undef, label %for.cond.backedge [ + i8 92, label %land.lhs.true208 + i8 0, label %if.else252 + ] + +land.lhs.true208: ; preds = %cleanup.thread + unreachable + +for.cond.backedge: ; preds = %cleanup.thread + br label %for.cond + +if.else252: ; preds = %cleanup.thread + unreachable +} Index: test/CodeGen/WebAssembly/irreducible-cfg-nested2.ll =================================================================== --- /dev/null +++ test/CodeGen/WebAssembly/irreducible-cfg-nested2.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-block-placement -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown-wasm" + +; Test an interesting pattern of nested irreducibility. +; Just check we resolve all the irreducibility here (if not we'd crash). + +; CHECK-LABEL: func_2: + +; Function Attrs: noinline nounwind optnone +define dso_local void @func_2() #0 { +entry: + br i1 undef, label %lbl_937, label %if.else787 + +lbl_937: ; preds = %for.body978, %entry + br label %if.end965 + +if.else787: ; preds = %entry + br label %if.end965 + +if.end965: ; preds = %if.else787, %lbl_937 + br label %for.cond967 + +for.cond967: ; preds = %for.end1035, %if.end965 + br label %for.cond975 + +for.cond975: ; preds = %if.end984, %for.cond967 + br i1 undef, label %for.body978, label %for.end1035 + +for.body978: ; preds = %for.cond975 + br i1 undef, label %lbl_937, label %if.end984 + +if.end984: ; preds = %for.body978 + br label %for.cond975 + +for.end1035: ; preds = %for.cond975 + br label %for.cond967 +} + +attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 8.0.0 (git@github.com:llvm-mirror/clang.git f8ec7c38feebd5cccae31acc7a50182b5474bfa9) (git@github.com:llvm-mirror/llvm.git 51f7aeba60419c51dac11a8a37436837b5b6b549)"} Index: test/CodeGen/WebAssembly/irreducible-cfg.ll =================================================================== --- test/CodeGen/WebAssembly/irreducible-cfg.ll +++ test/CodeGen/WebAssembly/irreducible-cfg.ll @@ -9,7 +9,7 @@ ; CHECK-LABEL: test0: ; CHECK: f64.load -; CHECK: i32.const $[[REG:[^,]+]]=, 0{{$}} +; CHECK: i32.const $[[REG:[^,]+]]= ; CHECK: br_table $[[REG]], define void @test0(double* %arg, i32 %arg1, i32 %arg2, i32 %arg3) { bb: @@ -50,7 +50,7 @@ ; CHECK-LABEL: test1: ; CHECK: f64.load -; CHECK: i32.const $[[REG:[^,]+]]=, 0{{$}} +; CHECK: i32.const $[[REG:[^,]+]]= ; CHECK: br_table $[[REG]], define void @test1(double* %arg, i32 %arg1, i32 %arg2, i32 %arg3) { bb: @@ -92,3 +92,128 @@ bb19: ret void } + +; A simple loop 2 blocks that are both entries. + +; CHECK-LABEL: test2: +; CHECK: br_if +; CHECK: i32.const $[[REG:[^,]+]]= +; CHECK: br_table $[[REG]], +define internal i32 @test2(i32) noinline { +entry: + br label %A0 + +A0: + %a0a = tail call i32 @test2(i32 1) + %a0b = icmp eq i32 %a0a, 0 + br i1 %a0b, label %A1, label %A2 + +A1: + %a1a = tail call i32 @test2(i32 2) + %a1b = icmp eq i32 %a1a, 0 + br i1 %a1b, label %A1, label %A2 + +A2: + %a2a = tail call i32 @test2(i32 3) + %a2b = icmp eq i32 %a2a, 0 + br i1 %a2b, label %A1, label %A2 +} + +; An interesting loop with inner loop and if-else structure too. + +; CHECK-LABEL: test3: +; CHECK: br_if +define hidden void @test3(i32 %ws) #0 { +entry: + %ws.addr = alloca i32, align 4 + store volatile i32 %ws, i32* %ws.addr, align 4 + %0 = load volatile i32, i32* %ws.addr, align 4 + %tobool = icmp ne i32 %0, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + br label %wynn + +if.end: ; preds = %entry + %1 = load volatile i32, i32* %ws.addr, align 4 + %tobool1 = icmp ne i32 %1, 0 + br i1 %tobool1, label %if.end9, label %if.then2 + +if.then2: ; preds = %if.end + br label %for.cond + +for.cond: ; preds = %wynn, %if.then7, %if.then2 + %2 = load volatile i32, i32* %ws.addr, align 4 + %tobool3 = icmp ne i32 %2, 0 + br i1 %tobool3, label %if.then4, label %if.end5 + +if.then4: ; preds = %for.cond + br label %if.end5 + +if.end5: ; preds = %if.then4, %for.cond + %3 = load volatile i32, i32* %ws.addr, align 4 + %tobool6 = icmp ne i32 %3, 0 + br i1 %tobool6, label %if.then7, label %if.end8 + +if.then7: ; preds = %if.end5 + br label %for.cond + +if.end8: ; preds = %if.end5 + br label %wynn + +wynn: ; preds = %if.end8, %if.then + br label %for.cond + +if.end9: ; preds = %if.end + ret void +} + +; Multi-level irreducibility, after reducing in the main scope we must then +; reduce in the inner loop that we just created. +; CHECK: br_table +; CHECK: br_table +define hidden void @pi_next() local_unnamed_addr #0 { +entry: + br i1 undef, label %sw.bb5, label %return + +sw.bb5: ; preds = %entry + br i1 undef, label %if.then.i49, label %if.else.i52 + +if.then.i49: ; preds = %sw.bb5 + br label %for.inc197.i + +if.else.i52: ; preds = %sw.bb5 + br label %for.cond57.i + +for.cond57.i: ; preds = %for.inc205.i, %if.else.i52 + store i32 0, i32* undef, align 4 + br label %for.cond65.i + +for.cond65.i: ; preds = %for.inc201.i, %for.cond57.i + br i1 undef, label %for.body70.i, label %for.inc205.i + +for.body70.i: ; preds = %for.cond65.i + br label %for.cond76.i + +for.cond76.i: ; preds = %for.inc197.i, %for.body70.i + %0 = phi i32 [ %inc199.i, %for.inc197.i ], [ 0, %for.body70.i ] + %cmp81.i = icmp slt i32 %0, 0 + br i1 %cmp81.i, label %for.body82.i, label %for.inc201.i + +for.body82.i: ; preds = %for.cond76.i + br label %for.inc197.i + +for.inc197.i: ; preds = %for.body82.i, %if.then.i49 + %inc199.i = add nsw i32 undef, 1 + br label %for.cond76.i + +for.inc201.i: ; preds = %for.cond76.i + br label %for.cond65.i + +for.inc205.i: ; preds = %for.cond65.i + br label %for.cond57.i + +return: ; preds = %entry + ret void +} +