Index: lib/CodeGen/WinEHPrepare.cpp =================================================================== --- lib/CodeGen/WinEHPrepare.cpp +++ lib/CodeGen/WinEHPrepare.cpp @@ -130,8 +130,9 @@ DenseMap &Loads, Function &F); void demoteNonlocalUses(Value *V, std::set &ColorsForBB, Function &F); - bool prepareExplicitEH(Function &F); - void numberFunclet(BasicBlock *InitialBB, BasicBlock *FuncletBB); + bool prepareExplicitEH(Function &F, + SmallVectorImpl &EntryBlocks); + void colorFunclets(Function &F, SmallVectorImpl &EntryBlocks); Triple TheTriple; @@ -175,6 +176,7 @@ std::map> BlockColors; std::map> FuncletBlocks; + std::map> FuncletChildren; }; class WinEHFrameVariableMaterializer : public ValueMaterializer { @@ -392,20 +394,25 @@ SmallVector LPads; SmallVector Resumes; + SmallVector EntryBlocks; bool ForExplicitEH = false; for (BasicBlock &BB : Fn) { - if (auto *LP = BB.getLandingPadInst()) { + Instruction *First = BB.getFirstNonPHI(); + if (auto *LP = dyn_cast(First)) { LPads.push_back(LP); - } else if (BB.getFirstNonPHI()->isEHPad()) { + } else if (First->isEHPad()) { + if (!ForExplicitEH) + EntryBlocks.push_back(&Fn.getEntryBlock()); + if (!isa(First)) + EntryBlocks.push_back(&BB); ForExplicitEH = true; - break; } if (auto *Resume = dyn_cast(BB.getTerminator())) Resumes.push_back(Resume); } if (ForExplicitEH) - return prepareExplicitEH(Fn); + return prepareExplicitEH(Fn, EntryBlocks); // No need to prepare functions that lack landing pads. if (LPads.empty()) @@ -3062,72 +3069,108 @@ Num.processCallSite(None, ImmutableCallSite()); } -void WinEHPrepare::numberFunclet(BasicBlock *InitialBB, BasicBlock *FuncletBB) { - Instruction *FirstNonPHI = FuncletBB->getFirstNonPHI(); - bool IsCatch = isa(FirstNonPHI); - bool IsCleanup = isa(FirstNonPHI); +void WinEHPrepare::colorFunclets(Function &F, + SmallVectorImpl &EntryBlocks) { + SmallVector, 16> Worklist; + BasicBlock *EntryBlock = &F.getEntryBlock(); - // Initialize the worklist with the funclet's entry point. - std::vector Worklist; - Worklist.push_back(InitialBB); + // Build up the color map, which maps each block to its set of 'colors'. + // For any block B, the "colors" of B are the set of funclets F (possibly + // including a root "funclet" representing the main function), such that + // F will need to directly contain B or a copy of B (where the term "directly + // contain" is used to distinguish from being "transitively contained" in + // a nested funclet). + // Use a CFG walk driven by a worklist of (block, color) pairs. The "color" + // sets attached during this processing to a block which is the entry of some + // funclet F is actually the set of F's parents -- i.e. the union of colors + // of all predecessors of F's entry. For all other blocks, the color sets + // are as defined above. A post-pass fixes up the block color map to reflect + // the same sense of "color" for funclet entries as for other blocks. + + Worklist.push_back({EntryBlock, EntryBlock}); while (!Worklist.empty()) { - BasicBlock *BB = Worklist.back(); - Worklist.pop_back(); - - // There can be only one "pad" basic block in the funclet: the initial one. - if (BB != FuncletBB && BB->isEHPad()) - continue; - - // Add 'FuncletBB' as a possible color for 'BB'. - if (BlockColors[BB].insert(FuncletBB).second == false) { - // Skip basic blocks which we have already visited. - continue; + BasicBlock *Visiting; + BasicBlock *Color; + std::tie(Visiting, Color) = Worklist.pop_back_val(); + Instruction *VisitingHead = Visiting->getFirstNonPHI(); + if (VisitingHead->isEHPad() && !isa(VisitingHead)) { + // Mark this as a funclet head as a member of itself. + FuncletBlocks[Visiting].insert(Visiting); + // Queue exits with the parent color. + for (User *Exit : VisitingHead->users()) + for (BasicBlock *Succ : + successors(cast(Exit)->getParent())) + if (BlockColors[Succ].insert(Color).second) { + Worklist.push_back({Succ, Color}); + } + // Handle CatchPad specially since its successors need different colors. + if (CatchPadInst *CatchPad = dyn_cast(VisitingHead)) { + // Visit the normal successor with the color of the new EH pad, and + // visit the unwind successor with the color of the parent. + BasicBlock *NormalSucc = CatchPad->getNormalDest(); + if (BlockColors[NormalSucc].insert(Visiting).second) { + Worklist.push_back({NormalSucc, Visiting}); + } + BasicBlock *UnwindSucc = CatchPad->getUnwindDest(); + if (BlockColors[UnwindSucc].insert(Color).second) { + Worklist.push_back({UnwindSucc, Color}); + } + continue; + } + // Switch color to the current node, except for terminate pads which + // have no bodies and only unwind successors and so need their successors + // visited with the color of the parent. + if (!isa(VisitingHead)) + Color = Visiting; + } else { + // Note that this is a member of the given color. + FuncletBlocks[Color].insert(Visiting); + TerminatorInst *Terminator = Visiting->getTerminator(); + if (isa(Terminator) || + isa(Terminator)) { + // These block's successors have already been queued with the parent + // color. + continue; + } } + for (BasicBlock *Succ : successors(Visiting)) { + if (isa(Succ->getFirstNonPHI())) { + // The catchendpad needs to be visited with the parent's color, not + // the current color. This will happen in the code above that visits + // any catchpad unwind successor with the parent color, so we can + // safely skip this successor here. + continue; + } + if (BlockColors[Succ].insert(Color).second) { + Worklist.push_back({Succ, Color}); + } + } + } - FuncletBlocks[FuncletBB].insert(BB); - - Instruction *Terminator = BB->getTerminator(); - // The catchret's successors cannot be part of the funclet. - if (IsCatch && isa(Terminator)) - continue; - // The cleanupret's successors cannot be part of the funclet. - if (IsCleanup && isa(Terminator)) - continue; - - Worklist.insert(Worklist.end(), succ_begin(BB), succ_end(BB)); + // The processing above actually accumulated the parent set for this + // funclet into the color set for its entry; use the parent set to + // populate the children map, and reset the color set to include just + // the funclet itself (no instruction can target a funclet entry except on + // that transitions to the child funclet). + for (BasicBlock *FuncletEntry : EntryBlocks) { + std::set &ColorMapItem = BlockColors[FuncletEntry]; + for (BasicBlock *Parent : ColorMapItem) + FuncletChildren[Parent].insert(FuncletEntry); + ColorMapItem.clear(); + ColorMapItem.insert(FuncletEntry); } } -bool WinEHPrepare::prepareExplicitEH(Function &F) { +bool WinEHPrepare::prepareExplicitEH( + Function &F, SmallVectorImpl &EntryBlocks) { // Remove unreachable blocks. It is not valuable to assign them a color and // their existence can trick us into thinking values are alive when they are // not. removeUnreachableBlocks(F); - BasicBlock *EntryBlock = &F.getEntryBlock(); - - // Number everything starting from the entry block. - numberFunclet(EntryBlock, EntryBlock); - - for (BasicBlock &BB : F) { - // Remove single entry PHIs to simplify preparation. - if (auto *PN = dyn_cast(BB.begin())) - if (PN->getNumIncomingValues() == 1) - FoldSingleEntryPHINodes(&BB); - - // EH pad instructions are always the first non-PHI nodes in a block if they - // are at all present. - Instruction *I = BB.getFirstNonPHI(); - if (I->isEHPad()) - numberFunclet(&BB, &BB); - - // It is possible for a normal basic block to only be reachable via an - // exceptional basic block. The successor of a catchret is the only case - // where this is possible. - if (auto *CRI = dyn_cast(BB.getTerminator())) - numberFunclet(CRI->getSuccessor(), EntryBlock); - } + // Determine which blocks are reachable from which funclet entries. + colorFunclets(F, EntryBlocks); // Strip PHI nodes off of EH pads. SmallVector PHINodes; @@ -3178,9 +3221,8 @@ // We need to clone all blocks which belong to multiple funclets. Values are // remapped throughout the funclet to propogate both the new instructions // *and* the new basic blocks themselves. - for (auto &Funclet : FuncletBlocks) { - BasicBlock *FuncletPadBB = Funclet.first; - std::set &BlocksInFunclet = Funclet.second; + for (BasicBlock *FuncletPadBB : EntryBlocks) { + std::set &BlocksInFunclet = FuncletBlocks[FuncletPadBB]; std::map Orig2Clone; ValueToValueMapTy VMap; @@ -3191,12 +3233,12 @@ if (NumColorsForBB == 1) continue; - assert(!isa(BB->front()) && - "Polychromatic PHI nodes should have been demoted!"); - // Create a new basic block and copy instructions into it! - BasicBlock *CBB = CloneBasicBlock( - BB, VMap, Twine(".for.", FuncletPadBB->getName()), &F); + BasicBlock *CBB = + CloneBasicBlock(BB, VMap, Twine(".for.", FuncletPadBB->getName())); + // Insert the clone immediately after the original to ensure determinism + // and to keep the same relative ordering of any funclet's blocks. + CBB->insertInto(&F, BB->getNextNode()); // Add basic block mapping. VMap[BB] = CBB; @@ -3282,6 +3324,7 @@ BlockColors.clear(); FuncletBlocks.clear(); + FuncletChildren.clear(); return true; } @@ -3394,12 +3437,11 @@ auto *UsingInst = cast(U.getUser()); BasicBlock *UsingBB = UsingInst->getParent(); - // Is the Use inside a block which is colored with a subset of the Def? + // Is the Use inside a block which is colored the same as the Def? // If so, we don't need to escape the Def because we will clone // ourselves our own private copy. std::set &ColorsForUsingBB = BlockColors[UsingBB]; - if (std::includes(ColorsForBB.begin(), ColorsForBB.end(), - ColorsForUsingBB.begin(), ColorsForUsingBB.end())) + if (ColorsForUsingBB == ColorsForBB) continue; replaceUseWithLoad(V, U, SpillSlot, Loads, F); Index: test/CodeGen/WinEH/wineh-cloning.ll =================================================================== --- /dev/null +++ test/CodeGen/WinEH/wineh-cloning.ll @@ -0,0 +1,384 @@ +; RUN: opt -mtriple=x86_x64-pc-windows-msvc -S -winehprepare < %s | FileCheck %s + +declare i32 @__CxxFrameHandler3(...) + +declare void @f() +declare i32 @g() +declare void @h(i32) +declare i1 @b() + + +define void @test1() personality i32 (...)* @__CxxFrameHandler3 { +entry: + ; %x def colors: {entry} subset of use colors; must spill + %x = call i32 @g() + invoke void @f() + to label %noreturn unwind label %catch +catch: + catchpad [] + to label %noreturn unwind label %endcatch +noreturn: + ; %x use colors: {entry, cleanup} + call void @h(i32 %x) + unreachable +endcatch: + catchendpad unwind to caller +} +; Need two copies of the call to @h, one under entry and one under catch. +; Currently we generate a load for each, though we shouldn't need one +; for the use in entry's copy. +; CHECK-LABEL: @test1( +; CHECK: entry: +; CHECK: store i32 %x, i32* [[Slot:%[^ ]+]] +; CHECK: invoke void @f() +; CHECK: to label %[[EntryCopy:[^ ]+]] unwind label %catch +; CHECK: catch: +; CHECK: catchpad [] to label %[[CatchCopy:[^ ]+]] unwind +; CHECK: [[CatchCopy]]: +; CHECK: [[LoadX2:%[^ ]+]] = load i32, i32* [[Slot]] +; CHECK: call void @h(i32 [[LoadX2]] +; CHECK: [[EntryCopy]]: +; CHECK: [[LoadX1:%[^ ]+]] = load i32, i32* [[Slot]] +; CHECK: call void @h(i32 [[LoadX1]] + + +define void @test2() personality i32 (...)* @__CxxFrameHandler3 { +entry: + invoke void @f() + to label %exit unwind label %cleanup +cleanup: + cleanuppad [] + br label %exit +exit: + call void @f() + ret void +} +; Need two copies of %exit's call to @f -- the subsequent ret is only +; valid when coming from %entry, but on the path from %cleanup, this +; might be a valid call to @f which might dynamically not return. +; CHECK-LABEL: @test2( +; CHECK: entry: +; CHECK: invoke void @f() +; CHECK: to label %[[exit:[^ ]+]] unwind label %cleanup +; CHECK: cleanup: +; CHECK: cleanuppad [] +; CHECK: call void @f() +; CHECK-NEXT: unreachable +; CHECK: [[exit]]: +; CHECK: call void @f() +; CHECK-NEXT: ret void + + +define void @test3() personality i32 (...)* @__CxxFrameHandler3 { +entry: + invoke void @f() + to label %invoke.cont unwind label %catch +invoke.cont: + invoke void @f() + to label %exit unwind label %cleanup +catch: + catchpad [] to label %shared unwind label %endcatch +endcatch: + catchendpad unwind to caller +cleanup: + cleanuppad [] + br label %shared +shared: + call void @f() + br label %exit +exit: + ret void +} +; Need two copies of %shared's call to @f (similar to @test2 but +; the two regions here are siblings, not parent-child). +; CHECK-LABEL: @test3( +; CHECK: invoke void @f() +; CHECK: invoke void @f() +; CHECK: to label %[[exit:[^ ]+]] unwind +; CHECK: catch: +; CHECK: catchpad [] to label %[[shared:[^ ]+]] unwind +; CHECK: cleanup: +; CHECK: cleanuppad [] +; CHECK: call void @f() +; CHECK-NEXT: unreachable +; CHECK: [[shared]]: +; CHECK: call void @f() +; CHECK-NEXT: unreachable +; CHECK: [[exit]]: +; CHECK: ret void + + +define void @test4() personality i32 (...)* @__CxxFrameHandler3 { +entry: + invoke void @f() + to label %shared unwind label %catch +catch: + catchpad [] + to label %shared unwind label %endcatch +endcatch: + catchendpad unwind to caller +shared: + %x = call i32 @g() + %i = call i32 @g() + %zero.trip = icmp eq i32 %i, 0 + br i1 %zero.trip, label %exit, label %loop +loop: + %i.loop = phi i32 [ %i, %shared ], [ %i.dec, %loop.tail ] + %b = call i1 @b() + br i1 %b, label %left, label %right +left: + %y = call i32 @g() + br label %loop.tail +right: + call void @h(i32 %x) + br label %loop.tail +loop.tail: + %i.dec = sub i32 %i.loop, 1 + %done = icmp eq i32 %i.dec, 0 + br i1 %done, label %exit, label %loop +exit: + call void @h(i32 %x) + unreachable +} +; Make sure we can clone regions that have internal control +; flow and SSA values. Here we need two copies of everything +; from %shared to %exit. +; CHECK-LABEL: @test4( +; CHECK: entry: +; CHECK: to label %[[shared_E:[^ ]+]] unwind label %catch +; CHECK: catch: +; CHECK: to label %[[shared_C:[^ ]+]] unwind label %endcatch +; CHECK: [[shared_C]]: +; CHECK: [[x_C:%[^ ]+]] = call i32 @g() +; CHECK: [[i_C:%[^ ]+]] = call i32 @g() +; CHECK: [[zt_C:%[^ ]+]] = icmp eq i32 [[i_C]], 0 +; CHECK: br i1 [[zt_C]], label %[[exit_C:[^ ]+]], label %[[loop_C:[^ ]+]] +; CHECK: [[shared_E]]: +; CHECK: [[x_E:%[^ ]+]] = call i32 @g() +; CHECK: [[i_E:%[^ ]+]] = call i32 @g() +; CHECK: [[zt_E:%[^ ]+]] = icmp eq i32 [[i_E]], 0 +; CHECK: br i1 [[zt_E]], label %[[exit_E:[^ ]+]], label %[[loop_E:[^ ]+]] +; CHECK: [[loop_C]]: +; CHECK: [[iloop_C:%[^ ]+]] = phi i32 [ [[i_C]], %[[shared_C]] ], [ [[idec_C:%[^ ]+]], %[[looptail_C:[^ ]+]] ] +; CHECK: [[b_C:%[^ ]+]] = call i1 @b() +; CHECK: br i1 [[b_C]], label %[[left_C:[^ ]+]], label %[[right_C:[^ ]+]] +; CHECK: [[loop_E]]: +; CHECK: [[iloop_E:%[^ ]+]] = phi i32 [ [[i_E]], %[[shared_E]] ], [ [[idec_E:%[^ ]+]], %[[looptail_E:[^ ]+]] ] +; CHECK: [[b_E:%[^ ]+]] = call i1 @b() +; CHECK: br i1 [[b_E]], label %[[left_E:[^ ]+]], label %[[right_E:[^ ]+]] +; CHECK: [[left_C]]: +; CHECK: [[y_C:%[^ ]+]] = call i32 @g() +; CHECK br label %[[looptail_C]] +; CHECK: [[left_E]]: +; CHECK: [[y_E:%[^ ]+]] = call i32 @g() +; CHECK br label %[[looptail_E]] +; CHECK: [[right_C]]: +; CHECK: call void @h(i32 [[x_C]]) +; CHECK: br label %[[looptail_C]] +; CHECK: [[right_E]]: +; CHECK: call void @h(i32 [[x_E]]) +; CHECK: br label %[[looptail_E]] +; CHECK: [[looptail_C]]: +; CHECK: [[idec_C]] = sub i32 [[iloop_C]], 1 +; CHECK: [[done_C:%[^ ]+]] = icmp eq i32 [[idec_C]], 0 +; CHECK: br i1 [[done_C]], label %[[exit_C]], label %[[loop_C]] +; CHECK: [[looptail_E]]: +; CHECK: [[idec_E]] = sub i32 [[iloop_E]], 1 +; CHECK: [[done_E:%[^ ]+]] = icmp eq i32 [[idec_E]], 0 +; CHECK: br i1 [[done_E]], label %[[exit_E]], label %[[loop_E]] +; CHECK: [[exit_C]]: +; CHECK: call void @h(i32 [[x_C]]) +; CHECK: unreachable +; CHECK: [[exit_E]]: +; CHECK: call void @h(i32 [[x_E]]) +; CHECK: unreachable + + +define void @test5() personality i32 (...)* @__CxxFrameHandler3 { +entry: + invoke void @f() + to label %exit unwind label %outer +outer: + %o = cleanuppad [] + %x = call i32 @g() + invoke void @f() + to label %outer.ret unwind label %inner +inner: + %i = catchpad [] + to label %inner.catch unwind label %inner.endcatch +inner.catch: + catchret %i to label %outer.post-inner +inner.endcatch: + catchendpad unwind to caller +outer.post-inner: + call void @h(i32 %x) + br label %outer.ret +outer.ret: + cleanupret %o unwind to caller +exit: + ret void +} +; Simple nested case (catch-inside-cleanup). Nothing needs +; to be cloned. The def and use of %x are both in %outer +; and so don't need to be spilled. +; CHECK-LABEL: @test5( +; CHECK: outer: +; CHECK: %x = call i32 @g() +; CHECK-NEXT: invoke void @f() +; CHECK-NEXT: to label %outer.ret unwind label %inner +; CHECK: inner: +; CHECK: to label %inner.catch unwind label %inner.endcatch +; CHECK: inner.catch: +; CHECK-NEXT: catchret %i to label %outer.post-inner +; CHECK: outer.post-inner: +; CHECK-NEXT: call void @h(i32 %x) +; CHECK-NEXT: br label %outer.ret + + +define void @test6() personality i32 (...)* @__CxxFrameHandler3 { +entry: + invoke void @f() + to label %invoke.cont unwind label %left +invoke.cont: + invoke void @f() + to label %exit unwind label %right +left: + cleanuppad [] + br label %shared +right: + catchpad [] + to label %right.catch unwind label %right.end +right.catch: + br label %shared +right.end: + catchendpad unwind to caller +shared: + %x = call i32 @g() + invoke void @f() + to label %shared.cont unwind label %inner +shared.cont: + unreachable +inner: + %i = cleanuppad [] + call void @h(i32 %x) + cleanupret %i unwind label %right.end +exit: + ret void +} +; %inner is a cleanup which appears both as a child of +; %left and as a child of %right. Since statically we +; need each funclet to have a single parent, we need to +; clone the entire %inner funclet so we can have one +; copy under each parent. The cleanupret in %inner +; unwinds to the catchendpad for %right, so the copy +; of %inner under %right should include it; the copy +; of %inner under %left should instead have an +; `unreachable` inserted there, but the copy under +; %left still needs to be created because it's possible +; the dynamic path enters %left, then enters %inner, +; then calls @h, and that the call to @h doesn't return. +; CHECK-LABEL: @test6( +; TODO: CHECKs + + +define void @test7() personality i32 (...)* @__CxxFrameHandler3 { +entry: + invoke void @f() + to label %invoke.cont unwind label %left +invoke.cont: + invoke void @f() + to label %unreachable unwind label %right +left: + cleanuppad [] + invoke void @f() to label %unreachable unwind label %inner +right: + catchpad [] + to label %right.catch unwind label %right.end +right.catch: + invoke void @f() to label %unreachable unwind label %inner +right.end: + catchendpad unwind to caller +inner: + %i = cleanuppad [] + %x = call i32 @g() + call void @h(i32 %x) + cleanupret %i unwind label %right.end +unreachable: + unreachable +} +; Another case of a two-parent child (like @test6), this time +; with the join at the entry itself instead of following a +; non-pad join. +; CHECK-LABEL: @test7( +; TODO: CHECKs + + +define void @test8() personality i32 (...)* @__CxxFrameHandler3 { +entry: + invoke void @f() + to label %invoke.cont unwind label %left +invoke.cont: + invoke void @f() + to label %unreachable unwind label %right +left: + cleanuppad [] + br label %shared +right: + catchpad [] + to label %right.catch unwind label %right.end +right.catch: + br label %shared +right.end: + catchendpad unwind to caller +shared: + invoke void @f() + to label %unreachable unwind label %inner +inner: + cleanuppad [] + invoke void @f() + to label %unreachable unwind label %inner.child +inner.child: + cleanuppad [] + %x = call i32 @g() + call void @h(i32 %x) + unreachable +unreachable: + unreachable +} +; %inner is a two-parent child which itself has a child; need +; to make two copies of both the %inner and %inner.child. +; CHECK-LABEL: @test8( +; TODO: CHECKs + + +define void @test9() personality i32 (...)* @__CxxFrameHandler3 { +entry: + invoke void @f() + to label %invoke.cont unwind label %left +invoke.cont: + invoke void @f() + to label %unreachable unwind label %right +left: + cleanuppad [] + call void @h(i32 1) + invoke void @f() + to label %unreachable unwind label %right +right: + cleanuppad [] + call void @h(i32 2) + invoke void @f() + to label %unreachable unwind label %left +unreachable: + unreachable +} +; This is an irreducible loop with two funclets that enter each other; +; need to make two copies of each funclet (one a child of root, the +; other a child of the opposite funclet), but also make sure not to +; clone self-descendants (if we tried to do that we'd need to make an +; infinite number of them). Presumably if optimizations ever generated +; such a thing it would mean that one of the two cleanups was originally +; the parent of the other, but that we'd somehow lost track in the CFG +; of which was which along the way; generating each possibility lets +; whichever case was correct execute correctly. +; CHECK-LABEL: @test9( +; TODO: CHECKs