Index: lib/CodeGen/MachineBlockPlacement.cpp =================================================================== --- lib/CodeGen/MachineBlockPlacement.cpp +++ lib/CodeGen/MachineBlockPlacement.cpp @@ -262,6 +262,7 @@ void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, SmallVectorImpl &BlockWorkList, + SmallVectorImpl &EHPadWorkList, const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB, BlockChain &Chain, @@ -282,9 +283,11 @@ void fillWorkLists(MachineBasicBlock *MBB, SmallPtrSetImpl &UpdatedPreds, SmallVectorImpl &BlockWorkList, + SmallVectorImpl &EHPadWorkList, const BlockFilterSet *BlockFilter); void buildChain(MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl &BlockWorkList, + SmallVectorImpl &EHPadWorkList, const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *findBestLoopTop(MachineLoop &L, const BlockFilterSet &LoopBlockSet); @@ -350,6 +353,7 @@ void MachineBlockPlacement::markChainSuccessors( BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, SmallVectorImpl &BlockWorkList, + SmallVectorImpl &EHPadWorkList, const BlockFilterSet *BlockFilter) { // Walk all the blocks in this chain, marking their successors as having // a predecessor placed. @@ -368,8 +372,15 @@ // This is a cross-chain edge that is within the loop, so decrement the // loop predecessor count of the destination chain. - if (SuccChain.UnscheduledPredecessors > 0 && --SuccChain.UnscheduledPredecessors == 0) - BlockWorkList.push_back(*SuccChain.begin()); + if (SuccChain.UnscheduledPredecessors == 0 || + --SuccChain.UnscheduledPredecessors > 0) + continue; + + auto *MBB = *SuccChain.begin(); + if (MBB->isEHPad()) + EHPadWorkList.push_back(MBB); + else + BlockWorkList.push_back(MBB); } } } @@ -412,7 +423,7 @@ SmallVector Successors; for (MachineBasicBlock *Succ : BB->successors()) { bool SkipSucc = false; - if (BlockFilter && !BlockFilter->count(Succ)) { + if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) { SkipSucc = true; } else { BlockChain *SuccChain = BlockToChain[Succ]; @@ -532,9 +543,16 @@ }), WorkList.end()); + if (WorkList.empty()) + return nullptr; + + bool IsEHPad = WorkList[0]->isEHPad(); + MachineBasicBlock *BestBlock = nullptr; BlockFrequency BestFreq; for (MachineBasicBlock *MBB : WorkList) { + assert(MBB->isEHPad() == IsEHPad); + BlockChain &SuccChain = *BlockToChain[MBB]; if (&SuccChain == &Chain) continue; @@ -544,11 +562,32 @@ BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB); DEBUG(dbgs() << " " << getBlockName(MBB) << " -> "; MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); - if (BestBlock && BestFreq >= CandidateFreq) + + // For ehpad, we layout the least probable first as to avoid jumping back + // from least probable landingpads to more probable ones. + // + // FIXME: Using probability is probably (!) not the best way to achieve + // this. We should probably have a more principled approach to layout + // cleanup code. + // + // The goal is to get: + // + // +--------------------------+ + // | V + // InnerLp -> InnerCleanup OuterLp -> OuterCleanup -> Resume + // + // Rather than: + // + // +-------------------------------------+ + // V | + // OuterLp -> OuterCleanup -> Resume InnerLp -> InnerCleanup + if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq))) continue; + BestBlock = MBB; BestFreq = CandidateFreq; } + return BestBlock; } @@ -582,6 +621,7 @@ MachineBasicBlock *MBB, SmallPtrSetImpl &UpdatedPreds, SmallVectorImpl &BlockWorkList, + SmallVectorImpl &EHPadWorkList, const BlockFilterSet *BlockFilter = nullptr) { BlockChain &Chain = *BlockToChain[MBB]; if (!UpdatedPreds.insert(&Chain).second) @@ -599,13 +639,20 @@ } } - if (Chain.UnscheduledPredecessors == 0) - BlockWorkList.push_back(*Chain.begin()); + if (Chain.UnscheduledPredecessors != 0) + return; + + MBB = *Chain.begin(); + if (MBB->isEHPad()) + EHPadWorkList.push_back(MBB); + else + BlockWorkList.push_back(MBB); } void MachineBlockPlacement::buildChain( MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl &BlockWorkList, + SmallVectorImpl &EHPadWorkList, const BlockFilterSet *BlockFilter) { assert(BB); assert(BlockToChain[BB] == &Chain); @@ -613,7 +660,8 @@ MachineFunction::iterator PrevUnplacedBlockIt = F.begin(); MachineBasicBlock *LoopHeaderBB = BB; - markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter); + markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, EHPadWorkList, + BlockFilter); BB = *std::prev(Chain.end()); for (;;) { assert(BB); @@ -629,6 +677,8 @@ // this point. This won't be a fallthrough, but it will increase locality. if (!BestSucc) BestSucc = selectBestCandidateBlock(Chain, BlockWorkList); + if (!BestSucc) + BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList); if (!BestSucc) { BestSucc = @@ -647,7 +697,8 @@ SuccChain.UnscheduledPredecessors = 0; DEBUG(dbgs() << "Merging from " << getBlockName(BB) << " to " << getBlockName(BestSucc) << "\n"); - markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter); + markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, EHPadWorkList, + BlockFilter); Chain.merge(BestSucc, &SuccChain); BB = *std::prev(Chain.end()); } @@ -1067,6 +1118,7 @@ buildLoopChains(F, *InnerLoop); SmallVector BlockWorkList; + SmallVector EHPadWorkList; BlockFilterSet LoopBlockSet = collectLoopBlockSet(F, L); // Check if we have profile data for this function. If yes, we will rotate @@ -1100,9 +1152,10 @@ UpdatedPreds.insert(&LoopChain); for (MachineBasicBlock *LoopBB : LoopBlockSet) - fillWorkLists(LoopBB, UpdatedPreds, BlockWorkList, &LoopBlockSet); + fillWorkLists(LoopBB, UpdatedPreds, BlockWorkList, EHPadWorkList, + &LoopBlockSet); - buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet); + buildChain(LoopTop, LoopChain, BlockWorkList, EHPadWorkList, &LoopBlockSet); if (RotateLoopWithProfile) rotateLoopWithProfile(LoopChain, L, LoopBlockSet); @@ -1199,13 +1252,14 @@ buildLoopChains(F, *L); SmallVector BlockWorkList; + SmallVector EHPadWorkList; SmallPtrSet UpdatedPreds; for (MachineBasicBlock &MBB : F) - fillWorkLists(&MBB, UpdatedPreds, BlockWorkList); + fillWorkLists(&MBB, UpdatedPreds, BlockWorkList, EHPadWorkList); BlockChain &FunctionChain = *BlockToChain[&F.front()]; - buildChain(&F.front(), FunctionChain, BlockWorkList); + buildChain(&F.front(), FunctionChain, BlockWorkList, EHPadWorkList); #ifndef NDEBUG typedef SmallPtrSet FunctionBlockSetType; Index: test/CodeGen/PowerPC/pr25802.ll =================================================================== --- test/CodeGen/PowerPC/pr25802.ll +++ /dev/null @@ -1,52 +0,0 @@ -; RUN: llc < %s | FileCheck %s -; CHECK: .long .Ltmp9-.Ltmp8 # Call between .Ltmp8 and .Ltmp9 - -; We used to crash in filetype=obj when computing a negative value. -; RUN: llc -filetype=obj < %s - -target triple = "powerpc--netbsd" -@_ZTI1I = external constant { i8*, i8* } -define void @f(i8 %foo, i32 %bar) personality i8* bitcast (void ()* @g to i8*) { - invoke void @g() - to label %try.cont unwind label %lpad -lpad: ; preds = %0 - %tmp = landingpad { i8*, i32 } - catch i8* bitcast ({ i8*, i8* }* @_ZTI1I to i8*) - br i1 undef, label %catch10, label %catch -catch10: ; preds = %lpad - %tmp8 = load i32, i32* undef, align 4 - %conv.i.i = zext i8 %foo to i32 - %cond.i.i = select i1 undef, i32 %conv.i.i, i32 %tmp8 - invoke void @_Z24__put_character_sequenceIccEvR1AIT_T0_Ej(i32 %cond.i.i) - to label %invoke.cont20 unwind label %lpad15 -invoke.cont20: ; preds = %catch10 - ret void -try.cont: ; preds = %0 - ret void -catch: ; preds = %lpad - %tmp14 = load i32, i32* undef, align 4 - %conv.i.i34 = zext i8 %foo to i32 - %cond.i.i35 = select i1 undef, i32 %conv.i.i34, i32 %tmp14 - invoke void @_Z24__put_character_sequenceIccEvR1AIT_T0_Ej(i32 %cond.i.i35) - to label %invoke.cont8 unwind label %lpad3 -invoke.cont8: ; preds = %call2.i.i.noexc36 - ret void -lpad3: ; preds = %call2.i.i.noexc36, %catch - %tmp16 = landingpad { i8*, i32 } - cleanup - invoke void @g() - to label %eh.resume unwind label %terminate.lpad -lpad15: ; preds = %catch10 - %tmp19 = landingpad { i8*, i32 } - cleanup - invoke void @g() - to label %eh.resume unwind label %terminate.lpad -eh.resume: ; preds = %lpad15, %lpad3 - ret void -terminate.lpad: ; preds = %lpad15, %lpad3 - %tmp22 = landingpad { i8*, i32 } - catch i8* null - ret void -} -declare void @g() -declare void @_Z24__put_character_sequenceIccEvR1AIT_T0_Ej(i32) Index: test/CodeGen/X86/block-placement.ll =================================================================== --- test/CodeGen/X86/block-placement.ll +++ test/CodeGen/X86/block-placement.ll @@ -1083,3 +1083,96 @@ %ret = phi i32 [ %val1, %then ], [ %val2, %else ] ret i32 %ret } + +; Make sure we put landingpads out of the way. +declare i32 @pers(...) + +declare i32 @foo(); + +declare i32 @bar(); + +define i32 @test_lp(i32 %a) personality i32 (...)* @pers { +; CHECK-LABEL: test_lp: +; CHECK: %entry +; CHECK: %hot +; CHECK: %then +; CHECK: %cold +; CHECK: %coldlp +; CHECK: %hotlp +; CHECK: %lpret +entry: + %0 = icmp sgt i32 %a, 1 + br i1 %0, label %hot, label %cold, !prof !4 + +hot: + %1 = invoke i32 @foo() + to label %then unwind label %hotlp + +cold: + %2 = invoke i32 @bar() + to label %then unwind label %coldlp + +then: + %3 = phi i32 [ %1, %hot ], [ %2, %cold ] + ret i32 %3 + +hotlp: + %4 = landingpad { i8*, i32 } + cleanup + br label %lpret + +coldlp: + %5 = landingpad { i8*, i32 } + cleanup + br label %lpret + +lpret: + %6 = phi i32 [-1, %hotlp], [-2, %coldlp] + %7 = add i32 %6, 42 + ret i32 %7 +} + +!4 = !{!"branch_weights", i32 65536, i32 0} + +; Make sure that ehpad are scheduled from the least probable one +; to the most probable one. See selectBestCandidateBlock as to why. +declare void @clean(); + +define void @test_flow_unwind() personality i32 (...)* @pers { +; CHECK-LABEL: test_flow_unwind: +; CHECK: %entry +; CHECK: %then +; CHECK: %exit +; CHECK: %innerlp +; CHECK: %outerlp +; CHECK: %outercleanup +entry: + %0 = invoke i32 @foo() + to label %then unwind label %outerlp + +then: + %1 = invoke i32 @bar() + to label %exit unwind label %innerlp + +exit: + ret void + +innerlp: + %2 = landingpad { i8*, i32 } + cleanup + br label %innercleanup + +outerlp: + %3 = landingpad { i8*, i32 } + cleanup + br label %outercleanup + +outercleanup: + %4 = phi { i8*, i32 } [%2, %innercleanup], [%3, %outerlp] + call void @clean() + resume { i8*, i32 } %4 + +innercleanup: + call void @clean() + br label %outercleanup +} Index: test/CodeGen/X86/seh-safe-div-win32.ll =================================================================== --- test/CodeGen/X86/seh-safe-div-win32.ll +++ test/CodeGen/X86/seh-safe-div-win32.ll @@ -65,13 +65,13 @@ ; Landing pad code -; CHECK: [[handler0:LBB0_[0-9]+]]: # %handler0 +; CHECK: [[handler1:LBB0_[0-9]+]]: # %handler1 ; Restore SP ; CHECK: movl {{.*}}(%ebp), %esp ; CHECK: calll _puts ; CHECK: jmp [[cont_bb]] -; CHECK: [[handler1:LBB0_[0-9]+]]: # %handler1 +; CHECK: [[handler0:LBB0_[0-9]+]]: # %handler0 ; Restore SP ; CHECK: movl {{.*}}(%ebp), %esp ; CHECK: calll _puts Index: test/CodeGen/X86/seh-safe-div.ll =================================================================== --- test/CodeGen/X86/seh-safe-div.ll +++ test/CodeGen/X86/seh-safe-div.ll @@ -67,14 +67,14 @@ ; Landing pad code -; CHECK: [[handler0:\.LBB0_[0-9]+]]: # %handler0 +; CHECK: [[handler1:\.LBB0_[0-9]+]]: # %handler1 ; CHECK: callq puts -; CHECK: movl $-1, [[rloc]] +; CHECK: movl $-2, [[rloc]] ; CHECK: jmp [[cont_bb]] -; CHECK: [[handler1:\.LBB0_[0-9]+]]: # %handler1 +; CHECK: [[handler0:\.LBB0_[0-9]+]]: # %handler0 ; CHECK: callq puts -; CHECK: movl $-2, [[rloc]] +; CHECK: movl $-1, [[rloc]] ; CHECK: jmp [[cont_bb]] ; CHECK: .seh_handlerdata Index: test/CodeGen/X86/win-catchpad.ll =================================================================== --- test/CodeGen/X86/win-catchpad.ll +++ test/CodeGen/X86/win-catchpad.ll @@ -63,17 +63,17 @@ ; X86: [[contbb:LBB0_[0-9]+]]: # %try.cont ; X86: retl -; X86: [[restorebb1:LBB0_[0-9]+]]: # Block address taken -; X86-NEXT: # %handler1 -; X86-NEXT: addl $12, %ebp -; X86: jmp [[contbb]] - ; FIXME: These should be de-duplicated. ; X86: [[restorebb2:LBB0_[0-9]+]]: # Block address taken ; X86-NEXT: # %handler2 ; X86-NEXT: addl $12, %ebp ; X86: jmp [[contbb]] +; X86: [[restorebb1:LBB0_[0-9]+]]: # Block address taken +; X86-NEXT: # %handler1 +; X86-NEXT: addl $12, %ebp +; X86: jmp [[contbb]] + ; X86: "?catch$[[catch1bb:[0-9]+]]@?0?try_catch_catch@4HA": ; X86: LBB0_[[catch1bb]]: # %handler1{{$}} ; X86: pushl %ebp