Index: lib/CodeGen/MachineBlockPlacement.cpp =================================================================== --- lib/CodeGen/MachineBlockPlacement.cpp +++ lib/CodeGen/MachineBlockPlacement.cpp @@ -262,6 +262,7 @@ void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, SmallVectorImpl &BlockWorkList, + SmallVectorImpl &EHPadWorkList, const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB, BlockChain &Chain, @@ -274,8 +275,14 @@ getFirstUnplacedBlock(MachineFunction &F, const BlockChain &PlacedChain, MachineFunction::iterator &PrevUnplacedBlockIt, const BlockFilterSet *BlockFilter); + void fillWorkLists(MachineBasicBlock *MBB, + SmallPtrSetImpl &UpdatedPreds, + SmallVectorImpl &BlockWorkList, + SmallVectorImpl &EHPadWorkList, + const BlockFilterSet *BlockFilter); void buildChain(MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl &BlockWorkList, + SmallVectorImpl &EHPadWorkList, const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *findBestLoopTop(MachineLoop &L, const BlockFilterSet &LoopBlockSet); @@ -341,6 +348,7 @@ void MachineBlockPlacement::markChainSuccessors( BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, SmallVectorImpl &BlockWorkList, + SmallVectorImpl &EHPadWorkList, const BlockFilterSet *BlockFilter) { // Walk all the blocks in this chain, marking their successors as having // a predecessor placed. @@ -359,8 +367,14 @@ // This is a cross-chain edge that is within the loop, so decrement the // loop predecessor count of the destination chain. - if (SuccChain.UnscheduledPredecessors > 0 && --SuccChain.UnscheduledPredecessors == 0) - BlockWorkList.push_back(*SuccChain.begin()); + if (SuccChain.UnscheduledPredecessors == 0 || --SuccChain.UnscheduledPredecessors > 0) + continue; + + auto *MBB = *SuccChain.begin(); + if (MBB->isEHPad()) + EHPadWorkList.push_back(MBB); + else + BlockWorkList.push_back(MBB); } } } @@ -403,7 +417,7 @@ SmallVector Successors; for (MachineBasicBlock *Succ : BB->successors()) { bool SkipSucc = false; - if (BlockFilter && !BlockFilter->count(Succ)) { + if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) { SkipSucc = true; } else { BlockChain *SuccChain = BlockToChain[Succ]; @@ -524,9 +538,16 @@ }), WorkList.end()); + if (WorkList.empty()) + return nullptr; + + bool IsEHPad = WorkList[0]->isEHPad(); + MachineBasicBlock *BestBlock = nullptr; BlockFrequency BestFreq; for (MachineBasicBlock *MBB : WorkList) { + assert(MBB->isEHPad() == IsEHPad); + BlockChain &SuccChain = *BlockToChain[MBB]; if (&SuccChain == &Chain) continue; @@ -536,11 +557,16 @@ BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB); DEBUG(dbgs() << " " << getBlockName(MBB) << " -> "; MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); - if (BestBlock && BestFreq >= CandidateFreq) + + // For ehpad, we layout the least probable first as to avoid jumping back + // from least probable landingpads to more probable ones. + if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq))) continue; + BestBlock = MBB; BestFreq = CandidateFreq; } + return BestBlock; } @@ -570,9 +596,42 @@ return nullptr; } +void MachineBlockPlacement::fillWorkLists( + MachineBasicBlock *MBB, + SmallPtrSetImpl &UpdatedPreds, + SmallVectorImpl &BlockWorkList, + SmallVectorImpl &EHPadWorkList, + const BlockFilterSet *BlockFilter = nullptr) { + BlockChain &Chain = *BlockToChain[MBB]; + if (!UpdatedPreds.insert(&Chain).second) + return; + + assert(Chain.UnscheduledPredecessors == 0); + for (MachineBasicBlock *ChainBB : Chain) { + assert(BlockToChain[ChainBB] == &Chain); + for (MachineBasicBlock *Pred : ChainBB->predecessors()) { + if (BlockFilter && !BlockFilter->count(Pred)) + return; + if (BlockToChain[Pred] == &Chain) + return; + ++Chain.UnscheduledPredecessors; + } + } + + if (Chain.UnscheduledPredecessors != 0) + return; + + MBB = *Chain.begin(); + if (MBB->isEHPad()) + EHPadWorkList.push_back(MBB); + else + BlockWorkList.push_back(MBB); +} + void MachineBlockPlacement::buildChain( MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl &BlockWorkList, + SmallVectorImpl &EHPadWorkList, const BlockFilterSet *BlockFilter) { assert(BB); assert(BlockToChain[BB] == &Chain); @@ -580,7 +639,7 @@ MachineFunction::iterator PrevUnplacedBlockIt = F.begin(); MachineBasicBlock *LoopHeaderBB = BB; - markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter); + markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, EHPadWorkList, BlockFilter); BB = *std::prev(Chain.end()); for (;;) { assert(BB); @@ -596,6 +655,8 @@ // this point. This won't be a fallthrough, but it will increase locality. if (!BestSucc) BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter); + if (!BestSucc) + BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList, BlockFilter); if (!BestSucc) { BestSucc = @@ -614,7 +675,7 @@ SuccChain.UnscheduledPredecessors = 0; DEBUG(dbgs() << "Merging from " << getBlockName(BB) << " to " << getBlockName(BestSucc) << "\n"); - markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter); + markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, EHPadWorkList, BlockFilter); Chain.merge(BestSucc, &SuccChain); BB = *std::prev(Chain.end()); } @@ -1035,6 +1096,7 @@ buildLoopChains(F, *InnerLoop); SmallVector BlockWorkList; + SmallVector EHPadWorkList; BlockFilterSet LoopBlockSet = collectLoopBlockSet(F, L); // Check if we have profile data for this function. If yes, we will rotate @@ -1067,26 +1129,10 @@ assert(LoopChain.UnscheduledPredecessors == 0); UpdatedPreds.insert(&LoopChain); - for (MachineBasicBlock *LoopBB : LoopBlockSet) { - BlockChain &Chain = *BlockToChain[LoopBB]; - if (!UpdatedPreds.insert(&Chain).second) - continue; - - assert(Chain.UnscheduledPredecessors == 0); - for (MachineBasicBlock *ChainBB : Chain) { - assert(BlockToChain[ChainBB] == &Chain); - for (MachineBasicBlock *Pred : ChainBB->predecessors()) { - if (BlockToChain[Pred] == &Chain || !LoopBlockSet.count(Pred)) - continue; - ++Chain.UnscheduledPredecessors; - } - } + for (MachineBasicBlock *LoopBB : LoopBlockSet) + fillWorkLists(LoopBB, UpdatedPreds, BlockWorkList, EHPadWorkList, &LoopBlockSet); - if (Chain.UnscheduledPredecessors == 0) - BlockWorkList.push_back(*Chain.begin()); - } - - buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet); + buildChain(LoopTop, LoopChain, BlockWorkList, EHPadWorkList, &LoopBlockSet); if (RotateLoopWithProfile) rotateLoopWithProfile(LoopChain, L, LoopBlockSet); @@ -1183,29 +1229,14 @@ buildLoopChains(F, *L); SmallVector BlockWorkList; + SmallVector EHPadWorkList; SmallPtrSet UpdatedPreds; - for (MachineBasicBlock &MBB : F) { - BlockChain &Chain = *BlockToChain[&MBB]; - if (!UpdatedPreds.insert(&Chain).second) - continue; - - assert(Chain.UnscheduledPredecessors == 0); - for (MachineBasicBlock *ChainBB : Chain) { - assert(BlockToChain[ChainBB] == &Chain); - for (MachineBasicBlock *Pred : ChainBB->predecessors()) { - if (BlockToChain[Pred] == &Chain) - continue; - ++Chain.UnscheduledPredecessors; - } - } - - if (Chain.UnscheduledPredecessors == 0) - BlockWorkList.push_back(*Chain.begin()); - } + for (MachineBasicBlock &MBB : F) + fillWorkLists(&MBB, UpdatedPreds, BlockWorkList, EHPadWorkList); BlockChain &FunctionChain = *BlockToChain[&F.front()]; - buildChain(&F.front(), FunctionChain, BlockWorkList); + buildChain(&F.front(), FunctionChain, BlockWorkList, EHPadWorkList); #ifndef NDEBUG typedef SmallPtrSet FunctionBlockSetType; Index: test/CodeGen/PowerPC/pr25802.ll =================================================================== --- test/CodeGen/PowerPC/pr25802.ll +++ /dev/null @@ -1,52 +0,0 @@ -; RUN: llc < %s | FileCheck %s -; CHECK: .long .Ltmp6-.Ltmp12 # Call between .Ltmp12 and .Ltmp6 - -; We used to crash in filetype=obj when computing a negative value. -; RUN: llc -filetype=obj < %s - -target triple = "powerpc--netbsd" -@_ZTI1I = external constant { i8*, i8* } -define void @f(i8 %foo, i32 %bar) personality i8* bitcast (void ()* @g to i8*) { - invoke void @g() - to label %try.cont unwind label %lpad -lpad: ; preds = %0 - %tmp = landingpad { i8*, i32 } - catch i8* bitcast ({ i8*, i8* }* @_ZTI1I to i8*) - br i1 undef, label %catch10, label %catch -catch10: ; preds = %lpad - %tmp8 = load i32, i32* undef, align 4 - %conv.i.i = zext i8 %foo to i32 - %cond.i.i = select i1 undef, i32 %conv.i.i, i32 %tmp8 - invoke void @_Z24__put_character_sequenceIccEvR1AIT_T0_Ej(i32 %cond.i.i) - to label %invoke.cont20 unwind label %lpad15 -invoke.cont20: ; preds = %catch10 - ret void -try.cont: ; preds = %0 - ret void -catch: ; preds = %lpad - %tmp14 = load i32, i32* undef, align 4 - %conv.i.i34 = zext i8 %foo to i32 - %cond.i.i35 = select i1 undef, i32 %conv.i.i34, i32 %tmp14 - invoke void @_Z24__put_character_sequenceIccEvR1AIT_T0_Ej(i32 %cond.i.i35) - to label %invoke.cont8 unwind label %lpad3 -invoke.cont8: ; preds = %call2.i.i.noexc36 - ret void -lpad3: ; preds = %call2.i.i.noexc36, %catch - %tmp16 = landingpad { i8*, i32 } - cleanup - invoke void @g() - to label %eh.resume unwind label %terminate.lpad -lpad15: ; preds = %catch10 - %tmp19 = landingpad { i8*, i32 } - cleanup - invoke void @g() - to label %eh.resume unwind label %terminate.lpad -eh.resume: ; preds = %lpad15, %lpad3 - ret void -terminate.lpad: ; preds = %lpad15, %lpad3 - %tmp22 = landingpad { i8*, i32 } - catch i8* null - ret void -} -declare void @g() -declare void @_Z24__put_character_sequenceIccEvR1AIT_T0_Ej(i32) Index: test/CodeGen/X86/block-placement.ll =================================================================== --- test/CodeGen/X86/block-placement.ll +++ test/CodeGen/X86/block-placement.ll @@ -1083,3 +1083,53 @@ %ret = phi i32 [ %val1, %then ], [ %val2, %else ] ret i32 %ret } + +; Make sure we put landingpads out of the way. +declare i32 @pers(...) + +declare i32 @foo(); + +declare i32 @bar(); + +define i32 @test_lp(i32 %a) personality i32 (...)* @pers { +; CHECK-LABEL: test_lp: +; CHECK: %entry +; CHECK: %hot +; CHECK: %then +; CHECK: %cold +; CHECK: %coldlp +; CHECK: %hotlp +; CHECK: %lpret +entry: + %0 = icmp sgt i32 %a, 1 + br i1 %0, label %hot, label %cold, !prof !3 + +hot: + %1 = invoke i32 @foo() + to label %then unwind label %hotlp + +cold: + %2 = invoke i32 @bar() + to label %then unwind label %coldlp + +then: + %3 = phi i32 [ %1, %hot ], [ %2, %cold ] + ret i32 %3 + +hotlp: + %4 = landingpad { i8*, i32 } + cleanup + br label %lpret + +coldlp: + %5 = landingpad { i8*, i32 } + cleanup + br label %lpret + +lpret: + %6 = phi i32 [-1, %hotlp], [-2, %coldlp] + %7 = add i32 %6, 42 + ret i32 %7 +} + +!3 = !{!"branch_weights", i32 65536, i32 0} Index: test/CodeGen/X86/seh-safe-div-win32.ll =================================================================== --- test/CodeGen/X86/seh-safe-div-win32.ll +++ test/CodeGen/X86/seh-safe-div-win32.ll @@ -65,13 +65,13 @@ ; Landing pad code -; CHECK: [[handler0:LBB0_[0-9]+]]: # %handler0 +; CHECK: [[handler1:LBB0_[0-9]+]]: # %handler1 ; Restore SP ; CHECK: movl {{.*}}(%ebp), %esp ; CHECK: calll _puts ; CHECK: jmp [[cont_bb]] -; CHECK: [[handler1:LBB0_[0-9]+]]: # %handler1 +; CHECK: [[handler0:LBB0_[0-9]+]]: # %handler0 ; Restore SP ; CHECK: movl {{.*}}(%ebp), %esp ; CHECK: calll _puts Index: test/CodeGen/X86/seh-safe-div.ll =================================================================== --- test/CodeGen/X86/seh-safe-div.ll +++ test/CodeGen/X86/seh-safe-div.ll @@ -67,14 +67,14 @@ ; Landing pad code -; CHECK: [[handler0:\.LBB0_[0-9]+]]: # %handler0 +; CHECK: [[handler1:\.LBB0_[0-9]+]]: # %handler1 ; CHECK: callq puts -; CHECK: movl $-1, [[rloc]] +; CHECK: movl $-2, [[rloc]] ; CHECK: jmp [[cont_bb]] -; CHECK: [[handler1:\.LBB0_[0-9]+]]: # %handler1 +; CHECK: [[handler0:\.LBB0_[0-9]+]]: # %handler0 ; CHECK: callq puts -; CHECK: movl $-2, [[rloc]] +; CHECK: movl $-1, [[rloc]] ; CHECK: jmp [[cont_bb]] ; CHECK: .seh_handlerdata Index: test/CodeGen/X86/win-catchpad.ll =================================================================== --- test/CodeGen/X86/win-catchpad.ll +++ test/CodeGen/X86/win-catchpad.ll @@ -63,17 +63,17 @@ ; X86: [[contbb:LBB0_[0-9]+]]: # %try.cont ; X86: retl -; X86: [[restorebb1:LBB0_[0-9]+]]: # Block address taken -; X86-NEXT: # %handler1 -; X86-NEXT: addl $12, %ebp -; X86: jmp [[contbb]] - ; FIXME: These should be de-duplicated. ; X86: [[restorebb2:LBB0_[0-9]+]]: # Block address taken ; X86-NEXT: # %handler2 ; X86-NEXT: addl $12, %ebp ; X86: jmp [[contbb]] +; X86: [[restorebb1:LBB0_[0-9]+]]: # Block address taken +; X86-NEXT: # %handler1 +; X86-NEXT: addl $12, %ebp +; X86: jmp [[contbb]] + ; X86: "?catch$[[catch1bb:[0-9]+]]@?0?try_catch_catch@4HA": ; X86: LBB0_[[catch1bb]]: # %handler1{{$}} ; X86: pushl %ebp