diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h --- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -500,7 +500,9 @@ // create the following structure: // A -> D0A, B -> D0A, I -> D0B, D0A -> D1, D0B -> D1 // If BPI and BFI aren't non-null, BPI/BFI will be updated accordingly. -bool SplitIndirectBrCriticalEdges(Function &F, +// When `IgnoreBlocksWithoutPHI` is set to `true` critical edges leading to a +// block without phi-instructions will not be split. +bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr); diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -524,7 +524,8 @@ // Split some critical edges where one of the sources is an indirect branch, // to help generate sane code for PHIs involving such edges. - EverMadeChange |= SplitIndirectBrCriticalEdges(F); + EverMadeChange |= + SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true); bool MadeChange = true; while (MadeChange) { diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp --- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -862,7 +862,8 @@ // Split indirectbr critical edges here before computing the MST rather // than later in getInstrBB() to avoid invalidating it. - SplitIndirectBrCriticalEdges(F, BPI, BFI); + SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, + BFI); CFGMST MST(F, /*InstrumentFuncEntry_=*/false, BPI, BFI); diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -940,7 +940,7 @@ bool IsCS) { // Split indirectbr critical edges here before computing the MST rather than // later in getInstrBB() to avoid invalidating it. - SplitIndirectBrCriticalEdges(F, BPI, BFI); + SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); FuncPGOInstrumentation FuncInfo( F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry); @@ -1929,7 +1929,7 @@ auto *BFI = LookupBFI(F); // Split indirectbr critical edges here before computing the MST rather than // later in getInstrBB() to avoid invalidating it. - SplitIndirectBrCriticalEdges(F, BPI, BFI); + SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, InstrumentFuncEntry); // When AllMinusOnes is true, it means the profile for the function diff --git a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp --- a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -317,18 +317,11 @@ // predecessors of BB. static BasicBlock * findIBRPredecessor(BasicBlock *BB, SmallVectorImpl &OtherPreds) { - // If the block doesn't have any PHIs, we don't care about it, since there's - // no point in splitting it. - PHINode *PN = dyn_cast(BB->begin()); - if (!PN) - return nullptr; - // Verify we have exactly one IBR predecessor. // Conservatively bail out if one of the other predecessors is not a "regular" // terminator (that is, not a switch or a br). BasicBlock *IBB = nullptr; - for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) { - BasicBlock *PredBB = PN->getIncomingBlock(Pred); + for (BasicBlock *PredBB : predecessors(BB)) { Instruction *PredTerm = PredBB->getTerminator(); switch (PredTerm->getOpcode()) { case Instruction::IndirectBr: @@ -349,6 +342,7 @@ } bool llvm::SplitIndirectBrCriticalEdges(Function &F, + bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI) { // Check whether the function has any indirectbrs, and collect which blocks @@ -370,6 +364,9 @@ bool ShouldUpdateAnalysis = BPI && BFI; bool Changed = false; for (BasicBlock *Target : Targets) { + if (IgnoreBlocksWithoutPHI && Target->phis().empty()) + continue; + SmallVector OtherPreds; BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds); // If we did not found an indirectbr, or the indirectbr is the only diff --git a/llvm/test/Transforms/GCOVProfiling/split-indirectbr-critical-edges.ll b/llvm/test/Transforms/GCOVProfiling/split-indirectbr-critical-edges.ll --- a/llvm/test/Transforms/GCOVProfiling/split-indirectbr-critical-edges.ll +++ b/llvm/test/Transforms/GCOVProfiling/split-indirectbr-critical-edges.ll @@ -33,6 +33,11 @@ indirect: ; preds = %indirect.preheader, %indirect indirectbr i8* %2, [label %indirect, label %end] +indirect2: + ; For this test we do not want critical edges split. Adding a 2nd `indirectbr` + ; does the trick. + indirectbr i8* %2, [label %indirect, label %end] + end: ; preds = %indirect ret i32 0, !dbg !22 } diff --git a/llvm/test/Transforms/PGOProfile/Inputs/irreducible.proftext b/llvm/test/Transforms/PGOProfile/Inputs/irreducible.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/irreducible.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/irreducible.proftext @@ -14,11 +14,10 @@ _Z11irreduciblePh # Func Hash: -331779889035882993 +52047014671956012 # Num Counters: 9 # Counter Values: -100 300 99 300 @@ -27,3 +26,4 @@ 1 0 0 +0 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/irreducible_entry.proftext b/llvm/test/Transforms/PGOProfile/Inputs/irreducible_entry.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/irreducible_entry.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/irreducible_entry.proftext @@ -15,12 +15,11 @@ _Z11irreduciblePh # Func Hash: -331779889035882993 +52047014671956012 # Num Counters: 9 # Counter Values: 1 -100 300 99 300 @@ -28,3 +27,4 @@ 1 0 0 +0 diff --git a/llvm/test/Transforms/PGOProfile/irreducible.ll b/llvm/test/Transforms/PGOProfile/irreducible.ll --- a/llvm/test/Transforms/PGOProfile/irreducible.ll +++ b/llvm/test/Transforms/PGOProfile/irreducible.ll @@ -139,4 +139,4 @@ ; USE: ![[IF_END9_IRR_LOOP]] = !{!"loop_header_weight", i64 1000} ; USE: ![[SW_BB6_IRR_LOOP]] = !{!"loop_header_weight", i64 501} ; USE: ![[SW_BB15_IRR_LOOP]] = !{!"loop_header_weight", i64 100} -; USE: ![[INDIRECTGOTO_IRR_LOOP]] = !{!"loop_header_weight", i64 400} +; USE: ![[INDIRECTGOTO_IRR_LOOP]] = !{!"loop_header_weight", i64 399} diff --git a/llvm/test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll b/llvm/test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll --- a/llvm/test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll +++ b/llvm/test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll @@ -43,7 +43,10 @@ ; CHECK-LABEL: @cannot_split( ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @llvm.instrprof.increment +; CHECK: indirect: ; CHECK-NOT: call void @llvm.instrprof.increment +; CHECK: indirect2: +; CHECK-NEXT: call void @llvm.instrprof.increment define i32 @cannot_split(i8* nocapture readonly %p) { entry: %targets = alloca <2 x i8*>, align 16 @@ -56,6 +59,11 @@ br label %indirect indirect: ; preds = %entry, %indirect + indirectbr i8* %1, [label %indirect, label %end, label %indirect2] + +indirect2: + ; For this test we do not want critical edges split. Adding a 2nd `indirectbr` + ; does the trick. indirectbr i8* %1, [label %indirect, label %end] end: ; preds = %indirect diff --git a/llvm/unittests/Transforms/Utils/BasicBlockUtilsTest.cpp b/llvm/unittests/Transforms/Utils/BasicBlockUtilsTest.cpp --- a/llvm/unittests/Transforms/Utils/BasicBlockUtilsTest.cpp +++ b/llvm/unittests/Transforms/Utils/BasicBlockUtilsTest.cpp @@ -437,21 +437,23 @@ EXPECT_TRUE(PDT.verify()); } -TEST(BasicBlockUtils, SplitIndirectBrCriticalEdge) { +TEST(BasicBlockUtils, SplitIndirectBrCriticalEdgesIgnorePHIs) { LLVMContext C; std::unique_ptr M = parseIR(C, R"IR( -define void @crit_edge(i8* %cond0, i1 %cond1) { +define void @crit_edge(i8* %tgt, i1 %cond0, i1 %cond1) { entry: - indirectbr i8* %cond0, [label %bb0, label %bb1] + indirectbr i8* %tgt, [label %bb0, label %bb1, label %bb2] bb0: - br label %bb1 + br i1 %cond0, label %bb1, label %bb2 bb1: %p = phi i32 [0, %bb0], [0, %entry] - br i1 %cond1, label %bb2, label %bb3 + br i1 %cond1, label %bb3, label %bb4 bb2: ret void bb3: ret void +bb4: + ret void } )IR"); Function *F = M->getFunction("crit_edge"); @@ -460,14 +462,69 @@ BranchProbabilityInfo BPI(*F, LI); BlockFrequencyInfo BFI(*F, BPI, LI); - ASSERT_TRUE(SplitIndirectBrCriticalEdges(*F, &BPI, &BFI)); + ASSERT_TRUE(SplitIndirectBrCriticalEdges(*F, /*IgnoreBlocksWithoutPHI=*/true, + &BPI, &BFI)); // Check that successors of the split block get their probability correct. BasicBlock *BB1 = getBasicBlockByName(*F, "bb1"); BasicBlock *SplitBB = BB1->getTerminator()->getSuccessor(0); - EXPECT_EQ(2u, SplitBB->getTerminator()->getNumSuccessors()); + ASSERT_EQ(2u, SplitBB->getTerminator()->getNumSuccessors()); EXPECT_EQ(BranchProbability(1, 2), BPI.getEdgeProbability(SplitBB, 0u)); EXPECT_EQ(BranchProbability(1, 2), BPI.getEdgeProbability(SplitBB, 1u)); + + // bb2 has no PHI, so we shouldn't split bb0 -> bb2 + BasicBlock *BB0 = getBasicBlockByName(*F, "bb0"); + ASSERT_EQ(2u, BB0->getTerminator()->getNumSuccessors()); + EXPECT_EQ(BB0->getTerminator()->getSuccessor(1), + getBasicBlockByName(*F, "bb2")); +} + +TEST(BasicBlockUtils, SplitIndirectBrCriticalEdges) { + LLVMContext C; + std::unique_ptr M = parseIR(C, R"IR( +define void @crit_edge(i8* %tgt, i1 %cond0, i1 %cond1) { +entry: + indirectbr i8* %tgt, [label %bb0, label %bb1, label %bb2] +bb0: + br i1 %cond0, label %bb1, label %bb2 +bb1: + %p = phi i32 [0, %bb0], [0, %entry] + br i1 %cond1, label %bb3, label %bb4 +bb2: + ret void +bb3: + ret void +bb4: + ret void +} +)IR"); + Function *F = M->getFunction("crit_edge"); + DominatorTree DT(*F); + LoopInfo LI(DT); + BranchProbabilityInfo BPI(*F, LI); + BlockFrequencyInfo BFI(*F, BPI, LI); + + ASSERT_TRUE(SplitIndirectBrCriticalEdges(*F, /*IgnoreBlocksWithoutPHI=*/false, + &BPI, &BFI)); + + // Check that successors of the split block get their probability correct. + BasicBlock *BB1 = getBasicBlockByName(*F, "bb1"); + BasicBlock *SplitBB = BB1->getTerminator()->getSuccessor(0); + ASSERT_EQ(2u, SplitBB->getTerminator()->getNumSuccessors()); + EXPECT_EQ(BranchProbability(1, 2), BPI.getEdgeProbability(SplitBB, 0u)); + EXPECT_EQ(BranchProbability(1, 2), BPI.getEdgeProbability(SplitBB, 1u)); + + // Should split, resulting in: + // bb0 -> bb2.clone; bb2 -> split1; bb2.clone -> split, + BasicBlock *BB0 = getBasicBlockByName(*F, "bb0"); + ASSERT_EQ(2u, BB0->getTerminator()->getNumSuccessors()); + BasicBlock *BB2Clone = BB0->getTerminator()->getSuccessor(1); + BasicBlock *BB2 = getBasicBlockByName(*F, "bb2"); + EXPECT_NE(BB2Clone, BB2); + ASSERT_EQ(1u, BB2->getTerminator()->getNumSuccessors()); + ASSERT_EQ(1u, BB2Clone->getTerminator()->getNumSuccessors()); + EXPECT_EQ(BB2->getTerminator()->getSuccessor(0), + BB2Clone->getTerminator()->getSuccessor(0)); } TEST(BasicBlockUtils, SetEdgeProbability) {