Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -369,6 +369,10 @@ MachineBasicBlock *SwitchMBB, MachineBasicBlock *DefaultMBB); + /// Peel the top probability case if it exceeds the threshold + MachineBasicBlock *peelDominantCaseCluster(const SwitchInst &SI, + CaseClusterVector &Clusters, + BranchProbability &PeeledCaseProb); /// A class which encapsulates all of the information needed to generate a /// stack protector check and signals to isel via its state being initialized Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -134,6 +134,12 @@ cl::location(LimitFloatPrecision), cl::init(0)); +static cl::opt SwitchPeelThreshold( + "switch-peel-threshold", cl::Hidden, cl::init(66), + cl::desc("Set the case probability threshold for peeling the case from a " + "switch statement. A value greater than 100 will void this " + "optimization")); + // Limit the width of DAG chains. This is important in general to prevent // DAG-based analysis from blowing up. For example, alias analysis and // load clustering may not complete in reasonable time. It is difficult to @@ -9834,6 +9840,74 @@ SwitchCases.push_back(CB); } +// Scale CaseProb after peeling a case with the probablity of PeeledCaseProb +// from the swith statement. +static BranchProbability scaleCaseProbality(BranchProbability CaseProb, + BranchProbability PeeledCaseProb) { + if (PeeledCaseProb == BranchProbability::getOne()) + return BranchProbability::getZero(); + BranchProbability SwitchProb = PeeledCaseProb.getCompl(); + return BranchProbability(CaseProb.getNumerator(), + SwitchProb.scale(CaseProb.getDenominator())); +} + +// Try to peel the top probability case if it exceeds the threshold. +// Return current MachineBasicBlock for the switch statement if the peeling +// does not occur. +// If the peeling is performed, return the newly created MachineBasicBlock +// for the peeled switch statement. Also update Clusters to remove the peeled +// case. PeeledCaseProb is the BranchProbability for the peeled case. +MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster( + const SwitchInst &SI, CaseClusterVector &Clusters, + BranchProbability &PeeledCaseProb) { + MachineBasicBlock *SwitchMBB = FuncInfo.MBB; + // Don't perform if there is only one cluster or optimizing for size. + if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 || + TM.getOptLevel() == CodeGenOpt::None || + SwitchMBB->getParent()->getFunction()->optForMinSize()) + return SwitchMBB; + + BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100); + unsigned PeeledCaseIndex = 0; + bool SwitchPeeled = false; + for (unsigned Index = 0; Index < Clusters.size(); ++Index) { + CaseCluster &CC = Clusters[Index]; + if (CC.Prob < TopCaseProb) + continue; + TopCaseProb = CC.Prob; + PeeledCaseIndex = Index; + SwitchPeeled = true; + } + if (!SwitchPeeled) + return SwitchMBB; + + DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: " << TopCaseProb + << "\n"); + + // Record the MBB for the peeled switch statement. + MachineFunction::iterator BBI(SwitchMBB); + ++BBI; + MachineBasicBlock *PeeledSwitchMBB = + FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock()); + FuncInfo.MF->insert(BBI, PeeledSwitchMBB); + + ExportFromCurrentBlock(SI.getCondition()); + auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex; + SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt, + nullptr, nullptr, TopCaseProb.getCompl()}; + lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB); + + Clusters.erase(PeeledCaseIt); + for (CaseCluster &CC : Clusters) { + DEBUG(dbgs() << "Scale the probablity for one cluster, before scaling: " + << CC.Prob << "\n"); + CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb); + DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n"); + } + PeeledCaseProb = TopCaseProb; + return PeeledSwitchMBB; +} + void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // Extract cases from the switch. BranchProbabilityInfo *BPI = FuncInfo.BPI; @@ -9887,9 +9961,15 @@ } } + // The branch probablity of the peeled case. + BranchProbability PeeledCaseProb = BranchProbability::getZero(); + MachineBasicBlock *PeeledSwitchMBB = + peelDominantCaseCluster(SI, Clusters, PeeledCaseProb); + // If there is only the default destination, jump there directly. MachineBasicBlock *SwitchMBB = FuncInfo.MBB; if (Clusters.empty()) { + assert(PeeledSwitchMBB == SwitchMBB); SwitchMBB->addSuccessor(DefaultMBB); if (DefaultMBB != NextBlock(SwitchMBB)) { DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, @@ -9921,8 +10001,14 @@ SwitchWorkList WorkList; CaseClusterIt First = Clusters.begin(); CaseClusterIt Last = Clusters.end() - 1; - auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB); - WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb}); + auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB); + // Scale the branchprobability for DefaultMBB if the peel occurs and + // DefaultMBB is not replaced. + if (PeeledCaseProb != BranchProbability::getZero() && + DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()]) + DefaultProb = scaleCaseProbality(DefaultProb, PeeledCaseProb); + WorkList.push_back( + {PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb}); while (!WorkList.empty()) { SwitchWorkListItem W = WorkList.back(); Index: llvm/trunk/test/CodeGen/Generic/MachineBranchProb.ll =================================================================== --- llvm/trunk/test/CodeGen/Generic/MachineBranchProb.ll +++ llvm/trunk/test/CodeGen/Generic/MachineBranchProb.ll @@ -19,12 +19,15 @@ i64 1, label %sw.bb i64 4, label %sw.bb i64 5, label %sw.bb1 + i64 15, label %sw.bb ], !prof !0 ; CHECK: BB#0: derived from LLVM BB %entry -; CHECK: Successors according to CFG: BB#2({{[0-9a-fx/= ]+}}75.29%) BB#4({{[0-9a-fx/= ]+}}24.71%) +; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}92.17%) BB#4({{[0-9a-fx/= ]+}}7.83%) ; CHECK: BB#4: derived from LLVM BB %entry -; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}47.62%) BB#5({{[0-9a-fx/= ]+}}52.38%) +; CHECK: Successors according to CFG: BB#2({{[0-9a-fx/= ]+}}75.29%) BB#5({{[0-9a-fx/= ]+}}24.71%) ; CHECK: BB#5: derived from LLVM BB %entry +; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}47.62%) BB#6({{[0-9a-fx/= ]+}}52.38%) +; CHECK: BB#6: derived from LLVM BB %entry ; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}36.36%) BB#3({{[0-9a-fx/= ]+}}63.64%) sw.bb: @@ -40,7 +43,7 @@ ret i32 %retval.0 } -!0 = !{!"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64} +!0 = !{!"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64, i21 1000} declare void @g(i32) Index: llvm/trunk/test/CodeGen/SystemZ/loop-03.ll =================================================================== --- llvm/trunk/test/CodeGen/SystemZ/loop-03.ll +++ llvm/trunk/test/CodeGen/SystemZ/loop-03.ll @@ -3,7 +3,7 @@ ; FP128 registers part of the callee saved registers list in order to avoid ; spilling / reloading. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s +; RUN: llc -switch-peel-threshold=101 < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s %0 = type { %0*, %0*, %0*, i32, %1*, i64, i64, i64, i64, i64, i64, %2, %5, %7 } %1 = type { i32, i32, i32 (%1*, i64, i32)*, i32 (%1*, i64, i64, i32, i8**)*, i32 (%1*, i64, i64, i64, i32)*, i32 (%1*)*, void (i8*)*, i8*, i8* } Index: llvm/trunk/test/CodeGen/X86/switch-bt.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/switch-bt.ll +++ llvm/trunk/test/CodeGen/X86/switch-bt.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=x86_64-- -asm-verbose=false < %s -jump-table-density=40 | FileCheck %s +; RUN: llc -mtriple=x86_64-- -asm-verbose=false < %s -jump-table-density=40 -switch-peel-threshold=101 | FileCheck %s ; This switch should use bit tests, and the third bit test case is just ; testing for one possible value, so it doesn't need a bt. Index: llvm/trunk/test/CodeGen/X86/switch-lower-peel-top-case.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/switch-lower-peel-top-case.ll +++ llvm/trunk/test/CodeGen/X86/switch-lower-peel-top-case.ll @@ -0,0 +1,135 @@ +; RUN: llc -stop-after=isel < %s | FileCheck %s + +define i32 @foo(i32 %n) !prof !1 { +entry: + switch i32 %n, label %bb_default [ + i32 8, label %bb1 + i32 -8826, label %bb2 + i32 18312, label %bb3 + i32 18568, label %bb4 + i32 129, label %bb5 + ], !prof !2 + +; CHECK: successors: %[[PEELED_CASE_LABEL:.*]](0x5999999a), %[[PEELED_SWITCH_LABEL:.*]](0x26666666) +; CHECK: %[[VAL:[0-9]+]]:gr32 = COPY %edi +; CHECK: %{{[0-9]+}}:gr32 = SUB32ri %[[VAL]], 18568, implicit-def %eflags +; CHECK: JE_1 %[[PEELED_CASE_LABEL]], implicit %eflags +; CHECK: JMP_1 %[[PEELED_SWITCH_LABEL]] +; CHECK: [[PEELED_SWITCH_LABEL]]: +; CHECK: successors: %[[BB1_LABEL:.*]](0x0206d3a0), %[[BB2_LABEL:.*]](0x7df92c60) +; CHECK: %{{[0-9]+}}:gr32 = SUB32ri %[[VAL]], 18311, implicit-def %eflags +; CHECK: JG_1 %[[BB2_LABEL]], implicit %eflags +; CHECK: JMP_1 %[[BB1_LABEL]] +; CHECK: [[BB1_LABEL]]: +; CHECK: successors: %[[CASE2_LABEL:.*]](0x35e50d5b), %[[BB3_LABEL:.*]](0x4a1af2a5) +; CHECK: %{{[0-9]+}}:gr32 = SUB32ri %[[VAL]], -8826, implicit-def %eflags +; CHECK: JE_1 %[[CASE2_LABEL]], implicit %eflags +; CHECK: JMP_1 %[[BB3_LABEL]] +; CHECK: [[BB3_LABEL]] +; CHECK: successors: %[[CASE5_LABEL:.*]](0x45d173c8), %[[BB4_LABEL:.*]](0x3a2e8c38) +; CHECK: %{{[0-9]+}}:gr32 = SUB32ri %[[VAL]], 129, implicit-def %eflags +; CHECK: JE_1 %[[CASE5_LABEL]], implicit %eflags +; CHECK: JMP_1 %[[BB4_LABEL]] +; CHECK: [[BB4_LABEL:.*]]: +; CHECK: successors: %[[CASE1_LABEL:.*]](0x66666666), %[[DEFAULT_BB_LABEL:.*]](0x1999999a) +; CHECK: %{{[0-9]+}}:gr32 = SUB32ri8 %[[VAL]], 8, implicit-def %eflags +; CHECK: JE_1 %[[CASE1_LABEL]], implicit %eflags +; CHECK: JMP_1 %[[DEFAULT_BB_LABEL]] +; CHECK: [[BB2_LABEL]]: +; CHECK: successors: %[[CASE3_LABEL:.*]](0x7fe44107), %[[DEFAULT_BB_LABEL]](0x001bbef9) +; CHECK: %{{[0-9]+}}:gr32 = SUB32ri %[[VAL]], 18312, implicit-def %eflags +; CHECK: JE_1 %[[CASE3_LABEL]], implicit %eflags +; CHECK: JMP_1 %[[DEFAULT_BB_LABEL]] + +bb1: + br label %return +bb2: + br label %return +bb3: + br label %return +bb4: + br label %return +bb5: + br label %return +bb_default: + br label %return + +return: + %retval = phi i32 [ 0, %bb_default ], [ 5, %bb5 ], [ 4, %bb4 ], [ 3, %bb3 ], [ 2, %bb2 ], [ 1, %bb1 ] + ret i32 %retval +} + +; Test the peeling of the merged cases value 85 and 86. +define i32 @foo1(i32 %n) !prof !1 { +entry: + switch i32 %n, label %bb_default [ + i32 -40, label %bb1 + i32 86, label %bb2 + i32 85, label %bb2 + i32 1, label %bb3 + i32 5, label %bb4 + i32 7, label %bb5 + i32 49, label %bb6 + ], !prof !3 + +; CHECK: successors: %[[PEELED_CASE_LABEL:.*]](0x59999999), %[[PEELED_SWITCH_LABEL:.*]](0x26666667) +; CHECK: %[[VAL:[0-9]+]]:gr32 = COPY %edi +; CHECK: %{{[0-9]+}}:gr32 = ADD32ri8 %{{[0-9]+}}, -85, implicit-def dead %eflags +; CHECK: %{{[0-9]+}}:gr32 = SUB32ri8 %{{[0-9]+}}, 2, implicit-def %eflags +; CHECK: JB_1 %[[PEELED_CASE_LABEL]], implicit %eflags +; CHECK: JMP_1 %[[PEELED_SWITCH_LABEL]] +; CHECK: [[PEELED_SWITCH_LABEL]]: +; CHECK: successors: %[[BB1_LABEL:.*]](0x0088888a), %[[BB2_LABEL:.*]](0x7f777776) +; CHECK: %{{[0-9]+}}:gr32 = SUB32ri8 %[[VAL]], 4, implicit-def %eflags +; CHECK: JG_1 %[[BB2_LABEL]], implicit %eflags +; CHECK: JMP_1 %[[BB1_LABEL]] +; CHECK: [[BB1_LABEL]]: +; CHECK: successors: %[[CASE4_LABEL:.*]](0x7f775a4f), %[[BB3_LABEL:.*]](0x0088a5b1) +; CHECK: %{{[0-9]+}}:gr32 = SUB32ri8 %[[VAL]], 1, implicit-def %eflags +; CHECK: JE_1 %[[CASE4_LABEL]], implicit %eflags +; CHECK: JMP_1 %[[BB3_LABEL]] +; CHECK: [[BB3_LABEL]]: +; CHECK: successors: %[[CASE1_LABEL:.*]](0x66666666), %[[DEFAULT_BB_LABEL:.*]](0x1999999a) +; CHECK: %{{[0-9]+}}:gr32 = SUB32ri8 %[[VAL]], -40, implicit-def %eflags +; CHECK: JE_1 %[[CASE1_LABEL]], implicit %eflags +; CHECK: JMP_1 %[[DEFAULT_BB_LABEL]] +; CHECK: [[BB2_LABEL]]: +; CHECK: successors: %[[CASE5_LABEL:.*]](0x00000000), %[[BB4_LABEL:.*]](0x80000000) +; CHECK: %{{[0-9]+}}:gr32 = SUB32ri8 %[[VAL]], 5, implicit-def %eflags +; CHECK: JE_1 %[[CASE5_LABEL]], implicit %eflags +; CHECK: JMP_1 %[[BB4_LABEL]] +; CHECK: [[BB4_LABEL]]: +; CHECK: successors: %[[CASE6_LABEL:.*]](0x00000000), %[[BB5_LABEL:.*]](0x80000000) +; CHECK: %{{[0-9]+}}:gr32 = SUB32ri8 %[[VAL]], 7, implicit-def %eflags +; CHECK: JE_1 %[[CASE6_LABEL]], implicit %eflags +; CHECK: JMP_1 %[[BB5_LABEL]] +; CHECK: [[BB5_LABEL]]: +; CHECK: successors: %[[CASE7_LABEL:.*]](0x00000000), %[[DEFAULT_BB_LABEL]](0x80000000) +; CHECK: %{{[0-9]+}}:gr32 = SUB32ri8 %[[VAL]], 49, implicit-def %eflags +; CHECK: JE_1 %[[CASE7_LABEL]], implicit %eflags +; CHECK: JMP_1 %[[DEFAULT_BB_LABEL]] + + +bb1: + br label %return +bb2: + br label %return +bb3: + br label %return +bb4: + br label %return +bb5: + br label %return +bb6: + br label %return +bb_default: + br label %return + +return: + %retval = phi i32 [ 0, %bb_default ], [ 6, %bb6 ], [ 5, %bb5 ], [ 4, %bb4 ], [ 3, %bb3 ], [ 2, %bb2 ], [ 1, %bb1 ] + ret i32 %retval +} +!1 = !{!"function_entry_count", i64 100000} +!2 = !{!"branch_weights", i32 50, i32 100, i32 200, i32 29500, i32 70000, i32 150} +!3 = !{!"branch_weights", i32 50, i32 100, i32 500, i32 69500, i32 29850, i32 0, i32 0, i32 0} + Index: llvm/trunk/test/CodeGen/X86/switch.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/switch.ll +++ llvm/trunk/test/CodeGen/X86/switch.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -jump-table-density=40 -verify-machineinstrs | FileCheck %s +; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -jump-table-density=40 -switch-peel-threshold=101 -verify-machineinstrs | FileCheck %s ; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -O0 -jump-table-density=40 -verify-machineinstrs | FileCheck --check-prefix=NOOPT %s declare void @g(i32)