Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -369,6 +369,10 @@ MachineBasicBlock *SwitchMBB, MachineBasicBlock *DefaultMBB); + /// Peel the top probability case if it exceeds the threshold + MachineBasicBlock *peelOneDominantWorkItem(const SwitchInst &SI, + CaseClusterVector &Clusters, + BranchProbability &PeeledProb); /// A class which encapsulates all of the information needed to generate a /// stack protector check and signals to isel via its state being initialized Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -134,6 +134,12 @@ cl::location(LimitFloatPrecision), cl::init(0)); +static cl::opt DominantCasePercentThreshold( + "dominant-case-percent-threshold", cl::Hidden, cl::init(66), + cl::desc("Set the case probability threshold for peeling the case from a " + "switch statement. A value greater than 100 will void this " + "optimization")); + // Limit the width of DAG chains. This is important in general to prevent // DAG-based analysis from blowing up. For example, alias analysis and // load clustering may not complete in reasonable time. It is difficult to @@ -9830,6 +9836,67 @@ SwitchCases.push_back(CB); } +inline static BranchProbability +scaleCaseProbality(BranchProbability CaseProb, BranchProbability SwitchProb) { + if (SwitchProb == BranchProbability::getZero()) + return BranchProbability::getZero(); + return BranchProbability(CaseProb.getNumerator(), + SwitchProb.scale(CaseProb.getDenominator())); +} + +// Peel the top probability case if it exceeds the threshold. +MachineBasicBlock * +SelectionDAGBuilder::peelOneDominantWorkItem(const SwitchInst &SI, + CaseClusterVector &Clusters, + BranchProbability &PeeledProb) { + MachineBasicBlock *SwitchMBB = FuncInfo.MBB; + if (DominantCasePercentThreshold > 100 || !FuncInfo.BPI || + TM.getOptLevel() == CodeGenOpt::None || + SwitchMBB->getParent()->getFunction()->optForMinSize()) + return SwitchMBB; + + BranchProbability TopCaseProb = + BranchProbability(DominantCasePercentThreshold, 100); + unsigned PeeledCaseIndex = 0; + bool SwitchPeeled = false; + for (unsigned Index = 0; Index < Clusters.size(); ++Index) { + CaseCluster &CC = Clusters[Index]; + if (CC.Low != CC.High) + continue; + if (CC.Prob <= TopCaseProb) + continue; + TopCaseProb = CC.Prob; + PeeledCaseIndex = Index; + SwitchPeeled = true; + } + if (!SwitchPeeled) + return SwitchMBB; + + DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: " << TopCaseProb + << "\n"); + PeeledProb = TopCaseProb.getCompl(); + MachineFunction::iterator BBI(SwitchMBB); + ++BBI; + MachineBasicBlock *PeeledSwitchMBB = + FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock()); + FuncInfo.MF->insert(BBI, PeeledSwitchMBB); + auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex; + + ExportFromCurrentBlock(SI.getCondition()); + SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt, + nullptr, nullptr, PeeledProb}; + lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB); + + Clusters.erase(PeeledCaseIt); + for (CaseCluster &CC : Clusters) { + DEBUG(dbgs() << "Scale the probablity for one cluster, before scaling: " + << CC.Prob << "\n"); + CC.Prob = scaleCaseProbality(CC.Prob, PeeledProb); + DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n"); + } + return PeeledSwitchMBB; +} + void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // Extract cases from the switch. BranchProbabilityInfo *BPI = FuncInfo.BPI; @@ -9844,6 +9911,11 @@ Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob)); } + // The probablity of the switch statement after peeling out the top case. + BranchProbability PeeledProb = BranchProbability::getOne(); + MachineBasicBlock *PeeledSwitchMBB = + peelOneDominantWorkItem(SI, Clusters, PeeledProb); + MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()]; // Cluster adjacent cases with the same destination. We do this at all @@ -9917,8 +9989,14 @@ SwitchWorkList WorkList; CaseClusterIt First = Clusters.begin(); CaseClusterIt Last = Clusters.end() - 1; - auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB); - WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb}); + auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB); + // Scale the branchprobability for DefaultMBB if the peel occurs and + // DefaultMBB is not replaced. + if (PeeledProb != BranchProbability::getOne() && + DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()]) + DefaultProb = scaleCaseProbality(DefaultProb, PeeledProb); + WorkList.push_back( + {PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb}); while (!WorkList.empty()) { SwitchWorkListItem W = WorkList.back(); Index: test/CodeGen/Generic/MachineBranchProb.ll =================================================================== --- test/CodeGen/Generic/MachineBranchProb.ll +++ test/CodeGen/Generic/MachineBranchProb.ll @@ -19,12 +19,15 @@ i64 1, label %sw.bb i64 4, label %sw.bb i64 5, label %sw.bb1 + i64 15, label %sw.bb ], !prof !0 ; CHECK: BB#0: derived from LLVM BB %entry -; CHECK: Successors according to CFG: BB#2({{[0-9a-fx/= ]+}}75.29%) BB#4({{[0-9a-fx/= ]+}}24.71%) +; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}92.17%) BB#4({{[0-9a-fx/= ]+}}7.83%) ; CHECK: BB#4: derived from LLVM BB %entry -; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}47.62%) BB#5({{[0-9a-fx/= ]+}}52.38%) +; CHECK: Successors according to CFG: BB#2({{[0-9a-fx/= ]+}}75.29%) BB#5({{[0-9a-fx/= ]+}}24.71%) ; CHECK: BB#5: derived from LLVM BB %entry +; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}47.62%) BB#6({{[0-9a-fx/= ]+}}52.38%) +; CHECK: BB#6: derived from LLVM BB %entry ; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}36.36%) BB#3({{[0-9a-fx/= ]+}}63.64%) sw.bb: @@ -40,7 +43,7 @@ ret i32 %retval.0 } -!0 = !{!"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64} +!0 = !{!"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64, i21 1000} declare void @g(i32) Index: test/CodeGen/Generic/switch-lower-peel-top-case.ll =================================================================== --- test/CodeGen/Generic/switch-lower-peel-top-case.ll +++ test/CodeGen/Generic/switch-lower-peel-top-case.ll @@ -0,0 +1,46 @@ +; RUN: llc -debug-only=isel < %s -o /dev/null 2>&1 | FileCheck %s + +define i32 @foo(i16 signext %n) local_unnamed_addr !prof !1 { +entry: + %conv = sext i16 %n to i32 + switch i32 %conv, label %sw.epilog [ + i32 8, label %return + i32 -8826, label %sw.bb1 + i32 18312, label %sw.bb3 + i32 18568, label %sw.bb5 + i32 129, label %sw.bb7 + ], !prof !2 +; CHECK: Peeled one top case in switch stmt, prob: 0x5999999a / 0x80000000 = 70.00% +; CHECK: Scale the probablity for one cluster, before scaling: 0x0020c49c / 0x80000000 = 0.10% +; CHECK: After scaling: 0x006d3a08 / 0x80000000 = 0.33% +; CHECK: Scale the probablity for one cluster, before scaling: 0x00418937 / 0x80000000 = 0.20% +; CHECK: After scaling: 0x00da740d / 0x80000000 = 0.67% +; CHECK: Scale the probablity for one cluster, before scaling: 0x25c28f5c / 0x80000000 = 29.50% +; CHECK: After scaling: 0x7ddddddf / 0x80000000 = 98.33% +; CHECK: Scale the probablity for one cluster, before scaling: 0x003126e9 / 0x80000000 = 0.15% +; CHECK: After scaling: 0x00a3d709 / 0x80000000 = 0.50% +; CHECK: Case clusters: -8826 8 129 18312 + +sw.bb1: + br label %return + +sw.bb3: + br label %return + +sw.bb5: + br label %return + +sw.bb7: + br label %return + +sw.epilog: + br label %return + +return: + %retval = phi i32 [ 0, %sw.epilog ], [ 5, %sw.bb7 ], [ 4, %sw.bb5 ], [ 3, %sw.bb3 ], [ 2, %sw.bb1 ], [ 1, %entry ] + ret i32 %retval +} + +!1 = !{!"function_entry_count", i64 100000} +!2 = !{!"branch_weights", i32 50, i32 100, i32 200, i32 29500, i32 70000, i32 150} + Index: test/CodeGen/X86/switch.ll =================================================================== --- test/CodeGen/X86/switch.ll +++ test/CodeGen/X86/switch.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -jump-table-density=40 -verify-machineinstrs | FileCheck %s +; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -jump-table-density=40 -dominant-case-percent-threshold=101 -verify-machineinstrs | FileCheck %s ; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -O0 -jump-table-density=40 -verify-machineinstrs | FileCheck --check-prefix=NOOPT %s declare void @g(i32)