Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -134,6 +134,14 @@ cl::location(LimitFloatPrecision), cl::init(0)); +static cl::opt PeelDominantCaseInLowering( + "peel-dominant-case-in-lowering", cl::Hidden, cl::init(true), + cl::desc("Peel the dominant case in switch statement in lowering")); +static cl::opt DominantCasePercentThreshold( + "dominant-case-percent-threshold", cl::Hidden, cl::init(66), + cl::desc("Set the case probability threshold (<=100) for peeling out from " + " a switch statement")); + // Limit the width of DAG chains. This is important in general to prevent // DAG-based analysis from blowing up. For example, alias analysis and // load clustering may not complete in reasonable time. It is difficult to @@ -9835,13 +9843,77 @@ BranchProbabilityInfo *BPI = FuncInfo.BPI; CaseClusterVector Clusters; Clusters.reserve(SI.getNumCases()); + + MachineBasicBlock *SwitchMBB = FuncInfo.MBB; + MachineBasicBlock *PeeledSwitchMBB = SwitchMBB; + // Set to true if the case with top probablity will be peeled out from the + // switch statement. + bool SwitchPeeled = false; + // The index of the peeled case -- only valid when SwitchPeeled is true. + unsigned PeeledIndex = 0; + // The probablity of the switch statement after peeling out the top case. + BranchProbability PeeledProb = BranchProbability::getOne(); + + // Peel the top probability case if it exceeds the threshold. + if (PeelDominantCaseInLowering && BPI && + TM.getOptLevel() != CodeGenOpt::None && + !SwitchMBB->getParent()->getFunction()->optForMinSize()) { + BranchProbability TopCaseProb = + BranchProbability(DominantCasePercentThreshold, 100); + MachineBasicBlock *PeeledCaseMBB = nullptr; + const ConstantInt *PeeledCaseVal = nullptr; + for (auto I : SI.cases()) { + int Index = I.getSuccessorIndex(); + BranchProbability Prob = BPI->getEdgeProbability(SI.getParent(), Index); + if (Prob <= TopCaseProb) + continue; + PeeledCaseMBB = FuncInfo.MBBMap[I.getCaseSuccessor()]; + PeeledCaseVal = I.getCaseValue(); + TopCaseProb = Prob; + PeeledIndex = Index; + SwitchPeeled = true; + } + if (SwitchPeeled) { + DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: " + << TopCaseProb << "\n"); + PeeledProb = TopCaseProb.getCompl(); + MachineFunction::iterator BBI(SwitchMBB); + ++BBI; + PeeledSwitchMBB = + FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock()); + FuncInfo.MF->insert(BBI, PeeledSwitchMBB); + CaseBlock CB(ISD::SETEQ, SI.getCondition(), PeeledCaseVal, nullptr, + PeeledCaseMBB, PeeledSwitchMBB, SwitchMBB, getCurSDLoc(), + TopCaseProb, PeeledProb); + ExportFromCurrentBlock(SI.getCondition()); + visitSwitchCase(CB, SwitchMBB); + } + } + for (auto I : SI.cases()) { + unsigned Index = I.getSuccessorIndex(); + if (SwitchPeeled && Index == PeeledIndex) + continue; MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()]; const ConstantInt *CaseVal = I.getCaseValue(); - BranchProbability Prob = - BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex()) - : BranchProbability(1, SI.getNumCases() + 1); - Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob)); + + auto ScaledProb = [&](BranchProbability Prob) { + unsigned NumCases = SI.getNumCases() + (SwitchPeeled ? 1 : 0); + if (!BPI || Prob == BranchProbability::getZero()) + return BranchProbability(1, NumCases); + BranchProbability P = BPI->getEdgeProbability(SI.getParent(), Index); + if (Prob == BranchProbability::getOne()) + return P; + DEBUG(dbgs() << "Scale the probablity, before scaling: " << P << "\n"); + uint32_t Numerator = P.getNumerator(); + uint32_t Denominator = P.getDenominator(); + P = BranchProbability(Numerator, Prob.scale(Denominator)); + DEBUG(dbgs() << "After scaling: " << P << "\n"); + return P; + }; + + Clusters.push_back( + CaseCluster::range(CaseVal, CaseVal, Succ, ScaledProb(PeeledProb))); } MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()]; @@ -9861,6 +9933,8 @@ unsigned MaxPop = 0; const BasicBlock *MaxBB = nullptr; for (auto I : SI.cases()) { + if (SwitchPeeled && PeeledIndex == I.getSuccessorIndex()) + continue; const BasicBlock *BB = I.getCaseSuccessor(); if (++Popularity[BB] > MaxPop) { MaxPop = Popularity[BB]; @@ -9884,7 +9958,6 @@ } // If there is only the default destination, jump there directly. - MachineBasicBlock *SwitchMBB = FuncInfo.MBB; if (Clusters.empty()) { SwitchMBB->addSuccessor(DefaultMBB); if (DefaultMBB != NextBlock(SwitchMBB)) { @@ -9918,7 +9991,16 @@ CaseClusterIt First = Clusters.begin(); CaseClusterIt Last = Clusters.end() - 1; auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB); - WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb}); + if (PeeledProb == BranchProbability::getZero()) + DefaultProb = BranchProbability::getZero(); + else if (PeeledProb != BranchProbability::getOne()) { + uint32_t Numerator = DefaultProb.getNumerator(); + uint32_t Denominator = DefaultProb.getDenominator(); + DefaultProb = BranchProbability(Numerator, PeeledProb.scale(Denominator)); + } + + WorkList.push_back( + {PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb}); while (!WorkList.empty()) { SwitchWorkListItem W = WorkList.back(); Index: test/CodeGen/Generic/MachineBranchProb.ll =================================================================== --- test/CodeGen/Generic/MachineBranchProb.ll +++ test/CodeGen/Generic/MachineBranchProb.ll @@ -19,12 +19,15 @@ i64 1, label %sw.bb i64 4, label %sw.bb i64 5, label %sw.bb1 + i64 15, label %sw.bb ], !prof !0 ; CHECK: BB#0: derived from LLVM BB %entry -; CHECK: Successors according to CFG: BB#2({{[0-9a-fx/= ]+}}75.29%) BB#4({{[0-9a-fx/= ]+}}24.71%) +; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}92.17%) BB#4({{[0-9a-fx/= ]+}}7.83%) ; CHECK: BB#4: derived from LLVM BB %entry -; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}47.62%) BB#5({{[0-9a-fx/= ]+}}52.38%) +; CHECK: Successors according to CFG: BB#2({{[0-9a-fx/= ]+}}75.29%) BB#5({{[0-9a-fx/= ]+}}24.71%) ; CHECK: BB#5: derived from LLVM BB %entry +; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}47.62%) BB#6({{[0-9a-fx/= ]+}}52.38%) +; CHECK: BB#6: derived from LLVM BB %entry ; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}36.36%) BB#3({{[0-9a-fx/= ]+}}63.64%) sw.bb: @@ -40,7 +43,7 @@ ret i32 %retval.0 } -!0 = !{!"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64} +!0 = !{!"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64, i21 1000} declare void @g(i32) Index: test/CodeGen/Generic/switch-lower-peel-top-case.ll =================================================================== --- test/CodeGen/Generic/switch-lower-peel-top-case.ll +++ test/CodeGen/Generic/switch-lower-peel-top-case.ll @@ -0,0 +1,46 @@ +; RUN: llc -debug-only=isel < %s -o /dev/null 2>&1 | FileCheck %s + +define i32 @foo(i16 signext %n) local_unnamed_addr !prof !1 { +entry: + %conv = sext i16 %n to i32 + switch i32 %conv, label %sw.epilog [ + i32 8, label %return + i32 -8826, label %sw.bb1 + i32 18312, label %sw.bb3 + i32 18568, label %sw.bb5 + i32 129, label %sw.bb7 + ], !prof !2 +; CHECK: Peeled one top case in switch stmt, prob: 0x5999999a / 0x80000000 = 70.00% +; CHECK: Scale the probablity, before scaling: 0x0020c49c / 0x80000000 = 0.10% +; CHECK: After scaling: 0x006d3a08 / 0x80000000 = 0.33% +; CHECK: Scale the probablity, before scaling: 0x00418937 / 0x80000000 = 0.20% +; CHECK: After scaling: 0x00da740d / 0x80000000 = 0.67% +; CHECK: Scale the probablity, before scaling: 0x25c28f5c / 0x80000000 = 29.50% +; CHECK: After scaling: 0x7ddddddf / 0x80000000 = 98.33% +; CHECK: Scale the probablity, before scaling: 0x003126e9 / 0x80000000 = 0.15% +; CHECK: After scaling: 0x00a3d709 / 0x80000000 = 0.50% +; CHECK: Case clusters: -8826 8 129 18312 + +sw.bb1: + br label %return + +sw.bb3: + br label %return + +sw.bb5: + br label %return + +sw.bb7: + br label %return + +sw.epilog: + br label %return + +return: + %retval = phi i32 [ 0, %sw.epilog ], [ 5, %sw.bb7 ], [ 4, %sw.bb5 ], [ 3, %sw.bb3 ], [ 2, %sw.bb1 ], [ 1, %entry ] + ret i32 %retval +} + +!1 = !{!"function_entry_count", i64 100000} +!2 = !{!"branch_weights", i32 50, i32 100, i32 200, i32 29500, i32 70000, i32 150} + Index: test/CodeGen/X86/switch.ll =================================================================== --- test/CodeGen/X86/switch.ll +++ test/CodeGen/X86/switch.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -jump-table-density=40 -verify-machineinstrs | FileCheck %s +; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -jump-table-density=40 -peel-dominant-case-in-lowering=false -verify-machineinstrs | FileCheck %s ; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -O0 -jump-table-density=40 -verify-machineinstrs | FileCheck --check-prefix=NOOPT %s declare void @g(i32)