Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -284,17 +284,18 @@ struct BitTestBlock { BitTestBlock(APInt F, APInt R, const Value* SV, - unsigned Rg, MVT RgVT, bool E, + unsigned Rg, MVT RgVT, bool E, bool CR, MachineBasicBlock* P, MachineBasicBlock* D, BitTestInfo C): First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), - Parent(P), Default(D), Cases(std::move(C)) { } + ContiguousRange(CR), Parent(P), Default(D), Cases(std::move(C)) { } APInt First; APInt Range; const Value *SValue; unsigned Reg; MVT RegVT; bool Emitted; + bool ContiguousRange; MachineBasicBlock *Parent; MachineBasicBlock *Default; BitTestInfo Cases; Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7740,10 +7740,21 @@ .getSizeInBits(); assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!"); - if (Low.isNonNegative() && High.slt(BitWidth)) { - // Optimize the case where all the case values fit in a - // word without having to subtract minValue. In this case, - // we can optimize away the subtraction. + // Check if the clusters cover a contiguous range such that no value in the + // range will jump to the default statement. + bool ContiguousRange = true; + for (int64_t I = First + 1; I <= Last; ++I) { + if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) { + ContiguousRange = false; + break; + } + } + + if (!ContiguousRange && Low.isNonNegative() && High.slt(BitWidth)) { + // Optimize the case where all the case values fit in a word without having + // to subtract minValue. In this case, we can optimize away the subtraction. 
+ // When ContiguousRange is true, we can save a bit test and a branch so we + // still do subtraction. LowBound = APInt::getNullValue(Low.getBitWidth()); CmpRange = High; } else { @@ -7788,8 +7799,8 @@ BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight)); } BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange), - SI->getCondition(), -1U, MVT::Other, false, nullptr, - nullptr, std::move(BTI)); + SI->getCondition(), -1U, MVT::Other, false, + ContiguousRange, nullptr, nullptr, std::move(BTI)); BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High, BitTestCases.size() - 1, TotalWeight); Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1499,25 +1499,32 @@ FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code - if (j+1 != ej) - SDB->visitBitTestCase(SDB->BitTestCases[i], - SDB->BitTestCases[i].Cases[j+1].ThisBB, - UnhandledWeight, - SDB->BitTestCases[i].Reg, - SDB->BitTestCases[i].Cases[j], - FuncInfo->MBB); + + // If all cases cover a contiguous range, it is not necessary to jump to + // the default block after the last bit test fails. This is because the + // range check during bit test header creation has guaranteed that every + // case here doesn't go outside the range. 
+ MachineBasicBlock *NextMBB; + if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej) + NextMBB = SDB->BitTestCases[i].Cases[j + 1].TargetBB; + else if (j + 1 != ej) + NextMBB = SDB->BitTestCases[i].Cases[j + 1].ThisBB; else - SDB->visitBitTestCase(SDB->BitTestCases[i], - SDB->BitTestCases[i].Default, - UnhandledWeight, - SDB->BitTestCases[i].Reg, - SDB->BitTestCases[i].Cases[j], - FuncInfo->MBB); + NextMBB = SDB->BitTestCases[i].Default; + SDB->visitBitTestCase(SDB->BitTestCases[i], + NextMBB, + UnhandledWeight, + SDB->BitTestCases[i].Reg, + SDB->BitTestCases[i].Cases[j], + FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); + + if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej) + break; } // Update PHI Nodes Index: test/CodeGen/X86/switch.ll =================================================================== --- test/CodeGen/X86/switch.ll +++ test/CodeGen/X86/switch.ll @@ -116,6 +116,10 @@ ; This could be lowered as a jump table, but bit tests is more efficient. ; CHECK-LABEL: bt_is_better +; The bit test on 2,5,8 is unnecessary as all cases cover the range [0, 8]. +; The range check guarantees that cases other than 0,3,6 and 1,4,7 must be +; in 2,5,8. +; ; 73 = 2^0 + 2^3 + 2^6 ; CHECK: movl $73 ; CHECK: btl @@ -123,8 +127,8 @@ ; CHECK: movl $146 ; CHECK: btl ; 292 = 2^2 + 2^5 + 2^8 -; CHECK: movl $292 -; CHECK: btl +; CHECK-NOT: movl $292 +; CHECK-NOT: btl } @@ -410,6 +414,9 @@ ; Cases 1,4,7 have a very large branch weight (which shouldn't overflow), so ; their bit test should come first. 0,3,6 and 2,5,8,9 both have a weight of 12, ; but the latter set has more cases, so should be tested for earlier. +; The bit test on 0,3,6 is unnecessary as all cases cover the range [0, 9]. +; The range check guarantees that cases other than 1,4,7 and 2,5,8,9 must be +; in 0,3,6. 
; CHECK-LABEL: bt_order_by_weight ; 146 = 2^1 + 2^4 + 2^7 @@ -419,8 +426,8 @@ ; CHECK: movl $804 ; CHECK: btl ; 73 = 2^0 + 2^3 + 2^6 -; CHECK: movl $73 -; CHECK: btl +; CHECK-NOT: movl $73 +; CHECK-NOT: btl } !1 = !{!"branch_weights",