Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -284,17 +284,18 @@ struct BitTestBlock { BitTestBlock(APInt F, APInt R, const Value* SV, - unsigned Rg, MVT RgVT, bool E, + unsigned Rg, MVT RgVT, bool E, bool CR, MachineBasicBlock* P, MachineBasicBlock* D, BitTestInfo C): First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), - Parent(P), Default(D), Cases(std::move(C)) { } + ContiguousRange(CR), Parent(P), Default(D), Cases(std::move(C)) { } APInt First; APInt Range; const Value *SValue; unsigned Reg; MVT RegVT; bool Emitted; + bool ContiguousRange; MachineBasicBlock *Parent; MachineBasicBlock *Default; BitTestInfo Cases; Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7740,12 +7740,22 @@ .getSizeInBits(); assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!"); - if (Low.isNonNegative() && High.slt(BitWidth)) { - // Optimize the case where all the case values fit in a - // word without having to subtract minValue. In this case, - // we can optimize away the subtraction. + // Check if the clusters cover a contiguous range such that no value in the + // range will jump to the default statement. + bool ContiguousRange = true; + for (int64_t I = First + 1; I <= Last; ++I) { + if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) { + ContiguousRange = false; + break; + } + } + + if (Low.isStrictlyPositive() && High.slt(BitWidth)) { + // Optimize the case where all the case values fit in a word without having + // to subtract minValue. In this case, we can optimize away the subtraction. LowBound = APInt::getNullValue(Low.getBitWidth()); CmpRange = High; + ContiguousRange = false; } else { LowBound = Low; CmpRange = High - Low; @@ -7788,8 +7798,8 @@ BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight)); } BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange), - SI->getCondition(), -1U, MVT::Other, false, nullptr, - nullptr, std::move(BTI)); + SI->getCondition(), -1U, MVT::Other, false, + ContiguousRange, nullptr, nullptr, std::move(BTI)); BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High, BitTestCases.size() - 1, TotalWeight); Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1499,25 +1499,32 @@ FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code - if (j+1 != ej) - SDB->visitBitTestCase(SDB->BitTestCases[i], - SDB->BitTestCases[i].Cases[j+1].ThisBB, - UnhandledWeight, - SDB->BitTestCases[i].Reg, - SDB->BitTestCases[i].Cases[j], - FuncInfo->MBB); + + // If all cases cover a contiguous range, it is not necessary to jump to + // the default block after the last bit test fails. This is because the + // range check during bit test header creation has guaranteed that every + // case here doesn't go outside the range. + MachineBasicBlock *NextMBB; + if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej) + NextMBB = SDB->BitTestCases[i].Cases[j + 1].TargetBB; + else if (j + 1 != ej) + NextMBB = SDB->BitTestCases[i].Cases[j + 1].ThisBB; else - SDB->visitBitTestCase(SDB->BitTestCases[i], - SDB->BitTestCases[i].Default, - UnhandledWeight, - SDB->BitTestCases[i].Reg, - SDB->BitTestCases[i].Cases[j], - FuncInfo->MBB); + NextMBB = SDB->BitTestCases[i].Default; + SDB->visitBitTestCase(SDB->BitTestCases[i], + NextMBB, + UnhandledWeight, + SDB->BitTestCases[i].Reg, + SDB->BitTestCases[i].Cases[j], + FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); + + if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej) + break; } // Update PHI Nodes Index: test/CodeGen/X86/switch.ll =================================================================== --- test/CodeGen/X86/switch.ll +++ test/CodeGen/X86/switch.ll @@ -116,6 +116,10 @@ ; This could be lowered as a jump table, but bit tests is more efficient. ; CHECK-LABEL: bt_is_better +; The bit test on 2,5,8 is unnecessary as all cases cover the rage [0, 8]. +; The range check guarantees that cases other than 0,3,6 and 1,4,7 must be +; in 2,5,8. +; ; 73 = 2^0 + 2^3 + 2^6 ; CHECK: movl $73 ; CHECK: btl @@ -123,7 +127,76 @@ ; CHECK: movl $146 ; CHECK: btl ; 292 = 2^2 + 2^5 + 2^8 -; CHECK: movl $292 +; CHECK-NOT: movl $292 +; CHECK-NOT: btl +} + +define void @bt_is_better2(i32 %x) { +entry: + switch i32 %x, label %return [ + i32 0, label %bb0 + i32 3, label %bb0 + i32 6, label %bb0 + i32 1, label %bb1 + i32 4, label %bb1 + i32 7, label %bb1 + i32 2, label %bb2 + i32 8, label %bb2 + + ] +bb0: tail call void @g(i32 0) br label %return +bb1: tail call void @g(i32 1) br label %return +bb2: tail call void @g(i32 2) br label %return +return: ret void + +; This will also be lowered as bit test, but as the range [0,8] is not fully +; covered (5 missing), the default statement can be jumped to and we end up +; with one more branch. +; CHECK-LABEL: bt_is_better2 +; +; 73 = 2^0 + 2^3 + 2^6 +; CHECK: movl $73 +; CHECK: btl +; 146 = 2^1 + 2^4 + 2^7 +; CHECK: movl $146 +; CHECK: btl +; 260 = 2^2 + 2^8 +; CHECK: movl $260 +; CHECK: btl +} + +define void @bt_is_better3(i32 %x) { +entry: + switch i32 %x, label %return [ + i32 10, label %bb0 + i32 13, label %bb0 + i32 16, label %bb0 + i32 11, label %bb1 + i32 14, label %bb1 + i32 17, label %bb1 + i32 12, label %bb2 + i32 18, label %bb2 + + ] +bb0: tail call void @g(i32 0) br label %return +bb1: tail call void @g(i32 1) br label %return +bb2: tail call void @g(i32 2) br label %return +return: ret void + +; We don't have to subtract 10 from the case value to let the range become +; [0, 8], as each value in the range [10, 18] can be represented by bits in a +; word. Then we still need a branch to jump to the default statement for the +; range [0, 10). +; CHECK-LABEL: bt_is_better3 +; +; 74752 = 2^10 + 2^13 + 2^16 +; CHECK: movl $74752 +; CHECK: btl +; 149504 = 2^11 + 2^14 + 2^17 +; CHECK: movl $149504 +; CHECK: btl +; 266240 = 2^12 + 2^15 + 2^18 +; CHECK: movl $266240 ; CHECK: btl } @@ -410,6 +483,9 @@ ; Cases 1,4,7 have a very large branch weight (which shouldn't overflow), so ; their bit test should come first. 0,3,6 and 2,5,8,9 both have a weight of 12, ; but the latter set has more cases, so should be tested for earlier. +; The bit test on 0,3,6 is unnecessary as all cases cover the rage [0, 9]. +; The range check guarantees that cases other than 1,4,7 and 2,5,8,9 must be +; in 0,3,6. ; CHECK-LABEL: bt_order_by_weight ; 146 = 2^1 + 2^4 + 2^7 @@ -419,8 +495,8 @@ ; CHECK: movl $804 ; CHECK: btl ; 73 = 2^0 + 2^3 + 2^6 -; CHECK: movl $73 -; CHECK: btl +; CHECK-NOT: movl $73 +; CHECK-NOT: btl } !1 = !{!"branch_weights",