Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5520,9 +5520,9 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI) { - // The number of cases that need to be removed by a subtraction operation - // to make it worth using. - const unsigned SubThreshold = (SI->getFunction()->hasOptSize() ? 2 : 8); + // The number of cases that need to be removed by a subtraction or + // shift right operation to make it worth using. + const unsigned Threshold = (SI->getFunction()->hasOptSize() ? 2 : 8); auto *CondTy = cast(SI->getCondition()->getType()); unsigned BitWidth = CondTy->getIntegerBitWidth(); if (BitWidth > 64 || !DL.fitsInLegalInteger(BitWidth)) @@ -5570,25 +5570,29 @@ Shift = std::min(Shift, countTrailingZeros(V ^ BestIndexXor)); assert(Shift < 64); if (Shift > 0) { - MadeChanges = true; - for (auto &V : Values) - V >>= Shift; + if (SaturatingMultiply((1ULL << Shift) - 1, Values.size()) >= Threshold) { + MadeChanges = true; + for (auto &V : Values) + V >>= Shift; + } else + // Not worth it. + Shift = 0; } // We Xor against Values[] (any element will do) because the if we do not - // start at zero, but also don't meet the SubThreshold, then we still might + // start at zero, but also don't meet the Threshold, then we still might // share common rights bits, and if this transform succeeds // then we should insert the subtraction anyways, because the rotate trick // below to avoid a branch needs the shifted away bits to be zero. // Now transform the values such that they start at zero and ascend. Do not - // do this if the shift reduces the lowest value to less than SubThreshold, - // or if the subtraction is less than SubThreshold and it does not enable a + // do this if the shift reduces the lowest value to less than Threshold, + // or if the subtraction is less than Threshold and it does not enable a // rotate. uint64_t Base = 0; - if ((BestIndexXor >= SubThreshold && Shift == 0) || - (Shift > countTrailingZeros(BestIndexXor) && - Values[BestIndex] >= SubThreshold)) { + if ((BestIndexXor >= Threshold && Shift == 0) || + ((Shift > countTrailingZeros(BestIndexXor) && + Values[BestIndex] >= Threshold))) { Base = BestIndexXor; MadeChanges = true; for (auto &V : Values) Index: llvm/test/Transforms/SimplifyCFG/rangereduce.ll =================================================================== --- llvm/test/Transforms/SimplifyCFG/rangereduce.ll +++ llvm/test/Transforms/SimplifyCFG/rangereduce.ll @@ -309,14 +309,11 @@ define i32 @test9(i32 %a) { ; CHECK-LABEL: @test9( -; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[A:%.*]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[A]], 31 -; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]] -; CHECK-NEXT: switch i32 [[TMP3]], label [[DEF:%.*]] [ -; CHECK-NEXT: i32 9, label [[ONE:%.*]] -; CHECK-NEXT: i32 10, label [[TWO:%.*]] -; CHECK-NEXT: i32 3, label [[THREE:%.*]] -; CHECK-NEXT: i32 5, label [[THREE]] +; CHECK-NEXT: switch i32 [[A:%.*]], label [[DEF:%.*]] [ +; CHECK-NEXT: i32 18, label [[ONE:%.*]] +; CHECK-NEXT: i32 20, label [[TWO:%.*]] +; CHECK-NEXT: i32 6, label [[THREE:%.*]] +; CHECK-NEXT: i32 10, label [[THREE]] ; CHECK-NEXT: ] ; CHECK: def: ; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ] Index: llvm/test/Transforms/SimplifyCFG/switch-simplify-range.ll =================================================================== --- llvm/test/Transforms/SimplifyCFG/switch-simplify-range.ll +++ llvm/test/Transforms/SimplifyCFG/switch-simplify-range.ll @@ -9,11 +9,13 @@ ; CHECK-LABEL: @switch_common_right_bits( ; CHECK-NEXT: Entry: ; CHECK-NEXT: [[TMP0:%.*]] = sub i8 [[A:%.*]], 123 -; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP0]], i8 [[TMP0]], i8 1) -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 5 -; CHECK-NEXT: br i1 [[TMP2]], label [[SWITCH_LOOKUP:%.*]], label [[SWITCHELSE:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[TMP0]], 7 +; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i8 [[TMP3]], 9 +; CHECK-NEXT: br i1 [[TMP4]], label [[SWITCH_LOOKUP:%.*]], label [[SWITCHELSE:%.*]] ; CHECK: switch.lookup: -; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* @switch.table.switch_common_right_bits, i32 0, i8 [[TMP1]] +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* @switch.table.switch_common_right_bits, i32 0, i8 [[TMP3]] ; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i64, i64* [[SWITCH_GEP]] ; CHECK-NEXT: ret i64 [[SWITCH_LOAD]] ; CHECK: SwitchElse: @@ -26,6 +28,10 @@ i8 127, label %SwitchProng2 i8 129, label %SwitchProng3 i8 131, label %SwitchProng4 + i8 133, label %SwitchProng5 + i8 135, label %SwitchProng6 + i8 137, label %SwitchProng7 + i8 139, label %SwitchProng8 ] SwitchElse: ; preds = %Entry ret i64 10 @@ -39,6 +45,14 @@ ret i64 2 SwitchProng4: ; preds = %Entry ret i64 1 +SwitchProng5: + ret i64 5 +SwitchProng6: + ret i64 849 +SwitchProng7: + ret i64 993 +SwitchProng8: + ret i64 342 } define i64 @switch_clz1(i16 %a) optsize #0 {