Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -5592,13 +5592,13 @@ llvm::sort(Values); bool MadeChanges = false; - // We must first look find the best start point, for example if we have a series - // that crosses zero: -2, -1, 0, 1, 2. bool UseCtlz = false, UseCttz = false, UseInvert = false; if (ReduceSwitchRangeWithCtlzOrCttz(*SI, Values, UseCtlz, UseCttz, UseInvert)) MadeChanges = true; + // We must first look find the best start point, for example if we have a series + // that crosses zero: -2, -1, 0, 1, 2. uint64_t BestDistance = APInt::getMaxValue(CondTy->getIntegerBitWidth()).getLimitedValue() - Values.back() + Values.front() + 1; unsigned BestIndex = 0; @@ -5608,18 +5608,6 @@ BestDistance = Values[I] - Values[I-1]; } } - uint64_t Base = 0; - // Now transform the values such that they start at zero and ascend. - if (Values[BestIndex] >= SubThreshold) { - Base = Values[BestIndex]; - MadeChanges = true; - for (auto &V : Values) - V = (APInt(BitWidth, V) - Base).getLimitedValue(); - } - - // Now we have signed numbers that have been shifted so that, given enough - // precision, there are no negative values. Since the rest of the transform - // is bitwise only, we switch now to an unsigned representation. // This transform can be done speculatively because it is so cheap - it results // in a single rotate operation being inserted. @@ -5629,15 +5617,38 @@ // 0 is the only value then a shift does nothing, and LLVM requires // well-formed IR to not have duplicate cases (so the minimum will not // be BitWidth) + unsigned Shift = 64; + // We need to store this from _before_ the transform + uint64_t BestIndexXor = Values[BestIndex]; for (auto &V : Values) - Shift = std::min(Shift, countTrailingZeros((uint64_t)V)); + Shift = std::min(Shift, countTrailingZeros(V ^ BestIndexXor)); if (Shift > 0) { MadeChanges = true; for (auto &V : Values) V >>= Shift; } + // We Xor against Values[] (any element will do) because the if we do not + // start at zero, but also don't meet the SubThreshold, then we still might + // share common rights bits, and if this transform succeeds + // then we should insert the subtraction anyways, because the rotate trick + // below to avoid a branch needs the shifted away bits to be zero. + + // Now transform the values such that they start at zero and ascend. Do not + // do this if the shift reduces the lowest value to less than SubThreshold, + // or if the subtraction is less than SubThreshold and it does not enable a rotate. + uint64_t Base = 0; + if ((BestIndexXor >= SubThreshold && + Shift == 0) || + (Shift > countTrailingZeros(BestIndexXor) && + Values[BestIndex] >= SubThreshold)) { + Base = BestIndexXor; + MadeChanges = true; + for (auto &V : Values) + V = (APInt(BitWidth, V) - Base).getLimitedValue(); + } + if (!MadeChanges) // We didn't do anything. return false; Index: test/Transforms/SimplifyCFG/rangereduce.ll =================================================================== --- test/Transforms/SimplifyCFG/rangereduce.ll +++ test/Transforms/SimplifyCFG/rangereduce.ll @@ -230,8 +230,8 @@ define i8 @test7(i8 %a) optsize { ; CHECK-LABEL: @test7( -; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = sub i8 [[A:%.*]], -36 -; CHECK-NEXT: [[SWITCH_RANGEREDUCE1:%.*]] = call i8 @llvm.fshr.i8(i8 [[SWITCH_RANGEREDUCE]], i8 [[SWITCH_RANGEREDUCE]], i8 2) +; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = call i8 @llvm.fshr.i8(i8 [[A:%.*]], i8 [[A]], i8 2) +; CHECK-NEXT: [[SWITCH_RANGEREDUCE1:%.*]] = sub i8 [[SWITCH_RANGEREDUCE]], 55 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[SWITCH_RANGEREDUCE1]], 4 ; CHECK-NEXT: br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[DEF:%.*]] ; CHECK: switch.lookup: @@ -336,3 +336,81 @@ ret i32 99783 } +; should have a subtraction to enable a shift +define i32 @test10(i32 %a) { +; CHECK-LABEL: @test10( +; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = sub i32 [[A:%.*]], 1 +; CHECK-NEXT: [[SWITCH_RANGEREDUCE1:%.*]] = call i32 @llvm.fshr.i32(i32 [[SWITCH_RANGEREDUCE]], i32 [[SWITCH_RANGEREDUCE]], i32 1) +; CHECK-NEXT: switch i32 [[SWITCH_RANGEREDUCE1]], label [[DEF:%.*]] [ +; CHECK-NEXT: i32 0, label [[ONE:%.*]] +; CHECK-NEXT: i32 1, label [[TWO:%.*]] +; CHECK-NEXT: i32 2, label [[THREE:%.*]] +; CHECK-NEXT: i32 3, label [[THREE]] +; CHECK-NEXT: ] +; CHECK: def: +; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ] +; CHECK-NEXT: ret i32 [[MERGE]] +; CHECK: one: +; CHECK-NEXT: br label [[DEF]] +; CHECK: two: +; CHECK-NEXT: br label [[DEF]] +; CHECK: three: +; CHECK-NEXT: br label [[DEF]] +; + switch i32 %a, label %def [ + i32 1, label %one + i32 3, label %two + i32 5, label %three + i32 7, label %three + ] + +def: + ret i32 8867 + +one: + ret i32 11984 +two: + ret i32 1143 +three: + ret i32 99783 +} + +; shouldn't have a subtraction if the shift transform gives us a index 1, even if +; it wouldn't break the shift transform +define i32 @test11(i32 %a) { +; CHECK-LABEL: @test11( +; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[A]], i32 1) +; CHECK-NEXT: switch i32 [[SWITCH_RANGEREDUCE]], label [[DEF:%.*]] [ +; CHECK-NEXT: i32 1, label [[ONE:%.*]] +; CHECK-NEXT: i32 2, label [[TWO:%.*]] +; CHECK-NEXT: i32 3, label [[THREE:%.*]] +; CHECK-NEXT: i32 4, label [[THREE]] +; CHECK-NEXT: ] +; CHECK: def: +; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ] +; CHECK-NEXT: ret i32 [[MERGE]] +; CHECK: one: +; CHECK-NEXT: br label [[DEF]] +; CHECK: two: +; CHECK-NEXT: br label [[DEF]] +; CHECK: three: +; CHECK-NEXT: br label [[DEF]] +; + switch i32 %a, label %def [ + i32 2, label %one + i32 4, label %two + i32 6, label %three + i32 8, label %three + ] + +def: + ret i32 8867 + +one: + ret i32 11984 +two: + ret i32 1143 +three: + ret i32 99783 +} +