Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -5549,13 +5549,13 @@ llvm::sort(Values); bool MadeChanges = false; - // We must first look find the best start point, for example if we have a series - // that crosses zero: -2, -1, 0, 1, 2. bool UseCtlz = ReduceSwitchRangeWithCtlz(*SI, Values); if (UseCtlz) MadeChanges = true; + // We must first look find the best start point, for example if we have a series + // that crosses zero: -2, -1, 0, 1, 2. uint64_t BestDistance = APInt::getMaxValue(CondTy->getIntegerBitWidth()).getLimitedValue() - Values.back() + Values.front() + 1; unsigned BestIndex = 0; @@ -5565,18 +5565,6 @@ BestDistance = Values[I] - Values[I-1]; } } - uint64_t Base = 0; - // Now transform the values such that they start at zero and ascend. - if (Values[BestIndex] >= SubThreshold) { - Base = Values[BestIndex]; - MadeChanges = true; - for (auto &V : Values) - V = (APInt(BitWidth, V) - Base).getLimitedValue(); - } - - // Now we have signed numbers that have been shifted so that, given enough - // precision, there are no negative values. Since the rest of the transform - // is bitwise only, we switch now to an unsigned representation. // This transform can be done speculatively because it is so cheap - it results // in a single rotate operation being inserted. @@ -5586,15 +5574,33 @@ // 0 is the only value then a shift does nothing, and LLVM requires // well-formed IR to not have duplicate cases (so the minimum will not // be BitWidth) + + // We Xor against Values[any] because the if we do not start at zero, + // but also don't meet the SubThreshold, then we still might share + // common rights bits, and if this transform succeeds... unsigned Shift = 64; + // We need to store this from _before_ the transform + uint64_t BestIndexXor = Values[BestIndex]; for (auto &V : Values) - Shift = std::min(Shift, countTrailingZeros((uint64_t)V)); + Shift = std::min(Shift, countTrailingZeros(V ^ BestIndexXor)); if (Shift > 0) { MadeChanges = true; for (auto &V : Values) V >>= Shift; } + // ...then we should insert the subtraction anyways, because the rotate trick + // below to avoid a branch needs the shifted away bits to be zero. + + // Now transform the values such that they start at zero and ascend. + uint64_t Base = 0; + if (BestIndexXor >= SubThreshold || Shift > 0) { + Base = BestIndexXor; + MadeChanges = true; + for (auto &V : Values) + V = (APInt(BitWidth, V) - (Base >> Shift)).getLimitedValue(); + } + if (!MadeChanges) // We didn't do anything. return false; Index: test/Transforms/SimplifyCFG/rangereduce.ll =================================================================== --- test/Transforms/SimplifyCFG/rangereduce.ll +++ test/Transforms/SimplifyCFG/rangereduce.ll @@ -301,12 +301,13 @@ define i32 @test9(i32 %a) { ; CHECK-LABEL: @test9( -; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[A]], i32 1) -; CHECK-NEXT: switch i32 [[SWITCH_RANGEREDUCE]], label [[DEF:%.*]] [ -; CHECK-NEXT: i32 9, label [[ONE:%.*]] -; CHECK-NEXT: i32 10, label [[TWO:%.*]] -; CHECK-NEXT: i32 3, label [[THREE:%.*]] -; CHECK-NEXT: i32 5, label [[THREE]] +; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = sub i32 [[A:%.*]], 6 +; CHECK-NEXT: [[SWITCH_RANGEREDUCE1:%.*]] = call i32 @llvm.fshr.i32(i32 [[SWITCH_RANGEREDUCE]], i32 [[SWITCH_RANGEREDUCE]], i32 1) +; CHECK-NEXT: switch i32 [[SWITCH_RANGEREDUCE1]], label [[DEF:%.*]] [ +; CHECK-NEXT: i32 6, label [[ONE:%.*]] +; CHECK-NEXT: i32 7, label [[TWO:%.*]] +; CHECK-NEXT: i32 0, label [[THREE:%.*]] +; CHECK-NEXT: i32 2, label [[THREE]] ; CHECK-NEXT: ] ; CHECK: def: ; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ] @@ -336,3 +337,41 @@ ret i32 99783 } +define i32 @test10(i32 %a) { +; CHECK-LABEL: @test10( +; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = sub i32 [[A:%.*]], 1 +; CHECK-NEXT: [[SWITCH_RANGEREDUCE1:%.*]] = call i32 @llvm.fshr.i32(i32 [[SWITCH_RANGEREDUCE]], i32 [[SWITCH_RANGEREDUCE]], i32 1) +; CHECK-NEXT: switch i32 [[SWITCH_RANGEREDUCE1]], label [[DEF:%.*]] [ +; CHECK-NEXT: i32 0, label [[ONE:%.*]] +; CHECK-NEXT: i32 1, label [[TWO:%.*]] +; CHECK-NEXT: i32 2, label [[THREE:%.*]] +; CHECK-NEXT: i32 3, label [[THREE]] +; CHECK-NEXT: ] +; CHECK: def: +; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ] +; CHECK-NEXT: ret i32 [[MERGE]] +; CHECK: one: +; CHECK-NEXT: br label [[DEF]] +; CHECK: two: +; CHECK-NEXT: br label [[DEF]] +; CHECK: three: +; CHECK-NEXT: br label [[DEF]] +; + switch i32 %a, label %def [ + i32 1, label %one + i32 3, label %two + i32 5, label %three + i32 7, label %three + ] + +def: + ret i32 8867 + +one: + ret i32 11984 +two: + ret i32 1143 +three: + ret i32 99783 +} +