Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -5574,10 +5574,6 @@ V = (APInt(BitWidth, V) - Base).getLimitedValue(); } - // Now we have signed numbers that have been shifted so that, given enough - // precision, there are no negative values. Since the rest of the transform - // is bitwise only, we switch now to an unsigned representation. - // This transform can be done speculatively because it is so cheap - it results // in a single rotate operation being inserted. @@ -5586,15 +5582,28 @@ // 0 is the only value then a shift does nothing, and LLVM requires // well-formed IR to not have duplicate cases (so the minimum will not // be BitWidth) + + // We Xor against Values[0] because if we do not start at zero, + // but also don't meet the SubThreshold, then we still might share + // common rightmost bits, and if this transform succeeds... unsigned Shift = 64; for (auto &V : Values) - Shift = std::min(Shift, countTrailingZeros((uint64_t)V)); + Shift = std::min(Shift, countTrailingZeros(V ^ Values[0])); if (Shift > 0) { MadeChanges = true; for (auto &V : Values) V >>= Shift; } + // ...then we should insert the subtraction anyway, because the rotate trick + // below to avoid a branch needs the shifted-away bits to be zero. + if (Shift > countTrailingZeros(Values[0])) { + Base = Values[BestIndex]; + MadeChanges = true; + for (auto &V : Values) + V = (APInt(BitWidth, V) - Base).getLimitedValue(); + } + if (!MadeChanges) // We didn't do anything. 
return false; Index: test/Transforms/SimplifyCFG/rangereduce.ll =================================================================== --- test/Transforms/SimplifyCFG/rangereduce.ll +++ test/Transforms/SimplifyCFG/rangereduce.ll @@ -301,12 +301,13 @@ define i32 @test9(i32 %a) { ; CHECK-LABEL: @test9( -; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[A]], i32 1) -; CHECK-NEXT: switch i32 [[SWITCH_RANGEREDUCE]], label [[DEF:%.*]] [ -; CHECK-NEXT: i32 9, label [[ONE:%.*]] -; CHECK-NEXT: i32 10, label [[TWO:%.*]] -; CHECK-NEXT: i32 3, label [[THREE:%.*]] -; CHECK-NEXT: i32 5, label [[THREE]] +; CHECK-NEXT: [[SWITCH_RANGEREDUCE:%.*]] = sub i32 [[A:%.*]], 3 +; CHECK-NEXT: [[SWITCH_RANGEREDUCE1:%.*]] = call i32 @llvm.fshr.i32(i32 [[SWITCH_RANGEREDUCE]], i32 [[SWITCH_RANGEREDUCE]], i32 1) +; CHECK-NEXT: switch i32 [[SWITCH_RANGEREDUCE1]], label [[DEF:%.*]] [ +; CHECK-NEXT: i32 7, label [[ONE:%.*]] +; CHECK-NEXT: i32 8, label [[TWO:%.*]] +; CHECK-NEXT: i32 1, label [[THREE:%.*]] +; CHECK-NEXT: i32 3, label [[THREE]] ; CHECK-NEXT: ] ; CHECK: def: ; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ]