Index: lib/Transforms/Utils/SimplifyCFG.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyCFG.cpp
+++ lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5592,13 +5592,13 @@
   llvm::sort(Values);
 
   bool MadeChanges = false;
-  // We must first look find the best start point, for example if we have a series
-  // that crosses zero: -2, -1, 0, 1, 2.
 
   bool UseCtlz = false, UseCttz = false, UseInvert = false;
   if (ReduceSwitchRangeWithCtlzOrCttz(*SI, Values, UseCtlz, UseCttz, UseInvert))
     MadeChanges = true;
 
+  // We must first look find the best start point, for example if we have a series
+  // that crosses zero: -2, -1, 0, 1, 2.
   uint64_t BestDistance = APInt::getMaxValue(CondTy->getIntegerBitWidth()).getLimitedValue() -
     Values.back() + Values.front() + 1;
   unsigned BestIndex = 0;
@@ -5608,18 +5608,6 @@
       BestDistance = Values[I] - Values[I-1];
     }
   }
-  uint64_t Base = 0;
-  // Now transform the values such that they start at zero and ascend.
-  if (Values[BestIndex] >= SubThreshold) {
-    Base = Values[BestIndex];
-    MadeChanges = true;
-    for (auto &V : Values)
-      V = (APInt(BitWidth, V) - Base).getLimitedValue();
-  }
-
-  // Now we have signed numbers that have been shifted so that, given enough
-  // precision, there are no negative values. Since the rest of the transform
-  // is bitwise only, we switch now to an unsigned representation.
 
   // This transform can be done speculatively because it is so cheap - it results
   // in a single rotate operation being inserted.
@@ -5629,15 +5617,38 @@
   // 0 is the only value then a shift does nothing, and LLVM requires
   // well-formed IR to not have duplicate cases (so the minimum will not
   // be BitWidth)
+
   unsigned Shift = 64;
+  // We need to store this from _before_ the transform
+  uint64_t BestIndexXor = Values[BestIndex];
   for (auto &V : Values)
-    Shift = std::min(Shift, countTrailingZeros((uint64_t)V));
+    Shift = std::min(Shift, countTrailingZeros(V ^ BestIndexXor));
   if (Shift > 0) {
     MadeChanges = true;
     for (auto &V : Values)
       V >>= Shift;
   }
 
+  // We Xor against Values[] (any element will do) because the if we do not
+  // start at zero, but also don't meet the SubThreshold, then we still might
+  // share common rights bits, and if this transform succeeds
+  // then we should insert the subtraction anyways, because the rotate trick
+  // below to avoid a branch needs the shifted away bits to be zero.
+
+  // Now transform the values such that they start at zero and ascend. Do not
+  // do this if the shift reduces the lowest value to less than SubThreshold,
+  // or if the subtraction is less than SubThreshold and it does not enable a rotate.
+  uint64_t Base = 0;
+  if ((BestIndexXor >= SubThreshold &&
+        Shift == 0) ||
+      (Shift > countTrailingZeros(BestIndexXor) &&
+        Values[BestIndex] >= SubThreshold)) {
+    Base = BestIndexXor;
+    MadeChanges = true;
+    for (auto &V : Values)
+      V = (APInt(BitWidth, V) - Base).getLimitedValue();
+  }
+
   if (!MadeChanges)
     // We didn't do anything.
     return false;
Index: test/Transforms/SimplifyCFG/rangereduce.ll
===================================================================
--- test/Transforms/SimplifyCFG/rangereduce.ll
+++ test/Transforms/SimplifyCFG/rangereduce.ll
@@ -230,8 +230,8 @@
 
 define i8 @test7(i8 %a) optsize {
 ; CHECK-LABEL: @test7(
-; CHECK-NEXT:    [[SWITCH_RANGEREDUCE:%.*]] = sub i8 [[A:%.*]], -36
-; CHECK-NEXT:    [[SWITCH_RANGEREDUCE1:%.*]] = call i8 @llvm.fshr.i8(i8 [[SWITCH_RANGEREDUCE]], i8 [[SWITCH_RANGEREDUCE]], i8 2)
+; CHECK-NEXT:    [[SWITCH_RANGEREDUCE:%.*]] = call i8 @llvm.fshr.i8(i8 [[A:%.*]], i8 [[A]], i8 2)
+; CHECK-NEXT:    [[SWITCH_RANGEREDUCE1:%.*]] = sub i8 [[SWITCH_RANGEREDUCE]], 55
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[SWITCH_RANGEREDUCE1]], 4
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[DEF:%.*]]
 ; CHECK:       switch.lookup:
@@ -336,3 +336,81 @@
   ret i32 99783
 }
 
+; should have a subtraction to enable a shift
+define i32 @test10(i32 %a) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:    [[SWITCH_RANGEREDUCE:%.*]] = sub i32 [[A:%.*]], 1
+; CHECK-NEXT:    [[SWITCH_RANGEREDUCE1:%.*]] = call i32 @llvm.fshr.i32(i32 [[SWITCH_RANGEREDUCE]], i32 [[SWITCH_RANGEREDUCE]], i32 1)
+; CHECK-NEXT:    switch i32 [[SWITCH_RANGEREDUCE1]], label [[DEF:%.*]] [
+; CHECK-NEXT:    i32 0, label [[ONE:%.*]]
+; CHECK-NEXT:    i32 1, label [[TWO:%.*]]
+; CHECK-NEXT:    i32 2, label [[THREE:%.*]]
+; CHECK-NEXT:    i32 3, label [[THREE]]
+; CHECK-NEXT:    ]
+; CHECK:       def:
+; CHECK-NEXT:    [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ]
+; CHECK-NEXT:    ret i32 [[MERGE]]
+; CHECK:       one:
+; CHECK-NEXT:    br label [[DEF]]
+; CHECK:       two:
+; CHECK-NEXT:    br label [[DEF]]
+; CHECK:       three:
+; CHECK-NEXT:    br label [[DEF]]
+;
+  switch i32 %a, label %def [
+  i32 1, label %one
+  i32 3, label %two
+  i32 5, label %three
+  i32 7, label %three
+  ]
+
+def:
+  ret i32 8867
+
+one:
+  ret i32 11984
+two:
+  ret i32 1143
+three:
+  ret i32 99783
+}
+
+; shouldn't have a subtraction if the shift transform gives us a index 1, even if
+; it wouldn't break the shift transform
+define i32 @test11(i32 %a) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:    [[SWITCH_RANGEREDUCE:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[A]], i32 1)
+; CHECK-NEXT:    switch i32 [[SWITCH_RANGEREDUCE]], label [[DEF:%.*]] [
+; CHECK-NEXT:    i32 1, label [[ONE:%.*]]
+; CHECK-NEXT:    i32 2, label [[TWO:%.*]]
+; CHECK-NEXT:    i32 3, label [[THREE:%.*]]
+; CHECK-NEXT:    i32 4, label [[THREE]]
+; CHECK-NEXT:    ]
+; CHECK:       def:
+; CHECK-NEXT:    [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ]
+; CHECK-NEXT:    ret i32 [[MERGE]]
+; CHECK:       one:
+; CHECK-NEXT:    br label [[DEF]]
+; CHECK:       two:
+; CHECK-NEXT:    br label [[DEF]]
+; CHECK:       three:
+; CHECK-NEXT:    br label [[DEF]]
+;
+  switch i32 %a, label %def [
+  i32 2, label %one
+  i32 4, label %two
+  i32 6, label %three
+  i32 8, label %three
+  ]
+
+def:
+  ret i32 8867
+
+one:
+  ret i32 11984
+two:
+  ret i32 1143
+three:
+  ret i32 99783
+}
+