Index: lib/Transforms/Utils/SimplifyCFG.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyCFG.cpp
+++ lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5490,17 +5490,6 @@
   return true;
 }
 
-static bool isSwitchDense(ArrayRef<int64_t> Values) {
-  // See also SelectionDAGBuilder::isDense(), which this function was based on.
-  uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
-  uint64_t Range = Diff + 1;
-  uint64_t NumCases = Values.size();
-  // 40% is the default density for building a jump table in optsize/minsize mode.
-  uint64_t MinDensity = 40;
-
-  return NumCases * 100 >= Range * MinDensity;
-}
-
 /// Try to transform a switch that has "holes" in it to a contiguous sequence
 /// of cases.
 ///
@@ -5512,6 +5501,7 @@
 static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
                               const DataLayout &DL,
                               const TargetTransformInfo &TTI) {
+  bool MadeChanges = false;
   auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
   if (CondTy->getIntegerBitWidth() > 64 ||
       !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
@@ -5521,49 +5511,53 @@
   if (SI->getNumCases() < 4)
     return false;
 
-  // This transform is agnostic to the signedness of the input or case values. We
-  // can treat the case values as signed or unsigned. We can optimize more common
-  // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
-  // as signed.
-  SmallVector<int64_t,4> Values;
+  SmallVector<uint64_t,4> Values;
   for (auto &C : SI->cases())
-    Values.push_back(C.getCaseValue()->getValue().getSExtValue());
+    Values.push_back(C.getCaseValue()->getLimitedValue());
   llvm::sort(Values);
 
-  // If the switch is already dense, there's nothing useful to do here.
-  if (isSwitchDense(Values))
-    return false;
-
-  // First, transform the values such that they start at zero and ascend.
-  int64_t Base = Values[0];
-  for (auto &V : Values)
-    V -= (uint64_t)(Base);
+  // Find the element that has the most distance from it's previous, wrapping around.
+  uint64_t BestDistance = APInt::getMaxValue(CondTy->getIntegerBitWidth()).getLimitedValue() -
+    Values.back() + Values.front() + 1;
+  unsigned BestIndex = 0;
+  for (unsigned i = 1;i != Values.size();i++) {
+    if (Values[i] - Values[i-1] > BestDistance) {
+      BestIndex = i;
+      BestDistance = Values[i] - Values[i-1];
+    }
+  }
 
-  // Now we have signed numbers that have been shifted so that, given enough
-  // precision, there are no negative values. Since the rest of the transform
-  // is bitwise only, we switch now to an unsigned representation.
-  uint64_t GCD = 0;
-  for (auto &V : Values)
-    GCD = GreatestCommonDivisor64(GCD, (uint64_t)V);
+  uint64_t Base = Values[BestIndex];
+  // Now transform the values such that they start at zero and ascend.
+  if ((BestDistance > (SI->getFunction()->hasOptSize() ? 2 : 8)) &&
+    (BestIndex != 0 || Values[0] != 0)) {
+    MadeChanges = true;
+    for (auto &V : Values)
+      V -= Base;
+  }
 
   // This transform can be done speculatively because it is so cheap - it results
-  // in a single rotate operation being inserted. This can only happen if the
-  // factor extracted is a power of 2.
-  // FIXME: If the GCD is an odd number we can multiply by the multiplicative
-  // inverse of GCD and then perform this transform.
+  // in a single rotate operation being inserted.
   // FIXME: It's possible that optimizing a switch on powers of two might also
   // be beneficial - flag values are often powers of two and we could use a CLZ
   // as the key function.
-  if (GCD <= 1 || !isPowerOf2_64(GCD))
-    // No common divisor found or too expensive to compute key function.
-    return false;
-
-  unsigned Shift = Log2_64(GCD);
-  for (auto &V : Values)
-    V = (int64_t)((uint64_t)V >> Shift);
+  unsigned Shift = 64;
+  for (auto &V : Values) {
+    unsigned TZ = APInt(64, V ^ Values[0]).countTrailingZeros();
+    if (TZ < Shift) {
+      Shift = TZ;
+      if (Shift == 0)
+        break;
+    }
+  }
+  if (Shift) {
+    MadeChanges = true;
+    for (auto &V : Values)
+      V = V >> Shift;
+  }
 
-  if (!isSwitchDense(Values))
-    // Transform didn't create a dense switch.
+  if (!MadeChanges)
+    // Didn't do anything
     return false;
 
   // The obvious transform is to shift the switch condition right and emit a
@@ -5577,19 +5571,19 @@
   // default case.
 
   auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
+  Function *Fshr = Intrinsic::getDeclaration(SI->getModule(), Intrinsic::fshr, Ty);
   Builder.SetInsertPoint(SI);
   auto *ShiftC = ConstantInt::get(Ty, Shift);
-  auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
-  auto *LShr = Builder.CreateLShr(Sub, ShiftC);
-  auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift);
-  auto *Rot = Builder.CreateOr(LShr, Shl);
-  SI->replaceUsesOfWith(SI->getCondition(), Rot);
+  uint64_t CommonRightBits = (Shift == 0) ? 0 : (Values[0] << (64 - Shift)) >> (64 - Shift);
+  auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base + CommonRightBits));
+  auto *Rotr = Builder.Insert(CallInst::Create(Fshr, {Sub, Sub, ShiftC}));
+  SI->replaceUsesOfWith(SI->getCondition(), Rotr);
 
   for (auto Case : SI->cases()) {
     auto *Orig = Case.getCaseValue();
     auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
     Case.setValue(
-        cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue()))));
+        cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
   }
   return true;
 }
@@ -5630,6 +5624,9 @@
   if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
     return requestResimplify();
 
+  if (ReduceSwitchRange(SI, Builder, DL, TTI))
+    return requestResimplify();
+
   // The conversion from switch to lookup tables results in difficult-to-analyze
   // code and makes pruning branches much harder. This is a problem if the
   // switch expression itself can still be restricted as a result of inlining or
@@ -5639,9 +5636,6 @@
       SwitchToLookupTable(SI, Builder, DL, TTI))
     return requestResimplify();
 
-  if (ReduceSwitchRange(SI, Builder, DL, TTI))
-    return requestResimplify();
-
   return false;
 }
 
Index: test/Transforms/SimplifyCFG/switch-simplify-range.ll
===================================================================
--- /dev/null
+++ test/Transforms/SimplifyCFG/switch-simplify-range.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define i64 @switch_common_right_bits(i8 %a) #0  {
+; CHECK-LABEL: @switch_common_right_bits(
+; CHECK-NEXT:  Entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i8 [[A:%.*]], -3
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.fshr.i8(i8 [[TMP0]], i8 [[TMP0]], i8 1)
+; CHECK-NEXT:    switch i8 [[TMP1]], label [[SWITCHELSE:%.*]] [
+; CHECK-NEXT:    i8 0, label [[SWITCHPRONG:%.*]]
+; CHECK-NEXT:    i8 1, label [[SWITCHPRONG1:%.*]]
+; CHECK-NEXT:    i8 2, label [[SWITCHPRONG2:%.*]]
+; CHECK-NEXT:    i8 3, label [[SWITCHPRONG3:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       SwitchElse:
+; CHECK-NEXT:    [[MERGE:%.*]] = phi i64 [ 10, [[ENTRY:%.*]] ], [ 6, [[SWITCHPRONG]] ], [ 3, [[SWITCHPRONG1]] ], [ 3, [[SWITCHPRONG2]] ], [ 3, [[SWITCHPRONG3]] ]
+; CHECK-NEXT:    ret i64 [[MERGE]]
+; CHECK:       SwitchProng:
+; CHECK-NEXT:    br label [[SWITCHELSE]]
+; CHECK:       SwitchProng1:
+; CHECK-NEXT:    br label [[SWITCHELSE]]
+; CHECK:       SwitchProng2:
+; CHECK-NEXT:    br label [[SWITCHELSE]]
+; CHECK:       SwitchProng3:
+; CHECK-NEXT:    br label [[SWITCHELSE]]
+;
+Entry:
+  switch i8 %a, label %SwitchElse [
+  i8 253, label %SwitchProng
+  i8 255, label %SwitchProng1
+  i8 1, label %SwitchProng2
+  i8 3, label %SwitchProng3
+  ]
+SwitchElse:                                       ; preds = %Entry
+  ret i64 10
+SwitchProng:                                      ; preds = %Entry
+  ret i64 6
+SwitchProng1:                                     ; preds = %Entry
+  ret i64 3
+SwitchProng2:                                     ; preds = %Entry
+  ret i64 3
+SwitchProng3:                                     ; preds = %Entry
+  ret i64 3
+}
+