Index: lib/Transforms/Utils/SimplifyCFG.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyCFG.cpp
+++ lib/Transforms/Utils/SimplifyCFG.cpp
@@ -4968,6 +4968,112 @@
   return true;
 }
 
+static bool isSwitchDense(ArrayRef<int64_t> Values) {
+  // See also SelectionDAGBuilder::isDense(), which this function was based on.
+  uint64_t Diff;
+  if (Values.front() <= Values.back())
+    Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
+  else
+    Diff = (uint64_t)Values.front() - (uint64_t)Values.back();
+  uint64_t Range = Diff + 1;
+  uint64_t NumCases = Values.size();
+  // 40% is the default density for building a jump table in optsize/minsize mode.
+  uint64_t MinDensity = 40;
+  
+  return NumCases * 100 >= Range * MinDensity;
+}
+
+// Try and transform a switch that has "holes" in it to a contiguous sequence
+// of cases.
+//
+// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
+// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
+//
+// This converts a sparse switch into a dense switch which allows better
+// lowering and could also allow transforming into a lookup table.
+static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
+                              const DataLayout &DL,
+                              const TargetTransformInfo &TTI) {
+  if (SI->getCondition()->getType()->getIntegerBitWidth() > 64)
+    return false;
+  
+  // Only bother with this optimization if it can cause a good lowering of dense
+  // switches (for example a jump table) to be emitted.
+  // FIXME: Add a hook to TTI to query this, like TLI->getMinimumJumpTableEntries()?
+  if (SI->getNumCases() < 4)
+    return false;
+
+  // This transform is agnostic to the signedness of the input or case values. We
+  // can treat the case values as signed or unsigned. We can optimize more common
+  // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
+  // as signed.
+  SmallVector<int64_t,4> Values;
+  for (auto &C : SI->cases())
+    Values.push_back(C.getCaseValue()->getValue().getSExtValue());
+  std::sort(Values.begin(), Values.end());
+
+  // If the switch is already dense, there's nothing useful to do here.
+  if (isSwitchDense(Values))
+    return false;
+
+  // First, transform the values such that they start at zero and ascend.
+  uint64_t Base = Values[0];
+  for (auto &V : Values)
+    V -= Base;
+
+  // Now we have signed numbers that have been shifted so that, given enough
+  // precision, there are no negative values. Since the rest of the transform
+  // is bitwise only, we switch now to an unsigned representation.
+  uint64_t GCD = 0;
+  for (auto &V : Values)
+    GCD = llvm::GreatestCommonDivisor64(GCD, (uint64_t)V);
+  if (GCD <= 1 || !llvm::isPowerOf2_64(GCD))
+    // We refuse to output a costly urem/udiv pair.
+    return false;
+  unsigned Shift = llvm::Log2_64(GCD);
+  
+  for (auto &V : Values)
+    V = (int64_t)((uint64_t)V >> Shift);
+
+  if (!isSwitchDense(Values))
+    // Transform didn't create a dense switch.
+    return false;
+  
+  // We can range-reduce this switch (X) to switch (X - Base) / Divisor.
+  // This should be profitable if the resulting switch has no holes -
+  // this should result in a better lowering.
+  auto *Ty = SI->getCondition()->getType();
+  Builder.SetInsertPoint(SI);
+  auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
+  auto *Div = Builder.CreateLShr(Sub, ConstantInt::get(Ty, Shift));
+  auto *Rem = Builder.CreateAnd(Sub, ConstantInt::get(Ty, GCD-1));
+  auto *Cmp = Builder.CreateICmpEQ(Rem, ConstantInt::get(Ty, 0));
+  SI->replaceUsesOfWith(SI->getCondition(), Div);
+
+  BasicBlock *OldBB = SI->getParent();
+  SplitBlock(OldBB, SI);
+  OldBB->getTerminator()->eraseFromParent();
+  BranchInst::Create(SI->getParent(), SI->getDefaultDest(), Cmp, OldBB);
+
+  // Correct any PHIs at the destination.
+  for (auto &I : *SI->getDefaultDest()) {
+    auto *PN = dyn_cast<PHINode>(&I);
+    if (!PN)
+      break;
+    auto *V = PN->getIncomingValueForBlock(SI->getParent());
+    PN->addIncoming(V, OldBB);
+  }
+
+  for (auto &C : SI->cases()) {
+    auto *Orig = C.getCaseValue();
+    auto BW = Orig->getValue().getBitWidth();
+    auto Sub = Orig->getValue() - APInt(BW, Base);
+    auto *New = ConstantInt::get(Ty, Sub.lshr(APInt(BW, Shift)));
+    SI->replaceUsesOfWith(Orig, New);
+  }
+  return true;
+}
+
 bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
   BasicBlock *BB = SI->getParent();
 
@@ -5011,6 +5117,9 @@
   if (SwitchToLookupTable(SI, Builder, DL, TTI))
     return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
 
+  if (ReduceSwitchRange(SI, Builder, DL, TTI))
+    return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+
   return false;
 }
 
Index: test/Transforms/SimplifyCFG/rangereduce.ll
===================================================================
--- /dev/null
+++ test/Transforms/SimplifyCFG/rangereduce.ll
@@ -0,0 +1,253 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; CHECK-LABEL: @test1
+; CHECK: %1 = sub i32 %a, 97
+; CHECK: %2 = lshr i32 %1, 2
+; CHECK: %3 = and i32 %1, 3
+; CHECK: %4 = icmp eq i32 %3, 0
+; CHECK: br i1 %4, label %.split, label %def
+; CHECK:.split:
+; CHECK:  switch i32 %2, label %def [
+; CHECK:    i32 0, label %one
+; CHECK:    i32 1, label %two
+; CHECK:    i32 2, label %three
+; CHECK:    i32 3, label %three
+; CHECK:  ]
+; CHECK: phi i32 [ 8867, %.split ], [ 11984, %one ], [ 1143, %two ], [ 99783, %three ], [ 8867, %0 ]
+define i32 @test1(i32 %a) {
+  switch i32 %a, label %def [
+    i32 97, label %one
+    i32 101, label %two
+    i32 105, label %three
+    i32 109, label %three
+  ]
+
+def:
+  ret i32 8867
+
+one:
+  ret i32 11984
+two:
+  ret i32 1143
+three:
+  ret i32 99783
+}
+
+; Optimization shouldn't trigger; bitwidth > 64
+; CHECK-LABEL: @test2
+; CHECK: switch i128 %a, label %def
+define i128 @test2(i128 %a) {
+  switch i128 %a, label %def [
+    i128 97, label %one
+    i128 101, label %two
+    i128 105, label %three
+    i128 109, label %three
+  ]
+
+def:
+  ret i128 8867
+
+one:
+  ret i128 11984
+two:
+  ret i128 1143
+three:
+  ret i128 99783
+}
+
+
+; Optimization shouldn't trigger; no holes present
+; CHECK-LABEL: @test3
+; CHECK: switch i32 %a, label %def
+define i32 @test3(i32 %a) {
+  switch i32 %a, label %def [
+    i32 97, label %one
+    i32 98, label %two
+    i32 99, label %three
+    i32 100, label %three
+  ]
+
+def:
+  ret i32 8867
+
+one:
+  ret i32 11984
+two:
+  ret i32 1143
+three:
+  ret i32 99783
+}
+
+; Optimization shouldn't trigger; not an arithmetic progression
+; CHECK-LABEL: @test4
+; CHECK: switch i32 %a, label %def
+define i32 @test4(i32 %a) {
+  switch i32 %a, label %def [
+    i32 97, label %one
+    i32 102, label %two
+    i32 105, label %three
+    i32 109, label %three
+  ]
+
+def:
+  ret i32 8867
+
+one:
+  ret i32 11984
+two:
+  ret i32 1143
+three:
+  ret i32 99783
+}
+
+; Optimization shouldn't trigger; not a power of two
+; CHECK-LABEL: @test5
+; CHECK: switch i32 %a, label %def
+define i32 @test5(i32 %a) {
+  switch i32 %a, label %def [
+    i32 97, label %one
+    i32 102, label %two
+    i32 107, label %three
+    i32 112, label %three
+  ]
+
+def:
+  ret i32 8867
+
+one:
+  ret i32 11984
+two:
+  ret i32 1143
+three:
+  ret i32 99783
+}
+
+; CHECK-LABEL: @test6
+; CHECK: %1 = sub i32 %a, -109
+; CHECK: %2 = lshr i32 %1, 2
+; CHECK: %3 = and i32 %1, 3
+; CHECK: %4 = icmp eq i32 %3, 0
+; CHECK: br i1 %4, label %.split, label %def
+; CHECK:.split:
+; CHECK:  switch i32 %2, label %def [
+; CHECK:    i32 3, label %one
+; CHECK:    i32 2, label %two
+; CHECK:    i32 1, label %three
+; CHECK:    i32 0, label %three
+; CHECK:  ]
+; CHECK: phi i32 [ 8867, %.split ], [ 11984, %one ], [ 1143, %two ], [ 99783, %three ], [ 8867, %0 ]
+define i32 @test6(i32 %a) {
+  switch i32 %a, label %def [
+    i32 -97, label %one
+    i32 -101, label %two
+    i32 -105, label %three
+    i32 -109, label %three
+  ]
+
+def:
+  ret i32 8867
+
+one:
+  ret i32 11984
+two:
+  ret i32 1143
+three:
+  ret i32 99783
+}
+
+; CHECK-LABEL: @test7
+; CHECK: %1 = sub i8 %a, -36
+; CHECK: %2 = lshr i8 %1, 2
+; CHECK: %3 = and i8 %1, 3
+; CHECK: %4 = icmp eq i8 %3, 0
+; CHECK: br i1 %4, label %.split, label %def
+; CHECK:.split:
+; CHECK:  switch i8 %2, label %def [
+; CHECK:    i8 0, label %one
+; CHECK:    i8 1, label %two
+; CHECK:    i8 2, label %three
+; CHECK:    i8 3, label %three
+; CHECK:  ]
+define i8 @test7(i8 %a) {
+  switch i8 %a, label %def [
+    i8 220, label %one
+    i8 224, label %two
+    i8 228, label %three
+    i8 232, label %three
+  ]
+
+def:
+  ret i8 8867
+
+one:
+  ret i8 11984
+two:
+  ret i8 1143
+three:
+  ret i8 99783
+}
+
+; CHECK-LABEL: @test8
+; CHECK: %1 = sub i8 %a, -3
+; CHECK: %2 = lshr i8 %1, 2
+; CHECK: %3 = and i8 %1, 3
+; CHECK: %4 = icmp eq i8 %3, 0
+; CHECK: br i1 %4, label %.split, label %def
+; CHECK:.split:
+; CHECK:  switch i8 %2, label %def [
+; CHECK:    i8 0, label %one
+; CHECK:    i8 1, label %two
+; CHECK:    i8 2, label %three
+; CHECK:    i8 3, label %three
+; CHECK:  ]
+define i8 @test8(i8 %a) {
+  switch i8 %a, label %def [
+    i8 -3, label %one
+    i8 1, label %two
+    i8 5, label %three
+    i8 9, label %three
+  ]
+
+def:
+  ret i8 8867
+
+one:
+  ret i8 11984
+two:
+  ret i8 1143
+three:
+  ret i8 99783
+}
+
+; CHECK-LABEL: @test9
+; CHECK: %1 = sub i32 %a, 97
+; CHECK: %2 = lshr i32 %1, 2
+; CHECK: %3 = and i32 %1, 3
+; CHECK: %4 = icmp eq i32 %3, 0
+; CHECK: br i1 %4, label %.split, label %def
+; CHECK:.split:
+; CHECK:  switch i32 %2, label %def [
+; CHECK:    i32 0, label %one
+; CHECK:    i32 1, label %two
+; CHECK:    i32 2, label %three
+; CHECK:    i32 4, label %three
+; CHECK:  ]
+; CHECK: phi i32 [ 8867, %.split ], [ 11984, %one ], [ 1143, %two ], [ 99783, %three ], [ 8867, %0 ]
+define i32 @test9(i32 %a) {
+  switch i32 %a, label %def [
+    i32 97, label %one
+    i32 101, label %two
+    i32 105, label %three
+    i32 113, label %three
+  ]
+
+def:
+  ret i32 8867
+
+one:
+  ret i32 11984
+two:
+  ret i32 1143
+three:
+  ret i32 99783
+}