diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -2013,6 +2013,7 @@
   SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;
 
   void OptimizeShadowIV();
+  void optimizeDiv();
   bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
   ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
   void OptimizeLoopTermCond();
@@ -2243,6 +2244,140 @@
   }
 }
 
+/// Transform divide instruction to add instruction.
+/// for (int k = 0; k < m; ++k) {
+///   int div = k / KW;
+/// }
+/// -->
+/// for (int k = 0, cnt = 0, div = 0; k < m; ++k, ++i) {
+///   if (i == KW) {
+///     ++div;
+///     cnt = 0;
+///   }
+/// }
+void LSRInstance::optimizeDiv() {
+  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
+  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+    return;
+
+  for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E;
+       /* empty */) {
+    IVUsers::const_iterator CandidateUI = UI;
+    ++UI;
+    Instruction *Div = CandidateUI->getUser();
+    bool IsSigned = false;
+    Type *Ty = Div->getType();
+
+    if (Div->getOpcode() == Instruction::UDiv)
+      IsSigned = false;
+    else if (Div->getOpcode() == Instruction::SDiv)
+      IsSigned = true;
+    else
+      continue;
+
+    PHINode *PH = dyn_cast<PHINode>(Div->getOperand(0));
+    if (!PH)
+      continue;
+    if (PH->getNumIncomingValues() != 2)
+      continue;
+
+    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PH));
+    if (!AR)
+      continue;
+    // TODO, support negative and non-one positive step
+    if (!AR->getStepRecurrence(SE)->isOne())
+      continue;
+    // Drop the transform if there is overflow.
+    if (IsSigned && !AR->hasNoSignedWrap())
+      continue;
+    if (!IsSigned && !AR->hasNoUnsignedWrap())
+      continue;
+
+    if (TTI.hasDivRemOp(Ty, IsSigned))
+      continue;
+
+    const SCEV *Divisor = SE.getSCEV(Div->getOperand(1));
+    if (!SE.isLoopInvariant(Divisor, L))
+      continue;
+
+    unsigned Entry, Latch;
+    if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
+      Entry = 0;
+      Latch = 1;
+    } else {
+      Entry = 1;
+      Latch = 0;
+    }
+
+    ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
+    if (!Init)
+      continue;
+    // TODO: suppport any constant initial value.
+    if (!Init->isZero() && !Init->isOne())
+      continue;
+
+    // 7:
+    // %8 = phi i32 [ %12, %7 ], [ 0, %3 ]
+    // %div = sdiv i32 %8, %1
+    // latch
+    // %12 = add nuw nsw i32 %8, 1
+    // %13 = icmp eq i32 %12, %4
+    // br i1 %13, label %6, label %7
+    // -->
+    // 6:
+    // %divph = phi i32 [ %div, %6 ], [ 0, %3 ]
+    // %cntph = phi i32 [ %cnt, %6 ], [ 0, %3 ]
+    // %k = phi i32 [ %15, %6 ], [ 0, %3 ]
+    // %10 = icmp eq i32 %cntph, %1
+    // %11 = zext i1 %10 to i32                   # step 1
+    // %div = add nuw nsw i32 %divph, %11         # div++
+    // %15 = add nuw nsw i32 %k, 1
+    // %16 = add nsw i32 %cntph, 1
+    // %cnt = select i1 %10, i32 1, i32 %16
+    // latch
+    // %18 = icmp eq i32 %15, %0
+    // br i1 %18, label %5, label %6
+
+    // %7 = phi i32 [ %div, %6 ], [ 0, %3 ]
+    PHINode *DivPH = PHINode::Create(Ty, 2, "div", PH);
+    DivPH->addIncoming(Init, PH->getIncomingBlock(Entry));
+
+    // %cntph = phi i32 [ %cnt, %6 ], [ 0, %3 ]
+    PHINode *CountPH = PHINode::Create(Ty, 2, "cnt", PH);
+    CountPH->addIncoming(Init, PH->getIncomingBlock(Entry));
+
+    // %10 = icmp eq i32 %cnt, %1
+    ICmpInst *Cond = new ICmpInst(Div, ICmpInst::ICMP_EQ, CountPH,
+                                  Div->getOperand(1), "icmp");
+    // %11 = zext i1 %10 to i32
+    Instruction *ZExt =
+        CastInst::Create(Instruction::ZExt, Cond, Ty, "zext", Div);
+    // %div = add nuw nsw i32 %divph, %11
+    Instruction *DivRes =
+        BinaryOperator::Create(Instruction::Add, DivPH, ZExt, "add1", Div);
+    DivPH->addIncoming(DivRes, PH->getIncomingBlock(Latch));
+    // %15 = add nuw nsw i32 %k, 1
+    Instruction *CntAdd = BinaryOperator::Create(
+        Instruction::Add, CountPH, ConstantInt::get(Ty, 1), "add2", Div);
+    if (IsSigned) {
+      DivRes->setHasNoSignedWrap();
+      CntAdd->setHasNoSignedWrap();
+    } else {
+      DivRes->setHasNoUnsignedWrap();
+      CntAdd->setHasNoUnsignedWrap();
+    }
+    // %cnt = select i1 %10, i32 1, i32 %16
+    SelectInst *Sel =
+        SelectInst::Create(Cond, ConstantInt::get(Ty, 1), CntAdd, "sel", Div);
+    CountPH->addIncoming(Sel, PH->getIncomingBlock(Latch));
+
+    Div->replaceAllUsesWith(DivRes);
+    Div->eraseFromParent();
+
+    Changed = true;
+  }
+}
+
 /// If Cond has an operand that is an expression of an IV, set the IV user and
 /// stride information and return true, otherwise return false.
 bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
@@ -5911,6 +6046,7 @@
 
   // First, perform some low-level loop optimizations.
   OptimizeShadowIV();
+  optimizeDiv();
   OptimizeLoopTermCond();
 
   // If loop preparation eliminates all interesting IV users, bail.
diff --git a/llvm/test/Transforms/LoopStrengthReduce/div.ll b/llvm/test/Transforms/LoopStrengthReduce/div.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopStrengthReduce/div.ll
@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+; Provide legal integer types.
+target datalayout = "n8:16:32:64"
+
+define void @sdiv(i32 %0, i32 %1, ptr %2) {
+; CHECK-LABEL: define void @sdiv
+; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], ptr [[TMP2:%.*]]) {
+; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw i32 [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label [[DOTPREHEADER:%.*]], label [[TMP6:%.*]]
+; CHECK:       .preheader:
+; CHECK-NEXT:    br label [[TMP7:%.*]]
+; CHECK:       .loopexit:
+; CHECK-NEXT:    br label [[TMP6]]
+; CHECK:       6:
+; CHECK-NEXT:    ret void
+; CHECK:       7:
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i32 [ [[TMP4]], [[DOTPREHEADER]] ], [ [[LSR_IV_NEXT:%.*]], [[TMP7]] ]
+; CHECK-NEXT:    [[DIV:%.*]] = phi i32 [ 0, [[DOTPREHEADER]] ], [ [[ADD1:%.*]], [[TMP7]] ]
+; CHECK-NEXT:    [[CNT:%.*]] = phi i32 [ 0, [[DOTPREHEADER]] ], [ [[SEL:%.*]], [[TMP7]] ]
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp eq i32 [[CNT]], [[TMP1]]
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[ICMP]] to i32
+; CHECK-NEXT:    [[ADD1]] = add nsw i32 [[DIV]], [[ZEXT]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add nsw i32 [[CNT]], 1
+; CHECK-NEXT:    [[SEL]] = select i1 [[ICMP]], i32 1, i32 [[ADD2]]
+; CHECK-NEXT:    [[TMP8:%.*]] = sext i32 [[ADD1]] to i64
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP8]]
+; CHECK-NEXT:    store i32 0, ptr [[TMP9]], align 4
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], -1
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 0
+; CHECK-NEXT:    br i1 [[TMP10]], label [[DOTLOOPEXIT:%.*]], label [[TMP7]]
+;
+  %4 = mul nsw i32 %1, %0
+  %5 = icmp sgt i32 %4, 0
+  br i1 %5, label %7, label %6
+
+6:                                                ; preds = %7, %3
+  ret void
+
+7:                                                ; preds = %7, %3
+  %8 = phi i32 [ %12, %7 ], [ 0, %3 ]
+  %9 = sdiv i32 %8, %1
+  %10 = sext i32 %9 to i64
+  %11 = getelementptr inbounds i32, ptr %2, i64 %10
+  store i32 0, ptr %11, align 4
+  %12 = add nuw nsw i32 %8, 1
+  %13 = icmp eq i32 %12, %4
+  br i1 %13, label %6, label %7
+}
+
+define void @udiv(i32 %0, i32 %1, ptr %2) {
+; CHECK-LABEL: define void @udiv
+; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], ptr [[TMP2:%.*]]) {
+; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw i32 [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label [[DOTPREHEADER:%.*]], label [[TMP6:%.*]]
+; CHECK:       .preheader:
+; CHECK-NEXT:    br label [[TMP7:%.*]]
+; CHECK:       .loopexit:
+; CHECK-NEXT:    br label [[TMP6]]
+; CHECK:       6:
+; CHECK-NEXT:    ret void
+; CHECK:       7:
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i32 [ [[TMP4]], [[DOTPREHEADER]] ], [ [[LSR_IV_NEXT:%.*]], [[TMP7]] ]
+; CHECK-NEXT:    [[DIV:%.*]] = phi i32 [ 0, [[DOTPREHEADER]] ], [ [[ADD1:%.*]], [[TMP7]] ]
+; CHECK-NEXT:    [[CNT:%.*]] = phi i32 [ 0, [[DOTPREHEADER]] ], [ [[SEL:%.*]], [[TMP7]] ]
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp eq i32 [[CNT]], [[TMP1]]
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i1 [[ICMP]] to i32
+; CHECK-NEXT:    [[ADD1]] = add nuw i32 [[DIV]], [[ZEXT]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add nuw i32 [[CNT]], 1
+; CHECK-NEXT:    [[SEL]] = select i1 [[ICMP]], i32 1, i32 [[ADD2]]
+; CHECK-NEXT:    [[TMP8:%.*]] = sext i32 [[ADD1]] to i64
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP8]]
+; CHECK-NEXT:    store i32 0, ptr [[TMP9]], align 4
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], -1
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 0
+; CHECK-NEXT:    br i1 [[TMP10]], label [[DOTLOOPEXIT:%.*]], label [[TMP7]]
+;
+  %4 = mul nsw i32 %1, %0
+  %5 = icmp sgt i32 %4, 0
+  br i1 %5, label %7, label %6
+
+6:                                                ; preds = %7, %3
+  ret void
+
+7:                                                ; preds = %7, %3
+  %8 = phi i32 [ %12, %7 ], [ 0, %3 ]
+  %9 = udiv i32 %8, %1
+  %10 = sext i32 %9 to i64
+  %11 = getelementptr inbounds i32, ptr %2, i64 %10
+  store i32 0, ptr %11, align 4
+  %12 = add nuw nsw i32 %8, 1
+  %13 = icmp eq i32 %12, %4
+  br i1 %13, label %6, label %7
+}