Index: llvm/lib/Transforms/Scalar/LoopFlatten.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -166,6 +166,20 @@
                    m_c_Add(m_Specific(InductionPHI), m_ConstantInt<1>()))) {
     Increment = dyn_cast<BinaryOperator>(Compare->getOperand(1));
     Limit = Compare->getOperand(0);
+  } else {
+    // The compare may have been altered by another transformation
+    // (e.g. icmp ult %inc, limit -> icmp ult %j, limit-1).
+    // In this case the increment is obtained from the InductionPHI
+    // and the limit is the RHS of the compare + 1.
+    Value *LatchValue = InductionPHI->getIncomingValueForBlock(Latch);
+    if (match(LatchValue,
+              m_c_Add(m_Specific(InductionPHI), m_ConstantInt<1>()))) {
+      Increment = dyn_cast<BinaryOperator>(LatchValue);
+      ConstantInt *RHS = cast<ConstantInt>(Compare->getOperand(1));
+      ConstantInt *One = ConstantInt::get(RHS->getType(), 1, true);
+      Limit = ConstantInt::get(Compare->getContext(),
+                               RHS->getValue() + One->getValue());
+    }
   }
   if (!Increment || Increment->hasNUsesOrMore(3)) {
     LLVM_DEBUG(dbgs() << "Cound not find valid increment\n");
@@ -383,7 +397,13 @@
       LLVM_DEBUG(dbgs() << "Use is optimisable\n");
       ValidOuterPHIUses.insert(MatchedMul);
       FI.LinearIVUses.insert(U);
-    } else {
+    } else if (U == FI.InnerBranch->getCondition())
+      // The use is in the compare which is also the condition of the inner
+      // branch. In this case the compare has been altered by another
+      // transformation (e.g. icmp ult %inc, limit -> icmp ult %j, limit-1).
+      // Ignore this use as the compare gets removed later anyway.
+      continue;
+    else {
       LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
       return false;
     }
Index: llvm/test/Transforms/LoopFlatten/loop-flatten-negative.ll
===================================================================
--- llvm/test/Transforms/LoopFlatten/loop-flatten-negative.ll
+++ llvm/test/Transforms/LoopFlatten/loop-flatten-negative.ll
@@ -341,6 +341,70 @@
   ret i32 10
 }
 
+; test_10 and test_11 cover the case where the inner limit is a constant
+; integer (e.g. 20), so that InstCombine performs the transformation:
+; icmp ult i32 %inc, 20 -> icmp ult i32 %j, 20-step.
+
+; test_10: If the step is not 1, the loop shouldn't be flattened.
+define i32 @test_10(i32* nocapture %A) {
+entry:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:
+  %i.017 = phi i32 [ 0, %entry ], [ %inc, %for.cond.cleanup3 ]
+  %mul = mul i32 %i.017, 20
+  br label %for.body4
+
+for.body4:
+  %j.016 = phi i32 [ 0, %for.cond1.preheader ], [ %add5, %for.body4 ]
+  %add = add i32 %j.016, %mul
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add
+  store i32 30, i32* %arrayidx, align 4
+  %add5 = add nuw nsw i32 %j.016, 2
+  %cmp2 = icmp ult i32 %j.016, 18
+  br i1 %cmp2, label %for.body4, label %for.cond.cleanup3
+
+for.cond.cleanup3:
+  %inc = add i32 %i.017, 1
+  %cmp = icmp ult i32 %inc, 11
+  br i1 %cmp, label %for.cond1.preheader, label %for.cond.cleanup
+
+for.cond.cleanup:
+  %0 = load i32, i32* %A, align 4
+  ret i32 %0
+}
+
+; test_11: The inner induction variable is used in a compare which
+; isn't the condition of the inner branch.
+define i32 @test_11(i32* nocapture %A) {
+entry:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:
+  %i.020 = phi i32 [ 0, %entry ], [ %inc7, %for.cond.cleanup3 ]
+  %mul = mul i32 %i.020, 20
+  br label %for.body4
+
+for.body4:
+  %j.019 = phi i32 [ 0, %for.cond1.preheader ], [ %inc, %for.body4 ]
+  %cmp5 = icmp ult i32 %j.019, 5
+  %cond = select i1 %cmp5, i32 30, i32 15
+  %add = add i32 %j.019, %mul
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add
+  store i32 %cond, i32* %arrayidx, align 4
+  %inc = add nuw nsw i32 %j.019, 1
+  %cmp2 = icmp ult i32 %j.019, 19
+  br i1 %cmp2, label %for.body4, label %for.cond.cleanup3
+
+for.cond.cleanup3:
+  %inc7 = add i32 %i.020, 1
+  %cmp = icmp ult i32 %inc7, 11
+  br i1 %cmp, label %for.cond1.preheader, label %for.cond.cleanup
+
+for.cond.cleanup:
+  %0 = load i32, i32* %A, align 4
+  ret i32 %0
+}
 
 ; Outer loop conditional phi
 define i32 @e() {
Index: llvm/test/Transforms/LoopFlatten/loop-flatten.ll
===================================================================
--- llvm/test/Transforms/LoopFlatten/loop-flatten.ll
+++ llvm/test/Transforms/LoopFlatten/loop-flatten.ll
@@ -586,6 +586,59 @@
   ret i32 10
 }
 
+; CHECK-LABEL: test9
+; When the inner loop limit is a constant integer (e.g. 20) and the step
+; is 1, InstCombine performs the transformation:
+; icmp ult i32 %inc, 20 -> icmp ult i32 %j, 19.
+; This is an 'unoptimizable' use of the inner induction variable %j, but
+; we should still flatten the loop because this compare instruction is
+; removed later anyway.
+define i32 @test9(i32* nocapture %A) {
+entry:
+  br label %for.cond1.preheader
+; CHECK: entry:
+; CHECK: %flatten.tripcount = mul i32 20, 11
+; CHECK: br label %for.cond1.preheader
+
+for.cond1.preheader:
+  %i.017 = phi i32 [ 0, %entry ], [ %inc6, %for.cond.cleanup3 ]
+  %mul = mul i32 %i.017, 20
+  br label %for.body4
+; CHECK: for.cond1.preheader:
+; CHECK:   %i.017 = phi i32 [ 0, %entry ], [ %inc6, %for.cond.cleanup3 ]
+; CHECK:   %mul = mul i32 %i.017, 20
+; CHECK:   br label %for.body4
+
+for.cond.cleanup3:
+  %inc6 = add i32 %i.017, 1
+  %cmp = icmp ult i32 %inc6, 11
+  br i1 %cmp, label %for.cond1.preheader, label %for.cond.cleanup
+; CHECK: for.cond.cleanup3:
+; CHECK:   %inc6 = add i32 %i.017, 1
+; CHECK:   %cmp = icmp ult i32 %inc6, %flatten.tripcount
+; CHECK:   br i1 %cmp, label %for.cond1.preheader, label %for.cond.cleanup
+
+for.body4:
+  %j.016 = phi i32 [ 0, %for.cond1.preheader ], [ %inc, %for.body4 ]
+  %add = add i32 %j.016, %mul
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add
+  store i32 30, i32* %arrayidx, align 4
+  %inc = add nuw nsw i32 %j.016, 1
+  %cmp2 = icmp ult i32 %j.016, 19
+  br i1 %cmp2, label %for.body4, label %for.cond.cleanup3
+; CHECK: for.body4
+; CHECK:   %j.016 = phi i32 [ 0, %for.cond1.preheader ]
+; CHECK:   %add = add i32 %j.016, %mul
+; CHECK:   %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.017
+; CHECK:   store i32 30, i32* %arrayidx, align 4
+; CHECK:   %inc = add nuw nsw i32 %j.016, 1
+; CHECK:   %cmp2 = icmp ult i32 %j.016, 19
+; CHECK:   br label %for.cond.cleanup3
+
+for.cond.cleanup:
+  %0 = load i32, i32* %A, align 4
+  ret i32 %0
+}
 
 declare i32 @func(i32)