diff --git a/mlir/include/mlir/Dialect/SCF/Transforms.h b/mlir/include/mlir/Dialect/SCF/Transforms.h
--- a/mlir/include/mlir/Dialect/SCF/Transforms.h
+++ b/mlir/include/mlir/Dialect/SCF/Transforms.h
@@ -17,6 +17,7 @@
 
 namespace mlir {
 
+class AffineMinOp;
 class ConversionTarget;
 class LogicalResult;
 class MLIRContext;
@@ -26,6 +27,7 @@
 class RewritePatternSet;
 using OwningRewritePatternList = RewritePatternSet;
 class Operation;
+class Value;
 
 namespace scf {
 
@@ -66,10 +68,35 @@
 /// ```
 ///
 /// This function rewrites the given scf.for loop in-place and creates a new
-/// scf.if operation (returned via `ifOp`) for the last iteration.
+/// scf.if operation for the last iteration, returned via `ifOp`. The boundary
+/// at which the loop is split (new upper bound) is returned via `splitBound`.
+LogicalResult peelForLoop(RewriterBase &b, ForOp forOp, scf::IfOp &ifOp,
+                          Value &splitBound);
+
+/// Try to simplify an affine.min operation after loop peeling. This function
+/// detects affine.min operations such as (ub is the previous upper bound of the
+/// unpeeled loop):
+/// ```
+/// #map = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>
+/// %r = affine.min #map(%iv)[%step, %ub]
+/// ```
+/// and rewrites them into (in the case of the peeled loop):
+/// ```
+/// %r = %step
+/// ```
+/// affine.min operations inside the generated scf.if operation are rewritten in
+/// a similar way.
+///
+/// This function builds up a set of constraints, capable of proving that:
+/// * min(step, ub - iv) == step inside the peeled loop
+/// * min(step, ub - iv) == ub - iv inside the if-statement
 ///
-/// TODO: Simplify affine.min ops inside the new loop/if statement.
-LogicalResult peelForLoop(RewriterBase &b, ForOp forOp, scf::IfOp &ifOp);
+/// Note: `ub` is the previous upper bound of the loop (before peeling).
+/// `insideLoop` must be true for affine.min ops inside the loop and false for
+/// affine.min ops inside the scf.if op.
+LogicalResult rewritePeeledAffineOp(RewriterBase &rewriter, AffineMinOp minOp,
+                                    Value iv, Value ub, Value step,
+                                    bool insideLoop);
 
 /// Tile a parallel loop of the form
 ///   scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
--- a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "PassDetail.h"
+#include "mlir/Analysis/AffineStructures.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/SCF/Passes.h"
 #include "mlir/Dialect/SCF/SCF.h"
@@ -98,7 +99,7 @@
 /// into a for loop where the step divides the iteration space evenly, followed
 /// by an scf.if for the last (partial) iteration (if any).
 LogicalResult mlir::scf::peelForLoop(RewriterBase &b, ForOp forOp,
-                                     scf::IfOp &ifOp) {
+                                     scf::IfOp &ifOp, Value &splitBound) {
   RewriterBase::InsertionGuard guard(b);
   auto lbInt = getConstantIntValue(forOp.lowerBound());
   auto ubInt = getConstantIntValue(forOp.upperBound());
@@ -116,7 +117,7 @@
   bindDims(b.getContext(), dim0, dim1, dim2);
   // New upper bound: %ub - (%ub - %lb) mod %step
   auto modMap = AffineMap::get(3, 0, {dim1 - ((dim1 - dim0) % dim2)});
-  Value splitBound = b.createOrFold<AffineApplyOp>(
+  splitBound = b.createOrFold<AffineApplyOp>(
       loc, modMap,
       ValueRange{forOp.lowerBound(), forOp.upperBound(), forOp.step()});
 
@@ -153,6 +154,136 @@
   return success();
 }
 
+/// Try to simplify an affine.min operation `minOp` after loop peeling.
+///
+/// 1. Set up a constraint system with dimensions for `iv`, `ub` (previous
+///    upper bound), `step`, `minOp` and `minOpUb` (upper bound of `minOp`).
+/// 2. a. If inside the main loop: Add inequality: ub - iv >= step
+/// 2. b. If inside the if-statement: Add inequality: ub - iv < step
+/// 3. Add each result of `minOp` as a dimension `r_i` to the constraint system.
+/// 4. Compute an upper bound of `minOp` and bind it to `minOpUb`.
+/// 5. For each result of `minOp`: Prove that r_i >= minOpUb. If this is the
+///    case, upper_bound(minOp) == lower_bound(minOp) and `minOp` can be
+///    replaced with that bound.
+LogicalResult mlir::scf::rewritePeeledAffineOp(RewriterBase &rewriter,
+                                               AffineMinOp minOp, Value iv,
+                                               Value ub, Value step,
+                                               bool insideLoop) {
+  RewriterBase::InsertionGuard guard(rewriter);
+  AffineMap minOpMap = minOp.getAffineMap();
+
+  // Set up constraint system.
+  FlatAffineConstraints constraints;
+  static const unsigned kDimIv = 0;      // Iteration variable
+  static const unsigned kDimUb = 1;      // Upper loop bound before peeling
+  static const unsigned kDimStep = 2;    // Loop step size
+  static const unsigned kDimMinOp = 3;   // `minOp`
+  static const unsigned kDimMinOpUb = 4; // Upper bound of `minOp`
+  // Add an SSA value as a dimension to the constraint system. If the SSA value
+  // is a constant, a new equality is added, setting the dimension to the
+  // constant value.
+  auto addDim = [&](unsigned dimId, Value value = nullptr) {
+    constraints.addDimId(dimId, value);
+    if (value)
+      // Note: Fails if no constant value can be derived from `value`.
+      (void)constraints.addLowerOrUpperBound(
+          dimId, rewriter.getDimIdentityMap(), ValueRange{value},
+          /*eq=*/true, /*lower=*/true, /*composeMapAndOperands=*/false);
+  };
+
+  addDim(kDimIv, iv);
+  addDim(kDimUb, ub);
+  addDim(kDimStep, step);
+  addDim(kDimMinOp);
+  addDim(kDimMinOpUb);
+
+  // Add loop peeling invariant. This is the main piece of knowledge that
+  // enables AffineMinOp simplification.
+  if (insideLoop) {
+    // ub - iv >= step (equiv.: -iv + ub - step + 0*minOp + 0*minOpUb + 0 >= 0)
+    // Intuitively: Inside the peeled loop, every iteration is a "full"
+    // iteration, i.e., step divides the iteration space `ub - lb` evenly.
+    constraints.addInequality({-1, 1, -1, 0, 0, 0});
+  } else {
+    // ub - iv < step (equiv.: iv + -ub + step + 0*minOp + 0*minOpUb - 1 >= 0)
+    // Intuitively: `iv` is the split bound here, i.e., the iteration variable
+    // value of the very last iteration (in the unpeeled loop). At that point,
+    // there are less than `step` elements remaining. (Otherwise, the peeled
+    // loop would run for at least one more iteration.)
+    constraints.addInequality({1, -1, 1, 0, 0, -1});
+  }
+
+  // Add an inequality for each result expr_i of minOpMap: minOp <= expr_i
+  if (failed(constraints.addLowerOrUpperBound(
+          kDimMinOp, minOpMap, minOp.operands(), /*eq=*/false, /*lower=*/false,
+          /*composeMapAndOperands=*/false)))
+    return failure();
+
+  // Add a helper dimension r_i for each result expr_i of minOpMap.
+  unsigned resultStartDim = constraints.getNumDimIds();
+  unsigned numResults = minOpMap.getNumResults();
+  for (unsigned i = 0; i < numResults; ++i) {
+    constraints.addDimId(resultStartDim + i);
+    // Add an equality: r_i = expr_i
+    if (failed(constraints.addLowerOrUpperBound(
+            resultStartDim + i, minOpMap.getSubMap({i}), minOp.operands(),
+            /*eq=*/true, /*lower=*/true, /*composeMapAndOperands=*/false)))
+      return failure();
+  }
+
+  // Try to compute an upper bound for minOp, expressed in terms of the other
+  // dimensions iv, ub and step.
+  SmallVector<AffineMap> minOpValLb(1), minOpValUb(1);
+  constraints.getSliceBounds(kDimMinOp, 1, minOp.getContext(), &minOpValLb,
+                             &minOpValUb);
+  if (!minOpValUb[0] || minOpValUb[0].getNumResults() != 1)
+    return failure(); // No upper bound (null map) or not a single bound.
+
+  // Add an equality: kDimMinOpUb = minOpValUb[0]
+  std::vector<SmallVector<int64_t, 8>> flattened;
+  if (failed(getFlattenedAffineExprs(minOpValUb[0], &flattened)))
+    return failure();
+  assert(flattened.size() == 1);
+  SmallVector<int64_t, 8> &eq = flattened[0];
+  eq.insert(eq.begin() + kDimMinOp, 0); // Add back kDimMinOp dimension.
+  assert(eq.size() == constraints.getNumCols());
+  eq[kDimMinOpUb] = -1;
+  constraints.addEquality(eq);
+
+  // Prove that each result of minOpMap has a lower bound that is equal (or
+  // greater than) the upper bound of minOp (`kDimMinOpUb`). In that case,
+  // minOp can be replaced with the bound. I.e., prove that for each result
+  // expr_i (represented by dimension r_i):
+  //
+  // r_i >= minOpUb
+  //
+  // To prove this inequality, add its negation to the constraint set and prove
+  // that the constraint set is empty.
+  for (unsigned i = resultStartDim; i < resultStartDim + numResults; ++i) {
+    FlatAffineConstraints newConstr(constraints);
+    // Add inequality: r_i < minOpUb (equiv.: minOpUb - r_i - 1 >= 0)
+    SmallVector<int64_t> ineq(newConstr.getNumCols(), 0);
+    ineq[kDimMinOpUb] = 1;
+    ineq[i] = -1;
+    ineq[newConstr.getNumCols() - 1] = -1;
+    newConstr.addInequality(ineq);
+    // If the constraint set is empty:
+    // \not \exists r_i: r_i < minOpUb
+    // ==> \forall r_i: r_i >= minOpUb
+    if (!newConstr.isEmpty())
+      return failure();
+  }
+
+  // Lower and upper bound of minOp are equal. Replace minOp with its upper
+  // bound.
+  auto newMap = AffineMap::get(/*dimCount=*/3, /*symbolCount=*/0,
+                               minOpValUb[0].getResult(0));
+  rewriter.setInsertionPoint(minOp);
+  rewriter.replaceOpWithNewOp<AffineApplyOp>(minOp, newMap,
+                                             ValueRange{iv, ub, step});
+  return success();
+}
+
 namespace {
 struct ForLoopPeelingPattern : public OpRewritePattern<ForOp> {
   using OpRewritePattern<ForOp>::OpRewritePattern;
@@ -164,13 +295,25 @@
     if (forOp->hasAttr(kLoopLabel))
       return failure();
 
+    Value ub = forOp.upperBound();
     scf::IfOp ifOp;
-    if (failed(peelForLoop(rewriter, forOp, ifOp)))
+    Value splitBound;
+    if (failed(peelForLoop(rewriter, forOp, ifOp, splitBound)))
       return failure();
     // Apply label, so that the same loop is not rewritten a second time.
     rewriter.updateRootInPlace(
         forOp, [&]() { forOp->setAttr(kLoopLabel, rewriter.getUnitAttr()); });
 
+    // Rewrite affine.min ops.
+    forOp.walk([&](AffineMinOp minOp) {
+      (void)scf::rewritePeeledAffineOp(rewriter, minOp, forOp.getInductionVar(),
+                                       ub, forOp.step(), /*insideLoop=*/true);
+    });
+    ifOp.walk([&](AffineMinOp minOp) {
+      (void)scf::rewritePeeledAffineOp(rewriter, minOp, splitBound, ub,
+                                       forOp.step(), /*insideLoop=*/false);
+    });
+
     return success();
   }
 };
diff --git a/mlir/test/Dialect/SCF/for-loop-peeling.mlir b/mlir/test/Dialect/SCF/for-loop-peeling.mlir
--- a/mlir/test/Dialect/SCF/for-loop-peeling.mlir
+++ b/mlir/test/Dialect/SCF/for-loop-peeling.mlir
@@ -1,22 +1,20 @@
 // RUN: mlir-opt %s -for-loop-peeling -canonicalize -split-input-file | FileCheck %s
 
 //  CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0, s1, s2] -> (s1 - (s1 - s0) mod s2)>
-//  CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>
-//  CHECK-DAG: #[[MAP2:.*]] = affine_map<()[s0, s1, s2] -> (s0, s2 - (s2 - (s2 - s1) mod s0))>
+//  CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0, s1, s2] -> (s1 - (s1 - (s1 - s0) mod s2))>
 //      CHECK: func @fully_dynamic_bounds(
 // CHECK-SAME:     %[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index
 //      CHECK:   %[[C0_I32:.*]] = constant 0 : i32
 //      CHECK:   %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[LB]], %[[UB]], %[[STEP]]]
 //      CHECK:   %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[LB]] to %[[NEW_UB]]
 // CHECK-SAME:       step %[[STEP]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) {
-//      CHECK:     %[[MINOP:.*]] = affine.min #[[MAP1]](%[[IV]])[%[[STEP]], %[[UB]]]
-//      CHECK:     %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32
+//      CHECK:     %[[CAST:.*]] = index_cast %[[STEP]] : index to i32
 //      CHECK:     %[[ADD:.*]] = addi %[[ACC]], %[[CAST]] : i32
 //      CHECK:     scf.yield %[[ADD]]
 //      CHECK:   }
 //      CHECK:   %[[HAS_MORE:.*]] = cmpi slt, %[[NEW_UB]], %[[UB]]
 //      CHECK:   %[[RESULT:.*]] = scf.if %[[HAS_MORE]] -> (i32) {
-//      CHECK:     %[[REM:.*]] = affine.min #[[MAP2]]()[%[[STEP]], %[[LB]], %[[UB]]]
+//      CHECK:     %[[REM:.*]] = affine.apply #[[MAP1]]()[%[[LB]], %[[UB]], %[[STEP]]]
 //      CHECK:     %[[CAST2:.*]] = index_cast %[[REM]]
 //      CHECK:     %[[ADD2:.*]] = addi %[[LOOP]], %[[CAST2]]
 //      CHECK:     scf.yield %[[ADD2]]
@@ -38,18 +36,16 @@
 
 // -----
 
-//  CHECK-DAG: #[[MAP:.*]] = affine_map<(d0) -> (4, -d0 + 17)>
 //      CHECK: func @fully_static_bounds(
 //  CHECK-DAG:   %[[C0_I32:.*]] = constant 0 : i32
 //  CHECK-DAG:   %[[C1_I32:.*]] = constant 1 : i32
+//  CHECK-DAG:   %[[C4_I32:.*]] = constant 4 : i32
 //  CHECK-DAG:   %[[C0:.*]] = constant 0 : index
 //  CHECK-DAG:   %[[C4:.*]] = constant 4 : index
 //  CHECK-DAG:   %[[C16:.*]] = constant 16 : index
 //      CHECK:   %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C16]]
 // CHECK-SAME:       step %[[C4]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) {
-//      CHECK:     %[[MINOP:.*]] = affine.min #[[MAP]](%[[IV]])
-//      CHECK:     %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32
-//      CHECK:     %[[ADD:.*]] = addi %[[ACC]], %[[CAST]] : i32
+//      CHECK:     %[[ADD:.*]] = addi %[[ACC]], %[[C4_I32]] : i32
 //      CHECK:     scf.yield %[[ADD]]
 //      CHECK:   }
 //      CHECK:   %[[RESULT:.*]] = addi %[[LOOP]], %[[C1_I32]] : i32
@@ -73,24 +69,22 @@
 // -----
 
 //  CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> ((s0 floordiv 4) * 4)>
-//  CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>
-//  CHECK-DAG: #[[MAP2:.*]] = affine_map<()[s0] -> (4, s0 mod 4)>
+//  CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 mod 4)>
 //      CHECK: func @dynamic_upper_bound(
 // CHECK-SAME:     %[[UB:.*]]: index
 //  CHECK-DAG:   %[[C0_I32:.*]] = constant 0 : i32
+//  CHECK-DAG:   %[[C4_I32:.*]] = constant 4 : i32
 //  CHECK-DAG:   %[[C0:.*]] = constant 0 : index
 //  CHECK-DAG:   %[[C4:.*]] = constant 4 : index
 //      CHECK:   %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[UB]]]
 //      CHECK:   %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[NEW_UB]]
 // CHECK-SAME:       step %[[C4]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) {
-//      CHECK:     %[[MINOP:.*]] = affine.min #[[MAP1]](%[[IV]])[%[[UB]]]
-//      CHECK:     %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32
-//      CHECK:     %[[ADD:.*]] = addi %[[ACC]], %[[CAST]] : i32
+//      CHECK:     %[[ADD:.*]] = addi %[[ACC]], %[[C4_I32]] : i32
 //      CHECK:     scf.yield %[[ADD]]
 //      CHECK:   }
 //      CHECK:   %[[HAS_MORE:.*]] = cmpi slt, %[[NEW_UB]], %[[UB]]
 //      CHECK:   %[[RESULT:.*]] = scf.if %[[HAS_MORE]] -> (i32) {
-//      CHECK:     %[[REM:.*]] = affine.min #[[MAP2]]()[%[[UB]]]
+//      CHECK:     %[[REM:.*]] = affine.apply #[[MAP1]]()[%[[UB]]]
 //      CHECK:     %[[CAST2:.*]] = index_cast %[[REM]]
 //      CHECK:     %[[ADD2:.*]] = addi %[[LOOP]], %[[CAST2]]
 //      CHECK:     scf.yield %[[ADD2]]
@@ -116,23 +110,21 @@
 // -----
 
 //  CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> ((s0 floordiv 4) * 4)>
-//  CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>
-//  CHECK-DAG: #[[MAP2:.*]] = affine_map<()[s0] -> (4, s0 mod 4)>
+//  CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 mod 4)>
 //      CHECK: func @no_loop_results(
 // CHECK-SAME:     %[[UB:.*]]: index, %[[MEMREF:.*]]: memref<i32>
+//  CHECK-DAG:   %[[C4_I32:.*]] = constant 4 : i32
 //  CHECK-DAG:   %[[C0:.*]] = constant 0 : index
 //  CHECK-DAG:   %[[C4:.*]] = constant 4 : index
 //      CHECK:   %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[UB]]]
 //      CHECK:   scf.for %[[IV:.*]] = %[[C0]] to %[[NEW_UB]] step %[[C4]] {
-//      CHECK:     %[[MINOP:.*]] = affine.min #[[MAP1]](%[[IV]])[%[[UB]]]
 //      CHECK:     %[[LOAD:.*]] = memref.load %[[MEMREF]][]
-//      CHECK:     %[[CAST:.*]] = index_cast %[[MINOP]] : index to i32
-//      CHECK:     %[[ADD:.*]] = addi %[[LOAD]], %[[CAST]] : i32
+//      CHECK:     %[[ADD:.*]] = addi %[[LOAD]], %[[C4_I32]] : i32
 //      CHECK:     memref.store %[[ADD]], %[[MEMREF]]
 //      CHECK:   }
 //      CHECK:   %[[HAS_MORE:.*]] = cmpi slt, %[[NEW_UB]], %[[UB]]
 //      CHECK:   scf.if %[[HAS_MORE]] {
-//      CHECK:     %[[REM:.*]] = affine.min #[[MAP2]]()[%[[UB]]]
+//      CHECK:     %[[REM:.*]] = affine.apply #[[MAP1]]()[%[[UB]]]
 //      CHECK:     %[[LOAD2:.*]] = memref.load %[[MEMREF]][]
 //      CHECK:     %[[CAST2:.*]] = index_cast %[[REM]]
 //      CHECK:     %[[ADD2:.*]] = addi %[[LOAD2]], %[[CAST2]]
@@ -153,3 +145,75 @@
   }
   return
 }
+
+// -----
+
+// Test rewriting of affine.min ops. Make sure that more general cases than
+// the ones above are successfully rewritten. Also make sure that the pattern
+// does not rewrite affine.min ops that should not be rewritten.
+
+//  CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 1)>
+//  CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0, -d0 + s1 - 1)>
+//  CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0)[s0, s1, s2] -> (s0, -d0 + s1, s2)>
+//  CHECK-DAG: #[[MAP4:.*]] = affine_map<()[s0, s1, s2] -> (s1 - (s1 - (s1 - s0) mod s2))>
+//  CHECK-DAG: #[[MAP5:.*]] = affine_map<()[s0, s1, s2] -> (s1 - (s1 - (s1 - s0) mod s2) + 1)>
+//  CHECK-DAG: #[[MAP6:.*]] = affine_map<()[s0, s1, s2] -> (s1 - (s1 - (s1 - s0) mod s2) - 1)>
+//  CHECK-DAG: #[[MAP7:.*]] = affine_map<()[s0, s1, s2, s3] -> (s0, s2 - (s2 - (s2 - s1) mod s0), s3)>
+//      CHECK: func @test_affine_min_rewrite(
+// CHECK-SAME:     %[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index,
+// CHECK-SAME:     %[[MEMREF:.*]]: memref<?xindex>, %[[SOME_VAL:.*]]: index
+//      CHECK:   scf.for %[[IV:.*]] = %[[LB]] to %{{.*}} step %[[STEP]] {
+//      CHECK:     %[[RES2:.*]] = affine.apply #[[MAP1]]()[%[[STEP]]]
+//      CHECK:     %[[RES3:.*]] = affine.min #[[MAP2]](%[[IV]])[%[[STEP]], %[[UB]]]
+//      CHECK:     %[[RES4:.*]] = affine.min #[[MAP3]](%[[IV]])[%[[STEP]], %[[UB]], %[[SOME_VAL]]]
+//      CHECK:     memref.store %[[STEP]]
+//      CHECK:     memref.store %[[STEP]]
+//      CHECK:     memref.store %[[RES2]]
+//      CHECK:     memref.store %[[RES3]]
+//      CHECK:     memref.store %[[RES4]]
+//      CHECK:   }
+//      CHECK:   scf.if {{.*}} {
+//      CHECK:     %[[RES_IF_0:.*]] = affine.apply #[[MAP4]]()[%[[LB]], %[[UB]], %[[STEP]]]
+//      CHECK:     %[[RES_IF_1:.*]] = affine.apply #[[MAP5]]()[%[[LB]], %[[UB]], %[[STEP]]]
+//      CHECK:     %[[RES_IF_2:.*]] = affine.apply #[[MAP5]]()[%[[LB]], %[[UB]], %[[STEP]]]
+//      CHECK:     %[[RES_IF_3:.*]] = affine.apply #[[MAP6]]()[%[[LB]], %[[UB]], %[[STEP]]]
+//      CHECK:     %[[RES_IF_4:.*]] = affine.min #[[MAP7]]()[%[[STEP]], %[[LB]], %[[UB]], %[[SOME_VAL]]]
+//      CHECK:     memref.store %[[RES_IF_0]]
+//      CHECK:     memref.store %[[RES_IF_1]]
+//      CHECK:     memref.store %[[RES_IF_2]]
+//      CHECK:     memref.store %[[RES_IF_3]]
+//      CHECK:     memref.store %[[RES_IF_4]]
+#map0 = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
+#map1 = affine_map<(d0, d1)[s0] -> (d0 - d1 + 1, s0)>
+#map2 = affine_map<(d0, d1)[s0] -> (s0 + 1, d0 - d1 + 1)>
+#map3 = affine_map<(d0, d1)[s0] -> (s0, d0 - d1 - 1)>
+#map4 = affine_map<(d0, d1, d2)[s0] -> (s0, d0 - d1, d2)>
+func @test_affine_min_rewrite(%lb : index, %ub: index,
+                              %step: index, %d : memref<?xindex>,
+                              %some_val: index) {
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %c2 = constant 2 : index
+  %c3 = constant 3 : index
+  %c4 = constant 4 : index
+  scf.for %iv = %lb to %ub step %step {
+    // Most common case: Rewrite min(%ub - %iv, %step) to %step.
+    %m0 = affine.min #map0(%ub, %iv)[%step]
+    // Increase %ub - %iv a little bit, pattern should still apply.
+    %m1 = affine.min #map1(%ub, %iv)[%step]
+    // Rewrite min(%ub - %iv + 1, %step + 1) to %step + 1.
+    %m2 = affine.min #map2(%ub, %iv)[%step]
+    // min(%ub - %iv - 1, %step) cannot be simplified because %ub - %iv - 1
+    // can be smaller than %step. (Can be simplified in if-statement.)
+    %m3 = affine.min #map3(%ub, %iv)[%step]
+    // min(%ub - %iv, %step, %some_val) cannot be simplified because the range
+    // of %some_val is unknown.
+    %m4 = affine.min #map4(%ub, %iv, %some_val)[%step]
+    memref.store %m0, %d[%c0] : memref<?xindex>
+    memref.store %m1, %d[%c1] : memref<?xindex>
+    memref.store %m2, %d[%c2] : memref<?xindex>
+    memref.store %m3, %d[%c3] : memref<?xindex>
+    memref.store %m4, %d[%c4] : memref<?xindex>
+  }
+  return
+}
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -1453,6 +1453,7 @@
     includes = ["include"],
     deps = [
         ":Affine",
+        ":Analysis",
         ":DialectUtils",
         ":IR",
         ":MemRefDialect",