diff --git a/mlir/include/mlir/Dialect/SCF/Passes.h b/mlir/include/mlir/Dialect/SCF/Passes.h
--- a/mlir/include/mlir/Dialect/SCF/Passes.h
+++ b/mlir/include/mlir/Dialect/SCF/Passes.h
@@ -28,6 +28,8 @@
 /// better vectorization.
 std::unique_ptr<Pass> createForLoopPeelingPass();
 
+std::unique_ptr<Pass> createAffineMinSCFCanonicalizationPass();
+
 /// Creates a loop fusion pass which fuses parallel loops.
 std::unique_ptr<Pass> createParallelLoopFusionPass();
 
diff --git a/mlir/include/mlir/Dialect/SCF/Passes.td b/mlir/include/mlir/Dialect/SCF/Passes.td
--- a/mlir/include/mlir/Dialect/SCF/Passes.td
+++ b/mlir/include/mlir/Dialect/SCF/Passes.td
@@ -17,6 +17,14 @@
   let dependentDialects = ["memref::MemRefDialect"];
 }
 
+def AffineMinSCFCanonicalization
+    : FunctionPass<"canonicalize-scf-affine-min"> {
+  let summary = "Canonicalize affine.min ops in the context of SCF loops with "
+                "known bounds";
+  let constructor = "mlir::createAffineMinSCFCanonicalizationPass()";
+  let dependentDialects = ["AffineDialect"];
+}
+
 def SCFForLoopPeeling
     : FunctionPass<"for-loop-peeling"> {
   let summary = "Peel `for` loops at their upper bounds.";
diff --git a/mlir/include/mlir/Dialect/SCF/Transforms.h b/mlir/include/mlir/Dialect/SCF/Transforms.h
--- a/mlir/include/mlir/Dialect/SCF/Transforms.h
+++ b/mlir/include/mlir/Dialect/SCF/Transforms.h
@@ -13,10 +13,12 @@
 #ifndef MLIR_DIALECT_SCF_TRANSFORMS_H_
 #define MLIR_DIALECT_SCF_TRANSFORMS_H_
 
+#include "mlir/Support/LLVM.h"
 #include "llvm/ADT/ArrayRef.h"
 
 namespace mlir {
 
+class AffineMinOp;
 class ConversionTarget;
 struct LogicalResult;
 class MLIRContext;
@@ -26,6 +28,7 @@
 class RewritePatternSet;
 using OwningRewritePatternList = RewritePatternSet;
 class Operation;
+class Value;
 
 namespace scf {
 
@@ -34,6 +37,21 @@
 class ParallelOp;
 class ForOp;
 
+/// Try to canonicalize an affine.min operation in the context of for loops with
+/// a known range.
+///
+/// `loopMatcher` is used to retrieve loop bounds and step size for a given
+/// iteration variable: If the first parameter is an iteration variable, return
+/// lower/upper bounds via the second/third parameter and the step size via the
+/// last parameter. The function should return `success` in that case. If the
+/// first parameter is not an iteration variable, return `failure`.
+///
+/// Note: `loopMatcher` allows this function to be used with any "for loop"-like
+/// operation (scf.for, scf.parallel and even ops defined in other dialects).
+LogicalResult canonicalizeAffineMinOpInLoop(
+    AffineMinOp minOp, RewriterBase &rewriter,
+    function_ref<LogicalResult(Value, Value &, Value &, Value &)> loopMatcher);
+
 /// Fuses all adjacent scf.parallel operations with identical bounds and step
 /// into one scf.parallel operations. Uses a naive aliasing and dependency
 /// analysis.
@@ -139,6 +157,11 @@
 void populateSCFLoopPipeliningPatterns(RewritePatternSet &patterns,
                                        const PipeliningOption &options);
 
+/// Populate patterns for canonicalizing operations inside SCF loop bodies.
+/// At the moment, only affine.min computations with iteration variables,
+/// loop bounds and loop steps are canonicalized.
+void populateSCFLoopBodyCanonicalizationPatterns(RewritePatternSet &patterns);
+
 } // namespace scf
 } // namespace mlir
 
diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
--- a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
@@ -396,6 +396,81 @@
   return success();
 }
 
+/// Canonicalize AffineMinOp operations in the context of for loops with a known
+/// range. Call `canonicalizeAffineMinOp` and add the following constraints to
+/// the constraint system (along with the missing dimensions):
+///
+/// * iv >= lb
+/// * iv < lb + step * ((ub - lb - 1) floorDiv step) + 1
+///
+/// Note: Due to limitations of FlatAffineConstraints, only constant step sizes
+/// are currently supported.
+LogicalResult mlir::scf::canonicalizeAffineMinOpInLoop(
+    AffineMinOp minOp, RewriterBase &rewriter,
+    function_ref<LogicalResult(Value, Value &, Value &, Value &)> loopMatcher) {
+  SmallVector<Value> dims;
+  SmallVector<int64_t> steps;
+
+  // Find all iteration variables among `minOp`'s operands add constrain them.
+  for (Value operand : minOp.operands()) {
+    // If `operand` is an iteration variable: Find corresponding loop
+    // bounds and step.
+    Value iv = operand, lb, ub, step;
+    if (failed(loopMatcher(operand, lb, ub, step)))
+      continue;
+
+    // FlatAffineConstraints does not support semi-affine expressions.
+    // Therefore, only constant step values are supported.
+    auto stepInt = getConstantIntValue(step);
+    if (!stepInt)
+      continue;
+
+    dims.push_back(iv);
+    dims.push_back(lb);
+    dims.push_back(ub);
+    steps.push_back(*stepInt);
+  }
+
+  return canonicalizeAffineMinOp(
+      rewriter, minOp, dims, [&](FlatAffineConstraints &constraints) {
+        for (int i = 0; i < dims.size(); i += 3) {
+          unsigned dimIv = i, dimLb = i + 1, dimUb = i + 2;
+          Value lb = dims[dimLb], ub = dims[dimUb];
+          int64_t step = steps[i / 3];
+
+          // If loop lower/upper bounds are constant: Add constraint.
+          Optional<int64_t> lbInt = getConstantIntValue(lb);
+          Optional<int64_t> ubInt = getConstantIntValue(ub);
+          if (lbInt)
+            constraints.setIdToConstant(i + 1, *lbInt);
+          if (ubInt)
+            constraints.setIdToConstant(i + 2, *ubInt);
+
+          // iv >= lb (equiv.: iv - lb >= 0)
+          SmallVector<int64_t> ineqLb(constraints.getNumCols(), 0);
+          ineqLb[dimIv] = 1;
+          ineqLb[dimLb] = -1;
+          constraints.addInequality(ineqLb);
+
+          // iv < lb + step * ((ub - lb - 1) floorDiv step) + 1
+          AffineExpr exprLb = lbInt ? rewriter.getAffineConstantExpr(*lbInt)
+                                    : rewriter.getAffineDimExpr(dimLb);
+          AffineExpr exprUb = ubInt ? rewriter.getAffineConstantExpr(*ubInt)
+                                    : rewriter.getAffineDimExpr(dimUb);
+          AffineExpr ivUb =
+              exprLb + 1 + (step * ((exprUb - exprLb - 1).floorDiv(step)));
+          auto map = AffineMap::get(
+              /*dimCount=*/constraints.getNumDimIds(),
+              /*symbolCount=*/constraints.getNumSymbolIds(), /*result=*/ivUb);
+
+          (void)constraints.addLowerOrUpperBound(dimIv, map, /*eq=*/false,
+                                                 /*lower=*/false);
+        }
+
+        return success();
+      });
+}
+
 static constexpr char kPeeledLoopLabel[] = "__peeled_loop__";
 
 namespace {
@@ -415,6 +490,39 @@
     return success();
   }
 };
+
+/// Canonicalize AffineMinOp operations in the context of scf.for and
+/// scf.parallel loops with a known range.
+struct AffineMinSCFCanonicalizationPattern
+    : public OpRewritePattern<AffineMinOp> {
+  using OpRewritePattern<AffineMinOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(AffineMinOp minOp,
+                                PatternRewriter &rewriter) const override {
+    return scf::canonicalizeAffineMinOpInLoop(
+        minOp, rewriter,
+        /*loopMatcher=*/[](Value iv, Value &lb, Value &ub, Value &step) {
+          if (scf::ForOp forOp = scf::getForInductionVarOwner(iv)) {
+            lb = forOp.lowerBound();
+            ub = forOp.upperBound();
+            step = forOp.step();
+            return success();
+          }
+          if (scf::ParallelOp parOp =
+                  scf::getParallelForInductionVarOwner(iv)) {
+            for (unsigned idx = 0; idx < parOp.getNumLoops(); ++idx) {
+              if (parOp.getInductionVars()[idx] == iv) {
+                lb = parOp.lowerBound()[idx];
+                ub = parOp.upperBound()[idx];
+                step = parOp.step()[idx];
+                return success();
+              }
+            }
+          }
+          return failure();
+        });
+  }
+};
 } // namespace
 
 namespace {
@@ -445,8 +553,23 @@
     funcOp.walk([](ForOp op) { op->removeAttr(kPeeledLoopLabel); });
   }
 };
+
+struct AffineMinSCFCanonicalization
+    : public AffineMinSCFCanonicalizationBase<AffineMinSCFCanonicalization> {
+  void runOnFunction() override {
+    FuncOp funcOp = getFunction();
+    MLIRContext *ctx = funcOp.getContext();
+    RewritePatternSet patterns(ctx);
+    patterns.add<AffineMinSCFCanonicalizationPattern>(ctx);
+    (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
+  }
+};
 } // namespace
 
+std::unique_ptr<Pass> mlir::createAffineMinSCFCanonicalizationPass() {
+  return std::make_unique<AffineMinSCFCanonicalization>();
+}
+
 std::unique_ptr<Pass> mlir::createParallelLoopSpecializationPass() {
   return std::make_unique<ParallelLoopSpecialization>();
 }
@@ -458,3 +581,8 @@
 std::unique_ptr<Pass> mlir::createForLoopPeelingPass() {
   return std::make_unique<ForLoopPeeling>();
 }
+
+void mlir::scf::populateSCFLoopBodyCanonicalizationPatterns(
+    RewritePatternSet &patterns) {
+  patterns.insert<AffineMinSCFCanonicalizationPattern>(patterns.getContext());
+}
diff --git a/mlir/test/Dialect/SCF/canonicalize-scf-affine-min.mlir b/mlir/test/Dialect/SCF/canonicalize-scf-affine-min.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Dialect/SCF/canonicalize-scf-affine-min.mlir
@@ -0,0 +1,157 @@
+// RUN: mlir-opt %s -canonicalize-scf-affine-min -split-input-file | FileCheck %s
+
+// Note: This is mostly a copy of test/Dialect/Linalg/fold-affine-min-scf.mlir
+
+// CHECK-LABEL: scf_for
+func @scf_for(%A : memref<i64>, %step : index) {
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %c2 = constant 2 : index
+  %c7 = constant 7 : index
+  %c4 = constant 4 : index
+  %c16 = constant 16 : index
+  %c256 = constant 256 : index
+
+  // CHECK:      %[[C2:.*]] = constant 2 : i64
+  //      CHECK: scf.for
+  // CHECK-NEXT:   memref.store %[[C2]], %{{.*}}[] : memref<i64>
+  scf.for %i = %c0 to %c4 step %c2 {
+    %1 = affine.min affine_map<(d0, d1)[] -> (2, d1 - d0)> (%i, %c4)
+    %2 = index_cast %1: index to i64
+    memref.store %2, %A[]: memref<i64>
+  }
+
+  //      CHECK: scf.for
+  // CHECK-NEXT:   memref.store %[[C2]], %{{.*}}[] : memref<i64>
+  scf.for %i = %c1 to %c7 step %c2 {
+    %1 = affine.min affine_map<(d0)[s0] -> (s0 - d0, 2)> (%i)[%c7]
+    %2 = index_cast %1: index to i64
+    memref.store %2, %A[]: memref<i64>
+  }
+
+  // This should not canonicalize because: 4 - %i may take the value 1 < 2.
+  //     CHECK:   scf.for
+  //     CHECK:     affine.min
+  //     CHECK:     index_cast
+  scf.for %i = %c1 to %c4 step %c2 {
+    %1 = affine.min affine_map<(d0)[s0] -> (2, s0 - d0)> (%i)[%c4]
+    %2 = index_cast %1: index to i64
+    memref.store %2, %A[]: memref<i64>
+  }
+
+  // This should canonicalize only partly: 256 - %i <= 256.
+  //     CHECK:   scf.for
+  //     CHECK:     affine.apply
+  //     CHECK:     index_cast
+  scf.for %i = %c1 to %c256 step %c16 {
+    %1 = affine.min affine_map<(d0) -> (256, 256 - d0)> (%i)
+    %2 = index_cast %1: index to i64
+    memref.store %2, %A[]: memref<i64>
+  }
+
+  // This example should simplify but affine_map is currently missing
+  // semi-affine canonicalizations: `((s0 * 42 - 1) floordiv s0) * s0`
+  // should evaluate to 41 * s0.
+  // Note that this may require positivity assumptions on `s0`.
+  // Revisit when support is added.
+  // CHECK: scf.for
+  // CHECK:   affine.min
+  // CHECK:   index_cast
+  %ub = affine.apply affine_map<(d0) -> (42 * d0)> (%step)
+  scf.for %i = %c0 to %ub step %step {
+    %1 = affine.min affine_map<(d0, d1, d2) -> (d0, d1 - d2)> (%step, %ub, %i)
+    %2 = index_cast %1: index to i64
+    memref.store %2, %A[]: memref<i64>
+  }
+
+  // This example should simplify but affine_map is currently missing
+  // semi-affine canonicalizations.
+  // This example should simplify but affine_map is currently missing
+  // semi-affine canonicalizations: ` -(((s0 * s0 - 1) floordiv s0) * s0)`
+  // should evaluate to (s0 - 1) * s0.
+  // Note that this may require positivity assumptions on `s0`.
+  // Revisit when support is added.
+  // CHECK: scf.for
+  // CHECK:   affine.min
+  // CHECK:   index_cast
+  %ub2 = affine.apply affine_map<(d0)[s0] -> (s0 * d0)> (%step)[%step]
+  scf.for %i = %c0 to %ub2 step %step {
+    %1 = affine.min affine_map<(d0, d1, d2) -> (d0, d2 - d1)> (%step, %i, %ub2)
+    %2 = index_cast %1: index to i64
+    memref.store %2, %A[]: memref<i64>
+  }
+
+  // This is an invalid loop. It should not be touched by the canonicalization
+  // pattern.
+  // CHECK: scf.for
+  // CHECK:   affine.min
+  // CHECK:   index_cast
+  scf.for %i = %c256 to %c1 step %c1 {
+    %1 = affine.min affine_map<(d0)[s0] -> (s0 + d0, 0)> (%i)[%c7]
+    %2 = index_cast %1: index to i64
+    memref.store %2, %A[]: memref<i64>
+  }
+
+  return
+}
+
+// -----
+
+// CHECK-LABEL: scf_parallel
+func @scf_parallel(%A : memref<i64>, %step : index) {
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %c2 = constant 2 : index
+  %c7 = constant 7 : index
+  %c4 = constant 4 : index
+
+  // CHECK:   %[[C2:.*]] = constant 2 : i64
+  // CHECK: scf.parallel
+  // CHECK-NEXT:   memref.store %[[C2]], %{{.*}}[] : memref<i64>
+  scf.parallel (%i) = (%c0) to (%c4) step (%c2) {
+    %1 = affine.min affine_map<(d0, d1)[] -> (2, d1 - d0)> (%i, %c4)
+    %2 = index_cast %1: index to i64
+    memref.store %2, %A[]: memref<i64>
+  }
+
+  // CHECK: scf.parallel
+  // CHECK-NEXT:   memref.store %[[C2]], %{{.*}}[] : memref<i64>
+  scf.parallel (%i) = (%c1) to (%c7) step (%c2) {
+    %1 = affine.min affine_map<(d0)[s0] -> (2, s0 - d0)> (%i)[%c7]
+    %2 = index_cast %1: index to i64
+    memref.store %2, %A[]: memref<i64>
+  }
+
+  // This example should simplify but affine_map is currently missing
+  // semi-affine canonicalizations.
+  // This affine map does not currently evaluate to (0, 0):
+  //   (d0)[s0] -> (s0 mod s0, (-((d0 floordiv s0) * s0) + s0 * 42) mod s0)
+  // TODO: Revisit when support is added.
+  // CHECK: scf.parallel
+  // CHECK:   affine.min
+  // CHECK:   index_cast
+  %ub = affine.apply affine_map<(d0) -> (42 * d0)> (%step)
+  scf.parallel (%i) = (%c0) to (%ub) step (%step) {
+    %1 = affine.min affine_map<(d0, d1, d2) -> (d0, d2 - d1)> (%step, %i, %ub)
+    %2 = index_cast %1: index to i64
+    memref.store %2, %A[]: memref<i64>
+  }
+
+  // This example should simplify but affine_map is currently missing
+  // semi-affine canonicalizations.
+  // This affine map does not currently evaluate to (0, 0):
+  //   (d0)[s0] -> (s0 mod s0, (-((d0 floordiv s0) * s0) + s0 * s0) mod s0)
+  // TODO: Revisit when support is added.
+  // CHECK: scf.parallel
+  // CHECK:   affine.min
+  // CHECK:   index_cast
+  %ub2 = affine.apply affine_map<(d0)[s0] -> (s0 * d0)> (%step)[%step]
+  scf.parallel (%i) = (%c0) to (%ub2) step (%step) {
+    %1 = affine.min affine_map<(d0, d1, d2) -> (d0, d2 - d1)> (%step, %i, %ub2)
+    %2 = index_cast %1: index to i64
+    memref.store %2, %A[]: memref<i64>
+  }
+
+  return
+}
+