diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h
--- a/mlir/include/mlir/Dialect/Affine/Passes.h
+++ b/mlir/include/mlir/Dialect/Affine/Passes.h
@@ -46,8 +46,11 @@
 /// ops.
 std::unique_ptr<OperationPass<func::FuncOp>> createAffineParallelizePass();
 
-/// Apply normalization transformations to affine loop-like ops.
-std::unique_ptr<OperationPass<func::FuncOp>> createAffineLoopNormalizePass();
+/// Apply normalization transformations to affine loop-like ops. If
+/// `promoteSingleIter` is true, single iteration loops are promoted (i.e., the
+/// loop is replaced by its loop body).
+std::unique_ptr<OperationPass<func::FuncOp>>
+createAffineLoopNormalizePass(bool promoteSingleIter = false);
 
 /// Performs packing (or explicit copying) of accessed memref regions into
 /// buffers in the specified faster memory space through either pointwise copies
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td
--- a/mlir/include/mlir/Dialect/Affine/Passes.td
+++ b/mlir/include/mlir/Dialect/Affine/Passes.td
@@ -381,6 +381,10 @@
 def AffineLoopNormalize : Pass<"affine-loop-normalize", "func::FuncOp"> {
   let summary = "Apply normalization transformations to affine loop-like ops";
   let constructor = "mlir::createAffineLoopNormalizePass()";
+  let options = [
+    Option<"promoteSingleIter", "promote-single-iter", "bool",
+           /*default=*/"true", "Promote single iteration loops">,
+  ];
 }
 
 def LoopCoalescing : Pass<"affine-loop-coalescing", "func::FuncOp"> {
diff --git a/mlir/include/mlir/Dialect/Affine/Utils.h b/mlir/include/mlir/Dialect/Affine/Utils.h
--- a/mlir/include/mlir/Dialect/Affine/Utils.h
+++ b/mlir/include/mlir/Dialect/Affine/Utils.h
@@ -159,14 +159,15 @@
 /// early if the op is already in a normalized form.
 void normalizeAffineParallel(AffineParallelOp op);
 
-/// Normalize an affine.for op. If the affine.for op has only a single iteration
-/// only then it is simply promoted, else it is normalized in the traditional
-/// way, by converting the lower bound to zero and loop step to one. The upper
-/// bound is set to the trip count of the loop. Original loops must have a
-/// lower bound with only a single result. There is no such restriction on upper
-/// bounds. Returns success if the loop has been normalized (or is already in
-/// the normal form).
-LogicalResult normalizeAffineFor(AffineForOp op);
+/// Normalize an affine.for op. An affine.for op is normalized by converting the
+/// lower bound to zero and loop step to one. The upper bound is set to the trip
+/// count of the loop. Original loops must have a lower bound with only a single
+/// result. There is no such restriction on upper bounds. Returns success if the
+/// loop has been normalized (or is already in the normal form). If
+/// `promoteSingleIter` is true, the loop is simply promoted if it has a single
+/// iteration.
+LogicalResult normalizeAffineFor(AffineForOp op,
+                                 bool promoteSingleIter = false);
 
 /// Traverse `e` and return an AffineExpr where all occurrences of `dim` have
 /// been replaced by either:
diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineLoopNormalize.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineLoopNormalize.cpp
--- a/mlir/lib/Dialect/Affine/Transforms/AffineLoopNormalize.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineLoopNormalize.cpp
@@ -30,13 +30,16 @@
 /// that are already in a normalized form.
 struct AffineLoopNormalizePass
     : public impl::AffineLoopNormalizeBase<AffineLoopNormalizePass> {
+  explicit AffineLoopNormalizePass(bool promoteSingleIter) {
+    this->promoteSingleIter = promoteSingleIter;
+  }
 
   void runOnOperation() override {
-    getOperation().walk([](Operation *op) {
+    getOperation().walk([&](Operation *op) {
       if (auto affineParallel = dyn_cast<AffineParallelOp>(op))
         normalizeAffineParallel(affineParallel);
       else if (auto affineFor = dyn_cast<AffineForOp>(op))
-        (void)normalizeAffineFor(affineFor);
+        (void)normalizeAffineFor(affineFor, promoteSingleIter);
     });
   }
 };
@@ -44,6 +47,6 @@
 } // namespace
 
 std::unique_ptr<OperationPass<func::FuncOp>>
-mlir::createAffineLoopNormalizePass() {
-  return std::make_unique<AffineLoopNormalizePass>();
+mlir::createAffineLoopNormalizePass(bool promoteSingleIter) {
+  return std::make_unique<AffineLoopNormalizePass>(promoteSingleIter);
 }
diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
--- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
@@ -546,15 +546,8 @@
                                     ubExprs, op.getContext());
   op.setUpperBounds(ranges.getOperands(), newUpperMap);
 }
-
-/// Normalizes affine.for ops. If the affine.for op has only a single iteration
-/// only then it is simply promoted, else it is normalized in the traditional
-/// way, by converting the lower bound to zero and loop step to one. The upper
-/// bound is set to the trip count of the loop. For now, original loops must
-/// have lower bound with a single result only. There is no such restriction on
-/// upper bounds.
-LogicalResult mlir::normalizeAffineFor(AffineForOp op) {
-  if (succeeded(promoteIfSingleIteration(op)))
+LogicalResult mlir::normalizeAffineFor(AffineForOp op, bool promoteSingleIter) {
+  if (promoteSingleIter && succeeded(promoteIfSingleIteration(op)))
     return success();
 
   // Check if the forop is already normalized.
diff --git a/mlir/test/Dialect/Affine/affine-loop-normalize.mlir b/mlir/test/Dialect/Affine/affine-loop-normalize.mlir
--- a/mlir/test/Dialect/Affine/affine-loop-normalize.mlir
+++ b/mlir/test/Dialect/Affine/affine-loop-normalize.mlir
@@ -1,4 +1,5 @@
 // RUN: mlir-opt %s -affine-loop-normalize -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -affine-loop-normalize='promote-single-iter=1' -split-input-file | FileCheck %s --check-prefix=PROMOTE-SINGLE-ITER
 
 // Normalize steps to 1 and lower bounds to 0.
 
@@ -39,8 +40,9 @@
 // Check that single iteration loop is removed and its body is promoted to the
 // parent block.
 
-// CHECK-LABEL: func @single_iteration_loop
-func.func @single_iteration_loop(%in: memref<1xf32>, %out: memref<1xf32>) {
+// CHECK-LABEL: func @promote_single_iter_loop
+// PROMOTE-SINGLE-ITER-LABEL: func @promote_single_iter_loop
+func.func @promote_single_iter_loop(%in: memref<1xf32>, %out: memref<1xf32>) {
   affine.for %i = 0 to 1 {
     %1 = affine.load %in[%i] : memref<1xf32>
     affine.store %1, %out[%i] : memref<1xf32>
@@ -48,10 +50,10 @@
   return
 }
 
-// CHECK-NOT:  affine.for
-// CHECK:      affine.load
-// CHECK-NEXT: affine.store
-// CHECK-NEXT: return
+// PROMOTE-SINGLE-ITER-NEXT: arith.constant
+// PROMOTE-SINGLE-ITER-NEXT: affine.load
+// PROMOTE-SINGLE-ITER-NEXT: affine.store
+// PROMOTE-SINGLE-ITER-NEXT: return
 
 // -----