diff --git a/mlir/docs/EDSC.md b/mlir/docs/EDSC.md --- a/mlir/docs/EDSC.md +++ b/mlir/docs/EDSC.md @@ -103,7 +103,7 @@ // CHECK: store {{.*}}, %arg2[] : memref ``` -Similar APIs are provided to emit the lower-level `loop.for` op with +Similar APIs are provided to emit the lower-level `scf.for` op with `LoopNestBuilder`. See the `builder-api-test.cpp` test for more usage examples. Since the implementation of declarative builders is in C++, it is also available diff --git a/mlir/include/mlir/Conversion/LoopToStandard/ConvertLoopToStandard.h b/mlir/include/mlir/Conversion/LoopToStandard/ConvertLoopToStandard.h --- a/mlir/include/mlir/Conversion/LoopToStandard/ConvertLoopToStandard.h +++ b/mlir/include/mlir/Conversion/LoopToStandard/ConvertLoopToStandard.h @@ -21,13 +21,13 @@ // Owning list of rewriting patterns. class OwningRewritePatternList; -/// Collect a set of patterns to lower from loop.for, loop.if, and -/// loop.terminator to CFG operations within the Standard dialect, in particular +/// Collect a set of patterns to lower from scf.for, scf.if, and +/// scf.yield to CFG operations within the Standard dialect, in particular /// convert structured control flow into CFG branch-based control flow. void populateLoopToStdConversionPatterns(OwningRewritePatternList &patterns, MLIRContext *ctx); -/// Creates a pass to convert loop.for, loop.if and loop.terminator ops to CFG. +/// Creates a pass to convert scf.for, scf.if and scf.yield ops to CFG. std::unique_ptr createLowerToCFGPass(); } // namespace mlir diff --git a/mlir/include/mlir/Conversion/LoopsToGPU/LoopsToGPU.h b/mlir/include/mlir/Conversion/LoopsToGPU/LoopsToGPU.h --- a/mlir/include/mlir/Conversion/LoopsToGPU/LoopsToGPU.h +++ b/mlir/include/mlir/Conversion/LoopsToGPU/LoopsToGPU.h @@ -74,7 +74,7 @@ ArrayRef numWorkGroups, ArrayRef workGroupSizes); -/// Adds the conversion pattern from `loop.parallel` to `gpu.launch` to the +/// Adds the conversion pattern from `scf.parallel` to `gpu.launch` to the /// provided pattern list. void populateParallelLoopToGPUPatterns(OwningRewritePatternList &patterns, MLIRContext *ctx); diff --git a/mlir/include/mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h b/mlir/include/mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h --- a/mlir/include/mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h +++ b/mlir/include/mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h @@ -40,7 +40,7 @@ ArrayRef workGroupSize); std::unique_ptr> createLoopToGPUPass(); -/// Creates a pass that converts loop.parallel operations into a gpu.launch +/// Creates a pass that converts scf.parallel operations into a gpu.launch /// operation. The mapping of loop dimensions to launch dimensions is derived /// from mapping attributes. See ParallelToGpuLaunchLowering::matchAndRewrite /// for a description of the used attributes. diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -16,16 +16,16 @@ //===----------------------------------------------------------------------===// def ConvertAffineToStandard : FunctionPass<"lower-affine"> { - let summary = "Lower Affine operations to a combination of Standard and Loop " + let summary = "Lower Affine operations to a combination of Standard and SCF " "operations"; let description = [{ Convert operations from the affine dialect into operations from the SCF and standard dialects. 
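For instance, a simple loop lowers roughly as follows (a sketch: the materialized constants, the value names, and the generic "use" op standing in for the loop body are all illustrative):

```mlir
// Affine form: bounds and step are attributes of the operation.
affine.for %i = 0 to 16 step 2 {
  "use"(%i) : (index) -> ()
}

// After -lower-affine: bounds and step become explicit `index` SSA values.
%c0 = constant 0 : index
%c16 = constant 16 : index
%c2 = constant 2 : index
scf.for %i = %c0 to %c16 step %c2 {
  "use"(%i) : (index) -> ()
}
```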
- `affine.for` operations are converted to `loop.for` operations that are free + `affine.for` operations are converted to `scf.for` operations that are free of certain structural restrictions (on their bounds and step). `affine.if` - is similarly converted to the `loop.if` operation. `affine.apply` operations + is similarly converted to the `scf.if` operation. `affine.apply` operations are converted into sequences of primitive arithmetic operations from the standard dialect that have the same effect, using operands of the `index` type. Consequently, named maps and sets that are no longer in use may be @@ -155,7 +155,7 @@ //===----------------------------------------------------------------------===// def ConvertLoopToStandard : Pass<"convert-loop-to-std"> { - let summary = "Convert Loop dialect to Standard dialect, replacing structured" + let summary = "Convert SCF dialect to Standard dialect, replacing structured" " control flow with a CFG"; let constructor = "mlir::createLowerToCFGPass()"; } @@ -189,7 +189,7 @@ } def ConvertParallelLoopToGpu : Pass<"convert-parallel-loops-to-gpu"> { - let summary = "Convert mapped loop.parallel ops to gpu launch operations"; + let summary = "Convert mapped scf.parallel ops to gpu launch operations"; let constructor = "mlir::createParallelLoopToGpuPass()"; } diff --git a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h --- a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h +++ b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h @@ -49,7 +49,7 @@ AffineMap map, AffineMap bound); -/// Sets the mapping attribute of a loop.parallel operation. Verifies that the +/// Sets the mapping attribute of a scf.parallel operation. Verifies that the /// mapping passed is valid. /// - the number of DimMapperAttr provided is the same as the number of loops of /// the `ploopOp`. diff --git a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapperAttr.td b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapperAttr.td --- a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapperAttr.td +++ b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapperAttr.td @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// Defines the attribute used for driving conversion from loop.parallel to +// Defines the attribute used for driving conversion from scf.parallel to // gpu.launch operations // //===----------------------------------------------------------------------===// @@ -30,7 +30,7 @@ let cppNamespace = "::mlir::gpu"; } -// Attribute that drives conversion of a loop.parallel to gpu.launch +// Attribute that drives conversion of a scf.parallel to gpu.launch // operation. // processor: the hardware id to map to. // map : An affine map that is used to pre-process hardware ids before diff --git a/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h b/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h --- a/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h +++ b/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h @@ -34,13 +34,13 @@ class AffineLoopNestBuilder; class ParallelLoopNestBuilder; -/// A LoopRangeBuilder is a generic NestedBuilder for loop.for operations. +/// A LoopRangeBuilder is a generic NestedBuilder for scf.for operations. /// More specifically it is meant to be used as a temporary object for /// representing any nested MLIR construct that is "related to" an mlir::Value /// (for now an induction variable). 
class LoopRangeBuilder : public NestedBuilder { public: - /// Constructs a new loop.for and captures the associated induction + /// Constructs a new scf.for and captures the associated induction /// variable. A Value pointer is passed as the first argument and is the /// *only* way to capture the loop induction variable. LoopRangeBuilder(Value *iv, Value range); @@ -58,9 +58,9 @@ Value operator()(std::function fun = nullptr); }; -/// Helper class to sugar building loop.for loop nests from ranges. +/// Helper class to sugar building scf.for loop nests from ranges. /// This is similar to edsc::AffineLoopNestBuilder except it works on ranges -/// directly. In the current implementation it produces loop.for operations. +/// directly. In the current implementation it produces scf.for operations. class LoopNestRangeBuilder { public: LoopNestRangeBuilder(MutableArrayRef ivs, ArrayRef ranges); @@ -72,7 +72,7 @@ SmallVector loops; }; -/// Helper template class for building loop.for and affine.loop nests from +/// Helper template class for building scf.for and affine.for nests from /// ranges. template class GenericLoopNestRangeBuilder { public: diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -71,7 +71,7 @@ ```mlir %0 = linalg.dim %arg0, 0 : index - loop.for %i0 = %c0 to %0 step %c1 { + scf.for %i0 = %c0 to %0 step %c1 { %1 = load %arg0[%i0] : memref store %1, %arg1[%i0] : memref } @@ -95,9 +95,9 @@ %0 = linalg.dim %arg0, 0 %1 = linalg.dim %arg0, 1 %2 = linalg.dim %arg0, 2 - loop.for %i0 = %c0 to %{{.*}} step %c1 { - loop.for %i1 = %c0 to %{{.*}} step %c1 { - loop.for %i2 = %c0 to %{{.*}} step %c1 { + scf.for %i0 = %c0 to %{{.*}} step %c1 { + scf.for %i1 = %c0 to %{{.*}} step %c1 { + scf.for %i2 = %c0 to %{{.*}} step %c1 { %3 = load %arg0[%i0, %i2, %i1] : memref store %3, %arg1[%i2, %i1, %i0] : @@ -628,9 +628,9 @@ or IR resembling: ```mlir - loop.for %m = %c0 to %M step %c1 { - loop.for %n = %c0 to %N step %c1 { - loop.for %k = %c0 to %K step %c1 { + scf.for %m = %c0 to %M step %c1 { + scf.for %n = %c0 to %N step %c1 { + scf.for %k = %c0 to %K step %c1 { %a = load %A[%m, %k] : memref %b = load %B[%k, %n] : memref %c = load %C[%m, %n] : memref @@ -752,9 +752,9 @@ or IR resembling: ```mlir - loop.for %m = %c0 to %M step %c1 { - loop.for %n = %c0 to %N step %c1 { - loop.for %k = %c0 to %K step %c1 { + scf.for %m = %c0 to %M step %c1 { + scf.for %n = %c0 to %N step %c1 { + scf.for %k = %c0 to %K step %c1 { %a = load %A[%m, %k] : memref %b = load %B[%k, %n] : memref %c = load %C[%m, %n] : memref diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h --- a/mlir/include/mlir/Dialect/Linalg/Passes.h +++ b/mlir/include/mlir/Dialect/Linalg/Passes.h @@ -37,11 +37,11 @@ createLinalgPromotionPass(bool dynamicBuffers); std::unique_ptr> createLinalgPromotionPass(); -/// Create a pass to convert Linalg operations to loop.for loops and +/// Create a pass to convert Linalg operations to scf.for loops and /// std.load/std.store accesses. std::unique_ptr> createConvertLinalgToLoopsPass(); -/// Create a pass to convert Linalg operations to loop.parallel loops and +/// Create a pass to convert Linalg operations to scf.parallel loops and /// std.load/std.store accesses. 
std::unique_ptr> createConvertLinalgToParallelLoopsPass(); diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -121,11 +121,11 @@ template Optional linalgLowerOpToLoops(OpBuilder &builder, Operation *op); -/// Emits a loop nest of `loop.for` with the proper body for `op`. +/// Emits a loop nest of `scf.for` with the proper body for `op`. template LogicalResult linalgOpToLoops(OpBuilder &builder, Operation *op); -/// Emits a loop nest of `loop.parallel` with the proper body for `op`. +/// Emits a loop nest of `scf.parallel` with the proper body for `op`. template LogicalResult linalgOpToParallelLoops(OpBuilder &builder, Operation *op); @@ -362,8 +362,8 @@ private: /// LinalgTransformMarker handles special attribute manipulations. LinalgMarker marker; - /// Controls whether the pattern lowers to library calls, loop.for, affine.for - /// or loop.parallel. + /// Controls whether the pattern lowers to library calls, scf.for, affine.for + /// or scf.parallel. LinalgLoweringType loweringType; }; diff --git a/mlir/include/mlir/Dialect/SCF/CMakeLists.txt b/mlir/include/mlir/Dialect/SCF/CMakeLists.txt --- a/mlir/include/mlir/Dialect/SCF/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/SCF/CMakeLists.txt @@ -1,4 +1,4 @@ -add_mlir_dialect(SCFOps loop Ops) +add_mlir_dialect(SCFOps scf Ops) add_mlir_doc(SCFOps -gen-dialect-doc SCFDialect Dialects/) set(LLVM_TARGET_DEFINITIONS Passes.td) diff --git a/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h b/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h --- a/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h +++ b/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h @@ -41,7 +41,7 @@ return makeLoopBuilder(iv, lb, ub, step, MutableArrayRef{}, {}); } -/// Helper class to sugar building loop.parallel loop nests from lower/upper +/// Helper class to sugar building scf.parallel loop nests from lower/upper /// bounds and step sizes. class ParallelLoopNestBuilder { public: @@ -54,9 +54,9 @@ SmallVector loops; }; -/// Helper class to sugar building loop.for loop nests from ranges. +/// Helper class to sugar building scf.for loop nests from ranges. /// This is similar to edsc::AffineLoopNestBuilder except it operates on -/// loop.for. +/// scf.for. class LoopNestBuilder { public: LoopNestBuilder(Value *iv, Value lb, Value ub, Value step); diff --git a/mlir/include/mlir/Dialect/SCF/SCF.h b/mlir/include/mlir/Dialect/SCF/SCF.h --- a/mlir/include/mlir/Dialect/SCF/SCF.h +++ b/mlir/include/mlir/Dialect/SCF/SCF.h @@ -29,8 +29,8 @@ #define GET_OP_CLASSES #include "mlir/Dialect/SCF/SCFOps.h.inc" -// Insert `loop.terminator` at the end of the only region's only block if it -// does not have a terminator already. If a new `loop.terminator` is inserted, +// Insert `scf.yield` at the end of the only region's only block if it +// does not have a terminator already. If a new `scf.yield` is inserted, // the location is specified by `loc`. If the region is empty, insert a new // block first. 
void ensureLoopTerminator(Region &region, Builder &builder, Location loc); diff --git a/mlir/include/mlir/Dialect/SCF/SCFOps.td b/mlir/include/mlir/Dialect/SCF/SCFOps.td --- a/mlir/include/mlir/Dialect/SCF/SCFOps.td +++ b/mlir/include/mlir/Dialect/SCF/SCFOps.td @@ -18,7 +18,7 @@ include "mlir/Interfaces/SideEffectInterfaces.td" def SCF_Dialect : Dialect { - let name = "loop"; + let name = "scf"; let cppNamespace = ""; } @@ -43,7 +43,7 @@ RecursiveSideEffects]> { let summary = "for operation"; let description = [{ - The "loop.for" operation represents a loop taking 3 SSA value as operands + The "scf.for" operation represents a loop taking 3 SSA values as operands that represent the lower bound, upper bound and step respectively. The operation defines an SSA value for its induction variable. It has one region capturing the loop body. The induction variable is represented as an @@ -54,30 +54,30 @@ the lower bound but does not include the upper bound. The body region must contain exactly one block that terminates with - "loop.yield". Calling ForOp::build will create such a region and insert + "scf.yield". Calling ForOp::build will create such a region and insert the terminator implicitly if none is defined, as will the parser even in cases when it is absent from the custom format. For example: ```mlir - loop.for %iv = %lb to %ub step %step { + scf.for %iv = %lb to %ub step %step { ... // body } ``` - `loop.for` can also operate on loop-carried variables and returns the final + `scf.for` can also operate on loop-carried variables and returns the final values after loop termination. The initial values of the variables are - passed as additional SSA operands to the "loop.for" following the 3 loop + passed as additional SSA operands to the "scf.for" following the 3 loop control SSA values mentioned above (lower bound, upper bound and step). The operation region has equivalent arguments for each variable representing the value of the variable at the current iteration. - The region must terminate with a "loop.yield" that passes all the current - iteration variables to the next iteration, or to the "loop.for" result, if + The region must terminate with a "scf.yield" that passes all the current + iteration variables to the next iteration, or to the "scf.for" result, if at the last iteration. Note that when the loop-carried variables are present, calling ForOp::build will not insert the terminator implicitly. - The caller must insert "loop.yield" in that case. + The caller must insert "scf.yield" in that case. - "loop.for" results hold the final values after the last iteration. + "scf.for" results hold the final values after the last iteration. For example, to sum-reduce a memref: ```mlir @@ -86,23 +86,23 @@ // Initial sum set to 0. %sum_0 = constant 0.0 : f32 // iter_args binds initial values to the loop's region arguments. - %sum = loop.for %iv = %lb to %ub step %step + %sum = scf.for %iv = %lb to %ub step %step iter_args(%sum_iter = %sum_0) -> (f32) { %t = load %buffer[%iv] : memref<1024xf32> %sum_next = addf %sum_iter, %t : f32 // Yield current iteration sum to next iteration %sum_iter or to %sum // if final iteration. - loop.yield %sum_next : f32 + scf.yield %sum_next : f32 } return %sum : f32 } ``` - If the "loop.for" defines any values, a yield must be explicitly present. + If the "scf.for" defines any values, a yield must be explicitly present. 
+ The number and types of the "scf.for" results must match the initial values in the "iter_args" binding and the yield operands. - Another example with a nested "loop.if" (see "loop.if" for details) to + Another example with a nested "scf.if" (see "scf.if" for details) to perform conditional reduction: ```mlir @@ -110,17 +110,17 @@ %ub: index, %step: index) -> (f32) { %sum_0 = constant 0.0 : f32 %c0 = constant 0.0 : f32 - %sum = loop.for %iv = %lb to %ub step %step + %sum = scf.for %iv = %lb to %ub step %step iter_args(%sum_iter = %sum_0) -> (f32) { %t = load %buffer[%iv] : memref<1024xf32> %cond = cmpf "ugt", %t, %c0 : f32 - %sum_next = loop.if %cond -> (f32) { + %sum_next = scf.if %cond -> (f32) { %new_sum = addf %sum_iter, %t : f32 - loop.yield %new_sum : f32 + scf.yield %new_sum : f32 } else { - loop.yield %sum_iter : f32 + scf.yield %sum_iter : f32 } - loop.yield %sum_next : f32 + scf.yield %sum_next : f32 } return %sum : f32 } @@ -181,45 +181,45 @@ SingleBlockImplicitTerminator<"YieldOp">, RecursiveSideEffects]> { let summary = "if-then-else operation"; let description = [{ - The `loop.if` operation represents an if-then-else construct for + The `scf.if` operation represents an if-then-else construct for conditionally executing two regions of code. The operand to an if operation is a boolean value. For example: ```mlir - loop.if %b { + scf.if %b { ... } else { ... } ``` - `loop.if` may also return results that are defined in its regions. The + `scf.if` may also return results that are defined in its regions. The values defined are determined by which execution path is taken. Example: ```mlir - %x, %y = loop.if %b -> (f32, f32) { + %x, %y = scf.if %b -> (f32, f32) { %x_true = ... %y_true = ... - loop.yield %x_true, %y_true : f32, f32 + scf.yield %x_true, %y_true : f32, f32 } else { %x_false = ... %y_false = ... - loop.yield %x_false, %y_false : f32, f32 + scf.yield %x_false, %y_false : f32, f32 } ``` - `loop.if` regions are always terminated with "loop.yield". If "loop.if" - defines no values, the "loop.yield" can be left out, and will be inserted + `scf.if` regions are always terminated with "scf.yield". If "scf.if" + defines no values, the "scf.yield" can be left out, and will be inserted implicitly. Otherwise, it must be explicit. - Also, if "loop.if" defines one or more values, the 'else' block cannot be + Also, if "scf.if" defines one or more values, the 'else' block cannot be omitted. Example: ```mlir - loop.if %b { + scf.if %b { ... } ``` @@ -257,7 +257,7 @@ SingleBlockImplicitTerminator<"YieldOp">]> { let summary = "parallel for operation"; let description = [{ - The "loop.parallel" operation represents a loop nest taking 4 groups of SSA + The "scf.parallel" operation represents a loop nest taking 4 groups of SSA values as operands that represent the lower bounds, upper bounds, steps and initial values, respectively. The operation defines a variadic number of SSA values for its induction variables. It has one region capturing the @@ -266,7 +266,7 @@ machine word. The steps are values of type index, required to be positive. The lower and upper bounds specify a half-open range: the range includes the lower bound but does not include the upper bound. The initial values - have the same types as results of "loop.parallel". If there are no results, + have the same types as results of "scf.parallel". If there are no results, the keyword `init` can be omitted. 
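For instance, a purely side-effecting copy loop (a sketch; %A and %B stand for memref values defined elsewhere) defines no results, so both `init` and the trailing terminator are omitted:

```mlir
scf.parallel (%i) = (%lb) to (%ub) step (%step) {
  %v = load %A[%i] : memref<1024xf32>
  store %v, %B[%i] : memref<1024xf32>
  // The operand-less "scf.yield" terminator is inserted implicitly.
}
```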
Semantically we require that the iteration space can be iterated in any @@ -275,27 +275,27 @@ The parallel loop operation supports reduction of values produced by individual iterations into a single result. This is modeled using the - loop.reduce operation (see loop.reduce for details). Each result of a - loop.parallel operation is associated with an initial value operand and + scf.reduce operation (see scf.reduce for details). Each result of a + scf.parallel operation is associated with an initial value operand and reduce operation that is an immediate child. Reductions are matched to result and initial values in order of their appearance in the body. Consequently, we require that the body region has the same number of results and initial values as it has reduce operations. The body region must contain exactly one block that terminates with - "loop.yield" without operands. Parsing ParallelOp will create such a region + "scf.yield" without operands. Parsing ParallelOp will create such a region and insert the terminator when it is absent from the custom format. Example: ```mlir %init = constant 0.0 : f32 - loop.parallel (%iv) = (%lb) to (%ub) step (%step) init (%init) -> f32 { + scf.parallel (%iv) = (%lb) to (%ub) step (%step) init (%init) -> f32 { %elem_to_reduce = load %buffer[%iv] : memref<100xf32> - loop.reduce(%elem_to_reduce) : f32 { + scf.reduce(%elem_to_reduce) : f32 { ^bb0(%lhs : f32, %rhs: f32): %res = addf %lhs, %rhs : f32 - loop.reduce.return %res : f32 + scf.reduce.return %res : f32 } } ``` @@ -327,17 +327,17 @@ def ReduceOp : SCF_Op<"reduce", [HasParent<"ParallelOp">]> { let summary = "reduce operation for parallel for"; let description = [{ - "loop.reduce" is an operation occurring inside "loop.parallel" operations. + "scf.reduce" is an operation occurring inside "scf.parallel" operations. It consists of one block with two arguments which have the same type as the - operand of "loop.reduce". + operand of "scf.reduce". - "loop.reduce" is used to model the value for reduction computations of a - "loop.parallel" operation. It has to appear as an immediate child of a - "loop.parallel" and is associated with a result value of its parent + "scf.reduce" is used to model the value for reduction computations of a + "scf.parallel" operation. It has to appear as an immediate child of a + "scf.parallel" and is associated with a result value of its parent operation. Association is in the order of appearance in the body where the first - result of a parallel loop operation corresponds to the first "loop.reduce" + result of a parallel loop operation corresponds to the first "scf.reduce" in the operation's body region. The reduce operation takes a single operand, which is the value to be used in the reduction. @@ -353,10 +353,10 @@ ```mlir %operand = constant 1.0 : f32 - loop.reduce(%operand) : f32 { + scf.reduce(%operand) : f32 { ^bb0(%lhs : f32, %rhs: f32): %res = addf %lhs, %rhs : f32 - loop.reduce.return %res : f32 + scf.reduce.return %res : f32 } ``` }]; @@ -376,12 +376,12 @@ Terminator]> { let summary = "terminator for reduce operation"; let description = [{ - "loop.reduce.return" is a special terminator operation for the block inside - "loop.reduce". It terminates the region. It should have the same type as - the operand of "loop.reduce". Example for the custom format: + "scf.reduce.return" is a special terminator operation for the block inside + "scf.reduce". It terminates the region. It should have the same type as + the operand of "scf.reduce". 
Example for the custom format: ```mlir - loop.reduce.return %res : f32 + scf.reduce.return %res : f32 ``` }]; @@ -392,12 +392,12 @@ def YieldOp : SCF_Op<"yield", [NoSideEffect, ReturnLike, Terminator]> { let summary = "loop yield and termination operation"; let description = [{ - "loop.yield" yields an SSA value from a loop dialect op region and + "scf.yield" yields an SSA value from the SCF dialect op region and terminates the region. The semantics of how the values are yielded is defined by the parent operation. - If "loop.yield" has any operands, the operands must match the parent + If "scf.yield" has any operands, the operands must match the parent operation's results. - If the parent operation defines no values, then the "loop.yield" may be + If the parent operation defines no values, then the "scf.yield" may be left out in the custom syntax and the builders will insert one implicitly. Otherwise, it has to be present in the syntax to indicate which values are yielded. diff --git a/mlir/include/mlir/Dialect/SCF/Transforms.h b/mlir/include/mlir/Dialect/SCF/Transforms.h --- a/mlir/include/mlir/Dialect/SCF/Transforms.h +++ b/mlir/include/mlir/Dialect/SCF/Transforms.h @@ -23,20 +23,20 @@ class ParallelOp; -/// Fuses all adjacent loop.parallel operations with identical bounds and step -/// into one loop.parallel operations. Uses a naive aliasing and dependency +/// Fuses all adjacent scf.parallel operations with identical bounds and step +/// into one scf.parallel operation. Uses a naive aliasing and dependency /// analysis. void naivelyFuseParallelOps(Region &region); /// Tile a parallel loop of the form -/// loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) +/// scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) /// step (%arg4, %arg5) /// /// into -/// loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) +/// scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) /// step (%arg4*tileSize[0], /// %arg5*tileSize[1]) -/// loop.parallel (%j0, %j1) = (0, 0) to (min(tileSize[0], %arg2-%j0) +/// scf.parallel (%j0, %j1) = (0, 0) to (min(tileSize[0], %arg2-%j0) /// min(tileSize[1], %arg3-%j1)) /// step (%arg4, %arg5) /// The old loop is replaced with the new one. diff --git a/mlir/include/mlir/Transforms/LoopUtils.h b/mlir/include/mlir/Transforms/LoopUtils.h --- a/mlir/include/mlir/Transforms/LoopUtils.h +++ b/mlir/include/mlir/Transforms/LoopUtils.h @@ -251,7 +251,7 @@ /// numProcessors = [gridDim.x, blockDim.x], the loop: /// /// ``` -/// loop.for %i = %lb to %ub step %step { +/// scf.for %i = %lb to %ub step %step { /// ... /// } /// ``` @@ -259,7 +259,7 @@ /// is rewritten into a version resembling the following pseudo-IR: /// /// ``` -/// loop.for %i = %lb + %step * (threadIdx.x + blockIdx.x * blockDim.x) +/// scf.for %i = %lb + %step * (threadIdx.x + blockIdx.x * blockDim.x) /// to %ub step %gridDim.x * blockDim.x * %step { /// ... /// } diff --git a/mlir/lib/Conversion/LoopToStandard/LoopToStandard.cpp b/mlir/lib/Conversion/LoopToStandard/LoopToStandard.cpp --- a/mlir/lib/Conversion/LoopToStandard/LoopToStandard.cpp +++ b/mlir/lib/Conversion/LoopToStandard/LoopToStandard.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements a pass to convert loop.for, loop.if and loop.terminator +// This file implements a pass to convert scf.for, scf.if and scf.yield // ops into standard CFG ops. 
// //===----------------------------------------------------------------------===// @@ -41,7 +41,7 @@ // first/last blocks in the parent region. The original loop operation is // replaced by the initialization operations that set up the initial value of // the loop induction variable (%iv) and computes the loop bounds that are loop- -// invariant for affine loops. The operations following the original loop.for +// invariant for affine loops. The operations following the original scf.for // are split out into a separate continuation (exit) block. A condition block is // created before the continuation block. It checks the exit condition of the // loop and branches either to the continuation block, or to the first block of @@ -102,27 +102,27 @@ PatternRewriter &rewriter) const override; }; -// Create a CFG subgraph for the loop.if operation (including its "then" and +// Create a CFG subgraph for the scf.if operation (including its "then" and // optional "else" operation blocks). We maintain the invariants that the // subgraph has a single entry and a single exit point, and that the entry/exit // blocks are respectively the first/last block of the enclosing region. The -// operations following the loop.if are split into a continuation (subgraph +// operations following the scf.if are split into a continuation (subgraph // exit) block. The condition is lowered to a chain of blocks that implement the -// short-circuit scheme. The "loop.if" operation is replaced with a conditional +// short-circuit scheme. The "scf.if" operation is replaced with a conditional // branch to either the first block of the "then" region, or to the first block -// of the "else" region. In these blocks, "loop.yield" is unconditional branches -// to the post-dominating block. When the "loop.if" does not return values, the +// of the "else" region. In these blocks, "scf.yield" is unconditional branches +// to the post-dominating block. When the "scf.if" does not return values, the // post-dominating block is the same as the continuation block. When it returns // values, the post-dominating block is a new block with arguments that -// correspond to the values returned by the "loop.if" that unconditionally +// correspond to the values returned by the "scf.if" that unconditionally // branches to the continuation block. This allows block arguments to dominate -// any uses of the hitherto "loop.if" results that they replaced. (Inserting a +// any uses of the hitherto "scf.if" results that they replaced. (Inserting a // new block allows us to avoid modifying the argument list of an existing // block, which is illegal in a conversion pattern). When the "else" region is -// empty, which is only allowed for "loop.if"s that don't return values, the +// empty, which is only allowed for "scf.if"s that don't return values, the // condition branches directly to the continuation block. // -// CFG for a loop.if with else and without results. +// CFG for a scf.if with else and without results. // // +--------------------------------+ // | | @@ -152,7 +152,7 @@ // | | // +--------------------------------+ // -// CFG for a loop.if with results. +// CFG for a scf.if with results. // // +--------------------------------+ // | | @@ -207,7 +207,7 @@ PatternRewriter &rewriter) const { Location loc = forOp.getLoc(); - // Start by splitting the block containing the 'loop.for' into two parts. + // Start by splitting the block containing the 'scf.for' into two parts. 
// The part before will get the init code, the part after will be the end // point. auto *initBlock = rewriter.getInsertionBlock(); @@ -273,7 +273,7 @@ PatternRewriter &rewriter) const { auto loc = ifOp.getLoc(); - // Start by splitting the block containing the 'loop.if' into two parts. + // Start by splitting the block containing the 'scf.if' into two parts. // The part before will contain the condition, the part after will be the // continuation point. auto *condBlock = rewriter.getInsertionBlock(); @@ -288,7 +288,7 @@ rewriter.create(loc, remainingOpsBlock); } - // Move blocks from the "then" region to the region containing 'loop.if', + // Move blocks from the "then" region to the region containing 'scf.if', // place it before the continuation block, and branch to it. auto &thenRegion = ifOp.thenRegion(); auto *thenBlock = &thenRegion.front(); @@ -300,7 +300,7 @@ rewriter.inlineRegionBefore(thenRegion, continueBlock); // Move blocks from the "else" region (if present) to the region containing - // 'loop.if', place it before the continuation block and branch to it. It + // 'scf.if', place it before the continuation block and branch to it. It // will be placed after the "then" regions. auto *elseBlock = continueBlock; auto &elseRegion = ifOp.elseRegion(); @@ -331,7 +331,7 @@ BlockAndValueMapping mapping; // For a parallel loop, we essentially need to create an n-dimensional loop - // nest. We do this by translating to loop.for ops and have those lowered in + // nest. We do this by translating to scf.for ops and have those lowered in // a further rewrite. If a parallel loop contains reductions (and thus returns // values), forward the initial values for the reductions down the loop // hierarchy and bubble up the results by modifying the "yield" terminator. @@ -375,10 +375,10 @@ } // Clone the body of the reduction operation into the body of the loop, - // using operands of "loop.reduce" and iteration arguments corresponding + // using operands of "scf.reduce" and iteration arguments corresponding // to the reduction value to replace arguments of the reduction block. - // Collect operands of "loop.reduce.return" to be returned by a final - // "loop.yield" instead. + // Collect operands of "scf.reduce.return" to be returned by a final + // "scf.yield" instead. Value arg = iterArgs[yieldOperands.size()]; Block &reduceBlock = reduce.reductionOperator().front(); mapping.map(reduceBlock.getArgument(0), mapping.lookupOrDefault(arg)); diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp --- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp +++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp @@ -563,7 +563,7 @@ } /// Modifies the current transformation state to capture the effect of the given -/// `loop.parallel` operation on index substitutions and the operations to be +/// `scf.parallel` operation on index substitutions and the operations to be /// inserted. /// Specifically, if a dimension of a parallel loop is mapped to a hardware id, /// this function will @@ -734,11 +734,11 @@ return success(); } -/// Lower a `loop.parallel` operation into a corresponding `gpu.launch` +/// Lower a `scf.parallel` operation into a corresponding `gpu.launch` /// operation. /// /// This essentially transforms a loop nest into a corresponding SIMT function. -/// The conversion is driven by mapping annotations on the `loop.parallel` +/// The conversion is driven by mapping annotations on the `scf.parallel` /// operations. 
The mapping is provided via a `DictionaryAttribute` named /// `mapping`, which has three entries: /// - processor: the hardware id to map to. 0-2 are block dimensions, 3-5 are /// thread dimensions and 6 is sequential. /// - map : An affine map that is used to pre-process hardware ids before /// substitution. /// - bound : An affine map that is used to compute the bound of the hardware /// id based on an upper bound of the number of iterations. -/// If the `loop.parallel` contains nested `loop.parallel` operations, those +/// If the `scf.parallel` contains nested `scf.parallel` operations, those /// need to be annotated, as well. Structurally, the transformation works by -/// splicing all operations from nested `loop.parallel` operations into a single +/// splicing all operations from nested `scf.parallel` operations into a single /// sequence. Indices mapped to hardware ids are substituted with those ids, /// whereas sequential mappings result in a sequential for-loop. To have more /// flexibility when mapping code to hardware ids, the transform supports two @@ -791,7 +791,7 @@ while (!worklist.empty()) { Operation *op = worklist.pop_back_val(); // Now walk over the body and clone it. - // TODO: This is only correct if there either is no further loop.parallel + // TODO: This is only correct if there either is no further scf.parallel // nested or this code is side-effect free. Otherwise we might need // predication. We are overly conservative for now and only allow // side-effects in the innermost scope. @@ -800,7 +800,7 @@ // sideeffects until now. if (seenSideeffects) return failure(); - // A nested loop.parallel needs insertion of code to compute indices. + // A nested scf.parallel needs insertion of code to compute indices. // Insert that now. This will also update the worklist with the loop's // body. if (failed(processParallelLoop(nestedParallel, launchOp, cloningMap, diff --git a/mlir/lib/Conversion/VectorToLoops/ConvertVectorToLoops.cpp b/mlir/lib/Conversion/VectorToLoops/ConvertVectorToLoops.cpp --- a/mlir/lib/Conversion/VectorToLoops/ConvertVectorToLoops.cpp +++ b/mlir/lib/Conversion/VectorToLoops/ConvertVectorToLoops.cpp @@ -350,9 +350,9 @@ /// // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into /// // vector<32x256xf32> and pad with %f0 to handle the boundary case: /// %f0 = constant 0.0f : f32 -/// loop.for %i0 = 0 to %0 { -/// loop.for %i1 = 0 to %1 step %c256 { -/// loop.for %i2 = 0 to %2 step %c32 { +/// scf.for %i0 = 0 to %0 { +/// scf.for %i1 = 0 to %1 step %c256 { +/// scf.for %i2 = 0 to %2 step %c32 { /// %v = vector.transfer_read %A[%i0, %i1, %i2], %f0 /// {permutation_map: (d0, d1, d2) -> (d2, d1)} : /// memref, vector<32x256xf32> @@ -364,8 +364,8 @@ /// abstraction): /// /// ```mlir -/// loop.for %d2 = 0 to %c256 { -/// loop.for %d1 = 0 to %c32 { +/// scf.for %d2 = 0 to %c256 { +/// scf.for %d1 = 0 to %c32 { /// %s = %A[%i0, %i1 + %d1, %i2 + %d2] : f32 /// %tmp[%d2, %d1] = %s /// } diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp @@ -134,9 +134,9 @@ /// An example output may resemble: /// /// ``` -/// loop.for %i = %c0 to %0 step %c1 { -/// loop.for %j = %c0 to %1 step %c1 { -/// loop.for %k = %c0 to %4 step %c1 { +/// scf.for %i = %c0 to %0 step %c1 { +/// scf.for %j = %c0 to %1 step %c1 { +/// scf.for %k = %c0 to %4 step %c1 { /// %11 = load %arg0[%i, %j] : /// memref /// %12 = load %arg1[%i, %j, %k] : @@ -419,9 +419,9 @@ /// An example output may resemble: /// /// ``` -/// loop.for %i = %c0 to %0 
step %c1 { -/// loop.for %j = %c0 to %1 step %c1 { -/// loop.for %k = %c0 to %4 step %c1 { +/// scf.for %i = %c0 to %0 step %c1 { +/// scf.for %j = %c0 to %1 step %c1 { +/// scf.for %k = %c0 to %4 step %c1 { /// %11 = load %arg0[%i, %j] : /// memref /// %12 = load %arg1[%i, %j, %k] : @@ -509,8 +509,8 @@ } }; -/// Generates loop nest using loop.parallel. loop.parallel is only used for the -/// outer parallel loops. All other loops are generated using loop.for +/// Generates loop nest using scf.parallel. scf.parallel is only used for the +/// outer parallel loops. All other loops are generated using the scf.for /// operation. template class GenerateLoopNest { public: @@ -519,9 +519,9 @@ static void doit(ConcreteOpTy linalgOp, ArrayRef loopRanges, MutableArrayRef allIvs) { - // Only generate loop.parallel for outer consecutive "parallel" + // Only generate scf.parallel for outer consecutive "parallel" // iterator_types. - // TODO(ravishankarm): Generate loop.parallel for all "parallel" iterator + // TODO(ravishankarm): Generate scf.parallel for all "parallel" iterator // types, not just the outer most ones. Also handle "reduction" iterator // types. auto nOuterPar = linalgOp.iterator_types() @@ -532,7 +532,7 @@ }) .size(); // If there are no outer parallel loops, then the number of loop ops is the same as - // the number of loops, and they are all loop.for ops. + // the number of loops, and they are all scf.for ops. if (nOuterPar) { GenericLoopNestRangeBuilder( allIvs.take_front(nOuterPar), loopRanges.take_front(nOuterPar))([&] { @@ -545,7 +545,7 @@ }); }); } else { - // If there are no parallel loops then fallback to generating all loop.for + // If there are no parallel loops then fall back to generating all scf.for // operations. GenericLoopNestRangeBuilder(allIvs, loopRanges)([&] { SmallVector allIvValues(allIvs.begin(), allIvs.end()); @@ -595,7 +595,7 @@ assert(loopRanges.size() == allIvs.size()); Impl::doit(linalgOp, loopRanges, allIvs); // Number of loop ops might be different from the number of ivs since some - // loops like affine.parallel and loop.parallel have multiple ivs. + // loops like affine.parallel and scf.parallel have multiple ivs. llvm::SetVector loopSet; for (Value iv : allIvs) { if (!iv) @@ -747,7 +747,7 @@ return linalgOpToLoopsImpl(op, builder); } -/// Emits a loop nest of `loop.for` with the proper body for `op`. +/// Emits a loop nest of `scf.for` with the proper body for `op`. template LogicalResult mlir::linalg::linalgOpToLoops(OpBuilder &builder, Operation *op) { Optional loops = @@ -764,7 +764,7 @@ return loops ? success() : failure(); } -/// Emits a loop nest of `loop.parallel` with the proper body for `op`. +/// Emits a loop nest of `scf.parallel` with the proper body for `op`. 
template LogicalResult mlir::linalg::linalgOpToParallelLoops(OpBuilder &builder, Operation *op) { diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -144,8 +144,8 @@ // %c10 = constant 10 : index // operand_dim_0 = dim %operand, 0 : memref<50x100xf32> // operand_dim_1 = dim %operand, 1 : memref<50x100xf32> -// loop.for %k = %c0 to operand_dim_0 step %c10 { -// loop.for %l = %c0 to operand_dim_1 step %c25 { +// scf.for %k = %c0 to operand_dim_0 step %c10 { +// scf.for %l = %c0 to operand_dim_1 step %c25 { // %4 = std.subview %operand[%k, %l][%c10, %c25][%c1, %c1] // : memref<50x100xf32> to memref // %5 = std.subview %result[%k, %l][%c10, %c25][%c1, %c1] diff --git a/mlir/lib/Dialect/SCF/SCF.cpp b/mlir/lib/Dialect/SCF/SCF.cpp --- a/mlir/lib/Dialect/SCF/SCF.cpp +++ b/mlir/lib/Dialect/SCF/SCF.cpp @@ -582,7 +582,7 @@ // Check that the block is terminated by a ReduceReturnOp. if (!isa(block.getTerminator())) return op.emitOpError("the block inside reduce should be terminated with a " - "'loop.reduce.return' op"); + "'scf.reduce.return' op"); return success(); } @@ -649,7 +649,7 @@ } else if (isa(parentOp)) { if (op.getNumOperands() != 0) return op.emitOpError() - << "yield inside loop.parallel is not allowed to have operands"; + << "yield inside scf.parallel is not allowed to have operands"; } else { return op.emitOpError() << "yield only terminates If, For or Parallel regions"; diff --git a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopSpecialization.cpp b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopSpecialization.cpp --- a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopSpecialization.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopSpecialization.cpp @@ -1,4 +1,4 @@ -//===- ParallelLoopSpecialization.cpp - loop.parallel specialization ------===// +//===- ParallelLoopSpecialization.cpp - scf.parallel specialization ------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp --- a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp @@ -1,4 +1,4 @@ -//===- ParallelLoopTiling.cpp - Tiles loop.parallel ---------------===// +//===- ParallelLoopTiling.cpp - Tiles scf.parallel ---------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -23,14 +23,14 @@ using namespace mlir::scf; /// Tile a parallel loop of the form -/// loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) +/// scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) /// step (%arg4, %arg5) /// /// into -/// loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) +/// scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) /// step (%arg4*tileSize[0], /// %arg5*tileSize[1]) -/// loop.parallel (%j0, %j1) = (0, 0) to (min(tileSize[0], %arg2-%j0) +/// scf.parallel (%j0, %j1) = (0, 0) to (min(tileSize[0], %arg2-%j0) /// min(tileSize[1], %arg3-%j1)) /// step (%arg4, %arg5) /// The old loop is replaced with the new one. 
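As a concrete 1-D instance with tileSize = [4] (a sketch following the scheme above; the clamped inner bound is typically computed with an affine.min, and all value names are illustrative):

```mlir
%c0 = constant 0 : index
%c1 = constant 1 : index
%c4 = constant 4 : index
// Outer loop: the original step %c1 scaled by the tile size 4.
scf.parallel (%i) = (%c0) to (%ub) step (%c4) {
  // Tile extent clamped against the remaining iterations: min(4, %ub - %i).
  %sz = affine.min affine_map<(d0)[s0] -> (4, s0 - d0)>(%i)[%ub]
  // Inner loop covers one tile with the original step.
  scf.parallel (%j) = (%c0) to (%sz) step (%c1) {
    %idx = addi %i, %j : index
    %v = load %A[%idx] : memref<?xf32>
    store %v, %B[%idx] : memref<?xf32>
  }
}
```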
diff --git a/mlir/test/Analysis/test-dominance.mlir b/mlir/test/Analysis/test-dominance.mlir --- a/mlir/test/Analysis/test-dominance.mlir +++ b/mlir/test/Analysis/test-dominance.mlir @@ -91,7 +91,7 @@ // CHECK-LABEL: Testing : nested_region func @nested_region(%arg0 : index, %arg1 : index, %arg2 : index) { - loop.for %arg3 = %arg0 to %arg1 step %arg2 { } + scf.for %arg3 = %arg0 to %arg1 step %arg2 { } return } @@ -110,9 +110,9 @@ // CHECK-LABEL: Testing : nested_region2 func @nested_region2(%arg0 : index, %arg1 : index, %arg2 : index) { - loop.for %arg3 = %arg0 to %arg1 step %arg2 { - loop.for %arg4 = %arg0 to %arg1 step %arg2 { - loop.for %arg5 = %arg0 to %arg1 step %arg2 { } + scf.for %arg3 = %arg0 to %arg1 step %arg2 { + scf.for %arg4 = %arg0 to %arg1 step %arg2 { + scf.for %arg5 = %arg0 to %arg1 step %arg2 { } } } return @@ -160,8 +160,8 @@ ^loopBody: %const0 = constant 1 : i32 %inc = addi %counter, %const0 : i32 - loop.for %arg5 = %arg2 to %arg3 step %arg4 { - loop.for %arg6 = %arg2 to %arg3 step %arg4 { } + scf.for %arg5 = %arg2 to %arg3 step %arg4 { + scf.for %arg6 = %arg2 to %arg3 step %arg4 { } } br ^loopHeader(%inc : i32) ^exit: diff --git a/mlir/test/Analysis/test-liveness.mlir b/mlir/test/Analysis/test-liveness.mlir --- a/mlir/test/Analysis/test-liveness.mlir +++ b/mlir/test/Analysis/test-liveness.mlir @@ -205,17 +205,17 @@ // CHECK-NEXT: val_7 // CHECK-NEXT: %0 = addi // CHECK-NEXT: %1 = addi - // CHECK-NEXT: loop.for + // CHECK-NEXT: scf.for // CHECK: // %2 = addi // CHECK-NEXT: %3 = addi // CHECK-NEXT: val_8 // CHECK-NEXT: %1 = addi - // CHECK-NEXT: loop.for + // CHECK-NEXT: scf.for // CHECK: // return %1 // CHECK: EndLiveness %0 = addi %arg3, %arg4 : i32 %1 = addi %arg4, %arg5 : i32 - loop.for %arg6 = %arg0 to %arg1 step %arg2 { + scf.for %arg6 = %arg0 to %arg1 step %arg2 { // CHECK: Block: 1 // CHECK-NEXT: LiveIn: arg5@0 arg6@0 val_7 // CHECK-NEXT: LiveOut:{{ *$}} @@ -238,13 +238,13 @@ // CHECK-NEXT: val_7 // CHECK-NEXT: %0 = addi // CHECK-NEXT: %1 = addi - // CHECK-NEXT: loop.for + // CHECK-NEXT: scf.for // CHECK: // %2 = addi - // CHECK-NEXT: loop.for + // CHECK-NEXT: scf.for // CHECK: // %3 = addi // CHECK-NEXT: val_8 // CHECK-NEXT: %1 = addi - // CHECK-NEXT: loop.for + // CHECK-NEXT: scf.for // CHECK: // return %1 // CHECK: EndLiveness %arg0 : index, %arg1 : index, %arg2 : index, @@ -252,18 +252,18 @@ %buffer : memref) -> i32 { %0 = addi %arg3, %arg4 : i32 %1 = addi %arg4, %arg5 : i32 - loop.for %arg6 = %arg0 to %arg1 step %arg2 { + scf.for %arg6 = %arg0 to %arg1 step %arg2 { // CHECK: Block: 1 // CHECK-NEXT: LiveIn: arg0@0 arg1@0 arg2@0 arg5@0 arg6@0 val_7 // CHECK-NEXT: LiveOut:{{ *$}} // CHECK-NEXT: BeginLiveness // CHECK-NEXT: val_10 // CHECK-NEXT: %2 = addi - // CHECK-NEXT: loop.for + // CHECK-NEXT: scf.for // CHECK: // %3 = addi // CHECK: EndLiveness %2 = addi %0, %arg5 : i32 - loop.for %arg7 = %arg0 to %arg1 step %arg2 { + scf.for %arg7 = %arg0 to %arg1 step %arg2 { %3 = addi %2, %0 : i32 store %3, %buffer[] : memref } @@ -283,10 +283,10 @@ // CHECK-NEXT: val_7 // CHECK-NEXT: %0 = addi // CHECK-NEXT: %1 = addi - // CHECK-NEXT: loop.for + // CHECK-NEXT: scf.for // CHECK: // br ^bb1 // CHECK-NEXT: %2 = addi - // CHECK-NEXT: loop.for + // CHECK-NEXT: scf.for // CHECK: // %2 = addi // CHECK: EndLiveness %arg0 : index, %arg1 : index, %arg2 : index, @@ -294,7 +294,7 @@ %buffer : memref) -> i32 { %0 = addi %arg3, %arg4 : i32 %1 = addi %arg4, %arg5 : i32 - loop.for %arg6 = %arg0 to %arg1 step %arg2 { + scf.for %arg6 = %arg0 to %arg1 step %arg2 { // CHECK: Block: 1 // 
CHECK-NEXT: LiveIn: arg5@0 arg6@0 val_7 // CHECK-NEXT: LiveOut:{{ *$}} @@ -307,7 +307,7 @@ // CHECK: Block: 2 // CHECK-NEXT: LiveIn: arg0@0 arg1@0 arg2@0 arg6@0 val_7 val_8 // CHECK-NEXT: LiveOut:{{ *$}} - loop.for %arg7 = %arg0 to %arg1 step %arg2 { + scf.for %arg7 = %arg0 to %arg1 step %arg2 { // CHECK: Block: 3 // CHECK-NEXT: LiveIn: arg6@0 val_7 val_8 // CHECK-NEXT: LiveOut:{{ *$}} diff --git a/mlir/test/Conversion/GPUToSPIRV/if.mlir b/mlir/test/Conversion/GPUToSPIRV/if.mlir --- a/mlir/test/Conversion/GPUToSPIRV/if.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/if.mlir @@ -29,7 +29,7 @@ // CHECK-NEXT: } // CHECK-NEXT: spv.Return - loop.if %arg3 { + scf.if %arg3 { store %value, %arg2[%i] : memref<10xf32> } gpu.return @@ -70,8 +70,8 @@ // CHECK-NEXT: } // CHECK-NEXT: spv.Return - loop.if %arg5 { - loop.if %arg6 { + scf.if %arg5 { + scf.if %arg6 { %value = load %arg3[%i] : memref<10xf32> store %value, %arg4[%i] : memref<10xf32> } else { @@ -79,7 +79,7 @@ store %value, %arg3[%i] : memref<10xf32> } } else { - loop.if %arg6 { + scf.if %arg6 { %value = load %arg3[%j] : memref<10xf32> store %value, %arg4[%j] : memref<10xf32> } else { diff --git a/mlir/test/Conversion/GPUToSPIRV/loop.mlir b/mlir/test/Conversion/GPUToSPIRV/loop.mlir --- a/mlir/test/Conversion/GPUToSPIRV/loop.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/loop.mlir @@ -39,7 +39,7 @@ // CHECK: [[MERGE]] // CHECK: spv._merge // CHECK: } - loop.for %arg4 = %lb to %ub step %step { + scf.for %arg4 = %lb to %ub step %step { %1 = load %arg2[%arg4] : memref<10xf32> store %1, %arg3[%arg4] : memref<10xf32> } diff --git a/mlir/test/Conversion/LoopsToGPU/imperfect_2D.mlir b/mlir/test/Conversion/LoopsToGPU/imperfect_2D.mlir --- a/mlir/test/Conversion/LoopsToGPU/imperfect_2D.mlir +++ b/mlir/test/Conversion/LoopsToGPU/imperfect_2D.mlir @@ -12,15 +12,15 @@ // CHECK: [[TEMP1:%.*]] = muli [[ARG3]], [[ARG6]] : index // CHECK: [[BLOCKLOOPYLB:%.*]] = addi {{%.*}}, [[TEMP1]] : index // CHECK: [[BLOCKLOOPYSTEP:%.*]] = muli [[ARG3]], [[ARG12]] : index - // CHECK: loop.for [[BLOCKLOOPYIV:%.*]] = [[BLOCKLOOPYLB]] to {{%.*}} step [[BLOCKLOOPYSTEP]] - loop.for %iv1 = %c0 to %0 step %arg3 { + // CHECK: scf.for [[BLOCKLOOPYIV:%.*]] = [[BLOCKLOOPYLB]] to {{%.*}} step [[BLOCKLOOPYSTEP]] + scf.for %iv1 = %c0 to %0 step %arg3 { // CHECK: [[TEMP2:%.*]] = muli [[ARG4]], [[ARG5]] : index // CHECK: [[BLOCKLOOPXLB:%.*]] = addi {{%.*}}, [[TEMP2]] : index // CHECK: [[BLOCKLOOPXSTEP:%.*]] = muli [[ARG4]], [[ARG11]] : index - // CHECK: loop.for [[BLOCKLOOPXIV:%.*]] = [[BLOCKLOOPXLB]] to {{%.*}} step [[BLOCKLOOPXSTEP]] + // CHECK: scf.for [[BLOCKLOOPXIV:%.*]] = [[BLOCKLOOPXLB]] to {{%.*}} step [[BLOCKLOOPXSTEP]] - loop.for %iv2 = %c0 to %1 step %arg4 { + scf.for %iv2 = %c0 to %1 step %arg4 { // TODO: This is effectively shared memory. Lower it to a // shared memory. 
@@ -30,13 +30,13 @@ // CHECK: [[TEMP3:%.*]] = muli [[ARG20:%.*]], [[ARG9:%.*]] : index // CHECK: [[THREADLOOP1YLB:%.*]] = addi {{%.*}}, [[TEMP3]] : index // CHECK: [[THREADLOOP1YSTEP:%.*]] = muli [[ARG20]], [[ARG15]] : index - // CHECK: loop.for [[THREADLOOP1YIV:%.*]] = [[THREADLOOP1YLB]] to {{%.*}} step [[THREADLOOP1YSTEP]] - loop.for %iv3 = %c0 to %arg3 step %c1 { + // CHECK: scf.for [[THREADLOOP1YIV:%.*]] = [[THREADLOOP1YLB]] to {{%.*}} step [[THREADLOOP1YSTEP]] + scf.for %iv3 = %c0 to %arg3 step %c1 { // CHECK: [[TEMP4:%.*]] = muli [[ARG20]], [[ARG8]] : index // CHECK: [[THREADLOOP1XLB:%.*]] = addi {{%.*}}, [[TEMP4]] : index // CHECK: [[THREADLOOP1XSTEP:%.*]] = muli [[ARG20]], [[ARG14]] : index - // CHECK: loop.for [[THREADLOOP1XIV:%.*]] = [[THREADLOOP1XLB]] to {{%.*}} step [[THREADLOOP1XSTEP]] - loop.for %iv4 = %c1 to %arg4 step %c1 { + // CHECK: scf.for [[THREADLOOP1XIV:%.*]] = [[THREADLOOP1XLB]] to {{%.*}} step [[THREADLOOP1XSTEP]] + scf.for %iv4 = %c1 to %arg4 step %c1 { // CHECK: [[INDEX2:%.*]] = addi [[BLOCKLOOPYIV]], [[THREADLOOP1YIV]] : index %10 = addi %iv1, %iv3 : index // CHECK: [[INDEX1:%.*]] = addi [[BLOCKLOOPXIV]], [[THREADLOOP1XIV]] : index @@ -54,13 +54,13 @@ // CHECK: [[TEMP5:%.*]] = muli [[ARG20]], [[ARG9]] : index // CHECK: [[THREADLOOP2YLB:%.*]] = addi {{%.*}}, [[TEMP5]] : index // CHECK: [[THREADLOOP2YSTEP:%.*]] = muli [[ARG20]], [[ARG15]] : index - // CHECK: loop.for [[THREADLOOP2YIV:%.*]] = [[THREADLOOP2YLB]] to {{%.*}} step [[THREADLOOP2YSTEP]] - loop.for %iv3 = %c0 to %arg3 step %c1 { + // CHECK: scf.for [[THREADLOOP2YIV:%.*]] = [[THREADLOOP2YLB]] to {{%.*}} step [[THREADLOOP2YSTEP]] + scf.for %iv3 = %c0 to %arg3 step %c1 { // CHECK: [[TEMP6:%.*]] = muli [[ARG20]], [[ARG8]] : index // CHECK: [[THREADLOOP2XLB:%.*]] = addi {{%.*}}, [[TEMP6]] : index // CHECK: [[THREADLOOP2XSTEP:%.*]] = muli [[ARG20]], [[ARG14]] : index - // CHECK: loop.for [[THREADLOOP2XIV:%.*]] = [[THREADLOOP2XLB]] to {{%.*}} step [[THREADLOOP2XSTEP]] - loop.for %iv4 = %c1 to %arg4 step %c1 { + // CHECK: scf.for [[THREADLOOP2XIV:%.*]] = [[THREADLOOP2XLB]] to {{%.*}} step [[THREADLOOP2XSTEP]] + scf.for %iv4 = %c1 to %arg4 step %c1 { // CHECK: [[INDEX3:%.*]] = addi [[BLOCKLOOPYIV]], [[THREADLOOP2YIV]] : index %13 = addi %iv1, %iv3 : index // CHECK: [[INDEX4:%.*]] = addi [[BLOCKLOOPXIV]], [[THREADLOOP2XIV]] : index diff --git a/mlir/test/Conversion/LoopsToGPU/imperfect_3D.mlir b/mlir/test/Conversion/LoopsToGPU/imperfect_3D.mlir --- a/mlir/test/Conversion/LoopsToGPU/imperfect_3D.mlir +++ b/mlir/test/Conversion/LoopsToGPU/imperfect_3D.mlir @@ -7,13 +7,13 @@ %2 = dim %arg0, 2 : memref %c0 = constant 0 : index // CHECK: gpu.launch - // CHECK: loop.for {{.*}} { - // CHECK: loop.for {{.*}} { - // CHECK: loop.for {{.*}} { + // CHECK: scf.for {{.*}} { + // CHECK: scf.for {{.*}} { + // CHECK: scf.for {{.*}} { // CHECK: alloc - // CHECK: loop.for {{.*}} { - // CHECK: loop.for {{.*}} { - // CHECK: loop.for {{.*}} { + // CHECK: scf.for {{.*}} { + // CHECK: scf.for {{.*}} { + // CHECK: scf.for {{.*}} { // CHECK: load // CHECK: load // CHECK: addf @@ -21,9 +21,9 @@ // CHECK: } // CHECK-NEXT: } // CHECK-NEXT: } - // CHECK: loop.for {{.*}} { - // CHECK: loop.for {{.*}} { - // CHECK: loop.for {{.*}} { + // CHECK: scf.for {{.*}} { + // CHECK: scf.for {{.*}} { + // CHECK: scf.for {{.*}} { // CHECK: load // CHECK: load // CHECK: mulf @@ -32,9 +32,9 @@ // CHECK-NEXT: } // CHECK-NEXT: } // CHECK: dealloc - loop.for %iv1 = %c0 to %0 step %t1 { - loop.for %iv2 = %c0 to %1 step %t2 { - loop.for %iv3 = %c0 to %2 step %t3 { 
+ scf.for %iv1 = %c0 to %0 step %t1 { + scf.for %iv2 = %c0 to %1 step %t2 { + scf.for %iv3 = %c0 to %2 step %t3 { %6 = alloc(%t1, %t2, %t3) : memref %ubcmp1 = cmpi "slt", %0, %t1 : index %ub1 = select %ubcmp1, %0, %t1 : index @@ -42,9 +42,9 @@ %ub2 = select %ubcmp2, %1, %t2 : index %ubcmp3 = cmpi "slt", %2, %t3 : index %ub3 = select %ubcmp3, %2, %t3 : index - loop.for %iv4 = %iv1 to %ub1 step %step1 { - loop.for %iv5 = %iv2 to %ub2 step %step2 { - loop.for %iv6 = %iv3 to %ub3 step %step3 { + scf.for %iv4 = %iv1 to %ub1 step %step1 { + scf.for %iv5 = %iv2 to %ub2 step %step2 { + scf.for %iv6 = %iv3 to %ub3 step %step3 { %7 = load %arg0[%iv4, %iv5, %iv6] : memref %8 = load %arg1[%iv4, %iv6, %iv5] : memref %9 = addf %7, %8 : f32 @@ -58,9 +58,9 @@ } } } - loop.for %iv7 = %iv1 to %ub1 step %step1 { - loop.for %iv8 = %iv2 to %ub2 step %step2 { - loop.for %iv9 = %iv3 to %ub3 step %step3 { + scf.for %iv7 = %iv1 to %ub1 step %step1 { + scf.for %iv8 = %iv2 to %ub2 step %step2 { + scf.for %iv9 = %iv3 to %ub3 step %step3 { %16 = subi %iv7, %iv1 : index %17 = divi_signed %16, %step1 : index %18 = subi %iv8, %iv2 : index diff --git a/mlir/test/Conversion/LoopsToGPU/imperfect_4D.mlir b/mlir/test/Conversion/LoopsToGPU/imperfect_4D.mlir --- a/mlir/test/Conversion/LoopsToGPU/imperfect_4D.mlir +++ b/mlir/test/Conversion/LoopsToGPU/imperfect_4D.mlir @@ -8,30 +8,30 @@ %3 = dim %arg0, 3 : memref %c0 = constant 0 : index // CHECK: gpu.launch - // CHECK: loop.for - // CHECK: loop.for - // CHECK: loop.for + // CHECK: scf.for + // CHECK: scf.for + // CHECK: scf.for // CHECK: alloc - // CHECK: loop.for - // CHECK: loop.for - // CHECK: loop.for - // CHECK: loop.for + // CHECK: scf.for + // CHECK: scf.for + // CHECK: scf.for + // CHECK: scf.for // CHECK: load // CHECK: load // CHECK: addf // CHECK: store - // CHECK: loop.for - // CHECK: loop.for - // CHECK: loop.for - // CHECK: loop.for + // CHECK: scf.for + // CHECK: scf.for + // CHECK: scf.for + // CHECK: scf.for // CHECK: load // CHECK: load // CHECK: mulf // CHECK: store // CHECK: dealloc - loop.for %iv1 = %c0 to %0 step %t1 { - loop.for %iv2 = %c0 to %1 step %t2 { - loop.for %iv3 = %c0 to %2 step %t3 { + scf.for %iv1 = %c0 to %0 step %t1 { + scf.for %iv2 = %c0 to %1 step %t2 { + scf.for %iv3 = %c0 to %2 step %t3 { %6 = alloc(%t1, %t2, %t3, %3) : memref %ubcmp1 = cmpi "slt", %0, %t1 : index %ub1 = select %ubcmp1, %0, %t1 : index @@ -41,10 +41,10 @@ %ub3 = select %ubcmp3, %2, %t3 : index %ubcmp4 = cmpi "slt", %3, %t4 : index %ub4 = select %ubcmp3, %3, %t4 : index - loop.for %iv5 = %iv1 to %ub1 step %step1 { - loop.for %iv6 = %iv2 to %ub2 step %step2 { - loop.for %iv7 = %iv3 to %ub3 step %step3 { - loop.for %iv8 = %c0 to %3 step %step4 { + scf.for %iv5 = %iv1 to %ub1 step %step1 { + scf.for %iv6 = %iv2 to %ub2 step %step2 { + scf.for %iv7 = %iv3 to %ub3 step %step3 { + scf.for %iv8 = %c0 to %3 step %step4 { %7 = load %arg0[%iv5, %iv6, %iv7, %iv8] : memref %8 = load %arg1[%iv5, %iv6, %iv7, %iv8] : memref %9 = addf %7, %8 : f32 @@ -59,10 +59,10 @@ } } } - loop.for %iv9 = %iv1 to %ub1 step %step1 { - loop.for %iv10 = %iv2 to %ub2 step %step2 { - loop.for %iv11 = %iv3 to %ub3 step %step3 { - loop.for %iv12 = %c0 to %3 step %step4 { + scf.for %iv9 = %iv1 to %ub1 step %step1 { + scf.for %iv10 = %iv2 to %ub2 step %step2 { + scf.for %iv11 = %iv3 to %ub3 step %step3 { + scf.for %iv12 = %c0 to %3 step %step4 { %18 = subi %iv9, %iv1 : index %19 = divi_signed %18, %step1 : index %20 = subi %iv10, %iv2 : index diff --git a/mlir/test/Conversion/LoopsToGPU/imperfect_linalg.mlir 
b/mlir/test/Conversion/LoopsToGPU/imperfect_linalg.mlir --- a/mlir/test/Conversion/LoopsToGPU/imperfect_linalg.mlir +++ b/mlir/test/Conversion/LoopsToGPU/imperfect_linalg.mlir @@ -8,24 +8,24 @@ %0 = dim %arg0, 0 : memref %1 = dim %arg0, 1 : memref // CHECK-LABEL: gpu.launch - // CHECK: loop.for - // CHECK: loop.for - // CHECK: loop.for - // CHECK: loop.for + // CHECK: scf.for + // CHECK: scf.for + // CHECK: scf.for + // CHECK: scf.for // CHECK: load // CHECK: load // CHECK: load // CHECK: mulf // CHECK: store - loop.for %arg3 = %c0 to %0 step %c2 { - loop.for %arg4 = %c0 to %1 step %c2 { + scf.for %arg3 = %c0 to %0 step %c2 { + scf.for %arg4 = %c0 to %1 step %c2 { %4 = std.subview %arg0[%arg3, %arg4][%c2, %c2][%c1, %c1] : memref to memref %7 = std.subview %arg1[%arg3, %arg4][%c2, %c2][%c1, %c1] : memref to memref %10 = std.subview %arg2[%arg3, %arg4][%c2, %c2][%c1, %c1] : memref to memref %11 = dim %4, 0 : memref %12 = dim %4, 1 : memref - loop.for %arg5 = %c0 to %11 step %c1 { - loop.for %arg6 = %c0 to %12 step %c1 { + scf.for %arg5 = %c0 to %11 step %c1 { + scf.for %arg6 = %c0 to %12 step %c1 { %13 = load %4[%arg5, %arg6] : memref %14 = load %7[%arg5, %arg6] : memref %15 = load %10[%arg5, %arg6] : memref diff --git a/mlir/test/Conversion/LoopsToGPU/linalg_to_gpu.mlir b/mlir/test/Conversion/LoopsToGPU/linalg_to_gpu.mlir --- a/mlir/test/Conversion/LoopsToGPU/linalg_to_gpu.mlir +++ b/mlir/test/Conversion/LoopsToGPU/linalg_to_gpu.mlir @@ -7,10 +7,10 @@ %c3 = constant 3 : index // CHECK: subi %{{.*}}, %{{.*}} : index // CHECK-NEXT: %[[range_i:.*]] = divi_signed {{.*}}, %{{.*}} : index - loop.for %i0 = %c0 to %c42 step %c3 { + scf.for %i0 = %c0 to %c42 step %c3 { // CHECK: subi %{{.*}}, %{{.*}} : index // CHECK-NEXT: %[[range_j:.*]] = divi_signed {{.*}}, %{{.*}} : index - loop.for %i1 = %c3 to %c42 step %arg1 { + scf.for %i1 = %c3 to %c42 step %arg1 { // CHECK: gpu.launch // CHECK-SAME: blocks // CHECK-SAME: threads diff --git a/mlir/test/Conversion/LoopsToGPU/parallel_loop.mlir b/mlir/test/Conversion/LoopsToGPU/parallel_loop.mlir --- a/mlir/test/Conversion/LoopsToGPU/parallel_loop.mlir +++ b/mlir/test/Conversion/LoopsToGPU/parallel_loop.mlir @@ -7,7 +7,7 @@ %buf : memref, %res : memref) { %step = constant 2 : index - loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) + scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %step) { %val = load %buf[%i0, %i1] : memref store %val, %res[%i1, %i0] : memref @@ -47,9 +47,9 @@ %zero = constant 0 : index %one = constant 1 : index %four = constant 4 : index - loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) + scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%four, %four) { - loop.parallel (%si0, %si1) = (%zero, %zero) to (%four, %four) + scf.parallel (%si0, %si1) = (%zero, %zero) to (%four, %four) step (%one, %one) { %idx0 = addi %i0, %si0 : index %idx1 = addi %i1, %si1 : index @@ -104,7 +104,7 @@ %buf : memref, %res : memref) { %step = constant 2 : index - loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) + scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %step) { %val = load %buf[%i0, %i1] : memref store %val, %res[%i1, %i0] : memref @@ -126,7 +126,7 @@ // CHECK: [[VAL_68:%.*]] = affine.apply #[[MAP0]](){{\[}}[[VAL_61]], [[VAL_59]], [[VAL_63]]] // CHECK: gpu.launch blocks([[VAL_69:%.*]], [[VAL_70:%.*]], [[VAL_71:%.*]]) in ([[VAL_72:%.*]] = [[VAL_67]], [[VAL_73:%.*]] = [[VAL_68]], [[VAL_74:%.*]] = [[VAL_67]]) threads([[VAL_75:%.*]], [[VAL_76:%.*]], [[VAL_77:%.*]]) 
in ([[VAL_78:%.*]] = [[VAL_67]], [[VAL_79:%.*]] = [[VAL_67]], [[VAL_80:%.*]] = [[VAL_67]]) { // CHECK: [[VAL_81:%.*]] = affine.apply #[[MAP1]]([[VAL_70]]){{\[}}[[VAL_63]], [[VAL_59]]] -// CHECK: loop.for [[VAL_82:%.*]] = [[VAL_60]] to [[VAL_62]] step [[VAL_66]] { +// CHECK: scf.for [[VAL_82:%.*]] = [[VAL_60]] to [[VAL_62]] step [[VAL_66]] { // CHECK: [[VAL_83:%.*]] = load [[VAL_64]]{{\[}}[[VAL_81]], [[VAL_82]]] : memref // CHECK: store [[VAL_83]], [[VAL_65]]{{\[}}[[VAL_82]], [[VAL_81]]] : memref // CHECK: } @@ -147,9 +147,9 @@ %zero = constant 0 : index %one = constant 1 : index %four = constant 4 : index - loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) + scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%four, %four) { - loop.parallel (%si0, %si1) = (%zero, %zero) to (%four, %four) + scf.parallel (%si0, %si1) = (%zero, %zero) to (%four, %four) step (%one, %one) { %idx0 = addi %i0, %si0 : index %idx1 = addi %i1, %si1 : index @@ -180,9 +180,9 @@ // CHECK: [[VAL_95:%.*]] = affine.apply #[[MAP0]](){{\[}}[[VAL_92]], [[VAL_90]], [[VAL_91]]] // CHECK: gpu.launch blocks([[VAL_96:%.*]], [[VAL_97:%.*]], [[VAL_98:%.*]]) in ([[VAL_99:%.*]] = [[VAL_93]], [[VAL_100:%.*]] = [[VAL_94]], [[VAL_101:%.*]] = [[VAL_93]]) threads([[VAL_102:%.*]], [[VAL_103:%.*]], [[VAL_104:%.*]]) in ([[VAL_105:%.*]] = [[VAL_93]], [[VAL_106:%.*]] = [[VAL_95]], [[VAL_107:%.*]] = [[VAL_93]]) { // CHECK: [[VAL_108:%.*]] = affine.apply #[[MAP1]]([[VAL_97]]){{\[}}[[VAL_92]], [[VAL_84]]] -// CHECK: loop.for [[VAL_109:%.*]] = [[VAL_85]] to [[VAL_87]] step [[VAL_92]] { +// CHECK: scf.for [[VAL_109:%.*]] = [[VAL_85]] to [[VAL_87]] step [[VAL_92]] { // CHECK: [[VAL_110:%.*]] = affine.apply #[[MAP1]]([[VAL_103]]){{\[}}[[VAL_91]], [[VAL_90]]] -// CHECK: loop.for [[VAL_111:%.*]] = [[VAL_90]] to [[VAL_92]] step [[VAL_91]] { +// CHECK: scf.for [[VAL_111:%.*]] = [[VAL_90]] to [[VAL_92]] step [[VAL_91]] { // CHECK: [[VAL_112:%.*]] = addi [[VAL_108]], [[VAL_110]] : index // CHECK: [[VAL_113:%.*]] = addi [[VAL_109]], [[VAL_111]] : index // CHECK: [[VAL_114:%.*]] = load [[VAL_88]]{{\[}}[[VAL_112]], [[VAL_113]]] : memref @@ -210,7 +210,7 @@ %c2 = constant 2 : index %0 = dim %arg0, 0 : memref %1 = dim %arg0, 1 : memref - loop.parallel (%arg3, %arg4) = (%c0, %c0) to (%0, %1) step (%c2, %c3) { + scf.parallel (%arg3, %arg4) = (%c0, %c0) to (%0, %1) step (%c2, %c3) { %2 = dim %arg0, 0 : memref %3 = affine.min #map1(%arg3)[%2] %squared_min = muli %3, %3 : index @@ -227,15 +227,15 @@ %14 = dim %arg2, 1 : memref %15 = affine.min #map2(%arg4)[%14] %16 = std.subview %arg2[%arg3, %arg4][%13, %15][%c1, %c1] : memref to memref - loop.parallel (%arg5, %arg6) = (%c0, %c0) to (%squared_min, %5) step (%c1, %c1) { + scf.parallel (%arg5, %arg6) = (%c0, %c0) to (%squared_min, %5) step (%c1, %c1) { %17 = load %6[%arg5, %arg6] : memref %18 = load %11[%arg5, %arg6] : memref %19 = load %16[%arg5, %arg6] : memref %20 = addf %17, %18 : f32 store %20, %16[%arg5, %arg6] : memref - loop.yield + scf.yield } {mapping = [{bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 3 : i64}, {bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 4 : i64}]} - loop.yield + scf.yield } {mapping = [{bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 0 : i64}, {bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 1 : i64}]} return } @@ -285,10 +285,10 @@ // CHECK: [[VAL_44:%.*]] = subview [[VAL_2]]{{\[}}[[VAL_28]], [[VAL_29]]] {{\[}}[[VAL_41]], [[VAL_43]]] 
{{\[}}[[VAL_3]], [[VAL_3]]] : memref to memref // CHECK: [[VAL_45:%.*]] = affine.apply #[[MAP2]]([[VAL_22]]){{\[}}[[VAL_3]], [[VAL_4]]] // CHECK: [[VAL_46:%.*]] = cmpi "slt", [[VAL_45]], [[VAL_31_SQUARED]] : index -// CHECK: loop.if [[VAL_46]] { +// CHECK: scf.if [[VAL_46]] { // CHECK: [[VAL_47:%.*]] = affine.apply #[[MAP2]]([[VAL_23]]){{\[}}[[VAL_3]], [[VAL_4]]] // CHECK: [[VAL_48:%.*]] = cmpi "slt", [[VAL_47]], [[VAL_33]] : index -// CHECK: loop.if [[VAL_48]] { +// CHECK: scf.if [[VAL_48]] { // CHECK: [[VAL_49:%.*]] = load [[VAL_34]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref // CHECK: [[VAL_50:%.*]] = load [[VAL_39]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref // CHECK: [[VAL_51:%.*]] = load [[VAL_44]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref @@ -312,8 +312,8 @@ %res : memref) { %four = constant 4 : index // expected-error@+2 {{cannot redefine the bound for processor 1}} - // expected-error@+1 {{failed to legalize operation 'loop.parallel'}} - loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) + // expected-error@+1 {{failed to legalize operation 'scf.parallel'}} + scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%four, %four) { } { mapping = [ {processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}, @@ -333,11 +333,11 @@ %zero = constant 0 : index %one = constant 1 : index %four = constant 4 : index - // expected-error@+1 {{failed to legalize operation 'loop.parallel'}} - loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) + // expected-error@+1 {{failed to legalize operation 'scf.parallel'}} + scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%four, %four) { // expected-error@+1 {{cannot derive loop-invariant upper bound}} - loop.parallel (%si0, %si1) = (%zero, %zero) to (%i0, %i1) + scf.parallel (%si0, %si1) = (%zero, %zero) to (%i0, %i1) step (%one, %one) { %idx0 = addi %i0, %si0 : index %idx1 = addi %i1, %si1 : index diff --git a/mlir/test/Conversion/LoopsToGPU/perfect_1D_setlaunch.mlir b/mlir/test/Conversion/LoopsToGPU/perfect_1D_setlaunch.mlir --- a/mlir/test/Conversion/LoopsToGPU/perfect_1D_setlaunch.mlir +++ b/mlir/test/Conversion/LoopsToGPU/perfect_1D_setlaunch.mlir @@ -7,14 +7,14 @@ %c0 = constant 0 : index %c1 = constant 1 : index // CHECK: gpu.launch - // CHECK: loop.for - // CHECK: loop.for + // CHECK: scf.for + // CHECK: scf.for // CHECK: load // CHECK: load // CHECK: add // CHECK: store - loop.for %iv1 = %c0 to %0 step %c1 { - loop.for %iv2 = %c0 to %1 step %c1 { + scf.for %iv1 = %c0 to %0 step %c1 { + scf.for %iv2 = %c0 to %1 step %c1 { %12 = load %arg0[%iv1, %iv2] : memref %13 = load %arg1[%iv2, %iv1] : memref %14 = addf %12, %13 : f32 diff --git a/mlir/test/Conversion/LoopsToGPU/step_one.mlir b/mlir/test/Conversion/LoopsToGPU/step_one.mlir --- a/mlir/test/Conversion/LoopsToGPU/step_one.mlir +++ b/mlir/test/Conversion/LoopsToGPU/step_one.mlir @@ -57,7 +57,7 @@ // CHECK-22-SAME: blocks // CHECK-22-SAME: threads // Remapping of the loop induction variables in the last mapped loop.
// CHECK-22: %[[i:.*]] = addi %{{.*}}, %{{.*}} : index // CHECK-22-NEXT: %[[j:.*]] = addi %{{.*}}, %{{.*}} : index // CHECK-22-NEXT: %[[ii:.*]] = addi %{{.*}}, %{{.*}} : index diff --git a/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir b/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir --- a/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir +++ b/mlir/test/Conversion/VectorToLoops/vector-to-loops.mlir @@ -68,9 +68,9 @@ // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} { // CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %{{.*}} step 5 { // CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32> - // CHECK-NEXT: loop.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] { - // CHECK-NEXT: loop.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] { - // CHECK-NEXT: loop.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] { + // CHECK-NEXT: scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] { + // CHECK-NEXT: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] { + // CHECK-NEXT: scf.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] { // CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I0]], %[[I4]]) // CHECK-NEXT: {{.*}} = affine.apply #[[SUB]]()[%{{.*}}] // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index @@ -147,9 +147,9 @@ // CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32> // CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector.type_cast {{.*}} : memref<5x4x3xf32> // CHECK: store %{{.*}}, {{.*}} : memref> - // CHECK-NEXT: loop.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] { - // CHECK-NEXT: loop.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] { - // CHECK-NEXT: loop.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] { + // CHECK-NEXT: scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] { + // CHECK-NEXT: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] { + // CHECK-NEXT: scf.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] { // CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I0]], %[[I4]]) // CHECK-NEXT: {{.*}} = affine.apply #[[SUB]]()[%{{.*}}] // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index @@ -228,7 +228,7 @@ // CHECK: %[[add:.*]] = affine.apply #[[MAP0]](%[[I]])[%[[base]]] // CHECK: %[[cmp:.*]] = cmpi "slt", %[[add]], %[[dim]] : index // CHECK: %[[cond1:.*]] = and %[[cmp]], %[[cond0]] : i1 - // CHECK: loop.if %[[cond1]] { + // CHECK: scf.if %[[cond1]] { // CHECK: %[[vec_1d:.*]] = vector.transfer_read %[[A]][%[[add]], %[[base]]], %[[cst]] {permutation_map = #[[MAP1]]} : memref, vector<15xf32> // CHECK: store %[[vec_1d]], %[[alloc]][%[[I]]] : memref<17xvector<15xf32>> // CHECK: } else { @@ -262,7 +262,7 @@ // CHECK: %[[add:.*]] = affine.apply #[[MAP0]](%[[I]])[%[[base]]] // CHECK: %[[cmp:.*]] = cmpi "slt", %[[add]], %[[dim]] : index // CHECK: %[[cond1:.*]] = and %[[cmp]], %[[cond0]] : i1 - // CHECK: loop.if %[[cond1]] { + // CHECK: scf.if %[[cond1]] { // CHECK: %[[vec_1d:.*]] = load %0[%[[I]]] : memref<17xvector<15xf32>> // CHECK: vector.transfer_write %[[vec_1d]], %[[A]][%[[add]], %[[base]]] {permutation_map = #[[MAP1]]} : vector<15xf32>, memref // CHECK: } diff --git a/mlir/test/Conversion/convert-to-cfg.mlir b/mlir/test/Conversion/convert-to-cfg.mlir --- a/mlir/test/Conversion/convert-to-cfg.mlir +++ b/mlir/test/Conversion/convert-to-cfg.mlir @@ -12,7 +12,7 @@ // CHECK-NEXT: ^bb3: // pred: ^bb1 // CHECK-NEXT: return func @simple_std_for_loop(%arg0 : index, %arg1 : index, %arg2 : index) { - loop.for %i0 = %arg0 to %arg1 step %arg2 { + scf.for %i0 = %arg0 to %arg1 step %arg2 { %c1 = constant 1 : index } return @@ -39,9 +39,9 @@ // CHECK-NEXT: ^bb6: // pred: ^bb1 // CHECK-NEXT: 
return func @simple_std_2_for_loops(%arg0 : index, %arg1 : index, %arg2 : index) { - loop.for %i0 = %arg0 to %arg1 step %arg2 { + scf.for %i0 = %arg0 to %arg1 step %arg2 { %c1 = constant 1 : index - loop.for %i1 = %arg0 to %arg1 step %arg2 { + scf.for %i1 = %arg0 to %arg1 step %arg2 { %c1_0 = constant 1 : index } } @@ -56,7 +56,7 @@ // CHECK-NEXT: ^bb2: // 2 preds: ^bb0, ^bb1 // CHECK-NEXT: return func @simple_std_if(%arg0: i1) { - loop.if %arg0 { + scf.if %arg0 { %c1 = constant 1 : index } return @@ -73,7 +73,7 @@ // CHECK-NEXT: ^bb3: // 2 preds: ^bb1, ^bb2 // CHECK-NEXT: return func @simple_std_if_else(%arg0: i1) { - loop.if %arg0 { + scf.if %arg0 { %c1 = constant 1 : index } else { %c1_0 = constant 1 : index @@ -97,9 +97,9 @@ // CHECK-NEXT: ^bb5: // 2 preds: ^bb0, ^bb4 // CHECK-NEXT: return func @simple_std_2_ifs(%arg0: i1) { - loop.if %arg0 { + scf.if %arg0 { %c1 = constant 1 : index - loop.if %arg0 { + scf.if %arg0 { %c1_0 = constant 1 : index } else { %c1_1 = constant 1 : index @@ -134,11 +134,11 @@ // CHECK-NEXT: return // CHECK-NEXT: } func @simple_std_for_loop_with_2_ifs(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : i1) { - loop.for %i0 = %arg0 to %arg1 step %arg2 { + scf.for %i0 = %arg0 to %arg1 step %arg2 { %c1 = constant 1 : index - loop.if %arg3 { + scf.if %arg3 { %c1_0 = constant 1 : index - loop.if %arg3 { + scf.if %arg3 { %c1_1 = constant 1 : index } else { %c1_2 = constant 1 : index @@ -151,14 +151,14 @@ // CHECK-LABEL: func @simple_if_yield func @simple_if_yield(%arg0: i1) -> (i1, i1) { // CHECK: cond_br %{{.*}}, ^[[then:.*]], ^[[else:.*]] - %0:2 = loop.if %arg0 -> (i1, i1) { + %0:2 = scf.if %arg0 -> (i1, i1) { // CHECK: ^[[then]]: // CHECK: %[[v0:.*]] = constant 0 // CHECK: %[[v1:.*]] = constant 1 // CHECK: br ^[[dom:.*]](%[[v0]], %[[v1]] : i1, i1) %c0 = constant 0 : i1 %c1 = constant 1 : i1 - loop.yield %c0, %c1 : i1, i1 + scf.yield %c0, %c1 : i1, i1 } else { // CHECK: ^[[else]]: // CHECK: %[[v2:.*]] = constant 0 @@ -166,7 +166,7 @@ // CHECK: br ^[[dom]](%[[v3]], %[[v2]] : i1, i1) %c0 = constant 0 : i1 %c1 = constant 1 : i1 - loop.yield %c1, %c0 : i1, i1 + scf.yield %c1, %c0 : i1, i1 } // CHECK: ^[[dom]](%[[arg1:.*]]: i1, %[[arg2:.*]]: i1): // CHECK: br ^[[cont:.*]] @@ -178,45 +178,45 @@ // CHECK-LABEL: func @nested_if_yield func @nested_if_yield(%arg0: i1) -> (index) { // CHECK: cond_br %{{.*}}, ^[[first_then:.*]], ^[[first_else:.*]] - %0 = loop.if %arg0 -> i1 { + %0 = scf.if %arg0 -> i1 { // CHECK: ^[[first_then]]: %1 = constant 1 : i1 // CHECK: br ^[[first_dom:.*]]({{.*}}) - loop.yield %1 : i1 + scf.yield %1 : i1 } else { // CHECK: ^[[first_else]]: %2 = constant 0 : i1 // CHECK: br ^[[first_dom]]({{.*}}) - loop.yield %2 : i1 + scf.yield %2 : i1 } // CHECK: ^[[first_dom]](%[[arg1:.*]]: i1): // CHECK: br ^[[first_cont:.*]] // CHECK: ^[[first_cont]]: // CHECK: cond_br %[[arg1]], ^[[second_outer_then:.*]], ^[[second_outer_else:.*]] - %1 = loop.if %0 -> index { + %1 = scf.if %0 -> index { // CHECK: ^[[second_outer_then]]: // CHECK: cond_br %arg0, ^[[second_inner_then:.*]], ^[[second_inner_else:.*]] - %3 = loop.if %arg0 -> index { + %3 = scf.if %arg0 -> index { // CHECK: ^[[second_inner_then]]: %4 = constant 40 : index // CHECK: br ^[[second_inner_dom:.*]]({{.*}}) - loop.yield %4 : index + scf.yield %4 : index } else { // CHECK: ^[[second_inner_else]]: %5 = constant 41 : index // CHECK: br ^[[second_inner_dom]]({{.*}}) - loop.yield %5 : index + scf.yield %5 : index } // CHECK: ^[[second_inner_dom]](%[[arg2:.*]]: index): // CHECK: br ^[[second_inner_cont:.*]] // 
CHECK: ^[[second_inner_cont]]: // CHECK: br ^[[second_outer_dom:.*]]({{.*}}) - loop.yield %3 : index + scf.yield %3 : index } else { // CHECK: ^[[second_outer_else]]: %6 = constant 42 : index // CHECK: br ^[[second_outer_dom]]({{.*}} - loop.yield %6 : index + scf.yield %6 : index } // CHECK: ^[[second_outer_dom]](%[[arg3:.*]]: index): // CHECK: br ^[[second_outer_cont:.*]] @@ -251,7 +251,7 @@ func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index) { %step = constant 1 : index - loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) + scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %step) { %c1 = constant 1 : index } @@ -278,9 +278,9 @@ func @for_yield(%arg0 : index, %arg1 : index, %arg2 : index) -> (f32, f32) { %s0 = constant 0.0 : f32 %s1 = constant 1.0 : f32 - %result:2 = loop.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0, %sj = %s1) -> (f32, f32) { + %result:2 = scf.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0, %sj = %s1) -> (f32, f32) { %sn = addf %si, %sj : f32 - loop.yield %sn, %sn : f32, f32 + scf.yield %sn, %sn : f32, f32 } return %result#0, %result#1 : f32, f32 } @@ -304,12 +304,12 @@ // CHECK: return %[[ARG_OUT]] : f32 func @nested_for_yield(%arg0 : index, %arg1 : index, %arg2 : index) -> f32 { %s0 = constant 1.0 : f32 - %r = loop.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%iter = %s0) -> (f32) { - %result = loop.for %i1 = %arg0 to %arg1 step %arg2 iter_args(%si = %iter) -> (f32) { + %r = scf.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%iter = %s0) -> (f32) { + %result = scf.for %i1 = %arg0 to %arg1 step %arg2 iter_args(%si = %iter) -> (f32) { %sn = addf %si, %si : f32 - loop.yield %sn : f32 + scf.yield %sn : f32 } - loop.yield %result : f32 + scf.yield %result : f32 } return %r : f32 } @@ -333,7 +333,7 @@ // CHECK: %[[COMP:.*]] = cmpi "slt", %[[ITER]], %[[UB]] // CHECK: cond_br %[[COMP]], ^[[BODY:.*]], ^[[CONTINUE:.*]] - // Bodies of loop.reduce operations are folded into the main loop body. The + // Bodies of scf.reduce operations are folded into the main loop body. The // result of this partial reduction is passed as argument to the condition // block. // CHECK: ^[[BODY]]: @@ -345,12 +345,12 @@ // The continuation block has access to the (last value of) reduction. 
// CHECK: ^[[CONTINUE]]: // CHECK: return %[[ITER_ARG]] - %0 = loop.parallel (%i) = (%arg0) to (%arg1) step (%arg2) init(%arg3) -> f32 { + %0 = scf.parallel (%i) = (%arg0) to (%arg1) step (%arg2) init(%arg3) -> f32 { %cst = constant 42.0 : f32 - loop.reduce(%cst) : f32 { + scf.reduce(%cst) : f32 { ^bb0(%lhs: f32, %rhs: f32): %1 = mulf %lhs, %rhs : f32 - loop.reduce.return %1 : f32 + scf.reduce.return %1 : f32 } } return %0 : f32 @@ -380,20 +380,20 @@ // CHECK: return %[[ITER_ARG1_OUT]], %[[ITER_ARG2_OUT]] %step = constant 1 : index %init = constant 42 : i64 - %0:2 = loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) + %0:2 = scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %step) init(%arg5, %init) -> (f32, i64) { %cf = constant 42.0 : f32 - loop.reduce(%cf) : f32 { + scf.reduce(%cf) : f32 { ^bb0(%lhs: f32, %rhs: f32): %1 = addf %lhs, %rhs : f32 - loop.reduce.return %1 : f32 + scf.reduce.return %1 : f32 } %2 = call @generate() : () -> i64 - loop.reduce(%2) : i64 { + scf.reduce(%2) : i64 { ^bb0(%lhs: i64, %rhs: i64): %3 = or %lhs, %rhs : i64 - loop.reduce.return %3 : i64 + scf.reduce.return %3 : i64 } } return %0#0, %0#1 : f32, i64 diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir --- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir @@ -363,7 +363,7 @@ return } // This should not vectorize due to the sequential dependence in the loop. // CHECK-LABEL: @vec_rejected_sequential func @vec_rejected_sequential(%A : memref) { %N = dim %A, 0 : memref diff --git a/mlir/test/Dialect/Affine/dma-generate.mlir b/mlir/test/Dialect/Affine/dma-generate.mlir --- a/mlir/test/Dialect/Affine/dma-generate.mlir +++ b/mlir/test/Dialect/Affine/dma-generate.mlir @@ -375,7 +375,7 @@ return } // There are three regions here - the 'load' preceding the loop, the loop // itself, and the operations appearing after the loop. // CHECK: alloc() : memref<256xf32> // CHECK-NEXT: alloc() : memref<1xf32, 2> // CHECK-NEXT: alloc() : memref<1xi32> @@ -583,7 +583,7 @@ // With fast mem capacity set to 16 KB, the DMAs if placed under %k will fit. // However, the region of arg2 accessed is invariant w.r.t the %k loop unlike // %arg0 and %arg1. So, its DMA can be hoisted one level up and placed under // %j, while the DMAs for arg0 and arg1 appear right under the %k loop.
#map0 = affine_map<(d0) -> (d0)> #map1 = affine_map<(d0) -> (d0 + 4)> diff --git a/mlir/test/Dialect/Affine/slicing-utils.mlir b/mlir/test/Dialect/Affine/slicing-utils.mlir --- a/mlir/test/Dialect/Affine/slicing-utils.mlir +++ b/mlir/test/Dialect/Affine/slicing-utils.mlir @@ -229,7 +229,7 @@ // BWD: matched: %[[b:.*]] {{.*}} backward static slice: // BWD: affine.for {{.*}} - // affine.for appears in the body of loop.for + // affine.for appears in the body of scf.for // BWD: affine.for {{.*}} // affine.for appears as a proper op in the backward slice @@ -239,10 +239,10 @@ // BWD: matched: %[[c:.*]] {{.*}} backward static slice: // BWD: affine.for {{.*}} - // affine.for appears in the body of loop.for + // affine.for appears in the body of scf.for // BWD-NEXT: affine.for {{.*}} - // affine.for only appears in the body of loop.for + // affine.for only appears in the body of scf.for // BWD-NOT: affine.for {{.*}} %c = "slicing-test-op"(%i0): (index) -> index } @@ -257,9 +257,9 @@ %f = constant 1.0 : f32 %c = "slicing-test-op"(%f): (f32) -> index // FWD: matched: {{.*}} (f32) -> index forward static slice: - // FWD: loop.for {{.*}} + // FWD: scf.for {{.*}} // FWD: matched: {{.*}} (index, index) -> index forward static slice: - loop.for %i2 = %c to %c step %c { + scf.for %i2 = %c to %c step %c { %d = "slicing-test-op"(%c, %i2): (index, index) -> index } return diff --git a/mlir/test/Dialect/Affine/unroll.mlir b/mlir/test/Dialect/Affine/unroll.mlir --- a/mlir/test/Dialect/Affine/unroll.mlir +++ b/mlir/test/Dialect/Affine/unroll.mlir @@ -550,7 +550,7 @@ // CHECK-NEXT: return // The trip count here is a multiple of four, but this can be inferred only // through composition. Check for no cleanup loop. // UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_upper_bound func @loop_nest_non_trivial_multiple_upper_bound(%M : index, %N : index) { %T = affine.apply affine_map<(d0) -> (4*d0 + 1)>(%M) diff --git a/mlir/test/Dialect/GPU/mapping.mlir b/mlir/test/Dialect/GPU/mapping.mlir --- a/mlir/test/Dialect/GPU/mapping.mlir +++ b/mlir/test/Dialect/GPU/mapping.mlir @@ -5,9 +5,9 @@ %zero = constant 0 : index %one = constant 1 : index %four = constant 4 : index - loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) + scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%four, %four) { - loop.parallel (%si0, %si1) = (%zero, %zero) to (%four, %four) + scf.parallel (%si0, %si1) = (%zero, %zero) to (%four, %four) step (%one, %one) { } } @@ -15,8 +15,8 @@ } // CHECK-LABEL: func @parallel_loop( -// CHECK: loop.parallel -// CHECK: loop.parallel +// CHECK: scf.parallel +// CHECK: scf.parallel // CHECK: {mapping = [{bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 3 : i64}, // CHECK-SAME: {bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 4 : i64}]} // CHECK: {mapping = [{bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 0 : i64}, @@ -30,11 +30,11 @@ %zero = constant 0 : index %one = constant 1 : index %four = constant 4 : index - loop.parallel (%i0, %i1, %i2, %i3) = (%zero, %zero, %zero, %zero) to (%arg0, %arg1, %arg2, %arg3) + scf.parallel (%i0, %i1, %i2, %i3) = (%zero, %zero, %zero, %zero) to (%arg0, %arg1, %arg2, %arg3) step (%four, %four, %four, %four) { - loop.parallel (%si0, %si1, %si2, %si3) = (%zero, %zero, %zero, %zero) to (%four, %four, %four, %four) + scf.parallel (%si0, %si1, %si2, %si3) = (%zero, %zero, %zero, %zero) to (%four, %four, %four, %four)
step (%one, %one, %one, %one) { - loop.parallel (%ti0, %ti1, %ti2, %ti3) = (%zero, %zero, %zero, %zero) to (%four, %four, %four, %four) + scf.parallel (%ti0, %ti1, %ti2, %ti3) = (%zero, %zero, %zero, %zero) to (%four, %four, %four, %four) step (%one, %one, %one, %one) { } } @@ -43,9 +43,9 @@ } // CHECK-LABEL: func @parallel_loop_4d( -// CHECK: loop.parallel -// CHECK: loop.parallel -// CHECK: loop.parallel +// CHECK: scf.parallel +// CHECK: scf.parallel +// CHECK: scf.parallel // CHECK: {mapping = [{bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 6 : i64}, // CHECK-SAME: {bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 6 : i64}, // CHECK-SAME: {bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 6 : i64}, diff --git a/mlir/test/Dialect/GPU/promotion.mlir b/mlir/test/Dialect/GPU/promotion.mlir --- a/mlir/test/Dialect/GPU/promotion.mlir +++ b/mlir/test/Dialect/GPU/promotion.mlir @@ -21,9 +21,9 @@ // Verify that loops for the copy are emitted. We only check the number of // loops here since their bounds are produced by mapLoopToProcessorIds, // tested separately. - // CHECK: loop.for %[[i0:.*]] = - // CHECK: loop.for %[[i1:.*]] = - // CHECK: loop.for %[[i2:.*]] = + // CHECK: scf.for %[[i0:.*]] = + // CHECK: scf.for %[[i1:.*]] = + // CHECK: scf.for %[[i2:.*]] = // Verify that the copy is emitted and uses only the last two loops. // CHECK: %[[v:.*]] = load %[[arg]][%[[i1]], %[[i2]]] @@ -37,9 +37,9 @@ // Verify that loops for the copy are emitted. We only check the number of // loops here since their bounds are produced by mapLoopToProcessorIds, // tested separately. - // CHECK: loop.for %[[i0:.*]] = - // CHECK: loop.for %[[i1:.*]] = - // CHECK: loop.for %[[i2:.*]] = + // CHECK: scf.for %[[i0:.*]] = + // CHECK: scf.for %[[i1:.*]] = + // CHECK: scf.for %[[i2:.*]] = // Verify that the copy is emitted and uses only the last two loops. // CHECK: %[[v:.*]] = load %[[promoted]][%[[i1]], %[[i2]]] @@ -73,11 +73,11 @@ // CHECK-DAG: %[[bdz:.*]] = "gpu.block_dim"() {dimension = "z"} // Verify that loops for the copy are emitted. - // CHECK: loop.for %[[i0:.*]] = - // CHECK: loop.for %[[i1:.*]] = - // CHECK: loop.for %[[i2:.*]] = - // CHECK: loop.for %[[i3:.*]] = - // CHECK: loop.for %[[i4:.*]] = + // CHECK: scf.for %[[i0:.*]] = + // CHECK: scf.for %[[i1:.*]] = + // CHECK: scf.for %[[i2:.*]] = + // CHECK: scf.for %[[i3:.*]] = + // CHECK: scf.for %[[i4:.*]] = // Verify that the copy is emitted. // CHECK: %[[v:.*]] = load %[[arg]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]] @@ -88,11 +88,11 @@ "use"(%arg0) : (memref<8x7x6x5x4xf32>) -> () // Verify that loop loops for the copy are emitted. - // CHECK: loop.for %[[i0:.*]] = - // CHECK: loop.for %[[i1:.*]] = - // CHECK: loop.for %[[i2:.*]] = - // CHECK: loop.for %[[i3:.*]] = - // CHECK: loop.for %[[i4:.*]] = + // CHECK: scf.for %[[i0:.*]] = + // CHECK: scf.for %[[i1:.*]] = + // CHECK: scf.for %[[i2:.*]] = + // CHECK: scf.for %[[i3:.*]] = + // CHECK: scf.for %[[i4:.*]] = // Verify that the copy is emitted. 
// CHECK: %[[v:.*]] = load %[[promoted]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]] diff --git a/mlir/test/Dialect/Linalg/fusion-2-level.mlir b/mlir/test/Dialect/Linalg/fusion-2-level.mlir --- a/mlir/test/Dialect/Linalg/fusion-2-level.mlir +++ b/mlir/test/Dialect/Linalg/fusion-2-level.mlir @@ -13,18 +13,18 @@ %1 = dim %C, 1 : memref %2 = dim %D, 1 : memref linalg.matmul(%A, %B, %C) : memref, memref, memref - loop.for %arg5 = %c0 to %0 step %c20 { - loop.for %arg6 = %c0 to %2 step %c30 { - loop.for %arg7 = %c0 to %1 step %c40 { + scf.for %arg5 = %c0 to %0 step %c20 { + scf.for %arg6 = %c0 to %2 step %c30 { + scf.for %arg7 = %c0 to %1 step %c40 { %5 = std.subview %C[%arg5, %arg7][%c20, %c40][%c1, %c1] : memref to memref %7 = std.subview %D[%arg7, %arg6][%c40, %c30][%c1, %c1]: memref to memref %8 = std.subview %E[%arg5, %arg6][%c20, %c40][%c1, %c1] : memref to memref %9 = dim %5, 0 : memref %10 = dim %5, 1 : memref %11 = dim %7, 1 : memref - loop.for %arg8 = %c0 to %9 step %c2 { - loop.for %arg9 = %c0 to %11 step %c3 { - loop.for %arg10 = %c0 to %10 step %c4 { + scf.for %arg8 = %c0 to %9 step %c2 { + scf.for %arg9 = %c0 to %11 step %c3 { + scf.for %arg10 = %c0 to %10 step %c4 { %14 = std.subview %5[%arg8, %arg10][%c2, %c4][%c1, %c1] : memref to memref %16 = std.subview %7[%arg10, %arg9][%c4, %c3][%c1, %c1]: memref to memref %17 = std.subview %8[%arg8, %arg9][%c2, %c4][%c1, %c1] : memref to memref @@ -39,11 +39,11 @@ } // CHECK-LABEL: func @f1 // CHECK: (%[[A:.*]]:{{.*}}, %[[B:.*]]:{{.*}}, %[[C:.*]]:{{.*}}, %[[D:.*]]:{{.*}}, %[[E:.*]]:{{.*}}) -// CHECK: loop.for -// CHECK: loop.for -// CHECK: loop.for -// CHECK: loop.for -// CHECK: loop.for -// CHECK: loop.for +// CHECK: scf.for +// CHECK: scf.for +// CHECK: scf.for +// CHECK: scf.for +// CHECK: scf.for +// CHECK: scf.for // CHECK: linalg.matmul // CHECK: linalg.matmul diff --git a/mlir/test/Dialect/Linalg/fusion.mlir b/mlir/test/Dialect/Linalg/fusion.mlir --- a/mlir/test/Dialect/Linalg/fusion.mlir +++ b/mlir/test/Dialect/Linalg/fusion.mlir @@ -18,9 +18,9 @@ memref, memref %c1 = constant 1 : index - loop.for %arg5 = %c0 to %0 step %c2 { - loop.for %arg6 = %c0 to %2 step %c3 { - loop.for %arg7 = %c0 to %1 step %c4 { + scf.for %arg5 = %c0 to %0 step %c2 { + scf.for %arg6 = %c0 to %2 step %c3 { + scf.for %arg7 = %c0 to %1 step %c4 { %5 = std.subview %A[%arg5, %arg7][%c2, %c4][%c1, %c1] : memref to memref @@ -41,9 +41,9 @@ } // CHECK-LABEL: func @f1 // CHECK: (%[[A:.*]]:{{.*}}, %[[B:.*]]:{{.*}}, %[[C:.*]]:{{.*}}, %[[D:.*]]:{{.*}}, %[[E:.*]]:{{.*}}) -// CHECK: loop.for -// CHECK: loop.for -// CHECK: loop.for +// CHECK: scf.for +// CHECK: scf.for +// CHECK: scf.for // CHECK: linalg.matmul // CHECK: linalg.matmul @@ -68,9 +68,9 @@ %0 = dim %C, 0 : memref %1 = dim %C, 1 : memref %2 = dim %D, 1 : memref - loop.for %arg5 = %c0 to %0 step %c2 { - loop.for %arg6 = %c0 to %2 step %c3 { - loop.for %arg7 = %c0 to %1 step %c4 { + scf.for %arg5 = %c0 to %0 step %c2 { + scf.for %arg6 = %c0 to %2 step %c3 { + scf.for %arg7 = %c0 to %1 step %c4 { %5 = std.subview %C[%arg5, %arg7][%c2, %c4][%c1, %c1] : memref to memref @@ -94,9 +94,9 @@ // CHECK-DAG: %[[C_0:.*]] = dim %[[C]], 0 : memref // CHECK-DAG: %[[C_1:.*]] = dim %[[C]], 1 : memref // CHECK-DAG: %[[D_1:.*]] = dim %[[D]], 1 : memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[C_0]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[D_1]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} { +// CHECK: scf.for %{{.*}} = %{{.*}} to %[[C_0]] step %{{.*}} { +// CHECK: scf.for 
%{{.*}} = %{{.*}} to %[[D_1]] step %{{.*}} { +// CHECK: scf.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} { // CHECK: linalg.matmul // CHECK: linalg.matmul @@ -120,9 +120,9 @@ %0 = dim %D, 0 : memref %1 = dim %D, 1 : memref %2 = dim %C, 1 : memref - loop.for %arg5 = %c0 to %0 step %c2 { - loop.for %arg6 = %c0 to %2 step %c3 { - loop.for %arg7 = %c0 to %1 step %c4 { + scf.for %arg5 = %c0 to %0 step %c2 { + scf.for %arg6 = %c0 to %2 step %c3 { + scf.for %arg7 = %c0 to %1 step %c4 { %5 = std.subview %D[%arg5, %arg7][%c2, %c4][%c1, %c1] : memref to memref @@ -146,9 +146,9 @@ // CHECK: %[[D_0:.*]] = dim %[[D]], 0 : memref // CHECK: %[[D_1:.*]] = dim %[[D]], 1 : memref // CHECK: %[[C_1:.*]] = dim %[[C]], 1 : memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[D_0]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[D_1]] step %{{.*}} { +// CHECK: scf.for %{{.*}} = %{{.*}} to %[[D_0]] step %{{.*}} { +// CHECK: scf.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} { +// CHECK: scf.for %{{.*}} = %{{.*}} to %[[D_1]] step %{{.*}} { // CHECK: linalg.matmul // CHECK: linalg.matmul @@ -176,9 +176,9 @@ %0 = dim %C, 0 : memref %1 = dim %C, 1 : memref %2 = dim %D, 1 : memref - loop.for %arg5 = %c0 to %0 step %c2 { - loop.for %arg6 = %c0 to %2 step %c3 { - loop.for %arg7 = %c0 to %1 step %c4 { + scf.for %arg5 = %c0 to %0 step %c2 { + scf.for %arg6 = %c0 to %2 step %c3 { + scf.for %arg7 = %c0 to %1 step %c4 { %5 = std.subview %C[%arg5, %arg7][%c2, %c4][%c1, %c1] : memref to memref @@ -202,9 +202,9 @@ // CHECK: %[[C_0:.*]] = dim %[[C]], 0 : memref // CHECK: %[[C_1:.*]] = dim %[[C]], 1 : memref // CHECK: %[[D_1:.*]] = dim %[[D]], 1 : memref -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[C_0]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[D_1]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} { +// CHECK: scf.for %{{.*}} = %{{.*}} to %[[C_0]] step %{{.*}} { +// CHECK: scf.for %{{.*}} = %{{.*}} to %[[D_1]] step %{{.*}} { +// CHECK: scf.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} { // Fuse D then fuse C, no false dependence prevent it. 
// CHECK: linalg.matmul // CHECK: linalg.matmul @@ -235,9 +235,9 @@ memref, memref, memref - loop.for %arg5 = %c0 to %1 step %c2 { - loop.for %arg6 = %c0 to %0 step %c3 { - loop.for %arg7 = %c0 to %2 step %c4 { + scf.for %arg5 = %c0 to %1 step %c2 { + scf.for %arg6 = %c0 to %0 step %c3 { + scf.for %arg7 = %c0 to %2 step %c4 { %5 = std.subview %D[%arg5, %arg7][%c2, %c4][%c1, %c1] : memref to memref @@ -261,9 +261,9 @@ // CHECK-DAG: %[[B_1:.*]] = dim %[[B]], 1 : memref // CHECK-DAG: %[[D_0:.*]] = dim %[[D]], 0 : memref // CHECK-DAG: %[[D_1:.*]] = dim %[[D]], 1 : memref -// CHECK: loop.for %[[I:.*]] = %{{.*}} to %[[D_0]] step %{{.*}} { -// CHECK: loop.for %[[J:.*]] = %{{.*}} to %[[B_1]] step %{{.*}} { -// CHECK: loop.for %[[K:.*]] = %{{.*}} to %[[D_1]] step %{{.*}} { +// CHECK: scf.for %[[I:.*]] = %{{.*}} to %[[D_0]] step %{{.*}} { +// CHECK: scf.for %[[J:.*]] = %{{.*}} to %[[B_1]] step %{{.*}} { +// CHECK: scf.for %[[K:.*]] = %{{.*}} to %[[D_1]] step %{{.*}} { // CHECK-DAG: %[[D_IK:.*]] = subview %[[D]][%[[I]], %[[K]]] // CHECK-DAG: %[[B_KJ:.*]] = subview %[[B]][%[[K]], %[[J]]] // CHECK-DAG: %[[E_IJ:.*]] = subview %[[E]][%[[I]], %[[J]]] @@ -307,9 +307,9 @@ memref %1 = dim %C, 0 : memref %2 = dim %D, 1 : memref - loop.for %arg5 = %c0 to %1 step %c2 { - loop.for %arg6 = %c0 to %2 step %c3 { - loop.for %arg7 = %c0 to %0 step %c4 { + scf.for %arg5 = %c0 to %1 step %c2 { + scf.for %arg6 = %c0 to %2 step %c3 { + scf.for %arg7 = %c0 to %0 step %c4 { %3 = affine.apply #map0(%arg5) %4 = affine.apply #map1(%arg7) %5 = std.subview %C[%arg5, %arg7][%c2, %c4][%c1, %c1] : @@ -334,9 +334,9 @@ // CHECK-LABEL: func @f6 // CHECK: (%[[A:.*]]:{{.*}}, %[[B:.*]]:{{.*}}, %[[C:.*]]:{{.*}}, %[[D:.*]]:{{.*}}, %[[E:.*]]:{{.*}}) // Fuse the producer of E (WAW) then the producer of C (WAR). 
-// CHECK: loop.for -// CHECK: loop.for -// CHECK: loop.for +// CHECK: scf.for +// CHECK: scf.for +// CHECK: scf.for // CHECK: linalg.matmul // CHECK: linalg.matmul // CHECK: linalg.matmul @@ -367,9 +367,9 @@ memref, memref, memref - loop.for %arg5 = %c0 to %0 step %c2 { - loop.for %arg6 = %c0 to %2 step %c3 { - loop.for %arg7 = %c0 to %1 step %c4 { + scf.for %arg5 = %c0 to %0 step %c2 { + scf.for %arg6 = %c0 to %2 step %c3 { + scf.for %arg7 = %c0 to %1 step %c4 { %7 = std.subview %A[%arg5, %arg7][%c2, %c4][%c1, %c1] : memref to memref @@ -386,9 +386,9 @@ } } } - loop.for %arg5 = %c0 to %3 step %c2 { - loop.for %arg6 = %c0 to %4 step %c3 { - loop.for %arg7 = %c0 to %2 step %c4 { + scf.for %arg5 = %c0 to %3 step %c2 { + scf.for %arg6 = %c0 to %4 step %c3 { + scf.for %arg7 = %c0 to %2 step %c4 { %7 = std.subview %C[%arg5, %arg7][%c2, %c4][%c1, %c1] : memref to memref @@ -415,14 +415,14 @@ // CHECK: %[[C_0:.*]] = dim %[[C]], 0 : memref // CHECK: %[[D_1:.*]] = dim %[[D]], 1 : memref // CHECK: linalg.matmul(%[[A]], %[[C]], %[[E]]) -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[A_0]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[A_1]] step %{{.*}} { +// CHECK: scf.for %{{.*}} = %{{.*}} to %[[A_0]] step %{{.*}} { +// CHECK: scf.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} { +// CHECK: scf.for %{{.*}} = %{{.*}} to %[[A_1]] step %{{.*}} { // CHECK: linalg.matmul // CHECK: linalg.matmul -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[C_0]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[D_1]] step %{{.*}} { -// CHECK: loop.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} { +// CHECK: scf.for %{{.*}} = %{{.*}} to %[[C_0]] step %{{.*}} { +// CHECK: scf.for %{{.*}} = %{{.*}} to %[[D_1]] step %{{.*}} { +// CHECK: scf.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} { // CHECK: linalg.matmul // CHECK-NOT: linalg.matmul @@ -454,9 +454,9 @@ memref, memref %2 = dim %D, 1 : memref - loop.for %arg5 = %c0 to %0 step %c2 { - loop.for %arg6 = %c0 to %2 step %c3 { - loop.for %arg7 = %c0 to %1 step %c4 { + scf.for %arg5 = %c0 to %0 step %c2 { + scf.for %arg6 = %c0 to %2 step %c3 { + scf.for %arg7 = %c0 to %1 step %c4 { %3 = affine.apply #map0(%arg5) %4 = affine.apply #map1(%arg7) %5 = std.subview %A[%arg5, %arg7][%c2, %c4][%c1, %c1] : @@ -482,9 +482,9 @@ // CHECK: (%[[A:.*]]: memref{{.*}}, %[[B:.*]]: memref{{.*}}, %[[C:.*]]: memref{{.*}}, %[[D:.*]]: memref{{.*}}, %[[E:.*]]: memref{{.*}}) // CHECK: linalg.matmul // CHECK: linalg.matmul -// CHECK: loop.for -// CHECK: loop.for -// CHECK: loop.for +// CHECK: scf.for +// CHECK: scf.for +// CHECK: scf.for // CHECK: linalg.matmul // CHECK-NOT: linalg.matmul @@ -514,8 +514,8 @@ memref %0 = dim %B, 0 : memref %1 = dim %B, 1 : memref - loop.for %arg4 = %c0 to %0 step %c2 { - loop.for %arg5 = %c0 to %1 step %c3 { + scf.for %arg4 = %c0 to %0 step %c2 { + scf.for %arg5 = %c0 to %1 step %c3 { %4 = std.subview %B[%arg4, %arg5][%c2, %c3][%c1, %c1] : memref to memref @@ -537,9 +537,9 @@ return } // CHECK-LABEL: func @pointwise -// CHECK: loop.for -// CHECK: loop.for -// CHECK-NOT: loop.for +// CHECK: scf.for +// CHECK: scf.for +// CHECK-NOT: scf.for // CHECK: linalg.generic // CHECK: addf // CHECK: linalg.generic @@ -573,8 +573,8 @@ memref %0 = dim %B, 0 : memref %1 = dim %B, 1 : memref - loop.for %arg4 = %c0 to %0 step %c2 { - loop.for %arg5 = %c0 to %1 step %c3 { + scf.for %arg4 = %c0 to %0 step %c2 { + scf.for %arg5 = %c0 to %1 step %c3 { %4 = std.subview %B[%arg4, %arg5][%c2, %c3][%c1, %c1] : 
memref to memref @@ -596,9 +596,9 @@ return } // CHECK-LABEL: func @pointwise_no_view -// CHECK: loop.for -// CHECK: loop.for -// CHECK-NOT: loop.for +// CHECK: scf.for +// CHECK: scf.for +// CHECK-NOT: scf.for // CHECK: linalg.generic // CHECK: addf // CHECK: linalg.generic @@ -642,8 +642,8 @@ %3 = dim %1, 1 : memref<100x10xf32> %4 = dim %arg2, 0 : memref<100x10xf32> %5 = dim %arg2, 1 : memref<100x10xf32> - loop.for %i = %c0 to %2 step %c1 { - loop.for %j = %c0 to %3 step %c1 { + scf.for %i = %c0 to %2 step %c1 { + scf.for %j = %c0 to %3 step %c1 { %6 = std.subview %1[%i, %j][%c1, %c1][%c1, %c1] : memref<100x10xf32> to memref %7 = std.subview %arg2[%i, %j][%c1, %c1][%c1, %c1] : @@ -666,9 +666,9 @@ } // CHECK-LABEL: func @fusion // CHECK-NOT: linalg.generic -// CHECK: loop.for -// CHECK: loop.for -// CHECK-NOT: loop.for +// CHECK: scf.for +// CHECK: scf.for +// CHECK-NOT: scf.for // CHECK: linalg.generic // CHECK: linalg.yield // CHECK: linalg.generic @@ -704,8 +704,8 @@ %12 = dim %arg2, 3 : memref<1x4x5x1xf32> %13 = linalg.range %c0 : %6 : %c2 : !linalg.range %14 = linalg.range %c0 : %10 : %c3 : !linalg.range - loop.for %arg3 = %c0 to %6 step %c2 { - loop.for %arg4 = %c0 to %10 step %c3 { + scf.for %arg3 = %c0 to %6 step %c2 { + scf.for %arg4 = %c0 to %10 step %c3 { %15 = affine.min #map0(%c2, %c1, %arg3) %16 = affine.apply #map2()[%7] %17 = affine.min #map0(%16, %c4, %arg4) @@ -723,8 +723,8 @@ return } // CHECK-LABEL: func @fill_and_conv -// CHECK: loop.for -// CHECK: loop.for +// CHECK: scf.for +// CHECK: scf.for // CHECK: linalg.fill // CHECK: linalg.conv @@ -747,9 +747,9 @@ memref, memref - loop.for %i = %c0 to %dim step %c2 { - loop.for %j = %c0 to %dim step %c3 { - loop.for %k = %c0 to %dim step %c4 { + scf.for %i = %c0 to %dim step %c2 { + scf.for %j = %c0 to %dim step %c3 { + scf.for %k = %c0 to %dim step %c4 { %0 = std.subview %A[%i, %k][%c2, %c4][%c1, %c1] : memref to memref @@ -770,5 +770,5 @@ } // CHECK-LABEL: func @accept_different_alloc_ops -// CHECK-COUNT-3: loop.for +// CHECK-COUNT-3: scf.for // CHECK-COUNT-2: linalg.matmul diff --git a/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir b/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir --- a/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir +++ b/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir @@ -25,8 +25,8 @@ %1 = dim %C, 1 : memref %2 = dim %D, 0 : memref %3 = dim %D, 1 : memref - loop.for %arg2 = %c0 to %0 step %c10 { - loop.for %arg3 = %c0 to %1 step %c25 { + scf.for %arg2 = %c0 to %0 step %c10 { + scf.for %arg3 = %c0 to %1 step %c25 { %4 = std.subview %C[%arg2, %arg3][%c10, %c25][%c1, %c1] : memref to memref %5 = std.subview %D[%arg2, %arg3][%c10, %c25][%c1, %c1] : @@ -52,9 +52,9 @@ return } // CHECK-LABEL: func @fuse_indexed_generic_consumer -// CHECK: loop.for -// CHECK: loop.for -// CHECK-NOT: loop.for +// CHECK: scf.for +// CHECK: scf.for +// CHECK-NOT: scf.for // CHECK: linalg.generic // CHECK-NOT: addi // CHECK: addf @@ -91,7 +91,7 @@ %C_Y = dim %C, 1 : memref %D_X = dim %D, 0 : memref %D_Y = dim %D, 1 : memref - loop.parallel (%arg2, %arg3) = (%c0, %c0) to (%C_X, %C_Y) step (%c10, %c25) { + scf.parallel (%arg2, %arg3) = (%c0, %c0) to (%C_X, %C_Y) step (%c10, %c25) { %C_view = std.subview %C[%arg2, %arg3][%c10, %c25][%c1, %c1] : memref to memref %D_view = std.subview %D[%arg2, %arg3][%c10, %c25][%c1, %c1] : @@ -110,8 +110,8 @@ return } // CHECK-LABEL: func @fuse_indexed_generic_producer -// CHECK: loop.parallel ([[I:%.*]], [[J:%.*]]) = -// CHECK-NOT: loop.parallel +// CHECK: scf.parallel 
([[I:%.*]], [[J:%.*]]) = +// CHECK-NOT: scf.parallel // CHECK: linalg.indexed_generic // CHECK: ^bb0([[i:%.*]]: index, [[j:%.*]]: index // CHECK: [[i_new:%.*]] = addi [[i]], [[I]] : index @@ -150,7 +150,7 @@ %D_X = dim %D, 0 : memref %D_Y = dim %D, 1 : memref %3 = linalg.range %c0 : %C_Y : %c3 : !linalg.range - loop.parallel (%j) = (%c0) to (%C_Y) step (%c3) { + scf.parallel (%j) = (%c0) to (%C_Y) step (%c3) { %0 = affine.min affine_map<(d0, d1, d2) -> (d0, d1 - d2)>(%c3, %C_Y, %j) %C_view = subview %C[%c0, %j] [%C_X, %0] [%c1, %c1] : memref to memref @@ -169,14 +169,14 @@ %ab = addf %a, %b : f32 linalg.yield %ab : f32 }: memref, memref - loop.yield + scf.yield } return } // CHECK-LABEL: func @fuse_indexed_generic_producer_tile_second_dim_only // CHECK: [[C0:%.*]] = constant 0 : index -// CHECK: loop.parallel ([[J:%.*]]) = -// CHECK-NOT: loop.parallel +// CHECK: scf.parallel ([[J:%.*]]) = +// CHECK-NOT: scf.parallel // CHECK: linalg.indexed_generic // CHECK: ^bb0([[i:%.*]]: index, [[j:%.*]]: index // CHECK: [[i_new:%.*]] = addi [[i]], [[C0]] : index diff --git a/mlir/test/Dialect/Linalg/llvm.mlir b/mlir/test/Dialect/Linalg/llvm.mlir --- a/mlir/test/Dialect/Linalg/llvm.mlir +++ b/mlir/test/Dialect/Linalg/llvm.mlir @@ -62,7 +62,7 @@ %c0 = constant 0 : index %c1 = constant 1 : index %R = linalg.range %c0:%c1:%c1 : !linalg.range - loop.for %i0 = %c0 to %c1 step %c1 { + scf.for %i0 = %c0 to %c1 step %c1 { %1 = linalg.slice %arg0[%i0, %R] : memref, index, !linalg.range, memref } return @@ -180,9 +180,9 @@ // LLVM-LOOPS: %[[T0:.*]] = dim %[[A]], 0 : memref> // LLVM-LOOPS: %[[T1:.*]] = dim %[[A]], 1 : memref> // LLVM-LOOPS: %[[T2:.*]] = dim %[[B]], 1 : memref> -// LLVM-LOOPS: loop.for %[[I:.*]] = %[[C0]] to %[[T0]] step %[[C1]] { -// LLVM-LOOPS: loop.for %[[J:.*]] = %[[C0]] to %[[T2]] step %[[C1]] { -// LLVM-LOOPS: loop.for %[[K:.*]] = %[[C0]] to %[[T1]] step %[[C1]] { +// LLVM-LOOPS: scf.for %[[I:.*]] = %[[C0]] to %[[T0]] step %[[C1]] { +// LLVM-LOOPS: scf.for %[[J:.*]] = %[[C0]] to %[[T2]] step %[[C1]] { +// LLVM-LOOPS: scf.for %[[K:.*]] = %[[C0]] to %[[T1]] step %[[C1]] { // LLVM-LOOPS: %[[T3:.*]] = load %[[A]][%[[I]], %[[K]]] : memref> // LLVM-LOOPS: %[[T4:.*]] = load %[[B]][%[[K]], %[[J]]] : memref> // LLVM-LOOPS: %[[T5:.*]] = load %[[C]][%[[I]], %[[J]]] : memref> diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir --- a/mlir/test/Dialect/Linalg/loops.mlir +++ b/mlir/test/Dialect/Linalg/loops.mlir @@ -43,9 +43,9 @@ // CHECKLOOP: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref // CHECKLOOP: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref // CHECKLOOP: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[N]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[N]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { // CHECKLOOP-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref // CHECKLOOP-DAG: %[[b:.*]] = load %[[B]][%{{.*}}, %{{.*}}] : memref // CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 @@ -60,8 +60,8 @@ // CHECKPARALLEL: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref // CHECKPARALLEL: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref // CHECKPARALLEL: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKPARALLEL: 
loop.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[M]], %[[N]]) step (%{{.*}}, %{{.*}} { -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[M]], %[[N]]) step (%{{.*}}, %{{.*}} { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { // CHECKPARALLEL-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref // CHECKPARALLEL-DAG: %[[b:.*]] = load %[[B]][%{{.*}}, %{{.*}}] : memref // CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 @@ -86,8 +86,8 @@ // CHECKLOOP: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref // CHECKLOOP: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref // CHECKLOOP: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { // CHECKLOOP-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref // CHECKLOOP-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref // CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 @@ -101,8 +101,8 @@ // CHECKPARALLEL: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref // CHECKPARALLEL: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref // CHECKPARALLEL: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKPARALLEL: loop.parallel (%{{.*}}) = (%{{.*}}) to (%[[M]]) step (%{{.*}}) { -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKPARALLEL: scf.parallel (%{{.*}}) = (%{{.*}}) to (%[[M]]) step (%{{.*}}) { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { // CHECKPARALLEL-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref // CHECKPARALLEL-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref // CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 @@ -125,7 +125,7 @@ // CHECKLOOP: %[[A:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref // CHECKLOOP: %[[B:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref // CHECKLOOP: %[[C:.*]] = std.view %{{.*}}[{{.*}}][] : memref to memref -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { // CHECKLOOP-DAG: %[[a:.*]] = load %[[A]][%{{.*}}] : memref // CHECKLOOP-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref // CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 @@ -138,7 +138,7 @@ // CHECKPARALLEL: %[[A:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref // CHECKPARALLEL: %[[B:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref // CHECKPARALLEL: %[[C:.*]] = std.view %{{.*}}[{{.*}}][] : memref to memref -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { // CHECKPARALLEL-DAG: %[[a:.*]] = load %[[A]][%{{.*}}] : memref // CHECKPARALLEL-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref // CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 @@ -154,7 +154,7 @@ // CHECKLOOP-LABEL: func @dot_view( // CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { // CHECKLOOP: %[[K:.*]] = dim %arg0, 0 : memref -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { // CHECKLOOP-DAG: %[[a:.*]] = load %arg0[%{{.*}}] : memref // CHECKLOOP-DAG: %[[b:.*]] = load 
%{{.*}}[%{{.*}}] : memref // CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 @@ -165,7 +165,7 @@ // CHECKPARALLEL-LABEL: func @dot_view( // CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { // CHECKPARALLEL: %[[K:.*]] = dim %arg0, 0 : memref -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { // CHECKPARALLEL-DAG: %[[a:.*]] = load %arg0[%{{.*}}] : memref // CHECKPARALLEL-DAG: %[[b:.*]] = load %{{.*}}[%{{.*}}] : memref // CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 @@ -179,12 +179,12 @@ } // CHECKLOOP-LABEL: func @fill_view( // CHECKLOOP: %{{.*}}: memref, %{{.*}}: f32) { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { // CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}] : memref // CHECKPARALLEL-LABEL: func @fill_view( // CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: f32) { -// CHECKPARALLEL: loop.parallel (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { +// CHECKPARALLEL: scf.parallel (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { // CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}] : memref func @fill_view0(%arg0: memref, %arg1: f32) { @@ -203,14 +203,14 @@ } // CHECKLOOP-LABEL: func @fill_view3( // CHECKLOOP: %{{.*}}: memref, %{{.*}}: f32) { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { // CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECKPARALLEL-LABEL: func @fill_view3( // CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: f32) { -// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) { +// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) { // CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref func @copy_view(%arg0: memref, %arg1: memref) { @@ -219,13 +219,13 @@ } // CHECKLOOP-LABEL: func @copy_view( // CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref) { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { // CHECKLOOP: %[[L:.*]] = load %{{.*}}[%{{.*}}] : memref // CHECKLOOP: store %[[L]], %{{.*}}[%{{.*}}] : memref // CHECKPARALLEL-LABEL: func @copy_view( // CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref) { -// CHECKPARALLEL: loop.parallel (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { +// CHECKPARALLEL: scf.parallel (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { // CHECKPARALLEL: %[[L:.*]] = load %{{.*}}[%{{.*}}] : memref // CHECKPARALLEL: store %[[L]], %{{.*}}[%{{.*}}] : memref @@ -249,15 +249,15 @@ } // CHECKLOOP-LABEL: func @copy_view3 // CHECKLOOP: (%{{.*}}: memref, %{{.*}}: memref) { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} 
step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { // CHECKLOOP: %[[L:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECKLOOP: store %[[L]], %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECKPARALLEL-LABEL: func @copy_view3 // CHECKPARALLEL: (%{{.*}}: memref, %{{.*}}: memref) { -// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) { +// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) { // CHECKPARALLEL: %[[L:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECKPARALLEL: store %[[L]], %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref @@ -272,11 +272,11 @@ // CHECKLOOP: %[[K:.*]] = dim %arg0, 2 : memref // CHECKLOOP: %[[B:.*]] = dim %arg1, 0 : memref // CHECKLOOP: %[[X0:.*]] = dim %arg2, 1 : memref -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { // CHECKLOOP: %[[SUM:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) // CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref // CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref @@ -292,9 +292,9 @@ // CHECKPARALLEL: %[[K:.*]] = dim %arg0, 2 : memref // CHECKPARALLEL: %[[B:.*]] = dim %arg1, 0 : memref // CHECKPARALLEL: %[[X0:.*]] = dim %arg2, 1 : memref -// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}) { -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { +// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}) { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { +// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { // CHECKPARALLEL: %[[SUM:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) // CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref // CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref @@ -316,13 +316,13 @@ // CHECKLOOP: %[[B:.*]] = dim %arg1, 0 : memref // CHECKLOOP: %[[X0:.*]] = dim %arg2, 1 : memref // CHECKLOOP: %[[X1:.*]] = dim %arg2, 2 : memref -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { +// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to 
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
// CHECKLOOP: %[[SUM0:.*]] = affine.apply #[[Stride2Dilation4]](%{{.*}}, %{{.*}})
// CHECKLOOP: %[[SUM1:.*]] = affine.apply #[[Stride3Dilation5]](%{{.*}}, %{{.*}})
// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref
@@ -341,10 +341,10 @@
// CHECKPARALLEL: %[[B:.*]] = dim %arg1, 0 : memref
// CHECKPARALLEL: %[[X0:.*]] = dim %arg2, 1 : memref
// CHECKPARALLEL: %[[X1:.*]] = dim %arg2, 2 : memref
-// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
-// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
-// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
+// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {
+// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
+// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
+// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #[[Stride2Dilation4]](%{{.*}}, %{{.*}})
// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #[[Stride3Dilation5]](%{{.*}}, %{{.*}})
// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref
@@ -373,13 +373,13 @@
// CHECKLOOP: %[[B:.*]] = dim %arg1, 0 : memref
// CHECKLOOP: %[[X0:.*]] = dim %arg2, 1 : memref
// CHECKLOOP: %[[X1:.*]] = dim %arg2, 2 : memref
-// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} {
-// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} {
-// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} {
-// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
-// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
-// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
-// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
// CHECKLOOP: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
// CHECKLOOP: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
// CHECKLOOP: %[[IDX:.*]] = affine.max #[[clampMinMap]](%[[SUM0]])
@@ -402,10 +402,10 @@
// CHECKPARALLEL: %[[B:.*]] = dim %arg1, 0 : memref
// CHECKPARALLEL: %[[X0:.*]] = dim %arg2, 1 : memref
// CHECKPARALLEL: %[[X1:.*]] = dim %arg2, 2 : memref
-// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
-// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
-// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
+// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {
+// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
+// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
+// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[clampMinMap]](%[[SUM0]])
@@ -430,10 +430,10 @@
// CHECKLOOP: %[[WY:.*]] = dim %arg1, 1 : memref
// CHECKLOOP: %[[OX:.*]] = dim %arg2, 0 : memref
// CHECKLOOP: %[[OY:.*]] = dim %arg2, 1 : memref
-// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
-// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
-// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
// CHECKLOOP: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}})
// CHECKLOOP: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}})
// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref
@@ -446,9 +446,9 @@
// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, 1 : memref
// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, 0 : memref
// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, 1 : memref
-// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
+// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
+// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
+// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}})
// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}})
// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref
@@ -468,10 +468,10 @@
// CHECKLOOP: %[[WY:.*]] = dim %arg1, 1 : memref
// CHECKLOOP: %[[OX:.*]] = dim %arg2, 0 : memref
// CHECKLOOP: %[[OY:.*]] = dim %arg2, 1 : memref
-// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
-// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
-// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
// CHECKLOOP: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}})
// CHECKLOOP: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}})
// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref
@@ -484,9 +484,9 @@
// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, 1 : memref
// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, 0 : memref
// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, 1 : memref
-// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
+// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
+// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
+// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}})
// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}})
// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref
@@ -506,10 +506,10 @@
// CHECKLOOP: %[[WY:.*]] = dim %arg1, 1 : memref
// CHECKLOOP: %[[OX:.*]] = dim %arg2, 0 : memref
// CHECKLOOP: %[[OY:.*]] = dim %arg2, 1 : memref
-// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
-// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
-// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
+// CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
// CHECKLOOP: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}})
// CHECKLOOP: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}})
// CHECKLOOP: %[[RHS:.*]] = load %{{.*}}[%[[IX]], %[[IY]]] : memref
@@ -522,9 +522,9 @@
// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, 1 : memref
// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, 0 : memref
// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, 1 : memref
-// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
-// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
-// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
+// CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) {
+// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} {
+// CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} {
// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}})
// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}})
// CHECKPARALLEL: %[[RHS:.*]] = load %{{.*}}[%[[IX]], %[[IY]]] : memref
@@ -555,9 +555,9 @@
return
}
// CHECKLOOP-LABEL: @generic_region
-// CHECKLOOP: loop.for %[[i:.*]] = {{.*}}
-// CHECKLOOP: loop.for %[[j:.*]] = {{.*}}
-// CHECKLOOP: loop.for %[[k:.*]] = {{.*}}
+// CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
+// CHECKLOOP: scf.for %[[j:.*]] = {{.*}}
+// CHECKLOOP: scf.for %[[k:.*]] = {{.*}}
// CHECKLOOP: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref
// CHECKLOOP: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref
// CHECKLOOP: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref
@@ -567,7 +567,7 @@
// CHECKLOOP: store %[[e]],
%{{.*}}[%[[i]], %[[k]], %[[j]]] : memref
// CHECKPARALLEL-LABEL: @generic_region
-// CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]])
+// CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]])
// CHECKPARALLEL: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref
// CHECKPARALLEL: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref
// CHECKPARALLEL: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref
@@ -606,9 +606,9 @@
}
// CHECKLOOP-LABEL: @indexed_generic_region
-// CHECKLOOP: loop.for %[[i:.*]] = {{.*}}
-// CHECKLOOP: loop.for %[[j:.*]] = {{.*}}
-// CHECKLOOP: loop.for %[[k:.*]] = {{.*}}
+// CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
+// CHECKLOOP: scf.for %[[j:.*]] = {{.*}}
+// CHECKLOOP: scf.for %[[k:.*]] = {{.*}}
// CHECKLOOP: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]]
// CHECKLOOP: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]]
// CHECKLOOP: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]]
@@ -622,7 +622,7 @@
// CHECKLOOP: store %[[result_2]], %{{.*}}[%[[i]], %[[k]], %[[j]]]
// CHECKPARALLEL-LABEL: @indexed_generic_region
-// CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]])
+// CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]])
// CHECKPARALLEL: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]]
// CHECKPARALLEL: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]]
// CHECKPARALLEL: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]]
@@ -662,15 +662,15 @@
// CHECKLOOP-LABEL: @generic_op_zero_rank
// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32>
-// CHECKLOOP: loop.for %[[i:.*]] = {{.*}}
-// CHECKLOOP: loop.for %[[j:.*]] = {{.*}}
+// CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
+// CHECKLOOP: scf.for %[[j:.*]] = {{.*}}
// CHECKLOOP: %[[a:.*]] = load %[[ARG0]][]
// CHECKLOOP: store %[[a]], %[[ARG1]][%[[i]], %[[j]]]
// CHECKPARALLEL-LABEL: @generic_op_zero_rank
// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32>
-// CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]])
+// CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]])
// CHECKPARALLEL: %[[a:.*]] = load %[[ARG0]][]
// CHECKPARALLEL: store %[[a]], %[[ARG1]][%[[i]], %[[j]]]
@@ -689,8 +689,8 @@
// CHECKLOOP-LABEL: @indexed_generic_op_zero_rank
// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32>
-// CHECKLOOP: loop.for %[[i:.*]] = {{.*}}
-// CHECKLOOP: loop.for %[[j:.*]] = {{.*}}
+// CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
+// CHECKLOOP: scf.for %[[j:.*]] = {{.*}}
// CHECKLOOP: %[[a:.*]] = load %[[ARG0]][
// CHECKLOOP: %[[ij:.*]] = addi %[[i]], %[[j]] : index
// CHECKLOOP: %[[ij_int:.*]] = index_cast %[[ij]] : index to i32
@@ -700,7 +700,7 @@
// CHECKPARALLEL-LABEL: @indexed_generic_op_zero_rank
// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32>
-// CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]])
+// CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]])
// CHECKPARALLEL: %[[a:.*]] = load %[[ARG0]][
// CHECKPARALLEL: %[[ij:.*]] = addi %[[i]], %[[j]] : index
// CHECKPARALLEL: %[[ij_int:.*]] = index_cast %[[ij]] : index to i32
@@ -732,7 +732,7 @@
// CHECKLOOP-LABEL: @generic_op_1D_reduce
// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref
-// CHECKLOOP: loop.for %[[i:.*]] = {{.*}}
+// CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
// CHECKLOOP: %[[a:.*]] = load %[[ARG0]][%[[i]]]
// CHECKLOOP: %[[b:.*]] = load %[[ARG1]][]
// CHECKLOOP: %[[c:.*]] = addf %[[a]], %[[b]] : f32
@@ -741,7 +741,7 @@
// CHECKPARALLEL-LABEL: @generic_op_1D_reduce
// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref
-// CHECKPARALLEL: loop.for %[[i:.*]] = {{.*}}
+// CHECKPARALLEL: scf.for %[[i:.*]] = {{.*}}
// CHECKPARALLEL: %[[a:.*]] = load %[[ARG0]][%[[i]]]
// CHECKPARALLEL: %[[b:.*]] = load %[[ARG1]][]
// CHECKPARALLEL: %[[c:.*]] = addf %[[a]], %[[b]] : f32
@@ -780,7 +780,7 @@
// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref
// CHECKLOOP-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref
-// CHECKLOOP: loop.for %[[i:.*]] = {{.*}}
+// CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
// CHECKLOOP: %[[a:.*]] = load %[[ARG0]][%[[i]]]
// CHECKLOOP: %[[b:.*]] = load %[[ARG1]][]
// CHECKLOOP: %[[c:.*]] = load %[[ARG2]][]
@@ -792,7 +792,7 @@
// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref
// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref
// CHECKPARALLEL-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref
-// CHECKPARALLEL: loop.for %[[i:.*]] = {{.*}}
+// CHECKPARALLEL: scf.for %[[i:.*]] = {{.*}}
// CHECKPARALLEL: %[[a:.*]] = load %[[ARG0]][%[[i]]]
// CHECKPARALLEL: %[[b:.*]] = load %[[ARG1]][]
// CHECKPARALLEL: %[[c:.*]] = load %[[ARG2]][]
@@ -818,13 +818,13 @@
// CHECKLOOP-LABEL: @generic_const_init
// CHECKLOOP-SAME: %[[ARG0:.*]]: memref
// CHECKLOOP: %[[CONST:.*]] = constant 1.000000e+00 : f32
-// CHECKLOOP: loop.for %[[i:.*]] = {{.*}}
+// CHECKLOOP: scf.for %[[i:.*]] = {{.*}}
// CHECKLOOP: store %[[CONST]], %[[ARG0]]
// CHECKPARALLEL-LABEL: @generic_const_init
// CHECKPARALLEL-SAME: %[[ARG0:.*]]: memref
// CHECKPARALLEL: %[[CONST:.*]] = constant 1.000000e+00 : f32
-// CHECKPARALLEL: loop.parallel (%[[i:.*]])
+// CHECKPARALLEL: scf.parallel (%[[i:.*]])
// CHECKPARALLEL: store %[[CONST]], %[[ARG0]]
#scalar_access = [
@@ -852,7 +852,7 @@
// CHECKLOOP-SAME: %[[ARG0]]: memref
// CHECKLOOP-SAME: %[[ARG1]]: memref
// CHECKLOOP-SAME: %[[ARG2]]: memref
-// CHECKLOOP-NOT: loop.for
+// CHECKLOOP-NOT: scf.for
// CHECKLOOP: load %[[ARG0]][]
// CHECKLOOP: load %[[ARG1]][]
// CHECKLOOP: addf
@@ -862,7 +862,7 @@
// CHECKPARALLEL-SAME: %[[ARG0]]: memref
// CHECKPARALLEL-SAME: %[[ARG1]]: memref
// CHECKPARALLEL-SAME: %[[ARG2]]: memref
-// CHECKPARALLEL-NOT: loop.for
+// CHECKPARALLEL-NOT: scf.for
// CHECKPARALLEL: load %[[ARG0]][]
// CHECKPARALLEL: load %[[ARG1]][]
// CHECKPARALLEL: addf
@@ -883,10 +883,10 @@
// CHECKLOOP: %[[M:.*]] = dim %[[mA]], 1 : memref
// CHECKLOOP: %[[K:.*]] = dim %[[mA]], 2 : memref
// CHECKLOOP: %[[N:.*]] = dim %[[mB]], 2 : memref
-// CHECKLOOP: loop.for %[[b:.*]] = %{{.*}} to %[[B]] step %{{.*}} {
-// CHECKLOOP: loop.for %[[m:.*]] = %{{.*}} to %[[M]] step %{{.*}} {
-// CHECKLOOP: loop.for %[[n:.*]] = %{{.*}} to %[[N]] step %{{.*}} {
-// CHECKLOOP: loop.for %[[k:.*]] = %{{.*}} to %[[K]] step %{{.*}} {
+// CHECKLOOP: scf.for %[[b:.*]] = %{{.*}} to %[[B]] step %{{.*}} {
+// CHECKLOOP: scf.for %[[m:.*]] = %{{.*}} to %[[M]] step %{{.*}} {
+// CHECKLOOP: scf.for %[[n:.*]] = %{{.*}} to %[[N]] step %{{.*}} {
+// CHECKLOOP: scf.for %[[k:.*]] = %{{.*}} to %[[K]] step %{{.*}} {
// CHECKLOOP: %[[va:.*]] = load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref
// CHECKLOOP: %[[vb:.*]] = load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref
// CHECKLOOP: %[[vc:.*]] = load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref
@@ -902,8 +902,8 @@
// CHECKPARALLEL: %[[M:.*]] = dim %[[mA]], 1 : memref
// CHECKPARALLEL: %[[K:.*]] = dim %[[mA]], 2 : memref
// CHECKPARALLEL: %[[N:.*]] = dim %[[mB]], 2 : memref
-// CHECKPARALLEL: loop.parallel (%[[b:.*]], %[[m:.*]], %[[n:.*]]) = ({{.*}}) to (%[[B]], %[[M]], %[[N]]) step ({{.*}}) {
-// CHECKPARALLEL: loop.for %[[k:.*]] = %{{.*}} to %[[K]] step %{{.*}} {
+// CHECKPARALLEL: scf.parallel (%[[b:.*]], %[[m:.*]], %[[n:.*]]) = ({{.*}}) to (%[[B]], %[[M]], %[[N]]) step ({{.*}}) {
+// CHECKPARALLEL: scf.for %[[k:.*]] = %{{.*}} to %[[K]] step %{{.*}} {
// CHECKPARALLEL: %[[va:.*]] = load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref
// CHECKPARALLEL: %[[vb:.*]] = load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref
// CHECKPARALLEL: %[[vc:.*]] = load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref
diff --git a/mlir/test/Dialect/Linalg/parallel_loops.mlir b/mlir/test/Dialect/Linalg/parallel_loops.mlir
--- a/mlir/test/Dialect/Linalg/parallel_loops.mlir
+++ b/mlir/test/Dialect/Linalg/parallel_loops.mlir
@@ -21,12 +21,12 @@
// CHECK-DAG: %[[C2:.*]] = constant 2
// CHECK-DAG: %[[C0:.*]] = constant 0
// CHECK-DAG: %[[C1:.*]] = constant 1
-// CHECK: loop.parallel (%[[I:.*]], %[[J:.*]]) = {{.*}}
+// CHECK: scf.parallel (%[[I:.*]], %[[J:.*]]) = {{.*}}
// CHECK: %[[LHS_ELEM:.*]] = load %[[LHS]][%[[I]], %[[J]]]
// CHECK: %[[RHS_ELEM:.*]] = load %[[RHS]][%[[I]], %[[J]]]
// CHECK: %[[SUM:.*]] = addf %[[LHS_ELEM]], %[[RHS_ELEM]] : f32
// CHECK: store %[[SUM]], %{{.*}}[%[[I]], %[[J]]]
-// CHECK: loop.yield
+// CHECK: scf.yield
// -----
@@ -55,8 +55,8 @@
// CHECK-DAG: %[[D1:.*]] = dim %{{.*}}, 1
// CHECK-DAG: %[[D2:.*]] = dim %{{.*}}, 2
// CHECK-DAG: %[[D3:.*]] = dim %{{.*}}, 3
-// CHECK: loop.parallel (%[[IV0:.*]], %[[IV1:.*]]) = (%[[C0]], %[[C0]]) to (%[[D0]], %[[D1]]) step (%[[C1]], %[[C1]])
-// CHECK: loop.for %[[IV2:.*]] = %[[C0]] to %[[D2]] step %[[C1]]
-// CHECK: loop.for %[[IV3:.*]] = %[[C0]] to %[[D3]] step %[[C1]]
+// CHECK: scf.parallel (%[[IV0:.*]], %[[IV1:.*]]) = (%[[C0]], %[[C0]]) to (%[[D0]], %[[D1]]) step (%[[C1]], %[[C1]])
+// CHECK: scf.for %[[IV2:.*]] = %[[C0]] to %[[D2]] step %[[C1]]
+// CHECK: scf.for %[[IV3:.*]] = %[[C0]] to %[[D3]] step %[[C1]]
// CHECK: load %{{.*}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
// CHECK: store %{{.*}}, %{{.*}}[%[[IV0]], %[[IV1]], %[[IV3]]]
diff --git a/mlir/test/Dialect/Linalg/promote.mlir b/mlir/test/Dialect/Linalg/promote.mlir
--- a/mlir/test/Dialect/Linalg/promote.mlir
+++ b/mlir/test/Dialect/Linalg/promote.mlir
@@ -20,9 +20,9 @@
%6 = dim %3, 0 : memref
%7 = dim %3, 1 : memref
%8 = dim %4, 1 : memref
- loop.for %arg4 = %c0 to %6 step %c2 {
- loop.for %arg5 = %c0 to %8 step %c3 {
- loop.for %arg6 = %c0 to %7 step %c4 {
+ scf.for %arg4 = %c0 to %6 step %c2 {
+ scf.for %arg5 = %c0 to %8 step %c3 {
+ scf.for %arg6 = %c0 to %7 step %c4 {
%11 = std.subview %3[%arg4, %arg6][%c2, %c4][1, 1] : memref to memref
%14 = std.subview %4[%arg6, %arg5][%c4, %c3][1, 1] : memref to memref
%17 = std.subview %5[%arg4, %arg5][%c2, %c3][1, 1] : memref to memref
@@ -34,9 +34,9 @@
}
// CHECK-LABEL: func @matmul_f32(%{{.*}}: memref, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
-// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK: %[[vA:.*]] = subview {{.*}} : memref
// CHECK: %[[vB:.*]] = subview {{.*}} : memref
// CHECK: %[[vC:.*]] = subview {{.*}} : memref
@@ -85,9 +85,9 @@
%6 = dim %3, 0 : memref
%7 = dim %3, 1 : memref
%8 = dim %4, 1 : memref
- loop.for %arg4 = %c0 to %6 step %c2 {
- loop.for %arg5 = %c0 to %8 step %c3 {
- loop.for %arg6 = %c0 to %7 step %c4 {
+ scf.for %arg4 = %c0 to %6 step %c2 {
+ scf.for %arg5 = %c0 to %8 step %c3 {
+ scf.for %arg6 = %c0 to %7 step %c4 {
%11 = std.subview %3[%arg4, %arg6][%c2, %c4][1, 1] : memref to memref
%14 = std.subview %4[%arg6, %arg5][%c4, %c3][1, 1] : memref to memref
%17 = std.subview %5[%arg4, %arg5][%c2, %c3][1, 1] : memref to memref
@@ -99,9 +99,9 @@
}
// CHECK-LABEL: func @matmul_f64(%{{.*}}: memref, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
-// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK: %[[vA_f64:.*]] = subview {{.*}} : memref
// CHECK: %[[vB_f64:.*]] = subview {{.*}} : memref
// CHECK: %[[vC_f64:.*]] = subview {{.*}} : memref
@@ -150,9 +150,9 @@
%6 = dim %3, 0 : memref
%7 = dim %3, 1 : memref
%8 = dim %4, 1 : memref
- loop.for %arg4 = %c0 to %6 step %c2 {
- loop.for %arg5 = %c0 to %8 step %c3 {
- loop.for %arg6 = %c0 to %7 step %c4 {
+ scf.for %arg4 = %c0 to %6 step %c2 {
+ scf.for %arg5 = %c0 to %8 step %c3 {
+ scf.for %arg6 = %c0 to %7 step %c4 {
%11 = std.subview %3[%arg4, %arg6][%c2, %c4][1, 1] : memref to memref
%14 = std.subview %4[%arg6, %arg5][%c4, %c3][1, 1] : memref to memref
%17 = std.subview %5[%arg4, %arg5][%c2, %c3][1, 1] : memref to memref
@@ -164,9 +164,9 @@
}
// CHECK-LABEL: func @matmul_i32(%{{.*}}: memref, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
-// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-// CHECK: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK: %[[vA_i32:.*]] = subview {{.*}} : memref
// CHECK: %[[vB_i32:.*]] = subview {{.*}} : memref
// CHECK: %[[vC_i32:.*]] = subview {{.*}} : memref
diff --git a/mlir/test/Dialect/Linalg/tile.mlir b/mlir/test/Dialect/Linalg/tile.mlir
--- a/mlir/test/Dialect/Linalg/tile.mlir
+++ b/mlir/test/Dialect/Linalg/tile.mlir
@@ -43,7 +43,7 @@
// TILE-2-DAG: %[[C1:.*]] = constant 1 : index
// TILE-2-DAG: %[[C2:.*]] = constant 2 : index
// TILE-2: %[[M:.*]] = dim %{{.*}}, 0 : memref
-// TILE-2: loop.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
+// TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
// TILE-2: %[[localM:.*]] = dim %{{.*}}, 0
// TILE-2: %[[szM:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localM]], %[[I]])
// TILE-2: %[[K:.*]] = dim %{{.*}}, 1 : memref
@@ -59,7 +59,7 @@
// TILE-02-DAG: %[[C1:.*]] = constant 1 : index
// TILE-02-DAG: %[[C2:.*]] = constant 2 : index
// TILE-02: %[[N:.*]] = dim %arg1, 1 : memref
-// TILE-02: loop.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} {
+// TILE-02: scf.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} {
// TILE-02: %[[K:.*]] = dim %{{.*}}, 0 : memref
// TILE-02: %[[localN:.*]] = dim %{{.*}}, 1
// TILE-02: %[[szN:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localN]], %[[J]])
@@ -75,7 +75,7 @@
// TILE-002-DAG: %[[C1:.*]] = constant 1 : index
// TILE-002-DAG: %[[C2:.*]] = constant 2 : index
// TILE-002: %[[ubK:.*]] = dim %{{.*}}, 1 : memref
-// TILE-002: loop.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
+// TILE-002: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
// TILE-002: %[[M:.*]] = dim %{{.*}}, 0 : memref
// TILE-002: %[[localK:.*]] = dim %{{.*}}, 1
// TILE-002: %[[szK:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localK]], %[[K]])
@@ -95,9 +95,9 @@
// TILE-234: %[[ubM:.*]] = dim %{{.*}}, 0 : memref
// TILE-234: %[[ubK:.*]] = dim %{{.*}}, 1 : memref
// TILE-234: %[[ubN:.*]] = dim %{{.*}}, 1 : memref
-// TILE-234: loop.for %[[I:.*]] = %{{.*}}{{.*}} to %[[ubM]] step %{{.*}} {
-// TILE-234: loop.for %[[J:.*]] = %{{.*}}{{.*}} to %[[ubN]] step %{{.*}} {
-// TILE-234: loop.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
+// TILE-234: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[ubM]] step %{{.*}} {
+// TILE-234: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[ubN]] step %{{.*}} {
+// TILE-234: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
// TILE-234: %[[localM:.*]] = dim %{{.*}}, 0
// TILE-234: %[[szM:.*]] = affine.min #[[bound_map_2]](%[[C2]], %[[localM]], %[[I]])
// TILE-234: %[[localK:.*]] = dim %{{.*}}, 1
@@ -129,7 +129,7 @@
// TILE-2-DAG: %[[C1:.*]] = constant 1 : index
// TILE-2-DAG: %[[C2:.*]] = constant 2 : index
// TILE-2: %[[M:.*]] = dim %{{.*}}, 0 : memref<10x16xf32, #[[strided2D]]>
-// TILE-2: loop.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
+// TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
// TILE-2: %[[K:.*]] = dim %{{.*}}, 1 : memref<10x16xf32, #[[strided2D]]>
// TILE-2: %[[sAi:.*]] = subview %{{.*}}[%[[I]], %[[C0]]] [%[[C2]], %[[K]]] [%[[C1]], %[[C1]]] : memref<10x16xf32, #[[strided2D]]> to memref
// TILE-2: %[[N:.*]] = dim %{{.*}}, 1 : memref<10x12xf32, #[[strided2D]]>
@@ -141,7 +141,7 @@
// TILE-02-DAG: %[[C1:.*]] = constant 1 : index
// TILE-02-DAG: %[[C2:.*]] = constant 2 : index
// TILE-02: %[[N:.*]] = dim %arg1, 1 : memref<16x12xf32, #[[strided2D]]>
-// TILE-02: loop.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} {
+// TILE-02: scf.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} {
// TILE-02: %[[K:.*]] = dim %{{.*}}, 0 : memref<16x12xf32, #[[strided2D]]>
// TILE-02-NOT: affine.min
// TILE-02: %[[sBj:.*]] = subview %{{.*}}[%[[C0]], %[[J]]] [%[[K]], %[[C2]]] [%[[C1]], %[[C1]]] : memref<16x12xf32, #[[strided2D]]> to memref
@@ -155,7 +155,7 @@
// TILE-002-DAG: %[[C1:.*]] = constant 1 : index
// TILE-002-DAG: %[[C2:.*]] = constant 2 : index
// TILE-002: %[[ubK:.*]] = dim %{{.*}}, 1 : memref<10x16xf32, #[[strided2D]]>
-// TILE-002: loop.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
+// TILE-002: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
// TILE-002: %[[M:.*]] = dim %{{.*}}, 0 : memref<10x16xf32, #[[strided2D]]>
// TILE-002-NOT: affine.min
// TILE-002: %[[sAj:.*]] = subview %{{.*}}[%[[C0]], %[[K]]] [%[[M]], %[[C2]]] [%[[C1]], %[[C1]]] : memref<10x16xf32, #[[strided2D]]> to memref
@@ -173,9 +173,9 @@
// TILE-234: %[[ubM:.*]] = dim %{{.*}}, 0 : memref<10x16xf32, #[[strided2D]]>
// TILE-234: %[[ubK:.*]] = dim %{{.*}}, 1 : memref<10x16xf32, #[[strided2D]]>
// TILE-234: %[[ubN:.*]] = dim %{{.*}}, 1 : memref<16x12xf32, #[[strided2D]]>
-// TILE-234: loop.for %[[I:.*]] = %{{.*}}{{.*}} to %[[ubM]] step %{{.*}} {
-// TILE-234: loop.for %[[J:.*]] = %{{.*}}{{.*}} to %[[ubN]] step %{{.*}} {
-// TILE-234: loop.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
+// TILE-234: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[ubM]] step %{{.*}} {
+// TILE-234: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[ubN]] step %{{.*}} {
+// TILE-234: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
// TILE-234-NOT: affine.min
// TILE-234: %[[sAik:.*]] = subview %{{.*}}[%[[I]], %[[K]]] [%[[C2]], %[[C4]]] [%[[C1]], %[[C1]]] : memref<10x16xf32, #[[strided2D]]> to memref
// TILE-234-NOT: affine.min
@@ -194,7 +194,7 @@
// TILE-2-DAG: %[[C1:.*]] = constant 1 : index
// TILE-2-DAG: %[[C2:.*]] = constant 2 : index
// TILE-2: %[[M:.*]] = dim %{{.*}}, 0 : memref
-// TILE-2: loop.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
+// TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
// TILE-2: %[[localM:.*]] = dim %{{.*}}, 0
// TILE-2: %[[szM:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localM]], %[[I]])
// TILE-2: %[[N:.*]] = dim %{{.*}}, 1 : memref
@@ -209,7 +209,7 @@
// TILE-02-DAG: %[[C1:.*]] = constant 1 : index
// TILE-02-DAG: %[[C2:.*]] = constant 2 : index
// TILE-02: %[[K:.*]] = dim %{{.*}}, 1 : memref
-// TILE-02: loop.for %[[J]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} {
+// TILE-02: scf.for %[[J]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} {
// TILE-02: %[[M:.*]] = dim %{{.*}}, 0 : memref
// TILE-02: %[[localN:.*]] = dim %{{.*}}, 1
// TILE-02: %[[szN:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localN]], %[[J]])
@@ -220,7 +220,7 @@
// TILE-02: linalg.matvec(%[[sAj]], %[[sBj]], %{{.*}}) : memref, memref, memref
// TILE-002-LABEL: func @matvec(
-// TILE-002-NOT: loop.for
+// TILE-002-NOT: scf.for
// TILE-234-LABEL: func @matvec(
// TILE-234-DAG: %[[C0:.*]] = constant 0 : index
@@ -229,8 +229,8 @@
// TILE-234-DAG: %[[C3:.*]] = constant 3 : index
// TILE-234: %[[M:.*]] = dim %{{.*}}, 0 : memref
// TILE-234: %[[K:.*]] = dim %{{.*}}, 1 : memref
-// TILE-234: loop.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
-// TILE-234: loop.for %[[J:.*]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} {
+// TILE-234: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
+// TILE-234: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} {
// TILE-234: %[[localM:.*]] = dim %{{.*}}, 0
// TILE-234: %[[szM:.*]] = affine.min #[[bound_map_2]](%[[C2]], %[[localM]], %[[I]])
// TILE-234: %[[localN:.*]] = dim %{{.*}}, 1
@@ -254,7 +254,7 @@
// TILE-2-DAG: %[[C1:.*]] = constant 1 : index
// TILE-2-DAG: %[[C2:.*]] = constant 2 : index
// TILE-2: %[[M:.*]] = dim %{{.*}}, 0 : memref
-// TILE-2: loop.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
+// TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
// TILE-2: %[[localM:.*]] = dim %{{.*}}, 0
// TILE-2: %[[szM:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localM]], %[[I]])
// TILE-2: %[[sAi:.*]] = subview %{{.*}}[%[[I]]] [%[[szM]]] [%[[C1]]] : memref to memref
@@ -264,17 +264,17 @@
// TILE-2: linalg.dot(%[[sAi]], %[[sBi]], {{.*}}) : memref, memref, memref
// TILE-02-LABEL: func @dot(
-// TILE-02-NOT: loop.for
+// TILE-02-NOT: scf.for
// TILE-002-LABEL: func @dot(
-// TILE-002-NOT: loop.for
+// TILE-002-NOT: scf.for
// TILE-234-LABEL: func @dot(
// TILE-234-DAG: %[[C0:.*]] = constant 0 : index
// TILE-234-DAG: %[[C1:.*]] = constant 1 : index
// TILE-234-DAG: %[[C2:.*]] = constant 2 : index
// TILE-234: %[[ubK:.*]] = dim %{{.*}}, 0 : memref
-// TILE-234: loop.for %[[I:.*]] = %{{.*}} to %[[ubK]] step %{{.*}} {
+// TILE-234: scf.for %[[I:.*]] = %{{.*}} to %[[ubK]] step %{{.*}} {
// TILE-234: %[[localM:.*]] = dim %{{.*}}, 0
// TILE-234: %[[szM:.*]] = affine.min #[[bound_map_2]](%[[C2]], %[[localM]], %[[I]])
// TILE-234: %[[sAi:.*]] = subview %{{.*}}[%[[I]]] [%[[szM]]] [%[[C1]]] : memref to memref
diff --git a/mlir/test/Dialect/Linalg/tile_conv.mlir b/mlir/test/Dialect/Linalg/tile_conv.mlir
--- a/mlir/test/Dialect/Linalg/tile_conv.mlir
+++ b/mlir/test/Dialect/Linalg/tile_conv.mlir
@@ -21,9 +21,9 @@
// TILE-23004: %[[B:.*]] = dim %{{.*}}, 0 : memref
// TILE-23004: %[[PaddedInput0:.*]] = dim %{{.*}}, 1 : memref
// TILE-23004: %[[X0:.*]] = dim %{{.*}}, 1 : memref
-// TILE-23004: loop.for %[[ivI:.*]] = %{{.*}} to %[[B]] step %{{.*}} {
-// TILE-23004: loop.for %[[ivJ:.*]] = %{{.*}} to %[[X0]] step %{{.*}} {
-// TILE-23004: loop.for %[[ivK:.*]] = %{{.*}} to %[[Q]] step %{{.*}} {
+// TILE-23004: scf.for %[[ivI:.*]] = %{{.*}} to %[[B]] step %{{.*}} {
+// TILE-23004: scf.for %[[ivJ:.*]] = %{{.*}} to %[[X0]] step %{{.*}} {
+// TILE-23004: scf.for %[[ivK:.*]] = %{{.*}} to %[[Q]] step %{{.*}} {
// TILE-23004: %[[Z0:.*]] = dim %{{.*}}, 0 : memref
// TILE-23004: %[[Z1:.*]] = dim %{{.*}}, 1 : memref
// TILE-23004: %[[Z2:.*]] = dim %{{.*}}, 2 : memref
diff --git a/mlir/test/Dialect/Linalg/tile_conv_padding.mlir b/mlir/test/Dialect/Linalg/tile_conv_padding.mlir
--- a/mlir/test/Dialect/Linalg/tile_conv_padding.mlir
+++ b/mlir/test/Dialect/Linalg/tile_conv_padding.mlir
@@ -24,7 +24,7 @@
// TILE-20000-DAG: %[[C1:.*]] = constant 1 : index
// TILE-20000-DAG: %[[C2:.*]] = constant 2 : index
// TILE-20000: %[[B:.*]] = dim %[[ARG1]], 0
-// TILE-20000: loop.for %[[ivI:.*]] = %[[C0]] to %[[B]] step %[[C2]] {
+// TILE-20000: scf.for %[[ivI:.*]] = %[[C0]] to %[[B]] step %[[C2]] {
// TILE-20000: %[[DIM10:.*]] = dim %[[ARG1]], 0
// TILE-20000: %[[EXTENT:.*]] = affine.min #[[minmap]](%[[C2]], %[[DIM10]], %[[ivI]])
// TILE-20000: %[[DIM11:.*]] = dim %[[ARG1]], 1
diff --git a/mlir/test/Dialect/Linalg/tile_indexed_generic.mlir b/mlir/test/Dialect/Linalg/tile_indexed_generic.mlir
--- a/mlir/test/Dialect/Linalg/tile_indexed_generic.mlir
+++ b/mlir/test/Dialect/Linalg/tile_indexed_generic.mlir
@@ -21,7 +21,7 @@
}
// TILE-10n25-LABEL: func @indexed_generic_vector
// TILE-10n25: %[[C10:.*]] = constant 10 : index
-// TILE-10n25: loop.for %[[J:.*]] = {{.*}} step %[[C10]]
+// TILE-10n25: scf.for %[[J:.*]] = {{.*}} step %[[C10]]
// TILE-10n25: linalg.indexed_generic
// TILE-10n25: ^bb0(%[[I:.*]]: index, %[[IN:.*]]: f32, %[[OUT:.*]]: f32)
// TILE-10n25: %[[NEW_I:.*]] = addi %[[I]], %[[J]] : index
@@ -31,7 +31,7 @@
// TILE-25n0-LABEL: func @indexed_generic_vector
// TILE-25n0: %[[C25:.*]] = constant 25 : index
-// TILE-25n0: loop.for %[[J:.*]] = {{.*}} step %[[C25]]
+// TILE-25n0: scf.for %[[J:.*]] = {{.*}} step %[[C25]]
// TILE-25n0: linalg.indexed_generic
// TILE-25n0: ^bb0(%[[I:.*]]: index, %[[IN:.*]]: f32, %[[OUT:.*]]: f32)
// TILE-25n0: %[[NEW_I:.*]] = addi %[[I]], %[[J]] : index
@@ -40,7 +40,7 @@
// TILE-25n0: %[[OUT:.*]] = addf %[[IN]], %[[NEW_I_FLOAT]] : f32
// TILE-0n25-LABEL: func @indexed_generic_vector
-// TILE-0n25-NOT: loop.for %[[J:.*]] = {{.*}} step %[[C25]]
+// TILE-0n25-NOT: scf.for %[[J:.*]] = {{.*}} step %[[C25]]
// TILE-0n25: linalg.indexed_generic
#combined_indices_trait = {
@@ -67,8 +67,8 @@
// TILE-10n25-LABEL: func @indexed_generic_matrix
// TILE-10n25: %[[C25:.*]] = constant 25 : index
// TILE-10n25: %[[C10:.*]] = constant 10 : index
-// TILE-10n25: loop.for %[[K:.*]] = {{.*}} step %[[C10]]
-// TILE-10n25: loop.for %[[L:.*]] = {{.*}} step %[[C25]]
+// TILE-10n25: scf.for %[[K:.*]] = {{.*}} step %[[C10]]
+// TILE-10n25: scf.for %[[L:.*]] = {{.*}} step %[[C25]]
// TILE-10n25: linalg.indexed_generic
// TILE-10n25: ^bb0(%[[I:.*]]: index, %[[J:.*]]: index, %[[IN:.*]]: f32, %[[OUT:.*]]: f32):
// TILE-10n25: %[[NEW_I:.*]] = addi %[[I]], %[[K]] : index
@@ -81,7 +81,7 @@
// TILE-25n0-LABEL: func @indexed_generic_matrix
// TILE-25n0: %[[C25:.*]] = constant 25 : index
-// TILE-25n0: loop.for %[[L:.*]] = {{.*}} step %[[C25]]
+// TILE-25n0: scf.for %[[L:.*]] = {{.*}} step %[[C25]]
// TILE-25n0: linalg.indexed_generic
// TILE-25n0: ^bb0(%[[I:.*]]: index, %[[J:.*]]: index, %[[IN:.*]]: f32, %[[OUT:.*]]: f32):
// TILE-25n0: %[[NEW_I:.*]] = addi %[[I]], %[[L]] : index
@@ -93,7 +93,7 @@
// TILE-0n25-LABEL: func @indexed_generic_matrix
// TILE-0n25: %[[C25:.*]] = constant 25 : index
-// TILE-0n25: loop.for %[[L:.*]] = {{.*}} step %[[C25]]
+// TILE-0n25: scf.for %[[L:.*]] = {{.*}} step %[[C25]]
// TILE-0n25: linalg.indexed_generic
// TILE-0n25: ^bb0(%[[I:.*]]: index, %[[J:.*]]: index, %[[IN:.*]]: f32, %[[OUT:.*]]: f32):
// TILE-0n25: %[[NEW_J:.*]] = addi %[[J]], %[[L]] : index
diff --git a/mlir/test/Dialect/Linalg/tile_parallel.mlir b/mlir/test/Dialect/Linalg/tile_parallel.mlir
--- a/mlir/test/Dialect/Linalg/tile_parallel.mlir
+++ b/mlir/test/Dialect/Linalg/tile_parallel.mlir
@@ -29,8 +29,8 @@
// TILE-2-DAG: [[C1:%.*]] = constant 1 : index
// TILE-2-DAG: [[C2:%.*]] = constant 2 : index
// TILE-2: [[LHS_ROWS:%.*]] = dim [[LHS]], 0
-// TILE-2: loop.parallel ([[I:%.*]]) = ([[C0]]) to ([[LHS_ROWS]]) step ([[C2]]) {
-// TILE-2-NO: loop.parallel
+// TILE-2: scf.parallel ([[I:%.*]]) = ([[C0]]) to ([[LHS_ROWS]]) step ([[C2]]) {
+// TILE-2-NO: scf.parallel
// TILE-2: [[LHS_SUBVIEW:%.*]] = subview [[LHS]]
// TILE-2: [[RHS_SUBVIEW:%.*]] = subview [[RHS]]
// TILE-2: [[SUM_SUBVIEW:%.*]] = subview [[SUM]]
@@ -42,8 +42,8 @@
// TILE-02-DAG: [[C1:%.*]] = constant 1 : index
// TILE-02-DAG: [[C2:%.*]] = constant 2 : index
// TILE-02: [[LHS_COLS:%.*]] = dim [[LHS]], 1
-// TILE-02: loop.parallel ([[I:%.*]]) = ([[C0]]) to ([[LHS_COLS]]) step ([[C2]]) {
-// TILE-02-NO: loop.parallel
+// TILE-02: scf.parallel ([[I:%.*]]) = ([[C0]]) to ([[LHS_COLS]]) step ([[C2]]) {
+// TILE-02-NO: scf.parallel
// TILE-02: [[LHS_SUBVIEW:%.*]] = subview [[LHS]]
// TILE-02: [[RHS_SUBVIEW:%.*]] = subview [[RHS]]
// TILE-02: [[SUM_SUBVIEW:%.*]] = subview [[SUM]]
@@ -51,7 +51,7 @@
// TILE-002-LABEL: func @sum(
// TILE-002-SAME: [[LHS:%.*]]: {{.*}}, [[RHS:%.*]]: {{.*}}, [[SUM:%.*]]: {{.*}}) {
-// TILE-002-NO: loop.parallel
+// TILE-002-NO: scf.parallel
// TILE-002: linalg.generic {{.*}} [[LHS]], [[RHS]], [[SUM]] {
// TILE-234-LABEL: func @sum(
@@ -62,8 +62,8 @@
// TILE-234-DAG: [[C3:%.*]] = constant 3 : index
// TILE-234: [[LHS_ROWS:%.*]] = dim [[LHS]], 0
// TILE-234: [[LHS_COLS:%.*]] = dim [[LHS]], 1
-// TILE-234: loop.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]]) to ([[LHS_ROWS]], [[LHS_COLS]]) step ([[C2]], [[C3]]) {
-// TILE-234-NO: loop.parallel
+// TILE-234: scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]]) to ([[LHS_ROWS]], [[LHS_COLS]]) step ([[C2]], [[C3]]) {
+// TILE-234-NO: scf.parallel
// TILE-234: [[LHS_SUBVIEW:%.*]] = subview [[LHS]]
// TILE-234: [[RHS_SUBVIEW:%.*]] = subview [[RHS]]
// TILE-234: [[SUM_SUBVIEW:%.*]] = subview [[SUM]]
diff --git a/mlir/test/Dialect/Linalg/transform-patterns.mlir b/mlir/test/Dialect/Linalg/transform-patterns.mlir
--- a/mlir/test/Dialect/Linalg/transform-patterns.mlir
+++ b/mlir/test/Dialect/Linalg/transform-patterns.mlir
@@ -23,8 +23,8 @@
// CHECK-DAG: %[[c0:.*]] = constant 0 : index
// CHECK-DAG: %[[c1:.*]] = constant 1 : index
// CHECK-DAG: %[[c8000:.*]] = constant 8000 : index
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c8000]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c1]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c8000]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c1]] {
// CHECK: load
// CHECK: load
// CHECK: mulf
@@ -44,7 +44,7 @@
// CHECK-DAG: %[[c0:.*]] = constant 0 : index
// CHECK-DAG: %[[c5:.*]] = constant 5 : index
// CHECK-DAG: %[[c6:.*]] = constant 6 : index
-// CHECK: loop.parallel {{.*}} step (%[[c5]], %[[c6]])
+// CHECK: scf.parallel {{.*}} step (%[[c5]], %[[c6]])
// CHECK: linalg.matvec({{.*}}, {{.*}}, {{.*}}) : memref, memref, memref
func @matmul(%A: memref,
@@ -69,18 +69,18 @@
// CHECK-DAG: %[[c2000:.*]] = constant 2000 : index
// CHECK-DAG: %[[c3000:.*]] = constant 3000 : index
// CHECK-DAG: %[[c4000:.*]] = constant 4000 : index
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c2000]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c3000]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c4000]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c200]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c300]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c400]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c20]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c30]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c40]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c2]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c3]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c4]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c2000]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c3000]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c4000]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c200]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c300]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c400]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c20]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c30]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c40]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c2]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c3]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c4]] {
// CHECK: linalg.matmul({{.*}}, {{.*}}, {{.*}}) : memref, memref, memref
#matmul_trait = {
@@ -208,8 +208,8 @@
// CHECK-DAG: %[[c0:.*]] = constant 0 : index
// CHECK-DAG: %[[c5:.*]] = constant 5 : index
// CHECK-DAG: %[[c6:.*]] = constant 6 : index
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c6]]
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c5]]
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c6]]
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c5]]
// CHECK: linalg.matvec({{.*}}, {{.*}}, {{.*}}) : memref, memref, memref
func @matmul_perm(%A: memref,
@@ -232,15 +232,15 @@
// CHECK-DAG: %[[c2000:.*]] = constant 2000 : index
// CHECK-DAG: %[[c3000:.*]] = constant 3000 : index
// CHECK-DAG: %[[c4000:.*]] = constant 4000 : index
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c3000]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c4000]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c2000]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c300]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c200]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c400]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c20]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c30]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c40]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c3000]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c4000]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c2000]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c300]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c200]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c400]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c20]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c30]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c40]] {
// CHECK: linalg.matmul({{.*}}, {{.*}}, {{.*}}) : memref, memref, memref
func @promote_subview_matmul(%arg0: memref,
@@ -254,9 +254,9 @@
%0 = dim %arg0, 0 : memref
%1 = dim %arg0, 1 : memref
%2 = dim %arg1, 1 : memref
- loop.for %arg3 = %c0 to %0 step %c2000 {
- loop.for %arg4 = %c0 to %2 step %c3000 {
- loop.for %arg5 = %c0 to %1 step %c4000 {
+ scf.for %arg3 = %c0 to %0 step %c2000 {
+ scf.for %arg4 = %c0 to %2 step %c3000 {
+ scf.for %arg5 = %c0 to %1 step %c4000 {
%3 = subview %arg0[%arg3, %arg5][%c2000, %c4000][%c1, %c1] : memref to memref
%4 = subview %arg1[%arg5, %arg4][%c4000, %c3000][%c1, %c1] :
@@ -273,9 +273,9 @@
return
}
// CHECK-LABEL: func @promote_subview_matmul
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c2000]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c3000]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c4000]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c2000]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c3000]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c4000]] {
// CHECK: %[[s0:.*]] = subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref to memref
// CHECK: %[[s1:.*]] = subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref to memref
// CHECK: %[[s2:.*]] = subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref to memref
@@ -304,9 +304,9 @@
%0 = dim %arg0, 0 : memref
%1 = dim %arg0, 1 : memref
%2 = dim %arg1, 1 : memref
- loop.for %arg3 = %c0 to %0 step %c2000 {
- loop.for %arg4 = %c0 to %2 step %c3000 {
- loop.for %arg5 = %c0 to %1 step %c4000 {
+ scf.for %arg3 = %c0 to %0 step %c2000 {
+ scf.for %arg4 = %c0 to %2 step %c3000 {
+ scf.for %arg5 = %c0 to %1 step %c4000 {
%3 = std.subview %arg0[%arg3, %arg5][%c2000, %c4000][%c1, %c1] : memref to memref
%4 = std.subview %arg1[%arg5, %arg4][%c4000, %c3000][%c1, %c1] :
@@ -323,9 +323,9 @@
return
}
// CHECK-LABEL: func @promote_first_subview_matmul
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c2000]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c3000]] {
-// CHECK: loop.for {{.*}} = %[[c0]] to {{.*}} step %[[c4000]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c2000]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c3000]] {
+// CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c4000]] {
// CHECK: %[[s0:.*]] = subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref to memref
// CHECK: %[[s1:.*]] = subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref to memref
// CHECK: %[[s2:.*]] = subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref to memref
diff --git a/mlir/test/Dialect/SCF/invalid.mlir b/mlir/test/Dialect/SCF/invalid.mlir
--- a/mlir/test/Dialect/SCF/invalid.mlir
+++ b/mlir/test/Dialect/SCF/invalid.mlir
@@ -2,7 +2,7 @@
func @loop_for_lb(%arg0: f32, %arg1: index) {
// expected-error@+1 {{operand #0 must be index}}
- "loop.for"(%arg0, %arg1, %arg1) ({}) : (f32, index, index) -> ()
+ "scf.for"(%arg0, %arg1, %arg1) ({}) : (f32, index, index) -> ()
return
}
// -----
@@ -10,7 +10,7 @@
func @loop_for_ub(%arg0: f32, %arg1: index) {
// expected-error@+1 {{operand #1 must be index}}
- "loop.for"(%arg1, %arg0, %arg1) ({}) : (index, f32, index) -> ()
+ "scf.for"(%arg1, %arg0, %arg1) ({}) : (index, f32, index) -> ()
return
}
// -----
@@ -18,7 +18,7 @@
func @loop_for_step(%arg0: f32, %arg1: index) {
// expected-error@+1 {{operand #2 must be index}}
- "loop.for"(%arg1, %arg1, %arg0) ({}) : (index, index, f32) -> ()
+ "scf.for"(%arg1, %arg1, %arg0) ({}) : (index, index, f32) -> ()
return
}
// -----
@@ -27,9 +27,9 @@
func @loop_for_step_positive(%arg0: index) {
// expected-error@+2 {{constant step operand must be positive}}
%c0 = constant 0 : index
- "loop.for"(%arg0, %arg0, %c0) ({
+ "scf.for"(%arg0, %arg0, %c0) ({
^bb0(%arg1: index):
- loop.yield
+ scf.yield
}) : (index, index, index) -> ()
return
}
// -----
@@ -38,9 +38,9 @@
func @loop_for_one_region(%arg0: index) {
// expected-error@+1 {{requires one region}}
- "loop.for"(%arg0, %arg0, %arg0) (
- {loop.yield},
- {loop.yield}
+ "scf.for"(%arg0, %arg0, %arg0) (
+ {scf.yield},
+ {scf.yield}
) : (index, index, index) -> ()
return
}
// -----
@@ -49,12 +49,12 @@
func @loop_for_single_block(%arg0: index) {
// expected-error@+1 {{expects region #0 to have 0 or 1 blocks}}
- "loop.for"(%arg0, %arg0, %arg0) (
+ "scf.for"(%arg0, %arg0, %arg0) (
{
^bb1:
- loop.yield
+ scf.yield
^bb2:
- loop.yield
+ scf.yield
}
) : (index, index, index) -> ()
return
}
// -----
@@ -64,10 +64,10 @@
func @loop_for_single_index_argument(%arg0: index) {
// expected-error@+1 {{op expected body first argument to be an index argument for the induction variable}}
- "loop.for"(%arg0, %arg0, %arg0) (
+ "scf.for"(%arg0, %arg0, %arg0) (
{
^bb0(%i0 : f32):
- loop.yield
+ scf.yield
}
) : (index, index, index) -> ()
return
}
// -----
@@ -77,7 +77,7 @@
func @loop_if_not_i1(%arg0: index) {
// expected-error@+1 {{operand #0 must be 1-bit signless integer}}
- "loop.if"(%arg0) ({}, {}) : (index) -> ()
+ "scf.if"(%arg0) ({}, {}) : (index) -> ()
return
}
// -----
@@ -85,7 +85,7 @@
func @loop_if_more_than_2_regions(%arg0: i1) {
// expected-error@+1 {{expected 2 regions}}
- "loop.if"(%arg0) ({}, {}, {}): (i1) -> ()
+ "scf.if"(%arg0) ({}, {}, {}): (i1) -> ()
return
}
// -----
@@ -93,11 +93,11 @@
func @loop_if_not_one_block_per_region(%arg0: i1) {
// expected-error@+1 {{expects region #0 to have 0 or 1 blocks}}
- "loop.if"(%arg0) ({
+ "scf.if"(%arg0) ({
^bb0:
- loop.yield
+ scf.yield
^bb1:
- loop.yield
+ scf.yield
}, {}): (i1) -> ()
return
}
// -----
@@ -106,9 +106,9 @@
func @loop_if_illegal_block_argument(%arg0: i1) {
// expected-error@+1 {{requires that child entry blocks have no arguments}}
- "loop.if"(%arg0) ({
+ "scf.if"(%arg0) ({
^bb0(%0 : index):
- loop.yield
+ scf.yield
}, {}): (i1) -> ()
return
}
// -----
@@ -117,8 +117,8 @@
func @parallel_arguments_different_tuple_size(
%arg0: index, %arg1: index, %arg2: index) {
- // expected-error@+1 {{custom op 'loop.parallel' expected 1 operands}}
- loop.parallel (%i0) = (%arg0) to (%arg1, %arg2) step () {
+ // expected-error@+1 {{custom op 'scf.parallel' expected 1 operands}}
+ scf.parallel (%i0) = (%arg0) to (%arg1, %arg2) step () {
}
return
}
// -----
@@ -127,10 +127,10 @@
func @parallel_body_arguments_wrong_type(
%arg0: index, %arg1: index, %arg2: index) {
- // expected-error@+1 {{'loop.parallel' op expects arguments for the induction variable to be of index type}}
- "loop.parallel"(%arg0, %arg1, %arg2) ({
+ // expected-error@+1 {{'scf.parallel' op expects arguments for the induction variable to be of index type}}
+ "scf.parallel"(%arg0, %arg1, %arg2) ({
^bb0(%i0: f32):
- loop.yield
+ scf.yield
}) {operand_segment_sizes = dense<[1, 1, 1, 0]>: vector<4xi32>}: (index, index, index) -> ()
return
}
// -----
@@ -139,10 +139,10 @@
func @parallel_body_wrong_number_of_arguments(
%arg0: index, %arg1: index, %arg2: index) {
- // expected-error@+1 {{'loop.parallel' op expects the same number of induction variables: 2 as bound and step values: 1}}
- "loop.parallel"(%arg0, %arg1, %arg2) ({
+ // expected-error@+1 {{'scf.parallel' op expects the same number of induction variables: 2 as bound and step values: 1}}
+ "scf.parallel"(%arg0, %arg1, %arg2) ({
^bb0(%i0: index, %i1: index):
- loop.yield
+ scf.yield
}) {operand_segment_sizes = dense<[1, 1, 1, 0]>: vector<4xi32>}: (index, index, index) -> ()
return
}
// -----
func @parallel_no_tuple_elements() {
- // expected-error@+1 {{'loop.parallel' op needs at least one tuple element for lowerBound, upperBound and step}}
- loop.parallel () = () to () step () {
+ // expected-error@+1 {{'scf.parallel' op needs at least one tuple element for lowerBound, upperBound and step}}
+ scf.parallel () = () to () step () {
}
return
}
// -----
@@ -163,7 +163,7 @@
// expected-error@+3 {{constant step operand must be positive}}
%c0 = constant 1 : index
%c1 = constant 0 : index
- loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%c0, %c1) {
+ scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%c0, %c1) {
}
return
}
// -----
@@ -173,11 +173,11 @@
func @parallel_fewer_results_than_reduces(
%arg0 : index, %arg1: index, %arg2: index) {
// expected-error@+1 {{expects number of results: 0 to be the same as number of reductions: 1}}
- loop.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) {
+ scf.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) {
%c0 = constant 1.0 : f32
- loop.reduce(%c0) : f32 {
+ scf.reduce(%c0) : f32 {
^bb0(%lhs: f32, %rhs: f32):
- loop.reduce.return %lhs : f32
+ scf.reduce.return %lhs : f32
}
}
return
}
// -----
@@ -189,7 +189,7 @@
%arg0 : index, %arg1 : index, %arg2 : index) {
// expected-error@+2 {{expects number of results: 1 to be the same as number of reductions: 0}}
%zero = constant 1.0 : f32
- %res = loop.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) init (%zero) -> f32 {
+ %res = scf.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) init (%zero) -> f32 {
}
return
}
// -----
@@ -200,10 +200,10 @@
func @parallel_more_results_than_initial_values(
%arg0 : index, %arg1: index, %arg2: index) {
// expected-error@+1 {{expects number of results: 1 to be the same as number of initial values: 0}}
- %res = loop.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) -> f32 {
- loop.reduce(%arg0) : index {
+ %res = scf.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) -> f32 {
+ scf.reduce(%arg0) : index {
^bb0(%lhs: index, %rhs: index):
- loop.reduce.return %lhs : index
+ scf.reduce.return %lhs : index
}
}
}
// -----
@@ -213,12 +213,12 @@
func @parallel_different_types_of_results_and_reduces(
%arg0 : index, %arg1: index, %arg2: index) {
%zero = constant 0.0 : f32
- %res = loop.parallel (%i0) = (%arg0) to (%arg1)
+ %res = scf.parallel (%i0) = (%arg0) to (%arg1)
step (%arg2) init (%zero) -> f32 {
// expected-error@+1 {{expects type of reduce: 'index' to be the same as result type: 'f32'}}
- loop.reduce(%arg0) : index {
+ scf.reduce(%arg0) : index {
^bb0(%lhs: index, %rhs: index):
- loop.reduce.return %lhs : index
+ scf.reduce.return %lhs : index
}
}
return
}
// -----
@@ -227,10 +227,10 @@
func @top_level_reduce(%arg0 : f32) {
- // expected-error@+1 {{expects parent op 'loop.parallel'}}
- loop.reduce(%arg0) : f32 {
+ // expected-error@+1 {{expects parent op 'scf.parallel'}}
+ scf.reduce(%arg0) : f32 {
^bb0(%lhs : f32, %rhs : f32):
- loop.reduce.return %lhs : f32
+ scf.reduce.return %lhs : f32
}
return
}
// -----
@@ -239,10 +239,10 @@
func @reduce_empty_block(%arg0 : index, %arg1 : f32) {
%zero = constant 0.0 : f32
- %res = loop.parallel (%i0) = (%arg0) to (%arg0)
+ %res = scf.parallel (%i0) = (%arg0) to (%arg0)
step (%arg0) init (%zero) -> f32 {
// expected-error@+1 {{the block inside reduce should not be empty}}
- loop.reduce(%arg1) : f32 {
+ scf.reduce(%arg1) : f32 {
^bb0(%lhs : f32, %rhs : f32):
}
}
return
}
// -----
@@ -253,12 +253,12 @@
func @reduce_too_many_args(%arg0 : index, %arg1 : f32) {
%zero = constant 0.0 : f32
- %res = loop.parallel (%i0) = (%arg0) to (%arg0)
+ %res = scf.parallel (%i0) = (%arg0) to (%arg0)
step (%arg0) init (%zero) -> f32 {
// expected-error@+1 {{expects two arguments to reduce block of type 'f32'}}
- loop.reduce(%arg1) : f32 {
+ scf.reduce(%arg1) : f32 {
^bb0(%lhs : f32, %rhs : f32, %other : f32):
- loop.reduce.return %lhs : f32
+ scf.reduce.return %lhs : f32
}
}
return
}
// -----
@@ -268,12 +268,12 @@
func @reduce_wrong_args(%arg0 : index, %arg1 : f32) {
%zero = constant 0.0 : f32
- %res = loop.parallel (%i0) = (%arg0) to (%arg0)
+ %res = scf.parallel (%i0) = (%arg0) to (%arg0)
step (%arg0) init (%zero) -> f32 {
// expected-error@+1 {{expects two arguments to reduce block of type 'f32'}}
- loop.reduce(%arg1) : f32 {
+ scf.reduce(%arg1) : f32 {
^bb0(%lhs : f32, %rhs : i32):
- loop.reduce.return %lhs : f32
+ scf.reduce.return %lhs : f32
}
}
return
}
// -----
@@ -284,12 +284,12 @@
func @reduce_wrong_terminator(%arg0 : index, %arg1 : f32) {
%zero = constant 0.0 : f32
- %res = loop.parallel (%i0) = (%arg0) to (%arg0)
+ %res = scf.parallel (%i0) = (%arg0) to (%arg0)
step (%arg0) init (%zero) -> f32 {
- // expected-error@+1 {{the block inside reduce should be terminated with a 'loop.reduce.return' op}}
- loop.reduce(%arg1) : f32 {
+ // expected-error@+1 {{the block inside reduce should be terminated with a 'scf.reduce.return' op}}
+ scf.reduce(%arg1) : f32 {
^bb0(%lhs : f32, %rhs : f32):
- loop.yield
+ scf.yield
}
}
return
}
// -----
@@ -299,13 +299,13 @@
func @reduceReturn_wrong_type(%arg0 : index, %arg1: f32) {
%zero = constant 0.0 : f32
- %res = loop.parallel (%i0) = (%arg0) to (%arg0)
+ %res = scf.parallel (%i0) = (%arg0) to (%arg0)
step (%arg0) init (%zero) -> f32 {
- loop.reduce(%arg1) : f32 {
+ scf.reduce(%arg1) : f32 {
^bb0(%lhs : f32, %rhs : f32):
%c0 = constant 1 : index
// expected-error@+1 {{needs to have type 'f32' (the type of the enclosing ReduceOp)}}
- loop.reduce.return %c0 : index
+ scf.reduce.return %c0 : index
}
}
return
}
// -----
@@ -315,8 +315,8 @@
func @reduceReturn_not_inside_reduce(%arg0 : f32) {
"foo.region"() ({
- // expected-error@+1 {{expects parent op 'loop.reduce'}}
- loop.reduce.return %arg0 : f32
+ // expected-error@+1 {{expects parent op 'scf.reduce'}}
+ scf.reduce.return %arg0 : f32
}): () -> ()
return
}
// -----
@@ -325,13 +325,13 @@
func @std_if_incorrect_yield(%arg0: i1, %arg1: f32) {
- %x, %y = loop.if %arg0 -> (f32, f32) {
+ %x, %y = scf.if %arg0 -> (f32, f32) {
%0 = addf %arg1, %arg1 : f32
// expected-error@+1 {{parent of yield must have same number of results as the yield operands}}
- loop.yield %0 : f32
+ scf.yield %0 : f32
} else {
%0 = subf %arg1, %arg1 : f32
- loop.yield %0 : f32
+ scf.yield %0 : f32
}
return
}
// -----
@@ -341,9 +341,9 @@
func @std_if_missing_else(%arg0: i1, %arg1: f32) {
// expected-error@+1 {{must have an else block if defining values}}
- %x = loop.if %arg0 -> (f32) {
+ %x = scf.if %arg0 -> (f32) {
%0 = addf %arg1, %arg1 : f32
- loop.yield %0 : f32
+ scf.yield %0 : f32
}
return
}
// -----
@@ -354,11 +354,11 @@
%s0 = constant 0.0 : f32
%t0 = constant 1 : i32
// expected-error@+1 {{mismatch in number of loop-carried values and defined values}}
- %result1:3 = loop.for %i0 = %arg0 to %arg1 step %arg2
+ %result1:3 = scf.for %i0 = %arg0 to %arg1 step %arg2
iter_args(%si = %s0, %ti = %t0) -> (f32, i32, f32) {
%sn = addf %si, %si : f32
%tn = addi %ti, %ti : i32
- loop.yield %sn, %tn, %sn : f32, i32, f32
+ scf.yield %sn, %tn, %sn : f32, i32, f32
}
return
}
// -----
@@ -370,12 +370,12 @@
%t0 = constant 1 : i32
%u0 = constant 1.0 : f32
// expected-error@+1 {{mismatch in number of loop-carried values and defined values}}
- %result1:2 = loop.for %i0 = %arg0 to %arg1 step %arg2
+ %result1:2 = scf.for %i0 = %arg0 to %arg1 step %arg2
iter_args(%si = %s0, %ti = %t0, %ui = %u0) -> (f32, i32) {
%sn = addf %si, %si : f32
%tn = addi %ti, %ti : i32
%un = subf %ui, %ui : f32
- loop.yield %sn, %tn, %un : f32, i32, f32
+ scf.yield %sn, %tn, %un : f32, i32, f32
}
return
}
// -----
@@ -387,11 +387,11 @@
%s0 = constant 0.0 : f32
%t0 = constant 1.0 : f32
// expected-error@+2 {{expects different type than prior uses: 'i32' vs 'f32'}}
- %result1:2 = loop.for %i0 = %arg0 to %arg1 step %arg2
+ %result1:2 = scf.for %i0 = %arg0 to %arg1 step %arg2
iter_args(%si = %s0, %ti = %t0) -> (i32, i32) {
%sn = addf %si, %si : i32
%tn = addf %ti, %ti : i32
- loop.yield %sn, %tn : i32, i32
+ scf.yield %sn, %tn : i32, i32
}
return
}
// -----
@@ -400,10 +400,10 @@
func @parallel_invalid_yield(
%arg0: index, %arg1: index, %arg2: index) {
- loop.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) {
+ scf.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) {
%c0 = constant 1.0 : f32
- // expected-error@+1 {{yield inside loop.parallel is not allowed to have operands}}
- loop.yield %c0 : f32
+ // expected-error@+1 {{yield inside scf.parallel is not allowed to have operands}}
+ scf.yield %c0 : f32
}
return
}
diff --git a/mlir/test/Dialect/SCF/loop-unroll.mlir b/mlir/test/Dialect/SCF/loop-unroll.mlir
--- a/mlir/test/Dialect/SCF/loop-unroll.mlir
+++ b/mlir/test/Dialect/SCF/loop-unroll.mlir
@@ -6,7 +6,7 @@
func @dynamic_loop_unroll(%arg0 : index, %arg1 : index, %arg2 : index,
%arg3: memref) {
%0 = constant 7.0 : f32
- loop.for %i0 = %arg0 to %arg1 step %arg2 {
+ scf.for %i0 = %arg0 to %arg1 step %arg2 {
store %0, %arg3[%i0] : memref
}
return
}
@@ -32,14 +32,14 @@
// UNROLL-BY-2-DAG: %[[V7:.*]] = addi %[[LB]], %[[V6]] : index
// Compute step of unrolled loop in V8.
// UNROLL-BY-2-DAG: %[[V8:.*]] = muli %[[STEP]], %[[C2]] : index
-// UNROLL-BY-2: loop.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] {
+// UNROLL-BY-2: scf.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] {
// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref
// UNROLL-BY-2-NEXT: %[[C1_IV:.*]] = constant 1 : index
// UNROLL-BY-2-NEXT: %[[V9:.*]] = muli %[[STEP]], %[[C1_IV]] : index
// UNROLL-BY-2-NEXT: %[[V10:.*]] = addi %[[IV]], %[[V9]] : index
// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[V10]]] : memref
// UNROLL-BY-2-NEXT: }
-// UNROLL-BY-2-NEXT: loop.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] {
+// UNROLL-BY-2-NEXT: scf.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] {
// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref
// UNROLL-BY-2-NEXT: }
// UNROLL-BY-2-NEXT: return
@@ -65,7 +65,7 @@
// UNROLL-BY-3-DAG: %[[V7:.*]] = addi %[[LB]], %[[V6]] : index
// Compute step of unrolled loop in V8.
// UNROLL-BY-3-DAG: %[[V8:.*]] = muli %[[STEP]], %[[C3]] : index
-// UNROLL-BY-3: loop.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] {
+// UNROLL-BY-3: scf.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] {
// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref
// UNROLL-BY-3-NEXT: %[[C1_IV:.*]] = constant 1 : index
// UNROLL-BY-3-NEXT: %[[V9:.*]] = muli %[[STEP]], %[[C1_IV]] : index
@@ -76,7 +76,7 @@
// UNROLL-BY-3-NEXT: %[[V12:.*]] = addi %[[IV]], %[[V11]] : index
// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V12]]] : memref
// UNROLL-BY-3-NEXT: }
-// UNROLL-BY-3-NEXT: loop.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] {
+// UNROLL-BY-3-NEXT: scf.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] {
// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref
// UNROLL-BY-3-NEXT: }
// UNROLL-BY-3-NEXT: return
@@ -85,8 +85,8 @@
    %arg0 : index, %arg1 : index, %arg2 : index,
    %arg3 : index, %arg4 : index, %arg5 : index, %arg6: memref) {
  %0 = constant 7.0 : f32
-  loop.for %i0 = %arg0 to %arg1 step %arg2 {
-    loop.for %i1 = %arg3 to %arg4 step %arg5 {
+  scf.for %i0 = %arg0 to %arg1 step %arg2 {
+    scf.for %i1 = %arg3 to %arg4 step %arg5 {
      store %0, %arg6[%i1] : memref
    }
  }
@@ -101,16 +101,16 @@
// UNROLL-OUTER-BY-2-SAME: %[[STEP1:.*5]]: index,
// UNROLL-OUTER-BY-2-SAME: %[[MEM:.*6]]: memref
//
-// UNROLL-OUTER-BY-2: loop.for %[[IV0:.*]] = %[[LB0]] to %{{.*}} step %{{.*}} {
-// UNROLL-OUTER-BY-2-NEXT: loop.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
+// UNROLL-OUTER-BY-2: scf.for %[[IV0:.*]] = %[[LB0]] to %{{.*}} step %{{.*}} {
+// UNROLL-OUTER-BY-2-NEXT: scf.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
// UNROLL-OUTER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref
// UNROLL-OUTER-BY-2-NEXT: }
-// UNROLL-OUTER-BY-2-NEXT: loop.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
+// UNROLL-OUTER-BY-2-NEXT: scf.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
// UNROLL-OUTER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref
// UNROLL-OUTER-BY-2-NEXT: }
// UNROLL-OUTER-BY-2-NEXT: }
-// UNROLL-OUTER-BY-2-NEXT: loop.for %[[IV0:.*]] = %{{.*}} to %[[UB0]] step %[[STEP0]] {
-// UNROLL-OUTER-BY-2-NEXT: loop.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
+// UNROLL-OUTER-BY-2-NEXT: scf.for %[[IV0:.*]] = %{{.*}} to %[[UB0]] step %[[STEP0]] {
+// UNROLL-OUTER-BY-2-NEXT: scf.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
// UNROLL-OUTER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref
// UNROLL-OUTER-BY-2-NEXT: }
// UNROLL-OUTER-BY-2-NEXT: }
@@ -120,8 +120,8 @@
    %arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index,
    %arg4 : index, %arg5 : index, %arg6: memref) {
  %0 = constant 7.0 : f32
-  loop.for %i0 = %arg0 to %arg1 step %arg2 {
-    loop.for %i1 = %arg3 to %arg4 step %arg5 {
+  scf.for %i0 = %arg0 to %arg1 step %arg2 {
+    scf.for %i1 = %arg3 to %arg4 step %arg5 {
      store %0, %arg6[%i1] : memref
    }
  }
@@ -136,15 +136,15 @@
// UNROLL-INNER-BY-2-SAME: %[[STEP1:.*5]]: index,
// UNROLL-INNER-BY-2-SAME: %[[MEM:.*6]]: memref
//
-// UNROLL-INNER-BY-2: loop.for %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
-// UNROLL-INNER-BY-2: loop.for %[[IV1:.*]] = %[[LB1]] to %{{.*}} step %{{.*}} {
+// UNROLL-INNER-BY-2: scf.for %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
+// UNROLL-INNER-BY-2: scf.for %[[IV1:.*]] = %[[LB1]] to %{{.*}} step %{{.*}} {
// UNROLL-INNER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref
// UNROLL-INNER-BY-2-NEXT: %[[C1_IV:.*]] = constant 1 : index
// UNROLL-INNER-BY-2-NEXT: %[[V0:.*]] = muli %[[STEP1]], %[[C1_IV]] : index
// UNROLL-INNER-BY-2-NEXT: %[[V1:.*]] = addi %[[IV1]], %[[V0]] : index
// UNROLL-INNER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[V1]]] : memref
// UNROLL-INNER-BY-2-NEXT: }
-// UNROLL-INNER-BY-2-NEXT: loop.for %[[IV1:.*]] = %{{.*}} to %[[UB1]] step %[[STEP1]] {
+// UNROLL-INNER-BY-2-NEXT: scf.for %[[IV1:.*]] = %{{.*}} to %[[UB1]] step %[[STEP1]] {
// UNROLL-INNER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref
// UNROLL-INNER-BY-2-NEXT: }
// UNROLL-INNER-BY-2-NEXT: }
@@ -157,7 +157,7 @@
  %lb = constant 0 : index
  %ub = constant 20 : index
  %step = constant 1 : index
-  loop.for %i0 = %lb to %ub step %step {
+  scf.for %i0 = %lb to %ub step %step {
    store %0, %arg0[%i0] : memref
  }
  return
@@ -169,7 +169,7 @@
// UNROLL-BY-2-DAG: %[[C1:.*]] = constant 1 : index
// UNROLL-BY-2-DAG: %[[C20:.*]] = constant 20 : index
// UNROLL-BY-2-DAG: %[[C2:.*]] = constant 2 : index
-// UNROLL-BY-2: loop.for %[[IV:.*]] = %[[C0]] to %[[C20]] step %[[C2]] {
+// UNROLL-BY-2: scf.for %[[IV:.*]] = %[[C0]] to %[[C20]] step %[[C2]] {
// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref
// UNROLL-BY-2-NEXT: %[[C1_IV:.*]] = constant 1 : index
// UNROLL-BY-2-NEXT: %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index
@@ -185,7 +185,7 @@
  %lb = constant 0 : index
  %ub = constant 20 : index
  %step = constant 1 : index
-  loop.for %i0 = %lb to %ub step %step {
+  scf.for %i0 = %lb to %ub step %step {
    store %0, %arg0[%i0] : memref
  }
  return
@@ -199,7 +199,7 @@
// UNROLL-BY-3-DAG: %[[C20:.*]] = constant 20 : index
// UNROLL-BY-3-DAG: %[[C18:.*]] = constant 18 : index
// UNROLL-BY-3-DAG: %[[C3:.*]] = constant 3 : index
-// UNROLL-BY-3: loop.for %[[IV:.*]] = %[[C0]] to %[[C18]] step %[[C3]] {
+// UNROLL-BY-3: scf.for %[[IV:.*]] = %[[C0]] to %[[C18]] step %[[C3]] {
// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref
// UNROLL-BY-3-NEXT: %[[C1_IV:.*]] = constant 1 : index
// UNROLL-BY-3-NEXT: %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index
@@ -210,7 +210,7 @@
// UNROLL-BY-3-NEXT: %[[V3:.*]] = addi %[[IV]], %[[V2]] : index
// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V3]]] : memref
// UNROLL-BY-3-NEXT: }
-// UNROLL-BY-3-NEXT: loop.for %[[IV:.*]] = %[[C18]] to %[[C20]] step %[[C1]] {
+// UNROLL-BY-3-NEXT: scf.for %[[IV:.*]] = %[[C18]] to %[[C20]] step %[[C1]] {
// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref
// UNROLL-BY-3-NEXT: }
// UNROLL-BY-3-NEXT: return
@@ -222,7 +222,7 @@
  %lb = constant 0 : index
  %ub = constant 10 : index
  %step = constant 1 : index
-  loop.for %i0 = %lb to %ub step %step {
+  scf.for %i0 = %lb to %ub step %step {
    store %0, %arg0[%i0] : memref
  }
  return
@@ -235,7 +235,7 @@
// UNROLL-BY-3-DAG: %[[C10:.*]] = constant 10 : index
// UNROLL-BY-3-DAG: %[[C9:.*]] = constant 9 : index
// UNROLL-BY-3-DAG: %[[C3:.*]] = constant 3 : index
-// UNROLL-BY-3: loop.for %[[IV:.*]] = %[[C0]] to %[[C9]] step %[[C3]] {
+// UNROLL-BY-3: scf.for %[[IV:.*]] = %[[C0]] to %[[C9]] step %[[C3]] {
// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref
// UNROLL-BY-3-NEXT: %[[C1_IV:.*]] = constant 1 : index
// UNROLL-BY-3-NEXT: %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index
diff --git a/mlir/test/Dialect/SCF/ops.mlir b/mlir/test/Dialect/SCF/ops.mlir
--- a/mlir/test/Dialect/SCF/ops.mlir
+++ b/mlir/test/Dialect/SCF/ops.mlir
@@ -5,39 +5,39 @@
// RUN: mlir-opt -mlir-print-op-generic %s | mlir-opt | FileCheck %s --dump-input-on-failure
func @std_for(%arg0 : index, %arg1 : index, %arg2 : index) {
-  loop.for %i0 = %arg0 to %arg1 step %arg2 {
-    loop.for %i1 = %arg0 to %arg1 step %arg2 {
+  scf.for %i0 = %arg0 to %arg1 step %arg2 {
+    scf.for %i1 = %arg0 to %arg1 step %arg2 {
      %min_cmp = cmpi "slt", %i0, %i1 : index
      %min = select %min_cmp, %i0, %i1 : index
      %max_cmp = cmpi "sge", %i0, %i1 : index
      %max = select %max_cmp, %i0, %i1 : index
-      loop.for %i2 = %min to %max step %i1 {
+      scf.for %i2 = %min to %max step %i1 {
      }
    }
  }
  return
}
// CHECK-LABEL: func @std_for(
-// CHECK-NEXT: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
-// CHECK-NEXT: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK-NEXT: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK-NEXT: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
// CHECK-NEXT: %{{.*}} = cmpi "slt", %{{.*}}, %{{.*}} : index
// CHECK-NEXT: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : index
// CHECK-NEXT: %{{.*}} = cmpi "sge", %{{.*}}, %{{.*}} : index
// CHECK-NEXT: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : index
-// CHECK-NEXT: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
+// CHECK-NEXT: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} {
func @std_if(%arg0: i1, %arg1: f32) {
-  loop.if %arg0 {
+  scf.if %arg0 {
    %0 = addf %arg1, %arg1 : f32
  }
  return
}
// CHECK-LABEL: func @std_if(
-// CHECK-NEXT: loop.if %{{.*}} {
+// CHECK-NEXT: scf.if %{{.*}} {
// CHECK-NEXT: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
func @std_if_else(%arg0: i1, %arg1: f32) {
-  loop.if %arg0 {
+  scf.if %arg0 {
    %0 = addf %arg1, %arg1 : f32
  } else {
    %1 = addf %arg1, %arg1 : f32
@@ -45,7 +45,7 @@
  return
}
// CHECK-LABEL: func @std_if_else(
-// CHECK-NEXT: loop.if %{{.*}} {
+// CHECK-NEXT: scf.if %{{.*}} {
// CHECK-NEXT: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
// CHECK-NEXT: } else {
// CHECK-NEXT: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
@@ -53,7 +53,7 @@
func @std_parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
                        %arg3 : index, %arg4 : index) {
  %step = constant 1 : index
-  loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
+  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                step (%arg4, %step) {
    %min_cmp = cmpi "slt", %i0, %i1 : index
    %min = select %min_cmp, %i0, %i1 : index
@@ -61,19 +61,19 @@
    %max = select %max_cmp, %i0, %i1 : index
    %zero = constant 0.0 : f32
    %int_zero = constant 0 : i32
-    %red:2 = loop.parallel (%i2) = (%min) to (%max) step (%i1)
+    %red:2 = scf.parallel (%i2) = (%min) to (%max) step (%i1)
             init (%zero, %int_zero) -> (f32, i32) {
      %one = constant 1.0 : f32
-      loop.reduce(%one) : f32 {
+      scf.reduce(%one) : f32 {
        ^bb0(%lhs : f32, %rhs: f32):
          %res = addf %lhs, %rhs : f32
-          loop.reduce.return %res : f32
+          scf.reduce.return %res : f32
      }
      %int_one = constant 1 : i32
-      loop.reduce(%int_one) : i32 {
+      scf.reduce(%int_one) : i32 {
        ^bb0(%lhs : i32, %rhs: i32):
          %res = muli %lhs, %rhs : i32
-          loop.reduce.return %res : i32
+          scf.reduce.return %res : i32
      }
    }
  }
@@ -86,7 +86,7 @@
// CHECK-SAME: %[[ARG3:[A-Za-z0-9]+]]:
// CHECK-SAME: %[[ARG4:[A-Za-z0-9]+]]:
// CHECK: %[[STEP:.*]] = constant 1 : index
-// CHECK-NEXT: loop.parallel (%[[I0:.*]], %[[I1:.*]]) = (%[[ARG0]], %[[ARG1]]) to
+// CHECK-NEXT: scf.parallel (%[[I0:.*]], %[[I1:.*]]) = (%[[ARG0]], %[[ARG1]]) to
// CHECK: (%[[ARG2]], %[[ARG3]]) step (%[[ARG4]], %[[STEP]]) {
// CHECK-NEXT: %[[MIN_CMP:.*]] = cmpi "slt", %[[I0]], %[[I1]] : index
// CHECK-NEXT: %[[MIN:.*]] = select %[[MIN_CMP]], %[[I0]], %[[I1]] : index
@@ -94,29 +94,29 @@
// CHECK-NEXT: %[[MAX:.*]] = select %[[MAX_CMP]], %[[I0]], %[[I1]] : index
// CHECK-NEXT: %[[ZERO:.*]] = constant 0.000000e+00 : f32
// CHECK-NEXT: %[[INT_ZERO:.*]] = constant 0 : i32
-// CHECK-NEXT: loop.parallel (%{{.*}}) = (%[[MIN]]) to (%[[MAX]])
+// CHECK-NEXT: scf.parallel (%{{.*}}) = (%[[MIN]]) to (%[[MAX]])
// CHECK-SAME: step (%[[I1]])
// CHECK-SAME: init (%[[ZERO]], %[[INT_ZERO]]) -> (f32, i32) {
// CHECK-NEXT: %[[ONE:.*]] = constant 1.000000e+00 : f32
-// CHECK-NEXT: loop.reduce(%[[ONE]]) : f32 {
+// CHECK-NEXT: scf.reduce(%[[ONE]]) : f32 {
// CHECK-NEXT: ^bb0(%[[LHS:.*]]: f32, %[[RHS:.*]]: f32):
// CHECK-NEXT: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32
-// CHECK-NEXT: loop.reduce.return %[[RES]] : f32
+// CHECK-NEXT: scf.reduce.return %[[RES]] : f32
// CHECK-NEXT: }
// CHECK-NEXT: %[[INT_ONE:.*]] = constant 1 : i32
-// CHECK-NEXT: loop.reduce(%[[INT_ONE]]) : i32 {
+// CHECK-NEXT: scf.reduce(%[[INT_ONE]]) : i32 {
// CHECK-NEXT: ^bb0(%[[LHS:.*]]: i32, %[[RHS:.*]]: i32):
// CHECK-NEXT: %[[RES:.*]] = muli %[[LHS]], %[[RHS]] : i32
-// CHECK-NEXT: loop.reduce.return %[[RES]] : i32
+// CHECK-NEXT: scf.reduce.return %[[RES]] : i32
// CHECK-NEXT: }
-// CHECK-NEXT: loop.yield
+// CHECK-NEXT: scf.yield
// CHECK-NEXT: }
-// CHECK-NEXT: loop.yield
+// CHECK-NEXT: scf.yield
func @parallel_explicit_yield(
    %arg0: index, %arg1: index, %arg2: index) {
-  loop.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) {
-    loop.yield
+  scf.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) {
+    scf.yield
  }
  return
}
@@ -125,43 +125,43 @@
// CHECK-SAME: %[[ARG0:[A-Za-z0-9]+]]:
// CHECK-SAME: %[[ARG1:[A-Za-z0-9]+]]:
// CHECK-SAME: %[[ARG2:[A-Za-z0-9]+]]:
-// CHECK-NEXT: loop.parallel (%{{.*}}) = (%[[ARG0]]) to (%[[ARG1]]) step (%[[ARG2]])
-// CHECK-NEXT: loop.yield
+// CHECK-NEXT: scf.parallel (%{{.*}}) = (%[[ARG0]]) to (%[[ARG1]]) step (%[[ARG2]])
+// CHECK-NEXT: scf.yield
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT: }
func @std_if_yield(%arg0: i1, %arg1: f32) {
-  %x, %y = loop.if %arg0 -> (f32, f32) {
+  %x, %y = scf.if %arg0 -> (f32, f32) {
    %0 = addf %arg1, %arg1 : f32
    %1 = subf %arg1, %arg1 : f32
-    loop.yield %0, %1 : f32, f32
+    scf.yield %0, %1 : f32, f32
  } else {
    %0 = subf %arg1, %arg1 : f32
    %1 = addf %arg1, %arg1 : f32
-    loop.yield %0, %1 : f32, f32
+    scf.yield %0, %1 : f32, f32
  }
  return
}
// CHECK-LABEL: func @std_if_yield(
// CHECK-SAME: %[[ARG0:[A-Za-z0-9]+]]:
// CHECK-SAME: %[[ARG1:[A-Za-z0-9]+]]:
-// CHECK-NEXT: %{{.*}}:2 = loop.if %[[ARG0]] -> (f32, f32) {
+// CHECK-NEXT: %{{.*}}:2 = scf.if %[[ARG0]] -> (f32, f32) {
// CHECK-NEXT: %[[T1:.*]] = addf %[[ARG1]], %[[ARG1]]
// CHECK-NEXT: %[[T2:.*]] = subf %[[ARG1]], %[[ARG1]]
-// CHECK-NEXT: loop.yield %[[T1]], %[[T2]] : f32, f32
+// CHECK-NEXT: scf.yield %[[T1]], %[[T2]] : f32, f32
// CHECK-NEXT: } else {
// CHECK-NEXT: %[[T3:.*]] = subf %[[ARG1]], %[[ARG1]]
// CHECK-NEXT: %[[T4:.*]] = addf %[[ARG1]], %[[ARG1]]
-// CHECK-NEXT: loop.yield %[[T3]], %[[T4]] : f32, f32
+// CHECK-NEXT: scf.yield %[[T3]], %[[T4]] : f32, f32
// CHECK-NEXT: }
func @std_for_yield(%arg0 : index, %arg1 : index, %arg2 : index) {
  %s0 = constant 0.0 : f32
-  %result = loop.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0) -> (f32) {
+  %result = scf.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0) -> (f32) {
    %sn = addf %si, %si : f32
-    loop.yield %sn : f32
+    scf.yield %sn : f32
  }
  return
}
@@ -170,10 +170,10 @@
// CHECK-SAME: %[[ARG1:[A-Za-z0-9]+]]:
// CHECK-SAME: %[[ARG2:[A-Za-z0-9]+]]:
// CHECK-NEXT: %[[INIT:.*]] = constant
-// CHECK-NEXT: %{{.*}} = loop.for %{{.*}} = %[[ARG0]] to %[[ARG1]] step %[[ARG2]]
+// CHECK-NEXT: %{{.*}} = scf.for %{{.*}} = %[[ARG0]] to %[[ARG1]] step %[[ARG2]]
// CHECK-SAME: iter_args(%[[ITER:.*]] = %[[INIT]]) -> (f32) {
// CHECK-NEXT: %[[NEXT:.*]] = addf %[[ITER]], %[[ITER]] : f32
-// CHECK-NEXT: loop.yield %[[NEXT]] : f32
+// CHECK-NEXT: scf.yield %[[NEXT]] : f32
// CHECK-NEXT: }
@@ -181,11 +181,11 @@
  %s0 = constant 0.0 : f32
  %t0 = constant 1 : i32
  %u0 = constant 1.0 : f32
-  %result1:3 = loop.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0, %ti = %t0, %ui = %u0) -> (f32, i32, f32) {
+  %result1:3 = scf.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0, %ti = %t0, %ui = %u0) -> (f32, i32, f32) {
    %sn = addf %si, %si : f32
    %tn = addi %ti, %ti : i32
    %un = subf %ui, %ui : f32
-    loop.yield %sn, %tn, %un : f32, i32, f32
+    scf.yield %sn, %tn, %un : f32, i32, f32
  }
  return
}
@@ -196,27 +196,27 @@
// CHECK-NEXT: %[[INIT1:.*]] = constant
// CHECK-NEXT: %[[INIT2:.*]] = constant
// CHECK-NEXT: %[[INIT3:.*]] = constant
-// CHECK-NEXT: %{{.*}}:3 = loop.for %{{.*}} = %[[ARG0]] to %[[ARG1]] step %[[ARG2]]
+// CHECK-NEXT: %{{.*}}:3 = scf.for %{{.*}} = %[[ARG0]] to %[[ARG1]] step %[[ARG2]]
// CHECK-SAME: iter_args(%[[ITER1:.*]] = %[[INIT1]], %[[ITER2:.*]] = %[[INIT2]], %[[ITER3:.*]] = %[[INIT3]]) -> (f32, i32, f32) {
// CHECK-NEXT: %[[NEXT1:.*]] = addf %[[ITER1]], %[[ITER1]] : f32
// CHECK-NEXT: %[[NEXT2:.*]] = addi %[[ITER2]], %[[ITER2]] : i32
// CHECK-NEXT: %[[NEXT3:.*]] = subf %[[ITER3]], %[[ITER3]] : f32
-// CHECK-NEXT: loop.yield %[[NEXT1]], %[[NEXT2]], %[[NEXT3]] : f32, i32, f32
+// CHECK-NEXT: scf.yield %[[NEXT1]], %[[NEXT2]], %[[NEXT3]] : f32, i32, f32
func @conditional_reduce(%buffer: memref<1024xf32>, %lb: index, %ub: index, %step: index) -> (f32) {
  %sum_0 = constant 0.0 : f32
  %c0 = constant 0.0 : f32
-  %sum = loop.for %iv = %lb to %ub step %step iter_args(%sum_iter = %sum_0) -> (f32) {
+  %sum = scf.for %iv = %lb to %ub step %step iter_args(%sum_iter = %sum_0) -> (f32) {
    %t = load %buffer[%iv] : memref<1024xf32>
    %cond = cmpf "ugt", %t, %c0 : f32
-    %sum_next = loop.if %cond -> (f32) {
+    %sum_next = scf.if %cond -> (f32) {
      %new_sum = addf %sum_iter, %t : f32
-      loop.yield %new_sum : f32
+      scf.yield %new_sum : f32
    } else {
-      loop.yield %sum_iter : f32
+      scf.yield %sum_iter : f32
    }
-    loop.yield %sum_next : f32
+    scf.yield %sum_next : f32
  }
  return %sum : f32
}
@@ -227,16 +227,16 @@
// CHECK-SAME: %[[ARG3:[A-Za-z0-9]+]]
// CHECK-NEXT: %[[INIT:.*]] = constant
// CHECK-NEXT: %[[ZERO:.*]] = constant
-// CHECK-NEXT: %[[RESULT:.*]] = loop.for %[[IV:.*]] = %[[ARG1]] to %[[ARG2]] step %[[ARG3]]
+// CHECK-NEXT: %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[ARG1]] to %[[ARG2]] step %[[ARG3]]
// CHECK-SAME: iter_args(%[[ITER:.*]] = %[[INIT]]) -> (f32) {
// CHECK-NEXT: %[[T:.*]] = load %[[ARG0]][%[[IV]]]
// CHECK-NEXT: %[[COND:.*]] = cmpf "ugt", %[[T]], %[[ZERO]]
-// CHECK-NEXT: %[[IFRES:.*]] = loop.if %[[COND]] -> (f32) {
+// CHECK-NEXT: %[[IFRES:.*]] = scf.if %[[COND]] -> (f32) {
// CHECK-NEXT: %[[THENRES:.*]] = addf %[[ITER]], %[[T]]
-// CHECK-NEXT: loop.yield %[[THENRES]] : f32
+// CHECK-NEXT: scf.yield %[[THENRES]] : f32
// CHECK-NEXT: } else {
-// CHECK-NEXT: loop.yield %[[ITER]] : f32
+// CHECK-NEXT: scf.yield %[[ITER]] : f32
// CHECK-NEXT: }
-// CHECK-NEXT: loop.yield %[[IFRES]] : f32
+// CHECK-NEXT: scf.yield %[[IFRES]] : f32
// CHECK-NEXT: }
// CHECK-NEXT: return %[[RESULT]]
diff --git a/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir b/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir
--- a/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir
+++ b/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir
@@ -4,11 +4,11 @@
  %c2 = constant 2 : index
  %c0 = constant 0 : index
  %c1 = constant 1 : index
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.yield
  }
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.yield
  }
  "xla_lhlo.terminator"() : () -> ()
}
@@ -16,11 +16,11 @@
// CHECK: [[C2:%.*]] = constant 2 : index
// CHECK: [[C0:%.*]] = constant 0 : index
// CHECK: [[C1:%.*]] = constant 1 : index
-// CHECK: loop.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
+// CHECK: scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
// CHECK-SAME: to ([[C2]], [[C2]]) step ([[C1]], [[C1]]) {
-// CHECK: loop.yield
+// CHECK: scf.yield
// CHECK: }
-// CHECK-NOT: loop.parallel
+// CHECK-NOT: scf.parallel
// -----
@@ -30,19 +30,19 @@
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %sum = alloc() : memref<2x2xf32>
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
    %B_elem = load %B[%i, %j] : memref<2x2xf32>
    %C_elem = load %C[%i, %j] : memref<2x2xf32>
    %sum_elem = addf %B_elem, %C_elem : f32
    store %sum_elem, %sum[%i, %j] : memref<2x2xf32>
-    loop.yield
+    scf.yield
  }
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
    %sum_elem = load %sum[%i, %j] : memref<2x2xf32>
    %A_elem = load %A[%i, %j] : memref<2x2xf32>
    %product_elem = mulf %sum_elem, %A_elem : f32
    store %product_elem, %result[%i, %j] : memref<2x2xf32>
-    loop.yield
+    scf.yield
  }
  dealloc %sum : memref<2x2xf32>
  return
@@ -54,7 +54,7 @@
// CHECK: [[C0:%.*]] = constant 0 : index
// CHECK: [[C1:%.*]] = constant 1 : index
// CHECK: [[SUM:%.*]] = alloc()
-// CHECK: loop.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
+// CHECK: scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
// CHECK-SAME: to ([[C2]], [[C2]]) step ([[C1]], [[C1]]) {
// CHECK: [[B_ELEM:%.*]] = load [[B]]{{\[}}[[I]], [[J]]]
// CHECK: [[C_ELEM:%.*]] = load [[C]]{{\[}}[[I]], [[J]]]
@@ -64,7 +64,7 @@
// CHECK: [[A_ELEM:%.*]] = load [[A]]{{\[}}[[I]], [[J]]]
// CHECK: [[PRODUCT_ELEM:%.*]] = mulf [[SUM_ELEM_]], [[A_ELEM]]
// CHECK: store [[PRODUCT_ELEM]], [[RESULT]]{{\[}}[[I]], [[J]]]
-// CHECK: loop.yield
+// CHECK: scf.yield
// CHECK: }
// CHECK: dealloc [[SUM]]
@@ -78,23 +78,23 @@
  %c1 = constant 1 : index
  %broadcast_rhs = alloc() : memref<100x10xf32>
  %diff = alloc() : memref<100x10xf32>
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c100, %c10) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c100, %c10) step (%c1, %c1) {
    %rhs_elem = load %rhs[%i] : memref<100xf32>
    store %rhs_elem, %broadcast_rhs[%i, %j] : memref<100x10xf32>
-    loop.yield
+    scf.yield
  }
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c100, %c10) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c100, %c10) step (%c1, %c1) {
    %lhs_elem = load %lhs[%i, %j] : memref<100x10xf32>
    %broadcast_rhs_elem = load %broadcast_rhs[%i, %j] : memref<100x10xf32>
    %diff_elem = subf %lhs_elem, %broadcast_rhs_elem : f32
    store %diff_elem, %diff[%i, %j] : memref<100x10xf32>
-    loop.yield
+    scf.yield
  }
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c100, %c10) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c100, %c10) step (%c1, %c1) {
    %diff_elem = load %diff[%i, %j] : memref<100x10xf32>
    %exp_elem = exp %diff_elem : f32
    store %exp_elem, %result[%i, %j] : memref<100x10xf32>
-    loop.yield
+    scf.yield
  }
  dealloc %broadcast_rhs : memref<100x10xf32>
  dealloc %diff : memref<100x10xf32>
@@ -109,7 +109,7 @@
// CHECK: [[C1:%.*]] = constant 1 : index
// CHECK: [[BROADCAST_RHS:%.*]] = alloc()
// CHECK: [[DIFF:%.*]] = alloc()
-// CHECK: loop.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
+// CHECK: scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
// CHECK-SAME: to ([[C100]], [[C10]]) step ([[C1]], [[C1]]) {
// CHECK: [[RHS_ELEM:%.*]] = load [[RHS]]{{\[}}[[I]]]
// CHECK: store [[RHS_ELEM]], [[BROADCAST_RHS]]{{\[}}[[I]], [[J]]]
@@ -120,7 +120,7 @@
// CHECK: [[DIFF_ELEM_:%.*]] = load [[DIFF]]{{\[}}[[I]], [[J]]]
// CHECK: [[EXP_ELEM:%.*]] = exp [[DIFF_ELEM_]]
// CHECK: store [[EXP_ELEM]], [[RESULT]]{{\[}}[[I]], [[J]]]
-// CHECK: loop.yield
+// CHECK: scf.yield
// CHECK: }
// CHECK: dealloc [[BROADCAST_RHS]]
// CHECK: dealloc [[DIFF]]
@@ -131,21 +131,21 @@
  %c2 = constant 2 : index
  %c0 = constant 0 : index
  %c1 = constant 1 : index
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.parallel (%k, %l) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-      loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.parallel (%k, %l) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+      scf.yield
    }
-    loop.yield
+    scf.yield
  }
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.yield
  }
  "xla_lhlo.terminator"() : () -> ()
}
// CHECK-LABEL: func @do_not_fuse_nested_ploop1
-// CHECK: loop.parallel
-// CHECK: loop.parallel
-// CHECK: loop.parallel
+// CHECK: scf.parallel
+// CHECK: scf.parallel
+// CHECK: scf.parallel
// -----
@@ -153,21 +153,21 @@
  %c2 = constant 2 : index
  %c0 = constant 0 : index
  %c1 = constant 1 : index
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.yield
  }
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.parallel (%k, %l) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-      loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.parallel (%k, %l) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+      scf.yield
    }
-    loop.yield
+    scf.yield
  }
  "xla_lhlo.terminator"() : () -> ()
}
// CHECK-LABEL: func @do_not_fuse_nested_ploop2
-// CHECK: loop.parallel
-// CHECK: loop.parallel
-// CHECK: loop.parallel
+// CHECK: scf.parallel
+// CHECK: scf.parallel
+// CHECK: scf.parallel
// -----
@@ -175,17 +175,17 @@
  %c2 = constant 2 : index
  %c0 = constant 0 : index
  %c1 = constant 1 : index
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.yield
  }
-  loop.parallel (%i) = (%c0) to (%c2) step (%c1) {
-    loop.yield
+  scf.parallel (%i) = (%c0) to (%c2) step (%c1) {
+    scf.yield
  }
  "xla_lhlo.terminator"() : () -> ()
}
// CHECK-LABEL: func @do_not_fuse_loops_unmatching_num_loops
-// CHECK: loop.parallel
-// CHECK: loop.parallel
+// CHECK: scf.parallel
+// CHECK: scf.parallel
// -----
@@ -193,18 +193,18 @@
  %c2 = constant 2 : index
  %c0 = constant 0 : index
  %c1 = constant 1 : index
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.yield
  }
  %buffer = alloc() : memref<2x2xf32>
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.yield
  }
  "xla_lhlo.terminator"() : () -> ()
}
// CHECK-LABEL: func @do_not_fuse_loops_with_side_effecting_ops_in_between
-// CHECK: loop.parallel
-// CHECK: loop.parallel
+// CHECK: scf.parallel
+// CHECK: scf.parallel
// -----
@@ -213,17 +213,17 @@
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %c2 = constant 2 : index
  %c4 = constant 4 : index
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c4, %c4) step (%c2, %c2) {
-    loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c4, %c4) step (%c2, %c2) {
+    scf.yield
  }
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.yield
  }
  "xla_lhlo.terminator"() : () -> ()
}
// CHECK-LABEL: func @do_not_fuse_loops_unmatching_iteration_space
-// CHECK: loop.parallel
-// CHECK: loop.parallel
+// CHECK: scf.parallel
+// CHECK: scf.parallel
// -----
@@ -234,27 +234,27 @@
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %common_buf = alloc() : memref<2x2xf32>
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
    %B_elem = load %B[%i, %j] : memref<2x2xf32>
    %C_elem = load %C[%i, %j] : memref<2x2xf32>
    %sum_elem = addf %B_elem, %C_elem : f32
    store %sum_elem, %common_buf[%i, %j] : memref<2x2xf32>
-    loop.yield
+    scf.yield
  }
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
    %k = addi %i, %c1 : index
    %sum_elem = load %common_buf[%k, %j] : memref<2x2xf32>
    %A_elem = load %A[%i, %j] : memref<2x2xf32>
    %product_elem = mulf %sum_elem, %A_elem : f32
    store %product_elem, %result[%i, %j] : memref<2x2xf32>
-    loop.yield
+    scf.yield
  }
  dealloc %common_buf : memref<2x2xf32>
  return
}
// CHECK-LABEL: func @do_not_fuse_unmatching_write_read_patterns
-// CHECK: loop.parallel
-// CHECK: loop.parallel
+// CHECK: scf.parallel
+// CHECK: scf.parallel
// -----
@@ -264,27 +264,27 @@
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %sum = alloc() : memref<2x2xf32>
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
    %B_elem = load %B[%i, %j] : memref<2x2xf32>
    %C_elem = load %common_buf[%i, %j] : memref<2x2xf32>
    %sum_elem = addf %B_elem, %C_elem : f32
    store %sum_elem, %sum[%i, %j] : memref<2x2xf32>
-    loop.yield
+    scf.yield
  }
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
    %k = addi %i, %c1 : index
    %sum_elem = load %sum[%k, %j] : memref<2x2xf32>
    %A_elem = load %A[%i, %j] : memref<2x2xf32>
    %product_elem = mulf %sum_elem, %A_elem : f32
    store %product_elem, %common_buf[%j, %i] : memref<2x2xf32>
-    loop.yield
+    scf.yield
  }
  dealloc %sum : memref<2x2xf32>
  return
}
// CHECK-LABEL: func @do_not_fuse_unmatching_read_write_patterns
-// CHECK: loop.parallel
-// CHECK: loop.parallel
+// CHECK: scf.parallel
+// CHECK: scf.parallel
// -----
@@ -293,20 +293,20 @@
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %buffer = alloc() : memref<2x2xf32>
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.yield
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.yield
  }
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
    %A = subview %buffer[%c0, %c0][%c2, %c2][%c1, %c1]
      : memref<2x2xf32> to memref
    %A_elem = load %A[%i, %j] : memref
-    loop.yield
+    scf.yield
  }
  "xla_lhlo.terminator"() : () -> ()
}
// CHECK-LABEL: func @do_not_fuse_loops_with_memref_defined_in_loop_bodies
-// CHECK: loop.parallel
-// CHECK: loop.parallel
+// CHECK: scf.parallel
+// CHECK: scf.parallel
// -----
@@ -316,20 +316,20 @@
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %sum = alloc() : memref<2x2xf32>
-  loop.parallel (%k) = (%c0) to (%c2) step (%c1) {
-    loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+  scf.parallel (%k) = (%c0) to (%c2) step (%c1) {
+    scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
      %B_elem = load %B[%i, %j] : memref<2x2xf32>
      %C_elem = load %C[%i, %j] : memref<2x2xf32>
      %sum_elem = addf %B_elem, %C_elem : f32
      store %sum_elem, %sum[%i, %j] : memref<2x2xf32>
-      loop.yield
+      scf.yield
    }
-    loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
      %sum_elem = load %sum[%i, %j] : memref<2x2xf32>
      %A_elem = load %A[%i, %j] : memref<2x2xf32>
      %product_elem = mulf %sum_elem, %A_elem : f32
      store %product_elem, %result[%i, %j] : memref<2x2xf32>
-      loop.yield
+      scf.yield
    }
  }
  dealloc %sum : memref<2x2xf32>
@@ -342,8 +342,8 @@
// CHECK: [[C0:%.*]] = constant 0 : index
// CHECK: [[C1:%.*]] = constant 1 : index
// CHECK: [[SUM:%.*]] = alloc()
-// CHECK: loop.parallel
-// CHECK: loop.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
+// CHECK: scf.parallel
+// CHECK: scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
// CHECK-SAME: to ([[C2]], [[C2]]) step ([[C1]], [[C1]]) {
// CHECK: [[B_ELEM:%.*]] = load [[B]]{{\[}}[[I]], [[J]]]
// CHECK: [[C_ELEM:%.*]] = load [[C]]{{\[}}[[I]], [[J]]]
@@ -353,7 +353,7 @@
// CHECK: [[A_ELEM:%.*]] = load [[A]]{{\[}}[[I]], [[J]]]
// CHECK: [[PRODUCT_ELEM:%.*]] = mulf [[SUM_ELEM_]], [[A_ELEM]]
// CHECK: store [[PRODUCT_ELEM]], [[RESULT]]{{\[}}[[I]], [[J]]]
-// CHECK: loop.yield
+// CHECK: scf.yield
// CHECK: }
// CHECK: }
// CHECK: dealloc [[SUM]]
diff --git a/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir b/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir
--- a/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir
+++ b/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir
@@ -11,7 +11,7 @@
  %d1 = dim %A, 1 : memref
  %b0 = affine.min #map0()[%d0, %outer_i0]
  %b1 = affine.min #map1()[%d1, %outer_i1]
-  loop.parallel (%i0, %i1) = (%c0, %c0) to (%b0, %b1) step (%c1, %c1) {
+  scf.parallel (%i0, %i1) = (%c0, %c0) to (%b0, %b1) step (%c1, %c1) {
    %B_elem = load %B[%i0, %i1] : memref
    %C_elem = load %C[%i0, %i1] : memref
    %sum_elem = addf %B_elem, %C_elem : f32
@@ -33,12 +33,12 @@
// CHECK: [[VAL_14:%.*]] = constant 64 : index
// CHECK: [[VAL_15:%.*]] = cmpi "eq", [[VAL_11]], [[VAL_14]] : index
// CHECK: [[VAL_16:%.*]] = and [[VAL_13]], [[VAL_15]] : i1
-// CHECK: loop.if [[VAL_16]] {
-// CHECK: loop.parallel ([[VAL_17:%.*]], [[VAL_18:%.*]]) = ([[VAL_6]], [[VAL_6]]) to ([[VAL_12]], [[VAL_14]]) step ([[VAL_7]], [[VAL_7]]) {
+// CHECK: scf.if [[VAL_16]] {
+// CHECK: scf.parallel ([[VAL_17:%.*]], [[VAL_18:%.*]]) = ([[VAL_6]], [[VAL_6]]) to ([[VAL_12]], [[VAL_14]]) step ([[VAL_7]], [[VAL_7]]) {
// CHECK: store
// CHECK: }
// CHECK: } else {
-// CHECK: loop.parallel ([[VAL_22:%.*]], [[VAL_23:%.*]]) = ([[VAL_6]], [[VAL_6]]) to ([[VAL_10]], [[VAL_11]]) step ([[VAL_7]], [[VAL_7]]) {
+// CHECK: scf.parallel ([[VAL_22:%.*]], [[VAL_23:%.*]]) = ([[VAL_6]], [[VAL_6]]) to ([[VAL_10]], [[VAL_11]]) step ([[VAL_7]], [[VAL_7]]) {
// CHECK: store
// CHECK: }
// CHECK: }
diff --git a/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir b/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir
--- a/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir
+++ b/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir
@@ -4,7 +4,7 @@
    %arg3 : index, %arg4 : index, %arg5 : index,
    %A: memref, %B: memref, %C: memref, %result: memref) {
-  loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) {
+  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) {
    %B_elem = load %B[%i0, %i1] : memref
    %C_elem = load %C[%i0, %i1] : memref
    %sum_elem = addf %B_elem, %C_elem : f32
@@ -21,10 +21,10 @@
// CHECK: [[VAL_12:%.*]] = constant 4 : index
// CHECK: [[VAL_13:%.*]] = muli [[VAL_4]], [[VAL_11]] : index
// CHECK: [[VAL_14:%.*]] = muli [[VAL_5]], [[VAL_12]] : index
-// CHECK: loop.parallel ([[VAL_15:%.*]], [[VAL_16:%.*]]) = ([[VAL_0]], [[VAL_1]]) to ([[VAL_2]], [[VAL_3]]) step ([[VAL_13]], [[VAL_14]]) {
+// CHECK: scf.parallel ([[VAL_15:%.*]], [[VAL_16:%.*]]) = ([[VAL_0]], [[VAL_1]]) to ([[VAL_2]], [[VAL_3]]) step ([[VAL_13]], [[VAL_14]]) {
// CHECK: [[VAL_17:%.*]] = affine.min #map0([[VAL_11]], [[VAL_2]], [[VAL_15]])
// CHECK: [[VAL_18:%.*]] = affine.min #map0([[VAL_12]], [[VAL_3]], [[VAL_16]])
-// CHECK: loop.parallel ([[VAL_19:%.*]], [[VAL_20:%.*]]) = ([[VAL_10]], [[VAL_10]]) to ([[VAL_17]], [[VAL_18]]) step ([[VAL_4]], [[VAL_5]]) {
+// CHECK: scf.parallel ([[VAL_19:%.*]], [[VAL_20:%.*]]) = ([[VAL_10]], [[VAL_10]]) to ([[VAL_17]], [[VAL_18]]) step ([[VAL_4]], [[VAL_5]]) {
// CHECK: [[VAL_21:%.*]] = load [[VAL_7]]{{\[}}[[VAL_19]], [[VAL_20]]] : memref
// CHECK: [[VAL_22:%.*]] = load [[VAL_8]]{{\[}}[[VAL_19]], [[VAL_20]]] : memref
// CHECK: [[VAL_23:%.*]] = addf [[VAL_21]], [[VAL_22]] : f32
@@ -39,11 +39,11 @@
  %c2 = constant 2 : index
  %c0 = constant 0 : index
  %c1 = constant 1 : index
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
-    loop.parallel (%k, %l) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+    scf.parallel (%k, %l) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
    }
  }
-  loop.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
+  scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) {
  }
  return
}
@@ -52,16 +52,16 @@
// CHECK: [[VAL_24:%.*]] = constant 2 : index
// CHECK: [[VAL_25:%.*]] = constant 0 : index
// CHECK: [[VAL_26:%.*]] = constant 1 : index
-// CHECK: loop.parallel ([[VAL_27:%.*]], [[VAL_28:%.*]]) = ([[VAL_25]], [[VAL_25]]) to ([[VAL_24]], [[VAL_24]]) step ([[VAL_26]], [[VAL_26]]) {
+// CHECK: scf.parallel ([[VAL_27:%.*]], [[VAL_28:%.*]]) = ([[VAL_25]], [[VAL_25]]) to ([[VAL_24]], [[VAL_24]]) step ([[VAL_26]], [[VAL_26]]) {
// CHECK: [[VAL_29:%.*]] = constant 0 : index
// CHECK: [[VAL_30:%.*]] = constant 1 : index
// CHECK: [[VAL_31:%.*]] = constant 4 : index
// CHECK: [[VAL_32:%.*]] = muli [[VAL_26]], [[VAL_30]] : index
// CHECK: [[VAL_33:%.*]] = muli [[VAL_26]], [[VAL_31]] : index
-// CHECK: loop.parallel ([[VAL_34:%.*]], [[VAL_35:%.*]]) = ([[VAL_25]], [[VAL_25]]) to ([[VAL_24]], [[VAL_24]]) step ([[VAL_32]], [[VAL_33]]) {
+// CHECK: scf.parallel ([[VAL_34:%.*]], [[VAL_35:%.*]]) = ([[VAL_25]], [[VAL_25]]) to ([[VAL_24]], [[VAL_24]]) step ([[VAL_32]], [[VAL_33]]) {
// CHECK: [[VAL_36:%.*]] = affine.min #map0([[VAL_30]], [[VAL_24]], [[VAL_34]])
// CHECK: [[VAL_37:%.*]] = affine.min #map0([[VAL_31]], [[VAL_24]], [[VAL_35]])
-// CHECK: loop.parallel ([[VAL_38:%.*]], [[VAL_39:%.*]]) = ([[VAL_29]], [[VAL_29]]) to ([[VAL_36]], [[VAL_37]]) step ([[VAL_26]], [[VAL_26]]) {
+// CHECK: scf.parallel ([[VAL_38:%.*]], [[VAL_39:%.*]]) = ([[VAL_29]], [[VAL_29]]) to ([[VAL_36]], [[VAL_37]]) step ([[VAL_26]], [[VAL_26]]) {
// CHECK: }
// CHECK: }
// CHECK: }
@@ -70,10 +70,10 @@
// CHECK: [[VAL_42:%.*]] = constant 4 : index
// CHECK: [[VAL_43:%.*]] = muli [[VAL_26]], [[VAL_41]] : index
// CHECK: [[VAL_44:%.*]] = muli [[VAL_26]], [[VAL_42]] : index
-// CHECK: loop.parallel ([[VAL_45:%.*]], [[VAL_46:%.*]]) = ([[VAL_25]], [[VAL_25]]) to ([[VAL_24]], [[VAL_24]]) step ([[VAL_43]], [[VAL_44]]) {
+// CHECK: scf.parallel ([[VAL_45:%.*]], [[VAL_46:%.*]]) = ([[VAL_25]], [[VAL_25]]) to ([[VAL_24]], [[VAL_24]]) step ([[VAL_43]], [[VAL_44]]) {
// CHECK: [[VAL_47:%.*]] = affine.min #map0([[VAL_41]], [[VAL_24]], [[VAL_45]])
// CHECK: [[VAL_48:%.*]] = affine.min #map0([[VAL_42]], [[VAL_24]], [[VAL_46]])
-// CHECK: loop.parallel ([[VAL_49:%.*]], [[VAL_50:%.*]]) = ([[VAL_40]], [[VAL_40]]) to ([[VAL_47]], [[VAL_48]]) step ([[VAL_26]], [[VAL_26]]) {
+// CHECK: scf.parallel ([[VAL_49:%.*]], [[VAL_50:%.*]]) = ([[VAL_40]], [[VAL_40]]) to ([[VAL_47]], [[VAL_48]]) step ([[VAL_26]], [[VAL_26]]) {
// CHECK: }
// CHECK: }
// CHECK: return
diff --git a/mlir/test/EDSC/builder-api-test.cpp b/mlir/test/EDSC/builder-api-test.cpp
--- a/mlir/test/EDSC/builder-api-test.cpp
+++ b/mlir/test/EDSC/builder-api-test.cpp
@@ -148,7 +148,7 @@
  // CHECK-LABEL: func @builder_loop_for(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
  // CHECK-DAG: [[r0:%[0-9]+]] = affine.apply affine_map<()[s0, s1] -> (s0 - s1)>()[%{{.*}}, %{{.*}}]
  // CHECK-DAG: [[r1:%[0-9]+]] = affine.apply affine_map<()[s0, s1] -> (s0 + s1)>()[%{{.*}}, %{{.*}}]
-  // CHECK-NEXT: loop.for %{{.*}} = [[r0]] to [[r1]] step {{.*}} {
+  // CHECK-NEXT: scf.for %{{.*}} = [[r0]] to [[r1]] step {{.*}} {
  // clang-format on
  f.print(llvm::outs());
  f.erase();
@@ -1094,9 +1094,9 @@
  // CHECK: [[init1:%.*]] = constant
  // CHECK-DAG: [[r0:%[0-9]+]] = affine.apply affine_map<()[s0, s1] -> (s0 - s1)>()[%{{.*}}, %{{.*}}]
  // CHECK-DAG: [[r1:%[0-9]+]] = affine.apply affine_map<()[s0, s1] -> (s0 + s1)>()[%{{.*}}, %{{.*}}]
-  // CHECK-NEXT: [[res:%[0-9]+]]:2 = loop.for %{{.*}} = [[r0]] to [[r1]] step {{.*}} iter_args([[arg0:%.*]] = [[init0]], [[arg1:%.*]] = [[init1]]) -> (f32, f32) {
+  // CHECK-NEXT: [[res:%[0-9]+]]:2 = scf.for %{{.*}} = [[r0]] to [[r1]] step {{.*}} iter_args([[arg0:%.*]] = [[init0]], [[arg1:%.*]] = [[init1]]) -> (f32, f32) {
  // CHECK: [[sum:%[0-9]+]] = addf [[arg0]], [[arg1]] : f32
-  // CHECK: loop.yield [[arg1]], [[sum]] : f32, f32
+  // CHECK: scf.yield [[arg1]], [[sum]] : f32, f32
  // CHECK: addf [[res]]#0, [[res]]#1 : f32
  // clang-format on
diff --git a/mlir/test/Transforms/canonicalize-block-merge.mlir b/mlir/test/Transforms/canonicalize-block-merge.mlir
--- a/mlir/test/Transforms/canonicalize-block-merge.mlir
+++ b/mlir/test/Transforms/canonicalize-block-merge.mlir
@@ -163,12 +163,12 @@
  cond_br %cond, ^bb1, ^bb2
^bb1:
-  loop.if %cond {
+  scf.if %cond {
    "foo.op"() : () -> ()
  }
  return
^bb2:
-  loop.if %cond {
+  scf.if %cond {
    "foo.op"() : () -> ()
  }
  return
diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir
--- a/mlir/test/Transforms/canonicalize.mlir
+++ b/mlir/test/Transforms/canonicalize.mlir
@@ -469,9 +469,9 @@
  %M_ = dim %A, 0 : memref
  %K_ = dim %A, 1 : memref
  %N_ = dim %C, 1 : memref
-  loop.for %i = %c0 to %M_ step %c1 {
-    loop.for %j = %c0 to %N_ step %c1 {
-      loop.for %k = %c0 to %K_ step %c1 {
+  scf.for %i = %c0 to %M_ step %c1 {
+    scf.for %j = %c0 to %N_ step %c1 {
+      scf.for %k = %c0 to %K_ step %c1 {
      }
    }
  }
diff --git a/mlir/test/Transforms/loop-coalescing.mlir b/mlir/test/Transforms/loop-coalescing.mlir
--- a/mlir/test/Transforms/loop-coalescing.mlir
+++ b/mlir/test/Transforms/loop-coalescing.mlir
@@ -15,23 +15,23 @@
  %c3 = constant 3 : index
  %c42 = constant 42 : index
  %c56 = constant 56 : index
  // The range of the new loop.
  // CHECK: %[[partial_range:.*]] = muli %[[orig_ub_i]], %[[orig_ub_j]]
  // CHECK-NEXT:%[[range:.*]] = muli %[[partial_range]], %[[orig_ub_k]]
  // Updated loop bounds.
-  // CHECK: loop.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]]
-  loop.for %i = %c0 to %c42 step %c1 {
+  // CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]]
+  scf.for %i = %c0 to %c42 step %c1 {
    // Inner loops must have been removed.
-    // CHECK-NOT: loop.for
+    // CHECK-NOT: scf.for
    // Reconstruct original IVs from the linearized one.
    // CHECK: %[[orig_k:.*]] = remi_signed %[[i]], %[[orig_ub_k]]
    // CHECK: %[[div:.*]] = divi_signed %[[i]], %[[orig_ub_k]]
    // CHECK: %[[orig_j:.*]] = remi_signed %[[div]], %[[orig_ub_j]]
    // CHECK: %[[orig_i:.*]] = divi_signed %[[div]], %[[orig_ub_j]]
-    loop.for %j = %c0 to %c56 step %c1 {
-      loop.for %k = %c0 to %c3 step %c1 {
+    scf.for %j = %c0 to %c56 step %c1 {
+      scf.for %k = %c0 to %c3 step %c1 {
        // CHECK: "use"(%[[orig_i]], %[[orig_j]], %[[orig_k]])
        "use"(%i, %j, %k) : (index, index, index) -> ()
      }
@@ -48,10 +48,10 @@
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %c10 = constant 10 : index
-  // CHECK: loop.for %[[iv:.*]] =
-  loop.for %i = %c1 to %c10 step %c1 {
-    loop.for %j = %c1 to %c10 step %c1 {
-      loop.for %k = %c1 to %c10 step %c1 {
+  // CHECK: scf.for %[[iv:.*]] =
+  scf.for %i = %c1 to %c10 step %c1 {
+    scf.for %j = %c1 to %c10 step %c1 {
+      scf.for %k = %c1 to %c10 step %c1 {
        // CHECK: %[[k_unshifted:.*]] = remi_signed %[[iv]], %[[k_extent:.*]]
        // CHECK: %[[ij:.*]] = divi_signed %[[iv]], %[[k_extent]]
        // CHECK: %[[j_unshifted:.*]] = remi_signed %[[ij]], %[[j_extent:.*]]
@@ -86,14 +86,14 @@
  %c10 = constant 10 : index
  %c17 = constant 17 : index
  // Number of iterations in the outer loop.
  // CHECK: %[[diff_i:.*]] = subi %[[orig_ub_i]], %[[orig_lb_i]]
  // CHECK: %[[c1:.*]] = constant 1
  // CHECK: %[[step_minus_c1:.*]] = subi %[[orig_step_i]], %[[c1]]
  // CHECK: %[[dividend:.*]] = addi %[[diff_i]], %[[step_minus_c1]]
  // CHECK: %[[numiter_i:.*]] = divi_signed %[[dividend]], %[[orig_step_i]]
  // Normalized lower bound and step for the outer loop.
  // CHECK: %[[lb_i:.*]] = constant 0
  // CHECK: %[[step_i:.*]] = constant 1
@@ -101,13 +101,13 @@
  // only capture the final result.
  // CHECK: %[[numiter_j:.*]] = divi_signed {{.*}}, %[[orig_step_j]]
  // New bounds of the outer loop.
  // CHECK: %[[range:.*]] = muli %[[numiter_i]], %[[numiter_j]]
-  // CHECK: loop.for %[[i:.*]] = %[[lb_i]] to %[[range]] step %[[step_i]]
-  loop.for %i = %c5 to %c10 step %c2 {
+  // CHECK: scf.for %[[i:.*]] = %[[lb_i]] to %[[range]] step %[[step_i]]
+  scf.for %i = %c5 to %c10 step %c2 {
    // The inner loop has been removed.
-    // CHECK-NOT: loop.for
-    loop.for %j = %c7 to %c17 step %c3 {
+    // CHECK-NOT: scf.for
+    scf.for %j = %c7 to %c17 step %c3 {
      // The IVs are rewritten.
      // CHECK: %[[normalized_j:.*]] = remi_signed %[[i]], %[[numiter_j]]
      // CHECK: %[[normalized_i:.*]] = divi_signed %[[i]], %[[numiter_j]]
@@ -145,11 +145,11 @@
  // CHECK: %[[range:.*]] = muli %[[numiter1]], %[[numiter2]] : index
  // Check that the outer loop is updated.
-  // CHECK: loop.for %[[i:.*]] = %c0{{.*}} to %[[range]] step %c1
-  loop.for %i = %lb1 to %ub1 step %step1 {
+  // CHECK: scf.for %[[i:.*]] = %c0{{.*}} to %[[range]] step %c1
+  scf.for %i = %lb1 to %ub1 step %step1 {
    // Check that the inner loop is removed.
-    // CHECK-NOT: loop.for
-    loop.for %j = %lb2 to %ub2 step %step2 {
+    // CHECK-NOT: scf.for
+    scf.for %j = %lb2 to %ub2 step %step2 {
      // Remapping of the induction variables.
      // CHECK: %[[normalized_j:.*]] = remi_signed %[[i]], %[[numiter2]] : index
      // CHECK: %[[normalized_i:.*]] = divi_signed %[[i]], %[[numiter2]] : index
@@ -171,19 +171,19 @@
  %c1 = constant 1 : index
  %c10 = constant 10 : index
  // CHECK: %[[outer_range:.*]] = muli
-  // CHECK: loop.for %{{.*}} = %{{.*}} to %[[outer_range]]
-  loop.for %i = %c0 to %c10 step %c1 {
+  // CHECK: scf.for %{{.*}} = %{{.*}} to %[[outer_range]]
+  scf.for %i = %c0 to %c10 step %c1 {
    // Check that the "j" loop was removed and that the inner loops were
    // coalesced as well. The preparation step for coalescing will inject the
    // subtraction operation unlike the IV remapping.
-    // CHECK-NOT: loop.for
+    // CHECK-NOT: scf.for
    // CHECK: subi
-    loop.for %j = %c0 to %c10 step %c1 {
+    scf.for %j = %c0 to %c10 step %c1 {
      // The inner pair of loops is coalesced separately.
-      // CHECK: loop.for
-      loop.for %k = %i to %j step %c1 {
-        // CHECK_NOT: loop.for
-        loop.for %l = %i to %j step %c1 {
+      // CHECK: scf.for
+      scf.for %k = %i to %j step %c1 {
+        // CHECK-NOT: scf.for
+        scf.for %l = %i to %j step %c1 {
          "foo"() : () -> ()
        }
      }
diff --git a/mlir/test/Transforms/loop-fusion-slice-computation.mlir b/mlir/test/Transforms/loop-fusion-slice-computation.mlir
--- a/mlir/test/Transforms/loop-fusion-slice-computation.mlir
+++ b/mlir/test/Transforms/loop-fusion-slice-computation.mlir
@@ -41,7 +41,7 @@
// -----
// Slices at loop depth 1 should only slice the loop bounds of the first loop.
// Slices at loop depth 2 should slice loop bounds of both loops.
// CHECK-LABEL: func @slice_depth2_loop_nest() {
func @slice_depth2_loop_nest() {
@@ -121,7 +121,7 @@
// -----
// Test loop nest which has a smaller outer trip count than its inner loop.
// CHECK-LABEL: func @slice_loop_nest_with_smaller_outer_trip_count() {
func @slice_loop_nest_with_smaller_outer_trip_count() {
  %0 = alloc() : memref<100x100xf32>
diff --git a/mlir/test/Transforms/loop-fusion.mlir b/mlir/test/Transforms/loop-fusion.mlir
--- a/mlir/test/Transforms/loop-fusion.mlir
+++ b/mlir/test/Transforms/loop-fusion.mlir
@@ -242,7 +242,7 @@
  }
  // Should fuse first loop into the second (last loop should not be fused).
  // Should create private memref '%2' for fused loop.
  // CHECK: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[0] : memref<1xf32>
  // CHECK-NEXT: affine.load %{{.*}}[0] : memref<1xf32>
diff --git a/mlir/test/Transforms/loop-invariant-code-motion.mlir b/mlir/test/Transforms/loop-invariant-code-motion.mlir
--- a/mlir/test/Transforms/loop-invariant-code-motion.mlir
+++ b/mlir/test/Transforms/loop-invariant-code-motion.mlir
@@ -228,8 +228,8 @@
  %m = alloc() : memref<10xf32>
  %cf7 = constant 7.0 : f32
  %cf8 = constant 8.0 : f32
-  loop.for %arg0 = %ci0 to %ci10 step %ci1 {
-    loop.for %arg1 = %ci0 to %ci10 step %ci1 {
+  scf.for %arg0 = %ci0 to %ci10 step %ci1 {
+    scf.for %arg1 = %ci0 to %ci10 step %ci1 {
      %v0 = addf %cf7, %cf8 : f32
    }
  }
@@ -249,15 +249,15 @@
  %ci10 = constant 10 : index
  %ci1 = constant 1 : index
  %m = alloc() : memref<10xf32>
-  loop.for %arg0 = %ci0 to %ci10 step %ci1 {
-    loop.for %arg1 = %ci0 to %ci10 step %ci1 {
+  scf.for %arg0 = %ci0 to %ci10 step %ci1 {
+    scf.for %arg1 = %ci0 to %ci10 step %ci1 {
      %v0 = addi %arg0, %arg1 : index
    }
  }
  // CHECK: %0 = alloc() : memref<10xf32>
-  // CHECK-NEXT: loop.for
-  // CHECK-NEXT: loop.for
+  // CHECK-NEXT: scf.for
+  // CHECK-NEXT: scf.for
  // CHECK-NEXT: addi
  return
@@ -271,7 +271,7 @@
  %c1 = constant 1 : index
  %c7 = constant 7 : i32
  %c8 = constant 8 : i32
-  loop.parallel (%arg0, %arg1) = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
+  scf.parallel (%arg0, %arg1) = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
    %v0 = addi %c7, %c8 : i32
    %v3 = addi %arg0, %arg1 : index
  }
@@ -283,7 +283,7 @@
  // CHECK-NEXT: %c7_i32 = constant 7 : i32
  // CHECK-NEXT: %c8_i32 = constant 8 : i32
  // CHECK-NEXT: addi %c7_i32, %c8_i32 : i32
-  // CHECK-NEXT: loop.parallel (%arg0, %arg1) = (%c0, %c0) to (%c10, %c10) step (%c1, %c1)
+  // CHECK-NEXT: scf.parallel (%arg0, %arg1) = (%c0, %c0) to (%c10, %c10) step (%c1, %c1)
  // CHECK-NEXT: addi %arg0, %arg1 : index
  // CHECK-NEXT: yield
  // CHECK-NEXT: }
diff --git a/mlir/test/Transforms/memref-dependence-check.mlir b/mlir/test/Transforms/memref-dependence-check.mlir
--- a/mlir/test/Transforms/memref-dependence-check.mlir
+++ b/mlir/test/Transforms/memref-dependence-check.mlir
@@ -36,7 +36,7 @@
  %0 = alloc() : memref<10xf32>
  %cst = constant 7.000000e+00 : f32
  // There is a dependence from 0 to 1 at depth 1 (common surrounding loops 0)
  // because the first loop with the store dominates the second loop.
  affine.for %i0 = 0 to 10 {
    affine.store %cst, %0[%i0] : memref<10xf32>
    // expected-remark@above {{dependence from 0 to 0 at depth 1 = false}}
@@ -332,7 +332,7 @@
  %a0 = affine.apply affine_map<(d0) -> (d0)> (%i0)
  // Dependence from 0 to 1 at depth 1 is a range because all loads at
  // constant index zero are reads after first store at index zero during
  // first iteration of the loop.
  affine.store %c7, %m[%a0] : memref<100xf32>
  // expected-remark@above {{dependence from 0 to 0 at depth 1 = false}}
  // expected-remark@above {{dependence from 0 to 0 at depth 2 = false}}
@@ -785,7 +785,7 @@
// -----
// Load and store ops access the same elements in strided loop.
// CHECK-LABEL: func @strided_loop_with_dependence_at_depth2
func @strided_loop_with_dependence_at_depth2() {
  %0 = alloc() : memref<10xf32>
diff --git a/mlir/test/Transforms/parallel-loop-collapsing.mlir b/mlir/test/Transforms/parallel-loop-collapsing.mlir
--- a/mlir/test/Transforms/parallel-loop-collapsing.mlir
+++ b/mlir/test/Transforms/parallel-loop-collapsing.mlir
@@ -17,7 +17,7 @@
  %c13 = constant 13 : index
  %c14 = constant 14 : index
-  loop.parallel (%i0, %i1, %i2, %i3, %i4) = (%c0, %c3, %c6, %c9, %c12) to (%c2, %c5, %c8, %c11, %c14)
+  scf.parallel (%i0, %i1, %i2, %i3, %i4) = (%c0, %c3, %c6, %c9, %c12) to (%c2, %c5, %c8, %c11, %c14)
      step (%c1, %c4, %c7, %c10, %c13) {
    %result = "magic.op"(%i0, %i1, %i2, %i3, %i4): (index, index, index, index, index) -> index
  }
@@ -35,7 +35,7 @@
// CHECK: [[C0:%.*]] = constant 0 : index
// CHECK: [[C1:%.*]] = constant 1 : index
// CHECK: [[C2:%.*]] = constant 2 : index
-// CHECK: loop.parallel ([[NEW_I0:%.*]], [[NEW_I1:%.*]], [[NEW_I2:%.*]]) = ([[C0]], [[C0]], [[C0]]) to ([[C2]], [[C1]], [[C1]]) step ([[C1]], [[C1]], [[C1]]) {
+// CHECK: scf.parallel ([[NEW_I0:%.*]], [[NEW_I1:%.*]], [[NEW_I2:%.*]]) = ([[C0]], [[C0]], [[C0]]) to ([[C2]], [[C1]], [[C1]]) step ([[C1]], [[C1]], [[C1]]) {
// CHECK: [[I0:%.*]] = remi_signed [[NEW_I0]], [[C2]] : index
// CHECK: [[VAL_16:%.*]] = muli [[NEW_I1]], [[C13]] : index
// CHECK: [[I4:%.*]] = addi [[VAL_16]], [[C12]] : index
@@ -44,6 +44,6 @@
// CHECK: [[VAL_20:%.*]] = muli [[NEW_I2]], [[C7]] : index
// CHECK: [[I2:%.*]] = addi [[VAL_20]], [[C6]] : index
// CHECK: "magic.op"([[I0]], [[C3]], [[I2]], [[I3]], [[I4]]) : (index, index, index, index, index) -> index
-// CHECK: loop.yield
+// CHECK: scf.yield
// CHECK-NEXT: }
// CHECK-NEXT: return
diff --git a/mlir/test/Transforms/parametric-mapping.mlir b/mlir/test/Transforms/parametric-mapping.mlir
--- a/mlir/test/Transforms/parametric-mapping.mlir
+++ b/mlir/test/Transforms/parametric-mapping.mlir
@@ -9,8 +9,8 @@
  // CHECK: %[[thread_offset:.*]] = muli %[[step]], %[[threads]]#0
  // CHECK: %[[new_lb:.*]] = addi %[[lb]], %[[thread_offset]]
  // CHECK: %[[new_step:.*]] = muli %[[step]], %[[threads]]#1
-  // CHECK: loop.for %{{.*}} = %[[new_lb]] to %[[ub]] step %[[new_step]] {
-  loop.for %i = %lb to %ub step %step {}
+  // CHECK: scf.for %{{.*}} = %[[new_lb]] to %[[ub]] step %[[new_step]] {
+  scf.for %i = %lb to %ub step %step {}
  return
}
@@ -41,7 +41,7 @@
  // new_step = step * gridDim.x * blockDim.x
  // CHECK: %[[new_step:.*]] = muli %[[stepXgdimx]], %[[threads]]#1 : index
  //
-  // CHECK: loop.for %{{.*}} = %[[new_lb]] to %[[ub]] step %[[new_step]] {
-  loop.for %i = %lb to %ub step %step {}
+  // CHECK: scf.for %{{.*}} = %[[new_lb]] to %[[ub]] step %[[new_step]] {
+  scf.for %i = %lb to %ub step %step {}
  return
}
diff --git a/mlir/test/Transforms/parametric-tiling.mlir b/mlir/test/Transforms/parametric-tiling.mlir
--- a/mlir/test/Transforms/parametric-tiling.mlir
+++ b/mlir/test/Transforms/parametric-tiling.mlir
@@ -28,16 +28,16 @@
  // TILE_74-NEXT: %[[diff2_adj:.*]] = addi %[[diff2]], %[[adjustment2]]
  // TILE_74-NEXT: %[[range2:.*]] = divi_signed %[[diff2_adj]], %c2
  // Ceildiv to get the parametric tile size for the second original loop.
  // TILE_74: %[[sum2:.*]] = addi %[[range2]], %c3
  // TILE_74-NEXT: %[[size2:.*]] = divi_signed %[[sum2]], %c4
  // New inner step (original is %c2).
  // TILE_74-NEXT: %[[step2:.*]] = muli %c2, %[[size2]]
  // Updated outer loop(s) use new steps.
-  // COMMON: loop.for %[[i:.*]] = %c2 to %c44 step %[[step]]
-  // TILE_74:loop.for %[[j:.*]] = %c1 to %c44 step %[[step2]]
-  loop.for %i = %c2 to %c44 step %c1 {
+  // COMMON: scf.for %[[i:.*]] = %c2 to %c44 step %[[step]]
+  // TILE_74:scf.for %[[j:.*]] = %c1 to %c44 step %[[step2]]
+  scf.for %i = %c2 to %c44 step %c1 {
    // Upper bound for the inner loop min(%i + %step, %c44).
    // COMMON: %[[stepped:.*]] = addi %[[i]], %[[step]]
    // COMMON-NEXT: cmpi "slt", %c44, %[[stepped]]
@@ -47,15 +47,15 @@
    // TILE_74-NEXT: cmpi "slt", %c44, %[[stepped2]]
    // TILE_74-NEXT: %[[ub2:.*]] = select {{.*}}, %c44, %[[stepped2]]
    // Created inner loop.
-    // COMMON:loop.for %[[ii:.*]] = %[[i]] to %[[ub:.*]] step %c1
+    // COMMON:scf.for %[[ii:.*]] = %[[i]] to %[[ub:.*]] step %c1
    // This loop is not modified in TILE_7 case.
-    // TILE_7: loop.for %[[j:.*]] = %c1 to %c44 step %c2
+    // TILE_7: scf.for %[[j:.*]] = %c1 to %c44 step %c2
    //
    // But is modified in TILE_74 case.
-    // TILE_74:loop.for %[[jj:.*]] = %[[j]] to %[[ub2]] step %c2
-    loop.for %j = %c1 to %c44 step %c2 {
+    // TILE_74:scf.for %[[jj:.*]] = %[[j]] to %[[ub2]] step %c2
+    scf.for %j = %c1 to %c44 step %c2 {
      // The right iterator are used.
      // TILE_7: load %arg0[%[[ii]], %[[j]]]
      // TILE_74: load %arg0[%[[ii]], %[[jj]]]
@@ -87,8 +87,8 @@
  // Constant adjustment for inner loop has been hoisted out.
  // TILE_74: %[[adjustment2:.*]] = subi %c2, %c1_{{.*}}
  // New outer loop.
-  // COMMON: loop.for %[[i:.*]] = %c2 to %c44 step %[[step]]
+  // COMMON: scf.for %[[i:.*]] = %c2 to %c44 step %[[step]]
  // Range of the original inner loop
  //   (upper - lower + step - 1) / step
@@ -97,15 +97,15 @@
  // TILE_74-NEXT: %[[diff2_adj:.*]] = addi %[[diff2]], %[[adjustment2]]
  // TILE_74-NEXT: %[[range2:.*]] = divi_signed %[[diff2_adj]], %c2
  // Ceildiv to get the parametric tile size for the second original loop.
  // TILE_74: %[[sum2:.*]] = addi %[[range2]], %c3
  // TILE_74-NEXT: %[[size2:.*]] = divi_signed %[[sum2]], %c4
  // New inner step (original is %c2).
  // TILE_74-NEXT: %[[step2:.*]] = muli %c2, %[[size2]]
  // New inner loop.
-  // TILE_74:loop.for %[[j:.*]] = %c1 to %[[i]] step %[[step2]]
-  loop.for %i = %c2 to %c44 step %c1 {
+  // TILE_74:scf.for %[[j:.*]] = %c1 to %[[i]] step %[[step2]]
+  scf.for %i = %c2 to %c44 step %c1 {
    // Upper bound for the inner loop min(%i + %step, %c44).
    // COMMON: %[[stepped:.*]] = addi %[[i]], %[[step]]
    // COMMON-NEXT: cmpi "slt", %c44, %[[stepped]]
@@ -114,15 +114,15 @@
    // TILE_74-NEXT: cmpi "slt", %[[i]], %[[stepped2]]
    // TILE_74-NEXT: %[[ub2:.*]] = select {{.*}}, %[[i]], %[[stepped2]]
    //
    // Created inner loop.
-    // COMMON:loop.for %[[ii:.*]] = %[[i]] to %[[ub:.*]] step %c1
+    // COMMON:scf.for %[[ii:.*]] = %[[i]] to %[[ub:.*]] step %c1
    // This loop is not modified in TILE_7 case.
-    // TILE_7: loop.for %[[j:.*]] = %c1 to %[[ii]] step %c2
+    // TILE_7: scf.for %[[j:.*]] = %c1 to %[[ii]] step %c2
    //
    // But is modified in TILE_74 case.
-  // TILE_74:loop.for %[[jj:.*]] = %[[j]] to %[[ub2]] step %c2
-  loop.for %j = %c1 to %i step %c2 {
+  // TILE_74:scf.for %[[jj:.*]] = %[[j]] to %[[ub2]] step %c2
+  scf.for %j = %c1 to %i step %c2 {
   // The right iterator are used.
   // TILE_7: load %arg0[%[[ii]], %[[j]]]
   // TILE_74: load %arg0[%[[ii]], %[[jj]]]
diff --git a/mlir/test/Transforms/sccp-structured.mlir b/mlir/test/Transforms/sccp-structured.mlir
--- a/mlir/test/Transforms/sccp-structured.mlir
+++ b/mlir/test/Transforms/sccp-structured.mlir
@@ -5,15 +5,15 @@
 // CHECK-LABEL: func @simple(
 func @simple(%arg0 : i32) -> i32 {
   // CHECK: %[[CST:.*]] = constant 1 : i32
-  // CHECK-NOT: loop.if
+  // CHECK-NOT: scf.if
   // CHECK: return %[[CST]] : i32
   %cond = constant true
-  %res = loop.if %cond -> (i32) {
+  %res = scf.if %cond -> (i32) {
     %1 = constant 1 : i32
-    loop.yield %1 : i32
+    scf.yield %1 : i32
   } else {
-    loop.yield %arg0 : i32
+    scf.yield %arg0 : i32
   }
   return %res : i32
 }
@@ -24,15 +24,15 @@
 // CHECK-LABEL: func @simple_both_same(
 func @simple_both_same(%cond : i1) -> i32 {
   // CHECK: %[[CST:.*]] = constant 1 : i32
-  // CHECK-NOT: loop.if
+  // CHECK-NOT: scf.if
   // CHECK: return %[[CST]] : i32
-  %res = loop.if %cond -> (i32) {
+  %res = scf.if %cond -> (i32) {
     %1 = constant 1 : i32
-    loop.yield %1 : i32
+    scf.yield %1 : i32
   } else {
     %2 = constant 1 : i32
-    loop.yield %2 : i32
+    scf.yield %2 : i32
   }
   return %res : i32
 }
@@ -42,14 +42,14 @@
 // CHECK-LABEL: func @overdefined_unknown_condition(
 func @overdefined_unknown_condition(%cond : i1, %arg0 : i32) -> i32 {
-  // CHECK: %[[RES:.*]] = loop.if
+  // CHECK: %[[RES:.*]] = scf.if
   // CHECK: return %[[RES]] : i32
-  %res = loop.if %cond -> (i32) {
+  %res = scf.if %cond -> (i32) {
     %1 = constant 1 : i32
-    loop.yield %1 : i32
+    scf.yield %1 : i32
   } else {
-    loop.yield %arg0 : i32
+    scf.yield %arg0 : i32
   }
   return %res : i32
 }
@@ -59,15 +59,15 @@
 // CHECK-LABEL: func @overdefined_different_constants(
 func @overdefined_different_constants(%cond : i1) -> i32 {
-  // CHECK: %[[RES:.*]] = loop.if
+  // CHECK: %[[RES:.*]] = scf.if
   // CHECK: return %[[RES]] : i32
-  %res = loop.if %cond -> (i32) {
+  %res = scf.if %cond -> (i32) {
     %1 = constant 1 : i32
-    loop.yield %1 : i32
+    scf.yield %1 : i32
   } else {
     %2 = constant 2 : i32
-    loop.yield %2 : i32
+    scf.yield %2 : i32
   }
   return %res : i32
 }
@@ -77,13 +77,13 @@
 // CHECK-LABEL: func @simple_loop(
 func @simple_loop(%arg0 : index, %arg1 : index, %arg2 : index) -> i32 {
   // CHECK: %[[CST:.*]] = constant 0 : i32
-  // CHECK-NOT: loop.for
+  // CHECK-NOT: scf.for
   // CHECK: return %[[CST]] : i32
   %s0 = constant 0 : i32
-  %result = loop.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0) -> (i32) {
+  %result = scf.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0) -> (i32) {
     %sn = addi %si, %si : i32
-    loop.yield %sn : i32
+    scf.yield %sn : i32
   }
   return %result : i32
 }
@@ -93,13 +93,13 @@
 // CHECK-LABEL: func @loop_overdefined(
 func @loop_overdefined(%arg0 : index, %arg1 : index, %arg2 : index) -> i32 {
-  // CHECK: %[[RES:.*]] = loop.for
+  // CHECK: %[[RES:.*]] = scf.for
   // CHECK: return %[[RES]] : i32
   %s0 = constant 1 : i32
-  %result = loop.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0) -> (i32) {
+  %result = scf.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %s0) -> (i32) {
     %sn = addi %si, %si : i32
-    loop.yield %sn : i32
+    scf.yield %sn : i32
   }
   return %result : i32
 }
@@ -111,22 +111,22 @@
 // CHECK-LABEL: func @loop_inner_control_flow(
 func @loop_inner_control_flow(%arg0 : index, %arg1 : index, %arg2 : index) -> i32 {
   // CHECK: %[[CST:.*]] = constant 1 : i32
-  // CHECK-NOT: loop.for
-  // CHECK-NOT: loop.if
+  // CHECK-NOT: scf.for
+  // CHECK-NOT: scf.if
   // CHECK: return %[[CST]] : i32
   %cst_1 = constant 1 : i32
-  %result = loop.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %cst_1) -> (i32) {
+  %result = scf.for %i0 = %arg0 to %arg1 step %arg2 iter_args(%si = %cst_1) -> (i32) {
     %cst_20 = constant 20 : i32
     %cond = cmpi "ult", %si, %cst_20 : i32
-    %inner_res = loop.if %cond -> (i32) {
+    %inner_res = scf.if %cond -> (i32) {
       %1 = constant 1 : i32
-      loop.yield %1 : i32
+      scf.yield %1 : i32
     } else {
       %si_inc = addi %si, %cst_1 : i32
-      loop.yield %si_inc : i32
+      scf.yield %si_inc : i32
     }
-    loop.yield %inner_res : i32
+    scf.yield %inner_res : i32
   }
   return %result : i32
 }
diff --git a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir
--- a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir
+++ b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir
@@ -7,7 +7,7 @@
   %c3 = constant 29 : index
   %c4 = constant 3 : index
   %c5 = constant 4 : index
-  loop.parallel (%i0, %i1) = (%c0, %c1) to (%c2, %c3) step (%c4, %c5) {
+  scf.parallel (%i0, %i1) = (%c0, %c1) to (%c2, %c3) step (%c4, %c5) {
     %result = "magic.op"(%i0, %i1): (index, index) -> index
   }
   return
@@ -21,7 +21,7 @@
 // CHECK: [[C6:%.*]] = constant 6 : index
 // CHECK: [[C0:%.*]] = constant 0 : index
 // CHECK: [[C1:%.*]] = constant 1 : index
-// CHECK: loop.parallel ([[NEW_I:%.*]]) = ([[C0]]) to ([[C18]]) step ([[C1]]) {
+// CHECK: scf.parallel ([[NEW_I:%.*]]) = ([[C0]]) to ([[C18]]) step ([[C1]]) {
// CHECK: [[I0_COUNT:%.*]] = remi_signed [[NEW_I]], [[C3]] : index
 // CHECK: [[I1_COUNT:%.*]] = divi_signed [[NEW_I]], [[C6]] : index
 // CHECK: [[VAL_10:%.*]] = muli [[I1_COUNT]], [[C4]] : index
@@ -29,6 +29,6 @@
 // CHECK: [[VAL_12:%.*]] = muli [[I0_COUNT]], [[C3]] : index
 // CHECK: [[I0:%.*]] = addi [[VAL_12]], [[C3]] : index
 // CHECK: "magic.op"([[I0]], [[I1]]) : (index, index) -> index
-// CHECK: loop.yield
+// CHECK: scf.yield
 // CHECK-NEXT: }
 // CHECK-NEXT: return
diff --git a/mlir/test/lib/Transforms/TestLoopMapping.cpp b/mlir/test/lib/Transforms/TestLoopMapping.cpp
--- a/mlir/test/lib/Transforms/TestLoopMapping.cpp
+++ b/mlir/test/lib/Transforms/TestLoopMapping.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements a pass to parametrically map loop.for loops to virtual
+// This file implements a pass to parametrically map scf.for loops to virtual
 // processing element dimensions.
// //===----------------------------------------------------------------------===// diff --git a/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir b/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir --- a/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir +++ b/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir @@ -11,7 +11,7 @@ %c1 = constant 1 : index %cst = constant 1.000000e+00 : f32 %cst_0 = constant 2.000000e+00 : f32 - loop.for %arg2 = %c0 to %c2 step %c1 { + scf.for %arg2 = %c0 to %c2 step %c1 { %0 = load %arg0[%arg2] : memref<2xf32> %1 = addf %0, %cst : f32 store %1, %arg0[%arg2] : memref<2xf32> @@ -46,7 +46,7 @@ // %cst_0 = constant 2.000000e+00 : f32 // %a = alloc() : memref<2xf32> // %b = alloc() : memref<2xf32> -// loop.for %i = %c0 to %c2 step %c1 { +// scf.for %i = %c0 to %c2 step %c1 { // store %cst, %a[%i] : memref<2xf32> // store %cst, %b[%i] : memref<2xf32> // } diff --git a/mlir/test/mlir-opt/commandline.mlir b/mlir/test/mlir-opt/commandline.mlir --- a/mlir/test/mlir-opt/commandline.mlir +++ b/mlir/test/mlir-opt/commandline.mlir @@ -4,11 +4,11 @@ // CHECK: gpu // CHECK: linalg // CHECK: llvm -// CHECK: loop // CHECK: nvvm // CHECK: omp // CHECK: quant // CHECK: rocdl +// CHECK: scf // CHECK: sdbm // CHECK: spv // CHECK: std
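Note on the parametric-tiling.mlir checks above: the renamed CHECK lines still encode the tile-size arithmetic the pass emits, namely range = ceildiv(upper - lower, step), tile size = ceildiv(range, numTiles), and new step = step * size. The standalone sketch below (plain C++, not part of this patch; ceilDiv is a hypothetical helper mirroring the addi + divi_signed sequences in the test) reproduces the values that the %[[step]], %[[range2]], %[[size2]], and %[[step2]] captures bind to in the rectangular case:

#include <cstdio>

// Ceiling division for positive operands; mirrors the
// (a + b - 1) / b pattern checked via addi + divi_signed.
static int ceilDiv(int a, int b) { return (a + b - 1) / b; }

int main() {
  // Outer loop: scf.for %i = %c2 to %c44 step %c1, tiled by 7 (COMMON).
  int range = ceilDiv(44 - 2, 1); // 42 iterations
  int size = ceilDiv(range, 7);   // 6
  int step = 1 * size;            // 6  -> %[[step]]
  std::printf("outer: range=%d size=%d step=%d\n", range, size, step);

  // Inner loop: scf.for %j = %c1 to %c44 step %c2, tiled by 4 (TILE_74).
  int range2 = ceilDiv(44 - 1, 2); // 22 -> %[[range2]]
  int size2 = ceilDiv(range2, 4);  // 6  -> %[[size2]]
  int step2 = 2 * size2;           // 12 -> %[[step2]]
  std::printf("inner: range2=%d size2=%d step2=%d\n", range2, size2, step2);
  return 0;
}

The triangular case in the same test follows the identical structure; there the inner upper bound is the loop-dependent %[[i]], which is why the checks recompute %[[range2]], %[[size2]], and %[[step2]] inside the outer loop.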