diff --git a/mlir/docs/BufferDeallocationInternals.md b/mlir/docs/BufferDeallocationInternals.md
--- a/mlir/docs/BufferDeallocationInternals.md
+++ b/mlir/docs/BufferDeallocationInternals.md
@@ -779,8 +779,8 @@ ## Known Limitations
 BufferDeallocation introduces additional copies using allocations from the
-“std” dialect (“std.alloc”). Analogous, all deallocations use the “std”
-dialect-free operation “std.dealloc”. The actual copy process is realized using
-“linalg.copy”. Furthermore, buffers are essentially immutable after their
-creation in a block. Another limitations are known in the case using
-unstructered control flow.
+“memref” dialect (“memref.alloc”). Analogously, all deallocations use the
+corresponding “memref.dealloc” operation from the “memref” dialect. The actual
+copy process is realized using “linalg.copy”. Furthermore, buffers are
+essentially immutable after their creation in a block. Further limitations
+apply when using unstructured control flow.
diff --git a/mlir/docs/Bufferization.md b/mlir/docs/Bufferization.md
--- a/mlir/docs/Bufferization.md
+++ b/mlir/docs/Bufferization.md
@@ -190,8 +190,8 @@
 `BufferizeTypeConverter`, which comes pre-loaded with the necessary
 conversions and materializations between `tensor` and `memref`.
 
-In this case, the `StandardOpsDialect` is marked as legal, so the `tensor_load`
-and `tensor_to_memref` ops, which are inserted automatically by the dialect
+In this case, the `memref::MemRefDialect` is marked as legal, so the `tensor_load`
+and `buffer_cast` ops, which are inserted automatically by the dialect
 conversion framework as materializations, are legal. There is a helper
 `populateBufferizeMaterializationLegality`
 ([code](https://github.com/llvm/llvm-project/blob/a0b65a7bcd6065688189b3d678c42ed6af9603db/mlir/include/mlir/Transforms/Bufferize.h#L53))
@@ -247,7 +247,7 @@
 The easiest way to write a finalizing bufferize pass is to not write one at all!
 MLIR provides a pass `finalizing-bufferize` which eliminates the `tensor_load` /
-`tensor_to_memref` materialization ops inserted by partial bufferization passes
+`buffer_cast` materialization ops inserted by partial bufferization passes
 and emits an error if that is not sufficient to remove all tensors from the
 program.
 
@@ -268,7 +268,7 @@
 `populateEliminateBufferizeMaterializationsPatterns`
 ([code](https://github.com/llvm/llvm-project/blob/a0b65a7bcd6065688189b3d678c42ed6af9603db/mlir/include/mlir/Transforms/Bufferize.h#L58))
 is available for such passes to provide patterns that eliminate `tensor_load`
-and `tensor_to_memref`.
+and `buffer_cast`.
 
 ## Changes since [the talk](#the-talk)
diff --git a/mlir/docs/Dialects/Linalg.md b/mlir/docs/Dialects/Linalg.md
--- a/mlir/docs/Dialects/Linalg.md
+++ b/mlir/docs/Dialects/Linalg.md
@@ -406,9 +406,9 @@
 #map0 = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>
 
 func @example(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
-  %0 = memref_cast %arg0 : memref<?x?xf32> to memref<?x?xf32, #map0>
-  %1 = memref_cast %arg1 : memref<?x?xf32> to memref<?x?xf32, #map0>
-  %2 = memref_cast %arg2 : memref<?x?xf32> to memref<?x?xf32, #map0>
+  %0 = memref.cast %arg0 : memref<?x?xf32> to memref<?x?xf32, #map0>
+  %1 = memref.cast %arg1 : memref<?x?xf32> to memref<?x?xf32, #map0>
+  %2 = memref.cast %arg2 : memref<?x?xf32> to memref<?x?xf32, #map0>
   call @pointwise_add(%0, %1, %2) : (memref<?x?xf32, #map0>, memref<?x?xf32, #map0>, memref<?x?xf32, #map0>) -> ()
   return
 }
@@ -518,9 +518,9 @@
 generally alias the operand `view`. At the moment the existing ops are:
 
 ```
-* `std.view`,
+* `memref.view`,
 * `std.subview`,
-* `std.transpose`.
+* `memref.transpose`.
* `linalg.range`, * `linalg.slice`, * `linalg.reshape`, diff --git a/mlir/docs/Dialects/Standard.md b/mlir/docs/Dialects/MemRef.md copy from mlir/docs/Dialects/Standard.md copy to mlir/docs/Dialects/MemRef.md --- a/mlir/docs/Dialects/Standard.md +++ b/mlir/docs/Dialects/MemRef.md @@ -1,9 +1,6 @@ -# 'std' Dialect +# 'memref' Dialect -This dialect provides documentation for operations within the Standard dialect. - -Note: This dialect is a collection of operations for several different concepts, -and should be split into multiple more-focused dialects accordingly. +This dialect provides documentation for operations within the MemRef dialect. **Please post an RFC on the [forum](https://llvm.discourse.group/c/mlir/31) before adding or changing any operation in this dialect.** @@ -12,7 +9,7 @@ ## Operations -[include "Dialects/StandardOps.md"] +[include "Dialects/MemRefOps.md"] ### 'dma_start' operation diff --git a/mlir/docs/Dialects/Standard.md b/mlir/docs/Dialects/Standard.md --- a/mlir/docs/Dialects/Standard.md +++ b/mlir/docs/Dialects/Standard.md @@ -13,67 +13,3 @@ ## Operations [include "Dialects/StandardOps.md"] - -### 'dma_start' operation - -Syntax: - -``` -operation ::= `dma_start` ssa-use`[`ssa-use-list`]` `,` - ssa-use`[`ssa-use-list`]` `,` ssa-use `,` - ssa-use`[`ssa-use-list`]` (`,` ssa-use `,` ssa-use)? - `:` memref-type `,` memref-type `,` memref-type -``` - -Starts a non-blocking DMA operation that transfers data from a source memref to -a destination memref. The operands include the source and destination memref's -each followed by its indices, size of the data transfer in terms of the number -of elements (of the elemental type of the memref), a tag memref with its -indices, and optionally two additional arguments corresponding to the stride (in -terms of number of elements) and the number of elements to transfer per stride. -The tag location is used by a dma_wait operation to check for completion. The -indices of the source memref, destination memref, and the tag memref have the -same restrictions as any load/store operation in an affine context (whenever DMA -operations appear in an affine context). See -[restrictions on dimensions and symbols](Affine.md#restrictions-on-dimensions-and-symbols) -in affine contexts. This allows powerful static analysis and transformations in -the presence of such DMAs including rescheduling, pipelining / overlap with -computation, and checking for matching start/end operations. The source and -destination memref need not be of the same dimensionality, but need to have the -same elemental type. - -For example, a `dma_start` operation that transfers 32 vector elements from a -memref `%src` at location `[%i, %j]` to memref `%dst` at `[%k, %l]` would be -specified as shown below. - -Example: - -```mlir -%size = constant 32 : index -%tag = alloc() : memref<1 x i32, affine_map<(d0) -> (d0)>, 4> -%idx = constant 0 : index -dma_start %src[%i, %j], %dst[%k, %l], %size, %tag[%idx] : - memref<40 x 8 x vector<16xf32>, affine_map<(d0, d1) -> (d0, d1)>, 0>, - memref<2 x 4 x vector<16xf32>, affine_map<(d0, d1) -> (d0, d1)>, 2>, - memref<1 x i32>, affine_map<(d0) -> (d0)>, 4> -``` - -### 'dma_wait' operation - -Syntax: - -``` -operation ::= `dma_wait` ssa-use`[`ssa-use-list`]` `,` ssa-use `:` memref-type -``` - -Blocks until the completion of a DMA operation associated with the tag element -specified with a tag memref and its indices. 
The operands include the tag memref
-followed by its indices and the number of elements associated with the DMA being
-waited on. The indices of the tag memref have the same restrictions as
-load/store indices.
-
-Example:
-
-```mlir
-dma_wait %tag[%idx], %size : memref<1 x i32, affine_map<(d0) -> (d0)>, 4>
-```
diff --git a/mlir/docs/Rationale/UsageOfConst.md b/mlir/docs/Rationale/UsageOfConst.md
--- a/mlir/docs/Rationale/UsageOfConst.md
+++ b/mlir/docs/Rationale/UsageOfConst.md
@@ -200,7 +200,7 @@ ### The `OpPointer` and `ConstOpPointer` Classes
 
 The "typed operation" classes for registered operations (e.g. like `DimOp` for
-the "std.dim" operation in standard ops) contain a pointer to an operation and
+the "memref.dim" operation in memref ops) contain a pointer to an operation and
 provide typed APIs for processing it.
 
 However, this is a problem for our current `const` design - `const DimOp` means
diff --git a/mlir/docs/Traits.md b/mlir/docs/Traits.md
--- a/mlir/docs/Traits.md
+++ b/mlir/docs/Traits.md
@@ -211,7 +211,7 @@
 This trait is carried by region holding operations that define a new scope for
 automatic allocation. Such allocations are automatically freed when control is
 transferred back from the regions of such operations. As an example, allocations
-performed by [`std.alloca`](Dialects/Standard.md#stdalloca-allocaop) are
+performed by [`memref.alloca`](Dialects/MemRef.md#memrefalloca-allocaop) are
 automatically freed when control leaves the region of its closest surrounding
 op that has the trait AutomaticAllocationScope.
diff --git a/mlir/docs/Tutorials/Toy/Ch-5.md b/mlir/docs/Tutorials/Toy/Ch-5.md
--- a/mlir/docs/Tutorials/Toy/Ch-5.md
+++ b/mlir/docs/Tutorials/Toy/Ch-5.md
@@ -50,8 +50,9 @@ ## Conversion Target
 
 For our purposes, we want to convert the compute-intensive `Toy` operations into
-a combination of operations from the `Affine` `Standard` dialects for further
-optimization. To start off the lowering, we first define our conversion target:
+a combination of operations from the `Affine`, `MemRef` and `Standard` dialects
+for further optimization. To start off the lowering, we first define our
+conversion target:
 
 ```c++
 void ToyToAffineLoweringPass::runOnFunction() {
@@ -61,8 +62,9 @@
   // We define the specific operations, or dialects, that are legal targets for
   // this lowering. In our case, we are lowering to a combination of the
-  // `Affine` and `Standard` dialects.
-  target.addLegalDialect<AffineDialect, StandardOpsDialect>();
+  // `Affine`, `MemRef` and `Standard` dialects.
+  target.addLegalDialect<AffineDialect, memref::MemRefDialect,
+                         StandardOpsDialect>();
 
   // We also define the Toy dialect as Illegal so that the conversion will fail
   // if any of these operations are *not* converted. Given that we actually want
diff --git a/mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp b/mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp
--- a/mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp
+++ b/mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp
@@ -7,8 +7,8 @@
 //===----------------------------------------------------------------------===//
 //
 // This file implements a partial lowering of Toy operations to a combination of
-// affine loops and standard operations. This lowering expects that all calls
-// have been inlined, and all shapes have been resolved.
+// affine loops, memref operations and standard operations. This lowering
+// expects that all calls have been inlined, and all shapes have been resolved.
// //===----------------------------------------------------------------------===// @@ -16,6 +16,7 @@ #include "toy/Passes.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" @@ -36,7 +37,7 @@ /// Insert an allocation and deallocation for the given MemRefType. static Value insertAllocAndDealloc(MemRefType type, Location loc, PatternRewriter &rewriter) { - auto alloc = rewriter.create(loc, type); + auto alloc = rewriter.create(loc, type); // Make sure to allocate at the beginning of the block. auto *parentBlock = alloc->getBlock(); @@ -44,7 +45,7 @@ // Make sure to deallocate this alloc at the end of the block. This is fine // as toy functions have no control flow. - auto dealloc = rewriter.create(loc, alloc); + auto dealloc = rewriter.create(loc, alloc); dealloc->moveBefore(&parentBlock->back()); return alloc; } @@ -152,8 +153,8 @@ if (!valueShape.empty()) { for (auto i : llvm::seq( - 0, *std::max_element(valueShape.begin(), valueShape.end()))) - constantIndices.push_back(rewriter.create(loc, i)); + 0, *std::max_element(valueShape.begin(), valueShape.end()))) + constantIndices.push_back(rewriter.create(loc, i)); } else { // This is the case of a tensor of rank 0. constantIndices.push_back(rewriter.create(loc, 0)); @@ -257,7 +258,7 @@ struct ToyToAffineLoweringPass : public PassWrapper { void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); + registry.insert(); } void runOnFunction() final; }; @@ -283,8 +284,9 @@ // We define the specific operations, or dialects, that are legal targets for // this lowering. In our case, we are lowering to a combination of the - // `Affine` and `Standard` dialects. - target.addLegalDialect(); + // `Affine`, `MemRef` and `Standard` dialects. + target.addLegalDialect(); // We also define the Toy dialect as Illegal so that the conversion will fail // if any of these operations are *not* converted. Given that we actually want diff --git a/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp b/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp --- a/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp +++ b/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// // // This file implements a partial lowering of Toy operations to a combination of -// affine loops and standard operations. This lowering expects that all calls -// have been inlined, and all shapes have been resolved. +// affine loops, memref operations and standard operations. This lowering +// expects that all calls have been inlined, and all shapes have been resolved. // //===----------------------------------------------------------------------===// @@ -16,6 +16,7 @@ #include "toy/Passes.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" @@ -36,7 +37,7 @@ /// Insert an allocation and deallocation for the given MemRefType. static Value insertAllocAndDealloc(MemRefType type, Location loc, PatternRewriter &rewriter) { - auto alloc = rewriter.create(loc, type); + auto alloc = rewriter.create(loc, type); // Make sure to allocate at the beginning of the block. 
auto *parentBlock = alloc->getBlock(); @@ -44,7 +45,7 @@ // Make sure to deallocate this alloc at the end of the block. This is fine // as toy functions have no control flow. - auto dealloc = rewriter.create(loc, alloc); + auto dealloc = rewriter.create(loc, alloc); dealloc->moveBefore(&parentBlock->back()); return alloc; } @@ -152,8 +153,8 @@ if (!valueShape.empty()) { for (auto i : llvm::seq( - 0, *std::max_element(valueShape.begin(), valueShape.end()))) - constantIndices.push_back(rewriter.create(loc, i)); + 0, *std::max_element(valueShape.begin(), valueShape.end()))) + constantIndices.push_back(rewriter.create(loc, i)); } else { // This is the case of a tensor of rank 0. constantIndices.push_back(rewriter.create(loc, 0)); @@ -256,7 +257,7 @@ struct ToyToAffineLoweringPass : public PassWrapper { void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); + registry.insert(); } void runOnFunction() final; }; @@ -282,8 +283,9 @@ // We define the specific operations, or dialects, that are legal targets for // this lowering. In our case, we are lowering to a combination of the - // `Affine` and `Standard` dialects. - target.addLegalDialect(); + // `Affine`, `MemRef` and `Standard` dialects. + target.addLegalDialect(); // We also define the Toy dialect as Illegal so that the conversion will fail // if any of these operations are *not* converted. Given that we actually want diff --git a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp --- a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp +++ b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp @@ -30,6 +30,7 @@ #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Pass/Pass.h" @@ -91,7 +92,8 @@ // Generate a call to printf for the current element of the loop. auto printOp = cast(op); - auto elementLoad = rewriter.create(loc, printOp.input(), loopIvs); + auto elementLoad = + rewriter.create(loc, printOp.input(), loopIvs); rewriter.create(loc, printfRef, rewriter.getIntegerType(32), ArrayRef({formatSpecifierCst, elementLoad})); diff --git a/mlir/examples/toy/Ch7/mlir/LowerToAffineLoops.cpp b/mlir/examples/toy/Ch7/mlir/LowerToAffineLoops.cpp --- a/mlir/examples/toy/Ch7/mlir/LowerToAffineLoops.cpp +++ b/mlir/examples/toy/Ch7/mlir/LowerToAffineLoops.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// // // This file implements a partial lowering of Toy operations to a combination of -// affine loops and standard operations. This lowering expects that all calls -// have been inlined, and all shapes have been resolved. +// affine loops, memref operations and standard operations. This lowering +// expects that all calls have been inlined, and all shapes have been resolved. // //===----------------------------------------------------------------------===// @@ -16,6 +16,7 @@ #include "toy/Passes.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" @@ -36,7 +37,7 @@ /// Insert an allocation and deallocation for the given MemRefType. 
static Value insertAllocAndDealloc(MemRefType type, Location loc, PatternRewriter &rewriter) { - auto alloc = rewriter.create(loc, type); + auto alloc = rewriter.create(loc, type); // Make sure to allocate at the beginning of the block. auto *parentBlock = alloc->getBlock(); @@ -44,7 +45,7 @@ // Make sure to deallocate this alloc at the end of the block. This is fine // as toy functions have no control flow. - auto dealloc = rewriter.create(loc, alloc); + auto dealloc = rewriter.create(loc, alloc); dealloc->moveBefore(&parentBlock->back()); return alloc; } @@ -152,8 +153,8 @@ if (!valueShape.empty()) { for (auto i : llvm::seq( - 0, *std::max_element(valueShape.begin(), valueShape.end()))) - constantIndices.push_back(rewriter.create(loc, i)); + 0, *std::max_element(valueShape.begin(), valueShape.end()))) + constantIndices.push_back(rewriter.create(loc, i)); } else { // This is the case of a tensor of rank 0. constantIndices.push_back(rewriter.create(loc, 0)); @@ -257,7 +258,7 @@ struct ToyToAffineLoweringPass : public PassWrapper { void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); + registry.insert(); } void runOnFunction() final; }; @@ -283,8 +284,9 @@ // We define the specific operations, or dialects, that are legal targets for // this lowering. In our case, we are lowering to a combination of the - // `Affine` and `Standard` dialects. - target.addLegalDialect(); + // `Affine`, `MemRef` and `Standard` dialects. + target.addLegalDialect(); // We also define the Toy dialect as Illegal so that the conversion will fail // if any of these operations are *not* converted. Given that we actually want diff --git a/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp --- a/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp +++ b/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp @@ -30,6 +30,7 @@ #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Pass/Pass.h" @@ -91,7 +92,8 @@ // Generate a call to printf for the current element of the loop. 
auto printOp = cast(op); - auto elementLoad = rewriter.create(loc, printOp.input(), loopIvs); + auto elementLoad = + rewriter.create(loc, printOp.input(), loopIvs); rewriter.create(loc, printfRef, rewriter.getIntegerType(32), ArrayRef({formatSpecifierCst, elementLoad})); diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -121,7 +121,7 @@ def ConvertGpuOpsToNVVMOps : Pass<"convert-gpu-to-nvvm", "gpu::GPUModuleOp"> { let summary = "Generate NVVM operations for gpu operations"; let constructor = "mlir::createLowerGpuOpsToNVVMOpsPass()"; - let dependentDialects = ["NVVM::NVVMDialect"]; + let dependentDialects = ["NVVM::NVVMDialect", "memref::MemRefDialect"]; let options = [ Option<"indexBitwidth", "index-bitwidth", "unsigned", /*default=kDeriveIndexBitwidthFromDataLayout*/"0", @@ -210,7 +210,7 @@ let summary = "Convert the operations from the linalg dialect into the " "Standard dialect"; let constructor = "mlir::createConvertLinalgToStandardPass()"; - let dependentDialects = ["StandardOpsDialect"]; + let dependentDialects = ["memref::MemRefDialect", "StandardOpsDialect"]; } //===----------------------------------------------------------------------===// @@ -316,7 +316,11 @@ let summary = "Convert operations from the shape dialect into the standard " "dialect"; let constructor = "mlir::createConvertShapeToStandardPass()"; - let dependentDialects = ["StandardOpsDialect", "scf::SCFDialect"]; + let dependentDialects = [ + "memref::MemRefDialect", + "StandardOpsDialect", + "scf::SCFDialect" + ]; } def ConvertShapeConstraints: Pass<"convert-shape-constraints", "FuncOp"> { @@ -474,7 +478,11 @@ let summary = "Lower the operations from the vector dialect into the SCF " "dialect"; let constructor = "mlir::createConvertVectorToSCFPass()"; - let dependentDialects = ["AffineDialect", "scf::SCFDialect"]; + let dependentDialects = [ + "AffineDialect", + "memref::MemRefDialect", + "scf::SCFDialect" + ]; let options = [ Option<"fullUnroll", "full-unroll", "bool", /*default=*/"false", "Perform full unrolling when converting vector transfers to SCF">, diff --git a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h --- a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h +++ b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h @@ -72,7 +72,8 @@ /// Creates a pass to convert the Standard dialect into the LLVMIR dialect. /// stdlib malloc/free is used by default for allocating memrefs allocated with -/// std.alloc, while LLVM's alloca is used for those allocated with std.alloca. +/// memref.alloc, while LLVM's alloca is used for those allocated with +/// memref.alloca. 
std::unique_ptr> createLowerToLLVMPass(const LowerToLLVMOptions &options = LowerToLLVMOptions::getDefaultOptions()); diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td --- a/mlir/include/mlir/Dialect/Affine/Passes.td +++ b/mlir/include/mlir/Dialect/Affine/Passes.td @@ -18,6 +18,7 @@ def AffineDataCopyGeneration : FunctionPass<"affine-data-copy-generate"> { let summary = "Generate explicit copying for affine memory operations"; let constructor = "mlir::createAffineDataCopyGenerationPass()"; + let dependentDialects = ["memref::MemRefDialect"]; let options = [ Option<"fastMemoryCapacity", "fast-mem-capacity", "uint64_t", /*default=*/"std::numeric_limits::max()", diff --git a/mlir/include/mlir/Dialect/CMakeLists.txt b/mlir/include/mlir/Dialect/CMakeLists.txt --- a/mlir/include/mlir/Dialect/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/CMakeLists.txt @@ -9,6 +9,7 @@ add_subdirectory(Math) add_subdirectory(Linalg) add_subdirectory(LLVMIR) +add_subdirectory(MemRef) add_subdirectory(OpenACC) add_subdirectory(OpenMP) add_subdirectory(PDL) diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td --- a/mlir/include/mlir/Dialect/GPU/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td @@ -480,7 +480,7 @@ %num_bx : index, %num_by : index, %num_bz : index, %num_tx : index, %num_ty : index, %num_tz : index) "some_op"(%bx, %tx) : (index, index) -> () - %3 = "std.load"(%val1, %bx) : (memref, index) -> f32 + %3 = "memref.load"(%val1, %bx) : (memref, index) -> f32 } ``` @@ -812,7 +812,7 @@ let summary = "GPU memory allocation operation."; let description = [{ The `gpu.alloc` operation allocates a region of memory on the GPU. It is - similar to the `std.alloc` op, but supports asynchronous GPU execution. + similar to the `memref.alloc` op, but supports asynchronous GPU execution. The op does not execute before all async dependencies have finished executing. @@ -850,7 +850,7 @@ let description = [{ The `gpu.dealloc` operation frees the region of memory referenced by a memref which was originally created by the `gpu.alloc` operation. It is - similar to the `std.dealloc` op, but supports asynchronous GPU execution. + similar to the `memref.dealloc` op, but supports asynchronous GPU execution. The op does not execute before all async dependencies have finished executing. 
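
A recurring change in this patch is that passes which may create `memref` ops now
declare the new dialect as a dependent dialect, either through `dependentDialects`
in the `Passes.td` files or through `getDependentDialects` in C++. The following is
a minimal sketch of the C++ side; the pass name is hypothetical and not part of the
patch, it simply restates the `registry.insert<...>()` idiom used by the Toy
lowering passes above.

```c++
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Pass/Pass.h"

namespace {
/// Hypothetical pass, shown only to illustrate the pattern: a pass that
/// creates memref ops (e.g. memref.alloc) must ensure the memref dialect is
/// loaded before it runs, since the input IR may not contain memref ops yet.
struct ExampleMemRefCreatingPass
    : public mlir::PassWrapper<ExampleMemRefCreatingPass, mlir::FunctionPass> {
  void getDependentDialects(mlir::DialectRegistry &registry) const override {
    // Mirrors the registry.insert<...>() calls added to the Toy lowerings and
    // the dependentDialects entries added in the Passes.td files.
    registry.insert<mlir::memref::MemRefDialect>();
  }
  void runOnFunction() final {
    // Body elided; a real pass would build memref::AllocOp / memref::DeallocOp
    // here, as the Toy lowerings in this patch do.
  }
};
} // end anonymous namespace
```

The TableGen `dependentDialects = ["memref::MemRefDialect"]` entries added
throughout this patch express the same requirement declaratively.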
diff --git a/mlir/include/mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h b/mlir/include/mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h --- a/mlir/include/mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h +++ b/mlir/include/mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h @@ -11,6 +11,7 @@ #include "mlir/Dialect/Linalg/EDSC/Builders.h" #include "mlir/Dialect/Linalg/EDSC/Intrinsics.h" #include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Transforms/FoldUtils.h" @@ -35,30 +36,25 @@ }; using folded_math_tanh = FoldedValueBuilder; -using folded_std_constant_index = FoldedValueBuilder; -using folded_std_constant_float = FoldedValueBuilder; -using folded_std_constant_int = FoldedValueBuilder; -using folded_std_constant = FoldedValueBuilder; -using folded_std_dim = FoldedValueBuilder; +using folded_memref_alloc = FoldedValueBuilder; +using folded_memref_cast = FoldedValueBuilder; +using folded_memref_dim = FoldedValueBuilder; +using folded_memref_load = FoldedValueBuilder; +using folded_memref_sub_view = FoldedValueBuilder; +using folded_memref_tensor_load = FoldedValueBuilder; +using folded_memref_view = FoldedValueBuilder; using folded_std_muli = FoldedValueBuilder; using folded_std_addi = FoldedValueBuilder; using folded_std_addf = FoldedValueBuilder; -using folded_std_alloc = FoldedValueBuilder; using folded_std_constant = FoldedValueBuilder; using folded_std_constant_float = FoldedValueBuilder; using folded_std_constant_index = FoldedValueBuilder; using folded_std_constant_int = FoldedValueBuilder; -using folded_std_dim = FoldedValueBuilder; using folded_std_index_cast = FoldedValueBuilder; using folded_std_muli = FoldedValueBuilder; using folded_std_mulf = FoldedValueBuilder; -using folded_std_memref_cast = FoldedValueBuilder; using folded_std_select = FoldedValueBuilder; -using folded_std_load = FoldedValueBuilder; using folded_std_subi = FoldedValueBuilder; -using folded_std_sub_view = FoldedValueBuilder; -using folded_std_tensor_load = FoldedValueBuilder; -using folded_std_view = FoldedValueBuilder; using folded_std_zero_extendi = FoldedValueBuilder; using folded_std_sign_extendi = FoldedValueBuilder; using folded_tensor_extract = FoldedValueBuilder; diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgSparseOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgSparseOps.td --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgSparseOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgSparseOps.td @@ -18,7 +18,7 @@ // // The other operations form the bridge between the opaque pointer and // the actual storage of pointers, indices, and values. These operations -// resemble 'tensor_to_memref' in the sense that they map tensors to +// resemble 'buffer_cast' in the sense that they map tensors to // their bufferized memrefs, but they lower into actual calls since // sparse storage does not bufferize into a single memrefs, as dense // tensors do, but into a hierarchical storage scheme where pointers @@ -74,9 +74,9 @@ let description = [{ Returns the pointers array of the sparse storage scheme at the given dimension for the given tensor. This is similar to the - `tensor_to_memref` operation in the sense that it provides a bridge + `buffer_cast` operation in the sense that it provides a bridge between a tensor world view and a bufferized world view. 
Unlike the - `tensor_to_memref` operation, however, this sparse operation actually + `buffer_cast` operation, however, this sparse operation actually lowers into a call into a support library to obtain access to the pointers array. @@ -98,9 +98,9 @@ let description = [{ Returns the indices array of the sparse storage scheme at the given dimension for the given tensor. This is similar to the - `tensor_to_memref` operation in the sense that it provides a bridge + `buffer_cast` operation in the sense that it provides a bridge between a tensor world view and a bufferized world view. Unlike the - `tensor_to_memref` operation, however, this sparse operation actually + `buffer_cast` operation, however, this sparse operation actually lowers into a call into a support library to obtain access to the indices array. @@ -122,9 +122,9 @@ let description = [{ Returns the values array of the sparse storage scheme for the given tensor, independent of the actual dimension. This is similar to the - `tensor_to_memref` operation in the sense that it provides a bridge + `buffer_cast` operation in the sense that it provides a bridge between a tensor world view and a bufferized world view. Unlike the - `tensor_to_memref` operation, however, this sparse operation actually + `buffer_cast` operation, however, this sparse operation actually lowers into a call into a support library to obtain access to the values array. diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h --- a/mlir/include/mlir/Dialect/Linalg/Passes.h +++ b/mlir/include/mlir/Dialect/Linalg/Passes.h @@ -34,11 +34,11 @@ std::unique_ptr> createLinalgPromotionPass(); /// Create a pass to convert Linalg operations to scf.for loops and -/// std.load/std.store accesses. +/// memref.load/memref.store accesses. std::unique_ptr> createConvertLinalgToLoopsPass(); /// Create a pass to convert Linalg operations to scf.parallel loops and -/// std.load/std.store accesses. +/// memref.load/memref.store accesses. std::unique_ptr> createConvertLinalgToParallelLoopsPass(); /// Create a pass to convert Linalg operations to affine.for loops and diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td --- a/mlir/include/mlir/Dialect/Linalg/Passes.td +++ b/mlir/include/mlir/Dialect/Linalg/Passes.td @@ -19,7 +19,7 @@ This pass only converts ops that operate on ranked tensors. 
}]; let constructor = "mlir::createConvertElementwiseToLinalgPass()"; - let dependentDialects = ["linalg::LinalgDialect"]; + let dependentDialects = ["linalg::LinalgDialect", "memref::MemRefDialect"]; } def LinalgFoldUnitExtentDims : FunctionPass<"linalg-fold-unit-extent-dims"> { @@ -70,13 +70,21 @@ "interchange vector", "llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated"> ]; - let dependentDialects = ["linalg::LinalgDialect", "scf::SCFDialect", "AffineDialect"]; + let dependentDialects = [ + "linalg::LinalgDialect", + "scf::SCFDialect", + "AffineDialect" + ]; } def LinalgBufferize : Pass<"linalg-bufferize", "FuncOp"> { let summary = "Bufferize the linalg dialect"; let constructor = "mlir::createLinalgBufferizePass()"; - let dependentDialects = ["linalg::LinalgDialect", "AffineDialect"]; + let dependentDialects = [ + "linalg::LinalgDialect", + "AffineDialect", + "memref::MemRefDialect" + ]; } def LinalgLowerToParallelLoops @@ -90,7 +98,12 @@ "interchange vector", "llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated"> ]; - let dependentDialects = ["AffineDialect", "linalg::LinalgDialect", "scf::SCFDialect"]; + let dependentDialects = [ + "AffineDialect", + "linalg::LinalgDialect", + "memref::MemRefDialect", + "scf::SCFDialect" + ]; } def LinalgPromotion : FunctionPass<"linalg-promote-subviews"> { @@ -109,7 +122,10 @@ let summary = "Tile operations in the linalg dialect"; let constructor = "mlir::createLinalgTilingPass()"; let dependentDialects = [ - "AffineDialect", "linalg::LinalgDialect", "scf::SCFDialect" + "AffineDialect", + "linalg::LinalgDialect", + "memref::MemRefDialect", + "scf::SCFDialect" ]; let options = [ ListOption<"tileSizes", "linalg-tile-sizes", "int64_t", @@ -127,7 +143,12 @@ "Test generation of dynamic promoted buffers", "llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated"> ]; - let dependentDialects = ["AffineDialect", "linalg::LinalgDialect", "scf::SCFDialect"]; + let dependentDialects = [ + "AffineDialect", + "linalg::LinalgDialect", + "memref::MemRefDialect", + "scf::SCFDialect" + ]; } def LinalgGeneralization : FunctionPass<"linalg-generalize-named-ops"> { diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -147,8 +147,8 @@ /// dimension. If that is not possible, contains the dynamic size of the /// subview. The call back should return the buffer to use. using AllocBufferCallbackFn = std::function( - OpBuilder &b, SubViewOp subView, ArrayRef boundingSubViewSize, - OperationFolder *folder)>; + OpBuilder &b, memref::SubViewOp subView, + ArrayRef boundingSubViewSize, OperationFolder *folder)>; /// Callback function type used to deallocate the buffers used to hold the /// promoted subview. @@ -244,7 +244,7 @@ Value partialLocalView; }; Optional -promoteSubviewAsNewBuffer(OpBuilder &b, Location loc, SubViewOp subView, +promoteSubviewAsNewBuffer(OpBuilder &b, Location loc, memref::SubViewOp subView, AllocBufferCallbackFn allocationFn, OperationFolder *folder = nullptr); @@ -818,7 +818,7 @@ /// Match and rewrite for the pattern: /// ``` /// %alloc = ... -/// [optional] %view = std.view %alloc ... +/// [optional] %view = memref.view %alloc ... /// %subView = subview %allocOrView ... /// [optional] linalg.fill(%allocOrView, %cst) ... /// ... @@ -828,7 +828,7 @@ /// into /// ``` /// [unchanged] %alloc = ... 
-/// [unchanged] [optional] %view = std.view %alloc ... +/// [unchanged] [optional] %view = memref.view %alloc ... /// [unchanged] [unchanged] %subView = subview %allocOrView ... /// ... /// vector.transfer_read %in[...], %cst ... @@ -849,7 +849,7 @@ /// Match and rewrite for the pattern: /// ``` /// %alloc = ... -/// [optional] %view = std.view %alloc ... +/// [optional] %view = memref.view %alloc ... /// %subView = subview %allocOrView... /// ... /// vector.transfer_write %..., %allocOrView[...] @@ -858,7 +858,7 @@ /// into /// ``` /// [unchanged] %alloc = ... -/// [unchanged] [optional] %view = std.view %alloc ... +/// [unchanged] [optional] %view = memref.view %alloc ... /// [unchanged] %subView = subview %allocOrView... /// ... /// vector.transfer_write %..., %out[...] diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -13,6 +13,7 @@ #include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h" #include "mlir/Dialect/Linalg/EDSC/Builders.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" @@ -21,7 +22,7 @@ #include "llvm/ADT/SetVector.h" using mlir::edsc::intrinsics::AffineIndexedValue; -using mlir::edsc::intrinsics::StdIndexedValue; +using mlir::edsc::intrinsics::MemRefIndexedValue; namespace mlir { class AffineExpr; @@ -213,7 +214,7 @@ struct GenerateLoopNest { using IndexedValueTy = typename std::conditional::value, - AffineIndexedValue, StdIndexedValue>::type; + AffineIndexedValue, MemRefIndexedValue>::type; static void doit(ArrayRef loopRanges, ValueRange iterArgInitValues, diff --git a/mlir/include/mlir/Dialect/MemRef/CMakeLists.txt b/mlir/include/mlir/Dialect/MemRef/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Dialect/MemRef/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(IR) diff --git a/mlir/include/mlir/Dialect/MemRef/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/MemRef/EDSC/Intrinsics.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Dialect/MemRef/EDSC/Intrinsics.h @@ -0,0 +1,37 @@ +//===- Intrinsics.h - MLIR EDSC Intrinsics for MemRefOps --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef MLIR_DIALECT_MEMREF_EDSC_INTRINSICS_H_ +#define MLIR_DIALECT_MEMREF_EDSC_INTRINSICS_H_ + +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/EDSC/Builders.h" + +namespace mlir { +namespace edsc { +namespace intrinsics { + +using memref_alloc = ValueBuilder; +using memref_alloca = ValueBuilder; +using memref_cast = ValueBuilder; +using memref_dealloc = OperationBuilder; +using memref_dim = ValueBuilder; +using memref_load = ValueBuilder; +using memref_store = OperationBuilder; +using memref_sub_view = ValueBuilder; +using memref_tensor_load = ValueBuilder; +using memref_tensor_store = OperationBuilder; +using memref_view = ValueBuilder; + +/// Provide an index notation around memref_load and memref_store. 
+using MemRefIndexedValue = + TemplatedIndexedValue; +} // namespace intrinsics +} // namespace edsc +} // namespace mlir + +#endif // MLIR_DIALECT_MEMREF_EDSC_INTRINSICS_H_ diff --git a/mlir/include/mlir/Dialect/MemRef/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/MemRef/IR/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Dialect/MemRef/IR/CMakeLists.txt @@ -0,0 +1,2 @@ +add_mlir_dialect(MemRefOps memref) +add_mlir_doc(MemRefOps -gen-dialect-doc MemRefOps Dialects/) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h b/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h copy from mlir/include/mlir/Dialect/StandardOps/IR/Ops.h copy to mlir/include/mlir/Dialect/MemRef/IR/MemRef.h --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h +++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h @@ -1,39 +1,21 @@ -//===- Ops.h - Standard MLIR Operations -------------------------*- C++ -*-===// +//===- MemRef.h - MemRef dialect --------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines convenience types for working with standard operations -// in the MLIR operation set. -// -//===----------------------------------------------------------------------===// -#ifndef MLIR_DIALECT_STANDARDOPS_IR_OPS_H -#define MLIR_DIALECT_STANDARDOPS_IR_OPS_H +#ifndef MLIR_DIALECT_MEMREF_IR_MEMREF_H_ +#define MLIR_DIALECT_MEMREF_IR_MEMREF_H_ -#include "mlir/IR/Builders.h" -#include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Dialect.h" -#include "mlir/IR/OpImplementation.h" #include "mlir/Interfaces/CallInterfaces.h" #include "mlir/Interfaces/CastInterfaces.h" -#include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/SideEffectInterfaces.h" -#include "mlir/Interfaces/VectorInterfaces.h" #include "mlir/Interfaces/ViewLikeInterface.h" -// Pull in all enum type definitions and utility function declarations. -#include "mlir/Dialect/StandardOps/IR/OpsEnums.h.inc" - namespace mlir { -class AffineMap; -class Builder; -class FuncOp; -class OpBuilder; - raw_ostream &operator<<(raw_ostream &os, Range &range); /// Return the list of Range (i.e. offset, size, stride). Each Range @@ -41,75 +23,23 @@ /// with `b` at location `loc`. SmallVector getOrCreateRanges(OffsetSizeAndStrideOpInterface op, OpBuilder &b, Location loc); +} // namespace mlir -#define GET_OP_CLASSES -#include "mlir/Dialect/StandardOps/IR/Ops.h.inc" - -#include "mlir/Dialect/StandardOps/IR/OpsDialect.h.inc" - -/// This is a refinement of the "constant" op for the case where it is -/// returning a float value of FloatType. -/// -/// %1 = "std.constant"(){value: 42.0} : bf16 -/// -class ConstantFloatOp : public ConstantOp { -public: - using ConstantOp::ConstantOp; - - /// Builds a constant float op producing a float of the specified type. - static void build(OpBuilder &builder, OperationState &result, - const APFloat &value, FloatType type); - - APFloat getValue() { - return (*this)->getAttrOfType("value").getValue(); - } - - static bool classof(Operation *op); -}; - -/// This is a refinement of the "constant" op for the case where it is -/// returning an integer value of IntegerType. 
-/// -/// %1 = "std.constant"(){value: 42} : i32 -/// -class ConstantIntOp : public ConstantOp { -public: - using ConstantOp::ConstantOp; - /// Build a constant int op producing an integer of the specified width. - static void build(OpBuilder &builder, OperationState &result, int64_t value, - unsigned width); - - /// Build a constant int op producing an integer with the specified type, - /// which must be an integer type. - static void build(OpBuilder &builder, OperationState &result, int64_t value, - Type type); - - int64_t getValue() { - return (*this)->getAttrOfType("value").getInt(); - } - - static bool classof(Operation *op); -}; - -/// This is a refinement of the "constant" op for the case where it is -/// returning an integer value of Index type. -/// -/// %1 = "std.constant"(){value: 99} : () -> index -/// -class ConstantIndexOp : public ConstantOp { -public: - using ConstantOp::ConstantOp; +//===----------------------------------------------------------------------===// +// MemRef Dialect +//===----------------------------------------------------------------------===// - /// Build a constant int op producing an index. - static void build(OpBuilder &builder, OperationState &result, int64_t value); +#include "mlir/Dialect/MemRef/IR/MemRefOpsDialect.h.inc" - int64_t getValue() { - return (*this)->getAttrOfType("value").getInt(); - } +//===----------------------------------------------------------------------===// +// MemRef Dialect Operations +//===----------------------------------------------------------------------===// - static bool classof(Operation *op); -}; +#define GET_OP_CLASSES +#include "mlir/Dialect/MemRef/IR/MemRefOps.h.inc" +namespace mlir { +namespace memref { // DmaStartOp starts a non-blocking DMA operation that transfers data from a // source memref to a destination memref. The source and destination memref need // not be of the same dimensionality, but need to have the same elemental type. @@ -231,7 +161,7 @@ return isSrcMemorySpaceFaster() ? 0 : getSrcMemRefRank() + 1; } - static StringRef getOperationName() { return "std.dma_start"; } + static StringRef getOperationName() { return "memref.dma_start"; } static ParseResult parse(OpAsmParser &parser, OperationState &result); void print(OpAsmPrinter &p); LogicalResult verify(); @@ -278,7 +208,7 @@ static void build(OpBuilder &builder, OperationState &result, Value tagMemRef, ValueRange tagIndices, Value numElements); - static StringRef getOperationName() { return "std.dma_wait"; } + static StringRef getOperationName() { return "memref.dma_wait"; } // Returns the Tag MemRef associated with the DMA operation being waited on. Value getTagMemRef() { return getOperand(0); } @@ -303,73 +233,7 @@ SmallVectorImpl &results); LogicalResult verify(); }; +} // namespace memref +} // namespace mlir -/// Given an `originalShape` and a `reducedShape` assumed to be a subset of -/// `originalShape` with some `1` entries erased, return the set of indices -/// that specifies which of the entries of `originalShape` are dropped to obtain -/// `reducedShape`. The returned mask can be applied as a projection to -/// `originalShape` to obtain the `reducedShape`. This mask is useful to track -/// which dimensions must be kept when e.g. compute MemRef strides under -/// rank-reducing operations. Return None if reducedShape cannot be obtained -/// by dropping only `1` entries in `originalShape`. 
-llvm::Optional> -computeRankReductionMask(ArrayRef originalShape, - ArrayRef reducedShape); - -/// Determines whether MemRefCastOp casts to a more dynamic version of the -/// source memref. This is useful to to fold a memref_cast into a consuming op -/// and implement canonicalization patterns for ops in different dialects that -/// may consume the results of memref_cast operations. Such foldable memref_cast -/// operations are typically inserted as `view` and `subview` ops and are -/// canonicalized, to preserve the type compatibility of their uses. -/// -/// Returns true when all conditions are met: -/// 1. source and result are ranked memrefs with strided semantics and same -/// element type and rank. -/// 2. each of the source's size, offset or stride has more static information -/// than the corresponding result's size, offset or stride. -/// -/// Example 1: -/// ```mlir -/// %1 = memref_cast %0 : memref<8x16xf32> to memref -/// %2 = consumer %1 ... : memref ... -/// ``` -/// -/// may fold into: -/// -/// ```mlir -/// %2 = consumer %0 ... : memref<8x16xf32> ... -/// ``` -/// -/// Example 2: -/// ``` -/// %1 = memref_cast %0 : memref(16 * i + j)>> -/// to memref -/// consumer %1 : memref ... -/// ``` -/// -/// may fold into: -/// -/// ``` -/// consumer %0 ... : memref(16 * i + j)>> -/// ``` -bool canFoldIntoConsumerOp(MemRefCastOp castOp); - -/// Compute `lhs` `pred` `rhs`, where `pred` is one of the known integer -/// comparison predicates. -bool applyCmpPredicate(CmpIPredicate predicate, const APInt &lhs, - const APInt &rhs); - -/// Compute `lhs` `pred` `rhs`, where `pred` is one of the known floating point -/// comparison predicates. -bool applyCmpPredicate(CmpFPredicate predicate, const APFloat &lhs, - const APFloat &rhs); - -/// Return true if ofr1 and ofr2 are the same integer constant attribute values -/// or the same SSA value. -/// Ignore integer bitwitdh and type mismatch that come from the fact there is -/// no IndexAttr and that IndexType have no bitwidth. -bool isEqualConstantIntOrValue(OpFoldResult ofr1, OpFoldResult ofr2); -} // end namespace mlir - -#endif // MLIR_DIALECT_IR_STANDARDOPS_IR_OPS_H +#endif // MLIR_DIALECT_MEMREF_IR_MEMREF_H_ diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefBase.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefBase.td new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefBase.td @@ -0,0 +1,25 @@ +//===- MemRefBase.td - Base definitions for memref dialect -*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MEMREF_BASE +#define MEMREF_BASE + +include "mlir/IR/OpBase.td" + +def MemRef_Dialect : Dialect { + let name = "memref"; + let cppNamespace = "::mlir::memref"; + let description = [{ + The `memref` dialect is intended to hold core memref creation and + manipulation ops, which are not strongly associated with any particular + other dialect or domain abstraction. 
+ }]; + let hasConstantMaterializer = 1; +} + +#endif // MEMREF_BASE diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td @@ -0,0 +1,1270 @@ +//===- MemRefOps.td - MemRef op definitions ----------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MEMREF_OPS +#define MEMREF_OPS + +include "mlir/Dialect/MemRef/IR/MemRefBase.td" +include "mlir/Interfaces/CastInterfaces.td" +include "mlir/Interfaces/SideEffectInterfaces.td" +include "mlir/Interfaces/ViewLikeInterface.td" +include "mlir/IR/SymbolInterfaces.td" + +class MemRef_Op traits = []> + : Op { + let printer = [{ return ::print(p, *this); }]; + let verifier = [{ return ::verify(*this); }]; + let parser = [{ return ::parse$cppClass(parser, result); }]; +} + +//===----------------------------------------------------------------------===// +// AllocLikeOp +//===----------------------------------------------------------------------===// + +// Base class for memref allocating ops: alloca and alloc. +// +// %0 = alloclike(%m)[%s] : memref<8x?xf32, (d0, d1)[s0] -> ((d0 + s0), d1)> +// +class AllocLikeOp traits = []> : + MemRef_Op { + + let arguments = (ins Variadic:$dynamicSizes, + // The symbolic operands (the ones in square brackets) bind + // to the symbols of the memref's layout map. + Variadic:$symbolOperands, + Confined, [IntMinValue<0>]>:$alignment); + let results = (outs Res]>:$memref); + + let builders = [ + OpBuilder<(ins "MemRefType":$memrefType, + CArg<"IntegerAttr", "IntegerAttr()">:$alignment), [{ + return build($_builder, $_state, memrefType, {}, alignment); + }]>, + OpBuilder<(ins "MemRefType":$memrefType, "ValueRange":$dynamicSizes, + CArg<"IntegerAttr", "IntegerAttr()">:$alignment), [{ + return build($_builder, $_state, memrefType, dynamicSizes, {}, alignment); + }]>, + OpBuilder<(ins "MemRefType":$memrefType, "ValueRange":$dynamicSizes, + "ValueRange":$symbolOperands, + CArg<"IntegerAttr", "{}">:$alignment), [{ + $_state.types.push_back(memrefType); + $_state.addOperands(dynamicSizes); + $_state.addOperands(symbolOperands); + $_state.addAttribute(getOperandSegmentSizeAttr(), + $_builder.getI32VectorAttr({ + static_cast(dynamicSizes.size()), + static_cast(symbolOperands.size())})); + if (alignment) + $_state.addAttribute(getAlignmentAttrName(), alignment); + }]>]; + + let extraClassDeclaration = [{ + static StringRef getAlignmentAttrName() { return "alignment"; } + + MemRefType getType() { return getResult().getType().cast(); } + + /// Returns the dynamic sizes for this alloc operation if specified. + operand_range getDynamicSizes() { return dynamicSizes(); } + }]; + + let assemblyFormat = [{ + `(`$dynamicSizes`)` (`` `[` $symbolOperands^ `]`)? 
attr-dict `:` type($memref) + }]; + + let hasCanonicalizer = 1; +} + +//===----------------------------------------------------------------------===// +// AssumeAlignmentOp +//===----------------------------------------------------------------------===// + +def AssumeAlignmentOp : MemRef_Op<"assume_alignment"> { + let summary = + "assertion that gives alignment information to the input memref"; + let description = [{ + The `assume_alignment` operation takes a memref and an integer of alignment + value, and internally annotates the buffer with the given alignment. If + the buffer isn't aligned to the given alignment, the behavior is undefined. + + This operation doesn't affect the semantics of a correct program. It's for + optimization only, and the optimization is best-effort. + }]; + let arguments = (ins AnyMemRef:$memref, + Confined:$alignment); + let results = (outs); + + let assemblyFormat = "$memref `,` $alignment attr-dict `:` type($memref)"; +} + +//===----------------------------------------------------------------------===// +// BaseOpWithOffsetSizesAndStrides +//===----------------------------------------------------------------------===// + +// Base class for ops with static/dynamic offset, sizes and strides +// attributes/arguments. +class BaseOpWithOffsetSizesAndStrides traits = []> : + MemRef_Op { + code extraBaseClassDeclaration = [{ + /// Returns the dynamic sizes for this subview operation if specified. + operand_range getDynamicSizes() { return sizes(); } + + /// Return the list of Range (i.e. offset, size, stride). Each + /// Range entry contains either the dynamic value or a ConstantIndexOp + /// constructed with `b` at location `loc`. + SmallVector getOrCreateRanges(OpBuilder &b, Location loc) { + return mlir::getOrCreateRanges(*this, b, loc); + } + }]; +} + +//===----------------------------------------------------------------------===// +// AllocOp +//===----------------------------------------------------------------------===// + +def MemRef_AllocOp : AllocLikeOp<"alloc", DefaultResource> { + let summary = "memory allocation operation"; + let description = [{ + The `alloc` operation allocates a region of memory, as specified by its + memref type. + + Example: + + ```mlir + %0 = memref.alloc() : memref<8x64xf32, 1> + ``` + + The optional list of dimension operands are bound to the dynamic dimensions + specified in its memref type. In the example below, the ssa value '%d' is + bound to the second dimension of the memref (which is dynamic). + + ```mlir + %0 = memref.alloc(%d) : memref<8x?xf32, 1> + ``` + + The optional list of symbol operands are bound to the symbols of the + memrefs affine map. In the example below, the ssa value '%s' is bound to + the symbol 's0' in the affine map specified in the allocs memref type. + + ```mlir + %0 = memref.alloc()[%s] : memref<8x64xf32, + affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>, 1> + ``` + + This operation returns a single ssa value of memref type, which can be used + by subsequent load and store operations. + + The optional `alignment` attribute may be specified to ensure that the + region of memory that will be indexed is aligned at the specified byte + boundary. 
+ + ```mlir + %0 = memref.alloc()[%s] {alignment = 8} : + memref<8x64xf32, affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>, 1> + ``` + }]; +} + +//===----------------------------------------------------------------------===// +// AllocaOp +//===----------------------------------------------------------------------===// + +def MemRef_AllocaOp : AllocLikeOp<"alloca", AutomaticAllocationScopeResource> { + let summary = "stack memory allocation operation"; + let description = [{ + The `alloca` operation allocates memory on the stack, to be automatically + released when control transfers back from the region of its closest + surrounding operation with an + [`AutomaticAllocationScope`](../Traits.md#automaticallocationscope) trait. + The amount of memory allocated is specified by its memref and additional + operands. For example: + + ```mlir + %0 = memref.alloca() : memref<8x64xf32> + ``` + + The optional list of dimension operands are bound to the dynamic dimensions + specified in its memref type. In the example below, the SSA value '%d' is + bound to the second dimension of the memref (which is dynamic). + + ```mlir + %0 = memref.alloca(%d) : memref<8x?xf32> + ``` + + The optional list of symbol operands are bound to the symbols of the + memref's affine map. In the example below, the SSA value '%s' is bound to + the symbol 's0' in the affine map specified in the allocs memref type. + + ```mlir + %0 = memref.alloca()[%s] : memref<8x64xf32, + affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>> + ``` + + This operation returns a single SSA value of memref type, which can be used + by subsequent load and store operations. An optional alignment attribute, if + specified, guarantees alignment at least to that boundary. If not specified, + an alignment on any convenient boundary compatible with the type will be + chosen. + }]; +} + +//===----------------------------------------------------------------------===// +// BufferCastOp +//===----------------------------------------------------------------------===// + +def MemRef_BufferCastOp : MemRef_Op<"buffer_cast", + [SameOperandsAndResultShape, SameOperandsAndResultElementType, + TypesMatchWith<"type of 'tensor' is the tensor equivalent of 'memref'", + "memref", "tensor", + "getTensorTypeFromMemRefType($_self)">]> { + let summary = "tensor to memref cast operation"; + let description = [{ + Casts a tensor to a memref. + + ```mlir + // Result type is tensor<4x?xf32> + %12 = memref.buffer_cast %10 : memref<4x?xf32, #map0, 42> + ``` + }]; + + let arguments = (ins AnyTensor:$tensor); + let results = (outs AnyRankedOrUnrankedMemRef:$memref); + // This op is fully verified by traits. + let verifier = ?; + + let assemblyFormat = "$tensor attr-dict `:` type($memref)"; + + let hasFolder = 1; + let hasCanonicalizer = 1; +} + +//===----------------------------------------------------------------------===// +// CastOp +//===----------------------------------------------------------------------===// + +def MemRef_CastOp : MemRef_Op<"cast", [ + NoSideEffect, SameOperandsAndResultShape, + DeclareOpInterfaceMethods + ]> { + let summary = "memref cast operation"; + let description = [{ + Syntax: + + ``` + operation ::= ssa-id `=` `memref.cast` ssa-use `:` type `to` type + ``` + + The `memref.cast` operation converts a memref from one type to an equivalent + type with a compatible shape. The source and destination types are + compatible if: + + a. Both are ranked memref types with the same element type, address space, + and rank and: + 1. 
Both have the same layout or both have compatible strided layouts. + 2. The individual sizes (resp. offset and strides in the case of strided + memrefs) may convert constant dimensions to dynamic dimensions and + vice-versa. + + If the cast converts any dimensions from an unknown to a known size, then it + acts as an assertion that fails at runtime if the dynamic dimensions + disagree with resultant destination size. + + Example: + + ```mlir + // Assert that the input dynamic shape matches the destination static shape. + %2 = memref.cast %1 : memref to memref<4x4xf32> + // Erase static shape information, replacing it with dynamic information. + %3 = memref.cast %1 : memref<4xf32> to memref + + // The same holds true for offsets and strides. + + // Assert that the input dynamic shape matches the destination static stride. + %4 = memref.cast %1 : memref<12x4xf32, offset:?, strides: [?, ?]> to + memref<12x4xf32, offset:5, strides: [4, 1]> + // Erase static offset and stride information, replacing it with + // dynamic information. + %5 = memref.cast %1 : memref<12x4xf32, offset:5, strides: [4, 1]> to + memref<12x4xf32, offset:?, strides: [?, ?]> + ``` + + b. Either or both memref types are unranked with the same element type, and + address space. + + Example: + + ```mlir + Cast to concrete shape. + %4 = memref.cast %1 : memref<*xf32> to memref<4x?xf32> + + Erase rank information. + %5 = memref.cast %1 : memref<4x?xf32> to memref<*xf32> + ``` + }]; + + let arguments = (ins AnyRankedOrUnrankedMemRef:$source); + let results = (outs AnyRankedOrUnrankedMemRef:$dest); + let assemblyFormat = "$source attr-dict `:` type($source) `to` type($dest)"; + let verifier = "return impl::verifyCastOp(*this, areCastCompatible);"; + let builders = [ + OpBuilder<(ins "Value":$source, "Type":$destType), [{ + impl::buildCastOp($_builder, $_state, source, destType); + }]> + ]; + + let extraClassDeclaration = [{ + /// Fold the given CastOp into consumer op. + static bool canFoldIntoConsumerOp(CastOp castOp); + }]; + + let hasFolder = 1; +} + +//===----------------------------------------------------------------------===// +// DeallocOp +//===----------------------------------------------------------------------===// + +def MemRef_DeallocOp : MemRef_Op<"dealloc", [MemRefsNormalizable]> { + let summary = "memory deallocation operation"; + let description = [{ + The `dealloc` operation frees the region of memory referenced by a memref + which was originally created by the `alloc` operation. + The `dealloc` operation should not be called on memrefs which alias an + alloc'd memref (e.g. memrefs returned by `view` operations). + + Example: + + ```mlir + %0 = memref.alloc() : memref<8x64xf32, (d0, d1) -> (d0, d1), 1> + memref.dealloc %0 : memref<8x64xf32, (d0, d1) -> (d0, d1), 1> + ``` + }]; + + let arguments = (ins Arg:$memref); + + let hasCanonicalizer = 1; + let hasFolder = 1; + let assemblyFormat = "$memref attr-dict `:` type($memref)"; +} + +//===----------------------------------------------------------------------===// +// DimOp +//===----------------------------------------------------------------------===// + +def DimOp : MemRef_Op<"dim", [NoSideEffect]> { + let summary = "dimension index operation"; + let description = [{ + The `dim` operation takes a memref and a dimension operand of type `index`. + It returns the size of the requested dimension of the given memref. + If the dimension index is out of bounds the behavior is undefined. + + The specified memref type is that of the first operand. 
+ + Example: + + ```mlir + // Always returns 4, can be constant folded: + %c0 = constant 0 : index + %x = memref.dim %A, %c0 : memref<4 x ? x f32> + + // Returns the dynamic dimension of %A. + %c1 = constant 1 : index + %y = memref.dim %A, %c1 : memref<4 x ? x f32> + + // Equivalent generic form: + %x = "memref.dim"(%A, %c0) : (memref<4 x ? x f32>, index) -> index + %y = "memref.dim"(%A, %c1) : (memref<4 x ? x f32>, index) -> index + ``` + }]; + + let arguments = (ins AnyTypeOf<[AnyTensor, AnyRankedOrUnrankedMemRef], + "any memref or tensor type">:$memrefOrTensor, + Index:$index); + let results = (outs Index:$result); + + let assemblyFormat = [{ + attr-dict $memrefOrTensor `,` $index `:` type($memrefOrTensor) + }]; + + let builders = [ + OpBuilder<(ins "Value":$memrefOrTensor, "int64_t":$index)>, + OpBuilder<(ins "Value":$memrefOrTensor, "Value":$index)> + ]; + + let extraClassDeclaration = [{ + /// Helper function to get the index as a simple integer if it is constant. + Optional getConstantIndex(); + }]; + + let hasCanonicalizer = 1; + let hasFolder = 1; +} + +//===----------------------------------------------------------------------===// +// GetGlobalOp +//===----------------------------------------------------------------------===// + +def MemRef_GetGlobalOp : MemRef_Op<"get_global", + [NoSideEffect, DeclareOpInterfaceMethods]> { + let summary = "get the memref pointing to a global variable"; + let description = [{ + The `memref.get_global` operation retrieves the memref pointing to a + named global variable. If the global variable is marked constant, writing + to the result memref (such as through a `memref.store` operation) is + undefined. + + Example: + + ```mlir + %x = memref.get_global @foo : memref<2xf32> + ``` + }]; + + let arguments = (ins FlatSymbolRefAttr:$name); + let results = (outs AnyStaticShapeMemRef:$result); + let assemblyFormat = "$name `:` type($result) attr-dict"; + + // `GetGlobalOp` is fully verified by its traits. + let verifier = ?; +} + +//===----------------------------------------------------------------------===// +// GlobalOp +//===----------------------------------------------------------------------===// + +def MemRef_GlobalOp : MemRef_Op<"global", [Symbol]> { + let summary = "declare or define a global memref variable"; + let description = [{ + The `memref.global` operation declares or defines a named global variable. + The backing memory for the variable is allocated statically and is described + by the type of the variable (which should be a statically shaped memref + type). The operation is a declaration if no `inital_value` is specified, + else it is a definition. The `initial_value` can either be a unit attribute + to represent a definition of an uninitialized global variable, or an + elements attribute to represent the definition of a global variable with an + initial value. The global variable can also be marked constant using the + `constant` unit attribute. Writing to such constant global variables is + undefined. + + The global variable can be accessed by using the `memref.get_global` to + retrieve the memref for the global variable. Note that the memref + for such global variable itself is immutable (i.e., memref.get_global for a + given global variable will always return the same memref descriptor). + + Example: + + ```mlir + // Private variable with an initial value. + memref.global "private" @x : memref<2xf32> = dense<0.0,2.0> + + // Declaration of an external variable. 
+ memref.global "private" @y : memref<4xi32> + + // Uninitialized externally visible variable. + memref.global @z : memref<3xf16> = uninitialized + + // Externally visible constant variable. + memref.global constant @c : memref<2xi32> = dense<1, 4> + ``` + }]; + + let arguments = (ins + SymbolNameAttr:$sym_name, + OptionalAttr:$sym_visibility, + TypeAttr:$type, + OptionalAttr:$initial_value, + UnitAttr:$constant + ); + + let assemblyFormat = [{ + ($sym_visibility^)? + (`constant` $constant^)? + $sym_name `:` + custom($type, $initial_value) + attr-dict + }]; + + let extraClassDeclaration = [{ + bool isExternal() { return !initial_value(); } + bool isUninitialized() { + return !isExternal() && initial_value().getValue().isa(); + } + }]; +} + +//===----------------------------------------------------------------------===// +// LoadOp +//===----------------------------------------------------------------------===// + +def LoadOp : MemRef_Op<"load", + [TypesMatchWith<"result type matches element type of 'memref'", + "memref", "result", + "$_self.cast().getElementType()">, + MemRefsNormalizable]> { + let summary = "load operation"; + let description = [{ + The `load` op reads an element from a memref specified by an index list. The + output of load is a new value with the same type as the elements of the + memref. The arity of indices is the rank of the memref (i.e., if the memref + loaded from is of rank 3, then 3 indices are required for the load following + the memref identifier). + + In an `affine.if` or `affine.for` body, the indices of a load are restricted + to SSA values bound to surrounding loop induction variables, + [symbols](Affine.md#dimensions-and-symbols), results of a + [`constant` operation](#stdconstant-constantop), or the result of an + `affine.apply` operation that can in turn take as arguments all of the + aforementioned SSA values or the recursively result of such an + `affine.apply` operation. + + Example: + + ```mlir + %1 = affine.apply affine_map<(d0, d1) -> (3*d0)> (%i, %j) + %2 = affine.apply affine_map<(d0, d1) -> (d1+1)> (%i, %j) + %12 = memref.load %A[%1, %2] : memref<8x?xi32, #layout, memspace0> + + // Example of an indirect load (treated as non-affine) + %3 = affine.apply affine_map<(d0) -> (2*d0 + 1)>(%12) + %13 = memref.load %A[%3, %2] : memref<4x?xi32, #layout, memspace0> + ``` + + **Context:** The `load` and `store` operations are specifically crafted to + fully resolve a reference to an element of a memref, and (in affine + `affine.if` and `affine.for` operations) the compiler can follow use-def + chains (e.g. through [`affine.apply`](Affine.md#affineapply-affineapplyop) + operations) to precisely analyze references at compile-time using polyhedral + techniques. This is possible because of the + [restrictions on dimensions and symbols](Affine.md#restrictions-on-dimensions-and-symbols) + in these contexts. 
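+
+    As a minimal sketch outside of any affine construct (the names `%T`, `%i`,
+    `%j` and `%k` are illustrative and assumed to be defined elsewhere), a
+    load from a rank-3 memref takes exactly three indices:
+
+    ```mlir
+    %v = memref.load %T[%i, %j, %k] : memref<2x4x8xf32>
+    ```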
+ }]; + + let arguments = (ins Arg:$memref, + Variadic:$indices); + let results = (outs AnyType:$result); + + let builders = [ + OpBuilder<(ins "Value":$memref, CArg<"ValueRange", "{}">:$indices), [{ + auto memrefType = memref.getType().cast(); + $_state.addOperands(memref); + $_state.addOperands(indices); + $_state.types.push_back(memrefType.getElementType()); + }]>]; + + let extraClassDeclaration = [{ + Value getMemRef() { return getOperand(0); } + void setMemRef(Value value) { setOperand(0, value); } + MemRefType getMemRefType() { + return getMemRef().getType().cast(); + } + + operand_range getIndices() { return {operand_begin() + 1, operand_end()}; } + }]; + + let hasCanonicalizer = 1; + let hasFolder = 1; + + let assemblyFormat = "$memref `[` $indices `]` attr-dict `:` type($memref)"; +} + +//===----------------------------------------------------------------------===// +// PrefetchOp +//===----------------------------------------------------------------------===// + +def MemRef_PrefetchOp : MemRef_Op<"prefetch"> { + let summary = "prefetch operation"; + let description = [{ + The "prefetch" op prefetches data from a memref location described with + subscript indices similar to memref.load, and with three attributes: a + read/write specifier, a locality hint, and a cache type specifier as shown + below: + + ```mlir + memref.prefetch %0[%i, %j], read, locality<3>, data : memref<400x400xi32> + ``` + + The read/write specifier is either 'read' or 'write', the locality hint + ranges from locality<0> (no locality) to locality<3> (extremely local keep + in cache). The cache type specifier is either 'data' or 'instr' + and specifies whether the prefetch is performed on data cache or on + instruction cache. + }]; + + let arguments = (ins AnyMemRef:$memref, Variadic:$indices, + BoolAttr:$isWrite, + Confined, + IntMaxValue<3>]>:$localityHint, + BoolAttr:$isDataCache); + + let extraClassDeclaration = [{ + MemRefType getMemRefType() { + return memref().getType().cast(); + } + static StringRef getLocalityHintAttrName() { return "localityHint"; } + static StringRef getIsWriteAttrName() { return "isWrite"; } + static StringRef getIsDataCacheAttrName() { return "isDataCache"; } + }]; + + let hasFolder = 1; +} + +//===----------------------------------------------------------------------===// +// ReinterpretCastOp +//===----------------------------------------------------------------------===// + +def MemRef_ReinterpretCastOp: + BaseOpWithOffsetSizesAndStrides<"reinterpret_cast", [ + NoSideEffect, ViewLikeOpInterface, OffsetSizeAndStrideOpInterface + ]> { + let summary = "memref reinterpret cast operation"; + let description = [{ + Modify offset, sizes and strides of an unranked/ranked memref. 
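+
+    For instance, a dynamically shaped 2-D memref can be reinterpreted with a
+    fully static shape and strided layout as sketched below (`%src` is an
+    assumed, illustrative value; the result layout uses the strided-memref
+    `offset:` / `strides:` shorthand):
+
+    ```mlir
+    %dst = memref.reinterpret_cast %src to
+      offset: [0],
+      sizes: [16, 4],
+      strides: [4, 1]
+    : memref<?x?xf32> to memref<16x4xf32, offset: 0, strides: [4, 1]>
+    ```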
+ + Example: + ```mlir + memref.reinterpret_cast %ranked to + offset: [0], + sizes: [%size0, 10], + strides: [1, %stride1] + : memref to memref + + memref.reinterpret_cast %unranked to + offset: [%offset], + sizes: [%size0, %size1], + strides: [%stride0, %stride1] + : memref<*xf32> to memref + ``` + }]; + + let arguments = (ins + Arg:$source, + Variadic:$offsets, + Variadic:$sizes, + Variadic:$strides, + I64ArrayAttr:$static_offsets, + I64ArrayAttr:$static_sizes, + I64ArrayAttr:$static_strides + ); + let results = (outs AnyMemRef:$result); + + let assemblyFormat = [{ + $source `to` `offset` `` `:` + custom($offsets, $static_offsets) + `` `,` `sizes` `` `:` + custom($sizes, $static_sizes) `` `,` `strides` + `` `:` + custom($strides, $static_strides) + attr-dict `:` type($source) `to` type($result) + }]; + + let parser = ?; + let printer = ?; + + let builders = [ + // Build a ReinterpretCastOp with mixed static and dynamic entries. + OpBuilder<(ins "MemRefType":$resultType, "Value":$source, + "OpFoldResult":$offset, "ArrayRef":$sizes, + "ArrayRef":$strides, + CArg<"ArrayRef", "{}">:$attrs)>, + // Build a ReinterpretCastOp with static entries. + OpBuilder<(ins "MemRefType":$resultType, "Value":$source, + "int64_t":$offset, "ArrayRef":$sizes, + "ArrayRef":$strides, + CArg<"ArrayRef", "{}">:$attrs)>, + // Build a ReinterpretCastOp with dynamic entries. + OpBuilder<(ins "MemRefType":$resultType, "Value":$source, + "Value":$offset, "ValueRange":$sizes, + "ValueRange":$strides, + CArg<"ArrayRef", "{}">:$attrs)> + ]; + + let extraClassDeclaration = extraBaseClassDeclaration # [{ + // The result of the op is always a ranked memref. + MemRefType getType() { return getResult().getType().cast(); } + Value getViewSource() { return source(); } + + /// Return the rank of the source ShapedType. + unsigned getResultRank() { + return getResult().getType().cast().getRank(); + } + + /// Return the expected rank of each of the`static_offsets`, `static_sizes` + /// and `static_strides` attributes. + std::array getArrayAttrMaxRanks() { + unsigned resultRank = getResult().getType().cast().getRank(); + return {1, resultRank, resultRank}; + } + + /// Return the number of leading operands before the `offsets`, `sizes` and + /// and `strides` operands. + static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; } + }]; +} + +//===----------------------------------------------------------------------===// +// ReshapeOp +//===----------------------------------------------------------------------===// + +def MemRef_ReshapeOp: MemRef_Op<"reshape", [ + ViewLikeOpInterface, NoSideEffect]> { + let summary = "memref reshape operation"; + let description = [{ + The `reshape` operation converts a memref from one type to an + equivalent type with a provided shape. The data is never copied or + modified. The source and destination types are compatible if both have the + same element type, same number of elements, address space and identity + layout map. The following combinations are possible: + + a. Source type is ranked or unranked. Shape argument has static size. + Result type is ranked. + + ```mlir + // Reshape statically-shaped memref. + %dst = memref.reshape %src(%shape) + : (memref<4x1xf32>, memref<1xi32>) to memref<4xf32> + %dst0 = memref.reshape %src(%shape0) + : (memref<4x1xf32>, memref<2xi32>) to memref<2x2xf32> + // Flatten unranked memref. + %dst = memref.reshape %src(%shape) + : (memref<*xf32>, memref<1xi32>) to memref + ``` + + b. Source type is ranked or unranked. Shape argument has dynamic size. 
+ Result type is unranked. + + ```mlir + // Reshape dynamically-shaped 1D memref. + %dst = memref.reshape %src(%shape) + : (memref, memref) to memref<*xf32> + // Reshape unranked memref. + %dst = memref.reshape %src(%shape) + : (memref<*xf32>, memref) to memref<*xf32> + ``` + }]; + + let arguments = (ins + AnyRankedOrUnrankedMemRef:$source, + MemRefRankOf<[AnySignlessInteger, Index], [1]>:$shape + ); + let results = (outs AnyRankedOrUnrankedMemRef:$result); + + let builders = [OpBuilder< + (ins "MemRefType":$resultType, "Value":$operand, "Value":$shape), [{ + $_state.addOperands(operand); + $_state.addOperands(shape); + $_state.addTypes(resultType); + }]>]; + + let extraClassDeclaration = [{ + MemRefType getType() { return getResult().getType().cast(); } + Value getViewSource() { return source(); } + }]; + + let assemblyFormat = [{ + $source `(` $shape `)` attr-dict `:` functional-type(operands, results) + }]; +} + +//===----------------------------------------------------------------------===// +// StoreOp +//===----------------------------------------------------------------------===// + +def MemRef_StoreOp : MemRef_Op<"store", + [TypesMatchWith<"type of 'value' matches element type of 'memref'", + "memref", "value", + "$_self.cast().getElementType()">, + MemRefsNormalizable]> { + let summary = "store operation"; + let description = [{ + Store a value to a memref location given by indices. The value stored should + have the same type as the elemental type of the memref. The number of + arguments provided within brackets need to match the rank of the memref. + + In an affine context, the indices of a store are restricted to SSA values + bound to surrounding loop induction variables, + [symbols](Affine.md#restrictions-on-dimensions-and-symbols), results of a + [`constant` operation](#stdconstant-constantop), or the result of an + [`affine.apply`](Affine.md#affineapply-affineapplyop) operation that can in + turn take as arguments all of the aforementioned SSA values or the + recursively result of such an `affine.apply` operation. + + Example: + + ```mlir + memref.store %100, %A[%1, 1023] : memref<4x?xf32, #layout, memspace0> + ``` + + **Context:** The `load` and `store` operations are specifically crafted to + fully resolve a reference to an element of a memref, and (in polyhedral + `affine.if` and `affine.for` operations) the compiler can follow use-def + chains (e.g. through [`affine.apply`](Affine.md#affineapply-affineapplyop) + operations) to precisely analyze references at compile-time using polyhedral + techniques. This is possible because of the + [restrictions on dimensions and symbols](Affine.md#restrictions-on-dimensions-and-symbols) + in these contexts. 
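+
+    As a small sketch of the load/store pairing outside of an affine region
+    (`%A`, `%i` and `%j` are illustrative names assumed to be defined), an
+    element can be read, updated and written back as follows:
+
+    ```mlir
+    %0 = memref.load %A[%i, %j] : memref<4x?xf32>
+    %1 = addf %0, %0 : f32
+    memref.store %1, %A[%i, %j] : memref<4x?xf32>
+    ```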
+ }]; + + let arguments = (ins AnyType:$value, + Arg:$memref, + Variadic:$indices); + + let builders = [ + OpBuilder<(ins "Value":$valueToStore, "Value":$memref), [{ + $_state.addOperands(valueToStore); + $_state.addOperands(memref); + }]>]; + + let extraClassDeclaration = [{ + Value getValueToStore() { return getOperand(0); } + + Value getMemRef() { return getOperand(1); } + void setMemRef(Value value) { setOperand(1, value); } + MemRefType getMemRefType() { + return getMemRef().getType().cast(); + } + + operand_range getIndices() { + return {operand_begin() + 2, operand_end()}; + } + }]; + + let hasFolder = 1; + + let assemblyFormat = [{ + $value `,` $memref `[` $indices `]` attr-dict `:` type($memref) + }]; +} + +//===----------------------------------------------------------------------===// +// SubViewOp +//===----------------------------------------------------------------------===// + +def SubViewOp : BaseOpWithOffsetSizesAndStrides< + "subview", [DeclareOpInterfaceMethods, + NoSideEffect, OffsetSizeAndStrideOpInterface] > { + let summary = "memref subview operation"; + let description = [{ + The "subview" operation converts a memref type to another memref type + which represents a reduced-size view of the original memref as specified by + the operation's offsets, sizes and strides arguments. + + The SubView operation supports the following arguments: + + * source: the "base" memref on which to create a "view" memref. + * offsets: memref-rank number of offsets into the "base" memref at which to + create the "view" memref. + * sizes: memref-rank number of sizes which specify the sizes of the result + "view" memref type. + * strides: memref-rank number of strides that compose multiplicatively with + the base memref strides in each dimension. + + The representation based on offsets, sizes and strides support a + partially-static specification via attributes specified through the + `static_offsets`, `static_sizes` and `static_strides` arguments. A special + sentinel value ShapedType::kDynamicSize and + ShapedType::kDynamicStrideOrOffset encodes that the corresponding entry has + a dynamic value. + + A subview operation may additionally reduce the rank of the resulting view + by removing dimensions that are statically known to be of size 1. + + Example 1: + + ```mlir + %0 = memref.alloc() : memref<64x4xf32, (d0, d1) -> (d0 * 4 + d1)> + + // Create a sub-view of "base" memref '%0' with offset arguments '%c0', + // dynamic sizes for each dimension, and stride arguments '%c1'. + %1 = memref.subview %0[%c0, %c0][%size0, %size1][%c1, %c1] + : memref<64x4xf32, (d0, d1) -> (d0 * 4 + d1) > to + memref (d0 * s1 + d1 + s0)> + ``` + + Example 2: + + ```mlir + %0 = memref.alloc() : memref<8x16x4xf32, (d0, d1, d1) -> (d0 * 64 + d1 * 4 + d2)> + + // Create a sub-view of "base" memref '%0' with dynamic offsets, sizes, + // and strides. + // Note that dynamic offsets are represented by the linearized dynamic + // offset symbol 's0' in the subview memref layout map, and that the + // dynamic strides operands, after being applied to the base memref + // strides in each dimension, are represented in the view memref layout + // map as symbols 's1', 's2' and 's3'. 
+ %1 = memref.subview %0[%i, %j, %k][%size0, %size1, %size2][%x, %y, %z] + : memref<8x16x4xf32, (d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)> to + memref (d0 * s1 + d1 * s2 + d2 * s3 + s0)> + ``` + + Example 3: + + ```mlir + %0 = memref.alloc() : memref<8x16x4xf32, (d0, d1, d1) -> (d0 * 64 + d1 * 4 + d2)> + + // Subview with constant offsets, sizes and strides. + %1 = memref.subview %0[0, 2, 0][4, 4, 4][64, 4, 1] + : memref<8x16x4xf32, (d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)> to + memref<4x4x4xf32, (d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2 + 8)> + ``` + + Example 4: + + ```mlir + %0 = memref.alloc(%arg0, %arg1) : memref + + // Subview with constant size, but dynamic offsets and + // strides. The resulting memref has a static shape, but if the + // base memref has an affine map to describe the layout, the result + // memref also uses an affine map to describe the layout. The + // strides of the result memref is computed as follows: + // + // Let #map1 represents the layout of the base memref, and #map2 + // represents the layout of the result memref. A #mapsubview can be + // constructed to map an index from the result memref to the base + // memref (note that the description below uses more convenient + // naming for symbols, while in affine maps, symbols are + // represented as unsigned numbers that identify that symbol in the + // given affine map. + // + // #mapsubview = (d0, d1)[o0, o1, t0, t1] -> (d0 * t0 + o0, d1 * t1 + o1) + // + // where, o0, o1, ... are offsets, and t0, t1, ... are strides. Then, + // + // #map2 = #map1.compose(#mapsubview) + // + // If the layout map is represented as + // + // #map1 = (d0, d1)[s0, s1, s2] -> (d0 * s1 + d1 * s2 + s0) + // + // then, + // + // #map2 = (d0, d1)[s0, s1, s2, o0, o1, t0, t1] -> + // (d0 * s1 * t0 + d1 * s2 * t1 + o0 * s1 + o1 * s2 + s0) + // + // Representing this canonically + // + // #map2 = (d0, d1)[r0, r1, r2] -> (d0 * r1 + d1 * r2 + r0) + // + // where, r0 = o0 * s1 + o1 * s2 + s0, r1 = s1 * t0, r2 = s2 * t1. + %1 = memref.subview %0[%i, %j][4, 4][%x, %y] : + : memref (d0 * s1 + d1 * s2 + s0)> to + memref<4x4xf32, (d0, d1)[r0, r1, r2] -> (d0 * r1 + d1 * r2 + r0)> + + // Note that the subview op does not guarantee that the result + // memref is "inbounds" w.r.t to base memref. It is upto the client + // to ensure that the subview is accessed in a manner that is + // in-bounds. + ``` + + Example 5: + + ```mlir + // Rank-reducing subview. + %1 = memref.subview %0[0, 0, 0][1, 16, 4][1, 1, 1] : + memref<8x16x4xf32> to memref<16x4xf32> + %3 = memref.subview %2[3, 4, 2][1, 6, 3][1, 1, 1] : + memref<8x16x4xf32> to memref<6x3xf32, offset: 210, strides: [4, 1]> + ``` + } + }]; + + let arguments = (ins + AnyMemRef:$source, + Variadic:$offsets, + Variadic:$sizes, + Variadic:$strides, + I64ArrayAttr:$static_offsets, + I64ArrayAttr:$static_sizes, + I64ArrayAttr:$static_strides + ); + let results = (outs AnyMemRef:$result); + + let assemblyFormat = [{ + $source `` + custom($offsets, $static_offsets) + custom($sizes, $static_sizes) + custom($strides, $static_strides) + attr-dict `:` type($source) `to` type($result) + }]; + + let builders = [ + // Build a SubViewOp with mixed static and dynamic entries and custom + // result type. If the type passed is nullptr, it is inferred. + OpBuilder<(ins "Value":$source, "ArrayRef":$offsets, + "ArrayRef":$sizes, "ArrayRef":$strides, + CArg<"ArrayRef", "{}">:$attrs)>, + // Build a SubViewOp with mixed static and dynamic entries and inferred + // result type. 
+ OpBuilder<(ins "MemRefType":$resultType, "Value":$source, + "ArrayRef":$offsets, "ArrayRef":$sizes, + "ArrayRef":$strides, + CArg<"ArrayRef", "{}">:$attrs)>, + // Build a SubViewOp with static entries and custom result type. If the + // type passed is nullptr, it is inferred. + OpBuilder<(ins "Value":$source, "ArrayRef":$offsets, + "ArrayRef":$sizes, "ArrayRef":$strides, + CArg<"ArrayRef", "{}">:$attrs)>, + // Build a SubViewOp with static entries and inferred result type. + OpBuilder<(ins "MemRefType":$resultType, "Value":$source, + "ArrayRef":$offsets, "ArrayRef":$sizes, + "ArrayRef":$strides, + CArg<"ArrayRef", "{}">:$attrs)>, + // Build a SubViewOp with dynamic entries and custom result type. If the + // type passed is nullptr, it is inferred. + OpBuilder<(ins "Value":$source, "ValueRange":$offsets, + "ValueRange":$sizes, "ValueRange":$strides, + CArg<"ArrayRef", "{}">:$attrs)>, + // Build a SubViewOp with dynamic entries and inferred result type. + OpBuilder<(ins "MemRefType":$resultType, "Value":$source, + "ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides, + CArg<"ArrayRef", "{}">:$attrs)> + ]; + + let extraClassDeclaration = extraBaseClassDeclaration # [{ + /// Returns the type of the base memref operand. + MemRefType getSourceType() { + return source().getType().cast(); + } + + /// The result of a subview is always a memref. + MemRefType getType() { return getResult().getType().cast(); } + + /// A subview result type can be fully inferred from the source type and the + /// static representation of offsets, sizes and strides. Special sentinels + /// encode the dynamic case. + static Type inferResultType(MemRefType sourceMemRefType, + ArrayRef staticOffsets, + ArrayRef staticSizes, + ArrayRef staticStrides); + static Type inferResultType(MemRefType sourceMemRefType, + ArrayRef staticOffsets, + ArrayRef staticSizes, + ArrayRef staticStrides); + static Type inferRankReducedResultType(unsigned resultRank, + MemRefType sourceMemRefType, + ArrayRef staticOffsets, + ArrayRef staticSizes, + ArrayRef staticStrides); + static Type inferRankReducedResultType(unsigned resultRank, + MemRefType sourceMemRefType, + ArrayRef staticOffsets, + ArrayRef staticSizes, + ArrayRef staticStrides); + + /// Return the expected rank of each of the`static_offsets`, `static_sizes` + /// and `static_strides` attributes. + std::array getArrayAttrMaxRanks() { + unsigned rank = getSourceType().getRank(); + return {rank, rank, rank}; + } + + /// Return the number of leading operands before the `offsets`, `sizes` and + /// and `strides` operands. + static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; } + }]; + + let hasCanonicalizer = 1; + let hasFolder = 1; +} + +//===----------------------------------------------------------------------===// +// TensorLoadOp +//===----------------------------------------------------------------------===// + +def TensorLoadOp : MemRef_Op<"tensor_load", + [SameOperandsAndResultShape, SameOperandsAndResultElementType, + TypesMatchWith<"result type matches tensor equivalent of 'memref'", + "memref", "result", + "getTensorTypeFromMemRefType($_self)">]> { + let summary = "tensor load operation"; + let description = [{ + Create a tensor from a memref, making an independent copy of the element + data. The result value is a tensor whose shape and element type match the + memref operand. + + The opposite of this op is buffer_cast. 
Together, these two ops are + useful for source/target materializations when doing type conversions + involving tensors and memrefs. + + Example: + + ```mlir + // Produces a value of tensor<4x?xf32> type. + %12 = memref.tensor_load %10 : memref<4x?xf32, #layout, memspace0> + ``` + }]; + + let arguments = (ins Arg:$memref); + let results = (outs AnyTensor:$result); + // TensorLoadOp is fully verified by traits. + let verifier = ?; + + let builders = [ + OpBuilder<(ins "Value":$memref), [{ + $_state.addOperands(memref); + $_state.addTypes(getTensorTypeFromMemRefType(memref.getType())); + }]>]; + + let extraClassDeclaration = [{ + /// The result of a tensor_load is always a tensor. + TensorType getType() { + Type resultType = getResult().getType(); + if (resultType.isa()) + return resultType.cast(); + return {}; + } + }]; + + let assemblyFormat = "$memref attr-dict `:` type($memref)"; + + let hasFolder = 1; +} + +//===----------------------------------------------------------------------===// +// TensorStoreOp +//===----------------------------------------------------------------------===// + +def TensorStoreOp : MemRef_Op<"tensor_store", + [SameOperandsShape, SameOperandsElementType, + TypesMatchWith<"type of 'value' matches tensor equivalent of 'memref'", + "memref", "tensor", + "getTensorTypeFromMemRefType($_self)">]> { + let summary = "tensor store operation"; + let description = [{ + Stores the contents of a tensor into a memref. The first operand is a value + of tensor type, the second operand is a value of memref type. The shapes and + element types of these must match, and are specified by the memref type. + + Example: + + ```mlir + %9 = dim %8, 1 : tensor<4x?xf32> + %10 = alloc(%9) : memref<4x?xf32, #layout, memspace0> + memref.tensor_store %8, %10 : memref<4x?xf32, #layout, memspace0> + ``` + }]; + + let arguments = (ins AnyTensor:$tensor, Arg:$memref); + // TensorStoreOp is fully verified by traits. + let verifier = ?; + + let assemblyFormat = "$tensor `,` $memref attr-dict `:` type($memref)"; +} + +//===----------------------------------------------------------------------===// +// TransposeOp +//===----------------------------------------------------------------------===// + +def MemRef_TransposeOp : MemRef_Op<"transpose", [NoSideEffect]>, + Arguments<(ins AnyStridedMemRef:$in, AffineMapAttr:$permutation)>, + Results<(outs AnyStridedMemRef)> { + let summary = "`transpose` produces a new strided memref (metadata-only)"; + let description = [{ + The `transpose` op produces a strided memref whose sizes and strides + are a permutation of the original `in` memref. This is purely a metadata + transformation. 
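+
+    For instance, transposing a 2-D memref swaps its sizes and strides while
+    leaving the underlying buffer untouched (a sketch; `%m` is an assumed,
+    illustrative value and the result layout is spelled out as an explicit
+    affine map):
+
+    ```mlir
+    %t = memref.transpose %m (i, j) -> (j, i)
+      : memref<?x?xf32> to memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d1 * s0 + d0)>>
+    ```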
+ + Example: + + ```mlir + %1 = memref.transpose %0 (i, j) -> (j, i) : memref to memref (d1 * s0 + d0)>> + ``` + }]; + + let builders = [ + OpBuilder<(ins "Value":$in, "AffineMapAttr":$permutation, + CArg<"ArrayRef", "{}">:$attrs)>]; + + let extraClassDeclaration = [{ + static StringRef getPermutationAttrName() { return "permutation"; } + ShapedType getShapedType() { return in().getType().cast(); } + }]; + + let hasFolder = 1; +} + +//===----------------------------------------------------------------------===// +// ViewOp +//===----------------------------------------------------------------------===// + +def MemRef_ViewOp : MemRef_Op<"view", [ + DeclareOpInterfaceMethods, NoSideEffect]> { + let summary = "memref view operation"; + let description = [{ + The "view" operation extracts an N-D contiguous memref with empty layout map + with arbitrary element type from a 1-D contiguous memref with empty layout + map of i8 element type. The ViewOp supports the following arguments: + + * A single dynamic byte-shift operand must be specified which represents a + a shift of the base 1-D memref pointer from which to create the resulting + contiguous memref view with identity layout. + * A dynamic size operand that must be specified for each dynamic dimension + in the resulting view memref type. + + The "view" operation gives a structured indexing form to a flat 1-D buffer. + Unlike "subview" it can perform a type change. The type change behavior + requires the op to have special semantics because, e.g. a byte shift of 3 + cannot be represented as an offset on f64. + For now, a "view" op: + + 1. Only takes a contiguous source memref with 0 offset and empty layout. + 2. Must specify a byte_shift operand (in the future, a special integer + attribute may be added to support the folded case). + 3. Returns a contiguous memref with 0 offset and empty layout. + + Example: + + ```mlir + // Allocate a flat 1D/i8 memref. + %0 = memref.alloc() : memref<2048xi8> + + // ViewOp with dynamic offset and static sizes. + %1 = memref.view %0[%offset_1024][] : memref<2048xi8> to memref<64x4xf32> + + // ViewOp with dynamic offset and two dynamic size. + %2 = memref.view %0[%offset_1024][%size0, %size1] : + memref<2048xi8> to memref + ``` + }]; + + let arguments = (ins MemRefRankOf<[I8], [1]>:$source, + Index:$byte_shift, + Variadic:$sizes); + let results = (outs AnyMemRef); + + let extraClassDeclaration = [{ + /// The result of a view is always a memref. + MemRefType getType() { return getResult().getType().cast(); } + + /// Returns the dynamic sizes for this view operation. This is redundant + /// with `sizes` but needed in template implementations. 
More specifically: + /// ``` + /// template + /// bool isMemRefSizeValidSymbol(AnyMemRefDefOp memrefDefOp, unsigned index, + /// Region *region) + /// ``` + operand_range getDynamicSizes() { + return {sizes().begin(), sizes().end()}; + } + }]; + + let hasCanonicalizer = 1; +} + +#endif // MEMREF_OPS diff --git a/mlir/include/mlir/Dialect/SCF/Passes.td b/mlir/include/mlir/Dialect/SCF/Passes.td --- a/mlir/include/mlir/Dialect/SCF/Passes.td +++ b/mlir/include/mlir/Dialect/SCF/Passes.td @@ -14,6 +14,7 @@ def SCFBufferize : FunctionPass<"scf-bufferize"> { let summary = "Bufferize the scf dialect."; let constructor = "mlir::createSCFBufferizePass()"; + let dependentDialects = ["memref::MemRefDialect"]; } def SCFForLoopSpecialization diff --git a/mlir/include/mlir/Dialect/Shape/Transforms/Passes.td b/mlir/include/mlir/Dialect/Shape/Transforms/Passes.td --- a/mlir/include/mlir/Dialect/Shape/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Shape/Transforms/Passes.td @@ -25,5 +25,6 @@ def ShapeBufferize : FunctionPass<"shape-bufferize"> { let summary = "Bufferize the shape dialect."; let constructor = "mlir::createShapeBufferizePass()"; + let dependentDialects = ["memref::MemRefDialect"]; } #endif // MLIR_DIALECT_SHAPE_TRANSFORMS_PASSES diff --git a/mlir/include/mlir/Dialect/StandardOps/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/StandardOps/EDSC/Intrinsics.h --- a/mlir/include/mlir/Dialect/StandardOps/EDSC/Intrinsics.h +++ b/mlir/include/mlir/Dialect/StandardOps/EDSC/Intrinsics.h @@ -17,35 +17,24 @@ using std_addi = ValueBuilder; using std_addf = ValueBuilder; -using std_alloc = ValueBuilder; -using std_alloca = ValueBuilder; using std_call = OperationBuilder; using std_constant = ValueBuilder; using std_constant_float = ValueBuilder; using std_constant_index = ValueBuilder; using std_constant_int = ValueBuilder; -using std_dealloc = OperationBuilder; using std_divis = ValueBuilder; using std_diviu = ValueBuilder; -using std_dim = ValueBuilder; using std_fpext = ValueBuilder; using std_fptrunc = ValueBuilder; using std_index_cast = ValueBuilder; using std_muli = ValueBuilder; using std_mulf = ValueBuilder; -using std_memref_cast = ValueBuilder; using std_ret = OperationBuilder; using std_select = ValueBuilder; -using std_load = ValueBuilder; using std_sign_extendi = ValueBuilder; using std_splat = ValueBuilder; -using std_store = OperationBuilder; using std_subf = ValueBuilder; using std_subi = ValueBuilder; -using std_sub_view = ValueBuilder; -using std_tensor_load = ValueBuilder; -using std_tensor_store = OperationBuilder; -using std_view = ValueBuilder; using std_zero_extendi = ValueBuilder; using tensor_extract = ValueBuilder; @@ -77,10 +66,6 @@ /// or to `falseBranch` and `falseOperand` if `cond` evaluates to `false`. CondBranchOp std_cond_br(Value cond, Block *trueBranch, ValueRange trueOperands, Block *falseBranch, ValueRange falseOperands); - -/// Provide an index notation around sdt_load and std_store. -using StdIndexedValue = - TemplatedIndexedValue; } // namespace intrinsics } // namespace edsc } // namespace mlir diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h @@ -34,8 +34,6 @@ class FuncOp; class OpBuilder; -raw_ostream &operator<<(raw_ostream &os, Range &range); - /// Return the list of Range (i.e. offset, size, stride). 
Each Range /// entry contains either the dynamic value or a ConstantIndexOp constructed /// with `b` at location `loc`. @@ -110,200 +108,6 @@ static bool classof(Operation *op); }; -// DmaStartOp starts a non-blocking DMA operation that transfers data from a -// source memref to a destination memref. The source and destination memref need -// not be of the same dimensionality, but need to have the same elemental type. -// The operands include the source and destination memref's each followed by its -// indices, size of the data transfer in terms of the number of elements (of the -// elemental type of the memref), a tag memref with its indices, and optionally -// at the end, a stride and a number_of_elements_per_stride arguments. The tag -// location is used by a DmaWaitOp to check for completion. The indices of the -// source memref, destination memref, and the tag memref have the same -// restrictions as any load/store. The optional stride arguments should be of -// 'index' type, and specify a stride for the slower memory space (memory space -// with a lower memory space id), transferring chunks of -// number_of_elements_per_stride every stride until %num_elements are -// transferred. Either both or no stride arguments should be specified. -// -// For example, a DmaStartOp operation that transfers 256 elements of a memref -// '%src' in memory space 0 at indices [%i, %j] to memref '%dst' in memory space -// 1 at indices [%k, %l], would be specified as follows: -// -// %num_elements = constant 256 -// %idx = constant 0 : index -// %tag = alloc() : memref<1 x i32, (d0) -> (d0), 4> -// dma_start %src[%i, %j], %dst[%k, %l], %num_elements, %tag[%idx] : -// memref<40 x 128 x f32>, (d0) -> (d0), 0>, -// memref<2 x 1024 x f32>, (d0) -> (d0), 1>, -// memref<1 x i32>, (d0) -> (d0), 2> -// -// If %stride and %num_elt_per_stride are specified, the DMA is expected to -// transfer %num_elt_per_stride elements every %stride elements apart from -// memory space 0 until %num_elements are transferred. -// -// dma_start %src[%i, %j], %dst[%k, %l], %num_elements, %tag[%idx], %stride, -// %num_elt_per_stride : -// -// TODO: add additional operands to allow source and destination striding, and -// multiple stride levels. -// TODO: Consider replacing src/dst memref indices with view memrefs. -class DmaStartOp - : public Op { -public: - using Op::Op; - - static void build(OpBuilder &builder, OperationState &result, Value srcMemRef, - ValueRange srcIndices, Value destMemRef, - ValueRange destIndices, Value numElements, Value tagMemRef, - ValueRange tagIndices, Value stride = nullptr, - Value elementsPerStride = nullptr); - - // Returns the source MemRefType for this DMA operation. - Value getSrcMemRef() { return getOperand(0); } - // Returns the rank (number of indices) of the source MemRefType. - unsigned getSrcMemRefRank() { - return getSrcMemRef().getType().cast().getRank(); - } - // Returns the source memref indices for this DMA operation. - operand_range getSrcIndices() { - return {(*this)->operand_begin() + 1, - (*this)->operand_begin() + 1 + getSrcMemRefRank()}; - } - - // Returns the destination MemRefType for this DMA operations. - Value getDstMemRef() { return getOperand(1 + getSrcMemRefRank()); } - // Returns the rank (number of indices) of the destination MemRefType. 
- unsigned getDstMemRefRank() { - return getDstMemRef().getType().cast().getRank(); - } - unsigned getSrcMemorySpace() { - return getSrcMemRef().getType().cast().getMemorySpaceAsInt(); - } - unsigned getDstMemorySpace() { - return getDstMemRef().getType().cast().getMemorySpaceAsInt(); - } - - // Returns the destination memref indices for this DMA operation. - operand_range getDstIndices() { - return {(*this)->operand_begin() + 1 + getSrcMemRefRank() + 1, - (*this)->operand_begin() + 1 + getSrcMemRefRank() + 1 + - getDstMemRefRank()}; - } - - // Returns the number of elements being transferred by this DMA operation. - Value getNumElements() { - return getOperand(1 + getSrcMemRefRank() + 1 + getDstMemRefRank()); - } - - // Returns the Tag MemRef for this DMA operation. - Value getTagMemRef() { - return getOperand(1 + getSrcMemRefRank() + 1 + getDstMemRefRank() + 1); - } - // Returns the rank (number of indices) of the tag MemRefType. - unsigned getTagMemRefRank() { - return getTagMemRef().getType().cast().getRank(); - } - - // Returns the tag memref index for this DMA operation. - operand_range getTagIndices() { - unsigned tagIndexStartPos = - 1 + getSrcMemRefRank() + 1 + getDstMemRefRank() + 1 + 1; - return {(*this)->operand_begin() + tagIndexStartPos, - (*this)->operand_begin() + tagIndexStartPos + getTagMemRefRank()}; - } - - /// Returns true if this is a DMA from a faster memory space to a slower one. - bool isDestMemorySpaceFaster() { - return (getSrcMemorySpace() < getDstMemorySpace()); - } - - /// Returns true if this is a DMA from a slower memory space to a faster one. - bool isSrcMemorySpaceFaster() { - // Assumes that a lower number is for a slower memory space. - return (getDstMemorySpace() < getSrcMemorySpace()); - } - - /// Given a DMA start operation, returns the operand position of either the - /// source or destination memref depending on the one that is at the higher - /// level of the memory hierarchy. Asserts failure if neither is true. - unsigned getFasterMemPos() { - assert(isSrcMemorySpaceFaster() || isDestMemorySpaceFaster()); - return isSrcMemorySpaceFaster() ? 0 : getSrcMemRefRank() + 1; - } - - static StringRef getOperationName() { return "std.dma_start"; } - static ParseResult parse(OpAsmParser &parser, OperationState &result); - void print(OpAsmPrinter &p); - LogicalResult verify(); - - LogicalResult fold(ArrayRef cstOperands, - SmallVectorImpl &results); - - bool isStrided() { - return getNumOperands() != 1 + getSrcMemRefRank() + 1 + getDstMemRefRank() + - 1 + 1 + getTagMemRefRank(); - } - - Value getStride() { - if (!isStrided()) - return nullptr; - return getOperand(getNumOperands() - 1 - 1); - } - - Value getNumElementsPerStride() { - if (!isStrided()) - return nullptr; - return getOperand(getNumOperands() - 1); - } -}; - -// DmaWaitOp blocks until the completion of a DMA operation associated with the -// tag element '%tag[%index]'. %tag is a memref, and %index has to be an index -// with the same restrictions as any load/store index. %num_elements is the -// number of elements associated with the DMA operation. For example: -// -// dma_start %src[%i, %j], %dst[%k, %l], %num_elements, %tag[%index] : -// memref<2048 x f32>, (d0) -> (d0), 0>, -// memref<256 x f32>, (d0) -> (d0), 1> -// memref<1 x i32>, (d0) -> (d0), 2> -// ... -// ... 
-// dma_wait %tag[%index], %num_elements : memref<1 x i32, (d0) -> (d0), 2> -// -class DmaWaitOp - : public Op { -public: - using Op::Op; - - static void build(OpBuilder &builder, OperationState &result, Value tagMemRef, - ValueRange tagIndices, Value numElements); - - static StringRef getOperationName() { return "std.dma_wait"; } - - // Returns the Tag MemRef associated with the DMA operation being waited on. - Value getTagMemRef() { return getOperand(0); } - - // Returns the tag memref index for this DMA operation. - operand_range getTagIndices() { - return {(*this)->operand_begin() + 1, - (*this)->operand_begin() + 1 + getTagMemRefRank()}; - } - - // Returns the rank (number of indices) of the tag memref. - unsigned getTagMemRefRank() { - return getTagMemRef().getType().cast().getRank(); - } - - // Returns the number of elements transferred in the associated DMA operation. - Value getNumElements() { return getOperand(1 + getTagMemRefRank()); } - - static ParseResult parse(OpAsmParser &parser, OperationState &result); - void print(OpAsmPrinter &p); - LogicalResult fold(ArrayRef cstOperands, - SmallVectorImpl &results); - LogicalResult verify(); -}; - /// Given an `originalShape` and a `reducedShape` assumed to be a subset of /// `originalShape` with some `1` entries erased, return the set of indices /// that specifies which of the entries of `originalShape` are dropped to obtain @@ -316,45 +120,6 @@ computeRankReductionMask(ArrayRef originalShape, ArrayRef reducedShape); -/// Determines whether MemRefCastOp casts to a more dynamic version of the -/// source memref. This is useful to to fold a memref_cast into a consuming op -/// and implement canonicalization patterns for ops in different dialects that -/// may consume the results of memref_cast operations. Such foldable memref_cast -/// operations are typically inserted as `view` and `subview` ops and are -/// canonicalized, to preserve the type compatibility of their uses. -/// -/// Returns true when all conditions are met: -/// 1. source and result are ranked memrefs with strided semantics and same -/// element type and rank. -/// 2. each of the source's size, offset or stride has more static information -/// than the corresponding result's size, offset or stride. -/// -/// Example 1: -/// ```mlir -/// %1 = memref_cast %0 : memref<8x16xf32> to memref -/// %2 = consumer %1 ... : memref ... -/// ``` -/// -/// may fold into: -/// -/// ```mlir -/// %2 = consumer %0 ... : memref<8x16xf32> ... -/// ``` -/// -/// Example 2: -/// ``` -/// %1 = memref_cast %0 : memref(16 * i + j)>> -/// to memref -/// consumer %1 : memref ... -/// ``` -/// -/// may fold into: -/// -/// ``` -/// consumer %0 ... : memref(16 * i + j)>> -/// ``` -bool canFoldIntoConsumerOp(MemRefCastOp castOp); - /// Compute `lhs` `pred` `rhs`, where `pred` is one of the known integer /// comparison predicates. bool applyCmpPredicate(CmpIPredicate predicate, const APInt &lhs, diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -43,13 +43,13 @@ let parser = [{ return ::parse$cppClass(parser, result); }]; } -// Base class for standard cast operations. Requires single operand and result, -// but does not constrain them to specific types. 
-class CastOp traits = []> : - Std_Op traits = []> : + Std_Op, NoSideEffect, SameOperandsAndResultShape, - DeclareOpInterfaceMethods - ]> { + DeclareOpInterfaceMethods]> { let results = (outs AnyType); let builders = [ @@ -69,13 +69,6 @@ let verifier = ?; } -// Base class for arithmetic cast operations. -class ArithmeticCastOp traits = []> : - CastOp] # - ElementwiseMappable.traits> { -} - // Base class for unary ops. Requires single operand and result. Individual // classes will have `operand` accessor. class UnaryOp traits = []> : @@ -189,64 +182,6 @@ [DeclareOpInterfaceMethods])>, Arguments<(ins FloatLike:$a, FloatLike:$b, FloatLike:$c)>; -// Base class for memref allocating ops: alloca and alloc. -// -// %0 = alloclike(%m)[%s] : memref<8x?xf32, (d0, d1)[s0] -> ((d0 + s0), d1)> -// -class AllocLikeOp traits = []> : - Std_Op { - - let arguments = (ins Variadic:$dynamicSizes, - // The symbolic operands (the ones in square brackets) bind - // to the symbols of the memref's layout map. - Variadic:$symbolOperands, - Confined, [IntMinValue<0>]>:$alignment); - let results = (outs Res]>:$memref); - - let builders = [ - OpBuilder<(ins "MemRefType":$memrefType, - CArg<"IntegerAttr", "IntegerAttr()">:$alignment), [{ - return build($_builder, $_state, memrefType, {}, alignment); - }]>, - OpBuilder<(ins "MemRefType":$memrefType, "ValueRange":$dynamicSizes, - CArg<"IntegerAttr", "IntegerAttr()">:$alignment), [{ - return build($_builder, $_state, memrefType, dynamicSizes, {}, alignment); - }]>, - OpBuilder<(ins "MemRefType":$memrefType, "ValueRange":$dynamicSizes, - "ValueRange":$symbolOperands, - CArg<"IntegerAttr", "{}">:$alignment), [{ - $_state.types.push_back(memrefType); - $_state.addOperands(dynamicSizes); - $_state.addOperands(symbolOperands); - $_state.addAttribute(getOperandSegmentSizeAttr(), - $_builder.getI32VectorAttr({ - static_cast(dynamicSizes.size()), - static_cast(symbolOperands.size())})); - if (alignment) - $_state.addAttribute(getAlignmentAttrName(), alignment); - }]>]; - - let extraClassDeclaration = [{ - static StringRef getAlignmentAttrName() { return "alignment"; } - - MemRefType getType() { return getResult().getType().cast(); } - - /// Returns the dynamic sizes for this alloc operation if specified. - operand_range getDynamicSizes() { return dynamicSizes(); } - }]; - - let assemblyFormat = [{ - `(`$dynamicSizes`)` (`` `[` $symbolOperands^ `]`)? attr-dict `:` type($memref) - }]; - - let hasCanonicalizer = 1; -} - // Base class for ops with static/dynamic offset, sizes and strides // attributes/arguments. class BaseOpWithOffsetSizesAndStrides traits = []> : @@ -362,96 +297,6 @@ let hasFolder = 1; } -//===----------------------------------------------------------------------===// -// AllocOp -//===----------------------------------------------------------------------===// - -def AllocOp : AllocLikeOp<"alloc", DefaultResource> { - let summary = "memory allocation operation"; - let description = [{ - The `alloc` operation allocates a region of memory, as specified by its - memref type. - - Example: - - ```mlir - %0 = alloc() : memref<8x64xf32, 1> - ``` - - The optional list of dimension operands are bound to the dynamic dimensions - specified in its memref type. In the example below, the ssa value '%d' is - bound to the second dimension of the memref (which is dynamic). - - ```mlir - %0 = alloc(%d) : memref<8x?xf32, 1> - ``` - - The optional list of symbol operands are bound to the symbols of the - memrefs affine map. 
In the example below, the ssa value '%s' is bound to - the symbol 's0' in the affine map specified in the allocs memref type. - - ```mlir - %0 = alloc()[%s] : memref<8x64xf32, - affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>, 1> - ``` - - This operation returns a single ssa value of memref type, which can be used - by subsequent load and store operations. - - The optional `alignment` attribute may be specified to ensure that the - region of memory that will be indexed is aligned at the specified byte - boundary. - - ```mlir - %0 = alloc()[%s] {alignment = 8} : - memref<8x64xf32, affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>, 1> - ``` - }]; -} - -//===----------------------------------------------------------------------===// -// AllocaOp -//===----------------------------------------------------------------------===// - -def AllocaOp : AllocLikeOp<"alloca", AutomaticAllocationScopeResource> { - let summary = "stack memory allocation operation"; - let description = [{ - The `alloca` operation allocates memory on the stack, to be automatically - released when control transfers back from the region of its closest - surrounding operation with an - [`AutomaticAllocationScope`](../Traits.md#automaticallocationscope) trait. - The amount of memory allocated is specified by its memref and additional - operands. For example: - - ```mlir - %0 = alloca() : memref<8x64xf32> - ``` - - The optional list of dimension operands are bound to the dynamic dimensions - specified in its memref type. In the example below, the SSA value '%d' is - bound to the second dimension of the memref (which is dynamic). - - ```mlir - %0 = alloca(%d) : memref<8x?xf32> - ``` - - The optional list of symbol operands are bound to the symbols of the - memref's affine map. In the example below, the SSA value '%s' is bound to - the symbol 's0' in the affine map specified in the allocs memref type. - - ```mlir - %0 = alloca()[%s] : memref<8x64xf32, - affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>> - ``` - - This operation returns a single SSA value of memref type, which can be used - by subsequent load and store operations. An optional alignment attribute, if - specified, guarantees alignment at least to that boundary. If not specified, - an alignment on any convenient boundary compatible with the type will be - chosen. - }]; -} - //===----------------------------------------------------------------------===// // AndOp //===----------------------------------------------------------------------===// @@ -515,28 +360,6 @@ let hasCanonicalizer = 1; } -//===----------------------------------------------------------------------===// -// AssumeAlignmentOp -//===----------------------------------------------------------------------===// - -def AssumeAlignmentOp : Std_Op<"assume_alignment"> { - let summary = - "assertion that gives alignment information to the input memref"; - let description = [{ - The `assume_alignment` operation takes a memref and an integer of alignment - value, and internally annotates the buffer with the given alignment. If - the buffer isn't aligned to the given alignment, the behavior is undefined. - - This operation doesn't affect the semantics of a correct program. It's for - optimization only, and the optimization is best-effort. 
- }]; - let arguments = (ins AnyMemRef:$memref, - Confined:$alignment); - let results = (outs); - - let assemblyFormat = "$memref `,` $alignment attr-dict `:` type($memref)"; -} - //===----------------------------------------------------------------------===// // AtomicRMWOp //===----------------------------------------------------------------------===// @@ -1333,87 +1156,6 @@ }]; } -//===----------------------------------------------------------------------===// -// DeallocOp -//===----------------------------------------------------------------------===// - -def DeallocOp : Std_Op<"dealloc", [MemRefsNormalizable]> { - let summary = "memory deallocation operation"; - let description = [{ - The `dealloc` operation frees the region of memory referenced by a memref - which was originally created by the `alloc` operation. - The `dealloc` operation should not be called on memrefs which alias an - alloc'd memref (e.g. memrefs returned by `view` operations). - - Example: - - ```mlir - %0 = alloc() : memref<8x64xf32, (d0, d1) -> (d0, d1), 1> - dealloc %0 : memref<8x64xf32, (d0, d1) -> (d0, d1), 1> - ``` - }]; - - let arguments = (ins Arg:$memref); - - let hasCanonicalizer = 1; - let hasFolder = 1; - let assemblyFormat = "$memref attr-dict `:` type($memref)"; -} - -//===----------------------------------------------------------------------===// -// DimOp -//===----------------------------------------------------------------------===// - -def DimOp : Std_Op<"dim", [NoSideEffect]> { - let summary = "dimension index operation"; - let description = [{ - The `dim` operation takes a memref/tensor and a dimension operand of type - `index`. - It returns the size of the requested dimension of the given memref/tensor. - If the dimension index is out of bounds the behavior is undefined. - - The specified memref or tensor type is that of the first operand. - - Example: - - ```mlir - // Always returns 4, can be constant folded: - %c0 = constant 0 : index - %x = = dim %A, %c0 : tensor<4 x ? x f32> - - // Returns the dynamic dimension of %A. - %c1 = constant 1 : index - %y = dim %A, %c1 : tensor<4 x ? x f32> - - // Equivalent generic form: - %x = "std.dim"(%A, %c0) : (tensor<4 x ? x f32>, index) -> index - %y = "std.dim"(%A, %c1) : (tensor<4 x ? x f32>, index) -> index - ``` - }]; - - let arguments = (ins AnyTypeOf<[AnyRankedOrUnrankedMemRef, AnyTensor], - "any tensor or memref type">:$memrefOrTensor, - Index:$index); - let results = (outs Index:$result); - - let assemblyFormat = [{ - attr-dict $memrefOrTensor `,` $index `:` type($memrefOrTensor) - }]; - - let builders = [ - OpBuilder<(ins "Value":$memrefOrTensor, "int64_t":$index)>, - OpBuilder<(ins "Value":$memrefOrTensor, "Value":$index)> - ]; - - let extraClassDeclaration = [{ - /// Helper function to get the index as a simple integer if it is constant. - Optional getConstantIndex(); - }]; - - let hasCanonicalizer = 1; - let hasFolder = 1; -} - //===----------------------------------------------------------------------===// // DivFOp //===----------------------------------------------------------------------===// @@ -1512,98 +1254,6 @@ }]; } -//===----------------------------------------------------------------------===// -// GlobalMemrefOp -//===----------------------------------------------------------------------===// - -def GlobalMemrefOp : Std_Op<"global_memref", [Symbol]> { - let summary = "declare or define a global memref variable"; - let description = [{ - The `global_memref` operation declares or defines a named global variable. 
- The backing memory for the variable is allocated statically and is described - by the type of the variable (which should be a statically shaped memref - type). The operation is a declaration if no `inital_value` is specified, - else it is a definition. The `initial_value` can either be a unit attribute - to represent a definition of an uninitialized global variable, or an - elements attribute to represent the definition of a global variable with an - initial value. The global variable can also be marked constant using the - `constant` unit attribute. Writing to such constant global variables is - undefined. - - The global variable can be accessed by using the `get_global_memref` to - retrieve the memref for the global variable. Note that the memref - for such global variable itself is immutable (i.e., get_global_memref for a - given global variable will always return the same memref descriptor). - - Example: - - ```mlir - // Private variable with an initial value. - global_memref "private" @x : memref<2xf32> = dense<0.0,2.0> - - // Declaration of an external variable. - global_memref "private" @y : memref<4xi32> - - // Uninitialized externally visible variable. - global_memref @z : memref<3xf16> = uninitialized - - // Externally visible constant variable. - global_memref constant @c : memref<2xi32> = dense<1, 4> - ``` - }]; - - let arguments = (ins - SymbolNameAttr:$sym_name, - OptionalAttr:$sym_visibility, - TypeAttr:$type, - OptionalAttr:$initial_value, - UnitAttr:$constant - ); - - let assemblyFormat = [{ - ($sym_visibility^)? - (`constant` $constant^)? - $sym_name `:` - custom($type, $initial_value) - attr-dict - }]; - - let extraClassDeclaration = [{ - bool isExternal() { return !initial_value(); } - bool isUninitialized() { - return !isExternal() && initial_value().getValue().isa(); - } - }]; -} - -//===----------------------------------------------------------------------===// -// GetGlobalMemrefOp -//===----------------------------------------------------------------------===// - -def GetGlobalMemrefOp : Std_Op<"get_global_memref", - [NoSideEffect, DeclareOpInterfaceMethods]> { - let summary = "get the memref pointing to a global variable"; - let description = [{ - The `get_global_memref` operation retrieves the memref pointing to a - named global variable. If the global variable is marked constant, writing - to the result memref (such as through a `std.store` operation) is - undefined. - - Example: - - ```mlir - %x = get_global_memref @foo : memref<2xf32> - ``` - }]; - - let arguments = (ins FlatSymbolRefAttr:$name); - let results = (outs AnyStaticShapeMemRef:$result); - let assemblyFormat = "$name `:` type($result) attr-dict"; - - // `GetGlobalMemrefOp` is fully verified by its traits. 
- let verifier = ?; -} - //===----------------------------------------------------------------------===// // IndexCastOp //===----------------------------------------------------------------------===// @@ -1620,470 +1270,126 @@ } //===----------------------------------------------------------------------===// -// LoadOp +// MulFOp //===----------------------------------------------------------------------===// -def LoadOp : Std_Op<"load", - [TypesMatchWith<"result type matches element type of 'memref'", - "memref", "result", - "$_self.cast().getElementType()">, - MemRefsNormalizable]> { - let summary = "load operation"; +def MulFOp : FloatBinaryOp<"mulf"> { + let summary = "floating point multiplication operation"; let description = [{ - The `load` op reads an element from a memref specified by an index list. The - output of load is a new value with the same type as the elements of the - memref. The arity of indices is the rank of the memref (i.e., if the memref - loaded from is of rank 3, then 3 indices are required for the load following - the memref identifier). - - In an `affine.if` or `affine.for` body, the indices of a load are restricted - to SSA values bound to surrounding loop induction variables, - [symbols](Affine.md#dimensions-and-symbols), results of a - [`constant` operation](#stdconstant-constantop), or the result of an - `affine.apply` operation that can in turn take as arguments all of the - aforementioned SSA values or the recursively result of such an - `affine.apply` operation. + Syntax: + + ``` + operation ::= ssa-id `=` `std.mulf` ssa-use `,` ssa-use `:` type + ``` + + The `mulf` operation takes two operands and returns one result, each of + these is required to be the same type. This type may be a floating point + scalar type, a vector whose element type is a floating point type, or a + floating point tensor. Example: ```mlir - %1 = affine.apply affine_map<(d0, d1) -> (3*d0)> (%i, %j) - %2 = affine.apply affine_map<(d0, d1) -> (d1+1)> (%i, %j) - %12 = load %A[%1, %2] : memref<8x?xi32, #layout, memspace0> + // Scalar multiplication. + %a = mulf %b, %c : f64 + + // SIMD pointwise vector multiplication, e.g. for Intel SSE. + %f = mulf %g, %h : vector<4xf32> - // Example of an indirect load (treated as non-affine) - %3 = affine.apply affine_map<(d0) -> (2*d0 + 1)>(%12) - %13 = load %A[%3, %2] : memref<4x?xi32, #layout, memspace0> + // Tensor pointwise multiplication. + %x = mulf %y, %z : tensor<4x?xbf16> ``` - **Context:** The `load` and `store` operations are specifically crafted to - fully resolve a reference to an element of a memref, and (in affine - `affine.if` and `affine.for` operations) the compiler can follow use-def - chains (e.g. through [`affine.apply`](Affine.md#affineapply-affineapplyop) - operations) to precisely analyze references at compile-time using polyhedral - techniques. This is possible because of the - [restrictions on dimensions and symbols](Affine.md#restrictions-on-dimensions-and-symbols) - in these contexts. + TODO: In the distant future, this will accept optional attributes for fast + math, contraction, rounding mode, and other controls. 
}]; + let hasFolder = 1; +} - let arguments = (ins Arg:$memref, - Variadic:$indices); - let results = (outs AnyType:$result); - - let builders = [ - OpBuilder<(ins "Value":$memref, CArg<"ValueRange", "{}">:$indices), [{ - auto memrefType = memref.getType().cast(); - $_state.addOperands(memref); - $_state.addOperands(indices); - $_state.types.push_back(memrefType.getElementType()); - }]>]; - - let extraClassDeclaration = [{ - Value getMemRef() { return getOperand(0); } - void setMemRef(Value value) { setOperand(0, value); } - MemRefType getMemRefType() { - return getMemRef().getType().cast(); - } - - operand_range getIndices() { return {operand_begin() + 1, operand_end()}; } - }]; +//===----------------------------------------------------------------------===// +// MulIOp +//===----------------------------------------------------------------------===// - let hasCanonicalizer = 1; +def MulIOp : IntBinaryOp<"muli", [Commutative]> { + let summary = "integer multiplication operation"; let hasFolder = 1; - - let assemblyFormat = "$memref `[` $indices `]` attr-dict `:` type($memref)"; } //===----------------------------------------------------------------------===// -// MemRefCastOp +// NegFOp //===----------------------------------------------------------------------===// -def MemRefCastOp : CastOp<"memref_cast", [ - DeclareOpInterfaceMethods - ]> { - let summary = "memref cast operation"; +def NegFOp : FloatUnaryOp<"negf"> { + let summary = "floating point negation"; let description = [{ Syntax: ``` - operation ::= ssa-id `=` `std.memref_cast` ssa-use `:` type `to` type + operation ::= ssa-id `=` `negf` ssa-use `:` type ``` - The `memref_cast` operation converts a memref from one type to an equivalent - type with a compatible shape. The source and destination types are - compatible if: - - a. Both are ranked memref types with the same element type, address space, - and rank and: - 1. Both have the same layout or both have compatible strided layouts. - 2. The individual sizes (resp. offset and strides in the case of strided - memrefs) may convert constant dimensions to dynamic dimensions and - vice-versa. - - If the cast converts any dimensions from an unknown to a known size, then it - acts as an assertion that fails at runtime if the dynamic dimensions - disagree with resultant destination size. + The `negf` operation computes the negation of a given value. It takes one + operand and returns one result of the same type. This type may be a float + scalar type, a vector whose element type is float, or a tensor of floats. + It has no standard attributes. Example: ```mlir - // Assert that the input dynamic shape matches the destination static shape. - %2 = memref_cast %1 : memref to memref<4x4xf32> - // Erase static shape information, replacing it with dynamic information. - %3 = memref_cast %1 : memref<4xf32> to memref + // Scalar negation value. + %a = negf %b : f64 + + // SIMD vector element-wise negation value. + %f = negf %g : vector<4xf32> + + // Tensor element-wise negation value. + %x = negf %y : tensor<4x?xf8> + ``` + }]; +} + +//===----------------------------------------------------------------------===// +// OrOp +//===----------------------------------------------------------------------===// - // The same holds true for offsets and strides. +def OrOp : IntBinaryOp<"or", [Commutative]> { + let summary = "integer binary or"; + let description = [{ + Syntax: - // Assert that the input dynamic shape matches the destination static stride. 
- %4 = memref_cast %1 : memref<12x4xf32, offset:?, strides: [?, ?]> to - memref<12x4xf32, offset:5, strides: [4, 1]> - // Erase static offset and stride information, replacing it with - // dynamic information. - %5 = memref_cast %1 : memref<12x4xf32, offset:5, strides: [4, 1]> to - memref<12x4xf32, offset:?, strides: [?, ?]> + ``` + operation ::= ssa-id `=` `or` ssa-use `,` ssa-use `:` type ``` - b. Either or both memref types are unranked with the same element type, and - address space. + The `or` operation takes two operands and returns one result, each of these + is required to be the same type. This type may be an integer scalar type, a + vector whose element type is integer, or a tensor of integers. It has no + standard attributes. Example: ```mlir - Cast to concrete shape. - %4 = memref_cast %1 : memref<*xf32> to memref<4x?xf32> + // Scalar integer bitwise or. + %a = or %b, %c : i64 + + // SIMD vector element-wise bitwise integer or. + %f = or %g, %h : vector<4xi32> - Erase rank information. - %5 = memref_cast %1 : memref<4x?xf32> to memref<*xf32> + // Tensor element-wise bitwise integer or. + %x = or %y, %z : tensor<4x?xi8> ``` }]; - - let arguments = (ins AnyRankedOrUnrankedMemRef:$source); - let results = (outs AnyRankedOrUnrankedMemRef); - let hasFolder = 1; } - //===----------------------------------------------------------------------===// -// MemRefReinterpretCastOp +// RankOp //===----------------------------------------------------------------------===// -def MemRefReinterpretCastOp: - BaseOpWithOffsetSizesAndStrides<"memref_reinterpret_cast", [ - NoSideEffect, ViewLikeOpInterface, OffsetSizeAndStrideOpInterface - ]> { - let summary = "memref reinterpret cast operation"; +def RankOp : Std_Op<"rank", [NoSideEffect]> { + let summary = "rank operation"; let description = [{ - Modify offset, sizes and strides of an unranked/ranked memref. - - Example: - ```mlir - memref_reinterpret_cast %ranked to - offset: [0], - sizes: [%size0, 10], - strides: [1, %stride1] - : memref to memref - - memref_reinterpret_cast %unranked to - offset: [%offset], - sizes: [%size0, %size1], - strides: [%stride0, %stride1] - : memref<*xf32> to memref - ``` - }]; - - let arguments = (ins - Arg:$source, - Variadic:$offsets, - Variadic:$sizes, - Variadic:$strides, - I64ArrayAttr:$static_offsets, - I64ArrayAttr:$static_sizes, - I64ArrayAttr:$static_strides - ); - let results = (outs AnyMemRef:$result); - - let assemblyFormat = [{ - $source `to` `offset` `` `:` - custom($offsets, $static_offsets) - `` `,` `sizes` `` `:` - custom($sizes, $static_sizes) `` `,` `strides` - `` `:` - custom($strides, $static_strides) - attr-dict `:` type($source) `to` type($result) - }]; - - let parser=?; - let printer=?; - - let builders = [ - // Build a ReinterpretCastOp with mixed static and dynamic entries. - OpBuilder<(ins "MemRefType":$resultType, "Value":$source, - "OpFoldResult":$offset, "ArrayRef":$sizes, - "ArrayRef":$strides, - CArg<"ArrayRef", "{}">:$attrs)>, - // Build a ReinterpretCastOp with static entries. - OpBuilder<(ins "MemRefType":$resultType, "Value":$source, - "int64_t":$offset, "ArrayRef":$sizes, - "ArrayRef":$strides, - CArg<"ArrayRef", "{}">:$attrs)>, - // Build a ReinterpretCastOp with dynamic entries. - OpBuilder<(ins "MemRefType":$resultType, "Value":$source, - "Value":$offset, "ValueRange":$sizes, - "ValueRange":$strides, - CArg<"ArrayRef", "{}">:$attrs)> - ]; - - let extraClassDeclaration = extraBaseClassDeclaration # [{ - // The result of the op is always a ranked memref. 
- MemRefType getType() { return getResult().getType().cast(); } - Value getViewSource() { return source(); } - - /// Return the rank of the source ShapedType. - unsigned getResultRank() { - return getResult().getType().cast().getRank(); - } - - /// Return the expected rank of each of the`static_offsets`, `static_sizes` - /// and `static_strides` attributes. - std::array getArrayAttrMaxRanks() { - unsigned resultRank = getResult().getType().cast().getRank(); - return {1, resultRank, resultRank}; - } - - /// Return the number of leading operands before the `offsets`, `sizes` and - /// and `strides` operands. - static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; } - }]; -} - -//===----------------------------------------------------------------------===// -// MemRefReshapeOp -//===----------------------------------------------------------------------===// - -def MemRefReshapeOp: Std_Op<"memref_reshape", [ - ViewLikeOpInterface, NoSideEffect]> { - let summary = "memref reshape operation"; - let description = [{ - The `memref_reshape` operation converts a memref from one type to an - equivalent type with a provided shape. The data is never copied or - modified. The source and destination types are compatible if both have the - same element type, same number of elements, address space and identity - layout map. The following combinations are possible: - - a. Source type is ranked or unranked. Shape argument has static size. - Result type is ranked. - - ```mlir - // Reshape statically-shaped memref. - %dst = memref_reshape %src(%shape) - : (memref<4x1xf32>, memref<1xi32>) to memref<4xf32> - %dst0 = memref_reshape %src(%shape0) - : (memref<4x1xf32>, memref<2xi32>) to memref<2x2xf32> - // Flatten unranked memref. - %dst = memref_reshape %src(%shape) - : (memref<*xf32>, memref<1xi32>) to memref - ``` - - a. Source type is ranked or unranked. Shape argument has dynamic size. - Result type is unranked. - - ```mlir - // Reshape dynamically-shaped 1D memref. - %dst = memref_reshape %src(%shape) - : (memref, memref) to memref<*xf32> - // Reshape unranked memref. - %dst = memref_reshape %src(%shape) - : (memref<*xf32>, memref) to memref<*xf32> - ``` - }]; - - let arguments = (ins - AnyRankedOrUnrankedMemRef:$source, - MemRefRankOf<[AnySignlessInteger, Index], [1]>:$shape - ); - let results = (outs AnyRankedOrUnrankedMemRef:$result); - - let builders = [OpBuilder< - (ins "MemRefType":$resultType, "Value":$operand, "Value":$shape), [{ - $_state.addOperands(operand); - $_state.addOperands(shape); - $_state.addTypes(resultType); - }]>]; - - let extraClassDeclaration = [{ - MemRefType getType() { return getResult().getType().cast(); } - Value getViewSource() { return source(); } - }]; - - let assemblyFormat = [{ - $source `(` $shape `)` attr-dict `:` functional-type(operands, results) - }]; -} - -//===----------------------------------------------------------------------===// -// MulFOp -//===----------------------------------------------------------------------===// - -def MulFOp : FloatBinaryOp<"mulf"> { - let summary = "floating point multiplication operation"; - let description = [{ - Syntax: - - ``` - operation ::= ssa-id `=` `std.mulf` ssa-use `,` ssa-use `:` type - ``` - - The `mulf` operation takes two operands and returns one result, each of - these is required to be the same type. This type may be a floating point - scalar type, a vector whose element type is a floating point type, or a - floating point tensor. - - Example: - - ```mlir - // Scalar multiplication. 
- %a = mulf %b, %c : f64 - - // SIMD pointwise vector multiplication, e.g. for Intel SSE. - %f = mulf %g, %h : vector<4xf32> - - // Tensor pointwise multiplication. - %x = mulf %y, %z : tensor<4x?xbf16> - ``` - - TODO: In the distant future, this will accept optional attributes for fast - math, contraction, rounding mode, and other controls. - }]; - let hasFolder = 1; -} - -//===----------------------------------------------------------------------===// -// MulIOp -//===----------------------------------------------------------------------===// - -def MulIOp : IntBinaryOp<"muli", [Commutative]> { - let summary = "integer multiplication operation"; - let hasFolder = 1; -} - -//===----------------------------------------------------------------------===// -// NegFOp -//===----------------------------------------------------------------------===// - -def NegFOp : FloatUnaryOp<"negf"> { - let summary = "floating point negation"; - let description = [{ - Syntax: - - ``` - operation ::= ssa-id `=` `negf` ssa-use `:` type - ``` - - The `negf` operation computes the negation of a given value. It takes one - operand and returns one result of the same type. This type may be a float - scalar type, a vector whose element type is float, or a tensor of floats. - It has no standard attributes. - - Example: - - ```mlir - // Scalar negation value. - %a = negf %b : f64 - - // SIMD vector element-wise negation value. - %f = negf %g : vector<4xf32> - - // Tensor element-wise negation value. - %x = negf %y : tensor<4x?xf8> - ``` - }]; -} - -//===----------------------------------------------------------------------===// -// OrOp -//===----------------------------------------------------------------------===// - -def OrOp : IntBinaryOp<"or", [Commutative]> { - let summary = "integer binary or"; - let description = [{ - Syntax: - - ``` - operation ::= ssa-id `=` `or` ssa-use `,` ssa-use `:` type - ``` - - The `or` operation takes two operands and returns one result, each of these - is required to be the same type. This type may be an integer scalar type, a - vector whose element type is integer, or a tensor of integers. It has no - standard attributes. - - Example: - - ```mlir - // Scalar integer bitwise or. - %a = or %b, %c : i64 - - // SIMD vector element-wise bitwise integer or. - %f = or %g, %h : vector<4xi32> - - // Tensor element-wise bitwise integer or. - %x = or %y, %z : tensor<4x?xi8> - ``` - }]; - let hasFolder = 1; -} - -//===----------------------------------------------------------------------===// -// PrefetchOp -//===----------------------------------------------------------------------===// - -def PrefetchOp : Std_Op<"prefetch"> { - let summary = "prefetch operation"; - let description = [{ - The "prefetch" op prefetches data from a memref location described with - subscript indices similar to std.load, and with three attributes: a - read/write specifier, a locality hint, and a cache type specifier as shown - below: - - ```mlir - prefetch %0[%i, %j], read, locality<3>, data : memref<400x400xi32> - ``` - - The read/write specifier is either 'read' or 'write', the locality hint - ranges from locality<0> (no locality) to locality<3> (extremely local keep - in cache). The cache type specifier is either 'data' or 'instr' - and specifies whether the prefetch is performed on data cache or on - instruction cache. 
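For contrast with the read example above, a write prefetch with minimal
temporal locality could be written as follows (the buffer name, index and
shape are illustrative):

```mlir
// Hint that %buf[%i] is about to be written and is unlikely to be reused.
prefetch %buf[%i], write, locality<0>, data : memref<1024xf32>
```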
- }]; - - let arguments = (ins AnyMemRef:$memref, Variadic:$indices, - BoolAttr:$isWrite, - Confined, - IntMaxValue<3>]>:$localityHint, - BoolAttr:$isDataCache); - - let extraClassDeclaration = [{ - MemRefType getMemRefType() { - return memref().getType().cast(); - } - static StringRef getLocalityHintAttrName() { return "localityHint"; } - static StringRef getIsWriteAttrName() { return "isWrite"; } - static StringRef getIsDataCacheAttrName() { return "isDataCache"; } - }]; - - let hasFolder = 1; -} - -//===----------------------------------------------------------------------===// -// RankOp -//===----------------------------------------------------------------------===// - -def RankOp : Std_Op<"rank", [NoSideEffect]> { - let summary = "rank operation"; - let description = [{ - The `rank` operation takes a memref/tensor operand and returns its rank. + The `rank` operation takes a memref/tensor operand and returns its rank. Example: @@ -2094,7 +1400,7 @@ }]; let arguments = (ins AnyTypeOf<[AnyRankedOrUnrankedMemRef, AnyTensor], - "any tensor or memref type">:$memrefOrTensor); + "any memref or tensor type">:$memrefOrTensor); let results = (outs Index); let verifier = ?; @@ -2486,77 +1792,6 @@ let assemblyFormat = "$input attr-dict `:` type($aggregate)"; } -//===----------------------------------------------------------------------===// -// StoreOp -//===----------------------------------------------------------------------===// - -def StoreOp : Std_Op<"store", - [TypesMatchWith<"type of 'value' matches element type of 'memref'", - "memref", "value", - "$_self.cast().getElementType()">, - MemRefsNormalizable]> { - let summary = "store operation"; - let description = [{ - Store a value to a memref location given by indices. The value stored should - have the same type as the elemental type of the memref. The number of - arguments provided within brackets need to match the rank of the memref. - - In an affine context, the indices of a store are restricted to SSA values - bound to surrounding loop induction variables, - [symbols](Affine.md#restrictions-on-dimensions-and-symbols), results of a - [`constant` operation](#stdconstant-constantop), or the result of an - [`affine.apply`](Affine.md#affineapply-affineapplyop) operation that can in turn - take as arguments all of the aforementioned SSA values or the recursively - result of such an `affine.apply` operation. - - Example: - - ```mlir - store %100, %A[%1, 1023] : memref<4x?xf32, #layout, memspace0> - ``` - - **Context:** The `load` and `store` operations are specifically crafted to - fully resolve a reference to an element of a memref, and (in polyhedral - `affine.if` and `affine.for` operations) the compiler can follow use-def - chains (e.g. through [`affine.apply`](Affine.md#affineapply-affineapplyop) - operations) to precisely analyze references at compile-time using polyhedral - techniques. This is possible because of the - [restrictions on dimensions and symbols](Affine.md#restrictions-on-dimensions-and-symbols) - in these contexts. 
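As a purely illustrative read-modify-write sequence that pairs `load` with
`store` (the value names and the 16x? shape are made up for this sketch):

```mlir
%v   = load %A[%i, %j] : memref<16x?xf32>
%two = constant 2.0 : f32
%w   = mulf %v, %two : f32
store %w, %A[%i, %j] : memref<16x?xf32>
```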
- }]; - - let arguments = (ins AnyType:$value, - Arg:$memref, - Variadic:$indices); - - let builders = [ - OpBuilder<(ins "Value":$valueToStore, "Value":$memref), [{ - $_state.addOperands(valueToStore); - $_state.addOperands(memref); - }]>]; - - let extraClassDeclaration = [{ - Value getValueToStore() { return getOperand(0); } - - Value getMemRef() { return getOperand(1); } - void setMemRef(Value value) { setOperand(1, value); } - MemRefType getMemRefType() { - return getMemRef().getType().cast(); - } - - operand_range getIndices() { - return {operand_begin() + 2, operand_end()}; - } - }]; - - let hasFolder = 1; - - let assemblyFormat = [{ - $value `,` $memref `[` $indices `]` attr-dict `:` type($memref) - }]; -} - //===----------------------------------------------------------------------===// // SubFOp //===----------------------------------------------------------------------===// @@ -2575,240 +1810,6 @@ let hasFolder = 1; } -//===----------------------------------------------------------------------===// -// SubViewOp -//===----------------------------------------------------------------------===// - -def SubViewOp : BaseOpWithOffsetSizesAndStrides< - "subview", [DeclareOpInterfaceMethods, - NoSideEffect, OffsetSizeAndStrideOpInterface] > { - let summary = "memref subview operation"; - let description = [{ - The "subview" operation converts a memref type to another memref type - which represents a reduced-size view of the original memref as specified by - the operation's offsets, sizes and strides arguments. - - The SubView operation supports the following arguments: - - * source: the "base" memref on which to create a "view" memref. - * offsets: memref-rank number of offsets into the "base" memref at which to - create the "view" memref. - * sizes: memref-rank number of sizes which specify the sizes of the result - "view" memref type. - * strides: memref-rank number of strides that compose multiplicatively with - the base memref strides in each dimension. - - The representation based on offsets, sizes and strides support a - partially-static specification via attributes specified through the - `static_offsets`, `static_sizes` and `static_strides` arguments. A special - sentinel value ShapedType::kDynamicSize and - ShapedType::kDynamicStrideOrOffset encodes that the corresponding entry has - a dynamic value. - - A subview operation may additionally reduce the rank of the resulting view - by removing dimensions that are statically known to be of size 1. - - Example 1: - - ```mlir - %0 = alloc() : memref<64x4xf32, (d0, d1) -> (d0 * 4 + d1)> - - // Create a sub-view of "base" memref '%0' with offset arguments '%c0', - // dynamic sizes for each dimension, and stride arguments '%c1'. - %1 = subview %0[%c0, %c0][%size0, %size1][%c1, %c1] - : memref<64x4xf32, (d0, d1) -> (d0 * 4 + d1) > to - memref (d0 * s1 + d1 + s0)> - ``` - - Example 2: - - ```mlir - %0 = alloc() : memref<8x16x4xf32, (d0, d1, d1) -> (d0 * 64 + d1 * 4 + d2)> - - // Create a sub-view of "base" memref '%0' with dynamic offsets, sizes, - // and strides. - // Note that dynamic offsets are represented by the linearized dynamic - // offset symbol 's0' in the subview memref layout map, and that the - // dynamic strides operands, after being applied to the base memref - // strides in each dimension, are represented in the view memref layout - // map as symbols 's1', 's2' and 's3'. 
- %1 = subview %0[%i, %j, %k][%size0, %size1, %size2][%x, %y, %z] - : memref<8x16x4xf32, (d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)> to - memref (d0 * s1 + d1 * s2 + d2 * s3 + s0)> - ``` - - Example 3: - - ```mlir - %0 = alloc() : memref<8x16x4xf32, (d0, d1, d1) -> (d0 * 64 + d1 * 4 + d2)> - - // Subview with constant offsets, sizes and strides. - %1 = subview %0[0, 2, 0][4, 4, 4][64, 4, 1] - : memref<8x16x4xf32, (d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)> to - memref<4x4x4xf32, (d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2 + 8)> - ``` - - Example 4: - - ```mlir - %0 = alloc(%arg0, %arg1) : memref - - // Subview with constant size, but dynamic offsets and - // strides. The resulting memref has a static shape, but if the - // base memref has an affine map to describe the layout, the result - // memref also uses an affine map to describe the layout. The - // strides of the result memref is computed as follows: - // - // Let #map1 represents the layout of the base memref, and #map2 - // represents the layout of the result memref. A #mapsubview can be - // constructed to map an index from the result memref to the base - // memref (note that the description below uses more convenient - // naming for symbols, while in affine maps, symbols are - // represented as unsigned numbers that identify that symbol in the - // given affine map. - // - // #mapsubview = (d0, d1)[o0, o1, t0, t1] -> (d0 * t0 + o0, d1 * t1 + o1) - // - // where, o0, o1, ... are offsets, and t0, t1, ... are strides. Then, - // - // #map2 = #map1.compose(#mapsubview) - // - // If the layout map is represented as - // - // #map1 = (d0, d1)[s0, s1, s2] -> (d0 * s1 + d1 * s2 + s0) - // - // then, - // - // #map2 = (d0, d1)[s0, s1, s2, o0, o1, t0, t1] -> - // (d0 * s1 * t0 + d1 * s2 * t1 + o0 * s1 + o1 * s2 + s0) - // - // Representing this canonically - // - // #map2 = (d0, d1)[r0, r1, r2] -> (d0 * r1 + d1 * r2 + r0) - // - // where, r0 = o0 * s1 + o1 * s2 + s0, r1 = s1 * t0, r2 = s2 * t1. - %1 = subview %0[%i, %j][4, 4][%x, %y] : - : memref (d0 * s1 + d1 * s2 + s0)> to - memref<4x4xf32, (d0, d1)[r0, r1, r2] -> (d0 * r1 + d1 * r2 + r0)> - - // Note that the subview op does not guarantee that the result - // memref is "inbounds" w.r.t to base memref. It is upto the client - // to ensure that the subview is accessed in a manner that is - // in-bounds. - ``` - - Example 5: - - ```mlir - // Rank-reducing subview. - %1 = subview %0[0, 0, 0][1, 16, 4][1, 1, 1] : - memref<8x16x4xf32> to memref<16x4xf32> - %3 = subview %2[3, 4, 2][1, 6, 3][1, 1, 1] : - memref<8x16x4xf32> to memref<6x3xf32, offset: 210, strides: [4, 1]> - ``` - } - }]; - - let arguments = (ins - AnyMemRef:$source, - Variadic:$offsets, - Variadic:$sizes, - Variadic:$strides, - I64ArrayAttr:$static_offsets, - I64ArrayAttr:$static_sizes, - I64ArrayAttr:$static_strides - ); - let results = (outs AnyMemRef:$result); - - let assemblyFormat = [{ - $source `` - custom($offsets, $static_offsets) - custom($sizes, $static_sizes) - custom($strides, $static_strides) - attr-dict `:` type($source) `to` type($result) - }]; - - let builders = [ - // Build a SubViewOp with mixed static and dynamic entries and custom - // result type. If the type passed is nullptr, it is inferred. - OpBuilder<(ins "Value":$source, "ArrayRef":$offsets, - "ArrayRef":$sizes, "ArrayRef":$strides, - CArg<"ArrayRef", "{}">:$attrs)>, - // Build a SubViewOp with mixed static and dynamic entries and inferred - // result type. 
- OpBuilder<(ins "MemRefType":$resultType, "Value":$source, - "ArrayRef":$offsets, "ArrayRef":$sizes, - "ArrayRef":$strides, - CArg<"ArrayRef", "{}">:$attrs)>, - // Build a SubViewOp with static entries and custom result type. If the - // type passed is nullptr, it is inferred. - OpBuilder<(ins "Value":$source, "ArrayRef":$offsets, - "ArrayRef":$sizes, "ArrayRef":$strides, - CArg<"ArrayRef", "{}">:$attrs)>, - // Build a SubViewOp with static entries and inferred result type. - OpBuilder<(ins "MemRefType":$resultType, "Value":$source, - "ArrayRef":$offsets, "ArrayRef":$sizes, - "ArrayRef":$strides, - CArg<"ArrayRef", "{}">:$attrs)>, - // Build a SubViewOp with dynamic entries and custom result type. If the - // type passed is nullptr, it is inferred. - OpBuilder<(ins "Value":$source, "ValueRange":$offsets, - "ValueRange":$sizes, "ValueRange":$strides, - CArg<"ArrayRef", "{}">:$attrs)>, - // Build a SubViewOp with dynamic entries and inferred result type. - OpBuilder<(ins "MemRefType":$resultType, "Value":$source, - "ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides, - CArg<"ArrayRef", "{}">:$attrs)> - ]; - - let extraClassDeclaration = extraBaseClassDeclaration # [{ - /// Returns the type of the base memref operand. - MemRefType getSourceType() { - return source().getType().cast(); - } - - /// The result of a subview is always a memref. - MemRefType getType() { return getResult().getType().cast(); } - - /// A subview result type can be fully inferred from the source type and the - /// static representation of offsets, sizes and strides. Special sentinels - /// encode the dynamic case. - static Type inferResultType(MemRefType sourceMemRefType, - ArrayRef staticOffsets, - ArrayRef staticSizes, - ArrayRef staticStrides); - static Type inferResultType(MemRefType sourceMemRefType, - ArrayRef staticOffsets, - ArrayRef staticSizes, - ArrayRef staticStrides); - static Type inferRankReducedResultType(unsigned resultRank, - MemRefType sourceMemRefType, - ArrayRef staticOffsets, - ArrayRef staticSizes, - ArrayRef staticStrides); - static Type inferRankReducedResultType(unsigned resultRank, - MemRefType sourceMemRefType, - ArrayRef staticOffsets, - ArrayRef staticSizes, - ArrayRef staticStrides); - - /// Return the expected rank of each of the`static_offsets`, `static_sizes` - /// and `static_strides` attributes. - std::array getArrayAttrMaxRanks() { - unsigned rank = getSourceType().getRank(); - return {rank, rank, rank}; - } - - /// Return the number of leading operands before the `offsets`, `sizes` and - /// and `strides` operands. - static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; } - }]; - - let hasCanonicalizer = 1; - let hasFolder = 1; -} - //===----------------------------------------------------------------------===// // SubTensorOp //===----------------------------------------------------------------------===// @@ -3046,170 +2047,6 @@ let hasFolder = 1; } -//===----------------------------------------------------------------------===// -// TensorLoadOp -//===----------------------------------------------------------------------===// - -def TensorLoadOp : Std_Op<"tensor_load", - [SameOperandsAndResultShape, SameOperandsAndResultElementType, - TypesMatchWith<"result type matches tensor equivalent of 'memref'", - "memref", "result", - "getTensorTypeFromMemRefType($_self)">]> { - let summary = "tensor load operation"; - let description = [{ - Create a tensor from a memref, making an independent copy of the element - data. 
The result value is a tensor whose shape and element type match the - memref operand. - - The opposite of this op is tensor_to_memref. Together, these two ops are - useful for source/target materializations when doing type conversions - involving tensors and memrefs. - - Example: - - ```mlir - // Produces a value of tensor<4x?xf32> type. - %12 = tensor_load %10 : memref<4x?xf32, #layout, memspace0> - ``` - }]; - - let arguments = (ins Arg:$memref); - let results = (outs AnyTensor:$result); - // TensorLoadOp is fully verified by traits. - let verifier = ?; - - let builders = [ - OpBuilder<(ins "Value":$memref), [{ - $_state.addOperands(memref); - $_state.addTypes(getTensorTypeFromMemRefType(memref.getType())); - }]>]; - - let extraClassDeclaration = [{ - /// The result of a tensor_load is always a tensor. - TensorType getType() { - Type resultType = getResult().getType(); - if (resultType.isa()) - return resultType.cast(); - return {}; - } - }]; - - let assemblyFormat = "$memref attr-dict `:` type($memref)"; - - let hasFolder = 1; -} - -//===----------------------------------------------------------------------===// -// TensorStoreOp -//===----------------------------------------------------------------------===// - -def TensorStoreOp : Std_Op<"tensor_store", - [SameOperandsShape, SameOperandsElementType, - TypesMatchWith<"type of 'value' matches tensor equivalent of 'memref'", - "memref", "tensor", - "getTensorTypeFromMemRefType($_self)">]> { - let summary = "tensor store operation"; - let description = [{ - Stores the contents of a tensor into a memref. The first operand is a value - of tensor type, the second operand is a value of memref type. The shapes and - element types of these must match, and are specified by the memref type. - - Example: - - ```mlir - %9 = dim %8, 1 : tensor<4x?xf32> - %10 = alloc(%9) : memref<4x?xf32, #layout, memspace0> - tensor_store %8, %10 : memref<4x?xf32, #layout, memspace0> - ``` - }]; - - let arguments = (ins AnyTensor:$tensor, Arg:$memref); - // TensorStoreOp is fully verified by traits. - let verifier = ?; - - let assemblyFormat = "$tensor `,` $memref attr-dict `:` type($memref)"; -} - -//===----------------------------------------------------------------------===// -// TensorToMemrefOp -//===----------------------------------------------------------------------===// - -def TensorToMemrefOp : Std_Op<"tensor_to_memref", - [SameOperandsAndResultShape, SameOperandsAndResultElementType, - TypesMatchWith<"type of 'tensor' is the tensor equivalent of 'memref'", - "memref", "tensor", - "getTensorTypeFromMemRefType($_self)">]> { - let summary = "tensor to memref operation"; - let description = [{ - Create a memref from a tensor. This is a transient op created as a - materialization during type conversions between tensors and memrefs. - - The opposite of this op is tensor_load. Together, these two ops are useful - for source/target materializations when doing type conversions involving - tensors and memrefs. - - This op is defined by the fold - `tensor_to_memref(tensor_load(%memref)) -> %memref`, which is the property - that makes it a valid materialization in the type conversion framework. - This implies that one cannot assume that this op allocates a new memref for - its result. - - Note: This op takes the memref type in its pretty form because the tensor - type can always be inferred from the memref type, but the reverse is not - true. For example, the memref might have a layout map or memory space which - cannot be inferred from the tensor type. 
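To make the defining fold concrete, a minimal round trip might look like this
(the memref type is illustrative):

```mlir
// Given some %m of type memref<4xf32>:
%t  = tensor_load %m : memref<4xf32>
%m2 = tensor_to_memref %t : memref<4xf32>
// %m2 folds to %m, so no fresh buffer needs to be materialized.
```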
- - ```mlir - // Result type is tensor<4x?xf32> - %12 = tensor_to_memref %10 : memref<4x?xf32, #map0, 42> - ``` - }]; - - let arguments = (ins AnyTensor:$tensor); - let results = (outs AnyRankedOrUnrankedMemRef:$memref); - // This op is fully verified by traits. - let verifier = ?; - - let assemblyFormat = "$tensor attr-dict `:` type($memref)"; - - let hasFolder = 1; - let hasCanonicalizer = 1; -} - -//===----------------------------------------------------------------------===// -// TransposeOp -//===----------------------------------------------------------------------===// - -def TransposeOp : Std_Op<"transpose", [NoSideEffect]>, - Arguments<(ins AnyStridedMemRef:$in, AffineMapAttr:$permutation)>, - Results<(outs AnyStridedMemRef)> { - let summary = "`transpose` produces a new strided memref (metadata-only)"; - let description = [{ - The `transpose` op produces a strided memref whose sizes and strides - are a permutation of the original `in` memref. This is purely a metadata - transformation. - - Example: - - ```mlir - %1 = transpose %0 (i, j) -> (j, i) : memref to memref (d1 * s0 + d0)>> - ``` - }]; - - let builders = [ - OpBuilder<(ins "Value":$in, "AffineMapAttr":$permutation, - CArg<"ArrayRef", "{}">:$attrs)>]; - - let extraClassDeclaration = [{ - static StringRef getPermutationAttrName() { return "permutation"; } - ShapedType getShapedType() { return in().getType().cast(); } - }]; - - let hasFolder = 1; -} - //===----------------------------------------------------------------------===// // TruncateIOp //===----------------------------------------------------------------------===// @@ -3357,74 +2194,6 @@ }]; } -//===----------------------------------------------------------------------===// -// ViewOp -//===----------------------------------------------------------------------===// - -def ViewOp : Std_Op<"view", [ - DeclareOpInterfaceMethods, NoSideEffect]> { - let summary = "memref view operation"; - let description = [{ - The "view" operation extracts an N-D contiguous memref with empty layout map - with arbitrary element type from a 1-D contiguous memref with empty layout - map of i8 element type. The ViewOp supports the following arguments: - - * A single dynamic byte-shift operand must be specified which represents a - a shift of the base 1-D memref pointer from which to create the resulting - contiguous memref view with identity layout. - * A dynamic size operand that must be specified for each dynamic dimension - in the resulting view memref type. - - The "view" operation gives a structured indexing form to a flat 1-D buffer. - Unlike "subview" it can perform a type change. The type change behavior - requires the op to have special semantics because, e.g. a byte shift of 3 - cannot be represented as an offset on f64. - For now, a "view" op: - - 1. Only takes a contiguous source memref with 0 offset and empty layout. - 2. Must specify a byte_shift operand (in the future, a special integer - attribute may be added to support the folded case). - 3. Returns a contiguous memref with 0 offset and empty layout. - - Example: - - ```mlir - // Allocate a flat 1D/i8 memref. - %0 = alloc() : memref<2048xi8> - - // ViewOp with dynamic offset and static sizes. - %1 = view %0[%offset_1024][] : memref<2048xi8> to memref<64x4xf32> - - // ViewOp with dynamic offset and two dynamic size. 
- %2 = view %0[%offset_1024][%size0, %size1] : - memref<2048xi8> to memref - ``` - }]; - - let arguments = (ins MemRefRankOf<[I8], [1]>:$source, - Index:$byte_shift, - Variadic:$sizes); - let results = (outs AnyMemRef); - - let extraClassDeclaration = [{ - /// The result of a view is always a memref. - MemRefType getType() { return getResult().getType().cast(); } - - /// Returns the dynamic sizes for this view operation. This is redundant - /// with `sizes` but needed in template implementations. More specifically: - /// ``` - /// template - /// bool isMemRefSizeValidSymbol(AnyMemRefDefOp memrefDefOp, unsigned index, - /// Region *region) - /// ``` - operand_range getDynamicSizes() { - return {sizes().begin(), sizes().end()}; - } - }]; - - let hasCanonicalizer = 1; -} - //===----------------------------------------------------------------------===// // XOrOp //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h --- a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h @@ -37,7 +37,7 @@ /// Creates an instance of the StdExpand pass that legalizes Std /// dialect ops to be convertible to LLVM. For example, /// `std.ceildivi_signed` gets transformed to a number of std operations, -/// which can be lowered to LLVM; `memref_reshape` gets converted to +/// which can be lowered to LLVM; `memref.reshape` gets converted to /// `memref_reinterpret_cast`. std::unique_ptr createStdExpandOpsPass(); diff --git a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.td b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.td --- a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.td @@ -44,9 +44,10 @@ implement the `ReturnLike` trait are not rewritten in general, as they require that the corresponding parent operation is also rewritten. Finally, this pass fails for unknown terminators, as we cannot decide - whether they need rewriting. + whether they need rewriting. }]; let constructor = "mlir::createFuncBufferizePass()"; + let dependentDialects = ["memref::MemRefDialect"]; } def TensorConstantBufferize : Pass<"tensor-constant-bufferize", "ModuleOp"> { @@ -54,12 +55,13 @@ let description = [{ This pass bufferizes tensor constants. - This pass needs to be a module pass because it inserts std.global_memref + This pass needs to be a module pass because it inserts memref.global ops into the module, which cannot be done safely from a function pass due to multi-threading. Most other bufferization passes can run in parallel at function granularity. }]; let constructor = "mlir::createTensorConstantBufferizePass()"; + let dependentDialects = ["memref::MemRefDialect"]; } #endif // MLIR_DIALECT_STANDARD_TRANSFORMS_PASSES diff --git a/mlir/include/mlir/Dialect/StandardOps/Utils/Utils.h b/mlir/include/mlir/Dialect/StandardOps/Utils/Utils.h --- a/mlir/include/mlir/Dialect/StandardOps/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/StandardOps/Utils/Utils.h @@ -16,6 +16,9 @@ #ifndef MLIR_DIALECT_STANDARDOPS_UTILS_UTILS_H #define MLIR_DIALECT_STANDARDOPS_UTILS_UTILS_H +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/Matchers.h" +#include "mlir/IR/PatternMatch.h" #include "mlir/IR/Value.h" namespace mlir { @@ -27,6 +30,51 @@ /// constructing the necessary DimOp operators. 
SmallVector getDynOperands(Location loc, Value val, OpBuilder &b); +/// Matches a ConstantIndexOp. +detail::op_matcher matchConstantIndex(); + +/// Detects the `values` produced by a ConstantIndexOp and places the new +/// constant in place of the corresponding sentinel value. +void canonicalizeSubViewPart(SmallVectorImpl &values, + function_ref isDynamic); + +void getPositionsOfShapeOne(unsigned rank, ArrayRef shape, + llvm::SmallDenseSet &dimsToProject); + +/// Pattern to rewrite a subview op with constant arguments. +template +class OpWithOffsetSizesAndStridesConstantArgumentFolder final + : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(OpType op, + PatternRewriter &rewriter) const override { + // No constant operand, just return; + if (llvm::none_of(op.getOperands(), [](Value operand) { + return matchPattern(operand, matchConstantIndex()); + })) + return failure(); + + // At least one of offsets/sizes/strides is a new constant. + // Form the new list of operands and constant attributes from the existing. + SmallVector mixedOffsets(op.getMixedOffsets()); + SmallVector mixedSizes(op.getMixedSizes()); + SmallVector mixedStrides(op.getMixedStrides()); + canonicalizeSubViewPart(mixedOffsets, ShapedType::isDynamicStrideOrOffset); + canonicalizeSubViewPart(mixedSizes, ShapedType::isDynamic); + canonicalizeSubViewPart(mixedStrides, ShapedType::isDynamicStrideOrOffset); + + // Create the new op in canonical form. + auto newOp = rewriter.create(op.getLoc(), op.source(), mixedOffsets, + mixedSizes, mixedStrides); + CastOpFunc func; + func(rewriter, op, newOp); + + return success(); + } +}; + } // end namespace mlir #endif // MLIR_DIALECT_STANDARDOPS_UTILS_UTILS_H diff --git a/mlir/include/mlir/Dialect/Vector/VectorTransforms.h b/mlir/include/mlir/Dialect/Vector/VectorTransforms.h --- a/mlir/include/mlir/Dialect/Vector/VectorTransforms.h +++ b/mlir/include/mlir/Dialect/Vector/VectorTransforms.h @@ -180,11 +180,11 @@ /// ``` /// %1:3 = scf.if (%inBounds) { /// // fastpath, direct cast -/// memref_cast %A: memref to compatibleMemRefType +/// memref.cast %A: memref to compatibleMemRefType /// scf.yield %view : compatibleMemRefType, index, index /// } else { /// // slowpath, masked vector.transfer or linalg.copy. -/// memref_cast %alloc: memref to compatibleMemRefType +/// memref.cast %alloc: memref to compatibleMemRefType /// scf.yield %4 : compatibleMemRefType, index, index // } /// %0 = vector.transfer_read %1#0[%1#1, %1#2] {masked = [false ... false]} diff --git a/mlir/include/mlir/IR/OpDefinition.h b/mlir/include/mlir/IR/OpDefinition.h --- a/mlir/include/mlir/IR/OpDefinition.h +++ b/mlir/include/mlir/IR/OpDefinition.h @@ -1133,7 +1133,7 @@ /// A trait of region holding operations that define a new scope for automatic /// allocations, i.e., allocations that are freed when control is transferred /// back from the operation's region. Any operations performing such allocations -/// (for eg. std.alloca) will have their allocations automatically freed at +/// (for eg. memref.alloca) will have their allocations automatically freed at /// their closest enclosing operation with this trait. 
template class AutomaticAllocationScope diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h --- a/mlir/include/mlir/InitAllDialects.h +++ b/mlir/include/mlir/InitAllDialects.h @@ -28,6 +28,7 @@ #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/OpenACC/OpenACC.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/Dialect/PDL/IR/PDL.h" @@ -60,6 +61,7 @@ LLVM::LLVMArmSVEDialect, linalg::LinalgDialect, math::MathDialect, + memref::MemRefDialect, scf::SCFDialect, omp::OpenMPDialect, pdl::PDLDialect, diff --git a/mlir/include/mlir/Transforms/Bufferize.h b/mlir/include/mlir/Transforms/Bufferize.h --- a/mlir/include/mlir/Transforms/Bufferize.h +++ b/mlir/include/mlir/Transforms/Bufferize.h @@ -54,7 +54,7 @@ /// Populate patterns to eliminate bufferize materializations. /// -/// In particular, these are the tensor_load/tensor_to_memref ops. +/// In particular, these are the tensor_load/buffer_cast ops. void populateEliminateBufferizeMaterializationsPatterns( MLIRContext *context, BufferizeTypeConverter &typeConverter, OwningRewritePatternList &patterns); diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h --- a/mlir/include/mlir/Transforms/Passes.h +++ b/mlir/include/mlir/Transforms/Passes.h @@ -54,7 +54,7 @@ createPromoteBuffersToStackPass(std::function isSmallAlloc); /// Creates a pass that finalizes a partial bufferization by removing remaining -/// tensor_load and tensor_to_memref operations. +/// tensor_load and buffer_cast operations. std::unique_ptr createFinalizingBufferizePass(); /// Creates a pass that converts memref function results to out-params. diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td --- a/mlir/include/mlir/Transforms/Passes.td +++ b/mlir/include/mlir/Transforms/Passes.td @@ -352,7 +352,7 @@ works for static shaped memrefs. }]; let constructor = "mlir::createBufferResultsToOutParamsPass()"; - let dependentDialects = ["linalg::LinalgDialect"]; + let dependentDialects = ["linalg::LinalgDialect", "memref::MemRefDialect"]; } def Canonicalizer : Pass<"canonicalize"> { @@ -363,6 +363,7 @@ details. }]; let constructor = "mlir::createCanonicalizerPass()"; + let dependentDialects = ["memref::MemRefDialect"]; } def CopyRemoval : FunctionPass<"copy-removal"> { @@ -406,11 +407,11 @@ let summary = "Finalize a partial bufferization"; let description = [{ A bufferize pass that finalizes a partial bufferization by removing - remaining `tensor_load` and `tensor_to_memref` operations. + remaining `memref.tensor_load` and `memref.buffer_cast` operations. The removal of those operations is only possible if the operations only - exist in pairs, i.e., all uses of `tensor_load` operations are - `tensor_to_memref` operations. + exist in pairs, i.e., all uses of `memref.tensor_load` operations are + `memref.buffer_cast` operations. This pass will fail if not all operations can be removed or if any operation with tensor typed operands remains. @@ -535,7 +536,7 @@ contained in the op. Operations marked with the [MemRefsNormalizable] (https://mlir.llvm.org/docs/Traits/#memrefsnormalizable) trait are expected to be normalizable. Supported operations include affine - operations, std.alloc, std.dealloc, and std.return. + operations, memref.alloc, memref.dealloc, and std.return. 
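For intuition, a sketch of the kind of rewrite this enables (the 4x4 tiling
map, the function name and the types are illustrative, not taken from the
pass implementation):

```mlir
#tile = affine_map<(d0) -> (d0 floordiv 4, d0 mod 4)>

// Before: a 1-D memref carrying a tiled layout map.
func @load_elt(%A: memref<16xf64, #tile>, %i: index) -> f64 {
  %v = affine.load %A[%i] : memref<16xf64, #tile>
  return %v : f64
}
// After normalization the map is folded into the shape, conceptually:
//   %A : memref<4x4xf64>, accessed as %A[%i floordiv 4, %i mod 4].
```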
Given an appropriate layout map specified in the code, this transformation can express tiled or linearized access to multi-dimensional data diff --git a/mlir/include/mlir/Transforms/Utils.h b/mlir/include/mlir/Transforms/Utils.h --- a/mlir/include/mlir/Transforms/Utils.h +++ b/mlir/include/mlir/Transforms/Utils.h @@ -28,6 +28,10 @@ class Location; class OpBuilder; +namespace memref { +class AllocOp; +} // end namespace memref + /// Replaces all "dereferencing" uses of `oldMemRef` with `newMemRef` while /// optionally remapping the old memref's indices using the supplied affine map, /// `indexRemap`. The new memref could be of a different shape or rank. @@ -88,7 +92,7 @@ /// Rewrites the memref defined by this alloc op to have an identity layout map /// and updates all its indexing uses. Returns failure if any of its uses /// escape (while leaving the IR in a valid state). -LogicalResult normalizeMemRef(AllocOp op); +LogicalResult normalizeMemRef(memref::AllocOp *op); /// Uses the old memref type map layout and computes the new memref type to have /// a new shape and a layout map, where the old layout map has been normalized diff --git a/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp b/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp --- a/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp +++ b/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp @@ -15,6 +15,7 @@ #include "../PassDetail.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Vector/VectorOps.h" @@ -44,7 +45,8 @@ : builder(builder), dimValues(dimValues), symbolValues(symbolValues), loc(loc) {} - template Value buildBinaryExpr(AffineBinaryOpExpr expr) { + template + Value buildBinaryExpr(AffineBinaryOpExpr expr) { auto lhs = visit(expr.getLHS()); auto rhs = visit(expr.getRHS()); if (!lhs || !rhs) @@ -563,8 +565,8 @@ }; /// Apply the affine map from an 'affine.load' operation to its operands, and -/// feed the results to a newly created 'std.load' operation (which replaces the -/// original 'affine.load'). +/// feed the results to a newly created 'memref.load' operation (which replaces +/// the original 'affine.load'). class AffineLoadLowering : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; @@ -579,14 +581,14 @@ return failure(); // Build vector.load memref[expandedMap.results]. - rewriter.replaceOpWithNewOp(op, op.getMemRef(), - *resultOperands); + rewriter.replaceOpWithNewOp(op, op.getMemRef(), + *resultOperands); return success(); } }; /// Apply the affine map from an 'affine.prefetch' operation to its operands, -/// and feed the results to a newly created 'std.prefetch' operation (which +/// and feed the results to a newly created 'memref.prefetch' operation (which /// replaces the original 'affine.prefetch'). class AffinePrefetchLowering : public OpRewritePattern { public: @@ -601,16 +603,16 @@ if (!resultOperands) return failure(); - // Build std.prefetch memref[expandedMap.results]. - rewriter.replaceOpWithNewOp(op, op.memref(), *resultOperands, - op.isWrite(), op.localityHint(), - op.isDataCache()); + // Build memref.prefetch memref[expandedMap.results]. 
+ rewriter.replaceOpWithNewOp( + op, op.memref(), *resultOperands, op.isWrite(), op.localityHint(), + op.isDataCache()); return success(); } }; /// Apply the affine map from an 'affine.store' operation to its operands, and -/// feed the results to a newly created 'std.store' operation (which replaces +/// feed the results to a newly created 'memref.store' operation (which replaces /// the original 'affine.store'). class AffineStoreLowering : public OpRewritePattern { public: @@ -625,8 +627,8 @@ if (!maybeExpandedMap) return failure(); - // Build std.store valueToStore, memref[expandedMap.results]. - rewriter.replaceOpWithNewOp( + // Build memref.store valueToStore, memref[expandedMap.results]. + rewriter.replaceOpWithNewOp( op, op.getValueToStore(), op.getMemRef(), *maybeExpandedMap); return success(); } @@ -634,7 +636,8 @@ /// Apply the affine maps from an 'affine.dma_start' operation to each of their /// respective map operands, and feed the results to a newly created -/// 'std.dma_start' operation (which replaces the original 'affine.dma_start'). +/// 'memref.dma_start' operation (which replaces the original +/// 'affine.dma_start'). class AffineDmaStartLowering : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; @@ -663,8 +666,8 @@ if (!maybeExpandedTagMap) return failure(); - // Build std.dma_start operation with affine map results. - rewriter.replaceOpWithNewOp( + // Build memref.dma_start operation with affine map results. + rewriter.replaceOpWithNewOp( op, op.getSrcMemRef(), *maybeExpandedSrcMap, op.getDstMemRef(), *maybeExpandedDstMap, op.getNumElements(), op.getTagMemRef(), *maybeExpandedTagMap, op.getStride(), op.getNumElementsPerStride()); @@ -673,7 +676,7 @@ }; /// Apply the affine map from an 'affine.dma_wait' operation tag memref, -/// and feed the results to a newly created 'std.dma_wait' operation (which +/// and feed the results to a newly created 'memref.dma_wait' operation (which /// replaces the original 'affine.dma_wait'). class AffineDmaWaitLowering : public OpRewritePattern { public: @@ -688,8 +691,8 @@ if (!maybeExpandedTagMap) return failure(); - // Build std.dma_wait operation with affine map results. - rewriter.replaceOpWithNewOp( + // Build memref.dma_wait operation with affine map results. 
+ rewriter.replaceOpWithNewOp( op, op.getTagMemRef(), *maybeExpandedTagMap, op.getNumElements()); return success(); } @@ -777,8 +780,8 @@ populateAffineToStdConversionPatterns(patterns, &getContext()); populateAffineToVectorConversionPatterns(patterns, &getContext()); ConversionTarget target(getContext()); - target - .addLegalDialect(); + target.addLegalDialect(); if (failed(applyPartialConversion(getOperation(), target, std::move(patterns)))) signalPassFailure(); diff --git a/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt b/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt --- a/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt +++ b/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt @@ -12,6 +12,7 @@ LINK_LIBS PUBLIC MLIRAffine + MLIRMemRef MLIRSCF MLIRPass MLIRStandard diff --git a/mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt b/mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt --- a/mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt +++ b/mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt @@ -13,6 +13,7 @@ MLIRGPU MLIRGPUToGPURuntimeTransforms MLIRLLVMIR + MLIRMemRef MLIRNVVMIR MLIRPass MLIRStandardToLLVM diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -18,6 +18,7 @@ #include "mlir/Dialect/GPU/Passes.h" #include "mlir/Dialect/LLVMIR/NVVMDialect.h" #include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/IR/BlockAndValueMapping.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" diff --git a/mlir/lib/Conversion/LinalgToStandard/CMakeLists.txt b/mlir/lib/Conversion/LinalgToStandard/CMakeLists.txt --- a/mlir/lib/Conversion/LinalgToStandard/CMakeLists.txt +++ b/mlir/lib/Conversion/LinalgToStandard/CMakeLists.txt @@ -14,6 +14,7 @@ MLIREDSC MLIRIR MLIRLinalg + MLIRMemRef MLIRPass MLIRSCF MLIRTransforms diff --git a/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp b/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp --- a/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp +++ b/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp @@ -12,6 +12,7 @@ #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/Dialect/Linalg/Transforms/Transforms.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" @@ -93,7 +94,7 @@ continue; } Value cast = - b.create(loc, eraseStridedLayout(memrefType), op); + b.create(loc, eraseStridedLayout(memrefType), op); res.push_back(cast); } return res; @@ -143,12 +144,12 @@ // If either inputPerm or outputPerm are non-identities, insert transposes. auto inputPerm = op.inputPermutation(); if (inputPerm.hasValue() && !inputPerm->isIdentity()) - in = rewriter.create(op.getLoc(), in, - AffineMapAttr::get(*inputPerm)); + in = rewriter.create(op.getLoc(), in, + AffineMapAttr::get(*inputPerm)); auto outputPerm = op.outputPermutation(); if (outputPerm.hasValue() && !outputPerm->isIdentity()) - out = rewriter.create(op.getLoc(), out, - AffineMapAttr::get(*outputPerm)); + out = rewriter.create(op.getLoc(), out, + AffineMapAttr::get(*outputPerm)); // If nothing was transposed, fail and let the conversion kick in. 
if (in == op.input() && out == op.output()) @@ -213,7 +214,8 @@ void ConvertLinalgToStandardPass::runOnOperation() { auto module = getOperation(); ConversionTarget target(getContext()); - target.addLegalDialect(); + target.addLegalDialect(); target.addLegalOp(); target.addLegalOp(); OwningRewritePatternList patterns; diff --git a/mlir/lib/Conversion/PassDetail.h b/mlir/lib/Conversion/PassDetail.h --- a/mlir/lib/Conversion/PassDetail.h +++ b/mlir/lib/Conversion/PassDetail.h @@ -38,6 +38,10 @@ class NVVMDialect; } // end namespace NVVM +namespace memref { +class MemRefDialect; +} // end namespace memref + namespace omp { class OpenMPDialect; } // end namespace omp diff --git a/mlir/lib/Conversion/SCFToGPU/CMakeLists.txt b/mlir/lib/Conversion/SCFToGPU/CMakeLists.txt --- a/mlir/lib/Conversion/SCFToGPU/CMakeLists.txt +++ b/mlir/lib/Conversion/SCFToGPU/CMakeLists.txt @@ -15,6 +15,7 @@ MLIRGPU MLIRIR MLIRLinalg + MLIRMemRef MLIRPass MLIRStandard MLIRSupport diff --git a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp --- a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp +++ b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp @@ -18,6 +18,7 @@ #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/GPU/GPUDialect.h" #include "mlir/Dialect/GPU/ParallelLoopMapper.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/AffineExpr.h" @@ -647,6 +648,7 @@ } void mlir::configureParallelLoopToGPULegality(ConversionTarget &target) { + target.addLegalDialect(); target.addDynamicallyLegalOp([](scf::ParallelOp parallelOp) { return !parallelOp->getAttr(gpu::getMappingAttrName()); }); diff --git a/mlir/lib/Conversion/ShapeToStandard/CMakeLists.txt b/mlir/lib/Conversion/ShapeToStandard/CMakeLists.txt --- a/mlir/lib/Conversion/ShapeToStandard/CMakeLists.txt +++ b/mlir/lib/Conversion/ShapeToStandard/CMakeLists.txt @@ -19,6 +19,7 @@ LINK_LIBS PUBLIC MLIREDSC MLIRIR + MLIRMemRef MLIRShape MLIRTensor MLIRPass diff --git a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp --- a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp +++ b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp @@ -9,6 +9,7 @@ #include "mlir/Conversion/ShapeToStandard/ShapeToStandard.h" #include "../PassDetail.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/Shape/IR/Shape.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" @@ -139,7 +140,7 @@ // dimension in the tensor. SmallVector ranks, rankDiffs; llvm::append_range(ranks, llvm::map_range(transformed.shapes(), [&](Value v) { - return lb.create(v, zero); + return lb.create(v, zero); })); // Find the maximum rank @@ -252,7 +253,7 @@ // dimension in the tensor. SmallVector ranks, rankDiffs; llvm::append_range(ranks, llvm::map_range(transformed.shapes(), [&](Value v) { - return lb.create(v, zero); + return lb.create(v, zero); })); // Find the maximum rank @@ -344,8 +345,8 @@ // circumvents the necessity to materialize the shape in memory. 
if (auto shapeOfOp = op.shape().getDefiningOp()) { if (shapeOfOp.arg().getType().isa()) { - rewriter.replaceOpWithNewOp(op, shapeOfOp.arg(), - transformed.dim()); + rewriter.replaceOpWithNewOp(op, shapeOfOp.arg(), + transformed.dim()); return success(); } } @@ -375,7 +376,7 @@ return failure(); shape::RankOp::Adaptor transformed(operands); - rewriter.replaceOpWithNewOp(op, transformed.shape(), 0); + rewriter.replaceOpWithNewOp(op, transformed.shape(), 0); return success(); } @@ -404,7 +405,8 @@ Value zero = rewriter.create(loc, 0); Value one = rewriter.create(loc, 1); Type indexTy = rewriter.getIndexType(); - Value rank = rewriter.create(loc, indexTy, transformed.shape(), zero); + Value rank = + rewriter.create(loc, indexTy, transformed.shape(), zero); auto loop = rewriter.create( loc, zero, rank, one, op.initVals(), @@ -490,11 +492,12 @@ Type indexTy = rewriter.getIndexType(); Value zero = rewriter.create(loc, 0); Value firstShape = transformed.shapes().front(); - Value firstRank = rewriter.create(loc, indexTy, firstShape, zero); + Value firstRank = + rewriter.create(loc, indexTy, firstShape, zero); Value result = nullptr; // Generate a linear sequence of compares, all with firstShape as lhs. for (Value shape : transformed.shapes().drop_front(1)) { - Value rank = rewriter.create(loc, indexTy, shape, zero); + Value rank = rewriter.create(loc, indexTy, shape, zero); Value eqRank = rewriter.create(loc, CmpIPredicate::eq, firstRank, rank); auto same = rewriter.create( @@ -559,7 +562,7 @@ int64_t rank = rankedTensorTy.getRank(); for (int64_t i = 0; i < rank; i++) { if (rankedTensorTy.isDynamicDim(i)) { - Value extent = rewriter.create(loc, tensor, i); + Value extent = rewriter.create(loc, tensor, i); extentValues.push_back(extent); } else { Value extent = @@ -583,7 +586,7 @@ op, getExtentTensorType(ctx), ValueRange{rank}, [&](OpBuilder &b, Location loc, ValueRange args) { Value dim = args.front(); - Value extent = b.create(loc, tensor, dim); + Value extent = b.create(loc, tensor, dim); b.create(loc, extent); }); @@ -613,7 +616,7 @@ SplitAtOp::Adaptor transformed(op); ImplicitLocOpBuilder b(op.getLoc(), rewriter); Value zero = b.create(0); - Value rank = b.create(transformed.operand(), zero); + Value rank = b.create(transformed.operand(), zero); // index < 0 ? index + rank : index Value originalIndex = transformed.index(); @@ -670,8 +673,8 @@ // Setup target legality. MLIRContext &ctx = getContext(); ConversionTarget target(ctx); - target - .addLegalDialect(); + target.addLegalDialect(); target.addLegalOp(); // Setup conversion patterns. 
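
The conversion hunks above all apply the same mechanical recipe: memref-related ops that used to be spelled as standard-dialect classes (LoadOp, StoreOp, DimOp, MemRefCastOp, the DMA ops, ...) are now built through their memref:: counterparts, the MLIRMemRef library is added as a link dependency, and every pass or conversion target that still produces those ops declares the new dialect explicitly. Below is a minimal sketch of that recipe; the pattern and helper names (FoldLoadOfCast, markMemRefLegal) are hypothetical and not part of this patch.

```c++
// Illustrative sketch only, not part of this patch. It reuses the headers and
// APIs already visible in the hunks above; the names are hypothetical.
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Transforms/DialectConversion.h"

using namespace mlir;

namespace {
/// Hypothetical pattern: forward a memref.load through a producing
/// memref.cast, written against the new op names. Before this change the same
/// code would have named LoadOp and MemRefCastOp from the standard dialect.
struct FoldLoadOfCast : public OpRewritePattern<memref::LoadOp> {
  using OpRewritePattern<memref::LoadOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(memref::LoadOp loadOp,
                                PatternRewriter &rewriter) const override {
    auto castOp = loadOp.memref().getDefiningOp<memref::CastOp>();
    // Only fold casts between ranked memrefs, mirroring the foldMemRefCast
    // guard elsewhere in this patch.
    if (!castOp || castOp.getOperand().getType().isa<UnrankedMemRefType>())
      return failure();
    // Re-create the load against the cast's source; the indices are unchanged.
    rewriter.replaceOpWithNewOp<memref::LoadOp>(loadOp, castOp.getOperand(),
                                                loadOp.indices());
    return success();
  }
};
} // namespace

/// Conversion targets need the same treatment: memref ops are no longer
/// covered by marking StandardOpsDialect legal, so the dialect is listed
/// explicitly (hypothetical helper).
static void markMemRefLegal(ConversionTarget &target) {
  target.addLegalDialect<memref::MemRefDialect>();
}
```

The same shape recurs in every conversion touched by this change: only the op spelling, the dialect registration, and the CMake dependency move; the builder and pattern APIs themselves are unchanged.
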
diff --git a/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt b/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt --- a/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt +++ b/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt @@ -14,5 +14,6 @@ LINK_LIBS PUBLIC MLIRLLVMIR MLIRMath + MLIRMemRef MLIRTransforms ) diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -17,6 +17,7 @@ #include "mlir/Dialect/LLVMIR/FunctionCallUtils.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/BlockAndValueMapping.h" @@ -1864,13 +1865,13 @@ struct AllocOpLowering : public AllocLikeOpLowering { AllocOpLowering(LLVMTypeConverter &converter) - : AllocLikeOpLowering(AllocOp::getOperationName(), converter) {} + : AllocLikeOpLowering(memref::AllocOp::getOperationName(), converter) {} std::tuple allocateBuffer(ConversionPatternRewriter &rewriter, Location loc, Value sizeBytes, Operation *op) const override { // Heap allocations. - AllocOp allocOp = cast(op); + memref::AllocOp allocOp = cast(op); MemRefType memRefType = allocOp.getType(); Value alignment; @@ -1917,7 +1918,7 @@ struct AlignedAllocOpLowering : public AllocLikeOpLowering { AlignedAllocOpLowering(LLVMTypeConverter &converter) - : AllocLikeOpLowering(AllocOp::getOperationName(), converter) {} + : AllocLikeOpLowering(memref::AllocOp::getOperationName(), converter) {} /// Returns the memref's element size in bytes. // TODO: there are other places where this is used. Expose publicly? @@ -1950,7 +1951,7 @@ /// Returns the alignment to be used for the allocation call itself. /// aligned_alloc requires the allocation size to be a power of two, and the /// allocation size to be a multiple of alignment, - int64_t getAllocationAlignment(AllocOp allocOp) const { + int64_t getAllocationAlignment(memref::AllocOp allocOp) const { if (Optional alignment = allocOp.alignment()) return *alignment; @@ -1966,7 +1967,7 @@ Location loc, Value sizeBytes, Operation *op) const override { // Heap allocations. - AllocOp allocOp = cast(op); + memref::AllocOp allocOp = cast(op); MemRefType memRefType = allocOp.getType(); int64_t alignment = getAllocationAlignment(allocOp); Value allocAlignment = createIndexConstant(rewriter, loc, alignment); @@ -1997,7 +1998,7 @@ struct AllocaOpLowering : public AllocLikeOpLowering { AllocaOpLowering(LLVMTypeConverter &converter) - : AllocLikeOpLowering(AllocaOp::getOperationName(), converter) {} + : AllocLikeOpLowering(memref::AllocaOp::getOperationName(), converter) {} /// Allocates the underlying buffer using the right call. `allocatedBytePtr` /// is set to null for stack allocations. `accessAlignment` is set if @@ -2008,7 +2009,7 @@ // With alloca, one gets a pointer to the element type right away. // For stack allocations. - auto allocaOp = cast(op); + auto allocaOp = cast(op); auto elementPtrType = this->getElementPtrType(allocaOp.getType()); auto allocatedElementPtr = rewriter.create( @@ -2180,17 +2181,17 @@ // A `dealloc` is converted into a call to `free` on the underlying data buffer. // The memref descriptor being an SSA value, there is no need to clean it up // in any way. 
-struct DeallocOpLowering : public ConvertOpToLLVMPattern { - using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; +struct DeallocOpLowering : public ConvertOpToLLVMPattern { + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; explicit DeallocOpLowering(LLVMTypeConverter &converter) - : ConvertOpToLLVMPattern(converter) {} + : ConvertOpToLLVMPattern(converter) {} LogicalResult - matchAndRewrite(DeallocOp op, ArrayRef operands, + matchAndRewrite(memref::DeallocOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { assert(operands.size() == 1 && "dealloc takes one operand"); - DeallocOp::Adaptor transformed(operands); + memref::DeallocOp::Adaptor transformed(operands); // Insert the `free` declaration if it is not already present. auto freeFunc = LLVM::lookupOrCreateFreeFn(op->getParentOfType()); @@ -2209,7 +2210,7 @@ LLVMTypeConverter &typeConverter) { // LLVM type for a global memref will be a multi-dimension array. For // declarations or uninitialized global memrefs, we can potentially flatten - // this to a 1D array. However, for global_memref's with an initial value, + // this to a 1D array. However, for memref.global's with an initial value, // we do not intend to flatten the ElementsAttribute when going from std -> // LLVM dialect, so the LLVM type needs to me a multi-dimension array. Type elementType = unwrap(typeConverter.convertType(type.getElementType())); @@ -2221,11 +2222,12 @@ } /// GlobalMemrefOp is lowered to a LLVM Global Variable. -struct GlobalMemrefOpLowering : public ConvertOpToLLVMPattern { - using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; +struct GlobalMemrefOpLowering + : public ConvertOpToLLVMPattern { + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; LogicalResult - matchAndRewrite(GlobalMemrefOp global, ArrayRef operands, + matchAndRewrite(memref::GlobalOp global, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { MemRefType type = global.type().cast(); if (!isConvertibleAndHasIdentityMaps(type)) @@ -2259,14 +2261,15 @@ /// `AllocLikeOpLowering` to reuse the Memref descriptor construction. struct GetGlobalMemrefOpLowering : public AllocLikeOpLowering { GetGlobalMemrefOpLowering(LLVMTypeConverter &converter) - : AllocLikeOpLowering(GetGlobalMemrefOp::getOperationName(), converter) {} + : AllocLikeOpLowering(memref::GetGlobalOp::getOperationName(), + converter) {} - /// Buffer "allocation" for get_global_memref op is getting the address of + /// Buffer "allocation" for memref.get_global op is getting the address of /// the global variable referenced. std::tuple allocateBuffer(ConversionPatternRewriter &rewriter, Location loc, Value sizeBytes, Operation *op) const override { - auto getGlobalOp = cast(op); + auto getGlobalOp = cast(op); MemRefType type = getGlobalOp.result().getType().cast(); unsigned memSpace = type.getMemorySpaceAsInt(); @@ -2285,7 +2288,7 @@ createIndexConstant(rewriter, loc, 0)); auto gep = rewriter.create(loc, elementPtrType, operands); - // We do not expect the memref obtained using `get_global_memref` to be + // We do not expect the memref obtained using `memref.get_global` to be // ever deallocated. Set the allocated pointer to be known bad value to // help debug if that ever happens. 
auto intPtrType = getIntPtrType(memSpace); @@ -2354,17 +2357,17 @@ } }; -struct MemRefCastOpLowering : public ConvertOpToLLVMPattern { - using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; +struct MemRefCastOpLowering : public ConvertOpToLLVMPattern { + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; - LogicalResult match(MemRefCastOp memRefCastOp) const override { + LogicalResult match(memref::CastOp memRefCastOp) const override { Type srcType = memRefCastOp.getOperand().getType(); Type dstType = memRefCastOp.getType(); - // MemRefCastOp reduce to bitcast in the ranked MemRef case and can be used - // for type erasure. For now they must preserve underlying element type and - // require source and result type to have the same rank. Therefore, perform - // a sanity check that the underlying structs are the same. Once op + // memref::CastOp reduce to bitcast in the ranked MemRef case and can be + // used for type erasure. For now they must preserve underlying element type + // and require source and result type to have the same rank. Therefore, + // perform a sanity check that the underlying structs are the same. Once op // semantics are relaxed we can revisit. if (srcType.isa() && dstType.isa()) return success(typeConverter->convertType(srcType) == @@ -2381,9 +2384,9 @@ : failure(); } - void rewrite(MemRefCastOp memRefCastOp, ArrayRef operands, + void rewrite(memref::CastOp memRefCastOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { - MemRefCastOp::Adaptor transformed(operands); + memref::CastOp::Adaptor transformed(operands); auto srcType = memRefCastOp.getOperand().getType(); auto dstType = memRefCastOp.getType(); @@ -2486,14 +2489,15 @@ } struct MemRefReinterpretCastOpLowering - : public ConvertOpToLLVMPattern { - using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + : public ConvertOpToLLVMPattern { + using ConvertOpToLLVMPattern< + memref::ReinterpretCastOp>::ConvertOpToLLVMPattern; LogicalResult - matchAndRewrite(MemRefReinterpretCastOp castOp, ArrayRef operands, + matchAndRewrite(memref::ReinterpretCastOp castOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { - MemRefReinterpretCastOp::Adaptor adaptor(operands, - castOp->getAttrDictionary()); + memref::ReinterpretCastOp::Adaptor adaptor(operands, + castOp->getAttrDictionary()); Type srcType = castOp.source().getType(); Value descriptor; @@ -2505,11 +2509,10 @@ } private: - LogicalResult - convertSourceMemRefToDescriptor(ConversionPatternRewriter &rewriter, - Type srcType, MemRefReinterpretCastOp castOp, - MemRefReinterpretCastOp::Adaptor adaptor, - Value *descriptor) const { + LogicalResult convertSourceMemRefToDescriptor( + ConversionPatternRewriter &rewriter, Type srcType, + memref::ReinterpretCastOp castOp, + memref::ReinterpretCastOp::Adaptor adaptor, Value *descriptor) const { MemRefType targetMemRefType = castOp.getResult().getType().cast(); auto llvmTargetDescriptorTy = typeConverter->convertType(targetMemRefType) @@ -2555,14 +2558,14 @@ }; struct MemRefReshapeOpLowering - : public ConvertOpToLLVMPattern { - using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + : public ConvertOpToLLVMPattern { + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; LogicalResult - matchAndRewrite(MemRefReshapeOp reshapeOp, ArrayRef operands, + matchAndRewrite(memref::ReshapeOp reshapeOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { auto *op = reshapeOp.getOperation(); - MemRefReshapeOp::Adaptor adaptor(operands, op->getAttrDictionary()); + 
memref::ReshapeOp::Adaptor adaptor(operands, op->getAttrDictionary()); Type srcType = reshapeOp.source().getType(); Value descriptor; @@ -2576,8 +2579,8 @@ private: LogicalResult convertSourceMemRefToDescriptor(ConversionPatternRewriter &rewriter, - Type srcType, MemRefReshapeOp reshapeOp, - MemRefReshapeOp::Adaptor adaptor, + Type srcType, memref::ReshapeOp reshapeOp, + memref::ReshapeOp::Adaptor adaptor, Value *descriptor) const { // Conversion for statically-known shape args is performed via // `memref_reinterpret_cast`. @@ -2722,11 +2725,11 @@ // A `dim` is converted to a constant for static sizes and to an access to the // size stored in the memref descriptor for dynamic sizes. -struct DimOpLowering : public ConvertOpToLLVMPattern { - using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; +struct DimOpLowering : public ConvertOpToLLVMPattern { + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; LogicalResult - matchAndRewrite(DimOp dimOp, ArrayRef operands, + matchAndRewrite(memref::DimOp dimOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { Type operandType = dimOp.memrefOrTensor().getType(); if (operandType.isa()) { @@ -2744,11 +2747,11 @@ } private: - Value extractSizeOfUnrankedMemRef(Type operandType, DimOp dimOp, + Value extractSizeOfUnrankedMemRef(Type operandType, memref::DimOp dimOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const { Location loc = dimOp.getLoc(); - DimOp::Adaptor transformed(operands); + memref::DimOp::Adaptor transformed(operands); auto unrankedMemRefType = operandType.cast(); auto scalarMemRefType = @@ -2785,11 +2788,11 @@ return rewriter.create(loc, sizePtr); } - Value extractSizeOfRankedMemRef(Type operandType, DimOp dimOp, + Value extractSizeOfRankedMemRef(Type operandType, memref::DimOp dimOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const { Location loc = dimOp.getLoc(); - DimOp::Adaptor transformed(operands); + memref::DimOp::Adaptor transformed(operands); // Take advantage if index is constant. MemRefType memRefType = operandType.cast(); if (Optional index = dimOp.getConstantIndex()) { @@ -2833,7 +2836,7 @@ }; // Common base for load and store operations on MemRefs. Restricts the match -// to supported MemRef types. Provides functionality to emit code accessing a +// to supported MemRef types. Provides functionality to emit code accessing a // specific element of the underlying data buffer. template struct LoadStoreOpLowering : public ConvertOpToLLVMPattern { @@ -2849,13 +2852,13 @@ // Load operation is lowered to obtaining a pointer to the indexed element // and loading it. -struct LoadOpLowering : public LoadStoreOpLowering { +struct LoadOpLowering : public LoadStoreOpLowering { using Base::Base; LogicalResult - matchAndRewrite(LoadOp loadOp, ArrayRef operands, + matchAndRewrite(memref::LoadOp loadOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { - LoadOp::Adaptor transformed(operands); + memref::LoadOp::Adaptor transformed(operands); auto type = loadOp.getMemRefType(); Value dataPtr = @@ -2868,14 +2871,14 @@ // Store operation is lowered to obtaining a pointer to the indexed element, // and storing the given value to it. 
-struct StoreOpLowering : public LoadStoreOpLowering { +struct StoreOpLowering : public LoadStoreOpLowering { using Base::Base; LogicalResult - matchAndRewrite(StoreOp op, ArrayRef operands, + matchAndRewrite(memref::StoreOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { auto type = op.getMemRefType(); - StoreOp::Adaptor transformed(operands); + memref::StoreOp::Adaptor transformed(operands); Value dataPtr = getStridedElementPtr(op.getLoc(), type, transformed.memref(), @@ -2888,13 +2891,13 @@ // The prefetch operation is lowered in a way similar to the load operation // except that the llvm.prefetch operation is used for replacement. -struct PrefetchOpLowering : public LoadStoreOpLowering { +struct PrefetchOpLowering : public LoadStoreOpLowering { using Base::Base; LogicalResult - matchAndRewrite(PrefetchOp prefetchOp, ArrayRef operands, + matchAndRewrite(memref::PrefetchOp prefetchOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { - PrefetchOp::Adaptor transformed(operands); + memref::PrefetchOp::Adaptor transformed(operands); auto type = prefetchOp.getMemRefType(); auto loc = prefetchOp.getLoc(); @@ -3221,11 +3224,11 @@ /// 2. Updates to the descriptor to introduce the data ptr, offset, size /// and stride. /// The subview op is replaced by the descriptor. -struct SubViewOpLowering : public ConvertOpToLLVMPattern { - using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; +struct SubViewOpLowering : public ConvertOpToLLVMPattern { + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; LogicalResult - matchAndRewrite(SubViewOp subViewOp, ArrayRef operands, + matchAndRewrite(memref::SubViewOp subViewOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { auto loc = subViewOp.getLoc(); @@ -3234,7 +3237,7 @@ typeConverter->convertType(sourceMemRefType.getElementType()); auto viewMemRefType = subViewOp.getType(); - auto inferredType = SubViewOp::inferResultType( + auto inferredType = memref::SubViewOp::inferResultType( subViewOp.getSourceType(), extractFromI64ArrayAttr(subViewOp.static_offsets()), extractFromI64ArrayAttr(subViewOp.static_sizes()), @@ -3335,7 +3338,7 @@ if (static_cast(i) >= mixedSizes.size()) { size = rewriter.create( loc, llvmIndexType, - rewriter.create(loc, subViewOp.source(), i)); + rewriter.create(loc, subViewOp.source(), i)); stride = rewriter.create( loc, llvmIndexType, rewriter.getI64IntegerAttr(1)); } else { @@ -3376,15 +3379,15 @@ /// and stride. Size and stride are permutations of the original values. /// 4. A store of the resulting ViewDescriptor to the alloca'ed pointer. /// The transpose op is replaced by the alloca'ed pointer. -class TransposeOpLowering : public ConvertOpToLLVMPattern { +class TransposeOpLowering : public ConvertOpToLLVMPattern { public: - using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; LogicalResult - matchAndRewrite(TransposeOp transposeOp, ArrayRef operands, + matchAndRewrite(memref::TransposeOp transposeOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { auto loc = transposeOp.getLoc(); - TransposeOpAdaptor adaptor(operands); + memref::TransposeOpAdaptor adaptor(operands); MemRefDescriptor viewMemRef(adaptor.in()); // No permutation, early exit. @@ -3424,8 +3427,8 @@ /// 2. Updates to the descriptor to introduce the data ptr, offset, size /// and stride. /// The view op is replaced by the descriptor. 
-struct ViewOpLowering : public ConvertOpToLLVMPattern { - using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; +struct ViewOpLowering : public ConvertOpToLLVMPattern { + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; // Build and return the value for the idx^th shape dimension, either by // returning the constant shape dimension or counting the proper dynamic size. @@ -3461,10 +3464,10 @@ } LogicalResult - matchAndRewrite(ViewOp viewOp, ArrayRef operands, + matchAndRewrite(memref::ViewOp viewOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { auto loc = viewOp.getLoc(); - ViewOpAdaptor adaptor(operands); + memref::ViewOpAdaptor adaptor(operands); auto viewMemRefType = viewOp.getType(); auto targetElementTy = @@ -3540,13 +3543,14 @@ }; struct AssumeAlignmentOpLowering - : public ConvertOpToLLVMPattern { - using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + : public ConvertOpToLLVMPattern { + using ConvertOpToLLVMPattern< + memref::AssumeAlignmentOp>::ConvertOpToLLVMPattern; LogicalResult - matchAndRewrite(AssumeAlignmentOp op, ArrayRef operands, + matchAndRewrite(memref::AssumeAlignmentOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { - AssumeAlignmentOp::Adaptor transformed(operands); + memref::AssumeAlignmentOp::Adaptor transformed(operands); Value memref = transformed.memref(); unsigned alignment = op.alignment(); auto loc = op.getLoc(); diff --git a/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt b/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt --- a/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt +++ b/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt @@ -13,6 +13,7 @@ LINK_LIBS PUBLIC MLIRIR MLIRMath + MLIRMemRef MLIRPass MLIRSPIRV MLIRSPIRVConversion diff --git a/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp b/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp --- a/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp +++ b/mlir/lib/Conversion/StandardToSPIRV/LegalizeStandardForSPIRV.cpp @@ -14,6 +14,7 @@ #include "../PassDetail.h" #include "mlir/Conversion/StandardToSPIRV/StandardToSPIRV.h" #include "mlir/Conversion/StandardToSPIRV/StandardToSPIRVPass.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Vector/VectorOps.h" @@ -23,11 +24,11 @@ using namespace mlir; /// Helpers to access the memref operand for each op. 
-static Value getMemRefOperand(LoadOp op) { return op.memref(); } +static Value getMemRefOperand(memref::LoadOp op) { return op.memref(); } static Value getMemRefOperand(vector::TransferReadOp op) { return op.source(); } -static Value getMemRefOperand(StoreOp op) { return op.memref(); } +static Value getMemRefOperand(memref::StoreOp op) { return op.memref(); } static Value getMemRefOperand(vector::TransferWriteOp op) { return op.source(); @@ -44,7 +45,7 @@ PatternRewriter &rewriter) const override; private: - void replaceOp(OpTy loadOp, SubViewOp subViewOp, + void replaceOp(OpTy loadOp, memref::SubViewOp subViewOp, ArrayRef sourceIndices, PatternRewriter &rewriter) const; }; @@ -59,23 +60,22 @@ PatternRewriter &rewriter) const override; private: - void replaceOp(OpTy StoreOp, SubViewOp subViewOp, + void replaceOp(OpTy storeOp, memref::SubViewOp subViewOp, ArrayRef sourceIndices, PatternRewriter &rewriter) const; }; template <> -void LoadOpOfSubViewFolder::replaceOp(LoadOp loadOp, - SubViewOp subViewOp, - ArrayRef sourceIndices, - PatternRewriter &rewriter) const { - rewriter.replaceOpWithNewOp(loadOp, subViewOp.source(), - sourceIndices); +void LoadOpOfSubViewFolder::replaceOp( + memref::LoadOp loadOp, memref::SubViewOp subViewOp, + ArrayRef sourceIndices, PatternRewriter &rewriter) const { + rewriter.replaceOpWithNewOp(loadOp, subViewOp.source(), + sourceIndices); } template <> void LoadOpOfSubViewFolder::replaceOp( - vector::TransferReadOp loadOp, SubViewOp subViewOp, + vector::TransferReadOp loadOp, memref::SubViewOp subViewOp, ArrayRef sourceIndices, PatternRewriter &rewriter) const { rewriter.replaceOpWithNewOp( loadOp, loadOp.getVectorType(), subViewOp.source(), sourceIndices, @@ -83,16 +83,16 @@ } template <> -void StoreOpOfSubViewFolder::replaceOp( - StoreOp storeOp, SubViewOp subViewOp, ArrayRef sourceIndices, - PatternRewriter &rewriter) const { - rewriter.replaceOpWithNewOp(storeOp, storeOp.value(), - subViewOp.source(), sourceIndices); +void StoreOpOfSubViewFolder::replaceOp( + memref::StoreOp storeOp, memref::SubViewOp subViewOp, + ArrayRef sourceIndices, PatternRewriter &rewriter) const { + rewriter.replaceOpWithNewOp( + storeOp, storeOp.value(), subViewOp.source(), sourceIndices); } template <> void StoreOpOfSubViewFolder::replaceOp( - vector::TransferWriteOp tranferWriteOp, SubViewOp subViewOp, + vector::TransferWriteOp tranferWriteOp, memref::SubViewOp subViewOp, ArrayRef sourceIndices, PatternRewriter &rewriter) const { rewriter.replaceOpWithNewOp( tranferWriteOp, tranferWriteOp.vector(), subViewOp.source(), @@ -120,7 +120,7 @@ /// memref<12x42xf32> static LogicalResult resolveSourceIndices(Location loc, PatternRewriter &rewriter, - SubViewOp subViewOp, ValueRange indices, + memref::SubViewOp subViewOp, ValueRange indices, SmallVectorImpl &sourceIndices) { // TODO: Aborting when the offsets are static. 
There might be a way to fold // the subview op with load even if the offsets have been canonicalized @@ -152,7 +152,8 @@ LogicalResult LoadOpOfSubViewFolder::matchAndRewrite(OpTy loadOp, PatternRewriter &rewriter) const { - auto subViewOp = getMemRefOperand(loadOp).template getDefiningOp(); + auto subViewOp = + getMemRefOperand(loadOp).template getDefiningOp(); if (!subViewOp) { return failure(); } @@ -174,7 +175,7 @@ StoreOpOfSubViewFolder::matchAndRewrite(OpTy storeOp, PatternRewriter &rewriter) const { auto subViewOp = - getMemRefOperand(storeOp).template getDefiningOp(); + getMemRefOperand(storeOp).template getDefiningOp(); if (!subViewOp) { return failure(); } @@ -193,9 +194,9 @@ void mlir::populateStdLegalizationPatternsForSPIRVLowering( MLIRContext *context, OwningRewritePatternList &patterns) { - patterns.insert, + patterns.insert, LoadOpOfSubViewFolder, - StoreOpOfSubViewFolder, + StoreOpOfSubViewFolder, StoreOpOfSubViewFolder>(context); } diff --git a/mlir/lib/Conversion/StandardToSPIRV/StandardToSPIRV.cpp b/mlir/lib/Conversion/StandardToSPIRV/StandardToSPIRV.cpp --- a/mlir/lib/Conversion/StandardToSPIRV/StandardToSPIRV.cpp +++ b/mlir/lib/Conversion/StandardToSPIRV/StandardToSPIRV.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h" #include "mlir/Dialect/SPIRV/IR/SPIRVOps.h" #include "mlir/Dialect/SPIRV/Transforms/SPIRVConversion.h" @@ -237,12 +238,12 @@ /// to Workgroup memory when the size is constant. Note that this pattern needs /// to be applied in a pass that runs at least at spv.module scope since it wil /// ladd global variables into the spv.module. -class AllocOpPattern final : public OpConversionPattern { +class AllocOpPattern final : public OpConversionPattern { public: - using OpConversionPattern::OpConversionPattern; + using OpConversionPattern::OpConversionPattern; LogicalResult - matchAndRewrite(AllocOp operation, ArrayRef operands, + matchAndRewrite(memref::AllocOp operation, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { MemRefType allocType = operation.getType(); if (!isAllocationSupported(allocType)) @@ -278,12 +279,12 @@ /// Removed a deallocation if it is a supported allocation. Currently only /// removes deallocation if the memory space is workgroup memory. -class DeallocOpPattern final : public OpConversionPattern { +class DeallocOpPattern final : public OpConversionPattern { public: - using OpConversionPattern::OpConversionPattern; + using OpConversionPattern::OpConversionPattern; LogicalResult - matchAndRewrite(DeallocOp operation, ArrayRef operands, + matchAndRewrite(memref::DeallocOp operation, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { MemRefType deallocType = operation.memref().getType().cast(); if (!isAllocationSupported(deallocType)) @@ -430,23 +431,23 @@ ConversionPatternRewriter &rewriter) const override; }; -/// Converts std.load to spv.Load. -class IntLoadOpPattern final : public OpConversionPattern { +/// Converts memref.load to spv.Load. 
+class IntLoadOpPattern final : public OpConversionPattern { public: - using OpConversionPattern::OpConversionPattern; + using OpConversionPattern::OpConversionPattern; LogicalResult - matchAndRewrite(LoadOp loadOp, ArrayRef operands, + matchAndRewrite(memref::LoadOp loadOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const override; }; -/// Converts std.load to spv.Load. -class LoadOpPattern final : public OpConversionPattern { +/// Converts memref.load to spv.Load. +class LoadOpPattern final : public OpConversionPattern { public: - using OpConversionPattern::OpConversionPattern; + using OpConversionPattern::OpConversionPattern; LogicalResult - matchAndRewrite(LoadOp loadOp, ArrayRef operands, + matchAndRewrite(memref::LoadOp loadOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const override; }; @@ -469,23 +470,23 @@ ConversionPatternRewriter &rewriter) const override; }; -/// Converts std.store to spv.Store on integers. -class IntStoreOpPattern final : public OpConversionPattern { +/// Converts memref.store to spv.Store on integers. +class IntStoreOpPattern final : public OpConversionPattern { public: - using OpConversionPattern::OpConversionPattern; + using OpConversionPattern::OpConversionPattern; LogicalResult - matchAndRewrite(StoreOp storeOp, ArrayRef operands, + matchAndRewrite(memref::StoreOp storeOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const override; }; -/// Converts std.store to spv.Store. -class StoreOpPattern final : public OpConversionPattern { +/// Converts memref.store to spv.Store. +class StoreOpPattern final : public OpConversionPattern { public: - using OpConversionPattern::OpConversionPattern; + using OpConversionPattern::OpConversionPattern; LogicalResult - matchAndRewrite(StoreOp storeOp, ArrayRef operands, + matchAndRewrite(memref::StoreOp storeOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const override; }; @@ -975,9 +976,10 @@ //===----------------------------------------------------------------------===// LogicalResult -IntLoadOpPattern::matchAndRewrite(LoadOp loadOp, ArrayRef operands, +IntLoadOpPattern::matchAndRewrite(memref::LoadOp loadOp, + ArrayRef operands, ConversionPatternRewriter &rewriter) const { - LoadOpAdaptor loadOperands(operands); + memref::LoadOpAdaptor loadOperands(operands); auto loc = loadOp.getLoc(); auto memrefType = loadOp.memref().getType().cast(); if (!memrefType.getElementType().isSignlessInteger()) @@ -1051,9 +1053,9 @@ } LogicalResult -LoadOpPattern::matchAndRewrite(LoadOp loadOp, ArrayRef operands, +LoadOpPattern::matchAndRewrite(memref::LoadOp loadOp, ArrayRef operands, ConversionPatternRewriter &rewriter) const { - LoadOpAdaptor loadOperands(operands); + memref::LoadOpAdaptor loadOperands(operands); auto memrefType = loadOp.memref().getType().cast(); if (memrefType.getElementType().isSignlessInteger()) return failure(); @@ -1101,9 +1103,10 @@ //===----------------------------------------------------------------------===// LogicalResult -IntStoreOpPattern::matchAndRewrite(StoreOp storeOp, ArrayRef operands, +IntStoreOpPattern::matchAndRewrite(memref::StoreOp storeOp, + ArrayRef operands, ConversionPatternRewriter &rewriter) const { - StoreOpAdaptor storeOperands(operands); + memref::StoreOpAdaptor storeOperands(operands); auto memrefType = storeOp.memref().getType().cast(); if (!memrefType.getElementType().isSignlessInteger()) return failure(); @@ -1180,9 +1183,10 @@ } LogicalResult -StoreOpPattern::matchAndRewrite(StoreOp storeOp, ArrayRef operands, 
+StoreOpPattern::matchAndRewrite(memref::StoreOp storeOp, + ArrayRef operands, ConversionPatternRewriter &rewriter) const { - StoreOpAdaptor storeOperands(operands); + memref::StoreOpAdaptor storeOperands(operands); auto memrefType = storeOp.memref().getType().cast(); if (memrefType.getElementType().isSignlessInteger()) return failure(); diff --git a/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt b/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt --- a/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt +++ b/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt @@ -20,6 +20,7 @@ MLIRArmSVEToLLVM MLIRLLVMArmSVE MLIRLLVMIR + MLIRMemRef MLIRStandardToLLVM MLIRTargetLLVMIRExport MLIRTransforms diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -12,6 +12,7 @@ #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" #include "mlir/Dialect/LLVMIR/FunctionCallUtils.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Vector/VectorOps.h" #include "mlir/IR/BuiltinTypes.h" @@ -1262,7 +1263,7 @@ unsigned vecWidth = LLVM::getVectorNumElements(vtp).getFixedValue(); unsigned lastIndex = llvm::size(xferOp.indices()) - 1; Value off = xferOp.indices()[lastIndex]; - Value dim = rewriter.create(loc, xferOp.source(), lastIndex); + Value dim = rewriter.create(loc, xferOp.source(), lastIndex); Value mask = buildVectorComparison( rewriter, xferOp, enableIndexOptimizations, vecWidth, dim, &off); diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp @@ -19,6 +19,7 @@ #include "mlir/Dialect/ArmSVE/ArmSVEDialect.h" #include "mlir/Dialect/LLVMIR/LLVMArmSVEDialect.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Vector/VectorOps.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -39,6 +40,7 @@ // Override explicitly to allow conditional dialect dependence. void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); + registry.insert(); if (enableArmNeon) registry.insert(); if (enableArmSVE) @@ -72,6 +74,7 @@ // Architecture specific augmentations. 
LLVMConversionTarget target(getContext()); target.addLegalOp(); + target.addLegalDialect(); target.addLegalDialect(); target.addLegalOp(); if (enableArmNeon) { diff --git a/mlir/lib/Conversion/VectorToSCF/CMakeLists.txt b/mlir/lib/Conversion/VectorToSCF/CMakeLists.txt --- a/mlir/lib/Conversion/VectorToSCF/CMakeLists.txt +++ b/mlir/lib/Conversion/VectorToSCF/CMakeLists.txt @@ -11,5 +11,6 @@ MLIREDSC MLIRAffineEDSC MLIRLLVMIR + MLIRMemRef MLIRTransforms ) diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -16,6 +16,7 @@ #include "../PassDetail.h" #include "mlir/Dialect/Affine/EDSC/Intrinsics.h" +#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h" #include "mlir/Dialect/SCF/EDSC/Builders.h" #include "mlir/Dialect/SCF/EDSC/Intrinsics.h" #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" @@ -252,7 +253,7 @@ op->getParentWithTrait(); assert(scope && "Expected op to be inside automatic allocation scope"); b.setInsertionPointToStart(&scope->getRegion(0).front()); - Value res = std_alloca(memRefMinorVectorType); + Value res = memref_alloca(memRefMinorVectorType); return res; } @@ -314,7 +315,7 @@ return {vector}; } // 3.b. Otherwise, just go through the temporary `alloc`. - std_store(vector, alloc, majorIvs); + memref_store(vector, alloc, majorIvs); return {}; }, [&]() -> scf::ValueVector { @@ -326,7 +327,7 @@ return {vector}; } // 3.d. Otherwise, just go through the temporary `alloc`. - std_store(vector, alloc, majorIvs); + memref_store(vector, alloc, majorIvs); return {}; }); @@ -341,14 +342,15 @@ result = vector_insert(loaded1D, result, majorIvs); // 5.b. Otherwise, just go through the temporary `alloc`. else - std_store(loaded1D, alloc, majorIvs); + memref_store(loaded1D, alloc, majorIvs); } }); assert((!options.unroll ^ (bool)result) && "Expected resulting Value iff unroll"); if (!result) - result = std_load(vector_type_cast(MemRefType::get({}, vectorType), alloc)); + result = + memref_load(vector_type_cast(MemRefType::get({}, vectorType), alloc)); rewriter.replaceOp(op, result); return success(); @@ -359,8 +361,8 @@ Value alloc; if (!options.unroll) { alloc = setAllocAtFunctionEntry(memRefMinorVectorType, op); - std_store(xferOp.vector(), - vector_type_cast(MemRefType::get({}, vectorType), alloc)); + memref_store(xferOp.vector(), + vector_type_cast(MemRefType::get({}, vectorType), alloc)); } emitLoops([&](ValueRange majorIvs, ValueRange leadingOffsets, @@ -379,7 +381,7 @@ if (options.unroll) result = vector_extract(xferOp.vector(), majorIvs); else - result = std_load(alloc, majorIvs); + result = memref_load(alloc, majorIvs); auto map = getTransferMinorIdentityMap(xferOp.getShapedType(), minorVectorType); ArrayAttr masked; @@ -560,7 +562,7 @@ // Conservative lowering to scalar load / stores. // 1. Setup all the captures. ScopedContext scope(rewriter, transfer.getLoc()); - StdIndexedValue remote(transfer.source()); + MemRefIndexedValue remote(transfer.source()); MemRefBoundsCapture memRefBoundsCapture(transfer.source()); VectorBoundsCapture vectorBoundsCapture(transfer.vector()); int coalescedIdx = computeCoalescedIndex(transfer); @@ -579,7 +581,7 @@ // 2. Emit alloc-copy-load-dealloc. 
MLIRContext *ctx = op->getContext(); Value tmp = setAllocAtFunctionEntry(tmpMemRefType(transfer), transfer); - StdIndexedValue local(tmp); + MemRefIndexedValue local(tmp); loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) { auto ivsStorage = llvm::to_vector<8>(loopIvs); // Swap the ivs which will reorder memory accesses. @@ -601,7 +603,7 @@ rewriter, cast(transfer.getOperation()), ivs, memRefBoundsCapture, loadValue, loadPadding); }); - Value vectorValue = std_load(vector_type_cast(tmp)); + Value vectorValue = memref_load(vector_type_cast(tmp)); // 3. Propagate. rewriter.replaceOp(op, vectorValue); @@ -646,7 +648,7 @@ // 1. Setup all the captures. ScopedContext scope(rewriter, transfer.getLoc()); - StdIndexedValue remote(transfer.source()); + MemRefIndexedValue remote(transfer.source()); MemRefBoundsCapture memRefBoundsCapture(transfer.source()); Value vectorValue(transfer.vector()); VectorBoundsCapture vectorBoundsCapture(transfer.vector()); @@ -665,9 +667,9 @@ // 2. Emit alloc-store-copy-dealloc. Value tmp = setAllocAtFunctionEntry(tmpMemRefType(transfer), transfer); - StdIndexedValue local(tmp); + MemRefIndexedValue local(tmp); Value vec = vector_type_cast(tmp); - std_store(vectorValue, vec); + memref_store(vectorValue, vec); loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) { auto ivsStorage = llvm::to_vector<8>(loopIvs); // Swap the ivsStorage which will reorder memory accesses. diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp --- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp +++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp @@ -8,6 +8,7 @@ #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/IR/AffineValueMap.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/BlockAndValueMapping.h" #include "mlir/IR/BuiltinOps.h" @@ -64,7 +65,7 @@ // op won't be top-level anymore after inlining. Attribute operandCst; return matchPattern(value.getDefiningOp(), m_Constant(&operandCst)) || - value.getDefiningOp(); + value.getDefiningOp(); } /// Checks if all values known to be legal affine dimensions or symbols in `src` @@ -295,7 +296,7 @@ return applyOp.isValidDim(region); // The dim op is okay if its operand memref/tensor is defined at the top // level. - if (auto dimOp = dyn_cast(op)) + if (auto dimOp = dyn_cast(op)) return isTopLevelValue(dimOp.memrefOrTensor()); return false; } @@ -317,9 +318,8 @@ } /// Returns true if the result of the dim op is a valid symbol for `region`. -static bool isDimOpValidSymbol(DimOp dimOp, Region *region) { - // The dim op is okay if its operand memref/tensor is defined at the top - // level. +static bool isDimOpValidSymbol(memref::DimOp dimOp, Region *region) { + // The dim op is okay if its operand memref is defined at the top level. if (isTopLevelValue(dimOp.memrefOrTensor())) return true; @@ -328,14 +328,14 @@ if (dimOp.memrefOrTensor().isa()) return false; - // The dim op is also okay if its operand memref/tensor is a view/subview - // whose corresponding size is a valid symbol. + // The dim op is also okay if its operand memref is a view/subview whose + // corresponding size is a valid symbol. 
Optional index = dimOp.getConstantIndex(); assert(index.hasValue() && "expect only `dim` operations with a constant index"); int64_t i = index.getValue(); return TypeSwitch(dimOp.memrefOrTensor().getDefiningOp()) - .Case( + .Case( [&](auto op) { return isMemRefSizeValidSymbol(op, i, region); }) .Default([](Operation *) { return false; }); } @@ -404,7 +404,7 @@ return applyOp.isValidSymbol(region); // Dim op results could be valid symbols at any level. - if (auto dimOp = dyn_cast(defOp)) + if (auto dimOp = dyn_cast(defOp)) return isDimOpValidSymbol(dimOp, region); // Check for values dominating `region`'s parent op. @@ -915,12 +915,12 @@ //===----------------------------------------------------------------------===// /// This is a common class used for patterns of the form -/// "someop(memrefcast) -> someop". It folds the source of any memref_cast +/// "someop(memrefcast) -> someop". It folds the source of any memref.cast /// into the root operation directly. static LogicalResult foldMemRefCast(Operation *op) { bool folded = false; for (OpOperand &operand : op->getOpOperands()) { - auto cast = operand.get().getDefiningOp(); + auto cast = operand.get().getDefiningOp(); if (cast && !cast.getOperand().getType().isa()) { operand.set(cast.getOperand()); folded = true; @@ -2254,7 +2254,8 @@ // AffineMinMaxOpBase //===----------------------------------------------------------------------===// -template static LogicalResult verifyAffineMinMaxOp(T op) { +template +static LogicalResult verifyAffineMinMaxOp(T op) { // Verify that operand count matches affine map dimension and symbol count. if (op.getNumOperands() != op.map().getNumDims() + op.map().getNumSymbols()) return op.emitOpError( @@ -2262,7 +2263,8 @@ return success(); } -template static void printAffineMinMaxOp(OpAsmPrinter &p, T op) { +template +static void printAffineMinMaxOp(OpAsmPrinter &p, T op) { p << op.getOperationName() << ' ' << op->getAttr(T::getMapAttrName()); auto operands = op.getOperands(); unsigned numDims = op.map().getNumDims(); diff --git a/mlir/lib/Dialect/Affine/IR/CMakeLists.txt b/mlir/lib/Dialect/Affine/IR/CMakeLists.txt --- a/mlir/lib/Dialect/Affine/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/Affine/IR/CMakeLists.txt @@ -14,6 +14,7 @@ MLIREDSC MLIRIR MLIRLoopLikeInterface + MLIRMemRef MLIRSideEffectInterfaces MLIRStandard ) diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp --- a/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp @@ -23,6 +23,7 @@ #include "mlir/Analysis/Utils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/Passes.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/LoopUtils.h" diff --git a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt --- a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt @@ -22,6 +22,7 @@ MLIRAffineUtils MLIREDSC MLIRIR + MLIRMemRef MLIRPass MLIRSideEffectInterfaces MLIRStandard diff --git a/mlir/lib/Dialect/Affine/Transforms/PassDetail.h b/mlir/lib/Dialect/Affine/Transforms/PassDetail.h --- a/mlir/lib/Dialect/Affine/Transforms/PassDetail.h +++ b/mlir/lib/Dialect/Affine/Transforms/PassDetail.h @@ -19,6 +19,11 @@ namespace linalg { class LinalgDialect; } 
// end namespace linalg + +namespace memref { +class MemRefDialect; +} // end namespace memref + namespace vector { class VectorDialect; } // end namespace vector diff --git a/mlir/lib/Dialect/CMakeLists.txt b/mlir/lib/Dialect/CMakeLists.txt --- a/mlir/lib/Dialect/CMakeLists.txt +++ b/mlir/lib/Dialect/CMakeLists.txt @@ -9,6 +9,7 @@ add_subdirectory(Linalg) add_subdirectory(LLVMIR) add_subdirectory(Math) +add_subdirectory(MemRef) add_subdirectory(OpenACC) add_subdirectory(OpenMP) add_subdirectory(PDL) diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -35,6 +35,7 @@ MLIRAsync MLIREDSC MLIRIR + MLIRMemRef MLIRLLVMIR MLIRLLVMToLLVMIRTranslation MLIRSCF diff --git a/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp b/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp --- a/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp @@ -13,6 +13,7 @@ #include "mlir/Dialect/GPU/GPUDialect.h" #include "mlir/Dialect/GPU/Passes.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/BlockAndValueMapping.h" #include "mlir/IR/Builders.h" @@ -107,7 +108,7 @@ createPredicatedBlock(isFirstLane, [&] { Value subgroupId = getDivideBySubgroupSize(invocationIdx); Value index = create(indexType, subgroupId); - create(subgroupReduce, buffer, index); + create(subgroupReduce, buffer, index); }); create(); @@ -124,27 +125,29 @@ Value zero = create(0); createPredicatedBlock(isValidSubgroup, [&] { Value index = create(indexType, invocationIdx); - Value value = create(valueType, buffer, index); + Value value = create(valueType, buffer, index); Value result = createSubgroupReduce(numSubgroups, laneId, value, accumFactory); - create(result, buffer, zero); + create(result, buffer, zero); }); // Synchronize workgroup and load result from workgroup memory. create(); - Value result = create(valueType, buffer, zero); + Value result = create(valueType, buffer, zero); rewriter.replaceOp(reduceOp, result); } private: // Shortcut to create an op from rewriter using loc as the first argument. - template T create(Args... args) { + template + T create(Args... args) { return rewriter.create(loc, std::forward(args)...); } // Creates dimension op of type T, with the result casted to int32. - template Value getDimOp(StringRef dimension) { + template + Value getDimOp(StringRef dimension) { Value dim = create(indexType, rewriter.getStringAttr(dimension)); return create(int32Type, dim); } @@ -236,7 +239,8 @@ } /// Returns an accumulator factory that creates an op of type T. - template AccumulatorFactory getFactory() { + template + AccumulatorFactory getFactory() { return [&](Value lhs, Value rhs) { return create(lhs.getType(), lhs, rhs); }; diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp --- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp @@ -14,6 +14,7 @@ #include "mlir/Dialect/GPU/GPUDialect.h" #include "mlir/Dialect/GPU/Passes.h" #include "mlir/Dialect/GPU/Utils.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/BlockAndValueMapping.h" #include "mlir/IR/Builders.h" @@ -58,7 +59,7 @@ /// operations may not have side-effects, as otherwise sinking (and hence /// duplicating them) is not legal. 
static bool isSinkingBeneficiary(Operation *op) { - return isa(op); + return isa(op); } /// For a given operation `op`, computes whether it is beneficial to sink the diff --git a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp --- a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp @@ -13,6 +13,7 @@ #include "mlir/Dialect/GPU/MemoryPromotion.h" #include "mlir/Dialect/GPU/GPUDialect.h" +#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h" #include "mlir/Dialect/SCF/EDSC/Builders.h" #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" #include "mlir/Pass/Pass.h" @@ -82,7 +83,7 @@ loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) { ivs.assign(loopIvs.begin(), loopIvs.end()); auto activeIvs = llvm::makeArrayRef(ivs).take_back(rank); - StdIndexedValue fromHandle(from), toHandle(to); + MemRefIndexedValue fromHandle(from), toHandle(to); toHandle(activeIvs) = fromHandle(activeIvs); }); diff --git a/mlir/lib/Dialect/Linalg/Analysis/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Analysis/CMakeLists.txt --- a/mlir/lib/Dialect/Linalg/Analysis/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Analysis/CMakeLists.txt @@ -7,5 +7,6 @@ LINK_LIBS PUBLIC MLIRIR MLIRLinalg + MLIRMemRef MLIRStandard ) diff --git a/mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp b/mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp --- a/mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp +++ b/mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp @@ -12,6 +12,7 @@ #include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/BuiltinOps.h" @@ -48,7 +49,7 @@ // the aliasing further. 
if (isa(defOp)) return v; - if (isa(defOp)) + if (isa(defOp)) return v; if (auto memEffect = dyn_cast(defOp)) { diff --git a/mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt b/mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt --- a/mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt @@ -11,6 +11,7 @@ MLIRAffineEDSC MLIRLinalg MLIRMath + MLIRMemRef MLIRSCF MLIRStandard ) diff --git a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt --- a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt @@ -19,5 +19,6 @@ MLIRSideEffectInterfaces MLIRViewLikeInterface MLIRStandard + MLIRMemRef MLIRTensor ) diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp --- a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp @@ -9,6 +9,7 @@ #include "mlir/Dialect/Linalg/IR/LinalgInterfaces.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/IR/AffineExprVisitor.h" #include "mlir/IR/AffineMap.h" #include "llvm/ADT/SmallSet.h" @@ -187,7 +188,7 @@ for (Value v : getShapedOperands()) { ShapedType t = v.getType().template cast(); for (unsigned i = 0, e = t.getRank(); i < e; ++i) - res.push_back(b.create(loc, v, i)); + res.push_back(b.create(loc, v, i)); } return res; } diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -15,6 +15,7 @@ #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Linalg/EDSC/Intrinsics.h" #include "mlir/Dialect/Linalg/IR/LinalgTypes.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/AffineExprVisitor.h" #include "mlir/IR/Matchers.h" @@ -109,12 +110,12 @@ /// ``` /// someop(memrefcast) -> someop /// ``` -/// It folds the source of the memref_cast into the root operation directly. +/// It folds the source of the memref.cast into the root operation directly. static LogicalResult foldMemRefCast(Operation *op) { bool folded = false; for (OpOperand &operand : op->getOpOperands()) { - auto castOp = operand.get().getDefiningOp(); - if (castOp && canFoldIntoConsumerOp(castOp)) { + auto castOp = operand.get().getDefiningOp(); + if (castOp && memref::CastOp::canFoldIntoConsumerOp(castOp)) { operand.set(castOp.getOperand()); folded = true; } @@ -776,10 +777,10 @@ /// - A constant value if the size is static along the dimension. /// - The dynamic value that defines the size of the result of /// `linalg.init_tensor` op. 
-struct ReplaceDimOfInitTensorOp : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; +struct ReplaceDimOfInitTensorOp : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(DimOp dimOp, + LogicalResult matchAndRewrite(memref::DimOp dimOp, PatternRewriter &rewriter) const override { auto initTensorOp = dimOp.memrefOrTensor().getDefiningOp(); if (!initTensorOp) @@ -986,7 +987,7 @@ assert(rankedTensorType.hasStaticShape()); int rank = rankedTensorType.getRank(); for (int i = 0; i < rank; ++i) { - auto dimOp = builder.createOrFold(loc, source, i); + auto dimOp = builder.createOrFold(loc, source, i); auto resultDimSize = builder.createOrFold( loc, rankedTensorType.getDimSize(i)); auto highValue = builder.createOrFold(loc, resultDimSize, dimOp); @@ -1292,7 +1293,7 @@ AffineExpr expr; SmallVector dynamicDims; for (auto dim : llvm::seq(startPos, endPos + 1)) { - dynamicDims.push_back(builder.create(loc, src, dim)); + dynamicDims.push_back(builder.create(loc, src, dim)); AffineExpr currExpr = builder.getAffineSymbolExpr(dim - startPos); expr = (expr ? expr * currExpr : currExpr); } @@ -1361,7 +1362,7 @@ "dimensions"); linearizedStaticDim *= d.value(); } - Value sourceDim = builder.create(loc, src, sourceDimPos); + Value sourceDim = builder.create(loc, src, sourceDimPos); return applyMapToValues( builder, loc, AffineMap::get( @@ -1637,9 +1638,9 @@ }; /// Canonicalize dim ops that use the output shape with dim of the input. -struct ReplaceDimOfReshapeOpResult : OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(DimOp dimOp, +struct ReplaceDimOfReshapeOpResult : OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + LogicalResult matchAndRewrite(memref::DimOp dimOp, PatternRewriter &rewriter) const override { Value dimValue = dimOp.memrefOrTensor(); Optional dimIndex = dimOp.getConstantIndex(); @@ -2445,24 +2446,25 @@ } }; -/// Replaces std.dim operations that use the result of a LinalgOp (on tensors) -/// with std.dim operations that use one of the arguments. For example, +/// Replaces memref.dim operations that use the result of a LinalgOp (on +/// tensors) with memref.dim operations that use one of the arguments. For +/// example, /// /// %0 = linalg.matmul ins(%arg0, %arg1, ...) -/// %1 = dim %0, %c0 +/// %1 = memref.dim %0, %c0 /// /// with /// -/// %1 = dim %arg0, %c0 +/// %1 = memref.dim %arg0, %c0 /// /// where possible. With this the result of the `linalg.matmul` is not used in /// dim operations. If the value produced is replaced with another value (say by /// tiling `linalg.matmul`) will make the `linalg.matmul` truly dead instead of /// used in a dim op that would prevent the DCE of this op. -struct ReplaceDimOfLinalgOpResult : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; +struct ReplaceDimOfLinalgOpResult : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(DimOp dimOp, + LogicalResult matchAndRewrite(memref::DimOp dimOp, PatternRewriter &rewriter) const override { Value dimValue = dimOp.memrefOrTensor(); Optional dimIndex = dimOp.getConstantIndex(); @@ -2479,7 +2481,7 @@ if (!operandDimValue) { // Its always possible to replace using the corresponding `outs` // parameter. 
- operandDimValue = rewriter.create( + operandDimValue = rewriter.create( dimOp.getLoc(), linalgOp.getOutput(resultIndex), *dimIndex); } rewriter.replaceOp(dimOp, *operandDimValue); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp @@ -25,8 +25,8 @@ static Value cloneMemref(Location loc, Value memref, OpBuilder &b) { auto memrefType = memref.getType().cast(); - auto alloc = - b.create(loc, memrefType, getDynOperands(loc, memref, b)); + auto alloc = b.create(loc, memrefType, + getDynOperands(loc, memref, b)); b.create(loc, memref, alloc); return alloc; } @@ -60,17 +60,17 @@ continue; } - if (auto alloc = resultTensor.getDefiningOp()) { + if (auto alloc = resultTensor.getDefiningOp()) { resultBuffers.push_back(resultTensor); continue; } // Allocate buffers for statically-shaped results. if (memrefType.hasStaticShape()) { - resultBuffers.push_back(b.create(loc, memrefType)); + resultBuffers.push_back(b.create(loc, memrefType)); continue; } - resultBuffers.push_back(b.create( + resultBuffers.push_back(b.create( loc, memrefType, getDynOperands(loc, resultTensor, b))); } return success(); @@ -148,7 +148,7 @@ matchAndRewrite(InitTensorOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { linalg::InitTensorOpAdaptor adaptor(operands, op->getAttrDictionary()); - rewriter.replaceOpWithNewOp( + rewriter.replaceOpWithNewOp( op, getTypeConverter()->convertType(op.getType()).cast(), adaptor.sizes()); return success(); @@ -231,9 +231,9 @@ // op.sizes() capture exactly the dynamic alloc operands matching the // subviewMemRefType thanks to subview/subtensor canonicalization and // verification. - Value alloc = - rewriter.create(op.getLoc(), subviewMemRefType, op.sizes()); - Value subView = rewriter.create( + Value alloc = rewriter.create( + op.getLoc(), subviewMemRefType, op.sizes()); + Value subView = rewriter.create( op.getLoc(), sourceMemref, op.getMixedOffsets(), op.getMixedSizes(), op.getMixedStrides()); rewriter.create(op.getLoc(), subView, alloc); @@ -243,8 +243,8 @@ }; /// Convert `subtensor_insert %source into %dest [offsets][sizes][strides] -> -/// %t` to an tensor_to_memref + subview + copy + tensor_load pattern. -/// tensor_to_memref and tensor_load are inserted automatically by the +/// %t` to an buffer_cast + subview + copy + tensor_load pattern. +/// buffer_cast and tensor_load are inserted automatically by the /// conversion infra: /// ``` /// %sv = subview %dest [offsets][sizes][strides] @@ -273,7 +273,7 @@ assert(destMemRef.getType().isa()); // Take a subview to copy the small memref. - Value subview = rewriter.create( + Value subview = rewriter.create( op.getLoc(), destMemRef, op.getMixedOffsets(), op.getMixedSizes(), op.getMixedStrides()); // Copy the small memref. @@ -295,7 +295,7 @@ // Mark all Standard operations legal. target.addLegalDialect(); + memref::MemRefDialect, StandardOpsDialect>(); target.addIllegalOp(); // Mark all Linalg operations illegal as long as they work on tensors. 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt --- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt @@ -29,6 +29,7 @@ MLIRAnalysis MLIREDSC MLIRIR + MLIRMemRef MLIRLinalgAnalysis MLIRLinalgEDSC MLIRLinalg diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp @@ -18,6 +18,8 @@ #include "mlir/Dialect/Linalg/Passes.h" #include "mlir/Dialect/Linalg/Transforms/Transforms.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" +#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/AffineExpr.h" @@ -104,11 +106,12 @@ SmallVector offsets, sizes, strides; inferShapeComponents(map, loopRanges, offsets, sizes, strides); Value shape = en.value(); - Value sub = shape.getType().isa() - ? b.create(loc, shape, offsets, sizes, strides) - .getResult() - : b.create(loc, shape, offsets, sizes, strides) - .getResult(); + Value sub = + shape.getType().isa() + ? b.create(loc, shape, offsets, sizes, strides) + .getResult() + : b.create(loc, shape, offsets, sizes, strides) + .getResult(); clonedShapes.push_back(sub); } // Append the other operands. @@ -177,8 +180,8 @@ // `ViewInterface`. The interface needs a `getOrCreateRanges` method which // currently returns a `linalg.range`. The fix here is to move this op to // `std` dialect and add the method to `ViewInterface`. - if (fromSubViewOpOnly && - !isa_and_nonnull(en.value().getDefiningOp())) + if (fromSubViewOpOnly && !isa_and_nonnull( + en.value().getDefiningOp())) continue; unsigned idx = en.index(); @@ -227,9 +230,8 @@ << "existing LoopRange: " << loopRanges[i] << "\n"); else { auto shapeDim = getShapeDefiningLoopRange(producer, i); - loopRanges[i] = Range{std_constant_index(0), - std_dim(shapeDim.shape, shapeDim.dimension), - std_constant_index(1)}; + Value dim = memref_dim(shapeDim.shape, shapeDim.dimension); + loopRanges[i] = Range{std_constant_index(0), dim, std_constant_index(1)}; LLVM_DEBUG(llvm::dbgs() << "new LoopRange: " << loopRanges[i] << "\n"); } } @@ -242,7 +244,7 @@ static Range getRangeFromOperandShape(OpBuilder &b, Location loc, Value shapedOperand, unsigned dim) { Operation *shapeProducingOp = shapedOperand.getDefiningOp(); - if (auto subViewOp = dyn_cast(shapeProducingOp)) + if (auto subViewOp = dyn_cast(shapeProducingOp)) return subViewOp.getOrCreateRanges(b, loc)[dim]; if (auto subTensorOp = dyn_cast(shapeProducingOp)) return subTensorOp.getOrCreateRanges(b, loc)[dim]; @@ -425,7 +427,7 @@ // Must be a subview or a slice to guarantee there are loops we can fuse // into. 
- auto subView = consumerOpOperand.get().getDefiningOp(); + auto subView = consumerOpOperand.get().getDefiningOp(); if (!subView) { LLVM_DEBUG(llvm::dbgs() << "\nNot fusable (not a subview)"); return llvm::None; diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp @@ -14,6 +14,7 @@ #include "mlir/Dialect/Linalg/Passes.h" #include "mlir/Dialect/Linalg/Transforms/Transforms.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" +#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h" #include "mlir/Dialect/SCF/EDSC/Builders.h" #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" #include "mlir/IR/AffineExpr.h" @@ -200,7 +201,7 @@ conds.push_back(leftOutOfBound); else conds.push_back(conds.back() || leftOutOfBound); - Value rightBound = std_dim(input, idx); + Value rightBound = memref_dim(input, idx); conds.push_back(conds.back() || (sge(dim, rightBound))); // When padding is involved, the indices will only be shifted to negative, @@ -307,12 +308,12 @@ IndexedValueType F(convOp.filter()), O(convOp.output()); // Emit scalar form. Padded conv involves an affine.max in the memory access - // which is not allowed by affine.load. Override to use an StdIndexedValue + // which is not allowed by affine.load. Override to use an MemRefIndexedValue // when there is non-zero padding. if (hasPadding(convOp)) { Type type = convOp.input().getType().cast().getElementType(); Value padValue = std_constant(type, getPadValueAttr(type)); - Value paddedInput = getPaddedInput( + Value paddedInput = getPaddedInput( convOp.input(), imIdx, /* Only need to pad the window dimensions */ {0, static_cast(imIdx.size()) - 1}, padValue); @@ -338,9 +339,9 @@ Type type = op.input().getType().template cast().getElementType(); Value padValue = std_constant(type, getPadValueAttr(type)); - return getPaddedInput(op.input(), inputIndices, - /*Pad every dimension*/ {}, - padValue); + return getPaddedInput(op.input(), inputIndices, + /*Pad every dimension*/ {}, + padValue); } IndexedValueType input(op.input()); return input(inputIndices); @@ -546,7 +547,7 @@ MLIRContext *context = funcOp.getContext(); OwningRewritePatternList patterns; patterns.insert>(interchangeVector); - DimOp::getCanonicalizationPatterns(patterns, context); + memref::DimOp::getCanonicalizationPatterns(patterns, context); AffineApplyOp::getCanonicalizationPatterns(patterns, context); patterns.insert(context); // Just apply the patterns greedily. 
@@ -593,12 +594,18 @@ struct LowerToAffineLoops : public LinalgLowerToAffineLoopsBase { + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } void runOnFunction() override { lowerLinalgToLoopsImpl(getFunction(), interchangeVector); } }; struct LowerToLoops : public LinalgLowerToLoopsBase { + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } void runOnFunction() override { lowerLinalgToLoopsImpl(getFunction(), interchangeVector); } diff --git a/mlir/lib/Dialect/Linalg/Transforms/PassDetail.h b/mlir/lib/Dialect/Linalg/Transforms/PassDetail.h --- a/mlir/lib/Dialect/Linalg/Transforms/PassDetail.h +++ b/mlir/lib/Dialect/Linalg/Transforms/PassDetail.h @@ -26,6 +26,10 @@ class SCFDialect; } // end namespace scf +namespace memref { +class MemRefDialect; +} // end namespace memref + namespace vector { class VectorDialect; } // end namespace vector diff --git a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp @@ -18,6 +18,7 @@ #include "mlir/Dialect/Linalg/Passes.h" #include "mlir/Dialect/Linalg/Transforms/Transforms.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" +#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" #include "mlir/IR/AffineExpr.h" @@ -38,9 +39,9 @@ using folded_affine_min = FoldedValueBuilder; using folded_linalg_range = FoldedValueBuilder; -using folded_std_dim = FoldedValueBuilder; -using folded_std_subview = FoldedValueBuilder; -using folded_std_view = FoldedValueBuilder; +using folded_memref_dim = FoldedValueBuilder; +using folded_memref_subview = FoldedValueBuilder; +using folded_memref_view = FoldedValueBuilder; #define DEBUG_TYPE "linalg-promotion" @@ -59,22 +60,22 @@ if (!dynamicBuffers) if (auto cst = size.getDefiningOp()) return options.useAlloca - ? std_alloca(MemRefType::get(width * cst.getValue(), - IntegerType::get(ctx, 8)), - ValueRange{}, alignment_attr) + ? memref_alloca(MemRefType::get(width * cst.getValue(), + IntegerType::get(ctx, 8)), + ValueRange{}, alignment_attr) .value - : std_alloc(MemRefType::get(width * cst.getValue(), - IntegerType::get(ctx, 8)), - ValueRange{}, alignment_attr) + : memref_alloc(MemRefType::get(width * cst.getValue(), + IntegerType::get(ctx, 8)), + ValueRange{}, alignment_attr) .value; Value mul = folded_std_muli(folder, folded_std_constant_index(folder, width), size); return options.useAlloca - ? std_alloca(MemRefType::get(-1, IntegerType::get(ctx, 8)), mul, - alignment_attr) + ? memref_alloca(MemRefType::get(-1, IntegerType::get(ctx, 8)), mul, + alignment_attr) .value - : std_alloc(MemRefType::get(-1, IntegerType::get(ctx, 8)), mul, - alignment_attr) + : memref_alloc(MemRefType::get(-1, IntegerType::get(ctx, 8)), mul, + alignment_attr) .value; } @@ -82,10 +83,12 @@ /// no call back to do so is provided. The default is to allocate a /// memref<..xi8> and return a view to get a memref type of shape /// boundingSubViewSize. 
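+/// Illustrative sketch only (hypothetical sizes, not part of this patch):
+/// promoting an f32 subview of dynamic size %w x %h roughly produces
+///   %buf  = memref.alloc(%bytes) : memref<?xi8>
+///   %view = memref.view %buf[%c0][%w, %h] : memref<?xi8> to memref<?x?xf32>
+/// where %bytes is the bounding size in bytes and %c0 the byte shift.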
-static Optional defaultAllocBufferCallBack( - const LinalgPromotionOptions &options, OpBuilder &builder, - SubViewOp subView, ArrayRef boundingSubViewSize, bool dynamicBuffers, - Optional alignment, OperationFolder *folder) { +static Optional +defaultAllocBufferCallBack(const LinalgPromotionOptions &options, + OpBuilder &builder, memref::SubViewOp subView, + ArrayRef boundingSubViewSize, + bool dynamicBuffers, Optional alignment, + OperationFolder *folder) { ShapedType viewType = subView.getType(); int64_t rank = viewType.getRank(); (void)rank; @@ -100,7 +103,7 @@ dynamicBuffers, folder, alignment); SmallVector dynSizes(boundingSubViewSize.size(), ShapedType::kDynamicSize); - Value view = folded_std_view( + Value view = folded_memref_view( folder, MemRefType::get(dynSizes, viewType.getElementType()), buffer, zero, boundingSubViewSize); return view; @@ -112,10 +115,10 @@ static LogicalResult defaultDeallocBufferCallBack(const LinalgPromotionOptions &options, OpBuilder &b, Value fullLocalView) { - auto viewOp = fullLocalView.getDefiningOp(); + auto viewOp = fullLocalView.getDefiningOp(); assert(viewOp && "expected full local view to be a ViewOp"); if (!options.useAlloca) - std_dealloc(viewOp.source()); + memref_dealloc(viewOp.source()); return success(); } @@ -161,21 +164,21 @@ if (options.operandsToPromote && !options.operandsToPromote->count(idx)) continue; auto *op = linalgOp.getShapedOperand(idx).getDefiningOp(); - if (auto sv = dyn_cast_or_null(op)) { + if (auto sv = dyn_cast_or_null(op)) { subViews[idx] = sv; useFullTileBuffers[sv] = vUseFullTileBuffers[idx]; } } - allocationFn = - (options.allocationFn ? *(options.allocationFn) - : [&](OpBuilder &builder, SubViewOp subViewOp, - ArrayRef boundingSubViewSize, - OperationFolder *folder) -> Optional { - return defaultAllocBufferCallBack(options, builder, subViewOp, - boundingSubViewSize, dynamicBuffers, - alignment, folder); - }); + allocationFn = (options.allocationFn + ? *(options.allocationFn) + : [&](OpBuilder &builder, memref::SubViewOp subViewOp, + ArrayRef boundingSubViewSize, + OperationFolder *folder) -> Optional { + return defaultAllocBufferCallBack(options, builder, subViewOp, + boundingSubViewSize, dynamicBuffers, + alignment, folder); + }); deallocationFn = (options.deallocationFn ? *(options.deallocationFn) @@ -209,7 +212,7 @@ // boundary tiles. For now this is done with an unconditional `fill` op followed // by a partial `copy` op. Optional mlir::linalg::promoteSubviewAsNewBuffer( - OpBuilder &b, Location loc, SubViewOp subView, + OpBuilder &b, Location loc, memref::SubViewOp subView, AllocBufferCallbackFn allocationFn, OperationFolder *folder) { ScopedContext scopedContext(b, loc); auto viewType = subView.getType(); @@ -227,7 +230,8 @@ (!sizeAttr) ? 
rangeValue.size : b.create(loc, sizeAttr); LLVM_DEBUG(llvm::dbgs() << "Extracted tightest: " << size << "\n"); fullSizes.push_back(size); - partialSizes.push_back(folded_std_dim(folder, subView, en.index()).value); + partialSizes.push_back( + folded_memref_dim(folder, subView, en.index()).value); } SmallVector dynSizes(fullSizes.size(), -1); // If a callback is not specified, then use the default implementation for @@ -238,7 +242,7 @@ SmallVector zeros(fullSizes.size(), b.getIndexAttr(0)); SmallVector ones(fullSizes.size(), b.getIndexAttr(1)); auto partialLocalView = - folded_std_subview(folder, *fullLocalView, zeros, partialSizes, ones); + folded_memref_subview(folder, *fullLocalView, zeros, partialSizes, ones); return PromotionInfo{*fullLocalView, partialLocalView}; } @@ -253,7 +257,8 @@ MapVector promotionInfoMap; for (auto v : options.subViews) { - SubViewOp subView = cast(v.second.getDefiningOp()); + memref::SubViewOp subView = + cast(v.second.getDefiningOp()); Optional promotionInfo = promoteSubviewAsNewBuffer( b, loc, subView, options.allocationFn, folder); if (!promotionInfo) @@ -277,8 +282,9 @@ auto info = promotionInfoMap.find(v.first); if (info == promotionInfoMap.end()) continue; - if (failed(options.copyInFn(b, cast(v.second.getDefiningOp()), - info->second.partialLocalView))) + if (failed(options.copyInFn( + b, cast(v.second.getDefiningOp()), + info->second.partialLocalView))) return {}; } return promotionInfoMap; @@ -353,7 +359,7 @@ return failure(); // Check that at least one of the requested operands is indeed a subview. for (auto en : llvm::enumerate(linOp.getShapedOperands())) { - auto sv = isa_and_nonnull(en.value().getDefiningOp()); + auto sv = isa_and_nonnull(en.value().getDefiningOp()); if (sv) { if (!options.operandsToPromote.hasValue() || options.operandsToPromote->count(en.index())) diff --git a/mlir/lib/Dialect/Linalg/Transforms/SparseLowering.cpp b/mlir/lib/Dialect/Linalg/Transforms/SparseLowering.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/SparseLowering.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/SparseLowering.cpp @@ -44,11 +44,11 @@ }; /// Sparse conversion rule for dimension accesses. -class TensorToDimSizeConverter : public OpConversionPattern { +class TensorToDimSizeConverter : public OpConversionPattern { public: using OpConversionPattern::OpConversionPattern; LogicalResult - matchAndRewrite(DimOp op, ArrayRef operands, + matchAndRewrite(memref::DimOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { if (!operands[0].getType().isa()) return failure(); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp b/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp @@ -533,13 +533,13 @@ // positions for the output tensor. Currently this results in functional, // but slightly imprecise IR, so it is put under an experimental option. if (codegen.options.fastOutput) - return rewriter.create(loc, denseTp, tensor); + return rewriter.create(loc, denseTp, tensor); // By default, a new buffer is allocated which is initialized to the // tensor defined in the outs() clause. This is always correct but // introduces a dense initialization component that may negatively // impact the running complexity of the sparse kernel. 
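+  // Illustrative sketch (hypothetical types, not from this patch): for a
+  // dense output of type memref<?x32xf64> this materializes roughly
+  //   %init  = memref.buffer_cast %outTensor : memref<?x32xf64>
+  //   %alloc = memref.alloc(%d0) : memref<?x32xf64>
+  //   linalg.copy(%init, %alloc) : memref<?x32xf64>, memref<?x32xf64>
+  // and the generated sparse kernel then writes into %alloc.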
- Value init = rewriter.create(loc, denseTp, tensor); - Value alloc = rewriter.create(loc, denseTp, args); + Value init = rewriter.create(loc, denseTp, tensor); + Value alloc = rewriter.create(loc, denseTp, args); rewriter.create(loc, init, alloc); return alloc; } @@ -585,8 +585,8 @@ } // Find lower and upper bound in current dimension. Value up; - if (shape[d] == TensorType::kDynamicSize) { - up = rewriter.create(loc, tensor, d); + if (shape[d] == MemRefType::kDynamicSize) { + up = rewriter.create(loc, tensor, d); args.push_back(up); } else { up = rewriter.create(loc, shape[d]); @@ -600,7 +600,7 @@ auto denseTp = MemRefType::get(shape, tensorType.getElementType()); if (t < numInputs) codegen.buffers[t] = - rewriter.create(loc, denseTp, tensor); + rewriter.create(loc, denseTp, tensor); else codegen.buffers[t] = genOutputBuffer(codegen, rewriter, op, denseTp, args); @@ -716,7 +716,7 @@ Value ptr = codegen.buffers[tensor]; if (codegen.curVecLength > 1) return genVectorLoad(codegen, rewriter, ptr, args); - return rewriter.create(loc, ptr, args); + return rewriter.create(loc, ptr, args); } /// Generates a store on a dense tensor. @@ -744,7 +744,7 @@ if (codegen.curVecLength > 1) genVectorStore(codegen, rewriter, rhs, ptr, args); else - rewriter.create(loc, rhs, ptr, args); + rewriter.create(loc, rhs, ptr, args); } /// Generates a pointer/index load from the sparse storage scheme. @@ -752,7 +752,7 @@ Value ptr, Value s) { if (codegen.curVecLength > 1) return genVectorLoad(codegen, rewriter, ptr, {s}); - Value load = rewriter.create(loc, ptr, s); + Value load = rewriter.create(loc, ptr, s); return load.getType().isa() ? load : rewriter.create(loc, load, rewriter.getIndexType()); @@ -1345,8 +1345,8 @@ CodeGen codegen(options, numTensors, numLoops); genBuffers(merger, codegen, rewriter, op); genStmt(merger, codegen, rewriter, op, topSort, exp.getValue(), 0); - Value result = - rewriter.create(op.getLoc(), codegen.buffers.back()); + Value result = rewriter.create( + op.getLoc(), codegen.buffers.back()); rewriter.replaceOp(op, result); return success(); } diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -17,6 +17,8 @@ #include "mlir/Dialect/Linalg/Passes.h" #include "mlir/Dialect/Linalg/Transforms/Transforms.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" +#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/EDSC/Builders.h" #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" @@ -34,7 +36,6 @@ using namespace mlir::linalg; using namespace mlir::scf; - #define DEBUG_TYPE "linalg-tiling" static bool isZero(Value v) { @@ -144,9 +145,9 @@ // operand_dim_1 = dim %operand, 1 : memref<50x100xf32> // scf.for %k = %c0 to operand_dim_0 step %c10 { // scf.for %l = %c0 to operand_dim_1 step %c25 { -// %4 = std.subview %operand[%k, %l][%c10, %c25][%c1, %c1] +// %4 = memref.subview %operand[%k, %l][%c10, %c25][%c1, %c1] // : memref<50x100xf32> to memref -// %5 = std.subview %result[%k, %l][%c10, %c25][%c1, %c1] +// %5 = memref.subview %result[%k, %l][%c10, %c25][%c1, %c1] // : memref<50x100xf32> to memref // linalg.indexed_generic pointwise_2d_trait %4, %5 { // ^bb0(%i: index, %j: index, %operand_in: f32, %result_in: f32): @@ -262,7 +263,7 @@ for (unsigned r = 0; r < rank; ++r) { if (!isTiled(map.getSubMap({r}), tileSizes)) { 
offsets.push_back(b.getIndexAttr(0)); - sizes.push_back(std_dim(shapedOp, r).value); + sizes.push_back(memref_dim(shapedOp, r).value); strides.push_back(b.getIndexAttr(1)); continue; } @@ -290,7 +291,7 @@ getAffineDimExpr(/*position=*/1, b.getContext()) - getAffineDimExpr(/*position=*/2, b.getContext())}, b.getContext()); - auto d = std_dim(shapedOp, r); + Value d = memref_dim(shapedOp, r); SmallVector operands{size, d, offset}; fullyComposeAffineMapAndOperands(&minMap, &operands); size = affine_min(b.getIndexType(), minMap, operands); @@ -302,7 +303,7 @@ if (shapedType.isa()) res.push_back( - b.create(loc, shapedOp, offsets, sizes, strides)); + b.create(loc, shapedOp, offsets, sizes, strides)); else res.push_back( b.create(loc, shapedOp, offsets, sizes, strides)); @@ -474,7 +475,7 @@ if (!options.tileSizeComputationFunction) return llvm::None; - + // Enforce the convention that "tiling by zero" skips tiling a particular // dimension. This convention is significantly simpler to handle instead of // adjusting affine maps to account for missing dimensions. @@ -564,9 +565,9 @@ scf::ParallelOp::getCanonicalizationPatterns(patterns, ctx); ConstantIndexOp::getCanonicalizationPatterns(patterns, ctx); SubTensorOp::getCanonicalizationPatterns(patterns, ctx); - SubViewOp::getCanonicalizationPatterns(patterns, ctx); + memref::SubViewOp::getCanonicalizationPatterns(patterns, ctx); tensor::CastOp::getCanonicalizationPatterns(patterns, ctx); - ViewOp::getCanonicalizationPatterns(patterns, ctx); + memref::ViewOp::getCanonicalizationPatterns(patterns, ctx); CanonicalizationPatternList< #define GET_OP_LIST #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc" diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -212,7 +212,7 @@ SmallVector offsets(rank, rewriter.getIndexAttr(0)); auto sizes = llvm::to_vector<4>(llvm::map_range( llvm::seq(0, rank), [&](unsigned d) -> OpFoldResult { - auto dimOp = rewriter.create(loc, std::get<0>(it), d); + auto dimOp = rewriter.create(loc, std::get<0>(it), d); newUsersOfOpToPad.insert(dimOp); return dimOp.getResult(); })); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -85,7 +85,7 @@ } /// Build a vector.transfer_read from `source` at indices set to all `0`. -/// If source has rank zero, build an std.load. +/// If source has rank zero, build an memref.load. /// Return the produced value. static Value buildVectorRead(OpBuilder &builder, Value source) { edsc::ScopedContext scope(builder); @@ -94,11 +94,11 @@ SmallVector indices(shapedType.getRank(), std_constant_index(0)); return vector_transfer_read(vectorType, source, indices); } - return std_load(source); + return memref_load(source); } /// Build a vector.transfer_write of `value` into `dest` at indices set to all -/// `0`. If `dest` has null rank, build an std.store. +/// `0`. If `dest` has null rank, build an memref.store. /// Return the produced value or null if no value is produced. 
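+/// Illustrative sketch (hypothetical types): a ranked destination yields
+/// something like
+///   vector.transfer_write %val, %dest[%c0, %c0]
+///     : vector<4x8xf32>, memref<4x8xf32>
+/// whereas a rank-0 destination degenerates to
+///   memref.store %val, %dest[] : memref<f32>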
static Value buildVectorWrite(OpBuilder &builder, Value value, Value dest) { edsc::ScopedContext scope(builder); @@ -110,7 +110,7 @@ value = vector_broadcast(vectorType, value); write = vector_transfer_write(value, dest, indices); } else { - write = std_store(value, dest); + write = memref_store(value, dest); } LLVM_DEBUG(dbgs() << "\n[" DEBUG_TYPE "]: vectorized op: " << *write); if (!write->getResults().empty()) @@ -544,7 +544,7 @@ rewriter.getAffineMapArrayAttr(indexingMaps), rewriter.getStrArrayAttr(iteratorTypes)); - rewriter.create(loc, result, output, ValueRange(zeros)); + rewriter.create(loc, result, output, ValueRange(zeros)); rewriter.eraseOp(op); return success(); } @@ -667,12 +667,12 @@ } /// Return the unique subview use of `v` if it is indeed unique, null otherwise. -static SubViewOp getSubViewUseIfUnique(Value v) { - SubViewOp subViewOp; +static memref::SubViewOp getSubViewUseIfUnique(Value v) { + memref::SubViewOp subViewOp; for (auto &u : v.getUses()) { - if (auto newSubViewOp = dyn_cast(u.getOwner())) { + if (auto newSubViewOp = dyn_cast(u.getOwner())) { if (subViewOp) - return SubViewOp(); + return memref::SubViewOp(); subViewOp = newSubViewOp; } } @@ -686,14 +686,14 @@ // Transfer into `view`. Value viewOrAlloc = xferOp.source(); - if (!viewOrAlloc.getDefiningOp() && - !viewOrAlloc.getDefiningOp()) + if (!viewOrAlloc.getDefiningOp() && + !viewOrAlloc.getDefiningOp()) return failure(); LLVM_DEBUG(llvm::dbgs() << "\n[" DEBUG_TYPE "]: " << viewOrAlloc); // Ensure there is exactly one subview of `viewOrAlloc` defining `subView`. - SubViewOp subViewOp = getSubViewUseIfUnique(viewOrAlloc); + memref::SubViewOp subViewOp = getSubViewUseIfUnique(viewOrAlloc); if (!subViewOp) return failure(); Value subView = subViewOp.getResult(); @@ -765,12 +765,12 @@ vector::TransferWriteOp xferOp, PatternRewriter &rewriter) const { // Transfer into `viewOrAlloc`. Value viewOrAlloc = xferOp.source(); - if (!viewOrAlloc.getDefiningOp() && - !viewOrAlloc.getDefiningOp()) + if (!viewOrAlloc.getDefiningOp() && + !viewOrAlloc.getDefiningOp()) return failure(); // Ensure there is exactly one subview of `viewOrAlloc` defining `subView`. - SubViewOp subViewOp = getSubViewUseIfUnique(viewOrAlloc); + memref::SubViewOp subViewOp = getSubViewUseIfUnique(viewOrAlloc); if (!subViewOp) return failure(); Value subView = subViewOp.getResult(); diff --git a/mlir/lib/Dialect/MemRef/CMakeLists.txt b/mlir/lib/Dialect/MemRef/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/MemRef/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(IR) diff --git a/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt b/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt @@ -0,0 +1,17 @@ +add_mlir_dialect_library(MLIRMemRef + MemRefDialect.cpp + MemRefOps.cpp + + ADDITIONAL_HEADER_DIRS + ${PROJECT_SOURCE_DIR}/inlude/mlir/Dialect/MemRefDialect + + DEPENDS + MLIRMemRefOpsIncGen + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC + MLIRDialect + MLIRIR +) diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp @@ -0,0 +1,39 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Transforms/InliningUtils.h" + +using namespace mlir; +using namespace mlir::memref; + +//===----------------------------------------------------------------------===// +// MemRefDialect Dialect Interfaces +//===----------------------------------------------------------------------===// + +namespace { +struct MemRefInlinerInterface : public DialectInlinerInterface { + using DialectInlinerInterface::DialectInlinerInterface; + bool isLegalToInline(Region *dest, Region *src, bool wouldBeCloned, + BlockAndValueMapping &valueMapping) const final { + return true; + } + bool isLegalToInline(Operation *, Region *, bool wouldBeCloned, + BlockAndValueMapping &) const final { + return true; + } +}; +} // end anonymous namespace + +void mlir::memref::MemRefDialect::initialize() { + addOperations(); + addInterfaces(); +} diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp @@ -0,0 +1,2128 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/Dialect/StandardOps/Utils/Utils.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/IR/AffineMap.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Matchers.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/TypeUtilities.h" +#include "llvm/ADT/STLExtras.h" + +using namespace mlir; +using namespace mlir::memref; + +/// Materialize a single constant operation from a given attribute value with +/// the desired resultant type. +Operation *MemRefDialect::materializeConstant(OpBuilder &builder, + Attribute value, Type type, + Location loc) { + return builder.create(loc, type, value); +} + +/// Extract int64_t values from the assumed ArrayAttr of IntegerAttr. +static SmallVector extractFromI64ArrayAttr(Attribute attr) { + return llvm::to_vector<4>( + llvm::map_range(attr.cast(), [](Attribute a) -> int64_t { + return a.cast().getInt(); + })); +} + +/// Helper function to dispatch an OpFoldResult into either the `dynamicVec` if +/// it is a Value or into `staticVec` if it is an IntegerAttr. +/// In the case of a Value, a copy of the `sentinel` value is also pushed to +/// `staticVec`. This is useful to extract mixed static and dynamic entries that +/// come from an AttrSizedOperandSegments trait. 
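+///
+/// Illustrative example (hypothetical values): dispatching the mixed list
+/// [%size, 42] with sentinel ShapedType::kDynamicSize yields
+///   dynamicVec = [%size]
+///   staticVec  = [ShapedType::kDynamicSize, 42]
+/// i.e. the usual split static/dynamic encoding of such operand lists.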
+static void dispatchIndexOpFoldResult(OpFoldResult ofr, + SmallVectorImpl &dynamicVec, + SmallVectorImpl &staticVec, + int64_t sentinel) { + if (auto v = ofr.dyn_cast()) { + dynamicVec.push_back(v); + staticVec.push_back(sentinel); + return; + } + APInt apInt = ofr.dyn_cast().cast().getValue(); + staticVec.push_back(apInt.getSExtValue()); +} + +static void dispatchIndexOpFoldResults(ArrayRef ofrs, + SmallVectorImpl &dynamicVec, + SmallVectorImpl &staticVec, + int64_t sentinel) { + for (auto ofr : ofrs) + dispatchIndexOpFoldResult(ofr, dynamicVec, staticVec, sentinel); +} + +//===----------------------------------------------------------------------===// +// Common canonicalization pattern support logic +//===----------------------------------------------------------------------===// + +/// This is a common class used for patterns of the form +/// "someop(memrefcast) -> someop". It folds the source of any memref.cast +/// into the root operation directly. +static LogicalResult foldMemRefCast(Operation *op) { + bool folded = false; + for (OpOperand &operand : op->getOpOperands()) { + auto cast = operand.get().getDefiningOp(); + if (cast && !cast.getOperand().getType().isa()) { + operand.set(cast.getOperand()); + folded = true; + } + } + return success(folded); +} + +//===----------------------------------------------------------------------===// +// Helpers for GlobalOp +//===----------------------------------------------------------------------===// + +static Type getTensorTypeFromMemRefType(Type type) { + if (auto memref = type.dyn_cast()) + return RankedTensorType::get(memref.getShape(), memref.getElementType()); + if (auto memref = type.dyn_cast()) + return UnrankedTensorType::get(memref.getElementType()); + return NoneType::get(type.getContext()); +} + +//===----------------------------------------------------------------------===// +// AllocOp / AllocaOp +//===----------------------------------------------------------------------===// + +template +static LogicalResult verifyAllocLikeOp(AllocLikeOp op) { + static_assert(llvm::is_one_of::value, + "applies to only alloc or alloca"); + auto memRefType = op.getResult().getType().template dyn_cast(); + if (!memRefType) + return op.emitOpError("result must be a memref"); + + if (static_cast(op.dynamicSizes().size()) != + memRefType.getNumDynamicDims()) + return op.emitOpError("dimension operand count does not equal memref " + "dynamic dimension count"); + + unsigned numSymbols = 0; + if (!memRefType.getAffineMaps().empty()) + numSymbols = memRefType.getAffineMaps().front().getNumSymbols(); + if (op.symbolOperands().size() != numSymbols) + return op.emitOpError( + "symbol operand count does not equal memref symbol count"); + + return success(); +} + +static LogicalResult verify(AllocOp op) { return verifyAllocLikeOp(op); } + +static LogicalResult verify(AllocaOp op) { + // An alloca op needs to have an ancestor with an allocation scope trait. + if (!op->getParentWithTrait()) + return op.emitOpError( + "requires an ancestor op with AutomaticAllocationScope trait"); + + return verifyAllocLikeOp(op); +} + +namespace { +/// Fold constant dimensions into an alloc like operation. +template +struct SimplifyAllocConst : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(AllocLikeOp alloc, + PatternRewriter &rewriter) const override { + // Check to see if any dimensions operands are constants. If so, we can + // substitute and drop them. 
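+    // Illustrative sketch (hypothetical IR): with %c64 = constant 64 : index,
+    //   %0 = memref.alloc(%c64) : memref<?xf32>
+    // is rewritten to
+    //   %1 = memref.alloc() : memref<64xf32>
+    // followed by a memref.cast back to memref<?xf32> so that existing uses
+    // keep their original type.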
+ if (llvm::none_of(alloc.getOperands(), [](Value operand) { + return matchPattern(operand, matchConstantIndex()); + })) + return failure(); + + auto memrefType = alloc.getType(); + + // Ok, we have one or more constant operands. Collect the non-constant ones + // and keep track of the resultant memref type to build. + SmallVector newShapeConstants; + newShapeConstants.reserve(memrefType.getRank()); + SmallVector newOperands; + + unsigned dynamicDimPos = 0; + for (unsigned dim = 0, e = memrefType.getRank(); dim < e; ++dim) { + int64_t dimSize = memrefType.getDimSize(dim); + // If this is already static dimension, keep it. + if (dimSize != -1) { + newShapeConstants.push_back(dimSize); + continue; + } + auto *defOp = alloc.getOperand(dynamicDimPos).getDefiningOp(); + if (auto constantIndexOp = dyn_cast_or_null(defOp)) { + // Dynamic shape dimension will be folded. + newShapeConstants.push_back(constantIndexOp.getValue()); + } else { + // Dynamic shape dimension not folded; copy operand from old memref. + newShapeConstants.push_back(-1); + newOperands.push_back(alloc.getOperand(dynamicDimPos)); + } + dynamicDimPos++; + } + + // Create new memref type (which will have fewer dynamic dimensions). + MemRefType newMemRefType = + MemRefType::Builder(memrefType).setShape(newShapeConstants); + assert(static_cast(newOperands.size()) == + newMemRefType.getNumDynamicDims()); + + // Create and insert the alloc op for the new memref. + auto newAlloc = rewriter.create(alloc.getLoc(), newMemRefType, + newOperands, IntegerAttr()); + // Insert a cast so we have the same type as the old alloc. + auto resultCast = + rewriter.create(alloc.getLoc(), newAlloc, alloc.getType()); + + rewriter.replaceOp(alloc, {resultCast}); + return success(); + } +}; + +/// Fold alloc operations with no uses. Alloc has side effects on the heap, +/// but can still be deleted if it has zero uses. +struct SimplifyDeadAlloc : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(AllocOp alloc, + PatternRewriter &rewriter) const override { + if (alloc.use_empty()) { + rewriter.eraseOp(alloc); + return success(); + } + return failure(); + } +}; +} // end anonymous namespace. + +void AllocOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert, SimplifyDeadAlloc>(context); +} + +void AllocaOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert>(context); +} + +//===----------------------------------------------------------------------===// +// AssumeAlignmentOp +//===----------------------------------------------------------------------===// + +static LogicalResult verify(AssumeAlignmentOp op) { + unsigned alignment = op.alignment(); + if (!llvm::isPowerOf2_32(alignment)) + return op.emitOpError("alignment must be power of 2"); + return success(); +} + +//===----------------------------------------------------------------------===// +// BufferCastOp +//===----------------------------------------------------------------------===// + +OpFoldResult BufferCastOp::fold(ArrayRef) { + if (auto tensorLoad = tensor().getDefiningOp()) + if (tensorLoad.memref().getType() == getType()) + return tensorLoad.memref(); + return {}; +} + +namespace { +/// Replace tensor_cast + buffer_cast by buffer_cast + memref_cast. 
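+///
+/// Illustrative rewrite (hypothetical IR, assuming a ranked source tensor):
+///   %0 = tensor.cast %t : tensor<4xf32> to tensor<?xf32>
+///   %1 = memref.buffer_cast %0 : memref<?xf32>
+/// becomes
+///   %b = memref.buffer_cast %t : memref<4xf32>
+///   %1 = memref.cast %b : memref<4xf32> to memref<?xf32>
+/// so the buffer is produced from the more statically shaped value.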
+struct BufferCast : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(BufferCastOp bufferCast, + PatternRewriter &rewriter) const final { + auto tensorCastOperand = + bufferCast.getOperand().getDefiningOp(); + if (!tensorCastOperand) + return failure(); + auto srcTensorType = + tensorCastOperand.getOperand().getType().dyn_cast(); + if (!srcTensorType) + return failure(); + auto memrefType = MemRefType::get(srcTensorType.getShape(), + srcTensorType.getElementType()); + Value memref = rewriter.create( + bufferCast.getLoc(), memrefType, tensorCastOperand.getOperand()); + rewriter.replaceOpWithNewOp(bufferCast, bufferCast.getType(), + memref); + return success(); + } +}; + +/// Canonicalize memref.tensor_load + memref.buffer_cast to memref.cast when +/// type mismatches prevent `BufferCastOp::fold` to kick in. +struct TensorLoadToMemRef : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(BufferCastOp bufferCast, + PatternRewriter &rewriter) const final { + auto tensorLoad = bufferCast.tensor().getDefiningOp(); + // Bail unless we have a tensor_load + memref.buffer_cast with different + // types. `BufferCastOp::fold` handles the same type case. + if (!tensorLoad || tensorLoad.memref().getType() == bufferCast.getType()) + return failure(); + // If types are not cast-compatible, bail. + if (!CastOp::areCastCompatible(tensorLoad.memref().getType(), + bufferCast.getType())) + return failure(); + rewriter.replaceOpWithNewOp(bufferCast, bufferCast.getType(), + tensorLoad.memref()); + return success(); + } +}; + +} // namespace + +void BufferCastOp::getCanonicalizationPatterns( + OwningRewritePatternList &results, MLIRContext *context) { + results.insert(context); +} + +//===----------------------------------------------------------------------===// +// CastOp +//===----------------------------------------------------------------------===// + +/// Determines whether MemRef_CastOp casts to a more dynamic version of the +/// source memref. This is useful to to fold a memref.cast into a consuming op +/// and implement canonicalization patterns for ops in different dialects that +/// may consume the results of memref.cast operations. Such foldable memref.cast +/// operations are typically inserted as `view` and `subview` ops are +/// canonicalized, to preserve the type compatibility of their uses. +/// +/// Returns true when all conditions are met: +/// 1. source and result are ranked memrefs with strided semantics and same +/// element type and rank. +/// 2. each of the source's size, offset or stride has more static information +/// than the corresponding result's size, offset or stride. +/// +/// Example 1: +/// ```mlir +/// %1 = memref.cast %0 : memref<8x16xf32> to memref +/// %2 = consumer %1 ... : memref ... +/// ``` +/// +/// may fold into: +/// +/// ```mlir +/// %2 = consumer %0 ... : memref<8x16xf32> ... +/// ``` +/// +/// Example 2: +/// ``` +/// %1 = memref.cast %0 : memref(16 * i + j)>> +/// to memref +/// consumer %1 : memref ... +/// ``` +/// +/// may fold into: +/// +/// ``` +/// consumer %0 ... : memref(16 * i + j)>> +/// ``` +bool CastOp::canFoldIntoConsumerOp(CastOp castOp) { + MemRefType sourceType = castOp.source().getType().dyn_cast(); + MemRefType resultType = castOp.getType().dyn_cast(); + + // Requires ranked MemRefType. + if (!sourceType || !resultType) + return false; + + // Requires same elemental type. 
+ if (sourceType.getElementType() != resultType.getElementType()) + return false; + + // Requires same rank. + if (sourceType.getRank() != resultType.getRank()) + return false; + + // Only fold casts between strided memref forms. + int64_t sourceOffset, resultOffset; + SmallVector sourceStrides, resultStrides; + if (failed(getStridesAndOffset(sourceType, sourceStrides, sourceOffset)) || + failed(getStridesAndOffset(resultType, resultStrides, resultOffset))) + return false; + + // If cast is towards more static sizes along any dimension, don't fold. + for (auto it : llvm::zip(sourceType.getShape(), resultType.getShape())) { + auto ss = std::get<0>(it), st = std::get<1>(it); + if (ss != st) + if (MemRefType::isDynamic(ss) && !MemRefType::isDynamic(st)) + return false; + } + + // If cast is towards more static offset along any dimension, don't fold. + if (sourceOffset != resultOffset) + if (MemRefType::isDynamicStrideOrOffset(sourceOffset) && + !MemRefType::isDynamicStrideOrOffset(resultOffset)) + return false; + + // If cast is towards more static strides along any dimension, don't fold. + for (auto it : llvm::zip(sourceStrides, resultStrides)) { + auto ss = std::get<0>(it), st = std::get<1>(it); + if (ss != st) + if (MemRefType::isDynamicStrideOrOffset(ss) && + !MemRefType::isDynamicStrideOrOffset(st)) + return false; + } + + return true; +} + +bool CastOp::areCastCompatible(TypeRange inputs, TypeRange outputs) { + if (inputs.size() != 1 || outputs.size() != 1) + return false; + Type a = inputs.front(), b = outputs.front(); + auto aT = a.dyn_cast(); + auto bT = b.dyn_cast(); + + auto uaT = a.dyn_cast(); + auto ubT = b.dyn_cast(); + + if (aT && bT) { + if (aT.getElementType() != bT.getElementType()) + return false; + if (aT.getAffineMaps() != bT.getAffineMaps()) { + int64_t aOffset, bOffset; + SmallVector aStrides, bStrides; + if (failed(getStridesAndOffset(aT, aStrides, aOffset)) || + failed(getStridesAndOffset(bT, bStrides, bOffset)) || + aStrides.size() != bStrides.size()) + return false; + + // Strides along a dimension/offset are compatible if the value in the + // source memref is static and the value in the target memref is the + // same. They are also compatible if either one is dynamic (see + // description of MemRefCastOp for details). + auto checkCompatible = [](int64_t a, int64_t b) { + return (a == MemRefType::getDynamicStrideOrOffset() || + b == MemRefType::getDynamicStrideOrOffset() || a == b); + }; + if (!checkCompatible(aOffset, bOffset)) + return false; + for (auto aStride : enumerate(aStrides)) + if (!checkCompatible(aStride.value(), bStrides[aStride.index()])) + return false; + } + if (aT.getMemorySpaceAsInt() != bT.getMemorySpaceAsInt()) + return false; + + // They must have the same rank, and any specified dimensions must match. + if (aT.getRank() != bT.getRank()) + return false; + + for (unsigned i = 0, e = aT.getRank(); i != e; ++i) { + int64_t aDim = aT.getDimSize(i), bDim = bT.getDimSize(i); + if (aDim != -1 && bDim != -1 && aDim != bDim) + return false; + } + return true; + } else { + if (!aT && !uaT) + return false; + if (!bT && !ubT) + return false; + // Unranked to unranked casting is unsupported + if (uaT && ubT) + return false; + + auto aEltType = (aT) ? aT.getElementType() : uaT.getElementType(); + auto bEltType = (bT) ? bT.getElementType() : ubT.getElementType(); + if (aEltType != bEltType) + return false; + + auto aMemSpace = + (aT) ? aT.getMemorySpaceAsInt() : uaT.getMemorySpaceAsInt(); + auto bMemSpace = + (bT) ? 
bT.getMemorySpaceAsInt() : ubT.getMemorySpaceAsInt(); + if (aMemSpace != bMemSpace) + return false; + + return true; + } + + return false; +} + +OpFoldResult CastOp::fold(ArrayRef operands) { + return succeeded(foldMemRefCast(*this)) ? getResult() : Value(); +} + +//===----------------------------------------------------------------------===// +// DeallocOp +//===----------------------------------------------------------------------===// +namespace { +/// Fold Dealloc operations that are deallocating an AllocOp that is only used +/// by other Dealloc operations. +struct SimplifyDeadDealloc : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(DeallocOp dealloc, + PatternRewriter &rewriter) const override { + // Check that the memref operand's defining operation is an AllocOp. + Value memref = dealloc.memref(); + if (!isa_and_nonnull(memref.getDefiningOp())) + return failure(); + + // Check that all of the uses of the AllocOp are other DeallocOps. + for (auto *user : memref.getUsers()) + if (!isa(user)) + return failure(); + + // Erase the dealloc operation. + rewriter.eraseOp(dealloc); + return success(); + } +}; +} // end anonymous namespace. + +static LogicalResult verify(DeallocOp op) { + if (!op.memref().getType().isa()) + return op.emitOpError("operand must be a memref"); + return success(); +} + +void DeallocOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + +LogicalResult DeallocOp::fold(ArrayRef cstOperands, + SmallVectorImpl &results) { + /// dealloc(memrefcast) -> dealloc + return foldMemRefCast(*this); +} + +//===----------------------------------------------------------------------===// +// DimOp +//===----------------------------------------------------------------------===// + +void DimOp::build(OpBuilder &builder, OperationState &result, Value memref, + int64_t index) { + auto loc = result.location; + Value indexValue = builder.create(loc, index); + build(builder, result, memref, indexValue); +} + +void DimOp::build(OpBuilder &builder, OperationState &result, Value memref, + Value index) { + auto indexTy = builder.getIndexType(); + build(builder, result, indexTy, memref, index); +} + +Optional DimOp::getConstantIndex() { + if (auto constantOp = index().getDefiningOp()) + return constantOp.getValue().cast().getInt(); + return {}; +} + +static LogicalResult verify(DimOp op) { + // Assume unknown index to be in range. + Optional index = op.getConstantIndex(); + if (!index.hasValue()) + return success(); + + // Check that constant index is not knowingly out of range. + auto type = op.memrefOrTensor().getType(); + if (auto memrefType = type.dyn_cast()) { + if (index.getValue() >= memrefType.getRank()) + return op.emitOpError("index is out of range"); + } else if (auto tensorType = type.dyn_cast()) { + if (index.getValue() >= tensorType.getRank()) + return op.emitOpError("index is out of range"); + } else if (type.isa() || type.isa()) { + // Assume index to be in range. + } else { + llvm_unreachable("expected operand with memref type"); + } + return success(); +} + +OpFoldResult DimOp::fold(ArrayRef operands) { + auto index = operands[1].dyn_cast_or_null(); + + // All forms of folding require a known index. + if (!index) + return {}; + + auto argTy = memrefOrTensor().getType(); + // Fold if the shape extent along the given index is known. + if (auto shapedTy = argTy.dyn_cast()) { + // Folding for unranked types (UnrankedMemRefType) is not supported. 
+ if (!shapedTy.hasRank()) + return {}; + if (!shapedTy.isDynamicDim(index.getInt())) { + Builder builder(getContext()); + return builder.getIndexAttr(shapedTy.getShape()[index.getInt()]); + } + } + + Operation *definingOp = memrefOrTensor().getDefiningOp(); + + // dim(memref.tensor_load(memref)) -> dim(memref) + if (auto tensorLoadOp = dyn_cast_or_null(definingOp)) { + setOperand(0, tensorLoadOp.memref()); + return getResult(); + } + + // Fold dim to the operand of tensor.generate. + if (auto fromElements = dyn_cast_or_null(definingOp)) { + auto resultType = + fromElements.getResult().getType().cast(); + // The case where the type encodes the size of the dimension is handled + // above. + assert(resultType.getShape()[index.getInt()] == + RankedTensorType::kDynamicSize); + + // Find the operand of the fromElements that corresponds to this index. + auto dynExtents = fromElements.dynamicExtents().begin(); + for (auto dim : resultType.getShape().take_front(index.getInt())) + if (dim == RankedTensorType::kDynamicSize) + dynExtents++; + + return Value{*dynExtents}; + } + + // The size at the given index is now known to be a dynamic size. + unsigned unsignedIndex = index.getValue().getZExtValue(); + + if (auto subtensor = dyn_cast_or_null(definingOp)) { + assert(subtensor.isDynamicSize(unsignedIndex) && + "Expected dynamic subtensor size"); + return subtensor.getDynamicSize(unsignedIndex); + } + + // Fold dim to the size argument for an `AllocOp`, `ViewOp`, or `SubViewOp`. + auto memrefType = argTy.dyn_cast(); + if (!memrefType) + return {}; + + if (auto alloc = dyn_cast_or_null(definingOp)) + return *(alloc.getDynamicSizes().begin() + + memrefType.getDynamicDimIndex(unsignedIndex)); + + if (auto view = dyn_cast_or_null(definingOp)) + return *(view.getDynamicSizes().begin() + + memrefType.getDynamicDimIndex(unsignedIndex)); + + if (auto subview = dyn_cast_or_null(definingOp)) { + assert(subview.isDynamicSize(unsignedIndex) && + "Expected dynamic subview size"); + return subview.getDynamicSize(unsignedIndex); + } + + // dim(memrefcast) -> dim + if (succeeded(foldMemRefCast(*this))) + return getResult(); + + return {}; +} + +namespace { +/// Fold dim of a memref reshape operation to a load into the reshape's shape +/// operand. +struct DimOfMemRefReshape : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(DimOp dim, + PatternRewriter &rewriter) const override { + auto reshape = dim.memrefOrTensor().getDefiningOp(); + + if (!reshape) + return failure(); + + // Place the load directly after the reshape to ensure that the shape memref + // was not mutated. + rewriter.setInsertionPointAfter(reshape); + rewriter.replaceOpWithNewOp(dim, reshape.shape(), + llvm::makeArrayRef({dim.index()})); + return success(); + } +}; + +/// Fold dim of a dim of a cast into the dim of the source of the tensor cast. +template +struct DimOfCastOp : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(DimOp dimOp, + PatternRewriter &rewriter) const override { + auto castOp = dimOp.memrefOrTensor().getDefiningOp(); + if (!castOp) + return failure(); + Value newSource = castOp.getOperand(); + rewriter.replaceOpWithNewOp(dimOp, newSource, dimOp.index()); + return success(); + } +}; +} // end anonymous namespace. 
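+// Illustrative effect of the cast folding above (hypothetical IR): given
+//   %m = memref.buffer_cast %t : memref<?xf32>
+//   %d = memref.dim %m, %c0 : memref<?xf32>
+// the dim is rewritten to query the cast's source directly:
+//   %d = memref.dim %t, %c0 : tensor<?xf32>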
+ +void DimOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert, + DimOfCastOp>(context); +} + +// --------------------------------------------------------------------------- +// DmaStartOp +// --------------------------------------------------------------------------- + +void DmaStartOp::build(OpBuilder &builder, OperationState &result, + Value srcMemRef, ValueRange srcIndices, Value destMemRef, + ValueRange destIndices, Value numElements, + Value tagMemRef, ValueRange tagIndices, Value stride, + Value elementsPerStride) { + result.addOperands(srcMemRef); + result.addOperands(srcIndices); + result.addOperands(destMemRef); + result.addOperands(destIndices); + result.addOperands({numElements, tagMemRef}); + result.addOperands(tagIndices); + if (stride) + result.addOperands({stride, elementsPerStride}); +} + +void DmaStartOp::print(OpAsmPrinter &p) { + p << getOperationName() << " " << getSrcMemRef() << '[' << getSrcIndices() + << "], " << getDstMemRef() << '[' << getDstIndices() << "], " + << getNumElements() << ", " << getTagMemRef() << '[' << getTagIndices() + << ']'; + if (isStrided()) + p << ", " << getStride() << ", " << getNumElementsPerStride(); + + p.printOptionalAttrDict((*this)->getAttrs()); + p << " : " << getSrcMemRef().getType() << ", " << getDstMemRef().getType() + << ", " << getTagMemRef().getType(); +} + +// Parse DmaStartOp. +// Ex: +// %dma_id = dma_start %src[%i, %j], %dst[%k, %l], %size, +// %tag[%index], %stride, %num_elt_per_stride : +// : memref<3076 x f32, 0>, +// memref<1024 x f32, 2>, +// memref<1 x i32> +// +ParseResult DmaStartOp::parse(OpAsmParser &parser, OperationState &result) { + OpAsmParser::OperandType srcMemRefInfo; + SmallVector srcIndexInfos; + OpAsmParser::OperandType dstMemRefInfo; + SmallVector dstIndexInfos; + OpAsmParser::OperandType numElementsInfo; + OpAsmParser::OperandType tagMemrefInfo; + SmallVector tagIndexInfos; + SmallVector strideInfo; + + SmallVector types; + auto indexType = parser.getBuilder().getIndexType(); + + // Parse and resolve the following list of operands: + // *) source memref followed by its indices (in square brackets). + // *) destination memref followed by its indices (in square brackets). + // *) dma size in KiB. + if (parser.parseOperand(srcMemRefInfo) || + parser.parseOperandList(srcIndexInfos, OpAsmParser::Delimiter::Square) || + parser.parseComma() || parser.parseOperand(dstMemRefInfo) || + parser.parseOperandList(dstIndexInfos, OpAsmParser::Delimiter::Square) || + parser.parseComma() || parser.parseOperand(numElementsInfo) || + parser.parseComma() || parser.parseOperand(tagMemrefInfo) || + parser.parseOperandList(tagIndexInfos, OpAsmParser::Delimiter::Square)) + return failure(); + + // Parse optional stride and elements per stride. + if (parser.parseTrailingOperandList(strideInfo)) + return failure(); + + bool isStrided = strideInfo.size() == 2; + if (!strideInfo.empty() && !isStrided) { + return parser.emitError(parser.getNameLoc(), + "expected two stride related operands"); + } + + if (parser.parseColonTypeList(types)) + return failure(); + if (types.size() != 3) + return parser.emitError(parser.getNameLoc(), "fewer/more types expected"); + + if (parser.resolveOperand(srcMemRefInfo, types[0], result.operands) || + parser.resolveOperands(srcIndexInfos, indexType, result.operands) || + parser.resolveOperand(dstMemRefInfo, types[1], result.operands) || + parser.resolveOperands(dstIndexInfos, indexType, result.operands) || + // size should be an index. 
+ parser.resolveOperand(numElementsInfo, indexType, result.operands) || + parser.resolveOperand(tagMemrefInfo, types[2], result.operands) || + // tag indices should be index. + parser.resolveOperands(tagIndexInfos, indexType, result.operands)) + return failure(); + + if (isStrided) { + if (parser.resolveOperands(strideInfo, indexType, result.operands)) + return failure(); + } + + return success(); +} + +LogicalResult DmaStartOp::verify() { + unsigned numOperands = getNumOperands(); + + // Mandatory non-variadic operands are: src memref, dst memref, tag memref and + // the number of elements. + if (numOperands < 4) + return emitOpError("expected at least 4 operands"); + + // Check types of operands. The order of these calls is important: the later + // calls rely on some type properties to compute the operand position. + // 1. Source memref. + if (!getSrcMemRef().getType().isa()) + return emitOpError("expected source to be of memref type"); + if (numOperands < getSrcMemRefRank() + 4) + return emitOpError() << "expected at least " << getSrcMemRefRank() + 4 + << " operands"; + if (!getSrcIndices().empty() && + !llvm::all_of(getSrcIndices().getTypes(), + [](Type t) { return t.isIndex(); })) + return emitOpError("expected source indices to be of index type"); + + // 2. Destination memref. + if (!getDstMemRef().getType().isa()) + return emitOpError("expected destination to be of memref type"); + unsigned numExpectedOperands = getSrcMemRefRank() + getDstMemRefRank() + 4; + if (numOperands < numExpectedOperands) + return emitOpError() << "expected at least " << numExpectedOperands + << " operands"; + if (!getDstIndices().empty() && + !llvm::all_of(getDstIndices().getTypes(), + [](Type t) { return t.isIndex(); })) + return emitOpError("expected destination indices to be of index type"); + + // 3. Number of elements. + if (!getNumElements().getType().isIndex()) + return emitOpError("expected num elements to be of index type"); + + // 4. Tag memref. + if (!getTagMemRef().getType().isa()) + return emitOpError("expected tag to be of memref type"); + numExpectedOperands += getTagMemRefRank(); + if (numOperands < numExpectedOperands) + return emitOpError() << "expected at least " << numExpectedOperands + << " operands"; + if (!getTagIndices().empty() && + !llvm::all_of(getTagIndices().getTypes(), + [](Type t) { return t.isIndex(); })) + return emitOpError("expected tag indices to be of index type"); + + // DMAs from different memory spaces supported. + if (getSrcMemorySpace() == getDstMemorySpace()) + return emitOpError("DMA should be between different memory spaces"); + + // Optional stride-related operands must be either both present or both + // absent. + if (numOperands != numExpectedOperands && + numOperands != numExpectedOperands + 2) + return emitOpError("incorrect number of operands"); + + // 5. Strides. 
+  if (isStrided()) {
+    if (!getStride().getType().isIndex() ||
+        !getNumElementsPerStride().getType().isIndex())
+      return emitOpError(
+          "expected stride and num elements per stride to be of type index");
+  }
+
+  return success();
+}
+
+LogicalResult DmaStartOp::fold(ArrayRef<Attribute> cstOperands,
+                               SmallVectorImpl<OpFoldResult> &results) {
+  /// dma_start(memrefcast) -> dma_start
+  return foldMemRefCast(*this);
+}
+
+// ---------------------------------------------------------------------------
+// DmaWaitOp
+// ---------------------------------------------------------------------------
+
+void DmaWaitOp::build(OpBuilder &builder, OperationState &result,
+                      Value tagMemRef, ValueRange tagIndices,
+                      Value numElements) {
+  result.addOperands(tagMemRef);
+  result.addOperands(tagIndices);
+  result.addOperands(numElements);
+}
+
+void DmaWaitOp::print(OpAsmPrinter &p) {
+  p << getOperationName() << " " << getTagMemRef() << '[' << getTagIndices()
+    << "], " << getNumElements();
+  p.printOptionalAttrDict((*this)->getAttrs());
+  p << " : " << getTagMemRef().getType();
+}
+
+// Parse DmaWaitOp.
+// Eg:
+//   dma_wait %tag[%index], %num_elements : memref<1 x i32, (d0) -> (d0), 4>
+//
+ParseResult DmaWaitOp::parse(OpAsmParser &parser, OperationState &result) {
+  OpAsmParser::OperandType tagMemrefInfo;
+  SmallVector<OpAsmParser::OperandType, 2> tagIndexInfos;
+  Type type;
+  auto indexType = parser.getBuilder().getIndexType();
+  OpAsmParser::OperandType numElementsInfo;
+
+  // Parse tag memref, its indices, and dma size.
+  if (parser.parseOperand(tagMemrefInfo) ||
+      parser.parseOperandList(tagIndexInfos, OpAsmParser::Delimiter::Square) ||
+      parser.parseComma() || parser.parseOperand(numElementsInfo) ||
+      parser.parseColonType(type) ||
+      parser.resolveOperand(tagMemrefInfo, type, result.operands) ||
+      parser.resolveOperands(tagIndexInfos, indexType, result.operands) ||
+      parser.resolveOperand(numElementsInfo, indexType, result.operands))
+    return failure();
+
+  return success();
+}
+
+LogicalResult DmaWaitOp::fold(ArrayRef<Attribute> cstOperands,
+                              SmallVectorImpl<OpFoldResult> &results) {
+  /// dma_wait(memrefcast) -> dma_wait
+  return foldMemRefCast(*this);
+}
+
+LogicalResult DmaWaitOp::verify() {
+  // Mandatory non-variadic operands are tag and the number of elements.
+  if (getNumOperands() < 2)
+    return emitOpError() << "expected at least 2 operands";
+
+  // Check types of operands. The order of these calls is important: the later
+  // calls rely on some type properties to compute the operand position.
+  if (!getTagMemRef().getType().isa<MemRefType>())
+    return emitOpError() << "expected tag to be of memref type";
+
+  if (getNumOperands() != 2 + getTagMemRefRank())
+    return emitOpError() << "expected " << 2 + getTagMemRefRank()
+                         << " operands";
+
+  if (!getTagIndices().empty() &&
+      !llvm::all_of(getTagIndices().getTypes(),
+                    [](Type t) { return t.isIndex(); }))
+    return emitOpError() << "expected tag indices to be of index type";
+
+  if (!getNumElements().getType().isIndex())
+    return emitOpError()
+           << "expected the number of elements to be of index type";
+
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// GlobalOp
+//===----------------------------------------------------------------------===//
+
+static void printGlobalMemrefOpTypeAndInitialValue(OpAsmPrinter &p, GlobalOp op,
+                                                   TypeAttr type,
+                                                   Attribute initialValue) {
+  p << type;
+  if (!op.isExternal()) {
+    p << " = ";
+    if (op.isUninitialized())
+      p << "uninitialized";
+    else
+      p.printAttributeWithoutType(initialValue);
+  }
+}
+
+static ParseResult
+parseGlobalMemrefOpTypeAndInitialValue(OpAsmParser &parser, TypeAttr &typeAttr,
+                                       Attribute &initialValue) {
+  Type type;
+  if (parser.parseType(type))
+    return failure();
+
+  auto memrefType = type.dyn_cast<MemRefType>();
+  if (!memrefType || !memrefType.hasStaticShape())
+    return parser.emitError(parser.getNameLoc())
+           << "type should be static shaped memref, but got " << type;
+  typeAttr = TypeAttr::get(type);
+
+  if (parser.parseOptionalEqual())
+    return success();
+
+  if (succeeded(parser.parseOptionalKeyword("uninitialized"))) {
+    initialValue = UnitAttr::get(parser.getBuilder().getContext());
+    return success();
+  }
+
+  Type tensorType = getTensorTypeFromMemRefType(memrefType);
+  if (parser.parseAttribute(initialValue, tensorType))
+    return failure();
+  if (!initialValue.isa<ElementsAttr>())
+    return parser.emitError(parser.getNameLoc())
+           << "initial value should be a unit or elements attribute";
+  return success();
+}
+
+static LogicalResult verify(GlobalOp op) {
+  auto memrefType = op.type().dyn_cast<MemRefType>();
+  if (!memrefType || !memrefType.hasStaticShape())
+    return op.emitOpError("type should be static shaped memref, but got ")
+           << op.type();
+
+  // Verify that the initial value, if present, is either a unit attribute or
+  // an elements attribute.
+  if (op.initial_value().hasValue()) {
+    Attribute initValue = op.initial_value().getValue();
+    if (!initValue.isa<UnitAttr>() && !initValue.isa<ElementsAttr>())
+      return op.emitOpError("initial value should be a unit or elements "
+                            "attribute, but got ")
+             << initValue;
+
+    // Check that the type of the initial value is compatible with the type of
+    // the global variable.
+    if (initValue.isa<ElementsAttr>()) {
+      Type initType = initValue.getType();
+      Type tensorType = getTensorTypeFromMemRefType(memrefType);
+      if (initType != tensorType)
+        return op.emitOpError("initial value expected to be of type ")
+               << tensorType << ", but was of type " << initType;
+    }
+  }
+
+  // TODO: verify visibility for declarations.
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// GetGlobalOp
+//===----------------------------------------------------------------------===//
+
+LogicalResult
+GetGlobalOp::verifySymbolUses(SymbolTableCollection &symbolTable) {
+  // Verify that the result type is same as the type of the referenced
+  // memref.global op.
+  auto global =
+      symbolTable.lookupNearestSymbolFrom<GlobalOp>(*this, nameAttr());
+  if (!global)
+    return emitOpError("'")
+           << name() << "' does not reference a valid global memref";
+
+  Type resultType = result().getType();
+  if (global.type() != resultType)
+    return emitOpError("result type ")
+           << resultType << " does not match type " << global.type()
+           << " of the global memref @" << name();
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// LoadOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult verify(LoadOp op) {
+  if (op.getNumOperands() != 1 + op.getMemRefType().getRank())
+    return op.emitOpError("incorrect number of indices for load");
+  return success();
+}
+
+OpFoldResult LoadOp::fold(ArrayRef<Attribute> cstOperands) {
+  /// load(memrefcast) -> load
+  if (succeeded(foldMemRefCast(*this)))
+    return getResult();
+  return OpFoldResult();
+}
+
+namespace {
+/// Fold a load on a buffer_cast operation into a tensor.extract on the
+/// corresponding tensor.
+struct LoadOfBufferCast : public OpRewritePattern<LoadOp> {
+  using OpRewritePattern<LoadOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(LoadOp load,
+                                PatternRewriter &rewriter) const override {
+    auto buffercast = load.memref().getDefiningOp<BufferCastOp>();
+    if (!buffercast)
+      return failure();
+
+    rewriter.replaceOpWithNewOp<tensor::ExtractOp>(load, buffercast.tensor(),
+                                                   load.indices());
+    return success();
+  }
+};
+} // end anonymous namespace.
+
+void LoadOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
+                                         MLIRContext *context) {
+  results.insert<LoadOfBufferCast>(context);
+}
+
+//===----------------------------------------------------------------------===//
+// PrefetchOp
+//===----------------------------------------------------------------------===//
+
+static void print(OpAsmPrinter &p, PrefetchOp op) {
+  p << PrefetchOp::getOperationName() << " " << op.memref() << '[';
+  p.printOperands(op.indices());
+  p << ']' << ", " << (op.isWrite() ? "write" : "read");
+  p << ", locality<" << op.localityHint();
+  p << ">, " << (op.isDataCache() ? "data" : "instr");
"data" : "instr"); + p.printOptionalAttrDict( + op->getAttrs(), + /*elidedAttrs=*/{"localityHint", "isWrite", "isDataCache"}); + p << " : " << op.getMemRefType(); +} + +static ParseResult parsePrefetchOp(OpAsmParser &parser, + OperationState &result) { + OpAsmParser::OperandType memrefInfo; + SmallVector indexInfo; + IntegerAttr localityHint; + MemRefType type; + StringRef readOrWrite, cacheType; + + auto indexTy = parser.getBuilder().getIndexType(); + auto i32Type = parser.getBuilder().getIntegerType(32); + if (parser.parseOperand(memrefInfo) || + parser.parseOperandList(indexInfo, OpAsmParser::Delimiter::Square) || + parser.parseComma() || parser.parseKeyword(&readOrWrite) || + parser.parseComma() || parser.parseKeyword("locality") || + parser.parseLess() || + parser.parseAttribute(localityHint, i32Type, "localityHint", + result.attributes) || + parser.parseGreater() || parser.parseComma() || + parser.parseKeyword(&cacheType) || parser.parseColonType(type) || + parser.resolveOperand(memrefInfo, type, result.operands) || + parser.resolveOperands(indexInfo, indexTy, result.operands)) + return failure(); + + if (!readOrWrite.equals("read") && !readOrWrite.equals("write")) + return parser.emitError(parser.getNameLoc(), + "rw specifier has to be 'read' or 'write'"); + result.addAttribute( + PrefetchOp::getIsWriteAttrName(), + parser.getBuilder().getBoolAttr(readOrWrite.equals("write"))); + + if (!cacheType.equals("data") && !cacheType.equals("instr")) + return parser.emitError(parser.getNameLoc(), + "cache type has to be 'data' or 'instr'"); + + result.addAttribute( + PrefetchOp::getIsDataCacheAttrName(), + parser.getBuilder().getBoolAttr(cacheType.equals("data"))); + + return success(); +} + +static LogicalResult verify(PrefetchOp op) { + if (op.getNumOperands() != 1 + op.getMemRefType().getRank()) + return op.emitOpError("too few indices"); + + return success(); +} + +LogicalResult PrefetchOp::fold(ArrayRef cstOperands, + SmallVectorImpl &results) { + // prefetch(memrefcast) -> prefetch + return foldMemRefCast(*this); +} + +//===----------------------------------------------------------------------===// +// ReinterpretCastOp +//===----------------------------------------------------------------------===// + +/// Build a ReinterpretCastOp with all dynamic entries: `staticOffsets`, +/// `staticSizes` and `staticStrides` are automatically filled with +/// source-memref-rank sentinel values that encode dynamic entries. 
+void ReinterpretCastOp::build(OpBuilder &b, OperationState &result, + MemRefType resultType, Value source, + OpFoldResult offset, ArrayRef sizes, + ArrayRef strides, + ArrayRef attrs) { + SmallVector staticOffsets, staticSizes, staticStrides; + SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; + dispatchIndexOpFoldResults(offset, dynamicOffsets, staticOffsets, + ShapedType::kDynamicStrideOrOffset); + dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes, + ShapedType::kDynamicSize); + dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides, + ShapedType::kDynamicStrideOrOffset); + build(b, result, resultType, source, dynamicOffsets, dynamicSizes, + dynamicStrides, b.getI64ArrayAttr(staticOffsets), + b.getI64ArrayAttr(staticSizes), b.getI64ArrayAttr(staticStrides)); + result.addAttributes(attrs); +} + +void ReinterpretCastOp::build(OpBuilder &b, OperationState &result, + MemRefType resultType, Value source, + int64_t offset, ArrayRef sizes, + ArrayRef strides, + ArrayRef attrs) { + SmallVector sizeValues = + llvm::to_vector<4>(llvm::map_range(sizes, [&](int64_t v) -> OpFoldResult { + return b.getI64IntegerAttr(v); + })); + SmallVector strideValues = llvm::to_vector<4>( + llvm::map_range(strides, [&](int64_t v) -> OpFoldResult { + return b.getI64IntegerAttr(v); + })); + build(b, result, resultType, source, b.getI64IntegerAttr(offset), sizeValues, + strideValues, attrs); +} + +void ReinterpretCastOp::build(OpBuilder &b, OperationState &result, + MemRefType resultType, Value source, Value offset, + ValueRange sizes, ValueRange strides, + ArrayRef attrs) { + SmallVector sizeValues = llvm::to_vector<4>( + llvm::map_range(sizes, [](Value v) -> OpFoldResult { return v; })); + SmallVector strideValues = llvm::to_vector<4>( + llvm::map_range(strides, [](Value v) -> OpFoldResult { return v; })); + build(b, result, resultType, source, offset, sizeValues, strideValues, attrs); +} + +// TODO: ponder whether we want to allow missing trailing sizes/strides that are +// completed automatically, like we have for subview and subtensor. +static LogicalResult verify(ReinterpretCastOp op) { + // The source and result memrefs should be in the same memory space. + auto srcType = op.source().getType().cast(); + auto resultType = op.getType().cast(); + if (srcType.getMemorySpaceAsInt() != resultType.getMemorySpaceAsInt()) + return op.emitError("different memory spaces specified for source type ") + << srcType << " and result memref type " << resultType; + if (srcType.getElementType() != resultType.getElementType()) + return op.emitError("different element types specified for source type ") + << srcType << " and result memref type " << resultType; + + // Match sizes in result memref type and in static_sizes attribute. + for (auto &en : + llvm::enumerate(llvm::zip(resultType.getShape(), + extractFromI64ArrayAttr(op.static_sizes())))) { + int64_t resultSize = std::get<0>(en.value()); + int64_t expectedSize = std::get<1>(en.value()); + if (resultSize != expectedSize) + return op.emitError("expected result type with size = ") + << expectedSize << " instead of " << resultSize + << " in dim = " << en.index(); + } + + // Match offset and strides in static_offset and static_strides attributes if + // result memref type has an affine map specified. 
+ if (!resultType.getAffineMaps().empty()) { + int64_t resultOffset; + SmallVector resultStrides; + if (failed(getStridesAndOffset(resultType, resultStrides, resultOffset))) + return failure(); + + // Match offset in result memref type and in static_offsets attribute. + int64_t expectedOffset = + extractFromI64ArrayAttr(op.static_offsets()).front(); + if (resultOffset != expectedOffset) + return op.emitError("expected result type with offset = ") + << resultOffset << " instead of " << expectedOffset; + + // Match strides in result memref type and in static_strides attribute. + for (auto &en : llvm::enumerate(llvm::zip( + resultStrides, extractFromI64ArrayAttr(op.static_strides())))) { + int64_t resultStride = std::get<0>(en.value()); + int64_t expectedStride = std::get<1>(en.value()); + if (resultStride != expectedStride) + return op.emitError("expected result type with stride = ") + << expectedStride << " instead of " << resultStride + << " in dim = " << en.index(); + } + } + return success(); +} + +//===----------------------------------------------------------------------===// +// ReshapeOp +//===----------------------------------------------------------------------===// + +static LogicalResult verify(ReshapeOp op) { + Type operandType = op.source().getType(); + Type resultType = op.result().getType(); + + Type operandElementType = operandType.cast().getElementType(); + Type resultElementType = resultType.cast().getElementType(); + if (operandElementType != resultElementType) + return op.emitOpError("element types of source and destination memref " + "types should be the same"); + + if (auto operandMemRefType = operandType.dyn_cast()) + if (!operandMemRefType.getAffineMaps().empty()) + return op.emitOpError( + "source memref type should have identity affine map"); + + int64_t shapeSize = op.shape().getType().cast().getDimSize(0); + auto resultMemRefType = resultType.dyn_cast(); + if (resultMemRefType) { + if (!resultMemRefType.getAffineMaps().empty()) + return op.emitOpError( + "result memref type should have identity affine map"); + if (shapeSize == ShapedType::kDynamicSize) + return op.emitOpError("cannot use shape operand with dynamic length to " + "reshape to statically-ranked memref type"); + if (shapeSize != resultMemRefType.getRank()) + return op.emitOpError( + "length of shape operand differs from the result's memref rank"); + } + return success(); +} + +//===----------------------------------------------------------------------===// +// StoreOp +//===----------------------------------------------------------------------===// + +static LogicalResult verify(StoreOp op) { + if (op.getNumOperands() != 2 + op.getMemRefType().getRank()) + return op.emitOpError("store index operand count not equal to memref rank"); + + return success(); +} + +LogicalResult StoreOp::fold(ArrayRef cstOperands, + SmallVectorImpl &results) { + /// store(memrefcast) -> store + return foldMemRefCast(*this); +} + +//===----------------------------------------------------------------------===// +// SubViewOp +//===----------------------------------------------------------------------===// + +namespace { +/// Helpers to write more idiomatic operations. 
+namespace saturated_arith { +struct Wrapper { + explicit Wrapper(int64_t v) : v(v) {} + operator int64_t() { return v; } + int64_t v; +}; +Wrapper operator+(Wrapper a, int64_t b) { + if (ShapedType::isDynamicStrideOrOffset(a) || + ShapedType::isDynamicStrideOrOffset(b)) + return Wrapper(ShapedType::kDynamicStrideOrOffset); + return Wrapper(a.v + b); +} +Wrapper operator*(Wrapper a, int64_t b) { + if (ShapedType::isDynamicStrideOrOffset(a) || + ShapedType::isDynamicStrideOrOffset(b)) + return Wrapper(ShapedType::kDynamicStrideOrOffset); + return Wrapper(a.v * b); +} +} // end namespace saturated_arith +} // end namespace + +/// A subview result type can be fully inferred from the source type and the +/// static representation of offsets, sizes and strides. Special sentinels +/// encode the dynamic case. +Type SubViewOp::inferResultType(MemRefType sourceMemRefType, + ArrayRef leadingStaticOffsets, + ArrayRef leadingStaticSizes, + ArrayRef leadingStaticStrides) { + // A subview may specify only a leading subset of offset/sizes/strides in + // which case we complete with offset=0, sizes from memref type and strides=1. + unsigned rank = sourceMemRefType.getRank(); + assert(leadingStaticOffsets.size() <= rank && + "unexpected leadingStaticOffsets overflow"); + assert(leadingStaticSizes.size() <= rank && + "unexpected leadingStaticSizes overflow"); + assert(leadingStaticStrides.size() <= rank && + "unexpected leadingStaticStrides overflow"); + auto staticOffsets = llvm::to_vector<4>(leadingStaticOffsets); + auto staticSizes = llvm::to_vector<4>(leadingStaticSizes); + auto staticStrides = llvm::to_vector<4>(leadingStaticStrides); + unsigned numTrailingOffsets = rank - staticOffsets.size(); + unsigned numTrailingSizes = rank - staticSizes.size(); + unsigned numTrailingStrides = rank - staticStrides.size(); + staticOffsets.append(numTrailingOffsets, 0); + llvm::append_range(staticSizes, + sourceMemRefType.getShape().take_back(numTrailingSizes)); + staticStrides.append(numTrailingStrides, 1); + + // Extract source offset and strides. + int64_t sourceOffset; + SmallVector sourceStrides; + auto res = getStridesAndOffset(sourceMemRefType, sourceStrides, sourceOffset); + assert(succeeded(res) && "SubViewOp expected strided memref type"); + (void)res; + + // Compute target offset whose value is: + // `sourceOffset + sum_i(staticOffset_i * sourceStrides_i)`. + int64_t targetOffset = sourceOffset; + for (auto it : llvm::zip(staticOffsets, sourceStrides)) { + auto staticOffset = std::get<0>(it), targetStride = std::get<1>(it); + using namespace saturated_arith; + targetOffset = Wrapper(targetOffset) + Wrapper(staticOffset) * targetStride; + } + + // Compute target stride whose value is: + // `sourceStrides_i * staticStrides_i`. + SmallVector targetStrides; + targetStrides.reserve(staticOffsets.size()); + for (auto it : llvm::zip(sourceStrides, staticStrides)) { + auto sourceStride = std::get<0>(it), staticStride = std::get<1>(it); + using namespace saturated_arith; + targetStrides.push_back(Wrapper(sourceStride) * staticStride); + } + + // The type is now known. 
+ return MemRefType::get( + staticSizes, sourceMemRefType.getElementType(), + makeStridedLinearLayoutMap(targetStrides, targetOffset, + sourceMemRefType.getContext()), + sourceMemRefType.getMemorySpaceAsInt()); +} + +Type SubViewOp::inferResultType(MemRefType sourceMemRefType, + ArrayRef leadingStaticOffsets, + ArrayRef leadingStaticSizes, + ArrayRef leadingStaticStrides) { + SmallVector staticOffsets, staticSizes, staticStrides; + SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; + dispatchIndexOpFoldResults(leadingStaticOffsets, dynamicOffsets, + staticOffsets, ShapedType::kDynamicStrideOrOffset); + dispatchIndexOpFoldResults(leadingStaticSizes, dynamicSizes, staticSizes, + ShapedType::kDynamicSize); + dispatchIndexOpFoldResults(leadingStaticStrides, dynamicStrides, + staticStrides, ShapedType::kDynamicStrideOrOffset); + return SubViewOp::inferResultType(sourceMemRefType, staticOffsets, + staticSizes, staticStrides) + .cast(); +} + +Type SubViewOp::inferRankReducedResultType( + unsigned resultRank, MemRefType sourceRankedTensorType, + ArrayRef leadingStaticOffsets, + ArrayRef leadingStaticSizes, + ArrayRef leadingStaticStrides) { + auto inferredType = + inferResultType(sourceRankedTensorType, leadingStaticOffsets, + leadingStaticSizes, leadingStaticStrides) + .cast(); + assert(inferredType.getRank() >= resultRank && "expected "); + int rankDiff = inferredType.getRank() - resultRank; + if (rankDiff > 0) { + auto shape = inferredType.getShape(); + llvm::SmallDenseSet dimsToProject; + mlir::getPositionsOfShapeOne(rankDiff, shape, dimsToProject); + SmallVector projectedShape; + for (unsigned pos = 0, e = shape.size(); pos < e; ++pos) + if (!dimsToProject.contains(pos)) + projectedShape.push_back(shape[pos]); + + AffineMap map; + auto maps = inferredType.getAffineMaps(); + if (!maps.empty() && maps.front()) + map = getProjectedMap(maps.front(), dimsToProject); + inferredType = + MemRefType::get(projectedShape, inferredType.getElementType(), map, + inferredType.getMemorySpaceAsInt()); + } + return inferredType; +} + +Type SubViewOp::inferRankReducedResultType( + unsigned resultRank, MemRefType sourceRankedTensorType, + ArrayRef leadingStaticOffsets, + ArrayRef leadingStaticSizes, + ArrayRef leadingStaticStrides) { + SmallVector staticOffsets, staticSizes, staticStrides; + SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; + dispatchIndexOpFoldResults(leadingStaticOffsets, dynamicOffsets, + staticOffsets, ShapedType::kDynamicStrideOrOffset); + dispatchIndexOpFoldResults(leadingStaticSizes, dynamicSizes, staticSizes, + ShapedType::kDynamicSize); + dispatchIndexOpFoldResults(leadingStaticStrides, dynamicStrides, + staticStrides, ShapedType::kDynamicStrideOrOffset); + return SubViewOp::inferRankReducedResultType( + resultRank, sourceRankedTensorType, staticOffsets, staticSizes, + staticStrides); +} +// Build a SubViewOp with mixed static and dynamic entries and custom result +// type. If the type passed is nullptr, it is inferred. 
+void SubViewOp::build(OpBuilder &b, OperationState &result, + MemRefType resultType, Value source, + ArrayRef offsets, + ArrayRef sizes, + ArrayRef strides, + ArrayRef attrs) { + SmallVector staticOffsets, staticSizes, staticStrides; + SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; + dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets, + ShapedType::kDynamicStrideOrOffset); + dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes, + ShapedType::kDynamicSize); + dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides, + ShapedType::kDynamicStrideOrOffset); + auto sourceMemRefType = source.getType().cast(); + // Structuring implementation this way avoids duplication between builders. + if (!resultType) { + resultType = SubViewOp::inferResultType(sourceMemRefType, staticOffsets, + staticSizes, staticStrides) + .cast(); + } + build(b, result, resultType, source, dynamicOffsets, dynamicSizes, + dynamicStrides, b.getI64ArrayAttr(staticOffsets), + b.getI64ArrayAttr(staticSizes), b.getI64ArrayAttr(staticStrides)); + result.addAttributes(attrs); +} + +// Build a SubViewOp with mixed static and dynamic entries and inferred result +// type. +void SubViewOp::build(OpBuilder &b, OperationState &result, Value source, + ArrayRef offsets, + ArrayRef sizes, + ArrayRef strides, + ArrayRef attrs) { + build(b, result, MemRefType(), source, offsets, sizes, strides, attrs); +} + +// Build a SubViewOp with static entries and inferred result type. +void SubViewOp::build(OpBuilder &b, OperationState &result, Value source, + ArrayRef offsets, ArrayRef sizes, + ArrayRef strides, + ArrayRef attrs) { + SmallVector offsetValues = llvm::to_vector<4>( + llvm::map_range(offsets, [&](int64_t v) -> OpFoldResult { + return b.getI64IntegerAttr(v); + })); + SmallVector sizeValues = + llvm::to_vector<4>(llvm::map_range(sizes, [&](int64_t v) -> OpFoldResult { + return b.getI64IntegerAttr(v); + })); + SmallVector strideValues = llvm::to_vector<4>( + llvm::map_range(strides, [&](int64_t v) -> OpFoldResult { + return b.getI64IntegerAttr(v); + })); + build(b, result, source, offsetValues, sizeValues, strideValues, attrs); +} + +// Build a SubViewOp with dynamic entries and custom result type. If the +// type passed is nullptr, it is inferred. +void SubViewOp::build(OpBuilder &b, OperationState &result, + MemRefType resultType, Value source, + ArrayRef offsets, ArrayRef sizes, + ArrayRef strides, + ArrayRef attrs) { + SmallVector offsetValues = llvm::to_vector<4>( + llvm::map_range(offsets, [&](int64_t v) -> OpFoldResult { + return b.getI64IntegerAttr(v); + })); + SmallVector sizeValues = + llvm::to_vector<4>(llvm::map_range(sizes, [&](int64_t v) -> OpFoldResult { + return b.getI64IntegerAttr(v); + })); + SmallVector strideValues = llvm::to_vector<4>( + llvm::map_range(strides, [&](int64_t v) -> OpFoldResult { + return b.getI64IntegerAttr(v); + })); + build(b, result, resultType, source, offsetValues, sizeValues, strideValues, + attrs); +} + +// Build a SubViewOp with dynamic entries and custom result type. If the type +// passed is nullptr, it is inferred. 
+void SubViewOp::build(OpBuilder &b, OperationState &result, + MemRefType resultType, Value source, ValueRange offsets, + ValueRange sizes, ValueRange strides, + ArrayRef attrs) { + SmallVector offsetValues = llvm::to_vector<4>( + llvm::map_range(offsets, [](Value v) -> OpFoldResult { return v; })); + SmallVector sizeValues = llvm::to_vector<4>( + llvm::map_range(sizes, [](Value v) -> OpFoldResult { return v; })); + SmallVector strideValues = llvm::to_vector<4>( + llvm::map_range(strides, [](Value v) -> OpFoldResult { return v; })); + build(b, result, resultType, source, offsetValues, sizeValues, strideValues); +} + +// Build a SubViewOp with dynamic entries and inferred result type. +void SubViewOp::build(OpBuilder &b, OperationState &result, Value source, + ValueRange offsets, ValueRange sizes, ValueRange strides, + ArrayRef attrs) { + build(b, result, MemRefType(), source, offsets, sizes, strides, attrs); +} + +/// For ViewLikeOpInterface. +Value SubViewOp::getViewSource() { return source(); } + +/// Given an `originalShape` and a `reducedShape` assumed to be a subset of +/// `originalShape` with some `1` entries erased, return the set of indices +/// that specifies which of the entries of `originalShape` are dropped to obtain +/// `reducedShape`. The returned mask can be applied as a projection to +/// `originalShape` to obtain the `reducedShape`. This mask is useful to track +/// which dimensions must be kept when e.g. compute MemRef strides under +/// rank-reducing operations. Return None if reducedShape cannot be obtained +/// by dropping only `1` entries in `originalShape`. +llvm::Optional> +mlir::computeRankReductionMask(ArrayRef originalShape, + ArrayRef reducedShape) { + size_t originalRank = originalShape.size(), reducedRank = reducedShape.size(); + llvm::SmallDenseSet unusedDims; + unsigned reducedIdx = 0; + for (unsigned originalIdx = 0; originalIdx < originalRank; ++originalIdx) { + // Greedily insert `originalIdx` if no match. + if (reducedIdx < reducedRank && + originalShape[originalIdx] == reducedShape[reducedIdx]) { + reducedIdx++; + continue; + } + + unusedDims.insert(originalIdx); + // If no match on `originalIdx`, the `originalShape` at this dimension + // must be 1, otherwise we bail. + if (originalShape[originalIdx] != 1) + return llvm::None; + } + // The whole reducedShape must be scanned, otherwise we bail. + if (reducedIdx != reducedRank) + return llvm::None; + return unusedDims; +} + +enum SubViewVerificationResult { + Success, + RankTooLarge, + SizeMismatch, + ElemTypeMismatch, + MemSpaceMismatch, + AffineMapMismatch +}; + +/// Checks if `original` Type type can be rank reduced to `reduced` type. +/// This function is slight variant of `is subsequence` algorithm where +/// not matching dimension must be 1. +static SubViewVerificationResult +isRankReducedType(Type originalType, Type candidateReducedType, + std::string *errMsg = nullptr) { + if (originalType == candidateReducedType) + return SubViewVerificationResult::Success; + if (!originalType.isa()) + return SubViewVerificationResult::Success; + if (originalType.isa() && !candidateReducedType.isa()) + return SubViewVerificationResult::Success; + + ShapedType originalShapedType = originalType.cast(); + ShapedType candidateReducedShapedType = + candidateReducedType.cast(); + + // Rank and size logic is valid for all ShapedTypes. 
+ ArrayRef originalShape = originalShapedType.getShape(); + ArrayRef candidateReducedShape = + candidateReducedShapedType.getShape(); + unsigned originalRank = originalShape.size(), + candidateReducedRank = candidateReducedShape.size(); + if (candidateReducedRank > originalRank) + return SubViewVerificationResult::RankTooLarge; + + auto optionalUnusedDimsMask = + computeRankReductionMask(originalShape, candidateReducedShape); + + // Sizes cannot be matched in case empty vector is returned. + if (!optionalUnusedDimsMask.hasValue()) + return SubViewVerificationResult::SizeMismatch; + + if (originalShapedType.getElementType() != + candidateReducedShapedType.getElementType()) + return SubViewVerificationResult::ElemTypeMismatch; + + // Strided layout logic is relevant for MemRefType only. + MemRefType original = originalType.cast(); + MemRefType candidateReduced = candidateReducedType.cast(); + if (original.getMemorySpaceAsInt() != candidateReduced.getMemorySpaceAsInt()) + return SubViewVerificationResult::MemSpaceMismatch; + + llvm::SmallDenseSet unusedDims = optionalUnusedDimsMask.getValue(); + auto inferredType = + getProjectedMap(getStridedLinearLayoutMap(original), unusedDims); + AffineMap candidateLayout; + if (candidateReduced.getAffineMaps().empty()) + candidateLayout = getStridedLinearLayoutMap(candidateReduced); + else + candidateLayout = candidateReduced.getAffineMaps().front(); + assert(inferredType.getNumResults() == 1 && + candidateLayout.getNumResults() == 1); + if (inferredType.getNumSymbols() != candidateLayout.getNumSymbols() || + inferredType.getNumDims() != candidateLayout.getNumDims()) { + if (errMsg) { + llvm::raw_string_ostream os(*errMsg); + os << "inferred type: " << inferredType; + } + return SubViewVerificationResult::AffineMapMismatch; + } + // Check that the difference of the affine maps simplifies to 0. + AffineExpr diffExpr = + inferredType.getResult(0) - candidateLayout.getResult(0); + diffExpr = simplifyAffineExpr(diffExpr, inferredType.getNumDims(), + inferredType.getNumSymbols()); + auto cst = diffExpr.dyn_cast(); + if (!(cst && cst.getValue() == 0)) { + if (errMsg) { + llvm::raw_string_ostream os(*errMsg); + os << "inferred type: " << inferredType; + } + return SubViewVerificationResult::AffineMapMismatch; + } + return SubViewVerificationResult::Success; +} + +template +static LogicalResult produceSubViewErrorMsg(SubViewVerificationResult result, + OpTy op, Type expectedType, + StringRef errMsg = "") { + auto memrefType = expectedType.cast(); + switch (result) { + case SubViewVerificationResult::Success: + return success(); + case SubViewVerificationResult::RankTooLarge: + return op.emitError("expected result rank to be smaller or equal to ") + << "the source rank. " << errMsg; + case SubViewVerificationResult::SizeMismatch: + return op.emitError("expected result type to be ") + << expectedType + << " or a rank-reduced version. (mismatch of result sizes) " + << errMsg; + case SubViewVerificationResult::ElemTypeMismatch: + return op.emitError("expected result element type to be ") + << memrefType.getElementType() << errMsg; + case SubViewVerificationResult::MemSpaceMismatch: + return op.emitError("expected result and source memory spaces to match.") + << errMsg; + case SubViewVerificationResult::AffineMapMismatch: + return op.emitError("expected result type to be ") + << expectedType + << " or a rank-reduced version. 
(mismatch of result affine map) " + << errMsg; + } + llvm_unreachable("unexpected subview verification result"); +} + +/// Verifier for SubViewOp. +static LogicalResult verify(SubViewOp op) { + MemRefType baseType = op.getSourceType(); + MemRefType subViewType = op.getType(); + + // The base memref and the view memref should be in the same memory space. + if (baseType.getMemorySpaceAsInt() != subViewType.getMemorySpaceAsInt()) + return op.emitError("different memory spaces specified for base memref " + "type ") + << baseType << " and subview memref type " << subViewType; + + // Verify that the base memref type has a strided layout map. + if (!isStrided(baseType)) + return op.emitError("base type ") << baseType << " is not strided"; + + // Verify result type against inferred type. + auto expectedType = SubViewOp::inferResultType( + baseType, extractFromI64ArrayAttr(op.static_offsets()), + extractFromI64ArrayAttr(op.static_sizes()), + extractFromI64ArrayAttr(op.static_strides())); + + std::string errMsg; + auto result = isRankReducedType(expectedType, subViewType, &errMsg); + return produceSubViewErrorMsg(result, op, expectedType, errMsg); +} + +raw_ostream &mlir::operator<<(raw_ostream &os, Range &range) { + return os << "range " << range.offset << ":" << range.size << ":" + << range.stride; +} + +/// Return the list of Range (i.e. offset, size, stride). Each Range +/// entry contains either the dynamic value or a ConstantIndexOp constructed +/// with `b` at location `loc`. +SmallVector mlir::getOrCreateRanges(OffsetSizeAndStrideOpInterface op, + OpBuilder &b, Location loc) { + std::array ranks = op.getArrayAttrMaxRanks(); + assert(ranks[0] == ranks[1] && "expected offset and sizes of equal ranks"); + assert(ranks[1] == ranks[2] && "expected sizes and strides of equal ranks"); + SmallVector res; + unsigned rank = ranks[0]; + res.reserve(rank); + for (unsigned idx = 0; idx < rank; ++idx) { + Value offset = + op.isDynamicOffset(idx) + ? op.getDynamicOffset(idx) + : b.create(loc, op.getStaticOffset(idx)); + Value size = op.isDynamicSize(idx) + ? op.getDynamicSize(idx) + : b.create(loc, op.getStaticSize(idx)); + Value stride = + op.isDynamicStride(idx) + ? op.getDynamicStride(idx) + : b.create(loc, op.getStaticStride(idx)); + res.emplace_back(Range{offset, size, stride}); + } + return res; +} + +namespace { +/// Pattern to rewrite a subview op with MemRefCast arguments. +/// This essentially pushes memref.cast past its consuming subview when +/// `canFoldIntoConsumerOp` is true. +/// +/// Example: +/// ``` +/// %0 = memref.cast %V : memref<16x16xf32> to memref +/// %1 = memref.subview %0[0, 0][3, 4][1, 1] : +/// memref to memref<3x4xf32, offset:?, strides:[?, 1]> +/// ``` +/// is rewritten into: +/// ``` +/// %0 = memref.subview %V: memref<16x16xf32> to memref<3x4xf32, #[[map0]]> +/// %1 = memref.cast %0: memref<3x4xf32, offset:0, strides:[16, 1]> to +/// memref<3x4xf32, offset:?, strides:[?, 1]> +/// ``` +class SubViewOpMemRefCastFolder final : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(SubViewOp subViewOp, + PatternRewriter &rewriter) const override { + // Any constant operand, just return to let SubViewOpConstantFolder kick in. 
+ if (llvm::any_of(subViewOp.getOperands(), [](Value operand) { + return matchPattern(operand, matchConstantIndex()); + })) + return failure(); + + auto castOp = subViewOp.source().getDefiningOp(); + if (!castOp) + return failure(); + + if (!CastOp::canFoldIntoConsumerOp(castOp)) + return failure(); + + /// Deduce the resultType of the SubViewOp using `inferSubViewResultType` on + /// the cast source operand type and the SubViewOp static information. This + /// is the resulting type if the MemRefCastOp were folded. + auto resultType = SubViewOp::inferRankReducedResultType( + subViewOp.getType().getRank(), + castOp.source().getType().cast(), + subViewOp.getMixedOffsets(), subViewOp.getMixedSizes(), + subViewOp.getMixedStrides()); + Value newSubView = rewriter.create( + subViewOp.getLoc(), resultType, castOp.source(), subViewOp.offsets(), + subViewOp.sizes(), subViewOp.strides(), subViewOp.static_offsets(), + subViewOp.static_sizes(), subViewOp.static_strides()); + rewriter.replaceOpWithNewOp(subViewOp, subViewOp.getType(), + newSubView); + return success(); + } +}; +} // namespace + +/// A canonicalizer wrapper to replace SubViewOps. +struct SubViewCanonicalizer { + void operator()(PatternRewriter &rewriter, SubViewOp op, SubViewOp newOp) { + rewriter.replaceOpWithNewOp(op, newOp, op.getType()); + } +}; + +void SubViewOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert, + SubViewOpMemRefCastFolder>(context); +} + +OpFoldResult SubViewOp::fold(ArrayRef operands) { + auto resultShapedType = getResult().getType().cast(); + auto sourceShapedType = source().getType().cast(); + + if (resultShapedType.hasStaticShape() && + resultShapedType == sourceShapedType) { + return getViewSource(); + } + + return {}; +} + +//===----------------------------------------------------------------------===// +// TensorLoadOp +//===----------------------------------------------------------------------===// + +OpFoldResult TensorLoadOp::fold(ArrayRef) { + if (auto bufferCast = memref().getDefiningOp()) + // Approximate alias analysis by conservatively folding only when no there + // is no interleaved operation. + if (bufferCast->getBlock() == this->getOperation()->getBlock() && + bufferCast->getNextNode() == this->getOperation()) + return bufferCast.tensor(); + return {}; +} + +//===----------------------------------------------------------------------===// +// TransposeOp +//===----------------------------------------------------------------------===// + +/// Build a strided memref type by applying `permutationMap` tp `memRefType`. +static MemRefType inferTransposeResultType(MemRefType memRefType, + AffineMap permutationMap) { + auto rank = memRefType.getRank(); + auto originalSizes = memRefType.getShape(); + // Compute permuted sizes. + SmallVector sizes(rank, 0); + for (auto en : llvm::enumerate(permutationMap.getResults())) + sizes[en.index()] = + originalSizes[en.value().cast().getPosition()]; + + // Compute permuted strides. + int64_t offset; + SmallVector strides; + auto res = getStridesAndOffset(memRefType, strides, offset); + assert(succeeded(res) && strides.size() == static_cast(rank)); + (void)res; + auto map = + makeStridedLinearLayoutMap(strides, offset, memRefType.getContext()); + map = permutationMap ? 
map.compose(permutationMap) : map; + return MemRefType::Builder(memRefType).setShape(sizes).setAffineMaps(map); +} + +void TransposeOp::build(OpBuilder &b, OperationState &result, Value in, + AffineMapAttr permutation, + ArrayRef attrs) { + auto permutationMap = permutation.getValue(); + assert(permutationMap); + + auto memRefType = in.getType().cast(); + // Compute result type. + MemRefType resultType = inferTransposeResultType(memRefType, permutationMap); + + build(b, result, resultType, in, attrs); + result.addAttribute(TransposeOp::getPermutationAttrName(), permutation); +} + +// transpose $in $permutation attr-dict : type($in) `to` type(results) +static void print(OpAsmPrinter &p, TransposeOp op) { + p << "memref.transpose " << op.in() << " " << op.permutation(); + p.printOptionalAttrDict(op->getAttrs(), + {TransposeOp::getPermutationAttrName()}); + p << " : " << op.in().getType() << " to " << op.getType(); +} + +static ParseResult parseTransposeOp(OpAsmParser &parser, + OperationState &result) { + OpAsmParser::OperandType in; + AffineMap permutation; + MemRefType srcType, dstType; + if (parser.parseOperand(in) || parser.parseAffineMap(permutation) || + parser.parseOptionalAttrDict(result.attributes) || + parser.parseColonType(srcType) || + parser.resolveOperand(in, srcType, result.operands) || + parser.parseKeywordType("to", dstType) || + parser.addTypeToList(dstType, result.types)) + return failure(); + + result.addAttribute(TransposeOp::getPermutationAttrName(), + AffineMapAttr::get(permutation)); + return success(); +} + +static LogicalResult verify(TransposeOp op) { + if (!op.permutation().isPermutation()) + return op.emitOpError("expected a permutation map"); + if (op.permutation().getNumDims() != op.getShapedType().getRank()) + return op.emitOpError( + "expected a permutation map of same rank as the input"); + + auto srcType = op.in().getType().cast(); + auto dstType = op.getType().cast(); + auto transposedType = inferTransposeResultType(srcType, op.permutation()); + if (dstType != transposedType) + return op.emitOpError("output type ") + << dstType << " does not match transposed input type " << srcType + << ", " << transposedType; + return success(); +} + +OpFoldResult TransposeOp::fold(ArrayRef) { + if (succeeded(foldMemRefCast(*this))) + return getResult(); + return {}; +} + +//===----------------------------------------------------------------------===// +// ViewOp +//===----------------------------------------------------------------------===// + +static ParseResult parseViewOp(OpAsmParser &parser, OperationState &result) { + OpAsmParser::OperandType srcInfo; + SmallVector offsetInfo; + SmallVector sizesInfo; + auto indexType = parser.getBuilder().getIndexType(); + Type srcType, dstType; + llvm::SMLoc offsetLoc; + if (parser.parseOperand(srcInfo) || parser.getCurrentLocation(&offsetLoc) || + parser.parseOperandList(offsetInfo, OpAsmParser::Delimiter::Square)) + return failure(); + + if (offsetInfo.size() != 1) + return parser.emitError(offsetLoc) << "expects 1 offset operand"; + + return failure( + parser.parseOperandList(sizesInfo, OpAsmParser::Delimiter::Square) || + parser.parseOptionalAttrDict(result.attributes) || + parser.parseColonType(srcType) || + parser.resolveOperand(srcInfo, srcType, result.operands) || + parser.resolveOperands(offsetInfo, indexType, result.operands) || + parser.resolveOperands(sizesInfo, indexType, result.operands) || + parser.parseKeywordType("to", dstType) || + parser.addTypeToList(dstType, result.types)); +} + +static void 
print(OpAsmPrinter &p, ViewOp op) { + p << op.getOperationName() << ' ' << op.getOperand(0) << '['; + p.printOperand(op.byte_shift()); + p << "][" << op.sizes() << ']'; + p.printOptionalAttrDict(op->getAttrs()); + p << " : " << op.getOperand(0).getType() << " to " << op.getType(); +} + +static LogicalResult verify(ViewOp op) { + auto baseType = op.getOperand(0).getType().cast(); + auto viewType = op.getType(); + + // The base memref should have identity layout map (or none). + if (baseType.getAffineMaps().size() > 1 || + (baseType.getAffineMaps().size() == 1 && + !baseType.getAffineMaps()[0].isIdentity())) + return op.emitError("unsupported map for base memref type ") << baseType; + + // The result memref should have identity layout map (or none). + if (viewType.getAffineMaps().size() > 1 || + (viewType.getAffineMaps().size() == 1 && + !viewType.getAffineMaps()[0].isIdentity())) + return op.emitError("unsupported map for result memref type ") << viewType; + + // The base memref and the view memref should be in the same memory space. + if (baseType.getMemorySpaceAsInt() != viewType.getMemorySpaceAsInt()) + return op.emitError("different memory spaces specified for base memref " + "type ") + << baseType << " and view memref type " << viewType; + + // Verify that we have the correct number of sizes for the result type. + unsigned numDynamicDims = viewType.getNumDynamicDims(); + if (op.sizes().size() != numDynamicDims) + return op.emitError("incorrect number of size operands for type ") + << viewType; + + return success(); +} + +Value ViewOp::getViewSource() { return source(); } + +namespace { + +struct ViewOpShapeFolder : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(ViewOp viewOp, + PatternRewriter &rewriter) const override { + // Return if none of the operands are constants. + if (llvm::none_of(viewOp.getOperands(), [](Value operand) { + return matchPattern(operand, matchConstantIndex()); + })) + return failure(); + + // Get result memref type. + auto memrefType = viewOp.getType(); + + // Get offset from old memref view type 'memRefType'. + int64_t oldOffset; + SmallVector oldStrides; + if (failed(getStridesAndOffset(memrefType, oldStrides, oldOffset))) + return failure(); + assert(oldOffset == 0 && "Expected 0 offset"); + + SmallVector newOperands; + + // Offset cannot be folded into result type. + + // Fold any dynamic dim operands which are produced by a constant. + SmallVector newShapeConstants; + newShapeConstants.reserve(memrefType.getRank()); + + unsigned dynamicDimPos = 0; + unsigned rank = memrefType.getRank(); + for (unsigned dim = 0, e = rank; dim < e; ++dim) { + int64_t dimSize = memrefType.getDimSize(dim); + // If this is already static dimension, keep it. + if (!ShapedType::isDynamic(dimSize)) { + newShapeConstants.push_back(dimSize); + continue; + } + auto *defOp = viewOp.sizes()[dynamicDimPos].getDefiningOp(); + if (auto constantIndexOp = dyn_cast_or_null(defOp)) { + // Dynamic shape dimension will be folded. + newShapeConstants.push_back(constantIndexOp.getValue()); + } else { + // Dynamic shape dimension not folded; copy operand from old memref. + newShapeConstants.push_back(dimSize); + newOperands.push_back(viewOp.sizes()[dynamicDimPos]); + } + dynamicDimPos++; + } + + // Create new memref type with constant folded dims. + MemRefType newMemRefType = + MemRefType::Builder(memrefType).setShape(newShapeConstants); + // Nothing new, don't fold. 
+ if (newMemRefType == memrefType) + return failure(); + + // Create new ViewOp. + auto newViewOp = rewriter.create(viewOp.getLoc(), newMemRefType, + viewOp.getOperand(0), + viewOp.byte_shift(), newOperands); + // Insert a cast so we have the same type as the old memref type. + rewriter.replaceOpWithNewOp(viewOp, newViewOp, viewOp.getType()); + return success(); + } +}; + +struct ViewOpMemrefCastFolder : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(ViewOp viewOp, + PatternRewriter &rewriter) const override { + Value memrefOperand = viewOp.getOperand(0); + CastOp memrefCastOp = memrefOperand.getDefiningOp(); + if (!memrefCastOp) + return failure(); + Value allocOperand = memrefCastOp.getOperand(); + AllocOp allocOp = allocOperand.getDefiningOp(); + if (!allocOp) + return failure(); + rewriter.replaceOpWithNewOp(viewOp, viewOp.getType(), allocOperand, + viewOp.byte_shift(), viewOp.sizes()); + return success(); + } +}; + +} // end anonymous namespace + +void ViewOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + +//===----------------------------------------------------------------------===// +// TableGen'd op method definitions +//===----------------------------------------------------------------------===// + +#define GET_OP_CLASSES +#include "mlir/Dialect/MemRef/IR/MemRefOps.cpp.inc" diff --git a/mlir/lib/Dialect/SCF/CMakeLists.txt b/mlir/lib/Dialect/SCF/CMakeLists.txt --- a/mlir/lib/Dialect/SCF/CMakeLists.txt +++ b/mlir/lib/Dialect/SCF/CMakeLists.txt @@ -12,6 +12,7 @@ MLIREDSC MLIRIR MLIRLoopLikeInterface + MLIRMemRef MLIRSideEffectInterfaces MLIRStandard ) diff --git a/mlir/lib/Dialect/SCF/SCF.cpp b/mlir/lib/Dialect/SCF/SCF.cpp --- a/mlir/lib/Dialect/SCF/SCF.cpp +++ b/mlir/lib/Dialect/SCF/SCF.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/SCF/SCF.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/BlockAndValueMapping.h" #include "mlir/IR/PatternMatch.h" @@ -568,7 +569,7 @@ /// %t0 = ... : tensor_type /// %0 = scf.for ... iter_args(%bb0 : %t0) -> (tensor_type) { /// ... -/// // %m is either tensor_to_memref(%bb00) or defined above the loop +/// // %m is either buffer_cast(%bb00) or defined above the loop /// %m... : memref_type /// ... // uses of %m with potential inplace updates /// %new_tensor = tensor_load %m : memref_type @@ -578,7 +579,7 @@ /// ``` /// /// `%bb0` may have either 0 or 1 use. If it has 1 use it must be exactly a -/// `%m = tensor_to_memref %bb0` op that feeds into the yielded `tensor_load` +/// `%m = buffer_cast %bb0` op that feeds into the yielded `tensor_load` /// op. /// /// If no aliasing write to the memref `%m`, from which `%new_tensor`is loaded, @@ -590,7 +591,7 @@ /// /// The canonicalization rewrites the pattern as: /// ``` -/// // %m is either a tensor_to_memref or defined above +/// // %m is either a buffer_cast or defined above /// %m... : memref_type /// scf.for ... iter_args(%bb0 : %t0) -> (tensor_type) { /// ... // uses of %m with potential inplace updates @@ -601,7 +602,7 @@ /// /// A later bbArg canonicalization will further rewrite as: /// ``` -/// // %m is either a tensor_to_memref or defined above +/// // %m is either a buffer_cast or defined above /// %m... : memref_type /// scf.for ... { // no iter_args /// ... 
// uses of %m with potential inplace updates @@ -622,19 +623,18 @@ unsigned idx = bbArg.getArgNumber() - /*numIv=*/1; auto yieldOp = cast(forOp.region().front().getTerminator()); Value yieldVal = yieldOp->getOperand(idx); - auto tensorLoadOp = yieldVal.getDefiningOp(); + auto tensorLoadOp = yieldVal.getDefiningOp(); bool isTensor = bbArg.getType().isa(); - TensorToMemrefOp tensorToMemRefOp; - // Either bbArg has no use or it has a single tensor_to_memref use. + memref::BufferCastOp bufferCastOp; + // Either bbArg has no use or it has a single buffer_cast use. if (bbArg.hasOneUse()) - tensorToMemRefOp = - dyn_cast(*bbArg.getUsers().begin()); - if (!isTensor || !tensorLoadOp || - (!bbArg.use_empty() && !tensorToMemRefOp)) + bufferCastOp = + dyn_cast(*bbArg.getUsers().begin()); + if (!isTensor || !tensorLoadOp || (!bbArg.use_empty() && !bufferCastOp)) continue; - // If tensorToMemRefOp is present, it must feed into the `tensorLoadOp`. - if (tensorToMemRefOp && tensorLoadOp.memref() != tensorToMemRefOp) + // If bufferCastOp is present, it must feed into the `tensorLoadOp`. + if (bufferCastOp && tensorLoadOp.memref() != bufferCastOp) continue; // TODO: Any aliasing write of tensorLoadOp.memref() nested under `forOp` // must be before `tensorLoadOp` in the block so that the lastWrite @@ -644,18 +644,18 @@ if (tensorLoadOp->getNextNode() != yieldOp) continue; - // Clone the optional tensorToMemRefOp before forOp. - if (tensorToMemRefOp) { + // Clone the optional bufferCastOp before forOp. + if (bufferCastOp) { rewriter.setInsertionPoint(forOp); - rewriter.replaceOpWithNewOp( - tensorToMemRefOp, tensorToMemRefOp.memref().getType(), - tensorToMemRefOp.tensor()); + rewriter.replaceOpWithNewOp( + bufferCastOp, bufferCastOp.memref().getType(), + bufferCastOp.tensor()); } // Clone the tensorLoad after forOp. 
rewriter.setInsertionPointAfter(forOp); Value newTensorLoad = - rewriter.create(loc, tensorLoadOp.memref()); + rewriter.create(loc, tensorLoadOp.memref()); Value forOpResult = forOp.getResult(bbArg.getArgNumber() - /*iv=*/1); replacements.insert(std::make_pair(forOpResult, newTensorLoad)); diff --git a/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp b/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp --- a/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp @@ -8,6 +8,7 @@ #include "mlir/Transforms/Bufferize.h" #include "PassDetail.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/Passes.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/SCF/Transforms.h" diff --git a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt --- a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt @@ -15,6 +15,7 @@ LINK_LIBS PUBLIC MLIRAffine MLIRIR + MLIRMemRef MLIRPass MLIRSCF MLIRStandard diff --git a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopFusion.cpp b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopFusion.cpp --- a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopFusion.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopFusion.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "PassDetail.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/Passes.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/SCF/Transforms.h" @@ -52,10 +53,10 @@ ParallelOp firstPloop, ParallelOp secondPloop, const BlockAndValueMapping &firstToSecondPloopIndices) { DenseMap> bufferStores; - firstPloop.getBody()->walk([&](StoreOp store) { + firstPloop.getBody()->walk([&](memref::StoreOp store) { bufferStores[store.getMemRef()].push_back(store.indices()); }); - auto walkResult = secondPloop.getBody()->walk([&](LoadOp load) { + auto walkResult = secondPloop.getBody()->walk([&](memref::LoadOp load) { // Stop if the memref is defined in secondPloop body. Careful alias analysis // is needed. auto *memrefDef = load.getMemRef().getDefiningOp(); diff --git a/mlir/lib/Dialect/SCF/Transforms/PassDetail.h b/mlir/lib/Dialect/SCF/Transforms/PassDetail.h --- a/mlir/lib/Dialect/SCF/Transforms/PassDetail.h +++ b/mlir/lib/Dialect/SCF/Transforms/PassDetail.h @@ -18,6 +18,10 @@ class AffineDialect; +namespace memref { +class MemRefDialect; +} // end namespace memref + #define GEN_PASS_CLASSES #include "mlir/Dialect/SCF/Passes.h.inc" diff --git a/mlir/lib/Dialect/Shape/IR/Shape.cpp b/mlir/lib/Dialect/Shape/IR/Shape.cpp --- a/mlir/lib/Dialect/Shape/IR/Shape.cpp +++ b/mlir/lib/Dialect/Shape/IR/Shape.cpp @@ -407,7 +407,7 @@ // Reduce op to equivalent with unique operands. 
if (unique.size() < op.getNumOperands()) { rewriter.replaceOpWithNewOp(op, op->getResultTypes(), unique, - op.getAttrs()); + op->getAttrs()); return success(); } diff --git a/mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp --- a/mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp @@ -8,6 +8,7 @@ #include "mlir/Transforms/Bufferize.h" #include "PassDetail.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/Shape/Transforms/Passes.h" #include "mlir/Pass/Pass.h" diff --git a/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt --- a/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt @@ -14,6 +14,7 @@ target_link_libraries(MLIRShapeOpsTransforms PUBLIC MLIRIR + MLIRMemRef MLIRPass MLIRShape MLIRSupport diff --git a/mlir/lib/Dialect/Shape/Transforms/PassDetail.h b/mlir/lib/Dialect/Shape/Transforms/PassDetail.h --- a/mlir/lib/Dialect/Shape/Transforms/PassDetail.h +++ b/mlir/lib/Dialect/Shape/Transforms/PassDetail.h @@ -13,6 +13,10 @@ namespace mlir { +namespace memref { +class MemRefDialect; +} // end namespace memref + #define GEN_PASS_CLASSES #include "mlir/Dialect/Shape/Transforms/Passes.h.inc" diff --git a/mlir/lib/Dialect/StandardOps/CMakeLists.txt b/mlir/lib/Dialect/StandardOps/CMakeLists.txt --- a/mlir/lib/Dialect/StandardOps/CMakeLists.txt +++ b/mlir/lib/Dialect/StandardOps/CMakeLists.txt @@ -16,6 +16,7 @@ MLIRControlFlowInterfaces MLIREDSC MLIRIR + MLIRMemRef MLIRSideEffectInterfaces MLIRTensor MLIRVectorInterfaces diff --git a/mlir/lib/Dialect/StandardOps/EDSC/Builders.cpp b/mlir/lib/Dialect/StandardOps/EDSC/Builders.cpp --- a/mlir/lib/Dialect/StandardOps/EDSC/Builders.cpp +++ b/mlir/lib/Dialect/StandardOps/EDSC/Builders.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h" #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/AffineMap.h" @@ -23,7 +24,7 @@ const auto &shape = memRefType.getShape(); for (unsigned idx = 0, n = shape.size(); idx < n; ++idx) { if (shape[idx] == -1) - res.push_back(std_dim(memRef, idx)); + res.push_back(memref_dim(memRef, idx)); else res.push_back(std_constant_index(shape[idx])); } diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -9,6 +9,7 @@ #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/CommonFolders.h" +#include "mlir/Dialect/StandardOps/Utils/Utils.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/AffineMap.h" @@ -216,10 +217,10 @@ void StandardOpsDialect::initialize() { getContext()->loadDialect(); - addOperations(); + >(); addInterfaces(); } @@ -231,32 +232,6 @@ return builder.create(loc, type, value); } -/// Matches a ConstantIndexOp. -/// TODO: This should probably just be a general matcher that uses m_Constant -/// and checks the operation for an index type. 
-static detail::op_matcher m_ConstantIndex() { - return detail::op_matcher(); -} - -//===----------------------------------------------------------------------===// -// Common canonicalization pattern support logic -//===----------------------------------------------------------------------===// - -/// This is a common class used for patterns of the form -/// "someop(memrefcast) -> someop". It folds the source of any memref_cast -/// into the root operation directly. -static LogicalResult foldMemRefCast(Operation *op) { - bool folded = false; - for (OpOperand &operand : op->getOpOperands()) { - auto cast = operand.get().getDefiningOp(); - if (cast && !cast.getOperand().getType().isa()) { - operand.set(cast.getOperand()); - folded = true; - } - } - return success(folded); -} - //===----------------------------------------------------------------------===// // Common cast compatibility check for vector types. //===----------------------------------------------------------------------===// @@ -278,18 +253,6 @@ return false; } -//===----------------------------------------------------------------------===// -// Helpers for Tensor[Load|Store]Op, TensorToMemrefOp, and GlobalMemrefOp -//===----------------------------------------------------------------------===// - -static Type getTensorTypeFromMemRefType(Type type) { - if (auto memref = type.dyn_cast()) - return RankedTensorType::get(memref.getShape(), memref.getElementType()); - if (auto memref = type.dyn_cast()) - return UnrankedTensorType::get(memref.getElementType()); - return NoneType::get(type.getContext()); -} - //===----------------------------------------------------------------------===// // AddFOp //===----------------------------------------------------------------------===// @@ -320,131 +283,6 @@ })); } -//===----------------------------------------------------------------------===// -// AllocOp / AllocaOp -//===----------------------------------------------------------------------===// - -template -static LogicalResult verifyAllocLikeOp(AllocLikeOp op) { - static_assert(llvm::is_one_of::value, - "applies to only alloc or alloca"); - auto memRefType = op.getResult().getType().template dyn_cast(); - if (!memRefType) - return op.emitOpError("result must be a memref"); - - if (static_cast(op.dynamicSizes().size()) != - memRefType.getNumDynamicDims()) - return op.emitOpError("dimension operand count does not equal memref " - "dynamic dimension count"); - - unsigned numSymbols = 0; - if (!memRefType.getAffineMaps().empty()) - numSymbols = memRefType.getAffineMaps().front().getNumSymbols(); - if (op.symbolOperands().size() != numSymbols) - return op.emitOpError( - "symbol operand count does not equal memref symbol count"); - - return success(); -} - -static LogicalResult verify(AllocOp op) { return verifyAllocLikeOp(op); } - -static LogicalResult verify(AllocaOp op) { - // An alloca op needs to have an ancestor with an allocation scope trait. - if (!op->getParentWithTrait()) - return op.emitOpError( - "requires an ancestor op with AutomaticAllocationScope trait"); - - return verifyAllocLikeOp(op); -} - -namespace { -/// Fold constant dimensions into an alloc like operation. -template -struct SimplifyAllocConst : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(AllocLikeOp alloc, - PatternRewriter &rewriter) const override { - // Check to see if any dimensions operands are constants. If so, we can - // substitute and drop them. 
- if (llvm::none_of(alloc.getOperands(), [](Value operand) { - return matchPattern(operand, m_ConstantIndex()); - })) - return failure(); - - auto memrefType = alloc.getType(); - - // Ok, we have one or more constant operands. Collect the non-constant ones - // and keep track of the resultant memref type to build. - SmallVector newShapeConstants; - newShapeConstants.reserve(memrefType.getRank()); - SmallVector newOperands; - - unsigned dynamicDimPos = 0; - for (unsigned dim = 0, e = memrefType.getRank(); dim < e; ++dim) { - int64_t dimSize = memrefType.getDimSize(dim); - // If this is already static dimension, keep it. - if (dimSize != -1) { - newShapeConstants.push_back(dimSize); - continue; - } - auto *defOp = alloc.getOperand(dynamicDimPos).getDefiningOp(); - if (auto constantIndexOp = dyn_cast_or_null(defOp)) { - // Dynamic shape dimension will be folded. - newShapeConstants.push_back(constantIndexOp.getValue()); - } else { - // Dynamic shape dimension not folded; copy operand from old memref. - newShapeConstants.push_back(-1); - newOperands.push_back(alloc.getOperand(dynamicDimPos)); - } - dynamicDimPos++; - } - - // Create new memref type (which will have fewer dynamic dimensions). - MemRefType newMemRefType = - MemRefType::Builder(memrefType).setShape(newShapeConstants); - assert(static_cast(newOperands.size()) == - newMemRefType.getNumDynamicDims()); - - // Create and insert the alloc op for the new memref. - auto newAlloc = rewriter.create(alloc.getLoc(), newMemRefType, - newOperands, IntegerAttr()); - // Insert a cast so we have the same type as the old alloc. - auto resultCast = rewriter.create(alloc.getLoc(), newAlloc, - alloc.getType()); - - rewriter.replaceOp(alloc, {resultCast}); - return success(); - } -}; - -/// Fold alloc operations with no uses. Alloc has side effects on the heap, -/// but can still be deleted if it has zero uses. -struct SimplifyDeadAlloc : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(AllocOp alloc, - PatternRewriter &rewriter) const override { - if (alloc.use_empty()) { - rewriter.eraseOp(alloc); - return success(); - } - return failure(); - } -}; -} // end anonymous namespace. 
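The two rewrite patterns above fold statically known dimension operands into the allocation's result type and erase allocations with no uses. A minimal sketch of the first rewrite, assuming the post-move `memref.*` spelling of the ops and purely illustrative shapes:

```mlir
// Before SimplifyAllocConst: one "dynamic" size is in fact a constant.
%c16 = constant 16 : index
%0 = memref.alloc(%c16) : memref<?x8xf32>

// After the rewrite: the constant is folded into the result type, and a cast
// reconciles the remaining uses of the original memref<?x8xf32> value.
%1 = memref.alloc() : memref<16x8xf32>
%2 = memref.cast %1 : memref<16x8xf32> to memref<?x8xf32>
```

`SimplifyDeadAlloc` is simpler still: an alloc whose result is never used is erased outright.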
- -void AllocOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert, SimplifyDeadAlloc>(context); -} - -void AllocaOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert>(context); -} - //===----------------------------------------------------------------------===// // AndOp //===----------------------------------------------------------------------===// @@ -491,17 +329,6 @@ patterns.insert(context); } -//===----------------------------------------------------------------------===// -// AssumeAlignmentOp -//===----------------------------------------------------------------------===// - -static LogicalResult verify(AssumeAlignmentOp op) { - unsigned alignment = op.alignment(); - if (!llvm::isPowerOf2_32(alignment)) - return op.emitOpError("alignment must be power of 2"); - return success(); -} - //===----------------------------------------------------------------------===// // AtomicRMWOp //===----------------------------------------------------------------------===// @@ -1347,222 +1174,6 @@ ConstantOp::build(builder, result, type, builder.getIntegerAttr(type, value)); } -//===----------------------------------------------------------------------===// -// DeallocOp -//===----------------------------------------------------------------------===// -namespace { -/// Fold Dealloc operations that are deallocating an AllocOp that is only used -/// by other Dealloc operations. -struct SimplifyDeadDealloc : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(DeallocOp dealloc, - PatternRewriter &rewriter) const override { - // Check that the memref operand's defining operation is an AllocOp. - Value memref = dealloc.memref(); - if (!isa_and_nonnull(memref.getDefiningOp())) - return failure(); - - // Check that all of the uses of the AllocOp are other DeallocOps. - for (auto *user : memref.getUsers()) - if (!isa(user)) - return failure(); - - // Erase the dealloc operation. - rewriter.eraseOp(dealloc); - return success(); - } -}; -} // end anonymous namespace. - -static LogicalResult verify(DeallocOp op) { - if (!op.memref().getType().isa()) - return op.emitOpError("operand must be a memref"); - return success(); -} - -void DeallocOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert(context); -} - -LogicalResult DeallocOp::fold(ArrayRef cstOperands, - SmallVectorImpl &results) { - /// dealloc(memrefcast) -> dealloc - return foldMemRefCast(*this); -} - -//===----------------------------------------------------------------------===// -// DimOp -//===----------------------------------------------------------------------===// - -void DimOp::build(OpBuilder &builder, OperationState &result, - Value memrefOrTensor, int64_t index) { - auto loc = result.location; - Value indexValue = builder.create(loc, index); - build(builder, result, memrefOrTensor, indexValue); -} - -void DimOp::build(OpBuilder &builder, OperationState &result, - Value memrefOrTensor, Value index) { - auto indexTy = builder.getIndexType(); - build(builder, result, indexTy, memrefOrTensor, index); -} - -Optional DimOp::getConstantIndex() { - if (auto constantOp = index().getDefiningOp()) - return constantOp.getValue().cast().getInt(); - return {}; -} - -static LogicalResult verify(DimOp op) { - // Assume unknown index to be in range. 
- Optional index = op.getConstantIndex(); - if (!index.hasValue()) - return success(); - - // Check that constant index is not knowingly out of range. - auto type = op.memrefOrTensor().getType(); - if (auto tensorType = type.dyn_cast()) { - if (index.getValue() >= tensorType.getRank()) - return op.emitOpError("index is out of range"); - } else if (auto memrefType = type.dyn_cast()) { - if (index.getValue() >= memrefType.getRank()) - return op.emitOpError("index is out of range"); - } else if (type.isa() || type.isa()) { - // Assume index to be in range. - } else { - llvm_unreachable("expected operand with tensor or memref type"); - } - - return success(); -} - -OpFoldResult DimOp::fold(ArrayRef operands) { - auto index = operands[1].dyn_cast_or_null(); - - // All forms of folding require a known index. - if (!index) - return {}; - - auto argTy = memrefOrTensor().getType(); - // Fold if the shape extent along the given index is known. - if (auto shapedTy = argTy.dyn_cast()) { - // Folding for unranked types (UnrankedMemRefType, UnrankedTensorType) is - // not supported. - if (!shapedTy.hasRank()) - return {}; - if (!shapedTy.isDynamicDim(index.getInt())) { - Builder builder(getContext()); - return builder.getIndexAttr(shapedTy.getShape()[index.getInt()]); - } - } - - Operation *definingOp = memrefOrTensor().getDefiningOp(); - // dim(tensor_load(memref)) -> dim(memref) - if (auto tensorLoadOp = dyn_cast_or_null(definingOp)) { - setOperand(0, tensorLoadOp.memref()); - return getResult(); - } - - // Fold dim to the operand of tensor.generate. - if (auto fromElements = dyn_cast_or_null(definingOp)) { - auto resultType = - fromElements.getResult().getType().cast(); - // The case where the type encodes the size of the dimension is handled - // above. - assert(resultType.getShape()[index.getInt()] == - RankedTensorType::kDynamicSize); - - // Find the operand of the fromElements that corresponds to this index. - auto dynExtents = fromElements.dynamicExtents().begin(); - for (auto dim : resultType.getShape().take_front(index.getInt())) - if (dim == RankedTensorType::kDynamicSize) - dynExtents++; - - return Value{*dynExtents}; - } - - // The size at the given index is now known to be a dynamic size. - unsigned unsignedIndex = index.getValue().getZExtValue(); - - if (auto subtensor = dyn_cast_or_null(definingOp)) { - assert(subtensor.isDynamicSize(unsignedIndex) && - "Expected dynamic subtensor size"); - return subtensor.getDynamicSize(unsignedIndex); - } - - // Fold dim to the size argument for an `AllocOp`, `ViewOp`, or `SubViewOp`. - auto memrefType = argTy.dyn_cast(); - if (!memrefType) - return {}; - - if (auto alloc = dyn_cast_or_null(definingOp)) - return *(alloc.getDynamicSizes().begin() + - memrefType.getDynamicDimIndex(unsignedIndex)); - - if (auto view = dyn_cast_or_null(definingOp)) - return *(view.getDynamicSizes().begin() + - memrefType.getDynamicDimIndex(unsignedIndex)); - - if (auto subview = dyn_cast_or_null(definingOp)) { - assert(subview.isDynamicSize(unsignedIndex) && - "Expected dynamic subview size"); - return subview.getDynamicSize(unsignedIndex); - } - - // dim(memrefcast) -> dim - if (succeeded(foldMemRefCast(*this))) - return getResult(); - - return {}; -} - -namespace { -/// Fold dim of a memref reshape operation to a load into the reshape's shape -/// operand. 
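The folder above resolves `dim` to a constant whenever the queried extent is static, and otherwise forwards the query to the defining op (`tensor_load`, `tensor.generate`, `subtensor`, allocations, views and subviews) or through a memref cast. A sketch of the constant case, assuming the post-move `memref.dim` spelling and an illustrative type:

```mlir
%c1 = constant 1 : index
// The extent of dimension 1 is static, so the op folds away ...
%d = memref.dim %arg0, %c1 : memref<4x8xf32>
// ... and %d is replaced by:
%c8 = constant 8 : index
```

The pattern defined next handles the remaining `dim`-of-reshape case by loading from the reshape's shape operand.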
-struct DimOfMemRefReshape : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(DimOp dim, - PatternRewriter &rewriter) const override { - auto reshape = dim.memrefOrTensor().getDefiningOp(); - - if (!reshape) - return failure(); - - // Place the load directly after the reshape to ensure that the shape memref - // was not mutated. - rewriter.setInsertionPointAfter(reshape); - rewriter.replaceOpWithNewOp(dim, reshape.shape(), - llvm::makeArrayRef({dim.index()})); - return success(); - } -}; - -/// Fold dim of a dim of a cast into the dim of the source of the tensor cast. -template -struct DimOfCastOp : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(DimOp dimOp, - PatternRewriter &rewriter) const override { - auto castOp = dimOp.memrefOrTensor().getDefiningOp(); - if (!castOp) - return failure(); - Value newSource = castOp.getOperand(); - rewriter.replaceOpWithNewOp(dimOp, newSource, dimOp.index()); - return success(); - } -}; -} // end anonymous namespace. - -void DimOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert, - DimOfCastOp>(context); -} - // --------------------------------------------------------------------------- // DivFOp // --------------------------------------------------------------------------- @@ -1572,256 +1183,6 @@ operands, [](APFloat a, APFloat b) { return a / b; }); } -// --------------------------------------------------------------------------- -// DmaStartOp -// --------------------------------------------------------------------------- - -void DmaStartOp::build(OpBuilder &builder, OperationState &result, - Value srcMemRef, ValueRange srcIndices, Value destMemRef, - ValueRange destIndices, Value numElements, - Value tagMemRef, ValueRange tagIndices, Value stride, - Value elementsPerStride) { - result.addOperands(srcMemRef); - result.addOperands(srcIndices); - result.addOperands(destMemRef); - result.addOperands(destIndices); - result.addOperands({numElements, tagMemRef}); - result.addOperands(tagIndices); - if (stride) - result.addOperands({stride, elementsPerStride}); -} - -void DmaStartOp::print(OpAsmPrinter &p) { - p << "dma_start " << getSrcMemRef() << '[' << getSrcIndices() << "], " - << getDstMemRef() << '[' << getDstIndices() << "], " << getNumElements() - << ", " << getTagMemRef() << '[' << getTagIndices() << ']'; - if (isStrided()) - p << ", " << getStride() << ", " << getNumElementsPerStride(); - - p.printOptionalAttrDict((*this)->getAttrs()); - p << " : " << getSrcMemRef().getType() << ", " << getDstMemRef().getType() - << ", " << getTagMemRef().getType(); -} - -// Parse DmaStartOp. -// Ex: -// %dma_id = dma_start %src[%i, %j], %dst[%k, %l], %size, -// %tag[%index], %stride, %num_elt_per_stride : -// : memref<3076 x f32, 0>, -// memref<1024 x f32, 2>, -// memref<1 x i32> -// -ParseResult DmaStartOp::parse(OpAsmParser &parser, OperationState &result) { - OpAsmParser::OperandType srcMemRefInfo; - SmallVector srcIndexInfos; - OpAsmParser::OperandType dstMemRefInfo; - SmallVector dstIndexInfos; - OpAsmParser::OperandType numElementsInfo; - OpAsmParser::OperandType tagMemrefInfo; - SmallVector tagIndexInfos; - SmallVector strideInfo; - - SmallVector types; - auto indexType = parser.getBuilder().getIndexType(); - - // Parse and resolve the following list of operands: - // *) source memref followed by its indices (in square brackets). 
- // *) destination memref followed by its indices (in square brackets). - // *) dma size in KiB. - if (parser.parseOperand(srcMemRefInfo) || - parser.parseOperandList(srcIndexInfos, OpAsmParser::Delimiter::Square) || - parser.parseComma() || parser.parseOperand(dstMemRefInfo) || - parser.parseOperandList(dstIndexInfos, OpAsmParser::Delimiter::Square) || - parser.parseComma() || parser.parseOperand(numElementsInfo) || - parser.parseComma() || parser.parseOperand(tagMemrefInfo) || - parser.parseOperandList(tagIndexInfos, OpAsmParser::Delimiter::Square)) - return failure(); - - // Parse optional stride and elements per stride. - if (parser.parseTrailingOperandList(strideInfo)) - return failure(); - - bool isStrided = strideInfo.size() == 2; - if (!strideInfo.empty() && !isStrided) { - return parser.emitError(parser.getNameLoc(), - "expected two stride related operands"); - } - - if (parser.parseColonTypeList(types)) - return failure(); - if (types.size() != 3) - return parser.emitError(parser.getNameLoc(), "fewer/more types expected"); - - if (parser.resolveOperand(srcMemRefInfo, types[0], result.operands) || - parser.resolveOperands(srcIndexInfos, indexType, result.operands) || - parser.resolveOperand(dstMemRefInfo, types[1], result.operands) || - parser.resolveOperands(dstIndexInfos, indexType, result.operands) || - // size should be an index. - parser.resolveOperand(numElementsInfo, indexType, result.operands) || - parser.resolveOperand(tagMemrefInfo, types[2], result.operands) || - // tag indices should be index. - parser.resolveOperands(tagIndexInfos, indexType, result.operands)) - return failure(); - - if (isStrided) { - if (parser.resolveOperands(strideInfo, indexType, result.operands)) - return failure(); - } - - return success(); -} - -LogicalResult DmaStartOp::verify() { - unsigned numOperands = getNumOperands(); - - // Mandatory non-variadic operands are: src memref, dst memref, tag memref and - // the number of elements. - if (numOperands < 4) - return emitOpError("expected at least 4 operands"); - - // Check types of operands. The order of these calls is important: the later - // calls rely on some type properties to compute the operand position. - // 1. Source memref. - if (!getSrcMemRef().getType().isa()) - return emitOpError("expected source to be of memref type"); - if (numOperands < getSrcMemRefRank() + 4) - return emitOpError() << "expected at least " << getSrcMemRefRank() + 4 - << " operands"; - if (!getSrcIndices().empty() && - !llvm::all_of(getSrcIndices().getTypes(), - [](Type t) { return t.isIndex(); })) - return emitOpError("expected source indices to be of index type"); - - // 2. Destination memref. - if (!getDstMemRef().getType().isa()) - return emitOpError("expected destination to be of memref type"); - unsigned numExpectedOperands = getSrcMemRefRank() + getDstMemRefRank() + 4; - if (numOperands < numExpectedOperands) - return emitOpError() << "expected at least " << numExpectedOperands - << " operands"; - if (!getDstIndices().empty() && - !llvm::all_of(getDstIndices().getTypes(), - [](Type t) { return t.isIndex(); })) - return emitOpError("expected destination indices to be of index type"); - - // 3. Number of elements. - if (!getNumElements().getType().isIndex()) - return emitOpError("expected num elements to be of index type"); - - // 4. Tag memref. 
- if (!getTagMemRef().getType().isa()) - return emitOpError("expected tag to be of memref type"); - numExpectedOperands += getTagMemRefRank(); - if (numOperands < numExpectedOperands) - return emitOpError() << "expected at least " << numExpectedOperands - << " operands"; - if (!getTagIndices().empty() && - !llvm::all_of(getTagIndices().getTypes(), - [](Type t) { return t.isIndex(); })) - return emitOpError("expected tag indices to be of index type"); - - // DMAs from different memory spaces supported. - if (getSrcMemorySpace() == getDstMemorySpace()) - return emitOpError("DMA should be between different memory spaces"); - - // Optional stride-related operands must be either both present or both - // absent. - if (numOperands != numExpectedOperands && - numOperands != numExpectedOperands + 2) - return emitOpError("incorrect number of operands"); - - // 5. Strides. - if (isStrided()) { - if (!getStride().getType().isIndex() || - !getNumElementsPerStride().getType().isIndex()) - return emitOpError( - "expected stride and num elements per stride to be of type index"); - } - - return success(); -} - -LogicalResult DmaStartOp::fold(ArrayRef cstOperands, - SmallVectorImpl &results) { - /// dma_start(memrefcast) -> dma_start - return foldMemRefCast(*this); -} - -// --------------------------------------------------------------------------- -// DmaWaitOp -// --------------------------------------------------------------------------- - -void DmaWaitOp::build(OpBuilder &builder, OperationState &result, - Value tagMemRef, ValueRange tagIndices, - Value numElements) { - result.addOperands(tagMemRef); - result.addOperands(tagIndices); - result.addOperands(numElements); -} - -void DmaWaitOp::print(OpAsmPrinter &p) { - p << "dma_wait " << getTagMemRef() << '[' << getTagIndices() << "], " - << getNumElements(); - p.printOptionalAttrDict((*this)->getAttrs()); - p << " : " << getTagMemRef().getType(); -} - -// Parse DmaWaitOp. -// Eg: -// dma_wait %tag[%index], %num_elements : memref<1 x i32, (d0) -> (d0), 4> -// -ParseResult DmaWaitOp::parse(OpAsmParser &parser, OperationState &result) { - OpAsmParser::OperandType tagMemrefInfo; - SmallVector tagIndexInfos; - Type type; - auto indexType = parser.getBuilder().getIndexType(); - OpAsmParser::OperandType numElementsInfo; - - // Parse tag memref, its indices, and dma size. - if (parser.parseOperand(tagMemrefInfo) || - parser.parseOperandList(tagIndexInfos, OpAsmParser::Delimiter::Square) || - parser.parseComma() || parser.parseOperand(numElementsInfo) || - parser.parseColonType(type) || - parser.resolveOperand(tagMemrefInfo, type, result.operands) || - parser.resolveOperands(tagIndexInfos, indexType, result.operands) || - parser.resolveOperand(numElementsInfo, indexType, result.operands)) - return failure(); - - return success(); -} - -LogicalResult DmaWaitOp::fold(ArrayRef cstOperands, - SmallVectorImpl &results) { - /// dma_wait(memrefcast) -> dma_wait - return foldMemRefCast(*this); -} - -LogicalResult DmaWaitOp::verify() { - // Mandatory non-variadic operands are tag and the number of elements. - if (getNumOperands() < 2) - return emitOpError() << "expected at least 2 operands"; - - // Check types of operands. The order of these calls is important: the later - // calls rely on some type properties to compute the operand position. 
- if (!getTagMemRef().getType().isa()) - return emitOpError() << "expected tag to be of memref type"; - - if (getNumOperands() != 2 + getTagMemRefRank()) - return emitOpError() << "expected " << 2 + getTagMemRefRank() - << " operands"; - - if (!getTagIndices().empty() && - !llvm::all_of(getTagIndices().getTypes(), - [](Type t) { return t.isIndex(); })) - return emitOpError() << "expected tag indices to be of index type"; - - if (!getNumElements().getType().isIndex()) - return emitOpError() - << "expected the number of elements to be of index type"; - - return success(); -} - //===----------------------------------------------------------------------===// // FPExtOp //===----------------------------------------------------------------------===// @@ -1876,106 +1237,6 @@ return areVectorCastSimpleCompatible(a, b, areCastCompatible); } -//===----------------------------------------------------------------------===// -// GlobalMemrefOp -//===----------------------------------------------------------------------===// - -static void printGlobalMemrefOpTypeAndInitialValue(OpAsmPrinter &p, - GlobalMemrefOp op, - TypeAttr type, - Attribute initialValue) { - p << type; - if (!op.isExternal()) { - p << " = "; - if (op.isUninitialized()) - p << "uninitialized"; - else - p.printAttributeWithoutType(initialValue); - } -} - -static ParseResult -parseGlobalMemrefOpTypeAndInitialValue(OpAsmParser &parser, TypeAttr &typeAttr, - Attribute &initialValue) { - Type type; - if (parser.parseType(type)) - return failure(); - - auto memrefType = type.dyn_cast(); - if (!memrefType || !memrefType.hasStaticShape()) - return parser.emitError(parser.getNameLoc()) - << "type should be static shaped memref, but got " << type; - typeAttr = TypeAttr::get(type); - - if (parser.parseOptionalEqual()) - return success(); - - if (succeeded(parser.parseOptionalKeyword("uninitialized"))) { - initialValue = UnitAttr::get(parser.getBuilder().getContext()); - return success(); - } - - Type tensorType = getTensorTypeFromMemRefType(memrefType); - if (parser.parseAttribute(initialValue, tensorType)) - return failure(); - if (!initialValue.isa()) - return parser.emitError(parser.getNameLoc()) - << "initial value should be a unit or elements attribute"; - return success(); -} - -static LogicalResult verify(GlobalMemrefOp op) { - auto memrefType = op.type().dyn_cast(); - if (!memrefType || !memrefType.hasStaticShape()) - return op.emitOpError("type should be static shaped memref, but got ") - << op.type(); - - // Verify that the initial value, if present, is either a unit attribute or - // an elements attribute. - if (op.initial_value().hasValue()) { - Attribute initValue = op.initial_value().getValue(); - if (!initValue.isa() && !initValue.isa()) - return op.emitOpError("initial value should be a unit or elements " - "attribute, but got ") - << initValue; - - // Check that the type of the initial value is compatible with the type of - // the global variable. - if (initValue.isa()) { - Type initType = initValue.getType(); - Type tensorType = getTensorTypeFromMemRefType(memrefType); - if (initType != tensorType) - return op.emitOpError("initial value expected to be of type ") - << tensorType << ", but was of type " << initType; - } - } - - // TODO: verify visibility for declarations. 
- return success(); -} - -//===----------------------------------------------------------------------===// -// GetGlobalMemrefOp -//===----------------------------------------------------------------------===// - -LogicalResult -GetGlobalMemrefOp::verifySymbolUses(SymbolTableCollection &symbolTable) { - // Verify that the result type is same as the type of the referenced - // global_memref op. - auto global = - symbolTable.lookupNearestSymbolFrom(*this, nameAttr()); - if (!global) - return emitOpError("'") - << name() << "' does not reference a valid global memref"; - - Type resultType = result().getType(); - if (global.type() != resultType) - return emitOpError("result type ") - << resultType << " does not match type " << global.type() - << " of the global memref @" << name(); - return success(); -} - //===----------------------------------------------------------------------===// // IndexCastOp //===----------------------------------------------------------------------===// @@ -2014,288 +1275,16 @@ } //===----------------------------------------------------------------------===// -// LoadOp +// MulFOp //===----------------------------------------------------------------------===// -static LogicalResult verify(LoadOp op) { - if (op.getNumOperands() != 1 + op.getMemRefType().getRank()) - return op.emitOpError("incorrect number of indices for load"); - return success(); -} - -OpFoldResult LoadOp::fold(ArrayRef cstOperands) { - /// load(memrefcast) -> load - if (succeeded(foldMemRefCast(*this))) - return getResult(); - return OpFoldResult(); -} - -namespace { -/// Fold a load on a tensor_to_memref operation into an tensor.extract on the -/// corresponding tensor. -struct LoadOfTensorToMemref : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(LoadOp load, - PatternRewriter &rewriter) const override { - auto tensorToMemref = load.memref().getDefiningOp(); - if (!tensorToMemref) - return failure(); - - rewriter.replaceOpWithNewOp( - load, tensorToMemref.tensor(), load.indices()); - return success(); - } -}; -} // end anonymous namespace. - -void LoadOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert(context); +OpFoldResult MulFOp::fold(ArrayRef operands) { + return constFoldBinaryOp( + operands, [](APFloat a, APFloat b) { return a * b; }); } //===----------------------------------------------------------------------===// -// MemRefCastOp -//===----------------------------------------------------------------------===// - -Value MemRefCastOp::getViewSource() { return source(); } - -bool MemRefCastOp::areCastCompatible(TypeRange inputs, TypeRange outputs) { - if (inputs.size() != 1 || outputs.size() != 1) - return false; - Type a = inputs.front(), b = outputs.front(); - auto aT = a.dyn_cast(); - auto bT = b.dyn_cast(); - - auto uaT = a.dyn_cast(); - auto ubT = b.dyn_cast(); - - if (aT && bT) { - if (aT.getElementType() != bT.getElementType()) - return false; - if (aT.getAffineMaps() != bT.getAffineMaps()) { - int64_t aOffset, bOffset; - SmallVector aStrides, bStrides; - if (failed(getStridesAndOffset(aT, aStrides, aOffset)) || - failed(getStridesAndOffset(bT, bStrides, bOffset)) || - aStrides.size() != bStrides.size()) - return false; - - // Strides along a dimension/offset are compatible if the value in the - // source memref is static and the value in the target memref is the - // same. 
They are also compatible if either one is dynamic (see - // description of MemRefCastOp for details). - auto checkCompatible = [](int64_t a, int64_t b) { - return (a == MemRefType::getDynamicStrideOrOffset() || - b == MemRefType::getDynamicStrideOrOffset() || a == b); - }; - if (!checkCompatible(aOffset, bOffset)) - return false; - for (auto aStride : enumerate(aStrides)) - if (!checkCompatible(aStride.value(), bStrides[aStride.index()])) - return false; - } - if (aT.getMemorySpaceAsInt() != bT.getMemorySpaceAsInt()) - return false; - - // They must have the same rank, and any specified dimensions must match. - if (aT.getRank() != bT.getRank()) - return false; - - for (unsigned i = 0, e = aT.getRank(); i != e; ++i) { - int64_t aDim = aT.getDimSize(i), bDim = bT.getDimSize(i); - if (aDim != -1 && bDim != -1 && aDim != bDim) - return false; - } - return true; - } else { - if (!aT && !uaT) - return false; - if (!bT && !ubT) - return false; - // Unranked to unranked casting is unsupported - if (uaT && ubT) - return false; - - auto aEltType = (aT) ? aT.getElementType() : uaT.getElementType(); - auto bEltType = (bT) ? bT.getElementType() : ubT.getElementType(); - if (aEltType != bEltType) - return false; - - auto aMemSpace = - (aT) ? aT.getMemorySpaceAsInt() : uaT.getMemorySpaceAsInt(); - auto bMemSpace = - (bT) ? bT.getMemorySpaceAsInt() : ubT.getMemorySpaceAsInt(); - if (aMemSpace != bMemSpace) - return false; - - return true; - } - - return false; -} - -OpFoldResult MemRefCastOp::fold(ArrayRef operands) { - return succeeded(foldMemRefCast(*this)) ? getResult() : Value(); -} - -//===----------------------------------------------------------------------===// -// MemRefReinterpretCastOp -//===----------------------------------------------------------------------===// - -/// Build a MemRefReinterpretCastOp with all dynamic entries: `staticOffsets`, -/// `staticSizes` and `staticStrides` are automatically filled with -/// source-memref-rank sentinel values that encode dynamic entries. 
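`areCastCompatible` above accepts ranked-to-ranked casts whose element types and memory spaces match and whose static extents, strides and offsets agree wherever both sides are static, plus casts between ranked and unranked memrefs. A few illustrative forms, written with the post-move `memref.cast` spelling:

```mlir
// Ranked-to-ranked: each dimension may independently become more or less static.
%0 = memref.cast %a : memref<4x?xf32> to memref<?x8xf32>

// Ranked-to-unranked and unranked-to-ranked are both accepted;
// unranked-to-unranked is not.
%1 = memref.cast %a : memref<4x?xf32> to memref<*xf32>
%2 = memref.cast %1 : memref<*xf32> to memref<4x4xf32>
```

The `build` overloads that follow construct `MemRefReinterpretCastOp` from mixed static and dynamic offsets, sizes and strides, encoding the dynamic entries with the usual sentinel values.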
-void mlir::MemRefReinterpretCastOp::build(OpBuilder &b, OperationState &result, - MemRefType resultType, Value source, - OpFoldResult offset, - ArrayRef sizes, - ArrayRef strides, - ArrayRef attrs) { - SmallVector staticOffsets, staticSizes, staticStrides; - SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; - dispatchIndexOpFoldResults(offset, dynamicOffsets, staticOffsets, - ShapedType::kDynamicStrideOrOffset); - dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes, - ShapedType::kDynamicSize); - dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides, - ShapedType::kDynamicStrideOrOffset); - build(b, result, resultType, source, dynamicOffsets, dynamicSizes, - dynamicStrides, b.getI64ArrayAttr(staticOffsets), - b.getI64ArrayAttr(staticSizes), b.getI64ArrayAttr(staticStrides)); - result.addAttributes(attrs); -} - -void mlir::MemRefReinterpretCastOp::build(OpBuilder &b, OperationState &result, - MemRefType resultType, Value source, - int64_t offset, - ArrayRef sizes, - ArrayRef strides, - ArrayRef attrs) { - SmallVector sizeValues = - llvm::to_vector<4>(llvm::map_range(sizes, [&](int64_t v) -> OpFoldResult { - return b.getI64IntegerAttr(v); - })); - SmallVector strideValues = llvm::to_vector<4>( - llvm::map_range(strides, [&](int64_t v) -> OpFoldResult { - return b.getI64IntegerAttr(v); - })); - build(b, result, resultType, source, b.getI64IntegerAttr(offset), sizeValues, - strideValues, attrs); -} - -void mlir::MemRefReinterpretCastOp::build(OpBuilder &b, OperationState &result, - MemRefType resultType, Value source, - Value offset, ValueRange sizes, - ValueRange strides, - ArrayRef attrs) { - SmallVector sizeValues = llvm::to_vector<4>( - llvm::map_range(sizes, [](Value v) -> OpFoldResult { return v; })); - SmallVector strideValues = llvm::to_vector<4>( - llvm::map_range(strides, [](Value v) -> OpFoldResult { return v; })); - build(b, result, resultType, source, offset, sizeValues, strideValues, attrs); -} - -// TODO: ponder whether we want to allow missing trailing sizes/strides that are -// completed automatically, like we have for subview and subtensor. -static LogicalResult verify(MemRefReinterpretCastOp op) { - // The source and result memrefs should be in the same memory space. - auto srcType = op.source().getType().cast(); - auto resultType = op.getType().cast(); - if (srcType.getMemorySpaceAsInt() != resultType.getMemorySpaceAsInt()) - return op.emitError("different memory spaces specified for source type ") - << srcType << " and result memref type " << resultType; - if (srcType.getElementType() != resultType.getElementType()) - return op.emitError("different element types specified for source type ") - << srcType << " and result memref type " << resultType; - - // Match sizes in result memref type and in static_sizes attribute. - for (auto &en : - llvm::enumerate(llvm::zip(resultType.getShape(), - extractFromI64ArrayAttr(op.static_sizes())))) { - int64_t resultSize = std::get<0>(en.value()); - int64_t expectedSize = std::get<1>(en.value()); - if (resultSize != expectedSize) - return op.emitError("expected result type with size = ") - << expectedSize << " instead of " << resultSize - << " in dim = " << en.index(); - } - - // Match offset and strides in static_offset and static_strides attributes if - // result memref type has an affine map specified. 
- if (!resultType.getAffineMaps().empty()) { - int64_t resultOffset; - SmallVector resultStrides; - if (failed(getStridesAndOffset(resultType, resultStrides, resultOffset))) - return failure(); - - // Match offset in result memref type and in static_offsets attribute. - int64_t expectedOffset = - extractFromI64ArrayAttr(op.static_offsets()).front(); - if (resultOffset != expectedOffset) - return op.emitError("expected result type with offset = ") - << resultOffset << " instead of " << expectedOffset; - - // Match strides in result memref type and in static_strides attribute. - for (auto &en : llvm::enumerate(llvm::zip( - resultStrides, extractFromI64ArrayAttr(op.static_strides())))) { - int64_t resultStride = std::get<0>(en.value()); - int64_t expectedStride = std::get<1>(en.value()); - if (resultStride != expectedStride) - return op.emitError("expected result type with stride = ") - << expectedStride << " instead of " << resultStride - << " in dim = " << en.index(); - } - } - return success(); -} - -//===----------------------------------------------------------------------===// -// MemRefReshapeOp -//===----------------------------------------------------------------------===// - -static LogicalResult verify(MemRefReshapeOp op) { - Type operandType = op.source().getType(); - Type resultType = op.result().getType(); - - Type operandElementType = operandType.cast().getElementType(); - Type resultElementType = resultType.cast().getElementType(); - if (operandElementType != resultElementType) - return op.emitOpError("element types of source and destination memref " - "types should be the same"); - - if (auto operandMemRefType = operandType.dyn_cast()) - if (!operandMemRefType.getAffineMaps().empty()) - return op.emitOpError( - "source memref type should have identity affine map"); - - int64_t shapeSize = op.shape().getType().cast().getDimSize(0); - auto resultMemRefType = resultType.dyn_cast(); - if (resultMemRefType) { - if (!resultMemRefType.getAffineMaps().empty()) - return op.emitOpError( - "result memref type should have identity affine map"); - if (shapeSize == ShapedType::kDynamicSize) - return op.emitOpError("cannot use shape operand with dynamic length to " - "reshape to statically-ranked memref type"); - if (shapeSize != resultMemRefType.getRank()) - return op.emitOpError( - "length of shape operand differs from the result's memref rank"); - } - return success(); -} - -//===----------------------------------------------------------------------===// -// MulFOp -//===----------------------------------------------------------------------===// - -OpFoldResult MulFOp::fold(ArrayRef operands) { - return constFoldBinaryOp( - operands, [](APFloat a, APFloat b) { return a * b; }); -} - -//===----------------------------------------------------------------------===// -// MulIOp +// MulIOp //===----------------------------------------------------------------------===// OpFoldResult MulIOp::fold(ArrayRef operands) { @@ -2327,76 +1316,6 @@ [](APInt a, APInt b) { return a | b; }); } -//===----------------------------------------------------------------------===// -// PrefetchOp -//===----------------------------------------------------------------------===// - -static void print(OpAsmPrinter &p, PrefetchOp op) { - p << PrefetchOp::getOperationName() << " " << op.memref() << '['; - p.printOperands(op.indices()); - p << ']' << ", " << (op.isWrite() ? "write" : "read"); - p << ", locality<" << op.localityHint(); - p << ">, " << (op.isDataCache() ? 
"data" : "instr"); - p.printOptionalAttrDict( - op->getAttrs(), - /*elidedAttrs=*/{"localityHint", "isWrite", "isDataCache"}); - p << " : " << op.getMemRefType(); -} - -static ParseResult parsePrefetchOp(OpAsmParser &parser, - OperationState &result) { - OpAsmParser::OperandType memrefInfo; - SmallVector indexInfo; - IntegerAttr localityHint; - MemRefType type; - StringRef readOrWrite, cacheType; - - auto indexTy = parser.getBuilder().getIndexType(); - auto i32Type = parser.getBuilder().getIntegerType(32); - if (parser.parseOperand(memrefInfo) || - parser.parseOperandList(indexInfo, OpAsmParser::Delimiter::Square) || - parser.parseComma() || parser.parseKeyword(&readOrWrite) || - parser.parseComma() || parser.parseKeyword("locality") || - parser.parseLess() || - parser.parseAttribute(localityHint, i32Type, "localityHint", - result.attributes) || - parser.parseGreater() || parser.parseComma() || - parser.parseKeyword(&cacheType) || parser.parseColonType(type) || - parser.resolveOperand(memrefInfo, type, result.operands) || - parser.resolveOperands(indexInfo, indexTy, result.operands)) - return failure(); - - if (!readOrWrite.equals("read") && !readOrWrite.equals("write")) - return parser.emitError(parser.getNameLoc(), - "rw specifier has to be 'read' or 'write'"); - result.addAttribute( - PrefetchOp::getIsWriteAttrName(), - parser.getBuilder().getBoolAttr(readOrWrite.equals("write"))); - - if (!cacheType.equals("data") && !cacheType.equals("instr")) - return parser.emitError(parser.getNameLoc(), - "cache type has to be 'data' or 'instr'"); - - result.addAttribute( - PrefetchOp::getIsDataCacheAttrName(), - parser.getBuilder().getBoolAttr(cacheType.equals("data"))); - - return success(); -} - -static LogicalResult verify(PrefetchOp op) { - if (op.getNumOperands() != 1 + op.getMemRefType().getRank()) - return op.emitOpError("too few indices"); - - return success(); -} - -LogicalResult PrefetchOp::fold(ArrayRef cstOperands, - SmallVectorImpl &results) { - // prefetch(memrefcast) -> prefetch - return foldMemRefCast(*this); -} - //===----------------------------------------------------------------------===// // RankOp //===----------------------------------------------------------------------===// @@ -2735,23 +1654,6 @@ return SplatElementsAttr::get(shapedType, {constOperand}); } -//===----------------------------------------------------------------------===// -// StoreOp -//===----------------------------------------------------------------------===// - -static LogicalResult verify(StoreOp op) { - if (op.getNumOperands() != 2 + op.getMemRefType().getRank()) - return op.emitOpError("store index operand count not equal to memref rank"); - - return success(); -} - -LogicalResult StoreOp::fold(ArrayRef cstOperands, - SmallVectorImpl &results) { - /// store(memrefcast) -> store - return foldMemRefCast(*this); -} - //===----------------------------------------------------------------------===// // SubFOp //===----------------------------------------------------------------------===// @@ -2792,97 +1694,33 @@ } //===----------------------------------------------------------------------===// -// SubViewOp +// SubTensorOp //===----------------------------------------------------------------------===// -namespace { -/// Helpers to write more idiomatic operations. 
-namespace saturated_arith { -struct Wrapper { - explicit Wrapper(int64_t v) : v(v) {} - operator int64_t() { return v; } - int64_t v; -}; -Wrapper operator+(Wrapper a, int64_t b) { - if (ShapedType::isDynamicStrideOrOffset(a) || - ShapedType::isDynamicStrideOrOffset(b)) - return Wrapper(ShapedType::kDynamicStrideOrOffset); - return Wrapper(a.v + b); -} -Wrapper operator*(Wrapper a, int64_t b) { - if (ShapedType::isDynamicStrideOrOffset(a) || - ShapedType::isDynamicStrideOrOffset(b)) - return Wrapper(ShapedType::kDynamicStrideOrOffset); - return Wrapper(a.v * b); -} -} // end namespace saturated_arith -} // end namespace - -/// A subview result type can be fully inferred from the source type and the +/// A subtensor result type can be fully inferred from the source type and the /// static representation of offsets, sizes and strides. Special sentinels /// encode the dynamic case. -Type SubViewOp::inferResultType(MemRefType sourceMemRefType, - ArrayRef leadingStaticOffsets, - ArrayRef leadingStaticSizes, - ArrayRef leadingStaticStrides) { - // A subview may specify only a leading subset of offset/sizes/strides in +Type SubTensorOp::inferResultType(RankedTensorType sourceRankedTensorType, + ArrayRef leadingStaticOffsets, + ArrayRef leadingStaticSizes, + ArrayRef leadingStaticStrides) { + // A subtensor may specify only a leading subset of offset/sizes/strides in // which case we complete with offset=0, sizes from memref type and strides=1. - unsigned rank = sourceMemRefType.getRank(); - assert(leadingStaticOffsets.size() <= rank && - "unexpected leadingStaticOffsets overflow"); + unsigned rank = sourceRankedTensorType.getRank(); assert(leadingStaticSizes.size() <= rank && "unexpected leadingStaticSizes overflow"); - assert(leadingStaticStrides.size() <= rank && - "unexpected leadingStaticStrides overflow"); - auto staticOffsets = llvm::to_vector<4>(leadingStaticOffsets); auto staticSizes = llvm::to_vector<4>(leadingStaticSizes); - auto staticStrides = llvm::to_vector<4>(leadingStaticStrides); - unsigned numTrailingOffsets = rank - staticOffsets.size(); unsigned numTrailingSizes = rank - staticSizes.size(); - unsigned numTrailingStrides = rank - staticStrides.size(); - staticOffsets.append(numTrailingOffsets, 0); - llvm::append_range(staticSizes, - sourceMemRefType.getShape().take_back(numTrailingSizes)); - staticStrides.append(numTrailingStrides, 1); - - // Extract source offset and strides. - int64_t sourceOffset; - SmallVector sourceStrides; - auto res = getStridesAndOffset(sourceMemRefType, sourceStrides, sourceOffset); - assert(succeeded(res) && "SubViewOp expected strided memref type"); - (void)res; - - // Compute target offset whose value is: - // `sourceOffset + sum_i(staticOffset_i * sourceStrides_i)`. - int64_t targetOffset = sourceOffset; - for (auto it : llvm::zip(staticOffsets, sourceStrides)) { - auto staticOffset = std::get<0>(it), targetStride = std::get<1>(it); - using namespace saturated_arith; - targetOffset = Wrapper(targetOffset) + Wrapper(staticOffset) * targetStride; - } - - // Compute target stride whose value is: - // `sourceStrides_i * staticStrides_i`. - SmallVector targetStrides; - targetStrides.reserve(staticOffsets.size()); - for (auto it : llvm::zip(sourceStrides, staticStrides)) { - auto sourceStride = std::get<0>(it), staticStride = std::get<1>(it); - using namespace saturated_arith; - targetStrides.push_back(Wrapper(sourceStride) * staticStride); - } - - // The type is now known. 
- return MemRefType::get( - staticSizes, sourceMemRefType.getElementType(), - makeStridedLinearLayoutMap(targetStrides, targetOffset, - sourceMemRefType.getContext()), - sourceMemRefType.getMemorySpaceAsInt()); + llvm::append_range(staticSizes, sourceRankedTensorType.getShape().take_back( + numTrailingSizes)); + return RankedTensorType::get(staticSizes, + sourceRankedTensorType.getElementType()); } -Type SubViewOp::inferResultType(MemRefType sourceMemRefType, - ArrayRef leadingStaticOffsets, - ArrayRef leadingStaticSizes, - ArrayRef leadingStaticStrides) { +Type SubTensorOp::inferResultType(RankedTensorType sourceRankedTensorType, + ArrayRef leadingStaticOffsets, + ArrayRef leadingStaticSizes, + ArrayRef leadingStaticStrides) { SmallVector staticOffsets, staticSizes, staticStrides; SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; dispatchIndexOpFoldResults(leadingStaticOffsets, dynamicOffsets, @@ -2891,55 +1729,39 @@ ShapedType::kDynamicSize); dispatchIndexOpFoldResults(leadingStaticStrides, dynamicStrides, staticStrides, ShapedType::kDynamicStrideOrOffset); - return SubViewOp::inferResultType(sourceMemRefType, staticOffsets, - staticSizes, staticStrides); -} - -static void -getPositionsOfShapeOne(unsigned rank, ArrayRef shape, - llvm::SmallDenseSet &dimsToProject) { - dimsToProject.reserve(rank); - for (unsigned pos = 0, e = shape.size(); pos < e && rank > 0; ++pos) { - if (shape[pos] == 1) { - dimsToProject.insert(pos); - --rank; - } - } + return SubTensorOp::inferResultType(sourceRankedTensorType, staticOffsets, + staticSizes, staticStrides); } -Type SubViewOp::inferRankReducedResultType( - unsigned resultRank, MemRefType sourceRankedTensorType, +/// A subtensor result type can be fully inferred from the source type and the +/// static representation of offsets, sizes and strides. Special sentinels +/// encode the dynamic case. 
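For `subtensor`, only the sizes determine the inferred result type; offsets and strides affect which elements are extracted but not the type, and any trailing sizes left unspecified are completed from the source shape. A sketch with illustrative shapes:

```mlir
// The result type is inferred from the sizes alone: tensor<4x1x8xf32>.
%0 = subtensor %t[1, 2, 0] [4, 1, 8] [1, 1, 1]
    : tensor<16x16x8xf32> to tensor<4x1x8xf32>
```

The rank-reduced variant defined next additionally drops unit dimensions from the inferred type.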
+Type SubTensorOp::inferRankReducedResultType( + unsigned resultRank, RankedTensorType sourceRankedTensorType, ArrayRef leadingStaticOffsets, ArrayRef leadingStaticSizes, ArrayRef leadingStaticStrides) { auto inferredType = inferResultType(sourceRankedTensorType, leadingStaticOffsets, leadingStaticSizes, leadingStaticStrides) - .cast(); - assert(inferredType.getRank() >= resultRank && "expected "); + .cast(); int rankDiff = inferredType.getRank() - resultRank; if (rankDiff > 0) { auto shape = inferredType.getShape(); llvm::SmallDenseSet dimsToProject; - getPositionsOfShapeOne(rankDiff, shape, dimsToProject); + mlir::getPositionsOfShapeOne(rankDiff, shape, dimsToProject); SmallVector projectedShape; for (unsigned pos = 0, e = shape.size(); pos < e; ++pos) if (!dimsToProject.contains(pos)) projectedShape.push_back(shape[pos]); - - AffineMap map; - auto maps = inferredType.getAffineMaps(); - if (!maps.empty() && maps.front()) - map = getProjectedMap(maps.front(), dimsToProject); inferredType = - MemRefType::get(projectedShape, inferredType.getElementType(), map, - inferredType.getMemorySpaceAsInt()); + RankedTensorType::get(projectedShape, inferredType.getElementType()); } return inferredType; } -Type SubViewOp::inferRankReducedResultType( - unsigned resultRank, MemRefType sourceRankedTensorType, +Type SubTensorOp::inferRankReducedResultType( + unsigned resultRank, RankedTensorType sourceRankedTensorType, ArrayRef leadingStaticOffsets, ArrayRef leadingStaticSizes, ArrayRef leadingStaticStrides) { @@ -2951,19 +1773,19 @@ ShapedType::kDynamicSize); dispatchIndexOpFoldResults(leadingStaticStrides, dynamicStrides, staticStrides, ShapedType::kDynamicStrideOrOffset); - return SubViewOp::inferRankReducedResultType( + return SubTensorOp::inferRankReducedResultType( resultRank, sourceRankedTensorType, staticOffsets, staticSizes, staticStrides); } -// Build a SubViewOp with mixed static and dynamic entries and custom result +// Build a SubTensorOp with mixed static and dynamic entries and custom result // type. If the type passed is nullptr, it is inferred. -void mlir::SubViewOp::build(OpBuilder &b, OperationState &result, - MemRefType resultType, Value source, - ArrayRef offsets, - ArrayRef sizes, - ArrayRef strides, - ArrayRef attrs) { +void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result, + RankedTensorType resultType, Value source, + ArrayRef offsets, + ArrayRef sizes, + ArrayRef strides, + ArrayRef attrs) { SmallVector staticOffsets, staticSizes, staticStrides; SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets, @@ -2972,12 +1794,13 @@ ShapedType::kDynamicSize); dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides, ShapedType::kDynamicStrideOrOffset); - auto sourceMemRefType = source.getType().cast(); + auto sourceRankedTensorType = source.getType().cast(); // Structuring implementation this way avoids duplication between builders. 
if (!resultType) { - resultType = SubViewOp::inferResultType(sourceMemRefType, staticOffsets, - staticSizes, staticStrides) - .cast(); + resultType = + SubTensorOp::inferResultType(sourceRankedTensorType, staticOffsets, + staticSizes, staticStrides) + .cast(); } build(b, result, resultType, source, dynamicOffsets, dynamicSizes, dynamicStrides, b.getI64ArrayAttr(staticOffsets), @@ -2985,66 +1808,23 @@ result.addAttributes(attrs); } -// Build a SubViewOp with mixed static and dynamic entries and inferred result +// Build a SubTensorOp with mixed static and dynamic entries and inferred result // type. -void mlir::SubViewOp::build(OpBuilder &b, OperationState &result, Value source, - ArrayRef offsets, - ArrayRef sizes, - ArrayRef strides, - ArrayRef attrs) { - build(b, result, MemRefType(), source, offsets, sizes, strides, attrs); -} - -// Build a SubViewOp with static entries and inferred result type. -void mlir::SubViewOp::build(OpBuilder &b, OperationState &result, Value source, - ArrayRef offsets, ArrayRef sizes, - ArrayRef strides, - ArrayRef attrs) { - SmallVector offsetValues = llvm::to_vector<4>( - llvm::map_range(offsets, [&](int64_t v) -> OpFoldResult { - return b.getI64IntegerAttr(v); - })); - SmallVector sizeValues = - llvm::to_vector<4>(llvm::map_range(sizes, [&](int64_t v) -> OpFoldResult { - return b.getI64IntegerAttr(v); - })); - SmallVector strideValues = llvm::to_vector<4>( - llvm::map_range(strides, [&](int64_t v) -> OpFoldResult { - return b.getI64IntegerAttr(v); - })); - build(b, result, source, offsetValues, sizeValues, strideValues, attrs); -} - -// Build a SubViewOp with dynamic entries and custom result type. If the -// type passed is nullptr, it is inferred. -void mlir::SubViewOp::build(OpBuilder &b, OperationState &result, - MemRefType resultType, Value source, - ArrayRef offsets, ArrayRef sizes, - ArrayRef strides, - ArrayRef attrs) { - SmallVector offsetValues = llvm::to_vector<4>( - llvm::map_range(offsets, [&](int64_t v) -> OpFoldResult { - return b.getI64IntegerAttr(v); - })); - SmallVector sizeValues = - llvm::to_vector<4>(llvm::map_range(sizes, [&](int64_t v) -> OpFoldResult { - return b.getI64IntegerAttr(v); - })); - SmallVector strideValues = llvm::to_vector<4>( - llvm::map_range(strides, [&](int64_t v) -> OpFoldResult { - return b.getI64IntegerAttr(v); - })); - build(b, result, resultType, source, offsetValues, sizeValues, strideValues, - attrs); +void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result, + Value source, ArrayRef offsets, + ArrayRef sizes, + ArrayRef strides, + ArrayRef attrs) { + build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs); } -// Build a SubViewOp with dynamic entries and custom result type. If the type +// Build a SubTensorOp with dynamic entries and custom result type. If the type // passed is nullptr, it is inferred. 
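All of these builders funnel into the same canonical form, in which each offset, size and stride is either an attribute (static) or an SSA value (dynamic). In the printed form the two kinds simply mix, for example (illustrative types and values):

```mlir
// %o is a dynamic offset; the sizes and strides are static attributes.
%1 = subtensor %t[%o, 0] [4, 8] [1, 1] : tensor<?x16xf32> to tensor<4x8xf32>
```

The remaining overloads below differ only in whether the result type is supplied explicitly or inferred.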
-void mlir::SubViewOp::build(OpBuilder &b, OperationState &result, - MemRefType resultType, Value source, - ValueRange offsets, ValueRange sizes, - ValueRange strides, - ArrayRef attrs) { +void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result, + RankedTensorType resultType, Value source, + ValueRange offsets, ValueRange sizes, + ValueRange strides, + ArrayRef attrs) { SmallVector offsetValues = llvm::to_vector<4>( llvm::map_range(offsets, [](Value v) -> OpFoldResult { return v; })); SmallVector sizeValues = llvm::to_vector<4>( @@ -3054,75 +1834,34 @@ build(b, result, resultType, source, offsetValues, sizeValues, strideValues); } -// Build a SubViewOp with dynamic entries and inferred result type. -void mlir::SubViewOp::build(OpBuilder &b, OperationState &result, Value source, - ValueRange offsets, ValueRange sizes, - ValueRange strides, - ArrayRef attrs) { - build(b, result, MemRefType(), source, offsets, sizes, strides, attrs); -} - -/// For ViewLikeOpInterface. -Value SubViewOp::getViewSource() { return source(); } - -/// Given an `originalShape` and a `reducedShape` assumed to be a subset of -/// `originalShape` with some `1` entries erased, return the set of indices -/// that specifies which of the entries of `originalShape` are dropped to obtain -/// `reducedShape`. The returned mask can be applied as a projection to -/// `originalShape` to obtain the `reducedShape`. This mask is useful to track -/// which dimensions must be kept when e.g. compute MemRef strides under -/// rank-reducing operations. Return None if reducedShape cannot be obtained -/// by dropping only `1` entries in `originalShape`. -llvm::Optional> -mlir::computeRankReductionMask(ArrayRef originalShape, - ArrayRef reducedShape) { - size_t originalRank = originalShape.size(), reducedRank = reducedShape.size(); - llvm::SmallDenseSet unusedDims; - unsigned reducedIdx = 0; - for (unsigned originalIdx = 0; originalIdx < originalRank; ++originalIdx) { - // Greedily insert `originalIdx` if no match. - if (reducedIdx < reducedRank && - originalShape[originalIdx] == reducedShape[reducedIdx]) { - reducedIdx++; - continue; - } - - unusedDims.insert(originalIdx); - // If no match on `originalIdx`, the `originalShape` at this dimension - // must be 1, otherwise we bail. - if (originalShape[originalIdx] != 1) - return llvm::None; - } - // The whole reducedShape must be scanned, otherwise we bail. - if (reducedIdx != reducedRank) - return llvm::None; - return unusedDims; +// Build a SubTensorOp with dynamic entries and inferred result type. +void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result, + Value source, ValueRange offsets, + ValueRange sizes, ValueRange strides, + ArrayRef attrs) { + build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs); } -enum SubViewVerificationResult { +enum SubTensorVerificationResult { Success, RankTooLarge, SizeMismatch, ElemTypeMismatch, - MemSpaceMismatch, - AffineMapMismatch }; /// Checks if `original` Type type can be rank reduced to `reduced` type. /// This function is slight variant of `is subsequence` algorithm where /// not matching dimension must be 1. 
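Rank reduction only ever drops unit dimensions: the reduced shape must be obtainable from the original shape by erasing `1` entries, which is what the check below (together with `computeRankReductionMask`) enforces. For example, with illustrative shapes:

```mlir
// Legal: the inferred type tensor<4x1x8xf32> is rank-reduced to tensor<4x8xf32>
// by dropping the unit dimension.
%0 = subtensor %t[0, 0, 0] [4, 1, 8] [1, 1, 1]
    : tensor<16x16x8xf32> to tensor<4x8xf32>
```

Dropping a non-unit dimension (say, asking for `tensor<4xf32>` from the same operation) is rejected with a size-mismatch diagnostic.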
-static SubViewVerificationResult +static SubTensorVerificationResult isRankReducedType(Type originalType, Type candidateReducedType, std::string *errMsg = nullptr) { if (originalType == candidateReducedType) - return SubViewVerificationResult::Success; - if (!originalType.isa() && !originalType.isa()) - return SubViewVerificationResult::Success; + return SubTensorVerificationResult::Success; + if (!originalType.isa()) + return SubTensorVerificationResult::Success; if (originalType.isa() && !candidateReducedType.isa()) - return SubViewVerificationResult::Success; - if (originalType.isa() && !candidateReducedType.isa()) - return SubViewVerificationResult::Success; + return SubTensorVerificationResult::Success; ShapedType originalShapedType = originalType.cast(); ShapedType candidateReducedShapedType = @@ -3135,553 +1874,75 @@ unsigned originalRank = originalShape.size(), candidateReducedRank = candidateReducedShape.size(); if (candidateReducedRank > originalRank) - return SubViewVerificationResult::RankTooLarge; + return SubTensorVerificationResult::RankTooLarge; auto optionalUnusedDimsMask = computeRankReductionMask(originalShape, candidateReducedShape); // Sizes cannot be matched in case empty vector is returned. if (!optionalUnusedDimsMask.hasValue()) - return SubViewVerificationResult::SizeMismatch; + return SubTensorVerificationResult::SizeMismatch; if (originalShapedType.getElementType() != candidateReducedShapedType.getElementType()) - return SubViewVerificationResult::ElemTypeMismatch; + return SubTensorVerificationResult::ElemTypeMismatch; // We are done for the tensor case. if (originalType.isa()) - return SubViewVerificationResult::Success; - - // Strided layout logic is relevant for MemRefType only. - MemRefType original = originalType.cast(); - MemRefType candidateReduced = candidateReducedType.cast(); - if (original.getMemorySpaceAsInt() != candidateReduced.getMemorySpaceAsInt()) - return SubViewVerificationResult::MemSpaceMismatch; + return SubTensorVerificationResult::Success; - llvm::SmallDenseSet unusedDims = optionalUnusedDimsMask.getValue(); - auto inferredType = - getProjectedMap(getStridedLinearLayoutMap(original), unusedDims); - AffineMap candidateLayout; - if (candidateReduced.getAffineMaps().empty()) - candidateLayout = getStridedLinearLayoutMap(candidateReduced); - else - candidateLayout = candidateReduced.getAffineMaps().front(); - assert(inferredType.getNumResults() == 1 && - candidateLayout.getNumResults() == 1); - if (inferredType.getNumSymbols() != candidateLayout.getNumSymbols() || - inferredType.getNumDims() != candidateLayout.getNumDims()) { - if (errMsg) { - llvm::raw_string_ostream os(*errMsg); - os << "inferred type: " << inferredType; - } - return SubViewVerificationResult::AffineMapMismatch; - } - // Check that the difference of the affine maps simplifies to 0. 
- AffineExpr diffExpr = - inferredType.getResult(0) - candidateLayout.getResult(0); - diffExpr = simplifyAffineExpr(diffExpr, inferredType.getNumDims(), - inferredType.getNumSymbols()); - auto cst = diffExpr.dyn_cast(); - if (!(cst && cst.getValue() == 0)) { - if (errMsg) { - llvm::raw_string_ostream os(*errMsg); - os << "inferred type: " << inferredType; - } - return SubViewVerificationResult::AffineMapMismatch; - } - return SubViewVerificationResult::Success; + return SubTensorVerificationResult::Success; } template -static LogicalResult produceSubViewErrorMsg(SubViewVerificationResult result, - OpTy op, Type expectedType, - StringRef errMsg = "") { +static LogicalResult +produceSubTensorErrorMsg(SubTensorVerificationResult result, OpTy op, + Type expectedType, StringRef errMsg = "") { auto memrefType = expectedType.cast(); switch (result) { - case SubViewVerificationResult::Success: + case SubTensorVerificationResult::Success: return success(); - case SubViewVerificationResult::RankTooLarge: + case SubTensorVerificationResult::RankTooLarge: return op.emitError("expected result rank to be smaller or equal to ") << "the source rank. " << errMsg; - case SubViewVerificationResult::SizeMismatch: + case SubTensorVerificationResult::SizeMismatch: return op.emitError("expected result type to be ") << expectedType << " or a rank-reduced version. (mismatch of result sizes) " << errMsg; - case SubViewVerificationResult::ElemTypeMismatch: + case SubTensorVerificationResult::ElemTypeMismatch: return op.emitError("expected result element type to be ") << memrefType.getElementType() << errMsg; - case SubViewVerificationResult::MemSpaceMismatch: - return op.emitError("expected result and source memory spaces to match.") - << errMsg; - case SubViewVerificationResult::AffineMapMismatch: - return op.emitError("expected result type to be ") - << expectedType - << " or a rank-reduced version. (mismatch of result affine map) " - << errMsg; } - llvm_unreachable("unexpected subview verification result"); + llvm_unreachable("unexpected subtensor verification result"); } - -/// Verifier for SubViewOp. -static LogicalResult verify(SubViewOp op) { - MemRefType baseType = op.getSourceType(); - MemRefType subViewType = op.getType(); - - // The base memref and the view memref should be in the same memory space. - if (baseType.getMemorySpaceAsInt() != subViewType.getMemorySpaceAsInt()) - return op.emitError("different memory spaces specified for base memref " - "type ") - << baseType << " and subview memref type " << subViewType; - - // Verify that the base memref type has a strided layout map. - if (!isStrided(baseType)) - return op.emitError("base type ") << baseType << " is not strided"; - +/// Verifier for SubTensorOp. +static LogicalResult verify(SubTensorOp op) { // Verify result type against inferred type. - auto expectedType = SubViewOp::inferResultType( - baseType, extractFromI64ArrayAttr(op.static_offsets()), + auto expectedType = SubTensorOp::inferResultType( + op.getSourceType(), extractFromI64ArrayAttr(op.static_offsets()), extractFromI64ArrayAttr(op.static_sizes()), extractFromI64ArrayAttr(op.static_strides())); - - std::string errMsg; - auto result = isRankReducedType(expectedType, subViewType, &errMsg); - return produceSubViewErrorMsg(result, op, expectedType, errMsg); -} - -raw_ostream &mlir::operator<<(raw_ostream &os, Range &range) { - return os << "range " << range.offset << ":" << range.size << ":" - << range.stride; -} - -/// Return the list of Range (i.e. offset, size, stride). 
Each Range -/// entry contains either the dynamic value or a ConstantIndexOp constructed -/// with `b` at location `loc`. -SmallVector mlir::getOrCreateRanges(OffsetSizeAndStrideOpInterface op, - OpBuilder &b, Location loc) { - std::array ranks = op.getArrayAttrMaxRanks(); - assert(ranks[0] == ranks[1] && "expected offset and sizes of equal ranks"); - assert(ranks[1] == ranks[2] && "expected sizes and strides of equal ranks"); - SmallVector res; - unsigned rank = ranks[0]; - res.reserve(rank); - for (unsigned idx = 0; idx < rank; ++idx) { - Value offset = - op.isDynamicOffset(idx) - ? op.getDynamicOffset(idx) - : b.create(loc, op.getStaticOffset(idx)); - Value size = op.isDynamicSize(idx) - ? op.getDynamicSize(idx) - : b.create(loc, op.getStaticSize(idx)); - Value stride = - op.isDynamicStride(idx) - ? op.getDynamicStride(idx) - : b.create(loc, op.getStaticStride(idx)); - res.emplace_back(Range{offset, size, stride}); - } - return res; + auto result = isRankReducedType(expectedType, op.getType()); + return produceSubTensorErrorMsg(result, op, expectedType); } namespace { - -/// Detects the `values` produced by a ConstantIndexOp and places the new -/// constant in place of the corresponding sentinel value. -void canonicalizeSubViewPart(SmallVectorImpl &values, - llvm::function_ref isDynamic) { - for (OpFoldResult &ofr : values) { - if (ofr.is()) - continue; - // Newly static, move from Value to constant. - if (auto cstOp = ofr.dyn_cast().getDefiningOp()) - ofr = OpBuilder(cstOp).getIndexAttr(cstOp.getValue()); - } -} - -static void replaceWithNewOp(PatternRewriter &rewriter, SubViewOp op, - SubViewOp newOp) { - rewriter.replaceOpWithNewOp(op, newOp, op.getType()); -} - -static void replaceWithNewOp(PatternRewriter &rewriter, SubTensorOp op, - SubTensorOp newOp) { - Value replacement = newOp.getResult(); - if (replacement.getType() != op.getType()) - replacement = - rewriter.create(op.getLoc(), op.getType(), replacement); - rewriter.replaceOp(op, replacement); -} - -/// Pattern to rewrite a subview op with constant arguments. -template -class OpWithOffsetSizesAndStridesConstantArgumentFolder final - : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(OpType op, - PatternRewriter &rewriter) const override { - // No constant operand, just return; - if (llvm::none_of(op.getOperands(), [](Value operand) { - return matchPattern(operand, m_ConstantIndex()); - })) - return failure(); - - // At least one of offsets/sizes/strides is a new constant. - // Form the new list of operands and constant attributes from the existing. - SmallVector mixedOffsets(op.getMixedOffsets()); - SmallVector mixedSizes(op.getMixedSizes()); - SmallVector mixedStrides(op.getMixedStrides()); - canonicalizeSubViewPart(mixedOffsets, ShapedType::isDynamicStrideOrOffset); - canonicalizeSubViewPart(mixedSizes, ShapedType::isDynamic); - canonicalizeSubViewPart(mixedStrides, ShapedType::isDynamicStrideOrOffset); - - // Create the new op in canonical form. - auto newOp = rewriter.create(op.getLoc(), op.source(), mixedOffsets, - mixedSizes, mixedStrides); - - replaceWithNewOp(rewriter, op, newOp); - - return success(); - } -}; - -} // end anonymous namespace - -/// Determines whether MemRefCastOp casts to a more dynamic version of the -/// source memref. This is useful to to fold a memref_cast into a consuming op -/// and implement canonicalization patterns for ops in different dialects that -/// may consume the results of memref_cast operations. 
Such foldable memref_cast -/// operations are typically inserted as `view` and `subview` ops are -/// canonicalized, to preserve the type compatibility of their uses. -/// -/// Returns true when all conditions are met: -/// 1. source and result are ranked memrefs with strided semantics and same -/// element type and rank. -/// 2. each of the source's size, offset or stride has more static information -/// than the corresponding result's size, offset or stride. -/// -/// Example 1: -/// ```mlir -/// %1 = memref_cast %0 : memref<8x16xf32> to memref -/// %2 = consumer %1 ... : memref ... -/// ``` -/// -/// may fold into: -/// -/// ```mlir -/// %2 = consumer %0 ... : memref<8x16xf32> ... -/// ``` -/// -/// Example 2: -/// ``` -/// %1 = memref_cast %0 : memref(16 * i + j)>> -/// to memref -/// consumer %1 : memref ... -/// ``` -/// -/// may fold into: -/// -/// ``` -/// consumer %0 ... : memref(16 * i + j)>> -/// ``` -bool mlir::canFoldIntoConsumerOp(MemRefCastOp castOp) { - MemRefType sourceType = castOp.source().getType().dyn_cast(); - MemRefType resultType = castOp.getType().dyn_cast(); - - // Requires ranked MemRefType. - if (!sourceType || !resultType) - return false; - - // Requires same elemental type. - if (sourceType.getElementType() != resultType.getElementType()) - return false; - - // Requires same rank. - if (sourceType.getRank() != resultType.getRank()) - return false; - - // Only fold casts between strided memref forms. - int64_t sourceOffset, resultOffset; - SmallVector sourceStrides, resultStrides; - if (failed(getStridesAndOffset(sourceType, sourceStrides, sourceOffset)) || - failed(getStridesAndOffset(resultType, resultStrides, resultOffset))) - return false; - - // If cast is towards more static sizes along any dimension, don't fold. - for (auto it : llvm::zip(sourceType.getShape(), resultType.getShape())) { - auto ss = std::get<0>(it), st = std::get<1>(it); - if (ss != st) - if (MemRefType::isDynamic(ss) && !MemRefType::isDynamic(st)) - return false; - } - - // If cast is towards more static offset along any dimension, don't fold. - if (sourceOffset != resultOffset) - if (MemRefType::isDynamicStrideOrOffset(sourceOffset) && - !MemRefType::isDynamicStrideOrOffset(resultOffset)) - return false; - - // If cast is towards more static strides along any dimension, don't fold. - for (auto it : llvm::zip(sourceStrides, resultStrides)) { - auto ss = std::get<0>(it), st = std::get<1>(it); - if (ss != st) - if (MemRefType::isDynamicStrideOrOffset(ss) && - !MemRefType::isDynamicStrideOrOffset(st)) - return false; - } - - return true; -} - -namespace { -/// Pattern to rewrite a subview op with MemRefCast arguments. -/// This essentially pushes memref_cast past its consuming subview when -/// `canFoldIntoConsumerOp` is true. -/// -/// Example: -/// ``` -/// %0 = memref_cast %V : memref<16x16xf32> to memref -/// %1 = subview %0[0, 0][3, 4][1, 1] : -/// memref to memref<3x4xf32, offset:?, strides:[?, 1]> -/// ``` -/// is rewritten into: -/// ``` -/// %0 = subview %V: memref<16x16xf32> to memref<3x4xf32, #[[map0]]> -/// %1 = memref_cast %0: memref<3x4xf32, offset:0, strides:[16, 1]> to -/// memref<3x4xf32, offset:?, strides:[?, 1]> -/// ``` -class SubViewOpMemRefCastFolder final : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(SubViewOp subViewOp, - PatternRewriter &rewriter) const override { - // Any constant operand, just return to let SubViewOpConstantFolder kick in. 
- if (llvm::any_of(subViewOp.getOperands(), [](Value operand) { - return matchPattern(operand, m_ConstantIndex()); - })) - return failure(); - - auto castOp = subViewOp.source().getDefiningOp(); - if (!castOp) - return failure(); - - if (!canFoldIntoConsumerOp(castOp)) - return failure(); - - /// Deduce the resultType of the SubViewOp using `inferSubViewResultType` on - /// the cast source operand type and the SubViewOp static information. This - /// is the resulting type if the MemRefCastOp were folded. - auto resultType = SubViewOp::inferRankReducedResultType( - subViewOp.getType().getRank(), - castOp.source().getType().cast(), - subViewOp.getMixedOffsets(), subViewOp.getMixedSizes(), - subViewOp.getMixedStrides()); - Value newSubView = rewriter.create( - subViewOp.getLoc(), resultType, castOp.source(), subViewOp.offsets(), - subViewOp.sizes(), subViewOp.strides(), subViewOp.static_offsets(), - subViewOp.static_sizes(), subViewOp.static_strides()); - rewriter.replaceOpWithNewOp(subViewOp, subViewOp.getType(), - newSubView); - return success(); - } -}; -} // namespace - -void SubViewOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert, - SubViewOpMemRefCastFolder>(context); -} - -OpFoldResult SubViewOp::fold(ArrayRef operands) { - auto resultShapedType = getResult().getType().cast(); - auto sourceShapedType = source().getType().cast(); - - if (resultShapedType.hasStaticShape() && - resultShapedType == sourceShapedType) { - return getViewSource(); - } - - return {}; -} - -//===----------------------------------------------------------------------===// -// SubTensorOp -//===----------------------------------------------------------------------===// - -/// A subtensor result type can be fully inferred from the source type and the -/// static representation of offsets, sizes and strides. Special sentinels -/// encode the dynamic case. -Type SubTensorOp::inferResultType(RankedTensorType sourceRankedTensorType, - ArrayRef leadingStaticOffsets, - ArrayRef leadingStaticSizes, - ArrayRef leadingStaticStrides) { - // A subtensor may specify only a leading subset of offset/sizes/strides in - // which case we complete with offset=0, sizes from memref type and strides=1. 
- unsigned rank = sourceRankedTensorType.getRank(); - assert(leadingStaticSizes.size() <= rank && - "unexpected leadingStaticSizes overflow"); - auto staticSizes = llvm::to_vector<4>(leadingStaticSizes); - unsigned numTrailingSizes = rank - staticSizes.size(); - llvm::append_range(staticSizes, sourceRankedTensorType.getShape().take_back( - numTrailingSizes)); - return RankedTensorType::get(staticSizes, - sourceRankedTensorType.getElementType()); -} - -Type SubTensorOp::inferResultType(RankedTensorType sourceRankedTensorType, - ArrayRef leadingStaticOffsets, - ArrayRef leadingStaticSizes, - ArrayRef leadingStaticStrides) { - SmallVector staticOffsets, staticSizes, staticStrides; - SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; - dispatchIndexOpFoldResults(leadingStaticOffsets, dynamicOffsets, - staticOffsets, ShapedType::kDynamicStrideOrOffset); - dispatchIndexOpFoldResults(leadingStaticSizes, dynamicSizes, staticSizes, - ShapedType::kDynamicSize); - dispatchIndexOpFoldResults(leadingStaticStrides, dynamicStrides, - staticStrides, ShapedType::kDynamicStrideOrOffset); - return SubTensorOp::inferResultType(sourceRankedTensorType, staticOffsets, - staticSizes, staticStrides); -} - -/// A subtensor result type can be fully inferred from the source type and the -/// static representation of offsets, sizes and strides. Special sentinels -/// encode the dynamic case. -Type SubTensorOp::inferRankReducedResultType( - unsigned resultRank, RankedTensorType sourceRankedTensorType, - ArrayRef leadingStaticOffsets, - ArrayRef leadingStaticSizes, - ArrayRef leadingStaticStrides) { - auto inferredType = - inferResultType(sourceRankedTensorType, leadingStaticOffsets, - leadingStaticSizes, leadingStaticStrides) - .cast(); - int rankDiff = inferredType.getRank() - resultRank; - if (rankDiff > 0) { - auto shape = inferredType.getShape(); - llvm::SmallDenseSet dimsToProject; - getPositionsOfShapeOne(rankDiff, shape, dimsToProject); - SmallVector projectedShape; - for (unsigned pos = 0, e = shape.size(); pos < e; ++pos) - if (!dimsToProject.contains(pos)) - projectedShape.push_back(shape[pos]); - inferredType = - RankedTensorType::get(projectedShape, inferredType.getElementType()); - } - return inferredType; -} - -Type SubTensorOp::inferRankReducedResultType( - unsigned resultRank, RankedTensorType sourceRankedTensorType, - ArrayRef leadingStaticOffsets, - ArrayRef leadingStaticSizes, - ArrayRef leadingStaticStrides) { - SmallVector staticOffsets, staticSizes, staticStrides; - SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; - dispatchIndexOpFoldResults(leadingStaticOffsets, dynamicOffsets, - staticOffsets, ShapedType::kDynamicStrideOrOffset); - dispatchIndexOpFoldResults(leadingStaticSizes, dynamicSizes, staticSizes, - ShapedType::kDynamicSize); - dispatchIndexOpFoldResults(leadingStaticStrides, dynamicStrides, - staticStrides, ShapedType::kDynamicStrideOrOffset); - return SubTensorOp::inferRankReducedResultType( - resultRank, sourceRankedTensorType, staticOffsets, staticSizes, - staticStrides); -} - -// Build a SubTensorOp with mixed static and dynamic entries and custom result -// type. If the type passed is nullptr, it is inferred. 
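Editor's note on the mixed static/dynamic builders in this hunk (one definition begins the next hunk): statically known offsets, sizes, and strides are folded into the `static_offsets`/`static_sizes`/`static_strides` attributes and printed inline, while dynamic entries remain SSA operands. A hedged sketch of how such a mixed `subtensor` prints; names and shapes are invented for illustration:

```mlir
// %off and %sz stay as operands; 0, 4 and the unit strides are encoded as
// static attribute entries by the builder.
func @mixed(%t: tensor<?x?xf32>, %off: index, %sz: index) -> tensor<4x?xf32> {
  %0 = subtensor %t[%off, 0] [4, %sz] [1, 1]
      : tensor<?x?xf32> to tensor<4x?xf32>
  return %0 : tensor<4x?xf32>
}
```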
-void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result, - RankedTensorType resultType, Value source, - ArrayRef offsets, - ArrayRef sizes, - ArrayRef strides, - ArrayRef attrs) { - SmallVector staticOffsets, staticSizes, staticStrides; - SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; - dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets, - ShapedType::kDynamicStrideOrOffset); - dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes, - ShapedType::kDynamicSize); - dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides, - ShapedType::kDynamicStrideOrOffset); - auto sourceRankedTensorType = source.getType().cast(); - // Structuring implementation this way avoids duplication between builders. - if (!resultType) { - resultType = - SubTensorOp::inferResultType(sourceRankedTensorType, staticOffsets, - staticSizes, staticStrides) - .cast(); - } - build(b, result, resultType, source, dynamicOffsets, dynamicSizes, - dynamicStrides, b.getI64ArrayAttr(staticOffsets), - b.getI64ArrayAttr(staticSizes), b.getI64ArrayAttr(staticStrides)); - result.addAttributes(attrs); -} - -// Build a SubTensorOp with mixed static and dynamic entries and inferred result -// type. -void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result, - Value source, ArrayRef offsets, - ArrayRef sizes, - ArrayRef strides, - ArrayRef attrs) { - build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs); -} - -// Build a SubTensorOp with dynamic entries and custom result type. If the type -// passed is nullptr, it is inferred. -void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result, - RankedTensorType resultType, Value source, - ValueRange offsets, ValueRange sizes, - ValueRange strides, - ArrayRef attrs) { - SmallVector offsetValues = llvm::to_vector<4>( - llvm::map_range(offsets, [](Value v) -> OpFoldResult { return v; })); - SmallVector sizeValues = llvm::to_vector<4>( - llvm::map_range(sizes, [](Value v) -> OpFoldResult { return v; })); - SmallVector strideValues = llvm::to_vector<4>( - llvm::map_range(strides, [](Value v) -> OpFoldResult { return v; })); - build(b, result, resultType, source, offsetValues, sizeValues, strideValues); -} - -// Build a SubTensorOp with dynamic entries and inferred result type. -void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result, - Value source, ValueRange offsets, - ValueRange sizes, ValueRange strides, - ArrayRef attrs) { - build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs); -} - -/// Verifier for SubTensorOp. -static LogicalResult verify(SubTensorOp op) { - // Verify result type against inferred type. - auto expectedType = SubTensorOp::inferResultType( - op.getSourceType(), extractFromI64ArrayAttr(op.static_offsets()), - extractFromI64ArrayAttr(op.static_sizes()), - extractFromI64ArrayAttr(op.static_strides())); - auto result = isRankReducedType(expectedType, op.getType()); - return produceSubViewErrorMsg(result, op, expectedType); -} - -namespace { -/// Pattern to rewrite a subtensor op with tensor::Cast arguments. -/// This essentially pushes memref_cast past its consuming subtensor when -/// `canFoldIntoConsumerOp` is true. 
-/// -/// Example: -/// ``` -/// %0 = tensorcast %V : tensor<16x16xf32> to tensor -/// %1 = subtensor %0[0, 0][3, 4][1, 1] : tensor to tensor<3x4xf32> -/// ``` -/// is rewritten into: -/// ``` -/// %0 = subtensor %V[0, 0][3, 4][1, 1] : tensor<16x16xf32> to tensor<3x4xf32> -/// %1 = tensor.cast %0: tensor<3x4xf32> to tensor<3x4xf32> -/// ``` -class SubTensorOpCastFolder final : public OpRewritePattern { +/// Pattern to rewrite a subtensor op with tensor::Cast arguments. +/// This essentially pushes memref_cast past its consuming subtensor when +/// `canFoldIntoConsumerOp` is true. +/// +/// Example: +/// ``` +/// %0 = tensorcast %V : tensor<16x16xf32> to tensor +/// %1 = subtensor %0[0, 0][3, 4][1, 1] : tensor to tensor<3x4xf32> +/// ``` +/// is rewritten into: +/// ``` +/// %0 = subtensor %V[0, 0][3, 4][1, 1] : tensor<16x16xf32> to tensor<3x4xf32> +/// %1 = tensor.cast %0: tensor<3x4xf32> to tensor<3x4xf32> +/// ``` +class SubTensorOpCastFolder final : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; @@ -3689,7 +1950,7 @@ PatternRewriter &rewriter) const override { // Any constant operand, just return to let SubViewOpConstantFolder kick in. if (llvm::any_of(subTensorOp.getOperands(), [](Value operand) { - return matchPattern(operand, m_ConstantIndex()); + return matchPattern(operand, matchConstantIndex()); })) return failure(); @@ -3721,9 +1982,22 @@ }; } // namespace +/// A canonicalizer wrapper to replace SubTensorOps. +struct SubTensorCanonicalizer { + void operator()(PatternRewriter &rewriter, SubTensorOp op, + SubTensorOp newOp) { + Value replacement = newOp.getResult(); + if (replacement.getType() != op.getType()) + replacement = rewriter.create(op.getLoc(), op.getType(), + replacement); + rewriter.replaceOp(op, replacement); + } +}; + void SubTensorOp::getCanonicalizationPatterns(OwningRewritePatternList &results, MLIRContext *context) { - results.insert, + results.insert, SubTensorOpCastFolder>(context); } @@ -3814,12 +2088,13 @@ PatternRewriter &rewriter) const override { // No constant operand, just return. if (llvm::none_of(subTensorInsertOp.getOperands(), [](Value operand) { - return matchPattern(operand, m_ConstantIndex()); + return matchPattern(operand, matchConstantIndex()); })) return failure(); // At least one of offsets/sizes/strides is a new constant. - // Form the new list of operands and constant attributes from the existing. + // Form the new list of operands and constant attributes from the + // existing. SmallVector mixedOffsets(subTensorInsertOp.getMixedOffsets()); SmallVector mixedSizes(subTensorInsertOp.getMixedSizes()); SmallVector mixedStrides(subTensorInsertOp.getMixedStrides()); @@ -3856,7 +2131,7 @@ LogicalResult matchAndRewrite(SubTensorInsertOp subTensorInsertOp, PatternRewriter &rewriter) const override { if (llvm::any_of(subTensorInsertOp.getOperands(), [](Value operand) { - return matchPattern(operand, m_ConstantIndex()); + return matchPattern(operand, matchConstantIndex()); })) return failure(); @@ -3896,175 +2171,6 @@ SubTensorInsertOpCastFolder>(context); } -//===----------------------------------------------------------------------===// -// TensorLoadOp -//===----------------------------------------------------------------------===// - -OpFoldResult TensorLoadOp::fold(ArrayRef) { - if (auto tensorToMemref = memref().getDefiningOp()) - // Approximate alias analysis by conservatively folding only when no there - // is no interleaved operation. 
- if (tensorToMemref->getBlock() == this->getOperation()->getBlock() && - tensorToMemref->getNextNode() == this->getOperation()) - return tensorToMemref.tensor(); - return {}; -} - -//===----------------------------------------------------------------------===// -// TensorToMemrefOp -//===----------------------------------------------------------------------===// - -OpFoldResult TensorToMemrefOp::fold(ArrayRef) { - if (auto tensorLoad = tensor().getDefiningOp()) - if (tensorLoad.memref().getType() == getType()) - return tensorLoad.memref(); - return {}; -} - -namespace { -/// Replace tensor_cast + tensor_to_memref by tensor_to_memref + memref_cast. -struct TensorCastToMemref : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(TensorToMemrefOp tensorToMemRef, - PatternRewriter &rewriter) const final { - auto tensorCastOperand = - tensorToMemRef.getOperand().getDefiningOp(); - if (!tensorCastOperand) - return failure(); - auto srcTensorType = - tensorCastOperand.getOperand().getType().dyn_cast(); - if (!srcTensorType) - return failure(); - auto memrefType = MemRefType::get(srcTensorType.getShape(), - srcTensorType.getElementType()); - Value memref = rewriter.create( - tensorToMemRef.getLoc(), memrefType, tensorCastOperand.getOperand()); - rewriter.replaceOpWithNewOp(tensorToMemRef, - tensorToMemRef.getType(), memref); - return success(); - } -}; - -/// Canonicalize tensor_load + tensor_to_memref to memref_cast when type -/// mismatches prevent `TensorToMemrefOp::fold` to kick in. -struct TensorLoadToMemref : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(TensorToMemrefOp tensorToMemRef, - PatternRewriter &rewriter) const final { - auto tensorLoad = tensorToMemRef.tensor().getDefiningOp(); - // Bail unless we have a tensor_load + tensor_to_memref with different - // types. `TensorToMemrefOp::fold` handles the same type case. - if (!tensorLoad || - tensorLoad.memref().getType() == tensorToMemRef.getType()) - return failure(); - // If types are not cast-compatible, bail. - if (!MemRefCastOp::areCastCompatible(tensorLoad.memref().getType(), - tensorToMemRef.getType())) - return failure(); - rewriter.replaceOpWithNewOp( - tensorToMemRef, tensorToMemRef.getType(), tensorLoad.memref()); - return success(); - } -}; -} // namespace - -void TensorToMemrefOp::getCanonicalizationPatterns( - OwningRewritePatternList &results, MLIRContext *context) { - results.insert(context); -} - -//===----------------------------------------------------------------------===// -// TransposeOp -//===----------------------------------------------------------------------===// - -/// Build a strided memref type by applying `permutationMap` tp `memRefType`. -static MemRefType inferTransposeResultType(MemRefType memRefType, - AffineMap permutationMap) { - auto rank = memRefType.getRank(); - auto originalSizes = memRefType.getShape(); - // Compute permuted sizes. - SmallVector sizes(rank, 0); - for (auto en : llvm::enumerate(permutationMap.getResults())) - sizes[en.index()] = - originalSizes[en.value().cast().getPosition()]; - - // Compute permuted strides. - int64_t offset; - SmallVector strides; - auto res = getStridesAndOffset(memRefType, strides, offset); - assert(succeeded(res) && strides.size() == static_cast(rank)); - (void)res; - auto map = - makeStridedLinearLayoutMap(strides, offset, memRefType.getContext()); - map = permutationMap ? 
map.compose(permutationMap) : map; - return MemRefType::Builder(memRefType).setShape(sizes).setAffineMaps(map); -} - -void TransposeOp::build(OpBuilder &b, OperationState &result, Value in, - AffineMapAttr permutation, - ArrayRef attrs) { - auto permutationMap = permutation.getValue(); - assert(permutationMap); - - auto memRefType = in.getType().cast(); - // Compute result type. - MemRefType resultType = inferTransposeResultType(memRefType, permutationMap); - - build(b, result, resultType, in, attrs); - result.addAttribute(TransposeOp::getPermutationAttrName(), permutation); -} - -// transpose $in $permutation attr-dict : type($in) `to` type(results) -static void print(OpAsmPrinter &p, TransposeOp op) { - p << "transpose " << op.in() << " " << op.permutation(); - p.printOptionalAttrDict(op->getAttrs(), - {TransposeOp::getPermutationAttrName()}); - p << " : " << op.in().getType() << " to " << op.getType(); -} - -static ParseResult parseTransposeOp(OpAsmParser &parser, - OperationState &result) { - OpAsmParser::OperandType in; - AffineMap permutation; - MemRefType srcType, dstType; - if (parser.parseOperand(in) || parser.parseAffineMap(permutation) || - parser.parseOptionalAttrDict(result.attributes) || - parser.parseColonType(srcType) || - parser.resolveOperand(in, srcType, result.operands) || - parser.parseKeywordType("to", dstType) || - parser.addTypeToList(dstType, result.types)) - return failure(); - - result.addAttribute(TransposeOp::getPermutationAttrName(), - AffineMapAttr::get(permutation)); - return success(); -} - -static LogicalResult verify(TransposeOp op) { - if (!op.permutation().isPermutation()) - return op.emitOpError("expected a permutation map"); - if (op.permutation().getNumDims() != op.getShapedType().getRank()) - return op.emitOpError( - "expected a permutation map of same rank as the input"); - - auto srcType = op.in().getType().cast(); - auto dstType = op.getType().cast(); - auto transposedType = inferTransposeResultType(srcType, op.permutation()); - if (dstType != transposedType) - return op.emitOpError("output type ") - << dstType << " does not match transposed input type " << srcType - << ", " << transposedType; - return success(); -} - -OpFoldResult TransposeOp::fold(ArrayRef) { - if (succeeded(foldMemRefCast(*this))) - return getResult(); - return {}; -} - //===----------------------------------------------------------------------===// // TruncateIOp //===----------------------------------------------------------------------===// @@ -4141,172 +2247,6 @@ return IntegerAttr::get(lhs.getType(), lhs.getValue().urem(rhsValue)); } -//===----------------------------------------------------------------------===// -// ViewOp -//===----------------------------------------------------------------------===// - -static ParseResult parseViewOp(OpAsmParser &parser, OperationState &result) { - OpAsmParser::OperandType srcInfo; - SmallVector offsetInfo; - SmallVector sizesInfo; - auto indexType = parser.getBuilder().getIndexType(); - Type srcType, dstType; - llvm::SMLoc offsetLoc; - if (parser.parseOperand(srcInfo) || parser.getCurrentLocation(&offsetLoc) || - parser.parseOperandList(offsetInfo, OpAsmParser::Delimiter::Square)) - return failure(); - - if (offsetInfo.size() != 1) - return parser.emitError(offsetLoc) << "expects 1 offset operand"; - - return failure( - parser.parseOperandList(sizesInfo, OpAsmParser::Delimiter::Square) || - parser.parseOptionalAttrDict(result.attributes) || - parser.parseColonType(srcType) || - parser.resolveOperand(srcInfo, srcType, 
result.operands) || - parser.resolveOperands(offsetInfo, indexType, result.operands) || - parser.resolveOperands(sizesInfo, indexType, result.operands) || - parser.parseKeywordType("to", dstType) || - parser.addTypeToList(dstType, result.types)); -} - -static void print(OpAsmPrinter &p, ViewOp op) { - p << op.getOperationName() << ' ' << op.getOperand(0) << '['; - p.printOperand(op.byte_shift()); - p << "][" << op.sizes() << ']'; - p.printOptionalAttrDict(op->getAttrs()); - p << " : " << op.getOperand(0).getType() << " to " << op.getType(); -} - -static LogicalResult verify(ViewOp op) { - auto baseType = op.getOperand(0).getType().cast(); - auto viewType = op.getType(); - - // The base memref should have identity layout map (or none). - if (baseType.getAffineMaps().size() > 1 || - (baseType.getAffineMaps().size() == 1 && - !baseType.getAffineMaps()[0].isIdentity())) - return op.emitError("unsupported map for base memref type ") << baseType; - - // The result memref should have identity layout map (or none). - if (viewType.getAffineMaps().size() > 1 || - (viewType.getAffineMaps().size() == 1 && - !viewType.getAffineMaps()[0].isIdentity())) - return op.emitError("unsupported map for result memref type ") << viewType; - - // The base memref and the view memref should be in the same memory space. - if (baseType.getMemorySpaceAsInt() != viewType.getMemorySpaceAsInt()) - return op.emitError("different memory spaces specified for base memref " - "type ") - << baseType << " and view memref type " << viewType; - - // Verify that we have the correct number of sizes for the result type. - unsigned numDynamicDims = viewType.getNumDynamicDims(); - if (op.sizes().size() != numDynamicDims) - return op.emitError("incorrect number of size operands for type ") - << viewType; - - return success(); -} - -Value ViewOp::getViewSource() { return source(); } - -namespace { - -struct ViewOpShapeFolder : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(ViewOp viewOp, - PatternRewriter &rewriter) const override { - // Return if none of the operands are constants. - if (llvm::none_of(viewOp.getOperands(), [](Value operand) { - return matchPattern(operand, m_ConstantIndex()); - })) - return failure(); - - // Get result memref type. - auto memrefType = viewOp.getType(); - - // Get offset from old memref view type 'memRefType'. - int64_t oldOffset; - SmallVector oldStrides; - if (failed(getStridesAndOffset(memrefType, oldStrides, oldOffset))) - return failure(); - assert(oldOffset == 0 && "Expected 0 offset"); - - SmallVector newOperands; - - // Offset cannot be folded into result type. - - // Fold any dynamic dim operands which are produced by a constant. - SmallVector newShapeConstants; - newShapeConstants.reserve(memrefType.getRank()); - - unsigned dynamicDimPos = 0; - unsigned rank = memrefType.getRank(); - for (unsigned dim = 0, e = rank; dim < e; ++dim) { - int64_t dimSize = memrefType.getDimSize(dim); - // If this is already static dimension, keep it. - if (!ShapedType::isDynamic(dimSize)) { - newShapeConstants.push_back(dimSize); - continue; - } - auto *defOp = viewOp.sizes()[dynamicDimPos].getDefiningOp(); - if (auto constantIndexOp = dyn_cast_or_null(defOp)) { - // Dynamic shape dimension will be folded. - newShapeConstants.push_back(constantIndexOp.getValue()); - } else { - // Dynamic shape dimension not folded; copy operand from old memref. 
- newShapeConstants.push_back(dimSize); - newOperands.push_back(viewOp.sizes()[dynamicDimPos]); - } - dynamicDimPos++; - } - - // Create new memref type with constant folded dims. - MemRefType newMemRefType = - MemRefType::Builder(memrefType).setShape(newShapeConstants); - // Nothing new, don't fold. - if (newMemRefType == memrefType) - return failure(); - - // Create new ViewOp. - auto newViewOp = rewriter.create(viewOp.getLoc(), newMemRefType, - viewOp.getOperand(0), - viewOp.byte_shift(), newOperands); - // Insert a cast so we have the same type as the old memref type. - rewriter.replaceOpWithNewOp(viewOp, newViewOp, - viewOp.getType()); - return success(); - } -}; - -struct ViewOpMemrefCastFolder : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(ViewOp viewOp, - PatternRewriter &rewriter) const override { - Value memrefOperand = viewOp.getOperand(0); - MemRefCastOp memrefCastOp = memrefOperand.getDefiningOp(); - if (!memrefCastOp) - return failure(); - Value allocOperand = memrefCastOp.getOperand(); - AllocOp allocOp = allocOperand.getDefiningOp(); - if (!allocOp) - return failure(); - rewriter.replaceOpWithNewOp(viewOp, viewOp.getType(), allocOperand, - viewOp.byte_shift(), viewOp.sizes()); - return success(); - } -}; - -} // end anonymous namespace - -void ViewOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert(context); -} - //===----------------------------------------------------------------------===// // XOrOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/StandardOps/Transforms/Bufferize.cpp b/mlir/lib/Dialect/StandardOps/Transforms/Bufferize.cpp --- a/mlir/lib/Dialect/StandardOps/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/StandardOps/Transforms/Bufferize.cpp @@ -12,6 +12,7 @@ #include "mlir/Transforms/Bufferize.h" #include "PassDetail.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/StandardOps/Transforms/Passes.h" @@ -21,15 +22,15 @@ using namespace mlir; namespace { -class BufferizeDimOp : public OpConversionPattern { +class BufferizeDimOp : public OpConversionPattern { public: using OpConversionPattern::OpConversionPattern; LogicalResult - matchAndRewrite(DimOp op, ArrayRef operands, + matchAndRewrite(memref::DimOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { - DimOp::Adaptor adaptor(operands); - rewriter.replaceOpWithNewOp(op, adaptor.memrefOrTensor(), - adaptor.index()); + memref::DimOp::Adaptor adaptor(operands); + rewriter.replaceOpWithNewOp(op, adaptor.memrefOrTensor(), + adaptor.index()); return success(); } }; @@ -67,6 +68,7 @@ OwningRewritePatternList patterns; ConversionTarget target(*context); + target.addLegalDialect(); target.addLegalDialect(); target.addLegalDialect(); @@ -78,8 +80,8 @@ return typeConverter.isLegal(op.getType()) || !op.condition().getType().isa(); }); - target.addDynamicallyLegalOp( - [&](DimOp op) { return typeConverter.isLegal(op); }); + target.addDynamicallyLegalOp( + [&](memref::DimOp op) { return typeConverter.isLegal(op); }); if (failed( applyPartialConversion(getFunction(), target, std::move(patterns)))) signalPassFailure(); diff --git a/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt b/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt --- a/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt +++ 
b/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt @@ -14,6 +14,7 @@ LINK_LIBS PUBLIC MLIRIR + MLIRMemRef MLIRPass MLIRSCF MLIRStandard diff --git a/mlir/lib/Dialect/StandardOps/Transforms/ExpandOps.cpp b/mlir/lib/Dialect/StandardOps/Transforms/ExpandOps.cpp --- a/mlir/lib/Dialect/StandardOps/Transforms/ExpandOps.cpp +++ b/mlir/lib/Dialect/StandardOps/Transforms/ExpandOps.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "PassDetail.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/StandardOps/Transforms/Passes.h" #include "mlir/IR/PatternMatch.h" @@ -70,13 +71,13 @@ } }; -/// Converts `memref_reshape` that has a target shape of a statically-known -/// size to `memref_reinterpret_cast`. -struct MemRefReshapeOpConverter : public OpRewritePattern { +/// Converts `memref.reshape` that has a target shape of a statically-known +/// size to `memref.reinterpret_cast`. +struct MemRefReshapeOpConverter : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(MemRefReshapeOp op, + LogicalResult matchAndRewrite(memref::ReshapeOp op, PatternRewriter &rewriter) const final { auto shapeType = op.shape().getType().cast(); if (!shapeType.hasStaticShape()) @@ -91,7 +92,7 @@ Value stride = rewriter.create(loc, 1); for (int i = rank - 1; i >= 0; --i) { Value index = rewriter.create(loc, i); - Value size = rewriter.create(loc, op.shape(), index); + Value size = rewriter.create(loc, op.shape(), index); if (!size.getType().isa()) size = rewriter.create(loc, size, rewriter.getIndexType()); sizes[i] = size; @@ -99,7 +100,7 @@ if (i > 0) stride = rewriter.create(loc, stride, size); } - rewriter.replaceOpWithNewOp( + rewriter.replaceOpWithNewOp( op, op.getType(), op.source(), /*offset=*/rewriter.getIndexAttr(0), sizes, strides); return success(); @@ -215,12 +216,12 @@ ConversionTarget target(getContext()); - target.addLegalDialect(); + target.addLegalDialect(); target.addDynamicallyLegalOp([](AtomicRMWOp op) { return op.kind() != AtomicRMWKind::maxf && op.kind() != AtomicRMWKind::minf; }); - target.addDynamicallyLegalOp([](MemRefReshapeOp op) { + target.addDynamicallyLegalOp([](memref::ReshapeOp op) { return !op.shape().getType().cast().hasStaticShape(); }); target.addIllegalOp(); diff --git a/mlir/lib/Dialect/StandardOps/Transforms/FuncBufferize.cpp b/mlir/lib/Dialect/StandardOps/Transforms/FuncBufferize.cpp --- a/mlir/lib/Dialect/StandardOps/Transforms/FuncBufferize.cpp +++ b/mlir/lib/Dialect/StandardOps/Transforms/FuncBufferize.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "PassDetail.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/StandardOps/Transforms/FuncConversions.h" #include "mlir/Dialect/StandardOps/Transforms/Passes.h" @@ -22,7 +23,6 @@ namespace { struct FuncBufferizePass : public FuncBufferizeBase { using FuncBufferizeBase::FuncBufferizeBase; - void runOnOperation() override { auto module = getOperation(); auto *context = &getContext(); @@ -43,8 +43,8 @@ populateBranchOpInterfaceTypeConversionPattern(patterns, context, typeConverter); populateReturnOpTypeConversionPattern(patterns, context, typeConverter); - target.addLegalOp(); + target.addLegalOp(); target.markUnknownOpDynamicallyLegal([&](Operation *op) { return isNotBranchOpInterfaceOrReturnLikeOp(op) || diff --git 
a/mlir/lib/Dialect/StandardOps/Transforms/PassDetail.h b/mlir/lib/Dialect/StandardOps/Transforms/PassDetail.h --- a/mlir/lib/Dialect/StandardOps/Transforms/PassDetail.h +++ b/mlir/lib/Dialect/StandardOps/Transforms/PassDetail.h @@ -16,6 +16,10 @@ class AtomicRMWOp; +namespace memref { +class MemRefDialect; +} // end namespace memref + #define GEN_PASS_CLASSES #include "mlir/Dialect/StandardOps/Transforms/Passes.h.inc" diff --git a/mlir/lib/Dialect/StandardOps/Transforms/TensorConstantBufferize.cpp b/mlir/lib/Dialect/StandardOps/Transforms/TensorConstantBufferize.cpp --- a/mlir/lib/Dialect/StandardOps/Transforms/TensorConstantBufferize.cpp +++ b/mlir/lib/Dialect/StandardOps/Transforms/TensorConstantBufferize.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "PassDetail.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/StandardOps/Transforms/Passes.h" #include "mlir/IR/BlockAndValueMapping.h" @@ -26,13 +27,13 @@ class GlobalCreator { public: explicit GlobalCreator(ModuleOp module); - GlobalMemrefOp getGlobalFor(Attribute attr) { + memref::GlobalOp getGlobalFor(Attribute attr) { assert(globals.find(attr) != globals.end() && "unknown constant attr"); return globals[attr]; } private: - DenseMap globals; + DenseMap globals; }; GlobalCreator::GlobalCreator(ModuleOp module) { @@ -58,7 +59,7 @@ interleave(type.getShape(), os, "x"); os << "x" << type.getElementType(); - auto global = globalBuilder.create( + auto global = globalBuilder.create( op.getLoc(), (Twine("__constant_") + os.str()).str(), /*sym_visibility=*/globalBuilder.getStringAttr("private"), /*type=*/typeConverter.convertType(type), @@ -89,8 +90,8 @@ return failure(); auto globalMemref = globals.getGlobalFor(op.value()); - rewriter.replaceOpWithNewOp(op, globalMemref.type(), - globalMemref.getName()); + rewriter.replaceOpWithNewOp(op, globalMemref.type(), + globalMemref.getName()); return success(); } GlobalCreator &globals; @@ -109,7 +110,7 @@ OwningRewritePatternList patterns; ConversionTarget target(*context); - target.addLegalDialect(); + target.addLegalDialect(); patterns.insert(globals, typeConverter, context); target.addDynamicallyLegalOp( [&](ConstantOp op) { return typeConverter.isLegal(op.getType()); }); diff --git a/mlir/lib/Dialect/StandardOps/Utils/Utils.cpp b/mlir/lib/Dialect/StandardOps/Utils/Utils.cpp --- a/mlir/lib/Dialect/StandardOps/Utils/Utils.cpp +++ b/mlir/lib/Dialect/StandardOps/Utils/Utils.cpp @@ -12,6 +12,7 @@ #include "mlir/Dialect/StandardOps/Utils/Utils.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" using namespace mlir; @@ -21,8 +22,41 @@ SmallVector dynOperands; auto shapedType = val.getType().cast(); for (auto dim : llvm::enumerate(shapedType.getShape())) { - if (dim.value() == TensorType::kDynamicSize) - dynOperands.push_back(b.create(loc, val, dim.index())); + if (dim.value() == MemRefType::kDynamicSize) + dynOperands.push_back(b.create(loc, val, dim.index())); } return dynOperands; } + +/// Matches a ConstantIndexOp. +/// TODO: This should probably just be a general matcher that uses matchConstant +/// and checks the operation for an index type. +detail::op_matcher mlir::matchConstantIndex() { + return detail::op_matcher(); +} + +/// Detects the `values` produced by a ConstantIndexOp and places the new +/// constant in place of the corresponding sentinel value. 
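Editor's note on the utilities being added here (`matchConstantIndex`, `canonicalizeSubViewPart`, `getPositionsOfShapeOne`; the first definition follows in the next hunk): they back the constant-argument folding used by the subview/subtensor canonicalization patterns earlier in this patch. A rough before/after illustration of that folding, with the `tensor.cast` reconciliation performed by `SubTensorCanonicalizer`; the function name and shapes are assumptions, not taken from the patch's tests:

```mlir
func @fold_constants(%t: tensor<8x8xf32>) -> tensor<?x?xf32> {
  %c0 = constant 0 : index
  %c4 = constant 4 : index
  %0 = subtensor %t[%c0, %c0] [%c4, %c4] [1, 1]
      : tensor<8x8xf32> to tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

// After canonicalization the constants become static entries, and a
// tensor.cast is inserted to preserve the original result type:
//   %0 = subtensor %t[0, 0] [4, 4] [1, 1] : tensor<8x8xf32> to tensor<4x4xf32>
//   %1 = tensor.cast %0 : tensor<4x4xf32> to tensor<?x?xf32>
```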
+void mlir::canonicalizeSubViewPart( + SmallVectorImpl &values, + llvm::function_ref isDynamic) { + for (OpFoldResult &ofr : values) { + if (ofr.is()) + continue; + // Newly static, move from Value to constant. + if (auto cstOp = ofr.dyn_cast().getDefiningOp()) + ofr = OpBuilder(cstOp).getIndexAttr(cstOp.getValue()); + } +} + +void mlir::getPositionsOfShapeOne( + unsigned rank, ArrayRef shape, + llvm::SmallDenseSet &dimsToProject) { + dimsToProject.reserve(rank); + for (unsigned pos = 0, e = shape.size(); pos < e && rank > 0; ++pos) { + if (shape[pos] == 1) { + dimsToProject.insert(pos); + --rank; + } + } +} diff --git a/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp --- a/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp @@ -12,6 +12,7 @@ #include "mlir/Transforms/Bufferize.h" #include "PassDetail.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" @@ -28,7 +29,7 @@ matchAndRewrite(tensor::CastOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { auto resultType = getTypeConverter()->convertType(op.getType()); - rewriter.replaceOpWithNewOp(op, resultType, operands[0]); + rewriter.replaceOpWithNewOp(op, resultType, operands[0]); return success(); } }; @@ -42,8 +43,8 @@ matchAndRewrite(tensor::ExtractOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { tensor::ExtractOp::Adaptor adaptor(operands); - rewriter.replaceOpWithNewOp(op, adaptor.tensor(), - adaptor.indices()); + rewriter.replaceOpWithNewOp(op, adaptor.tensor(), + adaptor.indices()); return success(); } }; @@ -60,11 +61,12 @@ int numberOfElements = op.elements().size(); auto resultType = MemRefType::get( {numberOfElements}, op.getType().cast().getElementType()); - Value result = rewriter.create(op.getLoc(), resultType); + Value result = rewriter.create(op.getLoc(), resultType); for (auto element : llvm::enumerate(op.elements())) { Value index = rewriter.create(op.getLoc(), element.index()); - rewriter.create(op.getLoc(), element.value(), result, index); + rewriter.create(op.getLoc(), element.value(), result, + index); } rewriter.replaceOp(op, {result}); return success(); @@ -86,8 +88,8 @@ RankedTensorType tensorType = op.getType().cast(); MemRefType memrefType = MemRefType::get(tensorType.getShape(), tensorType.getElementType()); - Value result = - rewriter.create(loc, memrefType, transformed.dynamicExtents()); + Value result = rewriter.create( + loc, memrefType, transformed.dynamicExtents()); // Collect loop bounds. int64_t rank = tensorType.getRank(); @@ -125,9 +127,9 @@ // about creating that. 
Operation *elementYield = parallelBody->getTerminator()->getPrevNode(); rewriter.setInsertionPointAfter(elementYield); - rewriter.replaceOpWithNewOp(elementYield, - elementYield->getOperands()[0], result, - parallelBody->getArguments()); + rewriter.replaceOpWithNewOp( + elementYield, elementYield->getOperands()[0], result, + parallelBody->getArguments()); rewriter.replaceOp(op, {result}); return success(); @@ -155,6 +157,7 @@ populateTensorBufferizePatterns(context, typeConverter, patterns); target.addIllegalOp(); + target.addLegalDialect(); target.addLegalDialect(); target.addLegalDialect(); diff --git a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt --- a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt @@ -9,6 +9,7 @@ LINK_LIBS PUBLIC MLIRIR + MLIRMemRef MLIRPass MLIRSCF MLIRTensor diff --git a/mlir/lib/Dialect/Vector/CMakeLists.txt b/mlir/lib/Dialect/Vector/CMakeLists.txt --- a/mlir/lib/Dialect/Vector/CMakeLists.txt +++ b/mlir/lib/Dialect/Vector/CMakeLists.txt @@ -20,6 +20,7 @@ MLIRStandard MLIRAffine MLIRLinalg + MLIRMemRef MLIRSCF MLIRLoopAnalysis MLIRSideEffectInterfaces diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/Vector/VectorOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Utils/StructuredOpsUtils.h" @@ -2396,12 +2397,12 @@ /// ``` /// someop(memrefcast) -> someop /// ``` -/// It folds the source of the memref_cast into the root operation directly. +/// It folds the source of the memref.cast into the root operation directly. static LogicalResult foldMemRefCast(Operation *op) { bool folded = false; for (OpOperand &operand : op->getOpOperands()) { - auto castOp = operand.get().getDefiningOp(); - if (castOp && canFoldIntoConsumerOp(castOp)) { + auto castOp = operand.get().getDefiningOp(); + if (castOp && memref::CastOp::canFoldIntoConsumerOp(castOp)) { operand.set(castOp.getOperand()); folded = true; } diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp --- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -16,6 +16,8 @@ #include "mlir/Dialect/Affine/EDSC/Intrinsics.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Linalg/EDSC/Intrinsics.h" +#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/EDSC/Intrinsics.h" #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" @@ -2282,7 +2284,7 @@ // Fold or create the check that `index + vector_size` <= `memref_size`. Value sum = xferOp.indices()[indicesIdx] + std_constant_index(vectorSize); Value cond = - createScopedFoldedSLE(sum, std_dim(xferOp.source(), indicesIdx)); + createScopedFoldedSLE(sum, memref_dim(xferOp.source(), indicesIdx)); if (!cond) return; // Conjunction over all dims for which we are in-bounds. @@ -2317,7 +2319,7 @@ /// b. using a dynamic shape and/or stride for the dimensions that don't /// agree. 
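Editor's note on the cast-compatibility helper whose comment closes this hunk (`getCastCompatibleMemRefType`, defined at the start of the next hunk): when the fast and slow paths of the vector-transfer split yield different memref types, both sides are cast to a common type that is dynamic wherever the two disagree. A hedged sketch of the resulting casts using the `memref.cast` spelling introduced by this patch; the concrete shapes are invented:

```mlir
func @common_type(%A: memref<7x13xf32>, %alloc: memref<4x8xf32>)
    -> (memref<?x?xf32>, memref<?x?xf32>) {
  // Both views cast to a type with dynamic extents where they differ, so
  // the two scf.if branches can yield the same type.
  %0 = memref.cast %A     : memref<7x13xf32> to memref<?x?xf32>
  %1 = memref.cast %alloc : memref<4x8xf32>  to memref<?x?xf32>
  return %0, %1 : memref<?x?xf32>, memref<?x?xf32>
}
```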
static MemRefType getCastCompatibleMemRefType(MemRefType aT, MemRefType bT) { - if (MemRefCastOp::areCastCompatible(aT, bT)) + if (memref::CastOp::areCastCompatible(aT, bT)) return aT; if (aT.getRank() != bT.getRank()) return MemRefType(); @@ -2362,8 +2364,8 @@ sizes.append(leadingIndices.begin(), leadingIndices.end()); xferOp.zipResultAndIndexing([&](int64_t resultIdx, int64_t indicesIdx) { using MapList = ArrayRef>; - Value dimMemRef = std_dim(xferOp.source(), indicesIdx); - Value dimAlloc = std_dim(alloc, resultIdx); + Value dimMemRef = memref_dim(xferOp.source(), indicesIdx); + Value dimAlloc = memref_dim(alloc, resultIdx); Value index = xferOp.indices()[indicesIdx]; AffineExpr i, j, k; bindDims(xferOp.getContext(), i, j, k); @@ -2377,7 +2379,7 @@ SmallVector indices = llvm::to_vector<4>(llvm::map_range( xferOp.indices(), [](Value idx) -> OpFoldResult { return idx; })); - return std_sub_view( + return memref_sub_view( xferOp.source(), indices, sizes, SmallVector(memrefRank, OpBuilder(xferOp).getIndexAttr(1))); } @@ -2388,13 +2390,13 @@ /// Produce IR resembling: /// ``` /// %1:3 = scf.if (%inBounds) { -/// memref_cast %A: memref to compatibleMemRefType +/// memref.cast %A: memref to compatibleMemRefType /// scf.yield %view, ... : compatibleMemRefType, index, index /// } else { /// %2 = linalg.fill(%alloc, %pad) /// %3 = subview %view [...][...][...] /// linalg.copy(%3, %alloc) -/// memref_cast %alloc: memref to compatibleMemRefType +/// memref.cast %alloc: memref to compatibleMemRefType /// scf.yield %4, ... : compatibleMemRefType, index, index /// } /// ``` @@ -2412,7 +2414,7 @@ [&]() -> scf::ValueVector { Value res = memref; if (compatibleMemRefType != xferOp.getShapedType()) - res = std_memref_cast(memref, compatibleMemRefType); + res = memref_cast(memref, compatibleMemRefType); scf::ValueVector viewAndIndices{res}; viewAndIndices.insert(viewAndIndices.end(), xferOp.indices().begin(), xferOp.indices().end()); @@ -2425,7 +2427,7 @@ Value memRefSubView = createScopedSubViewIntersection( cast(xferOp.getOperation()), alloc); linalg_copy(memRefSubView, alloc); - Value casted = std_memref_cast(alloc, compatibleMemRefType); + Value casted = memref_cast(alloc, compatibleMemRefType); scf::ValueVector viewAndIndices{casted}; viewAndIndices.insert(viewAndIndices.end(), xferOp.getTransferRank(), zero); @@ -2441,14 +2443,14 @@ /// Produce IR resembling: /// ``` /// %1:3 = scf.if (%inBounds) { -/// memref_cast %A: memref to compatibleMemRefType +/// memref.cast %A: memref to compatibleMemRefType /// scf.yield %view, ... : compatibleMemRefType, index, index /// } else { /// %2 = vector.transfer_read %view[...], %pad : memref, vector<...> /// %3 = vector.type_cast %extra_alloc : /// memref<...> to memref> /// store %2, %3[] : memref> -/// %4 = memref_cast %alloc: memref to compatibleMemRefType +/// %4 = memref.cast %alloc: memref to compatibleMemRefType /// scf.yield %4, ... 
: compatibleMemRefType, index, index /// } /// ``` @@ -2466,7 +2468,7 @@ [&]() -> scf::ValueVector { Value res = memref; if (compatibleMemRefType != xferOp.getShapedType()) - res = std_memref_cast(memref, compatibleMemRefType); + res = memref_cast(memref, compatibleMemRefType); scf::ValueVector viewAndIndices{res}; viewAndIndices.insert(viewAndIndices.end(), xferOp.indices().begin(), xferOp.indices().end()); @@ -2476,10 +2478,10 @@ Operation *newXfer = ScopedContext::getBuilderRef().clone(*xferOp.getOperation()); Value vector = cast(newXfer).vector(); - std_store(vector, vector_type_cast( - MemRefType::get({}, vector.getType()), alloc)); + memref_store(vector, vector_type_cast( + MemRefType::get({}, vector.getType()), alloc)); - Value casted = std_memref_cast(alloc, compatibleMemRefType); + Value casted = memref_cast(alloc, compatibleMemRefType); scf::ValueVector viewAndIndices{casted}; viewAndIndices.insert(viewAndIndices.end(), xferOp.getTransferRank(), zero); @@ -2506,11 +2508,11 @@ /// ``` /// %1:3 = scf.if (%inBounds) { /// // fastpath, direct cast -/// memref_cast %A: memref to compatibleMemRefType +/// memref.cast %A: memref to compatibleMemRefType /// scf.yield %view : compatibleMemRefType, index, index /// } else { /// // slowpath, masked vector.transfer or linalg.copy. -/// memref_cast %alloc: memref to compatibleMemRefType +/// memref.cast %alloc: memref to compatibleMemRefType /// scf.yield %4 : compatibleMemRefType, index, index // } /// %0 = vector.transfer_read %1#0[%1#1, %1#2] {masked = [false ... false]} @@ -2565,8 +2567,8 @@ b.setInsertionPointToStart(&funcOp.getRegion().front()); auto shape = xferOp.getVectorType().getShape(); Type elementType = xferOp.getVectorType().getElementType(); - alloc = std_alloca(MemRefType::get(shape, elementType), ValueRange{}, - b.getI64IntegerAttr(32)); + alloc = memref_alloca(MemRefType::get(shape, elementType), ValueRange{}, + b.getI64IntegerAttr(32)); } MemRefType compatibleMemRefType = @@ -2780,8 +2782,8 @@ if (!broadcastedDims.empty() && unbroadcastedVectorType.getNumElements() == 1) { // If broadcasting is required and the number of loaded elements is 1 then - // we can create `std.load` instead of `vector.load`. - loadOp = rewriter.create(read.getLoc(), read.source(), + // we can create `memref.load` instead of `vector.load`. + loadOp = rewriter.create(read.getLoc(), read.source(), read.indices()); } else { // Otherwise create `vector.load`. diff --git a/mlir/lib/Transforms/BufferDeallocation.cpp b/mlir/lib/Transforms/BufferDeallocation.cpp --- a/mlir/lib/Transforms/BufferDeallocation.cpp +++ b/mlir/lib/Transforms/BufferDeallocation.cpp @@ -53,6 +53,7 @@ #include "PassDetail.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/StandardOps/Utils/Utils.h" #include "mlir/IR/Operation.h" @@ -425,8 +426,8 @@ // TODO: provide a generic interface to create dialect-specific // Alloc and CopyOp nodes. - auto alloc = builder.create(terminator->getLoc(), memRefType, - dynamicOperands); + auto alloc = builder.create(terminator->getLoc(), + memRefType, dynamicOperands); // Create a new copy operation that copies to contents of the old // allocation to the new one. @@ -499,7 +500,7 @@ continue; // If there is no dealloc node, insert one in the right place. 
OpBuilder builder(nextOp); - builder.create(alloc.getLoc(), alloc); + builder.create(alloc.getLoc(), alloc); } } } diff --git a/mlir/lib/Transforms/BufferOptimizations.cpp b/mlir/lib/Transforms/BufferOptimizations.cpp --- a/mlir/lib/Transforms/BufferOptimizations.cpp +++ b/mlir/lib/Transforms/BufferOptimizations.cpp @@ -12,6 +12,7 @@ // convert heap-based allocations to stack-based allocations, if possible. #include "PassDetail.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/IR/Operation.h" #include "mlir/Interfaces/LoopLikeInterface.h" #include "mlir/Pass/Pass.h" @@ -33,7 +34,7 @@ unsigned bitwidthOfIndexType, unsigned maxRankOfAllocatedMemRef) { auto type = alloc.getType().dyn_cast(); - if (!type || !alloc.getDefiningOp()) + if (!type || !alloc.getDefiningOp()) return false; if (!type.hasStaticShape()) { // Check if the dynamic shape dimension of the alloc is produced by RankOp. @@ -317,7 +318,7 @@ // `AutomaticAllocationScope` determined during the initialization phase. OpBuilder builder(startOperation); Operation *allocOp = alloc.getDefiningOp(); - Operation *alloca = builder.create( + Operation *alloca = builder.create( alloc.getLoc(), alloc.getType().cast(), allocOp->getOperands()); diff --git a/mlir/lib/Transforms/BufferResultsToOutParams.cpp b/mlir/lib/Transforms/BufferResultsToOutParams.cpp --- a/mlir/lib/Transforms/BufferResultsToOutParams.cpp +++ b/mlir/lib/Transforms/BufferResultsToOutParams.cpp @@ -8,6 +8,7 @@ #include "PassDetail.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/Operation.h" #include "mlir/Pass/Pass.h" @@ -99,7 +100,7 @@ didFail = true; return; } - Value outParam = builder.create( + Value outParam = builder.create( op.getLoc(), memref.getType().cast()); memref.replaceAllUsesWith(outParam); outParams.push_back(outParam); diff --git a/mlir/lib/Transforms/Bufferize.cpp b/mlir/lib/Transforms/Bufferize.cpp --- a/mlir/lib/Transforms/Bufferize.cpp +++ b/mlir/lib/Transforms/Bufferize.cpp @@ -8,6 +8,7 @@ #include "mlir/Transforms/Bufferize.h" #include "PassDetail.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/IR/Operation.h" #include "mlir/Transforms/Passes.h" @@ -21,7 +22,7 @@ ValueRange inputs, Location loc) { assert(inputs.size() == 1); assert(inputs[0].getType().isa()); - return builder.create(loc, type, inputs[0]); + return builder.create(loc, type, inputs[0]); } /// Registers conversions into BufferizeTypeConverter @@ -42,24 +43,24 @@ ValueRange inputs, Location loc) -> Value { assert(inputs.size() == 1); assert(inputs[0].getType().isa()); - return builder.create(loc, type, inputs[0]); + return builder.create(loc, type, inputs[0]); }); } void mlir::populateBufferizeMaterializationLegality(ConversionTarget &target) { - target.addLegalOp(); + target.addLegalOp(); } namespace { // In a finalizing bufferize conversion, we know that all tensors have been // converted to memrefs, thus, this op becomes an identity. 
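Editor's note on the renamed bufferization materializations in this hunk: partial bufferization inserts `memref.buffer_cast` / `memref.tensor_load` pairs at type-conversion boundaries, and the finalizing patterns registered just below erase them once no tensors remain. A small sketch using the op spellings introduced by this patch; the function name and shape are illustrative:

```mlir
func @materializations(%t: tensor<4xf32>) -> tensor<4xf32> {
  // Inserted automatically as materializations during partial bufferization;
  // finalizing bufferization folds the pair away, rewiring users of %t2 to
  // the buffer that %t converts to.
  %m  = memref.buffer_cast %t : memref<4xf32>
  %t2 = memref.tensor_load %m : memref<4xf32>
  return %t2 : tensor<4xf32>
}
```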
-class BufferizeTensorLoadOp : public OpConversionPattern { +class BufferizeTensorLoadOp : public OpConversionPattern { public: using OpConversionPattern::OpConversionPattern; LogicalResult - matchAndRewrite(TensorLoadOp op, ArrayRef operands, + matchAndRewrite(memref::TensorLoadOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { - TensorLoadOp::Adaptor adaptor(operands); + memref::TensorLoadOp::Adaptor adaptor(operands); rewriter.replaceOp(op, adaptor.memref()); return success(); } @@ -69,13 +70,13 @@ namespace { // In a finalizing bufferize conversion, we know that all tensors have been // converted to memrefs, thus, this op becomes an identity. -class BufferizeTensorToMemrefOp : public OpConversionPattern { +class BufferizeCastOp : public OpConversionPattern { public: using OpConversionPattern::OpConversionPattern; LogicalResult - matchAndRewrite(TensorToMemrefOp op, ArrayRef operands, + matchAndRewrite(memref::BufferCastOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { - TensorToMemrefOp::Adaptor adaptor(operands); + memref::BufferCastOp::Adaptor adaptor(operands); rewriter.replaceOp(op, adaptor.tensor()); return success(); } @@ -85,8 +86,8 @@ void mlir::populateEliminateBufferizeMaterializationsPatterns( MLIRContext *context, BufferizeTypeConverter &typeConverter, OwningRewritePatternList &patterns) { - patterns.insert( - typeConverter, context); + patterns.insert(typeConverter, + context); } namespace { diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt --- a/mlir/lib/Transforms/CMakeLists.txt +++ b/mlir/lib/Transforms/CMakeLists.txt @@ -38,6 +38,7 @@ MLIRCopyOpInterface MLIRLinalg MLIRLoopLikeInterface + MLIRMemRef MLIRSCF MLIRPass MLIRTransformUtils diff --git a/mlir/lib/Transforms/Canonicalizer.cpp b/mlir/lib/Transforms/Canonicalizer.cpp --- a/mlir/lib/Transforms/Canonicalizer.cpp +++ b/mlir/lib/Transforms/Canonicalizer.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "PassDetail.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp --- a/mlir/lib/Transforms/LoopFusion.cpp +++ b/mlir/lib/Transforms/LoopFusion.cpp @@ -16,6 +16,7 @@ #include "mlir/Analysis/LoopAnalysis.h" #include "mlir/Analysis/Utils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/AffineMap.h" #include "mlir/IR/Builders.h" @@ -958,7 +959,7 @@ // consumer loop nests to reduce their live range. Currently they are added // at the beginning of the function, because loop nests can be reordered // during the fusion pass. - Value newMemRef = top.create(forOp.getLoc(), newMemRefType); + Value newMemRef = top.create(forOp.getLoc(), newMemRefType); // Build an AffineMap to remap access functions based on lower bound offsets. SmallVector remapExprs; @@ -1937,7 +1938,7 @@ continue; // Use list expected to match the dep graph info. 
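Because the template arguments are hard to read in the flattened hunk above, here is a cleaned-up sketch of the finalizing-bufferize identity pattern for `memref.tensor_load` after the rename. It assumes the `OpConversionPattern` API of this era; the class name is illustrative, not the in-tree one.

```c++
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Transforms/DialectConversion.h"

using namespace mlir;

// Once every producer has been bufferized, tensor_load is an identity: the
// memref operand can directly replace the tensor result.
struct FoldTensorLoadPattern
    : public OpConversionPattern<memref::TensorLoadOp> {
  using OpConversionPattern<memref::TensorLoadOp>::OpConversionPattern;

  LogicalResult
  matchAndRewrite(memref::TensorLoadOp op, ArrayRef<Value> operands,
                  ConversionPatternRewriter &rewriter) const override {
    memref::TensorLoadOp::Adaptor adaptor(operands);
    rewriter.replaceOp(op, adaptor.memref());
    return success();
  }
};
```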
auto *op = memref.getDefiningOp(); - if (isa_and_nonnull(op)) + if (isa_and_nonnull(op)) op->erase(); } } diff --git a/mlir/lib/Transforms/MemRefDataFlowOpt.cpp b/mlir/lib/Transforms/MemRefDataFlowOpt.cpp --- a/mlir/lib/Transforms/MemRefDataFlowOpt.cpp +++ b/mlir/lib/Transforms/MemRefDataFlowOpt.cpp @@ -17,6 +17,7 @@ #include "mlir/Analysis/AffineAnalysis.h" #include "mlir/Analysis/Utils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/Dominance.h" #include "mlir/Transforms/Passes.h" @@ -168,7 +169,7 @@ // Perform the actual store to load forwarding. Value storeVal = - cast(lastWriteStoreOp).getValueToStore(); + cast(lastWriteStoreOp).getValueToStore(); loadOp.getValue().replaceAllUsesWith(storeVal); // Record the memref for a later sweep to optimize away. memrefsToErase.insert(loadOp.getMemRef()); @@ -203,12 +204,12 @@ for (auto memref : memrefsToErase) { // If the memref hasn't been alloc'ed in this function, skip. Operation *defOp = memref.getDefiningOp(); - if (!defOp || !isa(defOp)) + if (!defOp || !isa(defOp)) // TODO: if the memref was returned by a 'call' operation, we // could still erase it if the call had no side-effects. continue; if (llvm::any_of(memref.getUsers(), [&](Operation *ownerOp) { - return !isa(ownerOp); + return !isa(ownerOp); })) continue; diff --git a/mlir/lib/Transforms/NormalizeMemRefs.cpp b/mlir/lib/Transforms/NormalizeMemRefs.cpp --- a/mlir/lib/Transforms/NormalizeMemRefs.cpp +++ b/mlir/lib/Transforms/NormalizeMemRefs.cpp @@ -13,6 +13,7 @@ #include "PassDetail.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Transforms/Passes.h" #include "mlir/Transforms/Utils.h" #include "llvm/ADT/SmallSet.h" @@ -152,7 +153,7 @@ return true; if (funcOp - .walk([&](AllocOp allocOp) -> WalkResult { + .walk([&](memref::AllocOp allocOp) -> WalkResult { Value oldMemRef = allocOp.getResult(); if (!isMemRefNormalizable(oldMemRef.getUsers())) return WalkResult::interrupt(); @@ -326,10 +327,10 @@ // Turn memrefs' non-identity layouts maps into ones with identity. Collect // alloc ops first and then process since normalizeMemRef replaces/erases ops // during memref rewriting. - SmallVector allocOps; - funcOp.walk([&](AllocOp op) { allocOps.push_back(op); }); - for (AllocOp allocOp : allocOps) - (void)normalizeMemRef(allocOp); + SmallVector allocOps; + funcOp.walk([&](memref::AllocOp op) { allocOps.push_back(op); }); + for (memref::AllocOp allocOp : allocOps) + (void)normalizeMemRef(&allocOp); // We use this OpBuilder to create new memref layout later. 
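The NormalizeMemRefs change above follows a common two-phase idiom: collect the `memref.alloc` ops first, then rewrite them, because the rewrite erases the very ops being walked. A compressed sketch of that idiom (the helper name is illustrative):

```c++
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/BuiltinOps.h"
#include "llvm/ADT/SmallVector.h"

using namespace mlir;

// Gather allocs up front so the subsequent rewrite does not invalidate the
// walk; mirrors the pattern used in the NormalizeMemRefs hunk.
static SmallVector<memref::AllocOp, 4> collectAllocs(FuncOp funcOp) {
  SmallVector<memref::AllocOp, 4> allocOps;
  funcOp.walk([&](memref::AllocOp op) { allocOps.push_back(op); });
  return allocOps;
}
```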
OpBuilder b(funcOp); diff --git a/mlir/lib/Transforms/PassDetail.h b/mlir/lib/Transforms/PassDetail.h --- a/mlir/lib/Transforms/PassDetail.h +++ b/mlir/lib/Transforms/PassDetail.h @@ -20,6 +20,10 @@ class LinalgDialect; } // end namespace linalg +namespace memref { +class MemRefDialect; +} // end namespace memref + #define GEN_PASS_CLASSES #include "mlir/Transforms/Passes.h.inc" diff --git a/mlir/lib/Transforms/PipelineDataTransfer.cpp b/mlir/lib/Transforms/PipelineDataTransfer.cpp --- a/mlir/lib/Transforms/PipelineDataTransfer.cpp +++ b/mlir/lib/Transforms/PipelineDataTransfer.cpp @@ -17,6 +17,7 @@ #include "mlir/Analysis/LoopAnalysis.h" #include "mlir/Analysis/Utils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/Utils/Utils.h" #include "mlir/IR/Builders.h" #include "mlir/Transforms/LoopUtils.h" @@ -88,8 +89,8 @@ auto allocOperands = getDynOperands(forOp.getLoc(), oldMemRef, bOuter); // Create and place the alloc right before the 'affine.for' operation. - Value newMemRef = - bOuter.create(forOp.getLoc(), newMemRefType, allocOperands); + Value newMemRef = bOuter.create( + forOp.getLoc(), newMemRefType, allocOperands); // Create 'iv mod 2' value to index the leading dimension. auto d0 = bInner.getAffineDimExpr(0); @@ -115,7 +116,7 @@ } // Insert the dealloc op right after the for loop. bOuter.setInsertionPointAfter(forOp); - bOuter.create(forOp.getLoc(), newMemRef); + bOuter.create(forOp.getLoc(), newMemRef); return true; } @@ -201,7 +202,7 @@ bool escapingUses = false; for (auto *user : memref.getUsers()) { // We can double buffer regardless of dealloc's outside the loop. - if (isa(user)) + if (isa(user)) continue; if (!forOp.getBody()->findAncestorOpInBlock(*user)) { LLVM_DEBUG(llvm::dbgs() @@ -274,7 +275,8 @@ if (oldMemRef.use_empty()) { allocOp->erase(); } else if (oldMemRef.hasOneUse()) { - if (auto dealloc = dyn_cast(*oldMemRef.user_begin())) { + if (auto dealloc = + dyn_cast(*oldMemRef.user_begin())) { dealloc.erase(); allocOp->erase(); } @@ -296,7 +298,8 @@ if (oldTagMemRef.use_empty()) { tagAllocOp->erase(); } else if (oldTagMemRef.hasOneUse()) { - if (auto dealloc = dyn_cast(*oldTagMemRef.user_begin())) { + if (auto dealloc = + dyn_cast(*oldTagMemRef.user_begin())) { dealloc.erase(); tagAllocOp->erase(); } diff --git a/mlir/lib/Transforms/Utils/CMakeLists.txt b/mlir/lib/Transforms/Utils/CMakeLists.txt --- a/mlir/lib/Transforms/Utils/CMakeLists.txt +++ b/mlir/lib/Transforms/Utils/CMakeLists.txt @@ -18,6 +18,7 @@ MLIRAffine MLIRAnalysis MLIRLoopAnalysis + MLIRMemRef MLIRSCF MLIRPass MLIRRewrite diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp --- a/mlir/lib/Transforms/Utils/LoopUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -18,6 +18,7 @@ #include "mlir/Analysis/Utils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/IR/AffineValueMap.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/IR/AffineMap.h" #include "mlir/IR/BlockAndValueMapping.h" @@ -2487,7 +2488,8 @@ // Create the fast memory space buffer just before the 'affine.for' // operation. - fastMemRef = prologue.create(loc, fastMemRefType).getResult(); + fastMemRef = + prologue.create(loc, fastMemRefType).getResult(); // Record it. fastBufferMap[memref] = fastMemRef; // fastMemRefType is a constant shaped memref. @@ -2557,7 +2559,7 @@ // Create a tag (single element 1-d memref) for the DMA. 
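The new forward declaration in PassDetail.h and the MLIRMemRef entries in the CMake files exist so that passes which now create memref ops can declare the dialect as a dependency. A hedged sketch of what that looks like for a hand-written pass; the pass itself is hypothetical, and in-tree passes declare this through TableGen's dependentDialects instead.

```c++
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Pass/Pass.h"

using namespace mlir;

struct ExampleBufferPass
    : public PassWrapper<ExampleBufferPass, OperationPass<FuncOp>> {
  // Ensure the MemRef dialect is loaded before this pass creates
  // memref.alloc / memref.dealloc ops.
  void getDependentDialects(DialectRegistry &registry) const override {
    registry.insert<memref::MemRefDialect>();
  }
  void runOnOperation() override {
    // Rewrite logic would go here.
  }
};
```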
auto tagMemRefType = MemRefType::get({1}, top.getIntegerType(32), {}, copyOptions.tagMemorySpace); - auto tagMemRef = prologue.create(loc, tagMemRefType); + auto tagMemRef = prologue.create(loc, tagMemRefType); SmallVector tagIndices({zeroIndex}); auto tagAffineMap = b.getMultiDimIdentityMap(tagIndices.size()); @@ -2585,7 +2587,7 @@ numElementsSSA); // Generate dealloc for the tag. - auto tagDeallocOp = epilogue.create(loc, tagMemRef); + auto tagDeallocOp = epilogue.create(loc, tagMemRef); if (*nEnd == end && isCopyOutAtEndOfBlock) // Since new ops are being appended (for outgoing DMAs), adjust the end to // mark end of range of the original. @@ -2594,7 +2596,7 @@ // Generate dealloc for the buffer. if (!existingBuf) { - auto bufDeallocOp = epilogue.create(loc, fastMemRef); + auto bufDeallocOp = epilogue.create(loc, fastMemRef); // When generating pointwise copies, `nEnd' has to be set to deallocOp on // the fast buffer (since it marks the new end insertion point). if (!copyOptions.generateDma && *nEnd == end && isCopyOutAtEndOfBlock) diff --git a/mlir/lib/Transforms/Utils/Utils.cpp b/mlir/lib/Transforms/Utils/Utils.cpp --- a/mlir/lib/Transforms/Utils/Utils.cpp +++ b/mlir/lib/Transforms/Utils/Utils.cpp @@ -17,6 +17,7 @@ #include "mlir/Analysis/AffineStructures.h" #include "mlir/Analysis/Utils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Dominance.h" @@ -253,7 +254,7 @@ // Skip dealloc's - no replacement is necessary, and a memref replacement // at other uses doesn't hurt these dealloc's. - if (isa(op) && !replaceInDeallocOp) + if (isa(op) && !replaceInDeallocOp) continue; // Check if the memref was used in a non-dereferencing context. It is fine @@ -380,24 +381,24 @@ } // TODO: Currently works for static memrefs with a single layout map. -LogicalResult mlir::normalizeMemRef(AllocOp allocOp) { - MemRefType memrefType = allocOp.getType(); - OpBuilder b(allocOp); +LogicalResult mlir::normalizeMemRef(memref::AllocOp *allocOp) { + MemRefType memrefType = allocOp->getType(); + OpBuilder b(*allocOp); // Fetch a new memref type after normalizing the old memref to have an // identity map layout. MemRefType newMemRefType = - normalizeMemRefType(memrefType, b, allocOp.symbolOperands().size()); + normalizeMemRefType(memrefType, b, allocOp->symbolOperands().size()); if (newMemRefType == memrefType) // Either memrefType already had an identity map or the map couldn't be // transformed to an identity map. return failure(); - Value oldMemRef = allocOp.getResult(); + Value oldMemRef = allocOp->getResult(); - SmallVector symbolOperands(allocOp.symbolOperands()); - AllocOp newAlloc = b.create(allocOp.getLoc(), newMemRefType, - allocOp.alignmentAttr()); + SmallVector symbolOperands(allocOp->symbolOperands()); + memref::AllocOp newAlloc = b.create( + allocOp->getLoc(), newMemRefType, allocOp->alignmentAttr()); AffineMap layoutMap = memrefType.getAffineMaps().front(); // Replace all uses of the old memref. if (failed(replaceAllMemRefUsesWith(oldMemRef, /*newMemRef=*/newAlloc, @@ -414,10 +415,11 @@ } // Replace any uses of the original alloc op and erase it. All remaining uses // have to be dealloc's; RAMUW above would've failed otherwise. 
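For context on the LoopUtils hunks: affine data-copy generation allocates a fast buffer plus a one-element i32 tag memref for the DMA, and now frees both with `memref.dealloc`. A rough sketch with illustrative names and insertion points; only the `memref::*` ops and the tag type shape come from the patch.

```c++
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

static void allocateCopyBuffers(OpBuilder &prologue, OpBuilder &epilogue,
                                Location loc, MemRefType fastBufferType,
                                unsigned tagMemorySpace) {
  // Fast-memory buffer (was std.alloc).
  Value fastBuf = prologue.create<memref::AllocOp>(loc, fastBufferType);
  // Single-element tag used by affine.dma_start / affine.dma_wait.
  auto tagType = MemRefType::get({1}, prologue.getIntegerType(32), {},
                                 tagMemorySpace);
  Value tag = prologue.create<memref::AllocOp>(loc, tagType);
  // ... DMA generation elided ...
  epilogue.create<memref::DeallocOp>(loc, tag);
  epilogue.create<memref::DeallocOp>(loc, fastBuf);
}
```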
- assert(llvm::all_of(oldMemRef.getUsers(), - [](Operation *op) { return isa(op); })); + assert(llvm::all_of(oldMemRef.getUsers(), [](Operation *op) { + return isa(op); + })); oldMemRef.replaceAllUsesWith(newAlloc); - allocOp.erase(); + allocOp->erase(); return success(); } diff --git a/mlir/test/Analysis/test-alias-analysis.mlir b/mlir/test/Analysis/test-alias-analysis.mlir --- a/mlir/test/Analysis/test-alias-analysis.mlir +++ b/mlir/test/Analysis/test-alias-analysis.mlir @@ -23,10 +23,10 @@ // CHECK-DAG: alloc_2#0 <-> func.region0#0: MayAlias // CHECK-DAG: alloc_2#0 <-> func.region0#1: MayAlias func @simple(%arg: memref<2xf32>, %arg1: memref<2xf32>) attributes {test.ptr = "func"} { - %0 = alloca() {test.ptr = "alloca_1"} : memref<8x64xf32> - %1 = alloca() {test.ptr = "alloca_2"} : memref<8x64xf32> - %2 = alloc() {test.ptr = "alloc_1"} : memref<8x64xf32> - %3 = alloc() {test.ptr = "alloc_2"} : memref<8x64xf32> + %0 = memref.alloca() {test.ptr = "alloca_1"} : memref<8x64xf32> + %1 = memref.alloca() {test.ptr = "alloca_2"} : memref<8x64xf32> + %2 = memref.alloc() {test.ptr = "alloc_1"} : memref<8x64xf32> + %3 = memref.alloc() {test.ptr = "alloc_2"} : memref<8x64xf32> return } @@ -50,9 +50,9 @@ // CHECK-DAG: func.region0.block1#0 <-> func.region0.block2#0: MustAlias func @control_flow(%arg: memref<2xf32>, %cond: i1) attributes {test.ptr = "func"} { - %0 = alloca() {test.ptr = "alloca_1"} : memref<8x64xf32> - %1 = alloca() {test.ptr = "alloca_2"} : memref<8x64xf32> - %2 = alloc() {test.ptr = "alloc_1"} : memref<8x64xf32> + %0 = memref.alloca() {test.ptr = "alloca_1"} : memref<8x64xf32> + %1 = memref.alloca() {test.ptr = "alloca_2"} : memref<8x64xf32> + %2 = memref.alloc() {test.ptr = "alloc_1"} : memref<8x64xf32> cond_br %cond, ^bb1(%0 : memref<8x64xf32>), ^bb2(%0 : memref<8x64xf32>) @@ -83,9 +83,9 @@ // CHECK-DAG: func.region0.block1#0 <-> func.region0.block2#0: MayAlias func @control_flow_merge(%arg: memref<2xf32>, %cond: i1) attributes {test.ptr = "func"} { - %0 = alloca() {test.ptr = "alloca_1"} : memref<8x64xf32> - %1 = alloca() {test.ptr = "alloca_2"} : memref<8x64xf32> - %2 = alloc() {test.ptr = "alloc_1"} : memref<8x64xf32> + %0 = memref.alloca() {test.ptr = "alloca_1"} : memref<8x64xf32> + %1 = memref.alloca() {test.ptr = "alloca_2"} : memref<8x64xf32> + %2 = memref.alloc() {test.ptr = "alloc_1"} : memref<8x64xf32> cond_br %cond, ^bb1(%0 : memref<8x64xf32>), ^bb2(%2 : memref<8x64xf32>) @@ -123,9 +123,9 @@ // CHECK-DAG: if_alloc#0 <-> func.region0#0: MayAlias // CHECK-DAG: if_alloc#0 <-> func.region0#1: MayAlias func @region_control_flow(%arg: memref<2xf32>, %cond: i1) attributes {test.ptr = "func"} { - %0 = alloca() {test.ptr = "alloca_1"} : memref<8x64xf32> - %1 = alloca() {test.ptr = "alloca_2"} : memref<8x64xf32> - %2 = alloc() {test.ptr = "alloc_1"} : memref<8x64xf32> + %0 = memref.alloca() {test.ptr = "alloca_1"} : memref<8x64xf32> + %1 = memref.alloca() {test.ptr = "alloca_2"} : memref<8x64xf32> + %2 = memref.alloc() {test.ptr = "alloc_1"} : memref<8x64xf32> %3 = scf.if %cond -> (memref<8x64xf32>) { scf.yield %0 : memref<8x64xf32> @@ -181,9 +181,9 @@ // CHECK-DAG: for_alloca.region0#1 <-> func.region0#3: NoAlias func @region_loop_control_flow(%arg: memref<2xf32>, %loopI0 : index, %loopI1 : index, %loopI2 : index) attributes {test.ptr = "func"} { - %0 = alloca() {test.ptr = "alloca_1"} : memref<8x64xf32> - %1 = alloca() {test.ptr = "alloca_2"} : memref<8x64xf32> - %2 = alloc() {test.ptr = "alloc_1"} : memref<8x64xf32> + %0 = memref.alloca() {test.ptr = "alloca_1"} : 
memref<8x64xf32> + %1 = memref.alloca() {test.ptr = "alloca_2"} : memref<8x64xf32> + %2 = memref.alloc() {test.ptr = "alloc_1"} : memref<8x64xf32> %result = scf.for %i0 = %loopI0 to %loopI1 step %loopI2 iter_args(%si = %0) -> (memref<8x64xf32>) { scf.yield %si : memref<8x64xf32> @@ -201,11 +201,11 @@ // CHECK-DAG: view#0 <-> func.region0#0: NoAlias // CHECK-DAG: view#0 <-> func.region0#1: NoAlias func @view_like(%arg: memref<2xf32>, %size: index) attributes {test.ptr = "func"} { - %1 = alloc() {test.ptr = "alloc_1"} : memref<8x64xf32> + %1 = memref.alloc() {test.ptr = "alloc_1"} : memref<8x64xf32> %c0 = constant 0 : index - %2 = alloca (%size) {test.ptr = "alloca_1"} : memref - %3 = view %2[%c0][] {test.ptr = "view"} : memref to memref<8x64xf32> + %2 = memref.alloca (%size) {test.ptr = "alloca_1"} : memref + %3 = memref.view %2[%c0][] {test.ptr = "view"} : memref to memref<8x64xf32> return } @@ -225,7 +225,7 @@ // CHECK-DAG: constant_3#0 <-> func.region0#0: MayAlias func @constants(%arg: memref<2xf32>) attributes {test.ptr = "func"} { - %1 = alloc() {test.ptr = "alloc_1"} : memref<8x64xf32> + %1 = memref.alloc() {test.ptr = "alloc_1"} : memref<8x64xf32> %c0 = constant {test.ptr = "constant_1"} 0 : index %c0_2 = constant {test.ptr = "constant_2"} 0 : index diff --git a/mlir/test/Analysis/test-liveness.mlir b/mlir/test/Analysis/test-liveness.mlir --- a/mlir/test/Analysis/test-liveness.mlir +++ b/mlir/test/Analysis/test-liveness.mlir @@ -221,7 +221,7 @@ // CHECK-NEXT: LiveOut:{{ *$}} %2 = addi %0, %arg5 : i32 %3 = addi %2, %0 : i32 - store %3, %buffer[] : memref + memref.store %3, %buffer[] : memref } return %1 : i32 } @@ -265,7 +265,7 @@ %2 = addi %0, %arg5 : i32 scf.for %arg7 = %arg0 to %arg1 step %arg2 { %3 = addi %2, %0 : i32 - store %3, %buffer[] : memref + memref.store %3, %buffer[] : memref } } return %1 : i32 @@ -299,7 +299,7 @@ // CHECK-NEXT: LiveIn: arg5@0 arg6@0 val_7 // CHECK-NEXT: LiveOut:{{ *$}} %2 = addi %0, %arg5 : i32 - store %2, %buffer[] : memref + memref.store %2, %buffer[] : memref } br ^exit @@ -312,7 +312,7 @@ // CHECK-NEXT: LiveIn: arg6@0 val_7 val_8 // CHECK-NEXT: LiveOut:{{ *$}} %2 = addi %0, %1 : i32 - store %2, %buffer[] : memref + memref.store %2, %buffer[] : memref } return %1 : i32 } diff --git a/mlir/test/CAPI/ir.c b/mlir/test/CAPI/ir.c --- a/mlir/test/CAPI/ir.c +++ b/mlir/test/CAPI/ir.c @@ -35,7 +35,7 @@ mlirTypeParseGet(ctx, mlirStringRefCreateFromCString("f32")); MlirOperationState loadLHSState = mlirOperationStateGet( - mlirStringRefCreateFromCString("std.load"), location); + mlirStringRefCreateFromCString("memref.load"), location); MlirValue loadLHSOperands[] = {funcArg0, iv}; mlirOperationStateAddOperands(&loadLHSState, 2, loadLHSOperands); mlirOperationStateAddResults(&loadLHSState, 1, &f32Type); @@ -43,7 +43,7 @@ mlirBlockAppendOwnedOperation(loopBody, loadLHS); MlirOperationState loadRHSState = mlirOperationStateGet( - mlirStringRefCreateFromCString("std.load"), location); + mlirStringRefCreateFromCString("memref.load"), location); MlirValue loadRHSOperands[] = {funcArg1, iv}; mlirOperationStateAddOperands(&loadRHSState, 2, loadRHSOperands); mlirOperationStateAddResults(&loadRHSState, 1, &f32Type); @@ -60,7 +60,7 @@ mlirBlockAppendOwnedOperation(loopBody, add); MlirOperationState storeState = mlirOperationStateGet( - mlirStringRefCreateFromCString("std.store"), location); + mlirStringRefCreateFromCString("memref.store"), location); MlirValue storeOperands[] = {mlirOperationGetResult(add, 0), funcArg0, iv}; mlirOperationStateAddOperands(&storeState, 3, 
storeOperands); MlirOperation store = mlirOperationCreate(&storeState); @@ -121,7 +121,7 @@ MlirValue constZeroValue = mlirOperationGetResult(constZero, 0); MlirValue dimOperands[] = {funcArg0, constZeroValue}; MlirOperationState dimState = mlirOperationStateGet( - mlirStringRefCreateFromCString("std.dim"), location); + mlirStringRefCreateFromCString("memref.dim"), location); mlirOperationStateAddOperands(&dimState, 2, dimOperands); mlirOperationStateAddResults(&dimState, 1, &indexType); MlirOperation dim = mlirOperationCreate(&dimState); @@ -167,13 +167,13 @@ // CHECK: module { // CHECK: func @add(%[[ARG0:.*]]: memref, %[[ARG1:.*]]: memref) { // CHECK: %[[C0:.*]] = constant 0 : index - // CHECK: %[[DIM:.*]] = dim %[[ARG0]], %[[C0]] : memref + // CHECK: %[[DIM:.*]] = memref.dim %[[ARG0]], %[[C0]] : memref // CHECK: %[[C1:.*]] = constant 1 : index // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[DIM]] step %[[C1]] { - // CHECK: %[[LHS:.*]] = load %[[ARG0]][%[[I]]] : memref - // CHECK: %[[RHS:.*]] = load %[[ARG1]][%[[I]]] : memref + // CHECK: %[[LHS:.*]] = memref.load %[[ARG0]][%[[I]]] : memref + // CHECK: %[[RHS:.*]] = memref.load %[[ARG1]][%[[I]]] : memref // CHECK: %[[SUM:.*]] = addf %[[LHS]], %[[RHS]] : f32 - // CHECK: store %[[SUM]], %[[ARG0]][%[[I]]] : memref + // CHECK: memref.store %[[SUM]], %[[ARG0]][%[[I]]] : memref // CHECK: } // CHECK: return // CHECK: } @@ -330,7 +330,7 @@ // CHECK: Block eq: 1 // In the module we created, the first operation of the first function is - // an "std.dim", which has an attribute and a single result that we can + // an "memref.dim", which has an attribute and a single result that we can // use to test the printing mechanism. mlirBlockPrint(block, printToStderr, NULL); fprintf(stderr, "\n"); @@ -339,13 +339,13 @@ fprintf(stderr, "\n"); // clang-format off // CHECK: %[[C0:.*]] = constant 0 : index - // CHECK: %[[DIM:.*]] = dim %{{.*}}, %[[C0]] : memref + // CHECK: %[[DIM:.*]] = memref.dim %{{.*}}, %[[C0]] : memref // CHECK: %[[C1:.*]] = constant 1 : index // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[DIM]] step %[[C1]] { - // CHECK: %[[LHS:.*]] = load %{{.*}}[%[[I]]] : memref - // CHECK: %[[RHS:.*]] = load %{{.*}}[%[[I]]] : memref + // CHECK: %[[LHS:.*]] = memref.load %{{.*}}[%[[I]]] : memref + // CHECK: %[[RHS:.*]] = memref.load %{{.*}}[%[[I]]] : memref // CHECK: %[[SUM:.*]] = addf %[[LHS]], %[[RHS]] : f32 - // CHECK: store %[[SUM]], %{{.*}}[%[[I]]] : memref + // CHECK: memref.store %[[SUM]], %{{.*}}[%[[I]]] : memref // CHECK: } // CHECK: return // CHECK: First operation: {{.*}} = constant 0 : index @@ -1453,13 +1453,15 @@ mlirContextSetAllowUnregisteredDialects(ctx, true); MlirLocation loc = mlirLocationUnknownGet(ctx); - MlirOperationState opState = mlirOperationStateGet(mlirStringRefCreateFromCString("invalid.op"), loc); + MlirOperationState opState = + mlirOperationStateGet(mlirStringRefCreateFromCString("invalid.op"), loc); MlirRegion region = mlirRegionCreate(); MlirBlock block = mlirBlockCreate(0, NULL); mlirRegionAppendOwnedBlock(region, block); mlirOperationStateAddOwnedRegions(&opState, 1, ®ion); MlirOperation op = mlirOperationCreate(&opState); - MlirIdentifier ident = mlirIdentifierGet(ctx, mlirStringRefCreateFromCString("identifier")); + MlirIdentifier ident = + mlirIdentifierGet(ctx, mlirStringRefCreateFromCString("identifier")); if (!mlirContextEqual(ctx, mlirOperationGetContext(op))) { fprintf(stderr, "ERROR: Getting context from operation failed\n"); @@ -1473,7 +1475,7 @@ fprintf(stderr, "ERROR: Getting context from identifier failed\n"); 
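The C API test only changes the registered operation names it spells out. A minimal sketch of creating one of these ops through the C API after the rename; the surrounding values are assumed to exist, and the helper is illustrative.

```c++
#include "mlir-c/IR.h"

// Build a memref.load by name; before this patch the same code used
// "std.load". Operands and result type come from the caller.
static MlirOperation buildMemRefLoad(MlirLocation loc, MlirValue memref,
                                     MlirValue index, MlirType f32Type) {
  MlirOperationState state = mlirOperationStateGet(
      mlirStringRefCreateFromCString("memref.load"), loc);
  MlirValue operands[] = {memref, index};
  mlirOperationStateAddOperands(&state, 2, operands);
  mlirOperationStateAddResults(&state, 1, &f32Type);
  return mlirOperationCreate(&state);
}
```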
return 3; } - + mlirOperationDestroy(op); mlirContextDestroy(ctx); diff --git a/mlir/test/Conversion/AffineToStandard/lower-affine-gpu.mlir b/mlir/test/Conversion/AffineToStandard/lower-affine-gpu.mlir --- a/mlir/test/Conversion/AffineToStandard/lower-affine-gpu.mlir +++ b/mlir/test/Conversion/AffineToStandard/lower-affine-gpu.mlir @@ -4,12 +4,12 @@ gpu.module @kernels { gpu.func @foo(%arg0 : index, %arg1 : memref) -> f32 { %0 = affine.apply #map0gpufunc(%arg0) - %1 = load %arg1[%0] : memref + %1 = memref.load %arg1[%0] : memref gpu.return %1 : f32 } // CHECK: gpu.func // CHECK-SAME: %[[ARG0:.*]]: index // CHECK-NOT: affine.apply -// CHECK: load %{{.*}}[%[[ARG0]]] +// CHECK: memref.load %{{.*}}[%[[ARG0]]] } diff --git a/mlir/test/Conversion/AffineToStandard/lower-affine-to-vector.mlir b/mlir/test/Conversion/AffineToStandard/lower-affine-to-vector.mlir --- a/mlir/test/Conversion/AffineToStandard/lower-affine-to-vector.mlir +++ b/mlir/test/Conversion/AffineToStandard/lower-affine-to-vector.mlir @@ -1,13 +1,12 @@ // RUN: mlir-opt -lower-affine --split-input-file %s | FileCheck %s - // CHECK-LABEL: func @affine_vector_load func @affine_vector_load(%arg0 : index) { - %0 = alloc() : memref<100xf32> + %0 = memref.alloc() : memref<100xf32> affine.for %i0 = 0 to 16 { %1 = affine.vector_load %0[%i0 + symbol(%arg0) + 7] : memref<100xf32>, vector<8xf32> } -// CHECK: %[[buf:.*]] = alloc +// CHECK: %[[buf:.*]] = memref.alloc // CHECK: %[[a:.*]] = addi %{{.*}}, %{{.*}} : index // CHECK-NEXT: %[[c7:.*]] = constant 7 : index // CHECK-NEXT: %[[b:.*]] = addi %[[a]], %[[c7]] : index @@ -19,12 +18,12 @@ // CHECK-LABEL: func @affine_vector_store func @affine_vector_store(%arg0 : index) { - %0 = alloc() : memref<100xf32> + %0 = memref.alloc() : memref<100xf32> %1 = constant dense<11.0> : vector<4xf32> affine.for %i0 = 0 to 16 { affine.vector_store %1, %0[%i0 - symbol(%arg0) + 7] : memref<100xf32>, vector<4xf32> } -// CHECK: %[[buf:.*]] = alloc +// CHECK: %[[buf:.*]] = memref.alloc // CHECK: %[[val:.*]] = constant dense // CHECK: %[[c_1:.*]] = constant -1 : index // CHECK-NEXT: %[[a:.*]] = muli %arg0, %[[c_1]] : index @@ -39,11 +38,11 @@ // CHECK-LABEL: func @vector_load_2d func @vector_load_2d() { - %0 = alloc() : memref<100x100xf32> + %0 = memref.alloc() : memref<100x100xf32> affine.for %i0 = 0 to 16 step 2{ affine.for %i1 = 0 to 16 step 8 { %1 = affine.vector_load %0[%i0, %i1] : memref<100x100xf32>, vector<2x8xf32> -// CHECK: %[[buf:.*]] = alloc +// CHECK: %[[buf:.*]] = memref.alloc // CHECK: scf.for %[[i0:.*]] = // CHECK: scf.for %[[i1:.*]] = // CHECK-NEXT: vector.load %[[buf]][%[[i0]], %[[i1]]] : memref<100x100xf32>, vector<2x8xf32> @@ -56,12 +55,12 @@ // CHECK-LABEL: func @vector_store_2d func @vector_store_2d() { - %0 = alloc() : memref<100x100xf32> + %0 = memref.alloc() : memref<100x100xf32> %1 = constant dense<11.0> : vector<2x8xf32> affine.for %i0 = 0 to 16 step 2{ affine.for %i1 = 0 to 16 step 8 { affine.vector_store %1, %0[%i0, %i1] : memref<100x100xf32>, vector<2x8xf32> -// CHECK: %[[buf:.*]] = alloc +// CHECK: %[[buf:.*]] = memref.alloc // CHECK: %[[val:.*]] = constant dense // CHECK: scf.for %[[i0:.*]] = // CHECK: scf.for %[[i1:.*]] = diff --git a/mlir/test/Conversion/AffineToStandard/lower-affine.mlir b/mlir/test/Conversion/AffineToStandard/lower-affine.mlir --- a/mlir/test/Conversion/AffineToStandard/lower-affine.mlir +++ b/mlir/test/Conversion/AffineToStandard/lower-affine.mlir @@ -42,7 +42,7 @@ // CHECK-NEXT: %[[UPPER:.*]] = constant 10 : index // CHECK-NEXT: %[[STEP:.*]] = constant 2 : index 
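The lower-affine tests in this area now expect `memref.load` once the affine map has been expanded into explicit index arithmetic. A sketch of the emission step only, not the in-tree pattern; names are illustrative.

```c++
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

// After -lower-affine has materialized the subscript computation, the access
// itself is a plain memref.load on the computed indices.
static Value emitLoweredLoad(OpBuilder &b, Location loc, Value buffer,
                             ValueRange expandedIndices) {
  return b.create<memref::LoadOp>(loc, buffer, expandedIndices);
}
```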
// CHECK-NEXT: %[[SUM:.*]] = scf.for %[[IV:.*]] = %[[LOWER]] to %[[UPPER]] step %[[STEP]] iter_args(%[[SUM_ITER:.*]] = %[[INIT_SUM]]) -> (f32) { -// CHECK-NEXT: load +// CHECK-NEXT: memref.load // CHECK-NEXT: %[[SUM_NEXT:.*]] = addf // CHECK-NEXT: scf.yield %[[SUM_NEXT]] : f32 // CHECK-NEXT: } @@ -533,20 +533,20 @@ // CHECK-LABEL: func @affine_load func @affine_load(%arg0 : index) { - %0 = alloc() : memref<10xf32> + %0 = memref.alloc() : memref<10xf32> affine.for %i0 = 0 to 10 { %1 = affine.load %0[%i0 + symbol(%arg0) + 7] : memref<10xf32> } // CHECK: %[[a:.*]] = addi %{{.*}}, %{{.*}} : index // CHECK-NEXT: %[[c7:.*]] = constant 7 : index // CHECK-NEXT: %[[b:.*]] = addi %[[a]], %[[c7]] : index -// CHECK-NEXT: %{{.*}} = load %[[v0:.*]][%[[b]]] : memref<10xf32> +// CHECK-NEXT: %{{.*}} = memref.load %[[v0:.*]][%[[b]]] : memref<10xf32> return } // CHECK-LABEL: func @affine_store func @affine_store(%arg0 : index) { - %0 = alloc() : memref<10xf32> + %0 = memref.alloc() : memref<10xf32> %1 = constant 11.0 : f32 affine.for %i0 = 0 to 10 { affine.store %1, %0[%i0 - symbol(%arg0) + 7] : memref<10xf32> @@ -564,29 +564,29 @@ func @affine_load_store_zero_dim(%arg0 : memref, %arg1 : memref) { %0 = affine.load %arg0[] : memref affine.store %0, %arg1[] : memref -// CHECK: %[[x:.*]] = load %arg0[] : memref +// CHECK: %[[x:.*]] = memref.load %arg0[] : memref // CHECK: store %[[x]], %arg1[] : memref return } // CHECK-LABEL: func @affine_prefetch func @affine_prefetch(%arg0 : index) { - %0 = alloc() : memref<10xf32> + %0 = memref.alloc() : memref<10xf32> affine.for %i0 = 0 to 10 { affine.prefetch %0[%i0 + symbol(%arg0) + 7], read, locality<3>, data : memref<10xf32> } // CHECK: %[[a:.*]] = addi %{{.*}}, %{{.*}} : index // CHECK-NEXT: %[[c7:.*]] = constant 7 : index // CHECK-NEXT: %[[b:.*]] = addi %[[a]], %[[c7]] : index -// CHECK-NEXT: prefetch %[[v0:.*]][%[[b]]], read, locality<3>, data : memref<10xf32> +// CHECK-NEXT: memref.prefetch %[[v0:.*]][%[[b]]], read, locality<3>, data : memref<10xf32> return } // CHECK-LABEL: func @affine_dma_start func @affine_dma_start(%arg0 : index) { - %0 = alloc() : memref<100xf32> - %1 = alloc() : memref<100xf32, 2> - %2 = alloc() : memref<1xi32> + %0 = memref.alloc() : memref<100xf32> + %1 = memref.alloc() : memref<100xf32, 2> + %2 = memref.alloc() : memref<1xi32> %c0 = constant 0 : index %c64 = constant 64 : index affine.for %i0 = 0 to 10 { @@ -603,7 +603,7 @@ // CHECK-LABEL: func @affine_dma_wait func @affine_dma_wait(%arg0 : index) { - %2 = alloc() : memref<1xi32> + %2 = memref.alloc() : memref<1xi32> %c64 = constant 64 : index affine.for %i0 = 0 to 10 { affine.dma_wait %2[%i0 + %arg0 + 17], %c64 : memref<1xi32> @@ -694,15 +694,15 @@ // CHECK-DAG: %[[C1_9:.*]] = constant 1 // CHECK-DAG: %[[C1_10:.*]] = constant 1 // CHECK: scf.parallel (%[[arg6:.*]], %[[arg7:.*]], %[[arg8:.*]]) = (%[[arg3]], %[[arg4]], %[[arg5]]) to (%[[A0]], %[[A1]], %[[A2]]) step (%[[C1]], %[[C1_9]], %[[C1_10]]) { -// CHECK: %[[A3:.*]] = load %[[ARG1]][%[[arg6]], %[[arg8]]] : memref<100x100xf32> -// CHECK: %[[A4:.*]] = load %[[ARG2]][%[[arg8]], %[[arg7]]] : memref<100x100xf32> +// CHECK: %[[A3:.*]] = memref.load %[[ARG1]][%[[arg6]], %[[arg8]]] : memref<100x100xf32> +// CHECK: %[[A4:.*]] = memref.load %[[ARG2]][%[[arg8]], %[[arg7]]] : memref<100x100xf32> // CHECK: mulf %[[A3]], %[[A4]] : f32 // CHECK: scf.yield ///////////////////////////////////////////////////////////////////// func @affine_parallel_simple(%arg0: memref<3x3xf32>, %arg1: memref<3x3xf32>) -> (memref<3x3xf32>) { - %O = alloc() : 
memref<3x3xf32> + %O = memref.alloc() : memref<3x3xf32> affine.parallel (%kx, %ky) = (0, 0) to (2, 2) { %1 = affine.load %arg0[%kx, %ky] : memref<3x3xf32> %2 = affine.load %arg1[%kx, %ky] : memref<3x3xf32> @@ -719,8 +719,8 @@ // CHECK-NEXT: %[[STEP_1:.*]] = constant 1 : index // CHECK-NEXT: %[[STEP_2:.*]] = constant 1 : index // CHECK-NEXT: scf.parallel (%[[I:.*]], %[[J:.*]]) = (%[[LOWER_1]], %[[LOWER_2]]) to (%[[UPPER_1]], %[[UPPER_2]]) step (%[[STEP_1]], %[[STEP_2]]) { -// CHECK-NEXT: %[[VAL_1:.*]] = load -// CHECK-NEXT: %[[VAL_2:.*]] = load +// CHECK-NEXT: %[[VAL_1:.*]] = memref.load +// CHECK-NEXT: %[[VAL_2:.*]] = memref.load // CHECK-NEXT: %[[PRODUCT:.*]] = mulf // CHECK-NEXT: store // CHECK-NEXT: scf.yield @@ -732,7 +732,7 @@ func @affine_parallel_simple_dynamic_bounds(%arg0: memref, %arg1: memref, %arg2: memref) { %c_0 = constant 0 : index - %output_dim = dim %arg0, %c_0 : memref + %output_dim = memref.dim %arg0, %c_0 : memref affine.parallel (%kx, %ky) = (%c_0, %c_0) to (%output_dim, %output_dim) { %1 = affine.load %arg0[%kx, %ky] : memref %2 = affine.load %arg1[%kx, %ky] : memref @@ -744,14 +744,14 @@ // CHECK-LABEL: func @affine_parallel_simple_dynamic_bounds // CHECK-SAME: %[[ARG_0:.*]]: memref, %[[ARG_1:.*]]: memref, %[[ARG_2:.*]]: memref // CHECK: %[[DIM_INDEX:.*]] = constant 0 : index -// CHECK-NEXT: %[[UPPER:.*]] = dim %[[ARG_0]], %[[DIM_INDEX]] : memref +// CHECK-NEXT: %[[UPPER:.*]] = memref.dim %[[ARG_0]], %[[DIM_INDEX]] : memref // CHECK-NEXT: %[[LOWER_1:.*]] = constant 0 : index // CHECK-NEXT: %[[LOWER_2:.*]] = constant 0 : index // CHECK-NEXT: %[[STEP_1:.*]] = constant 1 : index // CHECK-NEXT: %[[STEP_2:.*]] = constant 1 : index // CHECK-NEXT: scf.parallel (%[[I:.*]], %[[J:.*]]) = (%[[LOWER_1]], %[[LOWER_2]]) to (%[[UPPER]], %[[UPPER]]) step (%[[STEP_1]], %[[STEP_2]]) { -// CHECK-NEXT: %[[VAL_1:.*]] = load -// CHECK-NEXT: %[[VAL_2:.*]] = load +// CHECK-NEXT: %[[VAL_1:.*]] = memref.load +// CHECK-NEXT: %[[VAL_2:.*]] = memref.load // CHECK-NEXT: %[[PRODUCT:.*]] = mulf // CHECK-NEXT: store // CHECK-NEXT: scf.yield @@ -781,8 +781,8 @@ // CHECK-NEXT: %[[INIT_1:.*]] = constant 0.000000e+00 : f32 // CHECK-NEXT: %[[INIT_2:.*]] = constant 1.000000e+00 : f32 // CHECK-NEXT: %[[RES:.*]] = scf.parallel (%[[I:.*]], %[[J:.*]]) = (%[[LOWER_1]], %[[LOWER_2]]) to (%[[UPPER_1]], %[[UPPER_2]]) step (%[[STEP_1]], %[[STEP_2]]) init (%[[INIT_1]], %[[INIT_2]]) -> (f32, f32) { -// CHECK-NEXT: %[[VAL_1:.*]] = load -// CHECK-NEXT: %[[VAL_2:.*]] = load +// CHECK-NEXT: %[[VAL_1:.*]] = memref.load +// CHECK-NEXT: %[[VAL_2:.*]] = memref.load // CHECK-NEXT: %[[PRODUCT:.*]] = mulf // CHECK-NEXT: %[[SUM:.*]] = addf // CHECK-NEXT: scf.reduce(%[[PRODUCT]]) : f32 { diff --git a/mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir b/mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir --- a/mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir +++ b/mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir @@ -20,7 +20,7 @@ // CHECK: %[[TOKEN:.*]] = call @async_execute_fn(%arg0, %arg1) %token = async.execute { %c0 = constant 0 : index - store %arg0, %arg1[%c0] : memref<1xf32> + memref.store %arg0, %arg1[%c0] : memref<1xf32> async.yield } // CHECK: call @mlirAsyncRuntimeAwaitToken(%[[TOKEN]]) @@ -51,7 +51,7 @@ // Resume coroutine after suspension. // CHECK: ^[[RESUME]]: -// CHECK: store %arg0, %arg1[%c0] : memref<1xf32> +// CHECK: memref.store %arg0, %arg1[%c0] : memref<1xf32> // CHECK: call @mlirAsyncRuntimeEmplaceToken(%[[RET]]) // Delete coroutine. 
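Every test that now spells `memref.*` ops only parses because the tool has the MemRef dialect registered. A sketch of the same requirement for out-of-tree code that parses such IR, assuming the parser API of this era; the helper name is illustrative.

```c++
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/Parser.h"

// Load the MemRef dialect (plus whatever else the snippet uses) before
// parsing IR that mentions memref.* ops.
static mlir::OwningModuleRef
parseWithMemRefDialect(llvm::StringRef ir, mlir::MLIRContext &context) {
  context.getOrLoadDialect<mlir::memref::MemRefDialect>();
  return mlir::parseSourceString(ir, &context);
}
```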
@@ -74,12 +74,12 @@ %token1 = async.execute { %c1 = constant 1: index - store %arg0, %arg2[%c0] : memref<1xf32> + memref.store %arg0, %arg2[%c0] : memref<1xf32> async.yield } async.await %token1 : !async.token - store %arg1, %arg2[%c0] : memref<1xf32> + memref.store %arg1, %arg2[%c0] : memref<1xf32> async.yield } // CHECK: call @mlirAsyncRuntimeAwaitToken(%[[TOKEN]]) @@ -95,7 +95,7 @@ // CHECK: %[[HDL_0:.*]] = llvm.intr.coro.begin // CHECK: call @mlirAsyncRuntimeExecute // CHECK: llvm.intr.coro.suspend -// CHECK: store %arg0, %arg1[%arg2] : memref<1xf32> +// CHECK: memref.store %arg0, %arg1[%arg2] : memref<1xf32> // CHECK: call @mlirAsyncRuntimeEmplaceToken(%[[RET_0]]) // Function outlined from the outer async.execute operation. @@ -115,7 +115,7 @@ // CHECK: llvm.intr.coro.suspend // Emplace result token after second resumption. -// CHECK: store %arg2, %arg1[%c0] : memref<1xf32> +// CHECK: memref.store %arg2, %arg1[%c0] : memref<1xf32> // CHECK: call @mlirAsyncRuntimeEmplaceToken(%[[RET_1]]) // ----- @@ -125,13 +125,13 @@ // CHECK: %0 = call @async_execute_fn(%arg0, %arg1) %token = async.execute { %c0 = constant 0 : index - store %arg0, %arg1[%c0] : memref<1xf32> + memref.store %arg0, %arg1[%c0] : memref<1xf32> async.yield } // CHECK: %1 = call @async_execute_fn_0(%0, %arg0, %arg1) %token_0 = async.execute [%token] { %c0 = constant 0 : index - store %arg0, %arg1[%c0] : memref<1xf32> + memref.store %arg0, %arg1[%c0] : memref<1xf32> async.yield } return @@ -144,7 +144,7 @@ // CHECK: %[[HDL_0:.*]] = llvm.intr.coro.begin // CHECK: call @mlirAsyncRuntimeExecute // CHECK: llvm.intr.coro.suspend -// CHECK: store %arg0, %arg1[%c0] : memref<1xf32> +// CHECK: memref.store %arg0, %arg1[%c0] : memref<1xf32> // CHECK: call @mlirAsyncRuntimeEmplaceToken(%[[RET_0]]) // Function outlined from the second async.execute operation with dependency. @@ -163,7 +163,7 @@ // CHECK: llvm.intr.coro.suspend // Emplace result token after second resumption. 
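The async tests only rename `std.store`; for reference, this is roughly how such a store is created programmatically after the move. The values are assumed to exist and the helper is illustrative.

```c++
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

// memref.store %value, %buffer[%index] -- same operands as the old std.store.
static void storeScalar(OpBuilder &b, Location loc, Value value, Value buffer,
                        Value index) {
  b.create<memref::StoreOp>(loc, value, buffer, ValueRange{index});
}
```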
-// CHECK: store %arg1, %arg2[%c0] : memref<1xf32> +// CHECK: memref.store %arg1, %arg2[%c0] : memref<1xf32> // CHECK: call @mlirAsyncRuntimeEmplaceToken(%[[RET_1]]) // ----- diff --git a/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir b/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir --- a/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir +++ b/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir @@ -42,7 +42,7 @@ // ROCDL: llvm.getelementptr // ROCDL: llvm.store %c0 = constant 0 : index - store %arg0, %arg1[%c0] : memref<4xf32, 5> + memref.store %arg0, %arg1[%c0] : memref<4xf32, 5> "terminator"() : () -> () } @@ -108,7 +108,7 @@ // ROCDL: llvm.getelementptr // ROCDL: llvm.store %c0 = constant 0 : index - store %arg0, %arg1[%c0] : memref<4xf32, 3> + memref.store %arg0, %arg1[%c0] : memref<4xf32, 3> "terminator"() : () -> () } @@ -178,7 +178,7 @@ // ROCDL: %[[descr10:.*]] = llvm.insertvalue %[[c1]], %[[descr9]][4, 2] %c0 = constant 0 : index - store %arg0, %arg1[%c0,%c0,%c0] : memref<4x2x6xf32, 3> + memref.store %arg0, %arg1[%c0,%c0,%c0] : memref<4x2x6xf32, 3> "terminator"() : () -> () } } @@ -222,10 +222,10 @@ // ROCDL: llvm.alloca %[[c4]] x f32 : (i64) -> !llvm.ptr %c0 = constant 0 : index - store %arg0, %arg1[%c0] : memref<1xf32, 3> - store %arg0, %arg2[%c0] : memref<2xf32, 3> - store %arg0, %arg3[%c0] : memref<3xf32, 5> - store %arg0, %arg4[%c0] : memref<4xf32, 5> + memref.store %arg0, %arg1[%c0] : memref<1xf32, 3> + memref.store %arg0, %arg2[%c0] : memref<2xf32, 3> + memref.store %arg0, %arg3[%c0] : memref<3xf32, 5> + memref.store %arg0, %arg4[%c0] : memref<4xf32, 5> "terminator"() : () -> () } } diff --git a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir --- a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir @@ -17,7 +17,7 @@ %c1_2 = constant 1 : index gpu.launch_func @kernels::@load_store_kernel blocks in (%0, %c1_2, %c1_2) threads in (%1, %c1_2, %c1_2) - args(%arg0 : memref<12x4xf32>, %arg1 : memref<12x4xf32>, %arg2 : memref<12x4xf32>, + args(%arg0 : memref<12x4xf32>, %arg1 : memref<12x4xf32>, %arg2 : memref<12x4xf32>, %c0 : index, %c0_0 : index, %c1 : index, %c1_1 : index) return } @@ -69,15 +69,15 @@ // CHECK: %[[OFFSET1_2:.*]] = spv.IAdd %[[OFFSET1_1]], %[[UPDATE1_2]] : i32 // CHECK: %[[PTR1:.*]] = spv.AccessChain %[[ARG0]]{{\[}}%[[ZERO]], %[[OFFSET1_2]]{{\]}} // CHECK-NEXT: %[[VAL1:.*]] = spv.Load "StorageBuffer" %[[PTR1]] - %14 = load %arg0[%12, %13] : memref<12x4xf32> + %14 = memref.load %arg0[%12, %13] : memref<12x4xf32> // CHECK: %[[PTR2:.*]] = spv.AccessChain %[[ARG1]]{{\[}}{{%.*}}, {{%.*}}{{\]}} // CHECK-NEXT: %[[VAL2:.*]] = spv.Load "StorageBuffer" %[[PTR2]] - %15 = load %arg1[%12, %13] : memref<12x4xf32> + %15 = memref.load %arg1[%12, %13] : memref<12x4xf32> // CHECK: %[[VAL3:.*]] = spv.FAdd %[[VAL1]], %[[VAL2]] %16 = addf %14, %15 : f32 // CHECK: %[[PTR3:.*]] = spv.AccessChain %[[ARG2]]{{\[}}{{%.*}}, {{%.*}}{{\]}} // CHECK-NEXT: spv.Store "StorageBuffer" %[[PTR3]], %[[VAL3]] - store %16, %arg2[%12, %13] : memref<12x4xf32> + memref.store %16, %arg2[%12, %13] : memref<12x4xf32> gpu.return } } diff --git a/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir b/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir --- a/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir +++ b/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s 
-convert-gpu-launch-to-vulkan-launch | FileCheck %s -// CHECK: %[[resource:.*]] = alloc() : memref<12xf32> +// CHECK: %[[resource:.*]] = memref.alloc() : memref<12xf32> // CHECK: %[[index:.*]] = constant 1 : index // CHECK: call @vulkanLaunch(%[[index]], %[[index]], %[[index]], %[[resource]]) {spirv_blob = "{{.*}}", spirv_entry_point = "kernel"} @@ -24,7 +24,7 @@ } } func @foo() { - %0 = alloc() : memref<12xf32> + %0 = memref.alloc() : memref<12xf32> %c1 = constant 1 : index gpu.launch_func @kernels::@kernel blocks in(%c1, %c1, %c1) diff --git a/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir b/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir --- a/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir +++ b/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir @@ -18,15 +18,15 @@ // CHECK-DAG: %[[c3:.*]] = constant 3 : index // CHECK-DAG: %[[c0:.*]] = constant 0 : index // CHECK-DAG: %[[c1:.*]] = constant 1 : index -// CHECK: %[[v0:.*]] = dim %[[arg1]], %[[c0]] : memref -// CHECK: %[[v1:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECK: %[[v2:.*]] = dim %[[arg0]], %[[c0]] : memref -// CHECK: %[[v3:.*]] = alloc(%[[c12]]) : memref -// CHECK: %[[v4:.*]] = alloc(%[[c12]]) : memref -// CHECK: %[[v5:.*]] = alloc(%[[c4]]) : memref -// CHECK: %[[v6:.*]] = std.view %[[v3]][%[[c0]]][] : memref to memref<3xf32> -// CHECK: %[[v7:.*]] = std.view %[[v4]][%[[c0]]][] : memref to memref<3xf32> -// CHECK: %[[v8:.*]] = std.view %[[v5]][%[[c0]]][] : memref to memref<1xf32> +// CHECK: %[[v0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref +// CHECK: %[[v1:.*]] = memref.dim %[[arg2]], %[[c0]] : memref +// CHECK: %[[v2:.*]] = memref.dim %[[arg0]], %[[c0]] : memref +// CHECK: %[[v3:.*]] = memref.alloc(%[[c12]]) : memref +// CHECK: %[[v4:.*]] = memref.alloc(%[[c12]]) : memref +// CHECK: %[[v5:.*]] = memref.alloc(%[[c4]]) : memref +// CHECK: %[[v6:.*]] = memref.view %[[v3]][%[[c0]]][] : memref to memref<3xf32> +// CHECK: %[[v7:.*]] = memref.view %[[v4]][%[[c0]]][] : memref to memref<3xf32> +// CHECK: %[[v8:.*]] = memref.view %[[v5]][%[[c0]]][] : memref to memref<1xf32> // CHECK: scf.for %[[arg3:.*]] = %[[c0]] to %[[v1]] step %[[c1]] { // CHECK: %[[v9:.*]] = affine.min #[[$map0]](%[[arg3]])[%[[v1]]] // CHECK: %[[v10:.*]] = subview %[[arg2]][%[[arg3]]] [%[[v9]]] [1] : memref to memref diff --git a/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir b/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir --- a/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir +++ b/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir @@ -21,11 +21,11 @@ // CHECK-BLOCKS-NEXT: gpu.launch blocks(%[[B0:.*]], %[[B1:.*]], %[[B2:.*]]) in (%{{.*}} = %[[BOUND]], %{{.*}} = %[[ONE]], %{{.*}}0 = %[[ONE]]) threads(%[[T0:.*]], %[[T1:.*]], %[[T2:.*]]) in (%{{.*}} = %[[ONE]], %{{.*}} = %[[ONE]], %{{.*}} = %[[ONE]]) affine.for %i = 0 to 42 { // CHECK-THREADS-NEXT: %[[INDEX:.*]] = addi %{{.*}}, %[[T0]] - // CHECK-THREADS-NEXT: load %{{.*}}[%[[INDEX]]] + // CHECK-THREADS-NEXT: memref.load %{{.*}}[%[[INDEX]]] // CHECK-BLOCKS-NEXT: %[[INDEX:.*]] = addi %{{.*}}, %[[B0]] - // CHECK-BLOCKS-NEXT: load %{{.*}}[%[[INDEX]]] - %0 = load %A[%i] : memref - store %0, %B[%i] : memref + // CHECK-BLOCKS-NEXT: memref.load %{{.*}}[%[[INDEX]]] + %0 = memref.load %A[%i] : memref + memref.store %0, %B[%i] : memref // CHECK-THREADS: gpu.terminator // CHECK-BLOCKS: gpu.terminator } diff --git a/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir b/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir --- 
a/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir +++ b/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir @@ -9,8 +9,8 @@ %step = constant 2 : index scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %step) { - %val = load %buf[%i0, %i1] : memref - store %val, %res[%i1, %i0] : memref + %val = memref.load %buf[%i0, %i1] : memref + memref.store %val, %res[%i1, %i0] : memref } { mapping = [{processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}, {processor = 0, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}] } return } @@ -28,8 +28,8 @@ // CHECK: gpu.launch blocks([[VAL_11:%.*]], [[VAL_12:%.*]], [[VAL_13:%.*]]) in ([[VAL_14:%.*]] = [[VAL_10]], [[VAL_15:%.*]] = [[VAL_9]], [[VAL_16:%.*]] = [[VAL_8]]) threads([[VAL_17:%.*]], [[VAL_18:%.*]], [[VAL_19:%.*]]) in ([[VAL_20:%.*]] = [[VAL_8]], [[VAL_21:%.*]] = [[VAL_8]], [[VAL_22:%.*]] = [[VAL_8]]) { // CHECK: [[VAL_23:%.*]] = affine.apply #[[$MAP1]]([[VAL_12]]){{\[}}[[VAL_4]], [[VAL_0]]] // CHECK: [[VAL_24:%.*]] = affine.apply #[[$MAP1]]([[VAL_11]]){{\[}}[[VAL_7]], [[VAL_1]]] -// CHECK: [[VAL_25:%.*]] = load [[VAL_5]]{{\[}}[[VAL_23]], [[VAL_24]]] : memref -// CHECK: store [[VAL_25]], [[VAL_6]]{{\[}}[[VAL_24]], [[VAL_23]]] : memref +// CHECK: [[VAL_25:%.*]] = memref.load [[VAL_5]]{{\[}}[[VAL_23]], [[VAL_24]]] : memref +// CHECK: memref.store [[VAL_25]], [[VAL_6]]{{\[}}[[VAL_24]], [[VAL_23]]] : memref // CHECK: gpu.terminator // CHECK: } // CHECK: return @@ -53,8 +53,8 @@ step (%one, %one) { %idx0 = addi %i0, %si0 : index %idx1 = addi %i1, %si1 : index - %val = load %buf[%idx0, %idx1] : memref - store %val, %res[%idx1, %idx0] : memref + %val = memref.load %buf[%idx0, %idx1] : memref + memref.store %val, %res[%idx1, %idx0] : memref } { mapping = [ {processor = 4, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}, {processor = 3, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>} @@ -87,8 +87,8 @@ // CHECK: [[VAL_55:%.*]] = affine.apply #[[$MAP1]]([[VAL_46]]){{\[}}[[VAL_33]], [[VAL_32]]] // CHECK: [[VAL_56:%.*]] = addi [[VAL_52]], [[VAL_54]] : index // CHECK: [[VAL_57:%.*]] = addi [[VAL_53]], [[VAL_55]] : index -// CHECK: [[VAL_58:%.*]] = load [[VAL_30]]{{\[}}[[VAL_56]], [[VAL_57]]] : memref -// CHECK: store [[VAL_58]], [[VAL_31]]{{\[}}[[VAL_57]], [[VAL_56]]] : memref +// CHECK: [[VAL_58:%.*]] = memref.load [[VAL_30]]{{\[}}[[VAL_56]], [[VAL_57]]] : memref +// CHECK: memref.store [[VAL_58]], [[VAL_31]]{{\[}}[[VAL_57]], [[VAL_56]]] : memref // CHECK: gpu.terminator // CHECK: } // CHECK: return @@ -106,8 +106,8 @@ %step = constant 2 : index scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %step) { - %val = load %buf[%i0, %i1] : memref - store %val, %res[%i1, %i0] : memref + %val = memref.load %buf[%i0, %i1] : memref + memref.store %val, %res[%i1, %i0] : memref } { mapping = [ {processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}, {processor = 6, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>} @@ -127,8 +127,8 @@ // CHECK: gpu.launch blocks([[VAL_69:%.*]], [[VAL_70:%.*]], [[VAL_71:%.*]]) in ([[VAL_72:%.*]] = [[VAL_67]], [[VAL_73:%.*]] = [[VAL_68]], [[VAL_74:%.*]] = [[VAL_67]]) threads([[VAL_75:%.*]], [[VAL_76:%.*]], [[VAL_77:%.*]]) in ([[VAL_78:%.*]] = [[VAL_67]], [[VAL_79:%.*]] = [[VAL_67]], [[VAL_80:%.*]] = [[VAL_67]]) { // CHECK: [[VAL_81:%.*]] = affine.apply #[[$MAP1]]([[VAL_70]]){{\[}}[[VAL_63]], [[VAL_59]]] // CHECK: scf.for [[VAL_82:%.*]] = [[VAL_60]] to [[VAL_62]] step [[VAL_66]] { -// 
CHECK: [[VAL_83:%.*]] = load [[VAL_64]]{{\[}}[[VAL_81]], [[VAL_82]]] : memref -// CHECK: store [[VAL_83]], [[VAL_65]]{{\[}}[[VAL_82]], [[VAL_81]]] : memref +// CHECK: [[VAL_83:%.*]] = memref.load [[VAL_64]]{{\[}}[[VAL_81]], [[VAL_82]]] : memref +// CHECK: memref.store [[VAL_83]], [[VAL_65]]{{\[}}[[VAL_82]], [[VAL_81]]] : memref // CHECK: } // CHECK: gpu.terminator // CHECK: } @@ -153,8 +153,8 @@ step (%one, %one) { %idx0 = addi %i0, %si0 : index %idx1 = addi %i1, %si1 : index - %val = load %buf[%idx0, %idx1] : memref - store %val, %res[%idx1, %idx0] : memref + %val = memref.load %buf[%idx0, %idx1] : memref + memref.store %val, %res[%idx1, %idx0] : memref } { mapping = [ {processor = 4, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}, {processor = 6, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>} @@ -185,8 +185,8 @@ // CHECK: scf.for [[VAL_111:%.*]] = [[VAL_90]] to [[VAL_92]] step [[VAL_91]] { // CHECK: [[VAL_112:%.*]] = addi [[VAL_108]], [[VAL_110]] : index // CHECK: [[VAL_113:%.*]] = addi [[VAL_109]], [[VAL_111]] : index -// CHECK: [[VAL_114:%.*]] = load [[VAL_88]]{{\[}}[[VAL_112]], [[VAL_113]]] : memref -// CHECK: store [[VAL_114]], [[VAL_89]]{{\[}}[[VAL_113]], [[VAL_112]]] : memref +// CHECK: [[VAL_114:%.*]] = memref.load [[VAL_88]]{{\[}}[[VAL_112]], [[VAL_113]]] : memref +// CHECK: memref.store [[VAL_114]], [[VAL_89]]{{\[}}[[VAL_113]], [[VAL_112]]] : memref // CHECK: } // CHECK: } // CHECK: gpu.terminator @@ -208,31 +208,31 @@ %c0 = constant 0 : index %c3 = constant 3 : index %c2 = constant 2 : index - %0 = dim %arg0, %c0 : memref - %1 = dim %arg0, %c1 : memref + %0 = memref.dim %arg0, %c0 : memref + %1 = memref.dim %arg0, %c1 : memref scf.parallel (%arg3, %arg4) = (%c0, %c0) to (%0, %1) step (%c2, %c3) { - %2 = dim %arg0, %c0 : memref + %2 = memref.dim %arg0, %c0 : memref %3 = affine.min #map1(%arg3)[%2] %squared_min = muli %3, %3 : index - %4 = dim %arg0, %c1 : memref + %4 = memref.dim %arg0, %c1 : memref %5 = affine.min #map2(%arg4)[%4] - %6 = std.subview %arg0[%arg3, %arg4][%squared_min, %5][%c1, %c1] : memref to memref - %7 = dim %arg1, %c0 : memref + %6 = memref.subview %arg0[%arg3, %arg4][%squared_min, %5][%c1, %c1] : memref to memref + %7 = memref.dim %arg1, %c0 : memref %8 = affine.min #map1(%arg3)[%7] - %9 = dim %arg1, %c1 : memref + %9 = memref.dim %arg1, %c1 : memref %10 = affine.min #map2(%arg4)[%9] - %11 = std.subview %arg1[%arg3, %arg4][%8, %10][%c1, %c1] : memref to memref - %12 = dim %arg2, %c0 : memref + %11 = memref.subview %arg1[%arg3, %arg4][%8, %10][%c1, %c1] : memref to memref + %12 = memref.dim %arg2, %c0 : memref %13 = affine.min #map1(%arg3)[%12] - %14 = dim %arg2, %c1 : memref + %14 = memref.dim %arg2, %c1 : memref %15 = affine.min #map2(%arg4)[%14] - %16 = std.subview %arg2[%arg3, %arg4][%13, %15][%c1, %c1] : memref to memref + %16 = memref.subview %arg2[%arg3, %arg4][%13, %15][%c1, %c1] : memref to memref scf.parallel (%arg5, %arg6) = (%c0, %c0) to (%squared_min, %5) step (%c1, %c1) { - %17 = load %6[%arg5, %arg6] : memref - %18 = load %11[%arg5, %arg6] : memref - %19 = load %16[%arg5, %arg6] : memref + %17 = memref.load %6[%arg5, %arg6] : memref + %18 = memref.load %11[%arg5, %arg6] : memref + %19 = memref.load %16[%arg5, %arg6] : memref %20 = addf %17, %18 : f32 - store %20, %16[%arg5, %arg6] : memref + memref.store %20, %16[%arg5, %arg6] : memref scf.yield } {mapping = [{bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 3 : i64}, {bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> 
(d0)>, processor = 4 : i64}]} scf.yield @@ -255,8 +255,8 @@ // CHECK: %[[C0:.*]] = constant 0 : index // CHECK: %[[C3:.*]] = constant 3 : index // CHECK: %[[C2:.*]] = constant 2 : index -// CHECK: [[VAL_7:%.*]] = dim [[VAL_0]], %[[C0]] : memref -// CHECK: [[VAL_8:%.*]] = dim [[VAL_0]], %[[C1]] : memref +// CHECK: [[VAL_7:%.*]] = memref.dim [[VAL_0]], %[[C0]] : memref +// CHECK: [[VAL_8:%.*]] = memref.dim [[VAL_0]], %[[C1]] : memref // CHECK: [[VAL_9:%.*]] = constant 1 : index // CHECK: [[VAL_10:%.*]] = affine.apply #[[$MAP1]]([[VAL_7]]){{\[}}%[[C0]], %[[C2]]] // CHECK: [[VAL_11:%.*]] = affine.apply #[[$MAP1]]([[VAL_8]]){{\[}}%[[C0]], %[[C3]]] @@ -267,33 +267,33 @@ // CHECK: gpu.launch blocks([[VAL_16:%.*]], [[VAL_17:%.*]], [[VAL_18:%.*]]) in ([[VAL_19:%.*]] = [[VAL_10]], [[VAL_20:%.*]] = [[VAL_11]], [[VAL_21:%.*]] = [[VAL_9]]) threads([[VAL_22:%.*]], [[VAL_23:%.*]], [[VAL_24:%.*]]) in ([[VAL_25:%.*]] = [[VAL_13]], [[VAL_26:%.*]] = [[VAL_15]], [[VAL_27:%.*]] = [[VAL_9]]) { // CHECK: [[VAL_28:%.*]] = affine.apply #[[$MAP2]]([[VAL_16]]){{\[}}%[[C2]], %[[C0]]] // CHECK: [[VAL_29:%.*]] = affine.apply #[[$MAP2]]([[VAL_17]]){{\[}}%[[C3]], %[[C0]]] -// CHECK: [[VAL_30:%.*]] = dim [[VAL_0]], %[[C0]] : memref +// CHECK: [[VAL_30:%.*]] = memref.dim [[VAL_0]], %[[C0]] : memref // CHECK: [[VAL_31:%.*]] = affine.min #[[$MAP3]]([[VAL_28]]){{\[}}[[VAL_30]]] // CHECK: [[VAL_31_SQUARED:%.*]] = muli [[VAL_31]], [[VAL_31]] : index -// CHECK: [[VAL_32:%.*]] = dim [[VAL_0]], %[[C1]] : memref +// CHECK: [[VAL_32:%.*]] = memref.dim [[VAL_0]], %[[C1]] : memref // CHECK: [[VAL_33:%.*]] = affine.min #[[$MAP4]]([[VAL_29]]){{\[}}[[VAL_32]]] -// CHECK: [[VAL_34:%.*]] = subview [[VAL_0]]{{\[}}[[VAL_28]], [[VAL_29]]] {{\[}}[[VAL_31_SQUARED]], [[VAL_33]]] {{\[}}%[[C1]], %[[C1]]] : memref to memref -// CHECK: [[VAL_35:%.*]] = dim [[VAL_1]], %[[C0]] : memref +// CHECK: [[VAL_34:%.*]] = memref.subview [[VAL_0]]{{\[}}[[VAL_28]], [[VAL_29]]] {{\[}}[[VAL_31_SQUARED]], [[VAL_33]]] {{\[}}%[[C1]], %[[C1]]] : memref to memref +// CHECK: [[VAL_35:%.*]] = memref.dim [[VAL_1]], %[[C0]] : memref // CHECK: [[VAL_36:%.*]] = affine.min #[[$MAP3]]([[VAL_28]]){{\[}}[[VAL_35]]] -// CHECK: [[VAL_37:%.*]] = dim [[VAL_1]], %[[C1]] : memref +// CHECK: [[VAL_37:%.*]] = memref.dim [[VAL_1]], %[[C1]] : memref // CHECK: [[VAL_38:%.*]] = affine.min #[[$MAP4]]([[VAL_29]]){{\[}}[[VAL_37]]] -// CHECK: [[VAL_39:%.*]] = subview [[VAL_1]]{{\[}}[[VAL_28]], [[VAL_29]]] {{\[}}[[VAL_36]], [[VAL_38]]] {{\[}}%[[C1]], %[[C1]]] : memref to memref -// CHECK: [[VAL_40:%.*]] = dim [[VAL_2]], %[[C0]] : memref +// CHECK: [[VAL_39:%.*]] = memref.subview [[VAL_1]]{{\[}}[[VAL_28]], [[VAL_29]]] {{\[}}[[VAL_36]], [[VAL_38]]] {{\[}}%[[C1]], %[[C1]]] : memref to memref +// CHECK: [[VAL_40:%.*]] = memref.dim [[VAL_2]], %[[C0]] : memref // CHECK: [[VAL_41:%.*]] = affine.min #[[$MAP3]]([[VAL_28]]){{\[}}[[VAL_40]]] -// CHECK: [[VAL_42:%.*]] = dim [[VAL_2]], %[[C1]] : memref +// CHECK: [[VAL_42:%.*]] = memref.dim [[VAL_2]], %[[C1]] : memref // CHECK: [[VAL_43:%.*]] = affine.min #[[$MAP4]]([[VAL_29]]){{\[}}[[VAL_42]]] -// CHECK: [[VAL_44:%.*]] = subview [[VAL_2]]{{\[}}[[VAL_28]], [[VAL_29]]] {{\[}}[[VAL_41]], [[VAL_43]]] {{\[}}%[[C1]], %[[C1]]] : memref to memref +// CHECK: [[VAL_44:%.*]] = memref.subview [[VAL_2]]{{\[}}[[VAL_28]], [[VAL_29]]] {{\[}}[[VAL_41]], [[VAL_43]]] {{\[}}%[[C1]], %[[C1]]] : memref to memref // CHECK: [[VAL_45:%.*]] = affine.apply #[[$MAP2]]([[VAL_22]]){{\[}}%[[C1]], %[[C0]]] // CHECK: [[VAL_46:%.*]] = cmpi slt, [[VAL_45]], [[VAL_31_SQUARED]] : index // 
CHECK: scf.if [[VAL_46]] { // CHECK: [[VAL_47:%.*]] = affine.apply #[[$MAP2]]([[VAL_23]]){{\[}}%[[C1]], %[[C0]]] // CHECK: [[VAL_48:%.*]] = cmpi slt, [[VAL_47]], [[VAL_33]] : index // CHECK: scf.if [[VAL_48]] { -// CHECK: [[VAL_49:%.*]] = load [[VAL_34]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref -// CHECK: [[VAL_50:%.*]] = load [[VAL_39]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref -// CHECK: [[VAL_51:%.*]] = load [[VAL_44]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref +// CHECK: [[VAL_49:%.*]] = memref.load [[VAL_34]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref +// CHECK: [[VAL_50:%.*]] = memref.load [[VAL_39]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref +// CHECK: [[VAL_51:%.*]] = memref.load [[VAL_44]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref // CHECK: [[VAL_52:%.*]] = addf [[VAL_49]], [[VAL_50]] : f32 -// CHECK: store [[VAL_52]], [[VAL_44]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref +// CHECK: memref.store [[VAL_52]], [[VAL_44]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref // CHECK: } // CHECK: } // CHECK: gpu.terminator @@ -353,8 +353,8 @@ step (%one, %one) { %idx0 = addi %i0, %si0 : index %idx1 = addi %i1, %si1 : index - %val = load %buf[%idx0, %idx1] : memref - store %val, %res[%idx1, %idx0] : memref + %val = memref.load %buf[%idx0, %idx1] : memref + memref.store %val, %res[%idx1, %idx0] : memref } { mapping = [ {processor = 4, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}, {processor = 6, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>} diff --git a/mlir/test/Conversion/SCFToGPU/step_one.mlir b/mlir/test/Conversion/SCFToGPU/step_one.mlir --- a/mlir/test/Conversion/SCFToGPU/step_one.mlir +++ b/mlir/test/Conversion/SCFToGPU/step_one.mlir @@ -64,12 +64,12 @@ // CHECK-22-NEXT: %[[jj:.*]] = addi %{{.*}}, %{{.*}} : index // Using remapped values instead of loop iterators. 
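These test updates are mechanical, but any downstream C++ that matched the old standard-dialect memory ops needs the same one-line change; a sketch (the predicate name is illustrative):

```c++
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/Operation.h"

// Previously: isa<LoadOp, StoreOp>(op) against the std dialect classes.
static bool isSimpleMemoryAccess(mlir::Operation *op) {
  return mlir::isa<mlir::memref::LoadOp, mlir::memref::StoreOp>(op);
}
```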
- // CHECK-11: {{.*}} = load %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref
- // CHECK-22: {{.*}} = load %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref
- %0 = load %A[%i, %j, %ii, %jj] : memref
- // CHECK-11-NEXT: store {{.*}}, %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref
- // CHECK-22-NEXT: store {{.*}}, %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref
- store %0, %B[%i, %j, %ii, %jj] : memref
+ // CHECK-11: {{.*}} = memref.load %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref
+ // CHECK-22: {{.*}} = memref.load %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref
+ %0 = memref.load %A[%i, %j, %ii, %jj] : memref
+ // CHECK-11-NEXT: memref.store {{.*}}, %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref
+ // CHECK-22-NEXT: memref.store {{.*}}, %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref
+ memref.store %0, %B[%i, %j, %ii, %jj] : memref
 // CHECK-11: gpu.terminator
 // CHECK-22: gpu.terminator
diff --git a/mlir/test/Conversion/SCFToGPU/step_positive.mlir b/mlir/test/Conversion/SCFToGPU/step_positive.mlir
--- a/mlir/test/Conversion/SCFToGPU/step_positive.mlir
+++ b/mlir/test/Conversion/SCFToGPU/step_positive.mlir
@@ -18,10 +18,10 @@
 // CHECK-NEXT: %[[prod_j:.*]] = muli %{{.*}}, %{{.*}} : index
 // CHECK-NEXT: %[[j:.*]] = addi %{{.*}}, %[[prod_j]] : index
- // CHECK: {{.*}} = load %{{.*}}[%[[i]], %[[j]]] : memref
- %0 = load %A[%i, %j] : memref
- // CHECK: store {{.*}}, %{{.*}}[%[[i]], %[[j]]] : memref
- store %0, %B[%i, %j] : memref
+ // CHECK: {{.*}} = memref.load %{{.*}}[%[[i]], %[[j]]] : memref
+ %0 = memref.load %A[%i, %j] : memref
+ // CHECK: memref.store {{.*}}, %{{.*}}[%[[i]], %[[j]]] : memref
+ memref.store %0, %B[%i, %j] : memref
 }
 }
 return
diff --git a/mlir/test/Conversion/SCFToSPIRV/for.mlir b/mlir/test/Conversion/SCFToSPIRV/for.mlir
--- a/mlir/test/Conversion/SCFToSPIRV/for.mlir
+++ b/mlir/test/Conversion/SCFToSPIRV/for.mlir
@@ -36,8 +36,8 @@
 // CHECK: spv.mlir.merge
 // CHECK: }
 scf.for %arg4 = %lb to %ub step %step {
- %1 = load %arg2[%arg4] : memref<10xf32>
- store %1, %arg3[%arg4] : memref<10xf32>
+ %1 = memref.load %arg2[%arg4] : memref<10xf32>
+ memref.store %1, %arg3[%arg4] : memref<10xf32>
 }
 return
 }
@@ -78,8 +78,8 @@
 // CHECK-DAG: %[[OUT2:.*]] = spv.Load "Function" %[[VAR2]] : f32
 // CHECK: spv.Store "StorageBuffer" {{%.*}}, %[[OUT1]] : f32
 // CHECK: spv.Store "StorageBuffer" {{%.*}}, %[[OUT2]] : f32
- store %result#0, %arg3[%lb] : memref<10xf32>
- store %result#1, %arg3[%ub] : memref<10xf32>
+ memref.store %result#0, %arg3[%lb] : memref<10xf32>
+ memref.store %result#1, %arg3[%ub] : memref<10xf32>
 return
 }
diff --git a/mlir/test/Conversion/SCFToSPIRV/if.mlir b/mlir/test/Conversion/SCFToSPIRV/if.mlir
--- a/mlir/test/Conversion/SCFToSPIRV/if.mlir
+++ b/mlir/test/Conversion/SCFToSPIRV/if.mlir
@@ -20,7 +20,7 @@
 // CHECK-NEXT: spv.Return
 scf.if %arg3 {
- store %value, %arg2[%i] : memref<10xf32>
+ memref.store %value, %arg2[%i] : memref<10xf32>
 }
 return
 }
@@ -61,19 +61,19 @@
 scf.if %arg5 {
 scf.if %arg6 {
- %value = load %arg3[%i] : memref<10xf32>
- store %value, %arg4[%i] : memref<10xf32>
+ %value = memref.load %arg3[%i] : memref<10xf32>
+ memref.store %value, %arg4[%i] : memref<10xf32>
 } else {
- %value = load %arg4[%i] : memref<10xf32>
- store %value, %arg3[%i] : memref<10xf32>
+ %value = memref.load %arg4[%i] : memref<10xf32>
+ memref.store %value, %arg3[%i] : memref<10xf32>
 }
 } else {
 scf.if %arg6 {
- %value = load %arg3[%j] : memref<10xf32>
- store %value, %arg4[%j] : memref<10xf32>
+ %value = memref.load %arg3[%j] : memref<10xf32>
+ memref.store %value, %arg4[%j] : memref<10xf32>
 } else {
- %value = load %arg4[%j] : memref<10xf32>
- store %value, %arg3[%j] : memref<10xf32>
+ %value = memref.load %arg4[%j] : memref<10xf32>
+ memref.store %value, %arg3[%j] : memref<10xf32>
 }
 }
 return
@@ -116,8 +116,8 @@
 }
 %i = constant 0 : index
 %j = constant 1 : index
- store %0#0, %arg2[%i] : memref<10xf32>
- store %0#1, %arg2[%j] : memref<10xf32>
+ memref.store %0#0, %arg2[%i] : memref<10xf32>
+ memref.store %0#1, %arg2[%j] : memref<10xf32>
 return
 }
@@ -149,7 +149,7 @@
 } else {
 scf.yield %arg3 : memref<10xf32>
 }
- store %value, %0[%i] : memref<10xf32>
+ memref.store %value, %0[%i] : memref<10xf32>
 return
 }
diff --git a/mlir/test/Conversion/SPIRVToLLVM/lower-host-to-llvm-calls.mlir b/mlir/test/Conversion/SPIRVToLLVM/lower-host-to-llvm-calls.mlir
--- a/mlir/test/Conversion/SPIRVToLLVM/lower-host-to-llvm-calls.mlir
+++ b/mlir/test/Conversion/SPIRVToLLVM/lower-host-to-llvm-calls.mlir
@@ -1,5 +1,5 @@
 // RUN: mlir-opt --lower-host-to-llvm %s | FileCheck %s
-
+
 module attributes {gpu.container_module, spv.target_env = #spv.target_env<#spv.vce, {max_compute_workgroup_invocations = 128 : i32, max_compute_workgroup_size = dense<[128, 128, 64]> : vector<3xi32>}>} {
 // CHECK: llvm.mlir.global linkonce @__spv__foo_bar_arg_0_descriptor_set0_binding0() : !llvm.struct<(array<6 x i32>)>
@@ -8,7 +8,7 @@
 // CHECK: spv.module @__spv__foo
 // CHECK: spv.GlobalVariable @bar_arg_0 bind(0, 0) : !spv.ptr [0])>, StorageBuffer>
 // CHECK: spv.func @__spv__foo_bar
-
+
 // CHECK: spv.EntryPoint "GLCompute" @__spv__foo_bar
 // CHECK: spv.ExecutionMode @__spv__foo_bar "LocalSize", 1, 1, 1
@@ -38,7 +38,7 @@
 }
 func @main() {
- %buffer = alloc() : memref<6xi32>
+ %buffer = memref.alloc() : memref<6xi32>
 %one = constant 1 : index
 gpu.launch_func @foo::@bar blocks in (%one, %one, %one) threads in (%one, %one, %one) args(%buffer : memref<6xi32>)
diff --git a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir
--- a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir
+++ b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir
@@ -31,7 +31,7 @@
 // CHECK-SAME: (%[[SHAPE:.*]]: tensor) -> index
 func @rank(%shape : tensor) -> index {
 // CHECK: %[[C0:.*]] = constant 0 : index
- // CHECK: %[[RESULT:.*]] = dim %[[SHAPE]], %[[C0]]
+ // CHECK: %[[RESULT:.*]] = memref.dim %[[SHAPE]], %[[C0]]
 // CHECK: return %[[RESULT]] : index
 %rank = shape.rank %shape : tensor -> index
 return %rank : index
 }
@@ -60,12 +60,12 @@
 // -----
-// Express `get_extent` as `std.dim` when it relies directly on the outcome of a
+// Express `get_extent` as `memref.dim` when it relies directly on the outcome of a
 // `shape_of` operation.
// CHECK-LABEL: @get_extent_shape_of
// CHECK-SAME: (%[[ARG:.*]]: tensor<2x3xf32>, %[[IDX:.*]]: index) -> index
func @get_extent_shape_of(%arg : tensor<2x3xf32>, %idx : index) -> index {
- // CHECK: %[[RESULT:.*]] = dim %[[ARG]], %[[IDX]] : tensor<2x3xf32>
+ // CHECK: %[[RESULT:.*]] = memref.dim %[[ARG]], %[[IDX]] : tensor<2x3xf32>
 // CHECK: return %[[RESULT]] : index
 %shape = shape.shape_of %arg : tensor<2x3xf32> -> tensor
 %result = shape.get_extent %shape, %idx : tensor, index -> index
@@ -178,7 +178,7 @@
// CHECK-NEXT: %[[INIT:.*]] = constant 1 : index
// CHECK-NEXT: %[[C0:.*]] = constant 0 : index
// CHECK-NEXT: %[[C1:.*]] = constant 1 : index
-// CHECK-NEXT: %[[RANK:.*]] = dim %[[SHAPE]], %[[C0]] : tensor
+// CHECK-NEXT: %[[RANK:.*]] = memref.dim %[[SHAPE]], %[[C0]] : tensor
// CHECK-NEXT: %[[RESULT:.*]] = scf.for %[[I:.*]] = %[[C0]] to %[[RANK]] step %[[C1]] iter_args(%[[ACC:.*]] = %[[INIT]]) -> (index)
// CHECK-NEXT: %[[EXTENT:.*]] = tensor.extract %[[SHAPE]][%[[I]]]
// CHECK-NEXT: %[[NEW_ACC:.*]] = muli %[[ACC]], %[[EXTENT]] : index
@@ -206,7 +206,7 @@
 // CHECK: %[[RANK:.*]] = rank %[[ARG]] : tensor<*xf32>
 // CHECK: %[[SHAPE:.*]] = tensor.generate %[[RANK]] {
 // CHECK: ^bb0(%[[I:.*]]: index):
- // CHECK: %[[EXTENT:.*]] = dim %[[ARG]], %[[I]] : tensor<*xf32>
+ // CHECK: %[[EXTENT:.*]] = memref.dim %[[ARG]], %[[I]] : tensor<*xf32>
 // CHECK: yield %[[EXTENT]] : index
 // CHECK: } : tensor
 %shape = shape.shape_of %arg : tensor<*xf32> -> tensor
@@ -258,7 +258,7 @@
 // CHECK-DAG: %[[C1:.*]] = constant 1 : index
 // CHECK-DAG: %[[C5:.*]] = constant 5 : index
 // CHECK-DAG: %[[C2:.*]] = constant 2 : index
- // CHECK-DAG: %[[DYN_DIM:.*]] = dim %[[ARG]], %[[C2]] : tensor<1x5x?xf32>
+ // CHECK-DAG: %[[DYN_DIM:.*]] = memref.dim %[[ARG]], %[[C2]] : tensor<1x5x?xf32>
 // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor.from_elements %[[C1]], %[[C5]], %[[DYN_DIM]] : tensor<3xindex>
 %shape = shape.shape_of %arg : tensor<1x5x?xf32> -> tensor
 return
@@ -270,8 +270,8 @@
 // CHECK-SAME: (%[[A:.*]]: tensor, %[[B:.*]]: tensor) -> i1
 func @shape_eq(%a : tensor, %b : tensor) -> i1 {
 // CHECK: %[[C0:.*]] = constant 0 : index
- // CHECK: %[[RANK_A:.*]] = dim %[[A]], %[[C0]] : tensor
- // CHECK: %[[RANK_B:.*]] = dim %[[B]], %[[C0]] : tensor
+ // CHECK: %[[RANK_A:.*]] = memref.dim %[[A]], %[[C0]] : tensor
+ // CHECK: %[[RANK_B:.*]] = memref.dim %[[B]], %[[C0]] : tensor
 // CHECK: %[[RANK_EQ:.*]] = cmpi eq, %[[RANK_A]], %[[RANK_B]]
 // CHECK: %[[SHAPE_EQ:.*]] = scf.if %[[RANK_EQ]] -> (i1) {
 // CHECK: %[[C1:.*]] = constant 1 : index
@@ -299,8 +299,8 @@
 // CHECK-SAME: (%[[A:.*]]: tensor, %[[B:.*]]: tensor, %[[C:.*]]: tensor) -> i1
 func @shape_eq(%a : tensor, %b : tensor, %c : tensor) -> i1 {
 // CHECK: %[[C0:.*]] = constant 0 : index
- // CHECK: %[[RANK_A:.*]] = dim %[[A]], %[[C0]] : tensor
- // CHECK: %[[RANK_B:.*]] = dim %[[B]], %[[C0]] : tensor
+ // CHECK: %[[RANK_A:.*]] = memref.dim %[[A]], %[[C0]] : tensor
+ // CHECK: %[[RANK_B:.*]] = memref.dim %[[B]], %[[C0]] : tensor
 // CHECK: %[[RANK_EQ:.*]] = cmpi eq, %[[RANK_A]], %[[RANK_B]]
 // CHECK: %[[SHAPE_EQ:.*]] = scf.if %[[RANK_EQ]] -> (i1) {
 // CHECK: %[[C1:.*]] = constant 1 : index
@@ -317,7 +317,7 @@
 // CHECK: %[[SHAPE_EQ_INNER:.*]] = constant false
 // CHECK: scf.yield %[[SHAPE_EQ_INNER]] : i1
 // CHECK: }
- // CHECK: %[[RANK_C:.*]] = dim %[[C]], %[[C0]] : tensor
+ // CHECK: %[[RANK_C:.*]] = memref.dim %[[C]], %[[C0]] : tensor
 // CHECK: %[[RANK_EQ:.*]] = cmpi eq, %[[RANK_A]], %[[RANK_C]]
 // CHECK: %[[SHAPE_EQ2:.*]] = scf.if %[[RANK_EQ]] -> (i1) {
 // CHECK: %[[C1:.*]] =
constant 1 : index @@ -362,9 +362,9 @@ // CHECK-SAME: %[[ARG2:.*]]: tensor<2xindex>) // CHECK: %[[C0:.*]] = constant 0 : index // CHECK: %[[C1:.*]] = constant 1 : index -// CHECK: %[[RANK0:.*]] = dim %[[ARG0]], %[[C0]] : tensor<2xindex> -// CHECK: %[[RANK1:.*]] = dim %[[ARG1]], %[[C0]] : tensor<3xindex> -// CHECK: %[[RANK2:.*]] = dim %[[ARG2]], %[[C0]] : tensor<2xindex> +// CHECK: %[[RANK0:.*]] = memref.dim %[[ARG0]], %[[C0]] : tensor<2xindex> +// CHECK: %[[RANK1:.*]] = memref.dim %[[ARG1]], %[[C0]] : tensor<3xindex> +// CHECK: %[[RANK2:.*]] = memref.dim %[[ARG2]], %[[C0]] : tensor<2xindex> // CHECK: %[[CMP0:.*]] = cmpi ugt, %[[RANK1]], %[[RANK0]] : index // CHECK: %[[LARGER_DIM:.*]] = select %[[CMP0]], %[[RANK1]], %[[RANK0]] : index // CHECK: %[[CMP1:.*]] = cmpi ugt, %[[RANK2]], %[[LARGER_DIM]] : index @@ -452,9 +452,9 @@ // CHECK-SAME: %[[ARG2:.*]]: tensor<2xindex>) // CHECK: %[[C0:.*]] = constant 0 : index // CHECK: %[[C1:.*]] = constant 1 : index -// CHECK: %[[RANK0:.*]] = dim %[[ARG0]], %[[C0]] : tensor<2xindex> -// CHECK: %[[RANK1:.*]] = dim %[[ARG1]], %[[C0]] : tensor<3xindex> -// CHECK: %[[RANK2:.*]] = dim %[[ARG2]], %[[C0]] : tensor<2xindex> +// CHECK: %[[RANK0:.*]] = memref.dim %[[ARG0]], %[[C0]] : tensor<2xindex> +// CHECK: %[[RANK1:.*]] = memref.dim %[[ARG1]], %[[C0]] : tensor<3xindex> +// CHECK: %[[RANK2:.*]] = memref.dim %[[ARG2]], %[[C0]] : tensor<2xindex> // CHECK: %[[CMP0:.*]] = cmpi ugt, %[[RANK1]], %[[RANK0]] : index // CHECK: %[[LARGER_DIM:.*]] = select %[[CMP0]], %[[RANK1]], %[[RANK0]] : index // CHECK: %[[CMP1:.*]] = cmpi ugt, %[[RANK2]], %[[LARGER_DIM]] : index @@ -544,9 +544,9 @@ // CHECK-SAME: %[[ARG1:.*]]: tensor<3xindex>, // CHECK-SAME: %[[ARG2:.*]]: tensor<2xindex>) { // CHECK: %[[C0:.*]] = constant 0 : index -// CHECK: %[[RANK0:.*]] = dim %[[ARG0]], %[[C0]] : tensor<2xindex> -// CHECK: %[[RANK1:.*]] = dim %[[ARG1]], %[[C0]] : tensor<3xindex> -// CHECK: %[[RANK2:.*]] = dim %[[ARG2]], %[[C0]] : tensor<2xindex> +// CHECK: %[[RANK0:.*]] = memref.dim %[[ARG0]], %[[C0]] : tensor<2xindex> +// CHECK: %[[RANK1:.*]] = memref.dim %[[ARG1]], %[[C0]] : tensor<3xindex> +// CHECK: %[[RANK2:.*]] = memref.dim %[[ARG2]], %[[C0]] : tensor<2xindex> // CHECK: %[[CMP0:.*]] = cmpi ugt, %[[RANK1]], %[[RANK0]] : index // CHECK: %[[LARGER_DIM:.*]] = select %[[CMP0]], %[[RANK1]], %[[RANK0]] : index // CHECK: %[[CMP1:.*]] = cmpi ugt, %[[RANK2]], %[[LARGER_DIM]] : index @@ -600,7 +600,7 @@ // CHECK-SAME: %[[SHAPE:.*]]: tensor, %[[INDEX:.*]]: index func @split_at(%shape: tensor, %index: index) -> (tensor, tensor) { // CHECK-NEXT: %[[C0:.*]] = constant 0 : index - // CHECK-NEXT: %[[RANK:.*]] = dim %[[SHAPE]], %[[C0]] : tensor + // CHECK-NEXT: %[[RANK:.*]] = memref.dim %[[SHAPE]], %[[C0]] : tensor // CHECK-NEXT: %[[POSINDEX:.*]] = addi %[[INDEX]], %[[RANK]] : index // CHECK-NEXT: %[[ISNEG:.*]] = cmpi slt, %[[INDEX]], %[[C0]] : index // CHECK-NEXT: %[[SELECT:.*]] = select %[[ISNEG]], %[[POSINDEX]], %[[INDEX]] : index diff --git a/mlir/test/Conversion/StandardToLLVM/calling-convention.mlir b/mlir/test/Conversion/StandardToLLVM/calling-convention.mlir --- a/mlir/test/Conversion/StandardToLLVM/calling-convention.mlir +++ b/mlir/test/Conversion/StandardToLLVM/calling-convention.mlir @@ -75,7 +75,7 @@ // CHECK-LABEL: @callee // EMIT_C_ATTRIBUTE-LABEL: @callee func @callee(%arg0: memref, %arg1: index) { - %0 = load %arg0[%arg1] : memref + %0 = memref.load %arg0[%arg1] : memref return } @@ -100,7 +100,7 @@ // CHECK-LABEL: @other_callee // EMIT_C_ATTRIBUTE-LABEL: @other_callee func 
@other_callee(%arg0: memref, %arg1: index) attributes { llvm.emit_c_interface } { - %0 = load %arg0[%arg1] : memref + %0 = memref.load %arg0[%arg1] : memref return } @@ -151,7 +151,7 @@ // CHECK: %[[DESC_0:.*]] = llvm.mlir.undef : !llvm.struct<(i64, ptr)> // CHECK: %[[DESC_1:.*]] = llvm.insertvalue %{{.*}}, %[[DESC_0]][0] // CHECK: %[[DESC_2:.*]] = llvm.insertvalue %[[MEMORY]], %[[DESC_1]][1] - %0 = memref_cast %arg0: memref<4x3xf32> to memref<*xf32> + %0 = memref.cast %arg0: memref<4x3xf32> to memref<*xf32> // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : index) // CHECK: %[[TWO:.*]] = llvm.mlir.constant(2 : index) @@ -213,7 +213,7 @@ // CHECK: %[[DESC_0:.*]] = llvm.mlir.undef : !llvm.struct<(i64, ptr)> // CHECK: %[[DESC_1:.*]] = llvm.insertvalue %{{.*}}, %[[DESC_0]][0] // CHECK: %[[DESC_2:.*]] = llvm.insertvalue %[[MEMORY]], %[[DESC_1]][1] - %0 = memref_cast %arg0 : memref<4x3xf32> to memref<*xf32> + %0 = memref.cast %arg0 : memref<4x3xf32> to memref<*xf32> // Only check that we allocate the memory for each operand of the "return" // separately, even if both operands are the same value. The calling diff --git a/mlir/test/Conversion/StandardToLLVM/convert-argattrs.mlir b/mlir/test/Conversion/StandardToLLVM/convert-argattrs.mlir --- a/mlir/test/Conversion/StandardToLLVM/convert-argattrs.mlir +++ b/mlir/test/Conversion/StandardToLLVM/convert-argattrs.mlir @@ -5,7 +5,7 @@ // CHECK-COUNT-7: {dialect.a = true, dialect.b = 4 : i64} func @check_attributes(%static: memref<10x20xf32> {dialect.a = true, dialect.b = 4 : i64 }) { %c0 = constant 0 : index - %0 = load %static[%c0, %c0]: memref<10x20xf32> + %0 = memref.load %static[%c0, %c0]: memref<10x20xf32> return } diff --git a/mlir/test/Conversion/StandardToLLVM/convert-dynamic-memref-ops.mlir b/mlir/test/Conversion/StandardToLLVM/convert-dynamic-memref-ops.mlir --- a/mlir/test/Conversion/StandardToLLVM/convert-dynamic-memref-ops.mlir +++ b/mlir/test/Conversion/StandardToLLVM/convert-dynamic-memref-ops.mlir @@ -48,7 +48,7 @@ // CHECK-NEXT: llvm.insertvalue %[[st0]], %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> // CHECK-NEXT: llvm.insertvalue %[[N]], %{{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> // CHECK-NEXT: llvm.insertvalue %[[one]], %{{.*}}[4, 2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> - %0 = alloc(%arg0, %arg1) : memref + %0 = memref.alloc(%arg0, %arg1) : memref // CHECK-NEXT: llvm.return %{{.*}} : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> return %0 : memref } @@ -58,7 +58,7 @@ // CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> // CHECK-NEXT: %[[ptri8:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr to !llvm.ptr // CHECK-NEXT: llvm.call @free(%[[ptri8]]) : (!llvm.ptr) -> () - dealloc %arg0 : memref + memref.dealloc %arg0 : memref // CHECK-NEXT: llvm.return return } @@ -82,7 +82,7 @@ // CHECK-NEXT: llvm.insertvalue %[[N]], %{{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: llvm.insertvalue %[[N]], %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: llvm.insertvalue %[[one]], %{{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - %0 = alloc(%arg0, %arg1) : memref + %0 = memref.alloc(%arg0, %arg1) : memref // CHECK-NEXT: llvm.return %{{.*}} : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> return %0 : memref } @@ -107,7 +107,7 @@ // CHECK-NEXT: llvm.insertvalue 
%[[N]], %{{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: llvm.insertvalue %[[N]], %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: llvm.insertvalue %[[st1]], %{{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - %0 = alloca(%arg0, %arg1) : memref + %0 = memref.alloca(%arg0, %arg1) : memref // Test with explicitly specified alignment. llvm.alloca takes care of the // alignment. The same pointer is thus used for allocation and aligned @@ -116,7 +116,7 @@ // CHECK: %[[desc:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[desc1:.*]] = llvm.insertvalue %[[alloca_aligned]], %[[desc]][0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: llvm.insertvalue %[[alloca_aligned]], %[[desc1]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - alloca(%arg0, %arg1) {alignment = 32} : memref + memref.alloca(%arg0, %arg1) {alignment = 32} : memref return %0 : memref } @@ -125,7 +125,7 @@ // CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: %[[ptri8:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr to !llvm.ptr // CHECK-NEXT: llvm.call @free(%[[ptri8]]) : (!llvm.ptr) -> () - dealloc %arg0 : memref + memref.dealloc %arg0 : memref return } @@ -142,23 +142,23 @@ // ALIGNED-ALLOC-NEXT: %[[alignment:.*]] = llvm.mlir.constant(32 : index) : i64 // ALIGNED-ALLOC-NEXT: %[[allocated:.*]] = llvm.call @aligned_alloc(%[[alignment]], %[[bytes]]) : (i64, i64) -> !llvm.ptr // ALIGNED-ALLOC-NEXT: llvm.bitcast %[[allocated]] : !llvm.ptr to !llvm.ptr - %0 = alloc() {alignment = 32} : memref<32x18xf32> + %0 = memref.alloc() {alignment = 32} : memref<32x18xf32> // Do another alloc just to test that we have a unique declaration for // aligned_alloc. // ALIGNED-ALLOC: llvm.call @aligned_alloc - %1 = alloc() {alignment = 64} : memref<4096xf32> + %1 = memref.alloc() {alignment = 64} : memref<4096xf32> // Alignment is to element type boundaries (minimum 16 bytes). // ALIGNED-ALLOC: %[[c32:.*]] = llvm.mlir.constant(32 : index) : i64 // ALIGNED-ALLOC-NEXT: llvm.call @aligned_alloc(%[[c32]] - %2 = alloc() : memref<4096xvector<8xf32>> + %2 = memref.alloc() : memref<4096xvector<8xf32>> // The minimum alignment is 16 bytes unless explicitly specified. // ALIGNED-ALLOC: %[[c16:.*]] = llvm.mlir.constant(16 : index) : i64 // ALIGNED-ALLOC-NEXT: llvm.call @aligned_alloc(%[[c16]], - %3 = alloc() : memref<4096xvector<2xf32>> + %3 = memref.alloc() : memref<4096xvector<2xf32>> // ALIGNED-ALLOC: %[[c8:.*]] = llvm.mlir.constant(8 : index) : i64 // ALIGNED-ALLOC-NEXT: llvm.call @aligned_alloc(%[[c8]], - %4 = alloc() {alignment = 8} : memref<1024xvector<4xf32>> + %4 = memref.alloc() {alignment = 8} : memref<1024xvector<4xf32>> // Bump the memref allocation size if its size is not a multiple of alignment. // ALIGNED-ALLOC: %[[c32:.*]] = llvm.mlir.constant(32 : index) : i64 // ALIGNED-ALLOC-NEXT: llvm.mlir.constant(1 : index) : i64 @@ -167,11 +167,11 @@ // ALIGNED-ALLOC-NEXT: llvm.urem // ALIGNED-ALLOC-NEXT: %[[SIZE_ALIGNED:.*]] = llvm.sub // ALIGNED-ALLOC-NEXT: llvm.call @aligned_alloc(%[[c32]], %[[SIZE_ALIGNED]]) - %5 = alloc() {alignment = 32} : memref<100xf32> + %5 = memref.alloc() {alignment = 32} : memref<100xf32> // Bump alignment to the next power of two if it isn't. 
// ALIGNED-ALLOC: %[[c128:.*]] = llvm.mlir.constant(128 : index) : i64 // ALIGNED-ALLOC: llvm.call @aligned_alloc(%[[c128]] - %6 = alloc(%N) : memref> + %6 = memref.alloc(%N) : memref> return %0 : memref<32x18xf32> } @@ -187,7 +187,7 @@ // CHECK-NEXT: %[[off1:.*]] = llvm.add %[[offI]], %[[J]] : i64 // CHECK-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm.ptr, i64) -> !llvm.ptr // CHECK-NEXT: llvm.load %[[addr]] : !llvm.ptr - %0 = load %mixed[%i, %j] : memref<42x?xf32> + %0 = memref.load %mixed[%i, %j] : memref<42x?xf32> return } @@ -208,7 +208,7 @@ // CHECK-NEXT: %[[off1:.*]] = llvm.add %[[offI]], %[[J]] : i64 // CHECK-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm.ptr, i64) -> !llvm.ptr // CHECK-NEXT: llvm.load %[[addr]] : !llvm.ptr - %0 = load %dynamic[%i, %j] : memref + %0 = memref.load %dynamic[%i, %j] : memref return } @@ -232,17 +232,17 @@ // CHECK-NEXT: [[C3:%.*]] = llvm.mlir.constant(3 : i32) : i32 // CHECK-NEXT: [[C1_1:%.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK-NEXT: "llvm.intr.prefetch"(%[[addr]], [[C1]], [[C3]], [[C1_1]]) : (!llvm.ptr, i32, i32, i32) -> () - prefetch %A[%i, %j], write, locality<3>, data : memref + memref.prefetch %A[%i, %j], write, locality<3>, data : memref // CHECK: [[C0:%.*]] = llvm.mlir.constant(0 : i32) : i32 // CHECK: [[C0_1:%.*]] = llvm.mlir.constant(0 : i32) : i32 // CHECK: [[C1_2:%.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: "llvm.intr.prefetch"(%{{.*}}, [[C0]], [[C0_1]], [[C1_2]]) : (!llvm.ptr, i32, i32, i32) -> () - prefetch %A[%i, %j], read, locality<0>, data : memref + memref.prefetch %A[%i, %j], read, locality<0>, data : memref // CHECK: [[C0_2:%.*]] = llvm.mlir.constant(0 : i32) : i32 // CHECK: [[C2:%.*]] = llvm.mlir.constant(2 : i32) : i32 // CHECK: [[C0_3:%.*]] = llvm.mlir.constant(0 : i32) : i32 // CHECK: "llvm.intr.prefetch"(%{{.*}}, [[C0_2]], [[C2]], [[C0_3]]) : (!llvm.ptr, i32, i32, i32) -> () - prefetch %A[%i, %j], read, locality<2>, instr : memref + memref.prefetch %A[%i, %j], read, locality<2>, instr : memref return } @@ -263,7 +263,7 @@ // CHECK-NEXT: %[[off1:.*]] = llvm.add %[[offI]], %[[J]] : i64 // CHECK-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm.ptr, i64) -> !llvm.ptr // CHECK-NEXT: llvm.store %{{.*}}, %[[addr]] : !llvm.ptr - store %val, %dynamic[%i, %j] : memref + memref.store %val, %dynamic[%i, %j] : memref return } @@ -284,56 +284,56 @@ // CHECK-NEXT: %[[off1:.*]] = llvm.add %[[offI]], %[[J]] : i64 // CHECK-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm.ptr, i64) -> !llvm.ptr // CHECK-NEXT: llvm.store %{{.*}}, %[[addr]] : !llvm.ptr - store %val, %mixed[%i, %j] : memref<42x?xf32> + memref.store %val, %mixed[%i, %j] : memref<42x?xf32> return } // CHECK-LABEL: func @memref_cast_static_to_dynamic func @memref_cast_static_to_dynamic(%static : memref<10x42xf32>) { // CHECK-NOT: llvm.bitcast - %0 = memref_cast %static : memref<10x42xf32> to memref + %0 = memref.cast %static : memref<10x42xf32> to memref return } // CHECK-LABEL: func @memref_cast_static_to_mixed func @memref_cast_static_to_mixed(%static : memref<10x42xf32>) { // CHECK-NOT: llvm.bitcast - %0 = memref_cast %static : memref<10x42xf32> to memref + %0 = memref.cast %static : memref<10x42xf32> to memref return } // CHECK-LABEL: func @memref_cast_dynamic_to_static func @memref_cast_dynamic_to_static(%dynamic : memref) { // CHECK-NOT: llvm.bitcast - %0 = memref_cast %dynamic : memref to memref<10x12xf32> + %0 = memref.cast %dynamic : memref to memref<10x12xf32> return } // 
CHECK-LABEL: func @memref_cast_dynamic_to_mixed func @memref_cast_dynamic_to_mixed(%dynamic : memref) { // CHECK-NOT: llvm.bitcast - %0 = memref_cast %dynamic : memref to memref + %0 = memref.cast %dynamic : memref to memref return } // CHECK-LABEL: func @memref_cast_mixed_to_dynamic func @memref_cast_mixed_to_dynamic(%mixed : memref<42x?xf32>) { // CHECK-NOT: llvm.bitcast - %0 = memref_cast %mixed : memref<42x?xf32> to memref + %0 = memref.cast %mixed : memref<42x?xf32> to memref return } // CHECK-LABEL: func @memref_cast_mixed_to_static func @memref_cast_mixed_to_static(%mixed : memref<42x?xf32>) { // CHECK-NOT: llvm.bitcast - %0 = memref_cast %mixed : memref<42x?xf32> to memref<42x1xf32> + %0 = memref.cast %mixed : memref<42x?xf32> to memref<42x1xf32> return } // CHECK-LABEL: func @memref_cast_mixed_to_mixed func @memref_cast_mixed_to_mixed(%mixed : memref<42x?xf32>) { // CHECK-NOT: llvm.bitcast - %0 = memref_cast %mixed : memref<42x?xf32> to memref + %0 = memref.cast %mixed : memref<42x?xf32> to memref return } @@ -347,7 +347,7 @@ // CHECK : llvm.mlir.undef : !llvm.struct<(i64, ptr)> // CHECK-DAG: llvm.insertvalue %[[r]], %{{.*}}[0] : !llvm.struct<(i64, ptr)> // CHECK-DAG: llvm.insertvalue %[[p2]], %{{.*}}[1] : !llvm.struct<(i64, ptr)> - %0 = memref_cast %arg : memref<42x2x?xf32> to memref<*xf32> + %0 = memref.cast %arg : memref<42x2x?xf32> to memref<*xf32> return } @@ -355,7 +355,7 @@ func @memref_cast_unranked_to_ranked(%arg : memref<*xf32>) { // CHECK: %[[p:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(i64, ptr)> // CHECK-NEXT: llvm.bitcast %[[p]] : !llvm.ptr to !llvm.ptr, ptr, i64, array<4 x i64>, array<4 x i64>)>> - %0 = memref_cast %arg : memref<*xf32> to memref + %0 = memref.cast %arg : memref<*xf32> to memref return } @@ -363,19 +363,19 @@ func @mixed_memref_dim(%mixed : memref<42x?x?x13x?xf32>) { // CHECK: llvm.mlir.constant(42 : index) : i64 %c0 = constant 0 : index - %0 = dim %mixed, %c0 : memref<42x?x?x13x?xf32> + %0 = memref.dim %mixed, %c0 : memref<42x?x?x13x?xf32> // CHECK: llvm.extractvalue %[[ld:.*]][3, 1] : !llvm.struct<(ptr, ptr, i64, array<5 x i64>, array<5 x i64>)> %c1 = constant 1 : index - %1 = dim %mixed, %c1 : memref<42x?x?x13x?xf32> + %1 = memref.dim %mixed, %c1 : memref<42x?x?x13x?xf32> // CHECK: llvm.extractvalue %[[ld]][3, 2] : !llvm.struct<(ptr, ptr, i64, array<5 x i64>, array<5 x i64>)> %c2 = constant 2 : index - %2 = dim %mixed, %c2 : memref<42x?x?x13x?xf32> + %2 = memref.dim %mixed, %c2 : memref<42x?x?x13x?xf32> // CHECK: llvm.mlir.constant(13 : index) : i64 %c3 = constant 3 : index - %3 = dim %mixed, %c3 : memref<42x?x?x13x?xf32> + %3 = memref.dim %mixed, %c3 : memref<42x?x?x13x?xf32> // CHECK: llvm.extractvalue %[[ld]][3, 4] : !llvm.struct<(ptr, ptr, i64, array<5 x i64>, array<5 x i64>)> %c4 = constant 4 : index - %4 = dim %mixed, %c4 : memref<42x?x?x13x?xf32> + %4 = memref.dim %mixed, %c4 : memref<42x?x?x13x?xf32> return } @@ -398,13 +398,13 @@ // CHECK-DAG: %[[RESULT_PTR:.*]] = llvm.getelementptr %[[SIZES_PTR]][%[[C0]], %[[IDX]]] : (!llvm.ptr>, i64, i64) -> !llvm.ptr // CHECK-DAG: %[[RESULT:.*]] = llvm.load %[[RESULT_PTR]] : !llvm.ptr // CHECK-DAG: llvm.return %[[RESULT]] : i64 - %result = dim %arg, %idx : memref<3x?xf32> + %result = memref.dim %arg, %idx : memref<3x?xf32> return %result : index } // CHECK-LABEL: @memref_reinterpret_cast_ranked_to_static_shape func @memref_reinterpret_cast_ranked_to_static_shape(%input : memref<2x3xf32>) { - %output = memref_reinterpret_cast %input to + %output = memref.reinterpret_cast %input to offset: [0], 
sizes: [6, 1], strides: [1, 1] : memref<2x3xf32> to memref<6x1xf32> return @@ -433,7 +433,7 @@ %stride_0 : index, %stride_1 : index, %input : memref<*xf32>) { - %output = memref_reinterpret_cast %input to + %output = memref.reinterpret_cast %input to offset: [%offset], sizes: [%size_0, %size_1], strides: [%stride_0, %stride_1] : memref<*xf32> to memref @@ -462,7 +462,7 @@ // CHECK-LABEL: @memref_reshape func @memref_reshape(%input : memref<2x3xf32>, %shape : memref) { - %output = memref_reshape %input(%shape) + %output = memref.reshape %input(%shape) : (memref<2x3xf32>, memref) -> memref<*xf32> return } diff --git a/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir b/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir --- a/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir +++ b/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir @@ -93,7 +93,7 @@ // BAREPTR-NEXT: llvm.insertvalue %[[ptr]], %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64)> // BAREPTR-NEXT: %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64 // BAREPTR-NEXT: llvm.insertvalue %[[c0]], %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64)> - %0 = alloc() : memref + %0 = memref.alloc() : memref return %0 : memref } @@ -109,7 +109,7 @@ // BAREPTR: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64)> // BAREPTR-NEXT: %[[bc:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr to !llvm.ptr // BAREPTR-NEXT: llvm.call @free(%[[bc]]) : (!llvm.ptr) -> () - dealloc %arg0 : memref + memref.dealloc %arg0 : memref return } @@ -161,7 +161,7 @@ // BAREPTR-NEXT: llvm.insertvalue %[[alignedBitCast]], %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // BAREPTR-NEXT: %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64 // BAREPTR-NEXT: llvm.insertvalue %[[c0]], %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %0 = alloc() {alignment = 8} : memref<42xf32> + %0 = memref.alloc() {alignment = 8} : memref<42xf32> return %0 : memref<42xf32> } @@ -183,7 +183,7 @@ // BAREPTR-NEXT: %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to i64 // BAREPTR-NEXT: %[[allocated:.*]] = llvm.call @malloc(%[[size_bytes]]) : (i64) -> !llvm.ptr // BAREPTR-NEXT: llvm.bitcast %[[allocated]] : !llvm.ptr to !llvm.ptr - %0 = alloc() : memref<32x18xf32> + %0 = memref.alloc() : memref<32x18xf32> return %0 : memref<32x18xf32> } @@ -199,7 +199,7 @@ // CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[num_elems]]] : (!llvm.ptr, i64) -> !llvm.ptr // CHECK-NEXT: %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to i64 // CHECK-NEXT: %[[allocated:.*]] = llvm.alloca %[[size_bytes]] x f32 : (i64) -> !llvm.ptr - %0 = alloca() : memref<32x18xf32> + %0 = memref.alloca() : memref<32x18xf32> // Test with explicitly specified alignment. llvm.alloca takes care of the // alignment. 
The same pointer is thus used for allocation and aligned @@ -208,7 +208,7 @@ // CHECK: %[[desc:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[desc1:.*]] = llvm.insertvalue %[[alloca_aligned]], %[[desc]][0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: llvm.insertvalue %[[alloca_aligned]], %[[desc1]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - alloca() {alignment = 32} : memref<32x18xf32> + memref.alloca() {alignment = 32} : memref<32x18xf32> return %0 : memref<32x18xf32> } @@ -224,7 +224,7 @@ // BAREPTR: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR-NEXT: %[[bc:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr to !llvm.ptr // BAREPTR-NEXT: llvm.call @free(%[[bc]]) : (!llvm.ptr) -> () - dealloc %static : memref<10x8xf32> + memref.dealloc %static : memref<10x8xf32> return } @@ -238,7 +238,7 @@ // BAREPTR: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64)> // BAREPTR-NEXT: llvm.load %[[ptr:.*]] : !llvm.ptr - %0 = load %arg0[] : memref + %0 = memref.load %arg0[] : memref return %0 : f32 } @@ -265,7 +265,7 @@ // BAREPTR-NEXT: %[[off1:.*]] = llvm.add %[[offI]], %[[J]] : i64 // BAREPTR-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm.ptr, i64) -> !llvm.ptr // BAREPTR-NEXT: llvm.load %[[addr]] : !llvm.ptr - %0 = load %static[%i, %j] : memref<10x42xf32> + %0 = memref.load %static[%i, %j] : memref<10x42xf32> return } @@ -280,7 +280,7 @@ // BAREPTR: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64)> // BAREPTR-NEXT: llvm.store %[[val]], %[[ptr]] : !llvm.ptr - store %arg1, %arg0[] : memref + memref.store %arg1, %arg0[] : memref return } @@ -314,7 +314,7 @@ // BAREPTR-NEXT: %[[off1:.*]] = llvm.add %[[offI]], %[[J]] : i64 // BAREPTR-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm.ptr, i64) -> !llvm.ptr // BAREPTR-NEXT: llvm.store %{{.*}}, %[[addr]] : !llvm.ptr - store %val, %static[%i, %j] : memref<10x42xf32> + memref.store %val, %static[%i, %j] : memref<10x42xf32> return } @@ -327,23 +327,23 @@ // BAREPTR: llvm.insertvalue %{{.*}}, %{{.*}}[4, 4] : !llvm.struct<(ptr, ptr, i64, array<5 x i64>, array<5 x i64>)> // BAREPTR: llvm.mlir.constant(42 : index) : i64 %c0 = constant 0 : index - %0 = dim %static, %c0 : memref<42x32x15x13x27xf32> + %0 = memref.dim %static, %c0 : memref<42x32x15x13x27xf32> // CHECK: llvm.mlir.constant(32 : index) : i64 // BAREPTR: llvm.mlir.constant(32 : index) : i64 %c1 = constant 1 : index - %1 = dim %static, %c1 : memref<42x32x15x13x27xf32> + %1 = memref.dim %static, %c1 : memref<42x32x15x13x27xf32> // CHECK: llvm.mlir.constant(15 : index) : i64 // BAREPTR: llvm.mlir.constant(15 : index) : i64 %c2 = constant 2 : index - %2 = dim %static, %c2 : memref<42x32x15x13x27xf32> + %2 = memref.dim %static, %c2 : memref<42x32x15x13x27xf32> // CHECK: llvm.mlir.constant(13 : index) : i64 // BAREPTR: llvm.mlir.constant(13 : index) : i64 %c3 = constant 3 : index - %3 = dim %static, %c3 : memref<42x32x15x13x27xf32> + %3 = memref.dim %static, %c3 : memref<42x32x15x13x27xf32> // CHECK: llvm.mlir.constant(27 : index) : i64 // BAREPTR: llvm.mlir.constant(27 : index) : i64 %c4 = constant 4 : index - %4 = dim %static, %c4 : memref<42x32x15x13x27xf32> + %4 = memref.dim %static, %c4 : memref<42x32x15x13x27xf32> return } diff --git a/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir 
--- a/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir +++ b/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir @@ -786,7 +786,7 @@ func @view(%arg0 : index, %arg1 : index, %arg2 : index) { // CHECK: llvm.mlir.constant(2048 : index) : i64 // CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %0 = alloc() : memref<2048xi8> + %0 = memref.alloc() : memref<2048xi8> // Test two dynamic sizes. // CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> @@ -802,7 +802,7 @@ // CHECK: llvm.insertvalue %[[ARG0]], %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: llvm.mul %{{.*}}, %[[ARG1]] // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - %1 = view %0[%arg2][%arg0, %arg1] : memref<2048xi8> to memref + %1 = memref.view %0[%arg2][%arg0, %arg1] : memref<2048xi8> to memref // Test one dynamic size. // CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> @@ -819,7 +819,7 @@ // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: llvm.mul %{{.*}}, %[[ARG1]] // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - %3 = view %0[%arg2][%arg1] : memref<2048xi8> to memref<4x?xf32> + %3 = memref.view %0[%arg2][%arg1] : memref<2048xi8> to memref<4x?xf32> // Test static sizes. // CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> @@ -837,12 +837,12 @@ // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: llvm.mlir.constant(4 : index) : i64 // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - %5 = view %0[%arg2][] : memref<2048xi8> to memref<64x4xf32> + %5 = memref.view %0[%arg2][] : memref<2048xi8> to memref<64x4xf32> // Test view memory space. 
// CHECK: llvm.mlir.constant(2048 : index) : i64 // CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %6 = alloc() : memref<2048xi8, 4> + %6 = memref.alloc() : memref<2048xi8, 4> // CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[BASE_PTR_4:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> @@ -859,7 +859,7 @@ // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: llvm.mlir.constant(4 : index) : i64 // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - %7 = view %6[%arg2][] : memref<2048xi8, 4> to memref<64x4xf32, 4> + %7 = memref.view %6[%arg2][] : memref<2048xi8, 4> to memref<64x4xf32, 4> return } @@ -919,7 +919,7 @@ // CHECK32: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : i32 // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0]], %[[DESC4]][3, 0] : !llvm.struct<(ptr, ptr, i32, array<2 x i32>, array<2 x i32>)> - %1 = subview %0[%arg0, %arg1][%arg0, %arg1][%arg0, %arg1] : + %1 = memref.subview %0[%arg0, %arg1][%arg0, %arg1][%arg0, %arg1] : memref<64x4xf32, offset: 0, strides: [4, 1]> to memref return @@ -980,7 +980,7 @@ // CHECK32: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : i32 // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0]], %[[DESC4]][3, 0] : !llvm.struct<(ptr, ptr, i32, array<2 x i32>, array<2 x i32>)> - %1 = subview %0[%arg0, %arg1][%arg0, %arg1][%arg0, %arg1] : + %1 = memref.subview %0[%arg0, %arg1][%arg0, %arg1][%arg0, %arg1] : memref<64x4xf32, offset: 0, strides: [4, 1], 3> to memref return @@ -1055,7 +1055,7 @@ // CHECK32: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG7]], %[[STRIDE0]] : i32 // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[CST4]], %[[DESC4]][3, 0] : !llvm.struct<(ptr, ptr, i32, array<2 x i32>, array<2 x i32>)> // CHECK32: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm.struct<(ptr, ptr, i32, array<2 x i32>, array<2 x i32>)> - %1 = subview %0[%arg0, %arg1][4, 2][%arg0, %arg1] : + %1 = memref.subview %0[%arg0, %arg1][4, 2][%arg0, %arg1] : memref<64x4xf32, offset: 0, strides: [4, 1]> to memref<4x2xf32, offset: ?, strides: [?, ?]> return @@ -1126,7 +1126,7 @@ // CHECK32: %[[CST4:.*]] = llvm.mlir.constant(4 : i64) // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[ARG7]], %[[DESC4]][3, 0] : !llvm.struct<(ptr, ptr, i32, array<2 x i32>, array<2 x i32>)> // CHECK32: llvm.insertvalue %[[CST4]], %[[DESC5]][4, 0] : !llvm.struct<(ptr, ptr, i32, array<2 x i32>, array<2 x i32>)> - %1 = subview %0[%arg0, %arg1][%arg0, %arg1][1, 2] : + %1 = memref.subview %0[%arg0, %arg1][%arg0, %arg1][1, 2] : memref<64x4xf32, offset: 0, strides: [4, 1]> to memref return @@ -1156,7 +1156,7 @@ // CHECK32: %[[CST4:.*]] = llvm.mlir.constant(4 : i64) // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[CST62]], %[[DESC4]][3, 0] : !llvm.struct<(ptr, ptr, i32, array<2 x i32>, array<2 x i32>)> // CHECK32: llvm.insertvalue %[[CST4]], %[[DESC5]][4, 0] : !llvm.struct<(ptr, ptr, i32, array<2 x i32>, array<2 x i32>)> - %1 = subview %0[0, 8][62, 3][1, 1] : + %1 = memref.subview %0[0, 8][62, 3][1, 1] : memref<64x4xf32, offset: 0, strides: [4, 1]> to memref<62x3xf32, offset: 8, strides: [4, 1]> return @@ -1201,7 +1201,7 @@ // CHECK32: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : i32 // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[CST62]], %[[DESC4]][3, 0] : !llvm.struct<(ptr, ptr, i32, array<2 x i32>, 
array<2 x i32>)> // CHECK32: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm.struct<(ptr, ptr, i32, array<2 x i32>, array<2 x i32>)> - %1 = subview %0[%arg1, 8][62, %arg2][%arg0, 1] : + %1 = memref.subview %0[%arg1, 8][62, %arg2][%arg0, 1] : memref<64x4xf32, offset: 0, strides: [4, 1]> to memref<62x?xf32, offset: ?, strides: [?, 1]> return @@ -1227,7 +1227,7 @@ // CHECK: %[[C3_3:.*]] = llvm.mlir.constant(3 : i64) : i64 // CHECK: llvm.insertvalue %[[C3_2]], %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: llvm.insertvalue %[[C3_3]], %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - %2 = subview %0[2][3][1]: memref<5x3xf32> to memref<3x3xf32, offset: 6, strides: [3, 1]> + %2 = memref.subview %0[2][3][1]: memref<5x3xf32> to memref<3x3xf32, offset: 6, strides: [3, 1]> return } @@ -1259,7 +1259,7 @@ // CHECK: %[[MUL:.*]] = llvm.mul %[[C1_2]], %[[ST0]] : i64 // CHECK: llvm.insertvalue %[[C3]], %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: llvm.insertvalue %[[MUL]], %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - %1 = subview %0[2][3][1]: memref<5x?xf32> to memref<3x?xf32, offset: ?, strides: [?, 1]> + %1 = memref.subview %0[2][3][1]: memref<5x?xf32> to memref<3x?xf32, offset: ?, strides: [?, 1]> return } @@ -1281,7 +1281,7 @@ // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK: llvm.insertvalue %[[C3]], %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // CHECK: llvm.insertvalue %[[C1]], %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> - %1 = subview %0[1][1][1]: memref<5x3xf32> to memref<3xf32, offset: 3, strides: [1]> + %1 = memref.subview %0[1][1][1]: memref<5x3xf32> to memref<3xf32, offset: 3, strides: [1]> return } @@ -1346,7 +1346,7 @@ // CHECK-NEXT: %[[MASKED_PTR:.*]] = llvm.and %[[INT]], %[[MASK:.*]] : i64 // CHECK-NEXT: %[[CONDITION:.*]] = llvm.icmp "eq" %[[MASKED_PTR]], %[[ZERO]] : i64 // CHECK-NEXT: "llvm.intr.assume"(%[[CONDITION]]) : (i1) -> () - assume_alignment %0, 16 : memref<4x4xf16> + memref.assume_alignment %0, 16 : memref<4x4xf16> return } @@ -1393,7 +1393,7 @@ // CHECK32-LABEL: func @dim_of_unranked func @dim_of_unranked(%unranked: memref<*xi32>) -> index { %c0 = constant 0 : index - %dim = dim %unranked, %c0 : memref<*xi32> + %dim = memref.dim %unranked, %c0 : memref<*xi32> return %dim : index } // CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(i64, ptr)> diff --git a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir --- a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir +++ b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir @@ -3,10 +3,10 @@ // CHECK-LABEL: func @address_space( // CHECK-SAME: !llvm.ptr func @address_space(%arg0 : memref<32xf32, affine_map<(d0) -> (d0)>, 7>) { - %0 = alloc() : memref<32xf32, affine_map<(d0) -> (d0)>, 5> + %0 = memref.alloc() : memref<32xf32, affine_map<(d0) -> (d0)>, 5> %1 = constant 7 : index // CHECK: llvm.load %{{.*}} : !llvm.ptr - %2 = load %0[%1] : memref<32xf32, affine_map<(d0) -> (d0)>, 5> + %2 = memref.load %0[%1] : memref<32xf32, affine_map<(d0) -> (d0)>, 5> std.return } @@ -122,25 +122,25 @@ // CHECK: llvm.extractvalue {{.*}}[3, 2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> // CHECK: llvm.insertvalue {{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> func @transpose(%arg0: memref) 
{ - %0 = transpose %arg0 (i, j, k) -> (k, i, j) : memref to memref (d2 * s1 + s0 + d0 * s2 + d1)>> + %0 = memref.transpose %arg0 (i, j, k) -> (k, i, j) : memref to memref (d2 * s1 + s0 + d0 * s2 + d1)>> return } // ----- // CHECK: llvm.mlir.global external @gv0() : !llvm.array<2 x f32> -global_memref @gv0 : memref<2xf32> = uninitialized +memref.global @gv0 : memref<2xf32> = uninitialized // CHECK: llvm.mlir.global private @gv1() : !llvm.array<2 x f32> -global_memref "private" @gv1 : memref<2xf32> +memref.global "private" @gv1 : memref<2xf32> // CHECK: llvm.mlir.global external @gv2(dense<{{\[\[}}0.000000e+00, 1.000000e+00, 2.000000e+00], [3.000000e+00, 4.000000e+00, 5.000000e+00]]> : tensor<2x3xf32>) : !llvm.array<2 x array<3 x f32>> -global_memref @gv2 : memref<2x3xf32> = dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> +memref.global @gv2 : memref<2x3xf32> = dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> // Test 1D memref. // CHECK-LABEL: func @get_gv0_memref func @get_gv0_memref() { - %0 = get_global_memref @gv0 : memref<2xf32> + %0 = memref.get_global @gv0 : memref<2xf32> // CHECK: %[[DIM:.*]] = llvm.mlir.constant(2 : index) : i64 // CHECK: %[[STRIDE:.*]] = llvm.mlir.constant(1 : index) : i64 // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @gv0 : !llvm.ptr> @@ -179,13 +179,13 @@ // CHECK: llvm.insertvalue %[[DIM1]], {{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: llvm.insertvalue %[[STRIDE1]], {{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> - %0 = get_global_memref @gv2 : memref<2x3xf32> + %0 = memref.get_global @gv2 : memref<2x3xf32> return } // Test scalar memref. // CHECK: llvm.mlir.global external @gv3(1.000000e+00 : f32) : f32 -global_memref @gv3 : memref = dense<1.0> +memref.global @gv3 : memref = dense<1.0> // CHECK-LABEL: func @get_gv3_memref func @get_gv3_memref() { @@ -199,7 +199,7 @@ // CHECK: llvm.insertvalue %[[GEP]], {{.*}}[1] : !llvm.struct<(ptr, ptr, i64)> // CHECK: %[[OFFSET:.*]] = llvm.mlir.constant(0 : index) : i64 // CHECK: llvm.insertvalue %[[OFFSET]], {{.*}}[2] : !llvm.struct<(ptr, ptr, i64)> - %0 = get_global_memref @gv3 : memref + %0 = memref.get_global @gv3 : memref return } diff --git a/mlir/test/Conversion/StandardToSPIRV/alloc.mlir b/mlir/test/Conversion/StandardToSPIRV/alloc.mlir --- a/mlir/test/Conversion/StandardToSPIRV/alloc.mlir +++ b/mlir/test/Conversion/StandardToSPIRV/alloc.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt -allow-unregistered-dialect -split-input-file -convert-std-to-spirv -canonicalize -verify-diagnostics %s -o - | FileCheck %s //===----------------------------------------------------------------------===// -// std allocation/deallocation ops +// memref allocation/deallocation ops //===----------------------------------------------------------------------===// module attributes { @@ -10,22 +10,22 @@ } { func @alloc_dealloc_workgroup_mem(%arg0 : index, %arg1 : index) { - %0 = alloc() : memref<4x5xf32, 3> - %1 = load %0[%arg0, %arg1] : memref<4x5xf32, 3> - store %1, %0[%arg0, %arg1] : memref<4x5xf32, 3> - dealloc %0 : memref<4x5xf32, 3> + %0 = memref.alloc() : memref<4x5xf32, 3> + %1 = memref.load %0[%arg0, %arg1] : memref<4x5xf32, 3> + memref.store %1, %0[%arg0, %arg1] : memref<4x5xf32, 3> + memref.dealloc %0 : memref<4x5xf32, 3> return } } // CHECK: spv.GlobalVariable @[[VAR:.+]] : !spv.ptr)>, Workgroup> // CHECK: func @alloc_dealloc_workgroup_mem -// CHECK-NOT: alloc +// CHECK-NOT: memref.alloc // CHECK: %[[PTR:.+]] = spv.mlir.addressof @[[VAR]] // CHECK: %[[LOADPTR:.+]] = spv.AccessChain 
%[[PTR]] // CHECK: %[[VAL:.+]] = spv.Load "Workgroup" %[[LOADPTR]] : f32 // CHECK: %[[STOREPTR:.+]] = spv.AccessChain %[[PTR]] // CHECK: spv.Store "Workgroup" %[[STOREPTR]], %[[VAL]] : f32 -// CHECK-NOT: dealloc +// CHECK-NOT: memref.dealloc // CHECK: spv.Return // ----- @@ -36,10 +36,10 @@ } { func @alloc_dealloc_workgroup_mem(%arg0 : index, %arg1 : index) { - %0 = alloc() : memref<4x5xi16, 3> - %1 = load %0[%arg0, %arg1] : memref<4x5xi16, 3> - store %1, %0[%arg0, %arg1] : memref<4x5xi16, 3> - dealloc %0 : memref<4x5xi16, 3> + %0 = memref.alloc() : memref<4x5xi16, 3> + %1 = memref.load %0[%arg0, %arg1] : memref<4x5xi16, 3> + memref.store %1, %0[%arg0, %arg1] : memref<4x5xi16, 3> + memref.dealloc %0 : memref<4x5xi16, 3> return } } @@ -65,8 +65,8 @@ } { func @two_allocs() { - %0 = alloc() : memref<4x5xf32, 3> - %1 = alloc() : memref<2x3xi32, 3> + %0 = memref.alloc() : memref<4x5xf32, 3> + %1 = memref.alloc() : memref<2x3xi32, 3> return } } @@ -86,8 +86,8 @@ } { func @two_allocs_vector() { - %0 = alloc() : memref<4xvector<4xf32>, 3> - %1 = alloc() : memref<2xvector<2xi32>, 3> + %0 = memref.alloc() : memref<4xvector<4xf32>, 3> + %1 = memref.alloc() : memref<2xvector<2xi32>, 3> return } } @@ -109,8 +109,8 @@ { func @alloc_dealloc_dynamic_workgroup_mem(%arg0 : index) { // expected-error @+2 {{unhandled allocation type}} - // expected-error @+1 {{'std.alloc' op operand #0 must be index}} - %0 = alloc(%arg0) : memref<4x?xf32, 3> + // expected-error @+1 {{'memref.alloc' op operand #0 must be index}} + %0 = memref.alloc(%arg0) : memref<4x?xf32, 3> return } } @@ -124,7 +124,7 @@ { func @alloc_dealloc_mem() { // expected-error @+1 {{unhandled allocation type}} - %0 = alloc() : memref<4x5xf32> + %0 = memref.alloc() : memref<4x5xf32> return } } @@ -139,8 +139,8 @@ { func @alloc_dealloc_dynamic_workgroup_mem(%arg0 : memref<4x?xf32, 3>) { // expected-error @+2 {{unhandled deallocation type}} - // expected-error @+1 {{'std.dealloc' op operand #0 must be memref of any type values}} - dealloc %arg0 : memref<4x?xf32, 3> + // expected-error @+1 {{'memref.dealloc' op operand #0 must be memref of any type values}} + memref.dealloc %arg0 : memref<4x?xf32, 3> return } } @@ -155,7 +155,7 @@ func @alloc_dealloc_mem(%arg0 : memref<4x5xf32>) { // expected-error @+2 {{unhandled deallocation type}} // expected-error @+1 {{op operand #0 must be memref of any type values}} - dealloc %arg0 : memref<4x5xf32> + memref.dealloc %arg0 : memref<4x5xf32> return } } diff --git a/mlir/test/Conversion/StandardToSPIRV/legalization.mlir b/mlir/test/Conversion/StandardToSPIRV/legalization.mlir --- a/mlir/test/Conversion/StandardToSPIRV/legalization.mlir +++ b/mlir/test/Conversion/StandardToSPIRV/legalization.mlir @@ -3,70 +3,70 @@ // CHECK-LABEL: @fold_static_stride_subview_with_load // CHECK-SAME: [[ARG0:%.*]]: memref<12x32xf32>, [[ARG1:%.*]]: index, [[ARG2:%.*]]: index, [[ARG3:%.*]]: index, [[ARG4:%.*]]: index func @fold_static_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index) -> f32 { - // CHECK-NOT: subview + // CHECK-NOT: memref.subview // CHECK: [[C2:%.*]] = constant 2 : index // CHECK: [[C3:%.*]] = constant 3 : index // CHECK: [[STRIDE1:%.*]] = muli [[ARG3]], [[C2]] : index // CHECK: [[INDEX1:%.*]] = addi [[ARG1]], [[STRIDE1]] : index // CHECK: [[STRIDE2:%.*]] = muli [[ARG4]], [[C3]] : index // CHECK: [[INDEX2:%.*]] = addi [[ARG2]], [[STRIDE2]] : index - // CHECK: load [[ARG0]]{{\[}}[[INDEX1]], [[INDEX2]]{{\]}} - %0 = subview %arg0[%arg1, %arg2][4, 4][2, 3] : 
memref<12x32xf32> to memref<4x4xf32, offset:?, strides: [64, 3]> - %1 = load %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [64, 3]> + // CHECK: memref.load [[ARG0]]{{\[}}[[INDEX1]], [[INDEX2]]{{\]}} + %0 = memref.subview %arg0[%arg1, %arg2][4, 4][2, 3] : memref<12x32xf32> to memref<4x4xf32, offset:?, strides: [64, 3]> + %1 = memref.load %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [64, 3]> return %1 : f32 } // CHECK-LABEL: @fold_dynamic_stride_subview_with_load // CHECK-SAME: [[ARG0:%.*]]: memref<12x32xf32>, [[ARG1:%.*]]: index, [[ARG2:%.*]]: index, [[ARG3:%.*]]: index, [[ARG4:%.*]]: index, [[ARG5:%.*]]: index, [[ARG6:%.*]]: index func @fold_dynamic_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, %arg6 : index) -> f32 { - // CHECK-NOT: subview + // CHECK-NOT: memref.subview // CHECK: [[STRIDE1:%.*]] = muli [[ARG3]], [[ARG5]] : index // CHECK: [[INDEX1:%.*]] = addi [[ARG1]], [[STRIDE1]] : index // CHECK: [[STRIDE2:%.*]] = muli [[ARG4]], [[ARG6]] : index // CHECK: [[INDEX2:%.*]] = addi [[ARG2]], [[STRIDE2]] : index - // CHECK: load [[ARG0]]{{\[}}[[INDEX1]], [[INDEX2]]{{\]}} - %0 = subview %arg0[%arg1, %arg2][4, 4][%arg5, %arg6] : + // CHECK: memref.load [[ARG0]]{{\[}}[[INDEX1]], [[INDEX2]]{{\]}} + %0 = memref.subview %arg0[%arg1, %arg2][4, 4][%arg5, %arg6] : memref<12x32xf32> to memref<4x4xf32, offset:?, strides: [?, ?]> - %1 = load %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [?, ?]> + %1 = memref.load %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [?, ?]> return %1 : f32 } // CHECK-LABEL: @fold_static_stride_subview_with_store // CHECK-SAME: [[ARG0:%.*]]: memref<12x32xf32>, [[ARG1:%.*]]: index, [[ARG2:%.*]]: index, [[ARG3:%.*]]: index, [[ARG4:%.*]]: index, [[ARG5:%.*]]: f32 func @fold_static_stride_subview_with_store(%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : f32) { - // CHECK-NOT: subview + // CHECK-NOT: memref.subview // CHECK: [[C2:%.*]] = constant 2 : index // CHECK: [[C3:%.*]] = constant 3 : index // CHECK: [[STRIDE1:%.*]] = muli [[ARG3]], [[C2]] : index // CHECK: [[INDEX1:%.*]] = addi [[ARG1]], [[STRIDE1]] : index // CHECK: [[STRIDE2:%.*]] = muli [[ARG4]], [[C3]] : index // CHECK: [[INDEX2:%.*]] = addi [[ARG2]], [[STRIDE2]] : index - // CHECK: store [[ARG5]], [[ARG0]]{{\[}}[[INDEX1]], [[INDEX2]]{{\]}} - %0 = subview %arg0[%arg1, %arg2][4, 4][2, 3] : + // CHECK: memref.store [[ARG5]], [[ARG0]]{{\[}}[[INDEX1]], [[INDEX2]]{{\]}} + %0 = memref.subview %arg0[%arg1, %arg2][4, 4][2, 3] : memref<12x32xf32> to memref<4x4xf32, offset:?, strides: [64, 3]> - store %arg5, %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [64, 3]> + memref.store %arg5, %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [64, 3]> return } // CHECK-LABEL: @fold_dynamic_stride_subview_with_store // CHECK-SAME: [[ARG0:%.*]]: memref<12x32xf32>, [[ARG1:%.*]]: index, [[ARG2:%.*]]: index, [[ARG3:%.*]]: index, [[ARG4:%.*]]: index, [[ARG5:%.*]]: index, [[ARG6:%.*]]: index, [[ARG7:%.*]]: f32 func @fold_dynamic_stride_subview_with_store(%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, %arg6 : index, %arg7 : f32) { - // CHECK-NOT: subview + // CHECK-NOT: memref.subview // CHECK: [[STRIDE1:%.*]] = muli [[ARG3]], [[ARG5]] : index // CHECK: [[INDEX1:%.*]] = addi [[ARG1]], [[STRIDE1]] : index // CHECK: [[STRIDE2:%.*]] = muli [[ARG4]], [[ARG6]] : index // CHECK: [[INDEX2:%.*]] = addi [[ARG2]], [[STRIDE2]] : index 
- // CHECK: store [[ARG7]], [[ARG0]]{{\[}}[[INDEX1]], [[INDEX2]]{{\]}} - %0 = subview %arg0[%arg1, %arg2][4, 4][%arg5, %arg6] : + // CHECK: memref.store [[ARG7]], [[ARG0]]{{\[}}[[INDEX1]], [[INDEX2]]{{\]}} + %0 = memref.subview %arg0[%arg1, %arg2][4, 4][%arg5, %arg6] : memref<12x32xf32> to memref<4x4xf32, offset:?, strides: [?, ?]> - store %arg7, %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [?, ?]> + memref.store %arg7, %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [?, ?]> return } // CHECK-LABEL: @fold_static_stride_subview_with_transfer_read // CHECK-SAME: [[ARG0:%.*]]: memref<12x32xf32>, [[ARG1:%.*]]: index, [[ARG2:%.*]]: index, [[ARG3:%.*]]: index, [[ARG4:%.*]]: index func @fold_static_stride_subview_with_transfer_read(%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index) -> vector<4xf32> { - // CHECK-NOT: subview + // CHECK-NOT: memref.subview // CHECK: [[F1:%.*]] = constant 1.000000e+00 : f32 // CHECK: [[C2:%.*]] = constant 2 : index // CHECK: [[C3:%.*]] = constant 3 : index @@ -76,7 +76,7 @@ // CHECK: [[INDEX2:%.*]] = addi [[ARG2]], [[STRIDE2]] : index // CHECK: vector.transfer_read [[ARG0]]{{\[}}[[INDEX1]], [[INDEX2]]{{\]}}, [[F1]] {masked = [false]} %f1 = constant 1.0 : f32 - %0 = subview %arg0[%arg1, %arg2][4, 4][2, 3] : memref<12x32xf32> to memref<4x4xf32, offset:?, strides: [64, 3]> + %0 = memref.subview %arg0[%arg1, %arg2][4, 4][2, 3] : memref<12x32xf32> to memref<4x4xf32, offset:?, strides: [64, 3]> %1 = vector.transfer_read %0[%arg3, %arg4], %f1 {masked = [false]} : memref<4x4xf32, offset:?, strides: [64, 3]>, vector<4xf32> return %1 : vector<4xf32> } @@ -84,7 +84,7 @@ // CHECK-LABEL: @fold_static_stride_subview_with_transfer_write // CHECK-SAME: [[ARG0:%.*]]: memref<12x32xf32>, [[ARG1:%.*]]: index, [[ARG2:%.*]]: index, [[ARG3:%.*]]: index, [[ARG4:%.*]]: index, [[ARG5:%.*]]: vector<4xf32> func @fold_static_stride_subview_with_transfer_write(%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : vector<4xf32>) { - // CHECK-NOT: subview + // CHECK-NOT: memref.subview // CHECK: [[C2:%.*]] = constant 2 : index // CHECK: [[C3:%.*]] = constant 3 : index // CHECK: [[STRIDE1:%.*]] = muli [[ARG3]], [[C2]] : index @@ -92,7 +92,7 @@ // CHECK: [[STRIDE2:%.*]] = muli [[ARG4]], [[C3]] : index // CHECK: [[INDEX2:%.*]] = addi [[ARG2]], [[STRIDE2]] : index // CHECK: vector.transfer_write [[ARG5]], [[ARG0]]{{\[}}[[INDEX1]], [[INDEX2]]{{\]}} {masked = [false]} - %0 = subview %arg0[%arg1, %arg2][4, 4][2, 3] : + %0 = memref.subview %arg0[%arg1, %arg2][4, 4][2, 3] : memref<12x32xf32> to memref<4x4xf32, offset:?, strides: [64, 3]> vector.transfer_write %arg5, %0[%arg3, %arg4] {masked = [false]} : vector<4xf32>, memref<4x4xf32, offset:?, strides: [64, 3]> return diff --git a/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir b/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir --- a/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir +++ b/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir @@ -864,7 +864,7 @@ } //===----------------------------------------------------------------------===// -// std load/store ops +// memref load/store ops //===----------------------------------------------------------------------===// // CHECK-LABEL: @load_store_zero_rank_float @@ -876,13 +876,13 @@ // CHECK-SAME: [[ZERO1]], [[ZERO1]] // CHECK-SAME: ] : // CHECK: spv.Load "StorageBuffer" %{{.*}} : f32 - %0 = load %arg0[] : memref + %0 = memref.load %arg0[] : memref // CHECK: [[ZERO2:%.*]] = 
spv.Constant 0 : i32 // CHECK: spv.AccessChain [[ARG1]][ // CHECK-SAME: [[ZERO2]], [[ZERO2]] // CHECK-SAME: ] : // CHECK: spv.Store "StorageBuffer" %{{.*}} : f32 - store %0, %arg1[] : memref + memref.store %0, %arg1[] : memref return } @@ -895,13 +895,13 @@ // CHECK-SAME: [[ZERO1]], [[ZERO1]] // CHECK-SAME: ] : // CHECK: spv.Load "StorageBuffer" %{{.*}} : i32 - %0 = load %arg0[] : memref + %0 = memref.load %arg0[] : memref // CHECK: [[ZERO2:%.*]] = spv.Constant 0 : i32 // CHECK: spv.AccessChain [[ARG1]][ // CHECK-SAME: [[ZERO2]], [[ZERO2]] // CHECK-SAME: ] : // CHECK: spv.Store "StorageBuffer" %{{.*}} : i32 - store %0, %arg1[] : memref + memref.store %0, %arg1[] : memref return } @@ -934,7 +934,7 @@ // CHECK: %[[T2:.+]] = spv.Constant 24 : i32 // CHECK: %[[T3:.+]] = spv.ShiftLeftLogical %[[T1]], %[[T2]] : i32, i32 // CHECK: spv.ShiftRightArithmetic %[[T3]], %[[T2]] : i32, i32 - %0 = load %arg0[] : memref + %0 = memref.load %arg0[] : memref return } @@ -960,7 +960,7 @@ // CHECK: %[[T2:.+]] = spv.Constant 16 : i32 // CHECK: %[[T3:.+]] = spv.ShiftLeftLogical %[[T1]], %[[T2]] : i32, i32 // CHECK: spv.ShiftRightArithmetic %[[T3]], %[[T2]] : i32, i32 - %0 = load %arg0[%index] : memref<10xi16> + %0 = memref.load %arg0[%index] : memref<10xi16> return } @@ -969,7 +969,7 @@ // CHECK-NOT: spv.SDiv // CHECK: spv.Load // CHECK-NOT: spv.ShiftRightArithmetic - %0 = load %arg0[] : memref + %0 = memref.load %arg0[] : memref return } @@ -978,7 +978,7 @@ // CHECK-NOT: spv.SDiv // CHECK: spv.Load // CHECK-NOT: spv.ShiftRightArithmetic - %0 = load %arg0[] : memref + %0 = memref.load %arg0[] : memref return } @@ -1000,7 +1000,7 @@ // CHECK: %[[PTR:.+]] = spv.AccessChain %[[ARG0]][%[[ZERO]], %[[ACCESS_IDX]]] // CHECK: spv.AtomicAnd "Device" "AcquireRelease" %[[PTR]], %[[MASK]] // CHECK: spv.AtomicOr "Device" "AcquireRelease" %[[PTR]], %[[STORE_VAL]] - store %value, %arg0[] : memref + memref.store %value, %arg0[] : memref return } @@ -1026,7 +1026,7 @@ // CHECK: %[[PTR:.+]] = spv.AccessChain %[[ARG0]][%[[ZERO]], %[[ACCESS_IDX]]] // CHECK: spv.AtomicAnd "Device" "AcquireRelease" %[[PTR]], %[[MASK]] // CHECK: spv.AtomicOr "Device" "AcquireRelease" %[[PTR]], %[[STORE_VAL]] - store %value, %arg0[%index] : memref<10xi16> + memref.store %value, %arg0[%index] : memref<10xi16> return } @@ -1035,7 +1035,7 @@ // CHECK: spv.Store // CHECK-NOT: spv.AtomicAnd // CHECK-NOT: spv.AtomicOr - store %value, %arg0[] : memref + memref.store %value, %arg0[] : memref return } @@ -1044,7 +1044,7 @@ // CHECK: spv.Store // CHECK-NOT: spv.AtomicAnd // CHECK-NOT: spv.AtomicOr - store %value, %arg0[] : memref + memref.store %value, %arg0[] : memref return } @@ -1077,7 +1077,7 @@ // CHECK: %[[T2:.+]] = spv.Constant 24 : i32 // CHECK: %[[T3:.+]] = spv.ShiftLeftLogical %[[T1]], %[[T2]] : i32, i32 // CHECK: spv.ShiftRightArithmetic %[[T3]], %[[T2]] : i32, i32 - %0 = load %arg0[] : memref + %0 = memref.load %arg0[] : memref return } @@ -1086,7 +1086,7 @@ // CHECK-NOT: spv.SDiv // CHECK: spv.Load // CHECK-NOT: spv.ShiftRightArithmetic - %0 = load %arg0[] : memref + %0 = memref.load %arg0[] : memref return } @@ -1108,7 +1108,7 @@ // CHECK: %[[PTR:.+]] = spv.AccessChain %[[ARG0]][%[[ZERO]], %[[ACCESS_IDX]]] // CHECK: spv.AtomicAnd "Device" "AcquireRelease" %[[PTR]], %[[MASK]] // CHECK: spv.AtomicOr "Device" "AcquireRelease" %[[PTR]], %[[STORE_VAL]] - store %value, %arg0[] : memref + memref.store %value, %arg0[] : memref return } @@ -1117,7 +1117,7 @@ // CHECK: spv.Store // CHECK-NOT: spv.AtomicAnd // CHECK-NOT: spv.AtomicOr - store %value, 
%arg0[%index] : memref<10xi16> + memref.store %value, %arg0[%index] : memref<10xi16> return } diff --git a/mlir/test/Conversion/StandardToSPIRV/subview-to-spirv.mlir b/mlir/test/Conversion/StandardToSPIRV/subview-to-spirv.mlir --- a/mlir/test/Conversion/StandardToSPIRV/subview-to-spirv.mlir +++ b/mlir/test/Conversion/StandardToSPIRV/subview-to-spirv.mlir @@ -3,7 +3,7 @@ module { //===----------------------------------------------------------------------===// -// std.subview +// memref.subview //===----------------------------------------------------------------------===// // CHECK-LABEL: @fold_static_stride_subview @@ -21,17 +21,17 @@ // CHECK: %[[T1:.*]] = addi %[[ARG1]], %[[T0]] // CHECK: %[[T2:.*]] = muli %[[ARG4]], %[[ARG2]] // CHECK: %[[T3:.*]] = addi %[[T2]], %[[C2]] - // CHECK: %[[LOADVAL:.*]] = load %[[ARG0]][%[[T1]], %[[T3]]] + // CHECK: %[[LOADVAL:.*]] = memref.load %[[ARG0]][%[[T1]], %[[T3]]] // CHECK: %[[STOREVAL:.*]] = math.sqrt %[[LOADVAL]] // CHECK: %[[T6:.*]] = muli %[[ARG3]], %[[C3]] // CHECK: %[[T7:.*]] = addi %[[ARG1]], %[[T6]] // CHECK: %[[T8:.*]] = muli %[[ARG4]], %[[ARG2]] // CHECK: %[[T9:.*]] = addi %[[T8]], %[[C2]] - // CHECK: store %[[STOREVAL]], %[[ARG0]][%[[T7]], %[[T9]]] - %0 = subview %arg0[%arg1, 2][4, 4][3, %arg2] : memref<12x32xf32> to memref<4x4xf32, offset:?, strides: [96, ?]> - %1 = load %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [96, ?]> + // CHECK: memref.store %[[STOREVAL]], %[[ARG0]][%[[T7]], %[[T9]]] + %0 = memref.subview %arg0[%arg1, 2][4, 4][3, %arg2] : memref<12x32xf32> to memref<4x4xf32, offset:?, strides: [96, ?]> + %1 = memref.load %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [96, ?]> %2 = math.sqrt %1 : f32 - store %2, %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [96, ?]> + memref.store %2, %0[%arg3, %arg4] : memref<4x4xf32, offset:?, strides: [96, ?]> return } diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir --- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir @@ -1056,7 +1056,7 @@ // CHECK: %[[vecPtr:.*]] = llvm.bitcast %[[gep]] : // CHECK-SAME: !llvm.ptr to !llvm.ptr> // CHECK: %[[C0:.*]] = constant 0 : index -// CHECK: %[[DIM:.*]] = dim %{{.*}}, %[[C0]] : memref +// CHECK: %[[DIM:.*]] = memref.dim %{{.*}}, %[[C0]] : memref // // 2. Create a vector with linear indices [ 0 .. vector_length - 1 ]. // CHECK: %[[linearIndex:.*]] = constant dense @@ -1118,7 +1118,7 @@ // CHECK-LABEL: func @transfer_read_2d_to_1d // CHECK-SAME: %[[BASE_0:[a-zA-Z0-9]*]]: index, %[[BASE_1:[a-zA-Z0-9]*]]: index) -> vector<17xf32> // CHECK: %[[c1:.*]] = constant 1 : index -// CHECK: %[[DIM:.*]] = dim %{{.*}}, %[[c1]] : memref +// CHECK: %[[DIM:.*]] = memref.dim %{{.*}}, %[[c1]] : memref // // Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. // CHECK: %[[trunc:.*]] = index_cast %[[BASE_1]] : index to i32 @@ -1152,7 +1152,7 @@ // // 2. Check address space of the memref is correct. // CHECK: %[[c0:.*]] = constant 0 : index -// CHECK: %[[DIM:.*]] = dim %{{.*}}, %[[c0]] : memref +// CHECK: %[[DIM:.*]] = memref.dim %{{.*}}, %[[c0]] : memref // // 3. Check address space for GEP is correct. 
// CHECK: %[[gep_b:.*]] = llvm.getelementptr {{.*}} : diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir --- a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir +++ b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir @@ -4,7 +4,7 @@ // CHECK-LABEL: func @materialize_read_1d() { func @materialize_read_1d() { %f0 = constant 0.0: f32 - %A = alloc () : memref<7x42xf32> + %A = memref.alloc () : memref<7x42xf32> affine.for %i0 = 0 to 7 step 4 { affine.for %i1 = 0 to 42 step 4 { %f1 = vector.transfer_read %A[%i0, %i1], %f0 {permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<7x42xf32>, vector<4xf32> @@ -16,7 +16,7 @@ %f4 = vector.transfer_read %A[%i0, %ip3], %f0 {permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<7x42xf32>, vector<4xf32> // Both accesses in the load must be clipped otherwise %i1 + 2 and %i1 + 3 will go out of bounds. // CHECK: scf.if - // CHECK-NEXT: load + // CHECK-NEXT: memref.load // CHECK-NEXT: vector.insertelement // CHECK-NEXT: store // CHECK-NEXT: else @@ -35,7 +35,7 @@ // CHECK-LABEL: func @materialize_read_1d_partially_specialized func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %dyn4 : index) { %f0 = constant 0.0: f32 - %A = alloc (%dyn1, %dyn2, %dyn4) : memref<7x?x?x42x?xf32> + %A = memref.alloc (%dyn1, %dyn2, %dyn4) : memref<7x?x?x42x?xf32> affine.for %i0 = 0 to 7 { affine.for %i1 = 0 to %dyn1 { affine.for %i2 = 0 to %dyn2 { @@ -52,9 +52,9 @@ } } } - // CHECK: %[[tensor:[0-9]+]] = alloc - // CHECK-NOT: {{.*}} dim %[[tensor]], %c0 - // CHECK-NOT: {{.*}} dim %[[tensor]], %c3 + // CHECK: %[[tensor:[0-9]+]] = memref.alloc + // CHECK-NOT: {{.*}} memref.dim %[[tensor]], %c0 + // CHECK-NOT: {{.*}} memref.dim %[[tensor]], %c3 return } @@ -65,13 +65,13 @@ // CHECK-LABEL: func @materialize_read(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) { func @materialize_read(%M: index, %N: index, %O: index, %P: index) { %f0 = constant 0.0: f32 - // CHECK-DAG: %[[ALLOC:.*]] = alloca() : memref<5x4xvector<3xf32>> + // CHECK-DAG: %[[ALLOC:.*]] = memref.alloca() : memref<5x4xvector<3xf32>> // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index // CHECK-DAG: %[[C3:.*]] = constant 3 : index // CHECK-DAG: %[[C4:.*]] = constant 4 : index // CHECK-DAG: %[[C5:.*]] = constant 5 : index - // CHECK: %{{.*}} = alloc(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : memref + // CHECK: %{{.*}} = memref.alloc(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : memref // CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %{{.*}} step 3 { // CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %{{.*}} { // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} { @@ -80,11 +80,11 @@ // CHECK-NEXT: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] { // CHECK-NEXT: scf.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] { // CHECK: %[[VIDX:.*]] = index_cast %[[I4]] - // CHECK: %[[VEC:.*]] = load %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK: %[[VEC:.*]] = memref.load %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> // CHECK: %[[L0:.*]] = affine.apply #[[$ADD]](%[[I0]], %[[I4]]) // CHECK: %[[L3:.*]] = affine.apply #[[$ADD]](%[[I3]], %[[I6]]) // CHECK-NEXT: scf.if - // CHECK-NEXT: %[[SCAL:.*]] = load %{{.*}}[%[[L0]], %[[I1]], %[[I2]], %[[L3]]] : memref + // CHECK-NEXT: %[[SCAL:.*]] = memref.load %{{.*}}[%[[L0]], %[[I1]], %[[I2]], %[[L3]]] : memref // CHECK-NEXT: %[[RVEC:.*]] = vector.insertelement %[[SCAL]], %[[VEC]][%[[VIDX]] : i32] : vector<3xf32> // CHECK-NEXT: 
store %[[RVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> // CHECK-NEXT: } else { @@ -95,7 +95,7 @@ // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: %[[ALLOC_CAST:.*]] = vector.type_cast %[[ALLOC]] : memref<5x4xvector<3xf32>> to memref> - // CHECK-NEXT: %[[LD:.*]] = load %[[ALLOC_CAST]][] : memref> + // CHECK-NEXT: %[[LD:.*]] = memref.load %[[ALLOC_CAST]][] : memref> // CHECK-NEXT: "dummy_use"(%[[LD]]) : (vector<5x4x3xf32>) -> () // CHECK-NEXT: } // CHECK-NEXT: } @@ -107,7 +107,7 @@ // Check that I0 + I4 (of size 3) read from first index load(L0, ...) and write into last index store(..., I4) // Check that I3 + I6 (of size 5) read from last index load(..., L3) and write into first index store(I6, ...) // Other dimensions are just accessed with I1, I2 resp. - %A = alloc (%M, %N, %O, %P) : memref + %A = memref.alloc (%M, %N, %O, %P) : memref affine.for %i0 = 0 to %M step 3 { affine.for %i1 = 0 to %N { affine.for %i2 = 0 to %O { @@ -129,14 +129,14 @@ // CHECK-LABEL:func @materialize_write(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) { func @materialize_write(%M: index, %N: index, %O: index, %P: index) { - // CHECK-DAG: %[[ALLOC:.*]] = alloca() : memref<5x4xvector<3xf32>> + // CHECK-DAG: %[[ALLOC:.*]] = memref.alloca() : memref<5x4xvector<3xf32>> // CHECK-DAG: %{{.*}} = constant dense<1.000000e+00> : vector<5x4x3xf32> // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index // CHECK-DAG: %[[C3:.*]] = constant 3 : index // CHECK-DAG: %[[C4:.*]] = constant 4 : index // CHECK-DAG: %[[C5:.*]] = constant 5 : index - // CHECK: %{{.*}} = alloc(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : memref + // CHECK: %{{.*}} = memref.alloc(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : memref // CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %{{.*}} step 3 { // CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %{{.*}} step 4 { // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} { @@ -151,7 +151,7 @@ // CHECK: %[[S1:.*]] = affine.apply #[[$ADD]](%[[I1]], %[[I5]]) // CHECK: %[[S3:.*]] = affine.apply #[[$ADD]](%[[I3]], %[[I6]]) // CHECK-NEXT: scf.if - // CHECK-NEXT: %[[VEC:.*]] = load {{.*}}[%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK-NEXT: %[[VEC:.*]] = memref.load {{.*}}[%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> // CHECK-NEXT: %[[SCAL:.*]] = vector.extractelement %[[VEC]][%[[VIDX]] : i32] : vector<3xf32> // CHECK: store %[[SCAL]], {{.*}}[%[[S0]], %[[S1]], %[[I2]], %[[S3]]] : memref // CHECK-NEXT: } @@ -169,7 +169,7 @@ // Check that I1 + I5 (of size 4) read from second index load(..., I5, ...) and write into second index store(..., S1, ...) // Check that I3 + I6 (of size 5) read from first index load(I6, ...) and write into last index store(..., S3) // Other dimension is just accessed with I2. 
- %A = alloc (%M, %N, %O, %P) : memref + %A = memref.alloc (%M, %N, %O, %P) : memref %f1 = constant dense<1.000000e+00> : vector<5x4x3xf32> affine.for %i0 = 0 to %M step 3 { affine.for %i1 = 0 to %N step 4 { @@ -204,9 +204,9 @@ %f7 = constant 7.0: f32 // CHECK-DAG: %[[splat:.*]] = constant dense<7.000000e+00> : vector<15xf32> - // CHECK-DAG: %[[alloc:.*]] = alloca() : memref<3xvector<15xf32>> + // CHECK-DAG: %[[alloc:.*]] = memref.alloca() : memref<3xvector<15xf32>> // CHECK-DAG: %[[C0:.*]] = constant 0 : index - // CHECK-DAG: %[[dim:.*]] = dim %[[A]], %[[C0]] : memref + // CHECK-DAG: %[[dim:.*]] = memref.dim %[[A]], %[[C0]] : memref // CHECK: affine.for %[[I:.*]] = 0 to 3 { // CHECK: %[[add:.*]] = affine.apply #[[$MAP0]](%[[I]])[%[[base]]] // CHECK: %[[cond1:.*]] = cmpi slt, %[[add]], %[[dim]] : index @@ -217,13 +217,13 @@ // CHECK: store %[[splat]], %[[alloc]][%[[I]]] : memref<3xvector<15xf32>> // CHECK: } // CHECK: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<3xvector<15xf32>> to memref> - // CHECK: %[[cst:.*]] = load %[[vmemref]][] : memref> + // CHECK: %[[cst:.*]] = memref.load %[[vmemref]][] : memref> // FULL-UNROLL: %[[pad:.*]] = constant 7.000000e+00 : f32 // FULL-UNROLL: %[[VEC0:.*]] = constant dense<7.000000e+00> : vector<3x15xf32> // FULL-UNROLL: %[[C0:.*]] = constant 0 : index // FULL-UNROLL: %[[SPLAT:.*]] = constant dense<7.000000e+00> : vector<15xf32> - // FULL-UNROLL: %[[DIM:.*]] = dim %[[A]], %[[C0]] : memref + // FULL-UNROLL: %[[DIM:.*]] = memref.dim %[[A]], %[[C0]] : memref // FULL-UNROLL: cmpi slt, %[[base]], %[[DIM]] : index // FULL-UNROLL: %[[VEC1:.*]] = scf.if %{{.*}} -> (vector<3x15xf32>) { // FULL-UNROLL: vector.transfer_read %[[A]][%[[base]], %[[base]]], %[[pad]] : memref, vector<15xf32> @@ -277,20 +277,20 @@ // FULL-UNROLL-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32> func @transfer_write_progressive(%A : memref, %base: index, %vec: vector<3x15xf32>) { // CHECK: %[[C0:.*]] = constant 0 : index - // CHECK: %[[alloc:.*]] = alloca() : memref<3xvector<15xf32>> + // CHECK: %[[alloc:.*]] = memref.alloca() : memref<3xvector<15xf32>> // CHECK: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<3xvector<15xf32>> to memref> // CHECK: store %[[vec]], %[[vmemref]][] : memref> - // CHECK: %[[dim:.*]] = dim %[[A]], %[[C0]] : memref + // CHECK: %[[dim:.*]] = memref.dim %[[A]], %[[C0]] : memref // CHECK: affine.for %[[I:.*]] = 0 to 3 { // CHECK: %[[add:.*]] = affine.apply #[[$MAP0]](%[[I]])[%[[base]]] // CHECK: %[[cmp:.*]] = cmpi slt, %[[add]], %[[dim]] : index // CHECK: scf.if %[[cmp]] { - // CHECK: %[[vec_1d:.*]] = load %0[%[[I]]] : memref<3xvector<15xf32>> + // CHECK: %[[vec_1d:.*]] = memref.load %0[%[[I]]] : memref<3xvector<15xf32>> // CHECK: vector.transfer_write %[[vec_1d]], %[[A]][%[[add]], %[[base]]] : vector<15xf32>, memref // CHECK: } // FULL-UNROLL: %[[C0:.*]] = constant 0 : index - // FULL-UNROLL: %[[DIM:.*]] = dim %[[A]], %[[C0]] : memref + // FULL-UNROLL: %[[DIM:.*]] = memref.dim %[[A]], %[[C0]] : memref // FULL-UNROLL: %[[CMP0:.*]] = cmpi slt, %[[base]], %[[DIM]] : index // FULL-UNROLL: scf.if %[[CMP0]] { // FULL-UNROLL: %[[V0:.*]] = vector.extract %[[vec]][0] : vector<3x15xf32> @@ -331,12 +331,12 @@ // FULL-UNROLL-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32> func @transfer_write_progressive_unmasked(%A : memref, %base: index, %vec: vector<3x15xf32>) { // CHECK-NOT: scf.if - // CHECK-NEXT: %[[alloc:.*]] = alloca() : memref<3xvector<15xf32>> + // CHECK-NEXT: %[[alloc:.*]] = memref.alloca() : memref<3xvector<15xf32>> // CHECK-NEXT: %[[vmemref:.*]] = 
vector.type_cast %[[alloc]] : memref<3xvector<15xf32>> to memref> // CHECK-NEXT: store %[[vec]], %[[vmemref]][] : memref> // CHECK-NEXT: affine.for %[[I:.*]] = 0 to 3 { // CHECK-NEXT: %[[add:.*]] = affine.apply #[[$MAP0]](%[[I]])[%[[base]]] - // CHECK-NEXT: %[[vec_1d:.*]] = load %0[%[[I]]] : memref<3xvector<15xf32>> + // CHECK-NEXT: %[[vec_1d:.*]] = memref.load %0[%[[I]]] : memref<3xvector<15xf32>> // CHECK-NEXT: vector.transfer_write %[[vec_1d]], %[[A]][%[[add]], %[[base]]] {masked = [false]} : vector<15xf32>, memref // FULL-UNROLL: %[[VEC0:.*]] = vector.extract %[[vec]][0] : vector<3x15xf32> @@ -384,8 +384,8 @@ // CHECK: %[[cst:.*]] = constant 0.000000e+00 : f32 // CHECK: %[[c2:.*]] = constant 2 : index // CHECK: %[[cst0:.*]] = constant dense<0.000000e+00> : vector<3xf32> -// CHECK: %[[m:.*]] = alloca() : memref<3xvector<3xf32>> -// CHECK: %[[d:.*]] = dim %[[A]], %[[c2]] : memref +// CHECK: %[[m:.*]] = memref.alloca() : memref<3xvector<3xf32>> +// CHECK: %[[d:.*]] = memref.dim %[[A]], %[[c2]] : memref // CHECK: affine.for %[[arg1:.*]] = 0 to 3 { // CHECK: %[[cmp:.*]] = cmpi slt, %[[arg1]], %[[d]] : index // CHECK: scf.if %[[cmp]] { @@ -396,7 +396,7 @@ // CHECK: } // CHECK: } // CHECK: %[[cast:.*]] = vector.type_cast %[[m]] : memref<3xvector<3xf32>> to memref> -// CHECK: %[[ret:.*]] = load %[[cast]][] : memref> +// CHECK: %[[ret:.*]] = memref.load %[[cast]][] : memref> // CHECK: return %[[ret]] : vector<3x3xf32> func @transfer_write_minor_identity(%A : vector<3x3xf32>, %B : memref) { @@ -413,14 +413,14 @@ // CHECK-SAME: %[[B:.*]]: memref) // CHECK: %[[c0:.*]] = constant 0 : index // CHECK: %[[c2:.*]] = constant 2 : index -// CHECK: %[[m:.*]] = alloca() : memref<3xvector<3xf32>> +// CHECK: %[[m:.*]] = memref.alloca() : memref<3xvector<3xf32>> // CHECK: %[[cast:.*]] = vector.type_cast %[[m]] : memref<3xvector<3xf32>> to memref> // CHECK: store %[[A]], %[[cast]][] : memref> -// CHECK: %[[d:.*]] = dim %[[B]], %[[c2]] : memref +// CHECK: %[[d:.*]] = memref.dim %[[B]], %[[c2]] : memref // CHECK: affine.for %[[arg2:.*]] = 0 to 3 { // CHECK: %[[cmp:.*]] = cmpi slt, %[[arg2]], %[[d]] : index // CHECK: scf.if %[[cmp]] { -// CHECK: %[[tmp:.*]] = load %[[m]][%[[arg2]]] : memref<3xvector<3xf32>> +// CHECK: %[[tmp:.*]] = memref.load %[[m]][%[[arg2]]] : memref<3xvector<3xf32>> // CHECK: vector.transfer_write %[[tmp]], %[[B]][%[[c0]], %[[c0]], %[[arg2]], %[[c0]]] : vector<3xf32>, memref // CHECK: } // CHECK: } @@ -438,7 +438,7 @@ // CHECK-LABEL: transfer_read_strided( // CHECK: scf.for -// CHECK: load +// CHECK: memref.load func @transfer_write_strided(%A : vector<4xf32>, %B : memref<8x4xf32, affine_map<(d0, d1) -> (d0 + d1 * 8)>>) { %c0 = constant 0 : index diff --git a/mlir/test/Dialect/Affine/SuperVectorize/uniform_divergent.mlir b/mlir/test/Dialect/Affine/SuperVectorize/uniform_divergent.mlir --- a/mlir/test/Dialect/Affine/SuperVectorize/uniform_divergent.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/uniform_divergent.mlir @@ -42,7 +42,7 @@ // CHECK-LABEL: @uniform_load func @uniform_load(%A : memref, %C : memref) { %c0 = constant 0 : index - %N = dim %A, %c0 : memref + %N = memref.dim %A, %c0 : memref affine.for %i = 0 to %N { %uniform_ld = affine.load %A[%i, %i] : memref affine.for %j = 0 to %N { diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vector_utils.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vector_utils.mlir --- a/mlir/test/Dialect/Affine/SuperVectorize/vector_utils.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/vector_utils.mlir @@ -4,12 +4,12 @@ func 
@vector_add_2d(%arg0: index, %arg1: index) -> f32 { // Nothing should be matched in this first block. - // CHECK-NOT:matched: {{.*}} = alloc{{.*}} + // CHECK-NOT:matched: {{.*}} = memref.alloc{{.*}} // CHECK-NOT:matched: {{.*}} = constant 0{{.*}} // CHECK-NOT:matched: {{.*}} = constant 1{{.*}} - %0 = alloc(%arg0, %arg1) : memref - %1 = alloc(%arg0, %arg1) : memref - %2 = alloc(%arg0, %arg1) : memref + %0 = memref.alloc(%arg0, %arg1) : memref + %1 = memref.alloc(%arg0, %arg1) : memref + %2 = memref.alloc(%arg0, %arg1) : memref %c0 = constant 0 : index %cst = constant 1.000000e+00 : f32 @@ -29,11 +29,11 @@ // Nothing should be matched in this last block. // CHECK-NOT:matched: {{.*}} = constant 7{{.*}} // CHECK-NOT:matched: {{.*}} = constant 42{{.*}} - // CHECK-NOT:matched: {{.*}} = load{{.*}} + // CHECK-NOT:matched: {{.*}} = memref.load{{.*}} // CHECK-NOT:matched: return {{.*}} %c7 = constant 7 : index %c42 = constant 42 : index - %9 = load %2[%c7, %c42] : memref + %9 = memref.load %2[%c7, %c42] : memref return %9 : f32 } diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir --- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_1d.mlir @@ -8,15 +8,15 @@ // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index // CHECK-DAG: %[[C2:.*]] = constant 2 : index -// CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, %[[C0]] : memref -// CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, %[[C1]] : memref -// CHECK-DAG: [[ARG_P:%[0-9]+]] = dim %{{.*}}, %[[C2]] : memref +// CHECK-DAG: [[ARG_M:%[0-9]+]] = memref.dim %{{.*}}, %[[C0]] : memref +// CHECK-DAG: [[ARG_N:%[0-9]+]] = memref.dim %{{.*}}, %[[C1]] : memref +// CHECK-DAG: [[ARG_P:%[0-9]+]] = memref.dim %{{.*}}, %[[C2]] : memref %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %M = dim %A, %c0 : memref - %N = dim %A, %c1 : memref - %P = dim %B, %c2 : memref + %M = memref.dim %A, %c0 : memref + %N = memref.dim %A, %c1 : memref + %P = memref.dim %B, %c2 : memref // CHECK: for {{.*}} step 128 // CHECK-NEXT: %{{.*}} = affine.apply #[[$map_id1]](%[[C0]]) @@ -36,15 +36,15 @@ // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index // CHECK-DAG: %[[C2:.*]] = constant 2 : index -// CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, %[[C0]] : memref -// CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, %[[C1]] : memref -// CHECK-DAG: [[ARG_P:%[0-9]+]] = dim %{{.*}}, %[[C2]] : memref +// CHECK-DAG: [[ARG_M:%[0-9]+]] = memref.dim %{{.*}}, %[[C0]] : memref +// CHECK-DAG: [[ARG_N:%[0-9]+]] = memref.dim %{{.*}}, %[[C1]] : memref +// CHECK-DAG: [[ARG_P:%[0-9]+]] = memref.dim %{{.*}}, %[[C2]] : memref %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %M = dim %A, %c0 : memref - %N = dim %A, %c1 : memref - %P = dim %B, %c2 : memref + %M = memref.dim %A, %c0 : memref + %N = memref.dim %A, %c1 : memref + %P = memref.dim %B, %c2 : memref // CHECK:for [[IV3:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128 // CHECK-NEXT: %[[CST:.*]] = constant 0.0{{.*}}: f32 @@ -62,15 +62,15 @@ // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index // CHECK-DAG: %[[C2:.*]] = constant 2 : index -// CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %arg0, %[[C0]] : memref -// CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %arg0, %[[C1]] : memref -// CHECK-DAG: [[ARG_P:%[0-9]+]] = dim %arg1, %[[C2]] : memref +// CHECK-DAG: [[ARG_M:%[0-9]+]] = 
memref.dim %arg0, %[[C0]] : memref +// CHECK-DAG: [[ARG_N:%[0-9]+]] = memref.dim %arg0, %[[C1]] : memref +// CHECK-DAG: [[ARG_P:%[0-9]+]] = memref.dim %arg1, %[[C2]] : memref %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %M = dim %A, %c0 : memref - %N = dim %A, %c1 : memref - %P = dim %B, %c2 : memref + %M = memref.dim %A, %c0 : memref + %N = memref.dim %A, %c1 : memref + %P = memref.dim %B, %c2 : memref // CHECK:for [[IV8:%[arg0-9]+]] = 0 to [[ARG_M]] step 128 // CHECK-NEXT: for [[IV9:%[arg0-9]*]] = 0 to [[ARG_N]] { @@ -90,9 +90,9 @@ // CHECK-LABEL: func @vector_add_2d func @vector_add_2d(%M : index, %N : index) -> f32 { - %A = alloc (%M, %N) : memref - %B = alloc (%M, %N) : memref - %C = alloc (%M, %N) : memref + %A = memref.alloc (%M, %N) : memref + %B = memref.alloc (%M, %N) : memref + %C = memref.alloc (%M, %N) : memref %f1 = constant 1.0 : f32 %f2 = constant 2.0 : f32 affine.for %i0 = 0 to %M { @@ -147,15 +147,15 @@ // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index // CHECK-DAG: %[[C2:.*]] = constant 2 : index -// CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, %[[C0]] : memref -// CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, %[[C1]] : memref -// CHECK-DAG: [[ARG_P:%[0-9]+]] = dim %{{.*}}, %[[C2]] : memref +// CHECK-DAG: [[ARG_M:%[0-9]+]] = memref.dim %{{.*}}, %[[C0]] : memref +// CHECK-DAG: [[ARG_N:%[0-9]+]] = memref.dim %{{.*}}, %[[C1]] : memref +// CHECK-DAG: [[ARG_P:%[0-9]+]] = memref.dim %{{.*}}, %[[C2]] : memref %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %M = dim %A, %c0 : memref - %N = dim %A, %c1 : memref - %P = dim %B, %c2 : memref + %M = memref.dim %A, %c0 : memref + %N = memref.dim %A, %c1 : memref + %P = memref.dim %B, %c2 : memref // CHECK:for {{.*}} [[ARG_M]] { affine.for %i1 = 0 to %M { // not vectorized @@ -171,15 +171,15 @@ // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index // CHECK-DAG: %[[C2:.*]] = constant 2 : index -// CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, %[[C0]] : memref -// CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, %[[C1]] : memref -// CHECK-DAG: [[ARG_P:%[0-9]+]] = dim %{{.*}}, %[[C2]] : memref +// CHECK-DAG: [[ARG_M:%[0-9]+]] = memref.dim %{{.*}}, %[[C0]] : memref +// CHECK-DAG: [[ARG_N:%[0-9]+]] = memref.dim %{{.*}}, %[[C1]] : memref +// CHECK-DAG: [[ARG_P:%[0-9]+]] = memref.dim %{{.*}}, %[[C2]] : memref %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %M = dim %A, %c0 : memref - %N = dim %A, %c1 : memref - %P = dim %B, %c2 : memref + %M = memref.dim %A, %c0 : memref + %N = memref.dim %A, %c1 : memref + %P = memref.dim %B, %c2 : memref // CHECK: affine.for %{{.*}}{{[0-9]*}} = 0 to [[ARG_M]] { affine.for %i2 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1 @@ -195,15 +195,15 @@ // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index // CHECK-DAG: %[[C2:.*]] = constant 2 : index -// CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, %[[C0]] : memref -// CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, %[[C1]] : memref -// CHECK-DAG: [[ARG_P:%[0-9]+]] = dim %{{.*}}, %[[C2]] : memref +// CHECK-DAG: [[ARG_M:%[0-9]+]] = memref.dim %{{.*}}, %[[C0]] : memref +// CHECK-DAG: [[ARG_N:%[0-9]+]] = memref.dim %{{.*}}, %[[C1]] : memref +// CHECK-DAG: [[ARG_P:%[0-9]+]] = memref.dim %{{.*}}, %[[C2]] : memref %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %M = dim %A, %c0 : memref - %N = dim %A, %c1 : memref - %P = dim 
%B, %c2 : memref + %M = memref.dim %A, %c0 : memref + %N = memref.dim %A, %c1 : memref + %P = memref.dim %B, %c2 : memref // CHECK:for [[IV4:%[arg0-9]+]] = 0 to [[ARG_M]] step 128 { // CHECK-NEXT: for [[IV5:%[arg0-9]*]] = 0 to [[ARG_N]] { @@ -224,15 +224,15 @@ // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index // CHECK-DAG: %[[C2:.*]] = constant 2 : index -// CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, %[[C0]] : memref -// CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, %[[C1]] : memref -// CHECK-DAG: [[ARG_P:%[0-9]+]] = dim %{{.*}}, %[[C2]] : memref +// CHECK-DAG: [[ARG_M:%[0-9]+]] = memref.dim %{{.*}}, %[[C0]] : memref +// CHECK-DAG: [[ARG_N:%[0-9]+]] = memref.dim %{{.*}}, %[[C1]] : memref +// CHECK-DAG: [[ARG_P:%[0-9]+]] = memref.dim %{{.*}}, %[[C2]] : memref %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %M = dim %A, %c0 : memref - %N = dim %A, %c1 : memref - %P = dim %B, %c2 : memref + %M = memref.dim %A, %c0 : memref + %N = memref.dim %A, %c1 : memref + %P = memref.dim %B, %c2 : memref // CHECK: for [[IV6:%[arg0-9]*]] = 0 to [[ARG_M]] { // CHECK-NEXT: for [[IV7:%[arg0-9]*]] = 0 to [[ARG_N]] { @@ -251,15 +251,15 @@ // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index // CHECK-DAG: %[[C2:.*]] = constant 2 : index -// CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, %[[C0]] : memref -// CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, %[[C1]] : memref -// CHECK-DAG: [[ARG_P:%[0-9]+]] = dim %{{.*}}, %[[C2]] : memref +// CHECK-DAG: [[ARG_M:%[0-9]+]] = memref.dim %{{.*}}, %[[C0]] : memref +// CHECK-DAG: [[ARG_N:%[0-9]+]] = memref.dim %{{.*}}, %[[C1]] : memref +// CHECK-DAG: [[ARG_P:%[0-9]+]] = memref.dim %{{.*}}, %[[C2]] : memref %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %M = dim %A, %c0 : memref - %N = dim %A, %c1 : memref - %P = dim %B, %c2 : memref + %M = memref.dim %A, %c0 : memref + %N = memref.dim %A, %c1 : memref + %P = memref.dim %B, %c2 : memref // CHECK: for [[IV10:%[arg0-9]*]] = 0 to %{{[0-9]*}} { // CHECK: for [[IV11:%[arg0-9]*]] = 0 to %{{[0-9]*}} { @@ -279,15 +279,15 @@ // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index // CHECK-DAG: %[[C2:.*]] = constant 2 : index -// CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, %[[C0]] : memref -// CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, %[[C1]] : memref -// CHECK-DAG: [[ARG_P:%[0-9]+]] = dim %{{.*}}, %[[C2]] : memref +// CHECK-DAG: [[ARG_M:%[0-9]+]] = memref.dim %{{.*}}, %[[C0]] : memref +// CHECK-DAG: [[ARG_N:%[0-9]+]] = memref.dim %{{.*}}, %[[C1]] : memref +// CHECK-DAG: [[ARG_P:%[0-9]+]] = memref.dim %{{.*}}, %[[C2]] : memref %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %M = dim %A, %c0 : memref - %N = dim %A, %c1 : memref - %P = dim %B, %c2 : memref + %M = memref.dim %A, %c0 : memref + %N = memref.dim %A, %c1 : memref + %P = memref.dim %B, %c2 : memref // CHECK: for [[IV12:%[arg0-9]*]] = 0 to %{{[0-9]*}} { // CHECK: for [[IV13:%[arg0-9]*]] = 0 to %{{[0-9]*}} { @@ -309,19 +309,19 @@ // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index // CHECK-DAG: %[[C2:.*]] = constant 2 : index -// CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, %[[C0]] : memref -// CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, %[[C1]] : memref -// CHECK-DAG: [[ARG_P:%[0-9]+]] = dim %{{.*}}, %[[C2]] : memref +// CHECK-DAG: [[ARG_M:%[0-9]+]] = memref.dim %{{.*}}, %[[C0]] : memref +// CHECK-DAG: [[ARG_N:%[0-9]+]] = memref.dim %{{.*}}, 
%[[C1]] : memref +// CHECK-DAG: [[ARG_P:%[0-9]+]] = memref.dim %{{.*}}, %[[C2]] : memref %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %M = dim %A, %c0 : memref - %N = dim %A, %c1 : memref - %P = dim %B, %c2 : memref + %M = memref.dim %A, %c0 : memref + %N = memref.dim %A, %c1 : memref + %P = memref.dim %B, %c2 : memref // CHECK: affine.for %{{.*}}{{[0-9]*}} = 0 to %{{[0-9]*}} { affine.for %i16 = 0 to %M { // not vectorized, can't vectorize a vector load - %a16 = alloc(%M) : memref> + %a16 = memref.alloc(%M) : memref> %l16 = affine.load %a16[%i16] : memref> } return @@ -337,15 +337,15 @@ // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index // CHECK-DAG: %[[C2:.*]] = constant 2 : index -// CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, %[[C0]] : memref -// CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, %[[C1]] : memref -// CHECK-DAG: [[ARG_P:%[0-9]+]] = dim %{{.*}}, %[[C2]] : memref +// CHECK-DAG: [[ARG_M:%[0-9]+]] = memref.dim %{{.*}}, %[[C0]] : memref +// CHECK-DAG: [[ARG_N:%[0-9]+]] = memref.dim %{{.*}}, %[[C1]] : memref +// CHECK-DAG: [[ARG_P:%[0-9]+]] = memref.dim %{{.*}}, %[[C2]] : memref %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %M = dim %A, %c0 : memref - %N = dim %A, %c1 : memref - %P = dim %B, %c2 : memref + %M = memref.dim %A, %c0 : memref + %N = memref.dim %A, %c1 : memref + %P = memref.dim %B, %c2 : memref // CHECK: affine.for %{{.*}}{{[0-9]*}} = 0 to %{{[0-9]*}} { // CHECK: for [[IV18:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128 @@ -371,15 +371,15 @@ // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index // CHECK-DAG: %[[C2:.*]] = constant 2 : index -// CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, %[[C0]] : memref -// CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, %[[C1]] : memref -// CHECK-DAG: [[ARG_P:%[0-9]+]] = dim %{{.*}}, %[[C2]] : memref +// CHECK-DAG: [[ARG_M:%[0-9]+]] = memref.dim %{{.*}}, %[[C0]] : memref +// CHECK-DAG: [[ARG_N:%[0-9]+]] = memref.dim %{{.*}}, %[[C1]] : memref +// CHECK-DAG: [[ARG_P:%[0-9]+]] = memref.dim %{{.*}}, %[[C2]] : memref %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %M = dim %A, %c0 : memref - %N = dim %A, %c1 : memref - %P = dim %B, %c2 : memref + %M = memref.dim %A, %c0 : memref + %N = memref.dim %A, %c1 : memref + %P = memref.dim %B, %c2 : memref // CHECK: affine.for %{{.*}}{{[0-9]*}} = 0 to %{{[0-9]*}} { // CHECK: for [[IV18:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128 @@ -404,15 +404,15 @@ // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index // CHECK-DAG: %[[C2:.*]] = constant 2 : index -// CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, %[[C0]] : memref -// CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, %[[C1]] : memref -// CHECK-DAG: [[ARG_P:%[0-9]+]] = dim %{{.*}}, %[[C2]] : memref +// CHECK-DAG: [[ARG_M:%[0-9]+]] = memref.dim %{{.*}}, %[[C0]] : memref +// CHECK-DAG: [[ARG_N:%[0-9]+]] = memref.dim %{{.*}}, %[[C1]] : memref +// CHECK-DAG: [[ARG_P:%[0-9]+]] = memref.dim %{{.*}}, %[[C2]] : memref %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %M = dim %A, %c0 : memref - %N = dim %A, %c1 : memref - %P = dim %B, %c2 : memref + %M = memref.dim %A, %c0 : memref + %N = memref.dim %A, %c1 : memref + %P = memref.dim %B, %c2 : memref // CHECK: affine.for %{{.*}}{{[0-9]*}} = 0 to %{{[0-9]*}} { affine.for %i15 = 0 to %M { // not vectorized due to condition below @@ -430,15 +430,15 @@ // CHECK-DAG: %[[C0:.*]] = constant 0 : index // 
CHECK-DAG: %[[C1:.*]] = constant 1 : index // CHECK-DAG: %[[C2:.*]] = constant 2 : index - // CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %{{.*}}, %[[C0]] : memref - // CHECK-DAG: [[ARG_N:%[0-9]+]] = dim %{{.*}}, %[[C1]] : memref - // CHECK-DAG: [[ARG_P:%[0-9]+]] = dim %{{.*}}, %[[C2]] : memref + // CHECK-DAG: [[ARG_M:%[0-9]+]] = memref.dim %{{.*}}, %[[C0]] : memref + // CHECK-DAG: [[ARG_N:%[0-9]+]] = memref.dim %{{.*}}, %[[C1]] : memref + // CHECK-DAG: [[ARG_P:%[0-9]+]] = memref.dim %{{.*}}, %[[C2]] : memref %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %M = dim %A, %c0 : memref - %N = dim %A, %c1 : memref - %P = dim %B, %c2 : memref + %M = memref.dim %A, %c0 : memref + %N = memref.dim %A, %c1 : memref + %P = memref.dim %B, %c2 : memref // CHECK: for [[IV10:%[arg0-9]*]] = 0 to %{{[0-9]*}} { // CHECK: for [[IV11:%[arg0-9]*]] = 0 to %{{[0-9]*}} { @@ -458,7 +458,7 @@ // CHECK-LABEL: @vec_rejected_sequential func @vec_rejected_sequential(%A : memref) { %c0 = constant 0 : index - %N = dim %A, %c0 : memref + %N = memref.dim %A, %c0 : memref affine.for %i = 0 to %N { // CHECK-NOT: vector %a = affine.load %A[%i] : memref diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir --- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_2d.mlir @@ -13,9 +13,9 @@ %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %M = dim %A, %c0 : memref - %N = dim %A, %c1 : memref - %P = dim %A, %c2 : memref + %M = memref.dim %A, %c0 : memref + %N = memref.dim %A, %c1 : memref + %P = memref.dim %A, %c2 : memref // CHECK: for {{.*}} = 0 to %{{.*}} { // CHECK: for {{.*}} = 0 to %{{.*}} step 32 // CHECK: for {{.*}} = 0 to %{{.*}} step 256 @@ -47,9 +47,9 @@ } func @vector_add_2d(%M : index, %N : index) -> f32 { - %A = alloc (%M, %N) : memref - %B = alloc (%M, %N) : memref - %C = alloc (%M, %N) : memref + %A = memref.alloc (%M, %N) : memref + %B = memref.alloc (%M, %N) : memref + %C = memref.alloc (%M, %N) : memref %f1 = constant 1.0 : f32 %f2 = constant 2.0 : f32 affine.for %i0 = 0 to %M { @@ -102,14 +102,14 @@ func @vectorize_matmul(%arg0: memref, %arg1: memref, %arg2: memref) { %c0 = constant 0 : index %c1 = constant 1 : index - %M = dim %arg0, %c0 : memref - %K = dim %arg0, %c1 : memref - %N = dim %arg2, %c1 : memref + %M = memref.dim %arg0, %c0 : memref + %K = memref.dim %arg0, %c1 : memref + %N = memref.dim %arg2, %c1 : memref // VECT: %[[C0:.*]] = constant 0 : index // VECT-NEXT: %[[C1:.*]] = constant 1 : index - // VECT-NEXT: %[[M:.*]] = dim %{{.*}}, %[[C0]] : memref - // VECT-NEXT: %[[K:.*]] = dim %{{.*}}, %[[C1]] : memref - // VECT-NEXT: %[[N:.*]] = dim %{{.*}}, %[[C1]] : memref + // VECT-NEXT: %[[M:.*]] = memref.dim %{{.*}}, %[[C0]] : memref + // VECT-NEXT: %[[K:.*]] = memref.dim %{{.*}}, %[[C1]] : memref + // VECT-NEXT: %[[N:.*]] = memref.dim %{{.*}}, %[[C1]] : memref // VECT: {{.*}} #[[$map_id1]](%[[M]]) step 4 { // VECT-NEXT: {{.*}} #[[$map_id1]](%[[N]]) step 8 { // VECT: %[[VC0:.*]] = constant dense<0.000000e+00> : vector<4x8xf32> diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_3d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_3d.mlir --- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_3d.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_3d.mlir @@ -4,9 +4,9 @@ %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %0 = dim %A, %c0 : memref - %1 = dim %A, %c1 : 
memref - %2 = dim %A, %c2 : memref + %0 = memref.dim %A, %c0 : memref + %1 = memref.dim %A, %c1 : memref + %2 = memref.dim %A, %c2 : memref // CHECK: affine.for %{{.*}} = 0 to %{{.*}} { // CHECK: affine.for %{{.*}} = 0 to %{{.*}} { // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 32 { diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_outer_loop_2d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_outer_loop_2d.mlir --- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_outer_loop_2d.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_outer_loop_2d.mlir @@ -7,9 +7,9 @@ %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %M = dim %A, %c0 : memref - %N = dim %A, %c1 : memref - %P = dim %A, %c2 : memref + %M = memref.dim %A, %c0 : memref + %N = memref.dim %A, %c1 : memref + %P = memref.dim %A, %c2 : memref // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 32 // CHECK: affine.for %{{.*}} = 0 to %{{.*}} { // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 256 diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_outer_loop_transpose_2d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_outer_loop_transpose_2d.mlir --- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_outer_loop_transpose_2d.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_outer_loop_transpose_2d.mlir @@ -7,9 +7,9 @@ %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %M = dim %A, %c0 : memref - %N = dim %A, %c1 : memref - %P = dim %A, %c2 : memref + %M = memref.dim %A, %c0 : memref + %N = memref.dim %A, %c1 : memref + %P = memref.dim %A, %c2 : memref // CHECK: for {{.*}} = 0 to %{{.*}} { // CHECK: for {{.*}} = 0 to %{{.*}} { // CHECK: for {{.*}} = 0 to %{{.*}} { @@ -40,9 +40,9 @@ %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %0 = dim %A, %c0 : memref - %1 = dim %A, %c1 : memref - %2 = dim %A, %c2 : memref + %0 = memref.dim %A, %c0 : memref + %1 = memref.dim %A, %c1 : memref + %2 = memref.dim %A, %c2 : memref // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 32 { // CHECK: affine.for %{{.*}} = 0 to %{{.*}} { // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 256 { diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_transpose_2d.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_transpose_2d.mlir --- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_transpose_2d.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_transpose_2d.mlir @@ -7,9 +7,9 @@ %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %M = dim %A, %c0 : memref - %N = dim %A, %c1 : memref - %P = dim %A, %c2 : memref + %M = memref.dim %A, %c0 : memref + %N = memref.dim %A, %c1 : memref + %P = memref.dim %A, %c2 : memref // CHECK: for {{.*}} = 0 to %{{.*}} { // CHECK: for {{.*}} = 0 to %{{.*}} { // CHECK: for {{.*}} = 0 to %{{.*}} { @@ -40,9 +40,9 @@ %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %0 = dim %A, %c0 : memref - %1 = dim %A, %c1 : memref - %2 = dim %A, %c2 : memref + %0 = memref.dim %A, %c0 : memref + %1 = memref.dim %A, %c1 : memref + %2 = memref.dim %A, %c2 : memref // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 32 { // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 256 { // CHECK: affine.for %{{.*}} = 0 to %{{.*}} { diff --git a/mlir/test/Dialect/Affine/affine-data-copy.mlir b/mlir/test/Dialect/Affine/affine-data-copy.mlir --- a/mlir/test/Dialect/Affine/affine-data-copy.mlir +++ b/mlir/test/Dialect/Affine/affine-data-copy.mlir @@ -49,7 +49,7 
@@ // CHECK: affine.for %[[I:.*]] = 0 to 4096 step 128 { // CHECK: affine.for %[[J:.*]] = 0 to 4096 step 128 { -// CHECK: [[BUFC:%[0-9]+]] = alloc() : memref<128x128xf32> +// CHECK: [[BUFC:%[0-9]+]] = memref.alloc() : memref<128x128xf32> // The result matrix's copy gets hoisted out. // Result matrix copy-in. // CHECK: affine.for %[[II:.*]] = #[[$MAP_IDENTITY]](%{{.*}}) to #[[$MAP_PLUS_128]](%{{.*}}) { @@ -61,7 +61,7 @@ // LHS matrix copy-in. // CHECK: affine.for %[[K:.*]] = 0 to 4096 step 128 { -// CHECK: [[BUFA:%[0-9]+]] = alloc() : memref<128x128xf32> +// CHECK: [[BUFA:%[0-9]+]] = memref.alloc() : memref<128x128xf32> // CHECK: affine.for %[[II:.*]] = #[[$MAP_IDENTITY]](%{{.*}}) to #[[$MAP_PLUS_128]](%{{.*}}) { // CHECK: affine.for %[[KK:.*]] = #[[$MAP_IDENTITY]](%{{.*}}) to #[[$MAP_PLUS_128]](%{{.*}}) { // CHECK: affine.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<4096x4096xf32> @@ -70,7 +70,7 @@ // CHECK: } // RHS matrix copy-in. -// CHECK: [[BUFB:%[0-9]+]] = alloc() : memref<128x128xf32> +// CHECK: [[BUFB:%[0-9]+]] = memref.alloc() : memref<128x128xf32> // CHECK: affine.for %[[KK:.*]] = #[[$MAP_IDENTITY]](%{{.*}}) to #[[$MAP_PLUS_128]](%{{.*}}) { // CHECK: affine.for %[[JJ:.*]] = #[[$MAP_IDENTITY]](%{{.*}}) to #[[$MAP_PLUS_128]](%{{.*}}) { // CHECK: affine.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<4096x4096xf32> @@ -91,8 +91,8 @@ // CHECK: } // CHECK: } // CHECK: } -// CHECK: dealloc [[BUFB]] : memref<128x128xf32> -// CHECK: dealloc [[BUFA]] : memref<128x128xf32> +// CHECK: memref.dealloc [[BUFB]] : memref<128x128xf32> +// CHECK: memref.dealloc [[BUFA]] : memref<128x128xf32> // CHECK: } // Result matrix copy out. @@ -102,15 +102,15 @@ // CHECK: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] : memref<4096x4096xf32> // CHECK: } // CHECK: } -// CHECK: dealloc [[BUFC]] : memref<128x128xf32> +// CHECK: memref.dealloc [[BUFC]] : memref<128x128xf32> // CHECK: } // CHECK: } // Check that only one memref is copied when memref filter is used. 
// FILTER: affine.for %{{.*}} = 0 to 4096 step 128 { -// FILTER: alloc() : memref<128x4096xf32> -// FILTER-NOT: alloc() +// FILTER: memref.alloc() : memref<128x4096xf32> +// FILTER-NOT: memref.alloc() // FILTER: affine.for // FILTER: affine.for %{{.*}} = 0 to 4096 { // FILTER: affine.for %{{.*}} = 0 to 4096 step 128 { @@ -118,8 +118,8 @@ // FILTER-NEXT: affine.for %{{.*}} = #map{{.*}}(%{{.*}}) to #map{{.*}}(%{{.*}}) { // FILTER-NEXT: affine.for %{{.*}} = #map{{.*}}(%{{.*}}) to #map{{.*}}(%{{.*}}) { // FILTER-NEXT: affine.for %{{.*}} = #map{{.*}}(%{{.*}}) to #map{{.*}}(%{{.*}}) { -// FILTER: dealloc %{{.*}} : memref<128x4096xf32> -// FILTER-NOT: dealloc %{{.*}} : memref<128x4096xf32> +// FILTER: memref.dealloc %{{.*}} : memref<128x4096xf32> +// FILTER-NOT: memref.dealloc %{{.*}} : memref<128x4096xf32> // ----- @@ -145,43 +145,43 @@ } // CHECK-SMALL: affine.for %arg{{.*}} = 0 to 1024 { // CHECK-SMALL: affine.for %arg{{.*}} = 0 to 1024 { -// CHECK-SMALL: alloc() : memref<1x1xf32> +// CHECK-SMALL: memref.alloc() : memref<1x1xf32> // CHECK-SMALL: affine.load %arg{{.*}}[%{{.*}}, %{{.*}}] : memref<1024x1024xf32> // CHECK-SMALL: affine.store %{{.*}}, %{{.*}}[0, 0] : memref<1x1xf32> // CHECK-SMALL: affine.for %arg{{.*}} = 0 to 1024 { -// CHECK-SMALL: alloc() : memref<1x1xf32> +// CHECK-SMALL: memref.alloc() : memref<1x1xf32> // CHECK-SMALL: affine.load %arg{{.*}}[%{{.*}}, %{{.*}}] : memref<1024x1024xf32> // CHECK-SMALL: affine.store %{{.*}}, %{{.*}}[0, 0] : memref<1x1xf32> // CHECK-SMALL: affine.load %{{.*}}[0, 0] : memref<1x1xf32> // CHECK-SMALL: affine.load %{{.*}}[0, 0] : memref<1x1xf32> // CHECK-SMALL: addf %{{.*}}, %{{.*}} : f32 // CHECK-SMALL: affine.store %{{.*}}, %{{.*}}[0, 0] : memref<1x1xf32> -// CHECK-SMALL: dealloc %{{.*}} : memref<1x1xf32> +// CHECK-SMALL: memref.dealloc %{{.*}} : memref<1x1xf32> // CHECK-SMALL: } // CHECK-SMALL: affine.load %{{.*}}[0, 0] : memref<1x1xf32> // CHECK-SMALL: affine.store %{{.*}}, %arg{{.*}}[%{{.*}}, %{{.*}}] : memref<1024x1024xf32> -// CHECK-SMALL: dealloc %{{.*}} : memref<1x1xf32> +// CHECK-SMALL: memref.dealloc %{{.*}} : memref<1x1xf32> // CHECK-SMALL: } // CHECK-SMALL: } // CHECK-SMALL: return // Check that only one memref is copied when memref filter is used. -// FILTER: alloc() : memref<1024x1024xf32> -// FILTER-NOT: alloc() +// FILTER: memref.alloc() : memref<1024x1024xf32> +// FILTER-NOT: memref.alloc() // FILTER: affine.for %{{.*}} = 0 to 1024 { // FILTER: affine.for %{{.*}} = 0 to 1024 { // FILTER: affine.for %{{.*}} = 0 to 1024 { // FILTER-NEXT: affine.for %{{.*}} = 0 to 1024 { // FILTER-NEXT: affine.for %{{.*}} = 0 to 1024 { -// FILTER: dealloc %{{.*}} : memref<1024x1024xf32> -// FILTER-NOT: dealloc +// FILTER: memref.dealloc %{{.*}} : memref<1024x1024xf32> +// FILTER-NOT: memref.dealloc // FILTER: return // CHeck that only one memref is copied, because for-memref-region is enabled // (and the first ever encountered load is analyzed). 
-// MEMREF_REGION: alloc() : memref<1024x1024xf32> -// MEMREF_REGION-NOT: alloc() +// MEMREF_REGION: memref.alloc() : memref<1024x1024xf32> +// MEMREF_REGION-NOT: memref.alloc() // MEMREF_REGION: affine.for %{{.*}} = 0 to 1024 { // MEMREF_REGION: affine.for %{{.*}} = 0 to 1024 { // MEMREF_REGION: } @@ -189,8 +189,8 @@ // MEMREF_REGION-NEXT: affine.for %{{.*}} = 0 to 1024 { // MEMREF_REGION-NEXT: affine.for %{{.*}} = 0 to 1024 { // MEMREF_REGION-NEXT: affine.for %{{.*}} = 0 to 1024 { -// MEMREF_REGION: dealloc %{{.*}} : memref<1024x1024xf32> -// MEMREF_REGION-NOT: dealloc +// MEMREF_REGION: memref.dealloc %{{.*}} : memref<1024x1024xf32> +// MEMREF_REGION-NOT: memref.dealloc // MEMREF_REGION-NEXT: return // ----- @@ -216,7 +216,7 @@ return %A : memref<4096xf32> } // CHECK: affine.for %[[IV1:.*]] = 0 to 4096 step 100 -// CHECK: %[[BUF:.*]] = alloc() : memref<100xf32> +// CHECK: %[[BUF:.*]] = memref.alloc() : memref<100xf32> // CHECK-NEXT: affine.for %[[IV2:.*]] = #[[$MAP_IDENTITY]](%[[IV1]]) to min #[[$MAP_MIN_UB1]](%[[IV1]]) { // CHECK-NEXT: affine.load %{{.*}}[%[[IV2]]] : memref<4096xf32> // CHECK-NEXT: affine.store %{{.*}}, %[[BUF]][%[[IV2]] - %[[IV1]]] : memref<100xf32> @@ -230,7 +230,7 @@ // CHECK-NEXT: affine.load %[[BUF]][%[[IV2]] - %[[IV1]]] : memref<100xf32> // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%[[IV2]]] : memref<4096xf32> // CHECK-NEXT: } -// CHECK-NEXT: dealloc %[[BUF]] : memref<100xf32> +// CHECK-NEXT: memref.dealloc %[[BUF]] : memref<100xf32> // CHECK-NEXT: } // ----- @@ -257,7 +257,7 @@ return } -// CHECK: %[[BUF:.*]] = alloc() : memref<2048x6xf64> +// CHECK: %[[BUF:.*]] = memref.alloc() : memref<2048x6xf64> // CHECK-NEXT: affine.for %[[ii:.*]] = 0 to 2048 { // CHECK-NEXT: affine.for %[[jj:.*]] = max #[[$LB]]()[%[[i]], %[[j]]] to min #[[$UB]]()[%[[i]], %[[j]]] { // CHECK-NEXT: affine.load %{{.*}}[%[[ii]], %[[jj]]] : memref<2048x516xf64> @@ -269,4 +269,4 @@ // CHECK-NEXT: affine.load %[[BUF]][%[[ii_]], %[[jj_]] - symbol(%[[j]]) * 6] : memref<2048x6xf64> // CHECK-NEXT: } // CHECK-NEXT: } -// CHECK-NEXT: dealloc %[[BUF]] : memref<2048x6xf64> +// CHECK-NEXT: memref.dealloc %[[BUF]] : memref<2048x6xf64> diff --git a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir --- a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir +++ b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s -affine-loop-invariant-code-motion -split-input-file | FileCheck %s func @nested_loops_both_having_invariant_code() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 %cf8 = constant 8.0 : f32 @@ -12,7 +12,7 @@ } } - // CHECK: %0 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> // CHECK-NEXT: %cst = constant 7.000000e+00 : f32 // CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32 // CHECK-NEXT: %1 = addf %cst, %cst_0 : f32 @@ -29,14 +29,14 @@ // CHECK-LABEL: func @store_affine_apply func @store_affine_apply() -> memref<10xf32> { %cf7 = constant 7.0 : f32 - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> affine.for %arg0 = 0 to 10 { %t0 = affine.apply affine_map<(d1) -> (d1 + 1)>(%arg0) affine.store %cf7, %m[%t0] : memref<10xf32> } return %m : memref<10xf32> // CHECK: %cst = constant 7.000000e+00 : f32 -// CHECK-NEXT: %0 = alloc() : memref<10xf32> +// CHECK-NEXT: %0 = memref.alloc() : memref<10xf32> // CHECK-NEXT: affine.for %arg0 = 0 to 10 { // CHECK-NEXT: %1 = 
affine.apply #map{{[0-9]*}}(%arg0) // CHECK-NEXT: affine.store %cst, %0[%1] : memref<10xf32> @@ -47,7 +47,7 @@ // ----- func @nested_loops_code_invariant_to_both() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 %cf8 = constant 8.0 : f32 @@ -57,7 +57,7 @@ } } - // CHECK: %0 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> // CHECK-NEXT: %cst = constant 7.000000e+00 : f32 // CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32 // CHECK-NEXT: %1 = addf %cst, %cst_0 : f32 @@ -68,8 +68,8 @@ // ----- func @single_loop_nothing_invariant() { - %m1 = alloc() : memref<10xf32> - %m2 = alloc() : memref<10xf32> + %m1 = memref.alloc() : memref<10xf32> + %m2 = memref.alloc() : memref<10xf32> affine.for %arg0 = 0 to 10 { %v0 = affine.load %m1[%arg0] : memref<10xf32> %v1 = affine.load %m2[%arg0] : memref<10xf32> @@ -77,8 +77,8 @@ affine.store %v2, %m1[%arg0] : memref<10xf32> } - // CHECK: %0 = alloc() : memref<10xf32> - // CHECK-NEXT: %1 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> + // CHECK-NEXT: %1 = memref.alloc() : memref<10xf32> // CHECK-NEXT: affine.for %arg0 = 0 to 10 { // CHECK-NEXT: %2 = affine.load %0[%arg0] : memref<10xf32> // CHECK-NEXT: %3 = affine.load %1[%arg0] : memref<10xf32> @@ -91,7 +91,7 @@ // ----- func @invariant_code_inside_affine_if() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 affine.for %arg0 = 0 to 10 { @@ -103,7 +103,7 @@ } } - // CHECK: %0 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> // CHECK-NEXT: %cst = constant 8.000000e+00 : f32 // CHECK-NEXT: affine.for %arg0 = 0 to 10 { // CHECK-NEXT: %1 = affine.apply #map{{[0-9]*}}(%arg0) @@ -119,7 +119,7 @@ // ----- func @dependent_stores() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 %cf8 = constant 8.0 : f32 @@ -132,7 +132,7 @@ } } - // CHECK: %0 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> // CHECK-NEXT: %cst = constant 7.000000e+00 : f32 // CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32 // CHECK-NEXT: %1 = addf %cst, %cst_0 : f32 @@ -149,7 +149,7 @@ // ----- func @independent_stores() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 %cf8 = constant 8.0 : f32 @@ -162,7 +162,7 @@ } } - // CHECK: %0 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> // CHECK-NEXT: %cst = constant 7.000000e+00 : f32 // CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32 // CHECK-NEXT: %1 = addf %cst, %cst_0 : f32 @@ -179,7 +179,7 @@ // ----- func @load_dependent_store() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 %cf8 = constant 8.0 : f32 @@ -192,7 +192,7 @@ } } - // CHECK: %0 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> // CHECK-NEXT: %cst = constant 7.000000e+00 : f32 // CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32 // CHECK-NEXT: %1 = addf %cst, %cst_0 : f32 @@ -208,7 +208,7 @@ // ----- func @load_after_load() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 %cf8 = constant 8.0 : f32 @@ -221,7 +221,7 @@ } } - // CHECK: %0 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> // CHECK-NEXT: %cst = constant 7.000000e+00 : f32 // CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32 // CHECK-NEXT: %1 = addf %cst, %cst_0 : f32 @@ -237,7 +237,7 @@ // ----- 
func @invariant_affine_if() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 affine.for %arg0 = 0 to 10 { affine.for %arg1 = 0 to 10 { @@ -249,7 +249,7 @@ } } - // CHECK: %0 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> // CHECK-NEXT: %cst = constant 8.000000e+00 : f32 // CHECK-NEXT: affine.for %arg0 = 0 to 10 { // CHECK-NEXT: affine.if #set(%arg0, %arg0) { @@ -264,7 +264,7 @@ // ----- func @invariant_affine_if2() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 affine.for %arg0 = 0 to 10 { affine.for %arg1 = 0 to 10 { @@ -276,7 +276,7 @@ } } - // CHECK: %0 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> // CHECK-NEXT: %cst = constant 8.000000e+00 : f32 // CHECK-NEXT: affine.for %arg0 = 0 to 10 { // CHECK-NEXT: affine.for %arg1 = 0 to 10 { @@ -293,7 +293,7 @@ // ----- func @invariant_affine_nested_if() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 affine.for %arg0 = 0 to 10 { affine.for %arg1 = 0 to 10 { @@ -307,7 +307,7 @@ } } - // CHECK: %0 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> // CHECK-NEXT: %cst = constant 8.000000e+00 : f32 // CHECK-NEXT: affine.for %arg0 = 0 to 10 { // CHECK-NEXT: affine.for %arg1 = 0 to 10 { @@ -327,7 +327,7 @@ // ----- func @invariant_affine_nested_if_else() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 affine.for %arg0 = 0 to 10 { affine.for %arg1 = 0 to 10 { @@ -343,7 +343,7 @@ } } - // CHECK: %0 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> // CHECK-NEXT: %cst = constant 8.000000e+00 : f32 // CHECK-NEXT: affine.for %arg0 = 0 to 10 { // CHECK-NEXT: affine.for %arg1 = 0 to 10 { @@ -365,8 +365,8 @@ // ----- func @invariant_affine_nested_if_else2() { - %m = alloc() : memref<10xf32> - %m2 = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> + %m2 = memref.alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 affine.for %arg0 = 0 to 10 { affine.for %arg1 = 0 to 10 { @@ -382,8 +382,8 @@ } } - // CHECK: %0 = alloc() : memref<10xf32> - // CHECK-NEXT: %1 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> + // CHECK-NEXT: %1 = memref.alloc() : memref<10xf32> // CHECK-NEXT: %cst = constant 8.000000e+00 : f32 // CHECK-NEXT: affine.for %arg0 = 0 to 10 { // CHECK-NEXT: affine.if #set(%arg0, %arg0) { @@ -403,7 +403,7 @@ // ----- func @invariant_affine_nested_if2() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 affine.for %arg0 = 0 to 10 { affine.for %arg1 = 0 to 10 { @@ -417,7 +417,7 @@ } } - // CHECK: %0 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> // CHECK-NEXT: %cst = constant 8.000000e+00 : f32 // CHECK-NEXT: affine.for %arg0 = 0 to 10 { // CHECK-NEXT: affine.if #set(%arg0, %arg0) { @@ -435,7 +435,7 @@ // ----- func @invariant_affine_for_inside_affine_if() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 affine.for %arg0 = 0 to 10 { affine.for %arg1 = 0 to 10 { @@ -449,7 +449,7 @@ } } - // CHECK: %0 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> // CHECK-NEXT: %cst = constant 8.000000e+00 : f32 // CHECK-NEXT: affine.for %arg0 = 0 to 10 { // CHECK-NEXT: affine.for %arg1 = 0 to 10 { @@ -469,16 +469,16 @@ // ----- func @invariant_constant_and_load() { - %m = 
alloc() : memref<100xf32> - %m2 = alloc() : memref<100xf32> + %m = memref.alloc() : memref<100xf32> + %m2 = memref.alloc() : memref<100xf32> affine.for %arg0 = 0 to 5 { %c0 = constant 0 : index %v = affine.load %m2[%c0] : memref<100xf32> affine.store %v, %m[%arg0] : memref<100xf32> } - // CHECK: %0 = alloc() : memref<100xf32> - // CHECK-NEXT: %1 = alloc() : memref<100xf32> + // CHECK: %0 = memref.alloc() : memref<100xf32> + // CHECK-NEXT: %1 = memref.alloc() : memref<100xf32> // CHECK-NEXT: %c0 = constant 0 : index // CHECK-NEXT: %2 = affine.load %1[%c0] : memref<100xf32> // CHECK-NEXT: affine.for %arg0 = 0 to 5 { @@ -491,7 +491,7 @@ // ----- func @nested_load_store_same_memref() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cst = constant 8.0 : f32 %c0 = constant 0 : index affine.for %arg0 = 0 to 10 { @@ -501,7 +501,7 @@ } } - // CHECK: %0 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> // CHECK-NEXT: %cst = constant 8.000000e+00 : f32 // CHECK-NEXT: %c0 = constant 0 : index // CHECK-NEXT: affine.for %arg0 = 0 to 10 { @@ -516,7 +516,7 @@ // ----- func @nested_load_store_same_memref2() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cst = constant 8.0 : f32 %c0 = constant 0 : index affine.for %arg0 = 0 to 10 { @@ -526,7 +526,7 @@ } } - // CHECK: %0 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> // CHECK-NEXT: %cst = constant 8.000000e+00 : f32 // CHECK-NEXT: %c0 = constant 0 : index // CHECK-NEXT: affine.for %arg0 = 0 to 10 { @@ -541,7 +541,7 @@ // CHECK-LABEL: func @do_not_hoist_dependent_side_effect_free_op func @do_not_hoist_dependent_side_effect_free_op(%arg0: memref<10x512xf32>) { - %0 = alloca() : memref<1xf32> + %0 = memref.alloca() : memref<1xf32> %cst = constant 8.0 : f32 affine.for %i = 0 to 512 { affine.for %j = 0 to 10 { @@ -571,8 +571,8 @@ // CHECK-LABEL: func @vector_loop_nothing_invariant func @vector_loop_nothing_invariant() { - %m1 = alloc() : memref<40xf32> - %m2 = alloc() : memref<40xf32> + %m1 = memref.alloc() : memref<40xf32> + %m2 = memref.alloc() : memref<40xf32> affine.for %arg0 = 0 to 10 { %v0 = affine.vector_load %m1[%arg0*4] : memref<40xf32>, vector<4xf32> %v1 = affine.vector_load %m2[%arg0*4] : memref<40xf32>, vector<4xf32> @@ -593,9 +593,9 @@ // CHECK-LABEL: func @vector_loop_all_invariant func @vector_loop_all_invariant() { - %m1 = alloc() : memref<4xf32> - %m2 = alloc() : memref<4xf32> - %m3 = alloc() : memref<4xf32> + %m1 = memref.alloc() : memref<4xf32> + %m2 = memref.alloc() : memref<4xf32> + %m3 = memref.alloc() : memref<4xf32> affine.for %arg0 = 0 to 10 { %v0 = affine.vector_load %m1[0] : memref<4xf32>, vector<4xf32> %v1 = affine.vector_load %m2[0] : memref<4xf32>, vector<4xf32> @@ -605,9 +605,9 @@ return } -// CHECK: alloc() -// CHECK-NEXT: alloc() -// CHECK-NEXT: alloc() +// CHECK: memref.alloc() +// CHECK-NEXT: memref.alloc() +// CHECK-NEXT: memref.alloc() // CHECK-NEXT: affine.vector_load // CHECK-NEXT: affine.vector_load // CHECK-NEXT: addf diff --git a/mlir/test/Dialect/Affine/affine-loop-normalize.mlir b/mlir/test/Dialect/Affine/affine-loop-normalize.mlir --- a/mlir/test/Dialect/Affine/affine-loop-normalize.mlir +++ b/mlir/test/Dialect/Affine/affine-loop-normalize.mlir @@ -9,7 +9,7 @@ // CHECK-LABEL: func @normalize_parallel() func @normalize_parallel() { %cst = constant 1.0 : f32 - %0 = alloc() : memref<2x4xf32> + %0 = memref.alloc() : memref<2x4xf32> // CHECK: affine.parallel (%[[i0:.*]], %[[j0:.*]]) = (0, 0) to (4, 2) affine.parallel 
(%i, %j) = (0, 1) to (10, 5) step (3, 2) { // CHECK: %[[i1:.*]] = affine.apply [[$MAP0]](%[[i0]]) @@ -77,7 +77,7 @@ // CHECK-LABEL: func @loop_with_unknown_upper_bound // CHECK-SAME: (%[[ARG0:.*]]: memref, %[[ARG1:.*]]: index) // CHECK-NEXT: %{{.*}} = constant 0 : index -// CHECK-NEXT: %[[DIM:.*]] = dim %arg0, %c0 : memref +// CHECK-NEXT: %[[DIM:.*]] = memref.dim %arg0, %c0 : memref // CHECK-NEXT: affine.for %[[I:.*]] = 0 to [[$UB00]]()[%[[DIM]]] { // CHECK-NEXT: %[[IIV:.*]] = affine.apply [[$IV00]](%[[I]]) // CHECK-NEXT: affine.for %[[II:.*]] = 0 to [[$UB11]]()[%[[ARG1]]] { @@ -89,7 +89,7 @@ // CHECK-NEXT: } func @loop_with_unknown_upper_bound(%arg0: memref, %arg1: index) { %c0 = constant 0 : index - %0 = dim %arg0, %c0 : memref + %0 = memref.dim %arg0, %c0 : memref affine.for %i0 = 2 to %0 step 32 { affine.for %i1 = 0 to %arg1 step 2 { "test.foo"(%i0, %i1) : (index, index) -> () @@ -108,7 +108,7 @@ // CHECK-LABEL: func @loop_with_multiple_upper_bounds // CHECK-SAME: (%[[ARG0:.*]]: memref, %[[ARG1:.*]]: index) // CHECK-NEXT: %{{.*}} = constant 0 : index -// CHECK-NEXT: %[[DIM:.*]] = dim %arg0, %c0 : memref +// CHECK-NEXT: %[[DIM:.*]] = memref.dim %arg0, %c0 : memref // CHECK-NEXT: affine.for %[[I:.*]] = 0 to [[$OUTERUB]]()[%[[DIM]]] { // CHECK-NEXT: %[[IIV:.*]] = affine.apply [[$OUTERIV]](%[[I]]) // CHECK-NEXT: affine.for %[[II:.*]] = 0 to min [[$INNERUB]](%[[ARG1]]) { @@ -120,7 +120,7 @@ // CHECK-NEXT: } func @loop_with_multiple_upper_bounds(%arg0: memref, %arg1 : index) { %c0 = constant 0 : index - %0 = dim %arg0, %c0 : memref + %0 = memref.dim %arg0, %c0 : memref affine.for %i0 = 2 to %0 step 32{ affine.for %i1 = 2 to min affine_map<(d0)[] -> (d0, 512)>(%arg1) { "test.foo"(%i0, %i1) : (index, index) -> () @@ -140,9 +140,9 @@ // CHECK-SAME: (%[[ARG0:.*]]: memref<1024x1024xf32>, %[[ARG1:.*]]: memref<1024x1024xf32>, %[[ARG2:.*]]: memref<1024x1024xf32>) // CHECK-NEXT: %{{.*}} = constant 0 : index // CHECK-NEXT: %{{.*}} = constant 1 : index -// CHECK-NEXT: %[[DIM0:.*]] = dim %[[ARG0]], %{{.*}} -// CHECK-NEXT: %[[DIM1:.*]] = dim %[[ARG1]], %{{.*}} -// CHECK-NEXT: %[[DIM2:.*]] = dim %[[ARG0]], %{{.*}} +// CHECK-NEXT: %[[DIM0:.*]] = memref.dim %[[ARG0]], %{{.*}} +// CHECK-NEXT: %[[DIM1:.*]] = memref.dim %[[ARG1]], %{{.*}} +// CHECK-NEXT: %[[DIM2:.*]] = memref.dim %[[ARG0]], %{{.*}} // CHECK-NEXT: affine.for %[[I:.*]] = 0 to [[$INTERUB]]()[%[[DIM0]]] { // CHECK-NEXT: %[[IIV:.*]] = affine.apply [[$INTERIV]](%[[I]]) // CHECK-NEXT: affine.for %[[J:.*]] = 0 to [[$INTERUB]]()[%[[DIM1]]] { @@ -178,9 +178,9 @@ func @tiled_matmul(%0: memref<1024x1024xf32>, %1: memref<1024x1024xf32>, %2: memref<1024x1024xf32>) { %c0 = constant 0 : index %c1 = constant 1 : index - %3 = dim %0, %c0 : memref<1024x1024xf32> - %4 = dim %1, %c1 : memref<1024x1024xf32> - %5 = dim %0, %c1 : memref<1024x1024xf32> + %3 = memref.dim %0, %c0 : memref<1024x1024xf32> + %4 = memref.dim %1, %c1 : memref<1024x1024xf32> + %5 = memref.dim %0, %c1 : memref<1024x1024xf32> affine.for %arg0 = 0 to %3 step 32 { affine.for %arg1 = 0 to %4 step 32 { affine.for %arg2 = 0 to %5 step 32 { diff --git a/mlir/test/Dialect/Affine/canonicalize.mlir b/mlir/test/Dialect/Affine/canonicalize.mlir --- a/mlir/test/Dialect/Affine/canonicalize.mlir +++ b/mlir/test/Dialect/Affine/canonicalize.mlir @@ -7,7 +7,7 @@ // CHECK-LABEL: func @compose_affine_maps_1dto2d_no_symbols() { func @compose_affine_maps_1dto2d_no_symbols() { - %0 = alloc() : memref<4x4xf32> + %0 = memref.alloc() : memref<4x4xf32> affine.for %i0 = 0 to 15 { // Test load[%x, %x] @@ -17,8 +17,8 @@ 
%x1_1 = affine.apply affine_map<(d0, d1) -> (d1)> (%x0, %x0) // CHECK: %[[I0A:.*]] = affine.apply #[[$MAP0]](%{{.*}}) - // CHECK-NEXT: %[[V0:.*]] = load %0[%[[I0A]], %[[I0A]]] - %v0 = load %0[%x1_0, %x1_1] : memref<4x4xf32> + // CHECK-NEXT: %[[V0:.*]] = memref.load %0[%[[I0A]], %[[I0A]]] + %v0 = memref.load %0[%x1_0, %x1_1] : memref<4x4xf32> // Test store[%y, %y] %y0 = affine.apply affine_map<(d0) -> (d0 + 1)> (%i0) @@ -26,21 +26,21 @@ %y1_1 = affine.apply affine_map<(d0, d1) -> (d1)> (%y0, %y0) // CHECK-NEXT: %[[I1A:.*]] = affine.apply #[[$MAP1]](%{{.*}}) - // CHECK-NEXT: store %[[V0]], %0[%[[I1A]], %[[I1A]]] - store %v0, %0[%y1_0, %y1_1] : memref<4x4xf32> + // CHECK-NEXT: memref.store %[[V0]], %0[%[[I1A]], %[[I1A]]] + memref.store %v0, %0[%y1_0, %y1_1] : memref<4x4xf32> // Test store[%x, %y] %xy_0 = affine.apply affine_map<(d0, d1) -> (d0)> (%x0, %y0) %xy_1 = affine.apply affine_map<(d0, d1) -> (d1)> (%x0, %y0) - // CHECK-NEXT: store %[[V0]], %0[%[[I0A]], %[[I1A]]] - store %v0, %0[%xy_0, %xy_1] : memref<4x4xf32> + // CHECK-NEXT: memref.store %[[V0]], %0[%[[I0A]], %[[I1A]]] + memref.store %v0, %0[%xy_0, %xy_1] : memref<4x4xf32> // Test store[%y, %x] %yx_0 = affine.apply affine_map<(d0, d1) -> (d0)> (%y0, %x0) %yx_1 = affine.apply affine_map<(d0, d1) -> (d1)> (%y0, %x0) - // CHECK-NEXT: store %[[V0]], %0[%[[I1A]], %[[I0A]]] - store %v0, %0[%yx_0, %yx_1] : memref<4x4xf32> + // CHECK-NEXT: memref.store %[[V0]], %0[%[[I1A]], %[[I0A]]] + memref.store %v0, %0[%yx_0, %yx_1] : memref<4x4xf32> } return } @@ -53,7 +53,7 @@ // CHECK-LABEL: func @compose_affine_maps_1dto2d_with_symbols() { func @compose_affine_maps_1dto2d_with_symbols() { - %0 = alloc() : memref<4x4xf32> + %0 = memref.alloc() : memref<4x4xf32> affine.for %i0 = 0 to 15 { // Test load[%x0, %x0] with symbol %c4 @@ -61,29 +61,29 @@ %x0 = affine.apply affine_map<(d0)[s0] -> (d0 - s0)> (%i0)[%c4] // CHECK: %[[I0:.*]] = affine.apply #[[$MAP4]](%{{.*}}) - // CHECK-NEXT: %[[V0:.*]] = load %{{.*}}[%[[I0]], %[[I0]]] - %v0 = load %0[%x0, %x0] : memref<4x4xf32> + // CHECK-NEXT: %[[V0:.*]] = memref.load %{{.*}}[%[[I0]], %[[I0]]] + %v0 = memref.load %0[%x0, %x0] : memref<4x4xf32> // Test load[%x0, %x1] with symbol %c4 captured by '%x0' map. %x1 = affine.apply affine_map<(d0) -> (d0 + 1)> (%i0) %y1 = affine.apply affine_map<(d0, d1) -> (d0+d1)> (%x0, %x1) // CHECK-NEXT: %[[I1:.*]] = affine.apply #[[$MAP7]](%{{.*}}) - // CHECK-NEXT: store %[[V0]], %{{.*}}[%[[I1]], %[[I1]]] - store %v0, %0[%y1, %y1] : memref<4x4xf32> + // CHECK-NEXT: memref.store %[[V0]], %{{.*}}[%[[I1]], %[[I1]]] + memref.store %v0, %0[%y1, %y1] : memref<4x4xf32> // Test store[%x1, %x0] with symbol %c4 captured by '%x0' map. 
%y2 = affine.apply affine_map<(d0, d1) -> (d0 + d1)> (%x1, %x0) // CHECK-NEXT: %[[I2:.*]] = affine.apply #[[$MAP7]](%{{.*}}) - // CHECK-NEXT: store %[[V0]], %{{.*}}[%[[I2]], %[[I2]]] - store %v0, %0[%y2, %y2] : memref<4x4xf32> + // CHECK-NEXT: memref.store %[[V0]], %{{.*}}[%[[I2]], %[[I2]]] + memref.store %v0, %0[%y2, %y2] : memref<4x4xf32> // Test store[%x2, %x0] with symbol %c4 from '%x0' and %c5 from '%x2' %c5 = constant 5 : index %x2 = affine.apply affine_map<(d0)[s0] -> (d0 + s0)> (%i0)[%c5] %y3 = affine.apply affine_map<(d0, d1) -> (d0 + d1)> (%x2, %x0) // CHECK: %[[I3:.*]] = affine.apply #[[$MAP7a]](%{{.*}}) - // CHECK-NEXT: store %[[V0]], %{{.*}}[%[[I3]], %[[I3]]] - store %v0, %0[%y3, %y3] : memref<4x4xf32> + // CHECK-NEXT: memref.store %[[V0]], %{{.*}}[%[[I3]], %[[I3]]] + memref.store %v0, %0[%y3, %y3] : memref<4x4xf32> } return } @@ -95,8 +95,8 @@ // CHECK-LABEL: func @compose_affine_maps_2d_tile() { func @compose_affine_maps_2d_tile() { - %0 = alloc() : memref<16x32xf32> - %1 = alloc() : memref<16x32xf32> + %0 = memref.alloc() : memref<16x32xf32> + %1 = memref.alloc() : memref<16x32xf32> %c4 = constant 4 : index %c8 = constant 8 : index @@ -116,11 +116,11 @@ ((d1 * s1) + d3)> (%x0, %x1, %x2, %x3)[%c4, %c8] // CHECK: %[[I0:.*]] = affine.apply #[[$MAP8]](%{{.*}}, %{{.*}}) // CHECK: %[[I1:.*]] = affine.apply #[[$MAP8a]](%{{.*}}, %{{.*}}) - // CHECK-NEXT: %[[L0:.*]] = load %{{.*}}[%[[I0]], %[[I1]]] - %v0 = load %0[%x40, %x41] : memref<16x32xf32> + // CHECK-NEXT: %[[L0:.*]] = memref.load %{{.*}}[%[[I0]], %[[I1]]] + %v0 = memref.load %0[%x40, %x41] : memref<16x32xf32> - // CHECK-NEXT: store %[[L0]], %{{.*}}[%[[I0]], %[[I1]]] - store %v0, %1[%x40, %x41] : memref<16x32xf32> + // CHECK-NEXT: memref.store %[[L0]], %{{.*}}[%[[I0]], %[[I1]]] + memref.store %v0, %1[%x40, %x41] : memref<16x32xf32> } } } @@ -138,8 +138,8 @@ // CHECK-LABEL: func @compose_affine_maps_dependent_loads() { func @compose_affine_maps_dependent_loads() { - %0 = alloc() : memref<16x32xf32> - %1 = alloc() : memref<16x32xf32> + %0 = memref.alloc() : memref<16x32xf32> + %1 = memref.alloc() : memref<16x32xf32> affine.for %i0 = 0 to 3 { affine.for %i1 = 0 to 3 { @@ -157,15 +157,15 @@ // CHECK: %[[I0:.*]] = affine.apply #[[$MAP9]](%{{.*}}) // CHECK: %[[I1:.*]] = affine.apply #[[$MAP4b]](%{{.*}}) // CHECK: %[[I2:.*]] = affine.apply #[[$MAP10]](%{{.*}}) - // CHECK-NEXT: %[[V0:.*]] = load %{{.*}}[%[[I0]], %[[I1]]] - %v0 = load %0[%x00, %x01] : memref<16x32xf32> + // CHECK-NEXT: %[[V0:.*]] = memref.load %{{.*}}[%[[I0]], %[[I1]]] + %v0 = memref.load %0[%x00, %x01] : memref<16x32xf32> - // CHECK-NEXT: store %[[V0]], %{{.*}}[%[[I0]], %[[I2]]] - store %v0, %0[%x00, %x02] : memref<16x32xf32> + // CHECK-NEXT: memref.store %[[V0]], %{{.*}}[%[[I0]], %[[I2]]] + memref.store %v0, %0[%x00, %x02] : memref<16x32xf32> // Swizzle %i0, %i1 - // CHECK-NEXT: store %[[V0]], %{{.*}}[%[[I1]], %[[I0]]] - store %v0, %0[%x01, %x00] : memref<16x32xf32> + // CHECK-NEXT: memref.store %[[V0]], %{{.*}}[%[[I1]], %[[I0]]] + memref.store %v0, %0[%x01, %x00] : memref<16x32xf32> // Swizzle %x00, %x01 and %c3, %c7 %x10 = affine.apply affine_map<(d0, d1)[s0, s1] -> (d0 * s1)> @@ -175,8 +175,8 @@ // CHECK-NEXT: %[[I2A:.*]] = affine.apply #[[$MAP12]](%{{.*}}) // CHECK-NEXT: %[[I2B:.*]] = affine.apply #[[$MAP11]](%{{.*}}) - // CHECK-NEXT: store %[[V0]], %{{.*}}[%[[I2A]], %[[I2B]]] - store %v0, %0[%x10, %x11] : memref<16x32xf32> + // CHECK-NEXT: memref.store %[[V0]], %{{.*}}[%[[I2A]], %[[I2B]]] + memref.store %v0, %0[%x10, %x11] : memref<16x32xf32> } } } @@ -198,8 
+198,8 @@ %d1 = affine.apply affine_map<(d0, d1) -> (d1 floordiv 3)> (%b, %c) // CHECK: %[[I0:.*]] = affine.apply #[[$MAP13A]](%{{.*}}) // CHECK: %[[I1:.*]] = affine.apply #[[$MAP13B]](%{{.*}}) - // CHECK-NEXT: store %arg0, %arg1[%[[I0]], %[[I1]]] - store %arg0, %arg1[%d0, %d1] : memref<4x4xf32> + // CHECK-NEXT: memref.store %arg0, %arg1[%[[I0]], %[[I1]]] + memref.store %arg0, %arg1[%d0, %d1] : memref<4x4xf32> } return @@ -223,16 +223,16 @@ // CHECK-LABEL: func @arg_used_as_dim_and_symbol func @arg_used_as_dim_and_symbol(%arg0: memref<100x100xf32>, %arg1: index, %arg2: f32) { %c9 = constant 9 : index - %1 = alloc() : memref<100x100xf32, 1> - %2 = alloc() : memref<1xi32> + %1 = memref.alloc() : memref<100x100xf32, 1> + %2 = memref.alloc() : memref<1xi32> affine.for %i0 = 0 to 100 { affine.for %i1 = 0 to 100 { %3 = affine.apply affine_map<(d0, d1)[s0, s1] -> (d1 + s0 + s1)> (%i0, %i1)[%arg1, %c9] %4 = affine.apply affine_map<(d0, d1, d3) -> (d3 - (d0 + d1))> (%arg1, %c9, %3) - // CHECK: store %arg2, %{{.*}}[%{{.*}}, %{{.*}}] - store %arg2, %1[%4, %arg1] : memref<100x100xf32, 1> + // CHECK: memref.store %arg2, %{{.*}}[%{{.*}}, %{{.*}}] + memref.store %arg2, %1[%4, %arg1] : memref<100x100xf32, 1> } } return @@ -244,17 +244,17 @@ func @trivial_maps() { // CHECK-NOT: affine.apply - %0 = alloc() : memref<10xf32> + %0 = memref.alloc() : memref<10xf32> %c0 = constant 0 : index %cst = constant 0.000000e+00 : f32 affine.for %i1 = 0 to 10 { %1 = affine.apply affine_map<()[s0] -> (s0)>()[%c0] - store %cst, %0[%1] : memref<10xf32> - %2 = load %0[%c0] : memref<10xf32> + memref.store %cst, %0[%1] : memref<10xf32> + %2 = memref.load %0[%c0] : memref<10xf32> %3 = affine.apply affine_map<()[] -> (0)>()[] - store %cst, %0[%3] : memref<10xf32> - store %2, %0[%c0] : memref<10xf32> + memref.store %cst, %0[%3] : memref<10xf32> + memref.store %2, %0[%c0] : memref<10xf32> } return } @@ -422,7 +422,7 @@ %1 = affine.apply affine_map<()[s0] -> (s0 + 1)> ()[%M] %2 = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)> (%i0)[%1] // CHECK-DAG: {{.*}} = affine.apply #[[$symbolic_semi_affine]](%{{.*}})[%{{.*}}] - store %f1, %A[%2] : memref + memref.store %f1, %A[%2] : memref } return } @@ -667,7 +667,7 @@ %cst = constant 1.0 : f32 %c0 = constant 0 : index %c4 = constant 4 : index - %0 = alloc() : memref<4xf32> + %0 = memref.alloc() : memref<4xf32> // CHECK: affine.parallel (%{{.*}}) = (0) to (4) affine.parallel (%i) = (%c0) to (%c0 + %c4) { %1 = affine.apply #map3(%i) @@ -686,11 +686,11 @@ %1 = affine.apply affine_map<()[s0] -> (3 * s0)> ()[%i0] %2 = affine.apply affine_map<(d0)[s0, s1] -> (d0 mod s1 + s0 * s1 + s0 * 4)> (%i1)[%0, %1] %3 = index_cast %2: index to i64 - store %3, %A[]: memref + memref.store %3, %A[]: memref affine.for %i2 = 0 to 3 { %4 = affine.apply affine_map<(d0)[s0, s1] -> (d0 ceildiv s1 + s0 + s0 * 3)> (%i2)[%0, %1] %5 = index_cast %4: index to i64 - store %5, %A[]: memref + memref.store %5, %A[]: memref } return } diff --git a/mlir/test/Dialect/Affine/dma-generate.mlir b/mlir/test/Dialect/Affine/dma-generate.mlir --- a/mlir/test/Dialect/Affine/dma-generate.mlir +++ b/mlir/test/Dialect/Affine/dma-generate.mlir @@ -14,21 +14,21 @@ // CHECK-LABEL: func @loop_nest_1d() { func @loop_nest_1d() { - %A = alloc() : memref<256 x f32> - %B = alloc() : memref<512 x f32> - %F = alloc() : memref<256 x f32, 2> + %A = memref.alloc() : memref<256 x f32> + %B = memref.alloc() : memref<512 x f32> + %F = memref.alloc() : memref<256 x f32, 2> // First DMA buffer. 
- // CHECK: alloc() : memref<256xf32> - // CHECK: alloc() : memref<256xf32, 2> + // CHECK: memref.alloc() : memref<256xf32> + // CHECK: memref.alloc() : memref<256xf32, 2> // Tag for first DMA. - // CHECK: alloc() : memref<1xi32> + // CHECK: memref.alloc() : memref<1xi32> // First DMA transfer. // CHECK: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<256xf32>, memref<256xf32, 2>, memref<1xi32> // CHECK: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> // Second DMA buffer. - // CHECK: alloc() : memref<256xf32, 2> + // CHECK: memref.alloc() : memref<256xf32, 2> // Tag for second DMA. - // CHECK: alloc() : memref<1xi32> + // CHECK: memref.alloc() : memref<1xi32> // Second DMA transfer. // CHECK: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<512xf32>, memref<256xf32, 2>, memref<1xi32> // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> @@ -58,13 +58,13 @@ // CHECK-LABEL: func @loop_nest_high_d // CHECK: %{{.*}} = constant 16384 : index -// CHECK-DAG: [[BUFB:%[0-9]+]] = alloc() : memref<512x32xf32, 2> -// CHECK-DAG: [[BUFA:%[0-9]+]] = alloc() : memref<512x32xf32, 2> -// CHECK-DAG: [[BUFC:%[0-9]+]] = alloc() : memref<512x32xf32, 2> -// CHECK-DAG: [[TAGB:%[0-9]+]] = alloc() : memref<1xi32> -// CHECK-DAG: [[TAGA:%[0-9]+]] = alloc() : memref<1xi32> -// CHECK-DAG: [[TAGC:%[0-9]+]] = alloc() : memref<1xi32> -// CHECK-DAG: [[TAGC_W:%[0-9]+]] = alloc() : memref<1xi32> +// CHECK-DAG: [[BUFB:%[0-9]+]] = memref.alloc() : memref<512x32xf32, 2> +// CHECK-DAG: [[BUFA:%[0-9]+]] = memref.alloc() : memref<512x32xf32, 2> +// CHECK-DAG: [[BUFC:%[0-9]+]] = memref.alloc() : memref<512x32xf32, 2> +// CHECK-DAG: [[TAGB:%[0-9]+]] = memref.alloc() : memref<1xi32> +// CHECK-DAG: [[TAGA:%[0-9]+]] = memref.alloc() : memref<1xi32> +// CHECK-DAG: [[TAGC:%[0-9]+]] = memref.alloc() : memref<1xi32> +// CHECK-DAG: [[TAGC_W:%[0-9]+]] = memref.alloc() : memref<1xi32> // INCOMING DMA for B // CHECK-DAG: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], [[BUFB]][%{{.*}}, %{{.*}}], [[TAGB]][%{{.*}}], %{{.*}} : memref<512x32xf32>, memref<512x32xf32, 2>, memref<1xi32> // CHECK-DAG: affine.dma_wait [[TAGB]][%{{.*}}], %{{.*}} : memref<1xi32> @@ -145,10 +145,10 @@ // region within a 256 x 8 memref. // // CHECK-LABEL: func @loop_nest_modulo() { -// CHECK: alloc() : memref<256x8xf32> +// CHECK: memref.alloc() : memref<256x8xf32> // CHECK-NEXT: affine.for %{{.*}} = 0 to 32 step 4 { -// CHECK: alloc() : memref<1x2xf32, 2> -// CHECK-NEXT: alloc() : memref<1xi32> +// CHECK: memref.alloc() : memref<1x2xf32, 2> +// CHECK-NEXT: memref.alloc() : memref<1xi32> // Composition of the affine map for '%{{.*}}' causes '%{{.*}}' to be added as a symbol. // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}, 0], %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<256x8xf32>, memref<1x2xf32, 2>, memref<1xi32> // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> @@ -161,7 +161,7 @@ // CHECK-NEXT: } // CHECK-NEXT: return func @loop_nest_modulo() { - %A = alloc() : memref<256 x 8 x f32> + %A = memref.alloc() : memref<256 x 8 x f32> affine.for %i = 0 to 32 step 4 { // DMAs will be performed at this level (%j is the first unit stride loop) affine.for %j = 0 to 8 { @@ -179,11 +179,11 @@ // dependent on outer loop IVs. 
// CHECK-LABEL: func @loop_nest_tiled() -> memref<256x1024xf32> { func @loop_nest_tiled() -> memref<256x1024xf32> { - %0 = alloc() : memref<256x1024xf32> + %0 = memref.alloc() : memref<256x1024xf32> affine.for %i0 = 0 to 256 step 32 { affine.for %i1 = 0 to 1024 step 32 { -// CHECK: alloc() : memref<32x32xf32, 2> -// CHECK-NEXT: alloc() : memref<1xi32> +// CHECK: memref.alloc() : memref<32x32xf32, 2> +// CHECK-NEXT: memref.alloc() : memref<1xi32> // Strided DMA here: 32 x 32 tile in a 256 x 1024 memref. // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}, %{{.*}}, %{{.*}} : memref<256x1024xf32>, memref<32x32xf32, 2>, memref<1xi32> // CHECK-NEXT: affine.dma_wait @@ -206,8 +206,8 @@ func @dma_constant_dim_access(%A : memref<100x100xf32>) { %one = constant 1 : index %N = constant 100 : index - // CHECK: alloc() : memref<1x100xf32, 2> - // CHECK-NEXT: alloc() : memref<1xi32> + // CHECK: memref.alloc() : memref<1x100xf32, 2> + // CHECK-NEXT: memref.alloc() : memref<1xi32> // No strided DMA needed here. // CHECK: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<100x100xf32>, memref<1x100xf32, 2>, // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> @@ -232,8 +232,8 @@ } } return -// CHECK: alloc() : memref<100x100xf32, 2> -// CHECK-NEXT: alloc() : memref<1xi32> +// CHECK: memref.alloc() : memref<100x100xf32, 2> +// CHECK-NEXT: memref.alloc() : memref<1xi32> // CHECK-NEXT: affine.dma_start %{{.*}}[0, symbol(%{{.*}}) + 9], %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} // CHECK-NEXT: affine.for %[[IV0:.*]] = 0 to 100 { @@ -251,8 +251,8 @@ %K = constant 9 : index // The buffer size can't be bound by a constant smaller than the original // memref size; so the DMA buffer is the entire 100x100. 
-// CHECK: alloc() : memref<100x100xf32, 2> -// CHECK-NEXT: alloc() : memref<1xi32> +// CHECK: memref.alloc() : memref<100x100xf32, 2> +// CHECK-NEXT: memref.alloc() : memref<1xi32> // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<100x100xf32>, memref<100x100xf32, 2>, memref<1xi32> // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> affine.for %i = 0 to 100 { @@ -269,8 +269,8 @@ // CHECK-LABEL: func @dma_unknown_size func @dma_unknown_size(%arg0: memref) { %c0 = constant 0 : index - %M = dim %arg0, %c0 : memref - %N = dim %arg0, %c0 : memref + %M = memref.dim %arg0, %c0 : memref + %N = memref.dim %arg0, %c0 : memref affine.for %i = 0 to %M { affine.for %j = 0 to %N { // If this loop nest isn't tiled, the access requires a non-constant DMA @@ -315,7 +315,7 @@ // CHECK-LABEL: func @multi_load_store_union() { func @multi_load_store_union() { - %A = alloc() : memref<512 x 512 x f32> + %A = memref.alloc() : memref<512 x 512 x f32> affine.for %i = 0 to 256 { affine.for %j = 0 to 256 { %idx = affine.apply affine_map<(d0) -> (d0 + 64)>(%i) @@ -335,12 +335,12 @@ } return } -// CHECK: alloc() : memref<512x512xf32> -// CHECK-NEXT: alloc() : memref<382x446xf32, 2> -// CHECK-NEXT: alloc() : memref<1xi32> +// CHECK: memref.alloc() : memref<512x512xf32> +// CHECK-NEXT: memref.alloc() : memref<382x446xf32, 2> +// CHECK-NEXT: memref.alloc() : memref<1xi32> // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}, %{{.*}}, %{{.*}} : memref<512x512xf32>, memref<382x446xf32, 2>, memref<1xi32> // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> -// CHECK-NEXT: alloc() : memref<1xi32> +// CHECK-NEXT: memref.alloc() : memref<1xi32> // CHECK-NEXT: affine.for %{{.*}} = 0 to 256 { // CHECK-NEXT: affine.for %{{.*}} = 0 to 256 { // CHECK: affine.load %{{.*}}[%{{.*}}, %{{.*}} + 126] : memref<382x446xf32, 2> @@ -363,7 +363,7 @@ func @dma_loop_straightline_interspersed() { %c0 = constant 0 : index %c255 = constant 255 : index - %A = alloc() : memref<256 x f32> + %A = memref.alloc() : memref<256 x f32> %v = affine.load %A[%c0] : memref<256 x f32> affine.for %i = 1 to 255 { affine.load %A[%i] : memref<256 x f32> @@ -374,16 +374,16 @@ } // There are three regions here - the 'load' preceding the loop, the loop // itself, and the operations appearing after the scf. 
-// CHECK: alloc() : memref<256xf32> -// CHECK-NEXT: alloc() : memref<1xf32, 2> -// CHECK-NEXT: alloc() : memref<1xi32> +// CHECK: memref.alloc() : memref<256xf32> +// CHECK-NEXT: memref.alloc() : memref<1xf32, 2> +// CHECK-NEXT: memref.alloc() : memref<1xi32> // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<256xf32>, memref<1xf32, 2>, memref<1xi32> // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> // CHECK-NEXT: affine.load %{{.*}}[0] : memref<1xf32, 2> // CHECK-NEXT: dealloc %{{.*}} : memref<1xi32> // CHECK-NEXT: dealloc %{{.*}} : memref<1xf32, 2> -// CHECK-NEXT: alloc() : memref<254xf32, 2> -// CHECK-NEXT: alloc() : memref<1xi32> +// CHECK-NEXT: memref.alloc() : memref<254xf32, 2> +// CHECK-NEXT: memref.alloc() : memref<1xi32> // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<256xf32>, memref<254xf32, 2>, memref<1xi32> // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> // CHECK-NEXT: affine.for %{{.*}} = 1 to 255 { @@ -391,11 +391,11 @@ // CHECK-NEXT: } // CHECK-NEXT: dealloc %{{.*}} : memref<1xi32> // CHECK-NEXT: dealloc %{{.*}} : memref<254xf32, 2> -// CHECK-NEXT: alloc() : memref<256xf32, 2> -// CHECK-NEXT: alloc() : memref<1xi32> +// CHECK-NEXT: memref.alloc() : memref<256xf32, 2> +// CHECK-NEXT: memref.alloc() : memref<1xi32> // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<256xf32>, memref<256xf32, 2>, memref<1xi32> // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> -// CHECK-NEXT: alloc() : memref<1xi32> +// CHECK-NEXT: memref.alloc() : memref<1xi32> // CHECK-NEXT: affine.load %{{.*}}[255] : memref<256xf32, 2> // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[0] : memref<256xf32, 2> // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<256xf32, 2>, memref<256xf32>, memref<1xi32> @@ -410,7 +410,7 @@ // CHECK-LABEL: func @dma_mixed_loop_blocks() { func @dma_mixed_loop_blocks() { %c0 = constant 0 : index - %A = alloc() : memref<256 x 256 x vector<8 x f32>> + %A = memref.alloc() : memref<256 x 256 x vector<8 x f32>> affine.for %i = 0 to 256 { %v = affine.load %A[%c0, %c0] : memref<256 x 256 x vector<8 x f32>> "foo"(%v) : (vector<8 x f32>) -> () @@ -421,9 +421,9 @@ } return } -// CHECK-DAG: [[MEM:%[0-9]+]] = alloc() : memref<256x256xvector<8xf32>> -// CHECK-DAG: [[BUF:%[0-9]+]] = alloc() : memref<256x256xvector<8xf32>, 2> -// CHECK-DAG: [[TAG:%[0-9]+]] = alloc() : memref<1xi32> +// CHECK-DAG: [[MEM:%[0-9]+]] = memref.alloc() : memref<256x256xvector<8xf32>> +// CHECK-DAG: [[BUF:%[0-9]+]] = memref.alloc() : memref<256x256xvector<8xf32>, 2> +// CHECK-DAG: [[TAG:%[0-9]+]] = memref.alloc() : memref<1xi32> // CHECK: affine.dma_start [[MEM]][%{{.*}}, %{{.*}}], [[BUF]][%{{.*}}, %{{.*}}], [[TAG]][%{{.*}}], %{{.*}} : memref<256x256xvector<8xf32>>, memref<256x256xvector<8xf32>, 2>, memref<1xi32> // CHECK-NEXT: affine.dma_wait [[TAG]][%{{.*}}], %{{.*}} : memref<1xi32> // CHECK-NEXT: affine.for %{{.*}} = 0 to 256 { @@ -443,8 +443,8 @@ } return } -// CHECK: [[BUF:%[0-9]+]] = alloc() : memref<1027xf32, 2> -// CHECK-NEXT: [[MEM:%[0-9]+]] = alloc() : memref<1xi32> +// CHECK: [[BUF:%[0-9]+]] = memref.alloc() : memref<1027xf32, 2> +// CHECK-NEXT: [[MEM:%[0-9]+]] = memref.alloc() : memref<1xi32> // CHECK-NEXT: affine.for %{{.*}} = 0 to 1024 { // CHECK-NEXT: affine.for %[[I2:.*]] = {{#map[0-9]+}}(%{{.*}}) to {{#map[0-9]+}}(%{{.*}}) { // 
CHECK: affine.store %{{.*}}, [[BUF]][%[[I2]]] : memref<1027xf32, 2> @@ -456,7 +456,7 @@ // ----- func @test_read_write_region_union() { - %0 = alloc() : memref<256xf32> + %0 = memref.alloc() : memref<256xf32> affine.for %i0 = 0 to 10 { // memref dims: [0, 256) // read region: [100, 110) @@ -470,12 +470,12 @@ return } -// CHECK: alloc() : memref<256xf32> -// CHECK-NEXT: alloc() : memref<85xf32, 2> -// CHECK-NEXT: alloc() : memref<1xi32> +// CHECK: memref.alloc() : memref<256xf32> +// CHECK-NEXT: memref.alloc() : memref<85xf32, 2> +// CHECK-NEXT: memref.alloc() : memref<1xi32> // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<256xf32>, memref<85xf32, 2>, memref<1xi32> // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> -// CHECK-NEXT: alloc() : memref<1xi32> +// CHECK-NEXT: memref.alloc() : memref<1xi32> // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 { // CHECK: affine.load %{{.*}}[%{{.*}} + 75] : memref<85xf32, 2> // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<85xf32, 2> @@ -493,8 +493,8 @@ // CHECK-LABEL: func @test_analysis_util func @test_analysis_util(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9xf32>, %arg2: memref<2xf32>) -> (memref<144x9xf32>, memref<2xf32>) { %c0 = constant 0 : index - %0 = alloc() : memref<64x1xf32> - %1 = alloc() : memref<144x4xf32> + %0 = memref.alloc() : memref<64x1xf32> + %1 = memref.alloc() : memref<144x4xf32> %2 = constant 0.0 : f32 affine.for %i8 = 0 to 9 step 3 { affine.for %i9 = #map_lb(%i8) to #map_ub(%i8) { @@ -510,7 +510,7 @@ return %arg1, %arg2 : memref<144x9xf32>, memref<2xf32> } // CHECK: affine.for %{{.*}} = 0 to 9 step 3 { -// CHECK: [[BUF:%[0-9]+]] = alloc() : memref<2xf32, 2> +// CHECK: [[BUF:%[0-9]+]] = memref.alloc() : memref<2xf32, 2> // CHECK: affine.dma_start %{{.*}}[%{{.*}} floordiv 8], [[BUF]] // CHECK: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> // CHECK: affine.for %{{.*}} = @@ -539,8 +539,8 @@ return %arg1, %arg2 : memref<144x9xvector<8x128xf32>>, memref<2xvector<8x128xf32>> } -// CHECK: alloc() : memref<4x4x16x1xvector<8x128xf32>, 2> -// CHECK-NEXT: alloc() : memref<1xi32> +// CHECK: memref.alloc() : memref<4x4x16x1xvector<8x128xf32>, 2> +// CHECK-NEXT: memref.alloc() : memref<1xi32> // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}], %{{.*}}[%{{.*}}], %{{.*}} : memref<4x4x16x1xvector<8x128xf32>>, memref<4x4x16x1xvector<8x128xf32>, 2>, memref<1xi32> // CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}}], %{{.*}} : memref<1xi32> @@ -553,7 +553,7 @@ func @load_store_same_memref(%arg0: memref<256x1024xf32>) { // FAST-MEM-16KB: affine.for %{{.*}} = 0 to 256 step 4 affine.for %i0 = 0 to 256 step 4 { - // FAST-MEM-16KB: [[BUF:%[0-9]+]] = alloc() : memref<4x1024xf32, 2> + // FAST-MEM-16KB: [[BUF:%[0-9]+]] = memref.alloc() : memref<4x1024xf32, 2> // FAST-MEM-16KB: affine.dma_start %{{.*}} // FAST-MEM-16KB-NEXT: affine.dma_wait // FAST-MEM-16KB: affine.for %{{.*}} diff --git a/mlir/test/Dialect/Affine/dma.mlir b/mlir/test/Dialect/Affine/dma.mlir --- a/mlir/test/Dialect/Affine/dma.mlir +++ b/mlir/test/Dialect/Affine/dma.mlir @@ -4,9 +4,9 @@ // Test with loop IVs. 
func @test0(%arg0 : index, %arg1 : index) { - %0 = alloc() : memref<100x100xf32> - %1 = alloc() : memref<100x100xf32, affine_map<(d0, d1) -> (d0, d1)>, 2> - %2 = alloc() : memref<1xi32> + %0 = memref.alloc() : memref<100x100xf32> + %1 = memref.alloc() : memref<100x100xf32, affine_map<(d0, d1) -> (d0, d1)>, 2> + %2 = memref.alloc() : memref<1xi32> %c0 = constant 0 : index %c64 = constant 64 : index affine.for %i0 = 0 to 10 { @@ -25,9 +25,9 @@ // Test with loop IVs and optional stride arguments. func @test1(%arg0 : index, %arg1 : index) { - %0 = alloc() : memref<100x100xf32> - %1 = alloc() : memref<100x100xf32, affine_map<(d0, d1) -> (d0, d1)>, 2> - %2 = alloc() : memref<1xi32> + %0 = memref.alloc() : memref<100x100xf32> + %1 = memref.alloc() : memref<100x100xf32, affine_map<(d0, d1) -> (d0, d1)>, 2> + %2 = memref.alloc() : memref<1xi32> %c0 = constant 0 : index %c64 = constant 64 : index %c128 = constant 128 : index @@ -48,9 +48,9 @@ // Test with loop IVs and symbols (without symbol keyword). func @test2(%arg0 : index, %arg1 : index) { - %0 = alloc() : memref<100x100xf32> - %1 = alloc() : memref<100x100xf32, affine_map<(d0, d1) -> (d0, d1)>, 2> - %2 = alloc() : memref<1xi32> + %0 = memref.alloc() : memref<100x100xf32> + %1 = memref.alloc() : memref<100x100xf32, affine_map<(d0, d1) -> (d0, d1)>, 2> + %2 = memref.alloc() : memref<1xi32> %c0 = constant 0 : index %c64 = constant 64 : index affine.for %i0 = 0 to 10 { @@ -70,9 +70,9 @@ // Test with loop IVs and symbols (with symbol keyword). func @test3(%arg0 : index, %arg1 : index) { - %0 = alloc() : memref<100x100xf32> - %1 = alloc() : memref<100x100xf32, affine_map<(d0, d1) -> (d0, d1)>, 2> - %2 = alloc() : memref<1xi32> + %0 = memref.alloc() : memref<100x100xf32> + %1 = memref.alloc() : memref<100x100xf32, affine_map<(d0, d1) -> (d0, d1)>, 2> + %2 = memref.alloc() : memref<1xi32> %c0 = constant 0 : index %c64 = constant 64 : index affine.for %i0 = 0 to 10 { @@ -93,9 +93,9 @@ // Test with loop IVs, symbols and constants in nested affine expressions. 
func @test4(%arg0 : index, %arg1 : index) { - %0 = alloc() : memref<100x100xf32> - %1 = alloc() : memref<100x100xf32, 2> - %2 = alloc() : memref<1xi32> + %0 = memref.alloc() : memref<100x100xf32> + %1 = memref.alloc() : memref<100x100xf32, 2> + %2 = memref.alloc() : memref<1xi32> %c64 = constant 64 : index affine.for %i0 = 0 to 10 { affine.for %i1 = 0 to 10 { diff --git a/mlir/test/Dialect/Affine/invalid.mlir b/mlir/test/Dialect/Affine/invalid.mlir --- a/mlir/test/Dialect/Affine/invalid.mlir +++ b/mlir/test/Dialect/Affine/invalid.mlir @@ -121,9 +121,9 @@ func @affine_if_invalid_dimop_dim(%arg0: index, %arg1: index, %arg2: index, %arg3: index) { affine.for %n0 = 0 to 7 { - %0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref + %0 = memref.alloc(%arg0, %arg1, %arg2, %arg3) : memref %c0 = constant 0 : index - %dim = dim %0, %c0 : memref + %dim = memref.dim %0, %c0 : memref // expected-error@+1 {{operand cannot be used as a symbol}} affine.if #set0(%dim)[%n0] {} @@ -253,7 +253,7 @@ // ----- func @affine_parallel(%arg0 : index, %arg1 : index, %arg2 : index) { - %0 = alloc() : memref<100x100xf32> + %0 = memref.alloc() : memref<100x100xf32> // expected-error@+1 {{reduction must be specified for each output}} %1 = affine.parallel (%i, %j) = (0, 0) to (100, 100) step (10, 10) -> (f32) { %2 = affine.load %0[%i, %j] : memref<100x100xf32> @@ -265,7 +265,7 @@ // ----- func @affine_parallel(%arg0 : index, %arg1 : index, %arg2 : index) { - %0 = alloc() : memref<100x100xf32> + %0 = memref.alloc() : memref<100x100xf32> // expected-error@+1 {{invalid reduction value: "bad"}} %1 = affine.parallel (%i, %j) = (0, 0) to (100, 100) step (10, 10) reduce ("bad") -> (f32) { %2 = affine.load %0[%i, %j] : memref<100x100xf32> @@ -277,7 +277,7 @@ // ----- func @affine_parallel(%arg0 : index, %arg1 : index, %arg2 : index) { - %0 = alloc() : memref<100x100xi32> + %0 = memref.alloc() : memref<100x100xi32> %1 = affine.parallel (%i, %j) = (0, 0) to (100, 100) step (10, 10) reduce ("minf") -> (f32) { %2 = affine.load %0[%i, %j] : memref<100x100xi32> // expected-error@+1 {{types mismatch between yield op and its parent}} @@ -289,7 +289,7 @@ // ----- func @vector_load_invalid_vector_type() { - %0 = alloc() : memref<100xf32> + %0 = memref.alloc() : memref<100xf32> affine.for %i0 = 0 to 16 step 8 { // expected-error@+1 {{requires memref and vector types of the same elemental type}} %1 = affine.vector_load %0[%i0] : memref<100xf32>, vector<8xf64> @@ -300,7 +300,7 @@ // ----- func @vector_store_invalid_vector_type() { - %0 = alloc() : memref<100xf32> + %0 = memref.alloc() : memref<100xf32> %1 = constant dense<7.0> : vector<8xf64> affine.for %i0 = 0 to 16 step 8 { // expected-error@+1 {{requires memref and vector types of the same elemental type}} @@ -312,7 +312,7 @@ // ----- func @vector_load_vector_memref() { - %0 = alloc() : memref<100xvector<8xf32>> + %0 = memref.alloc() : memref<100xvector<8xf32>> affine.for %i0 = 0 to 4 { // expected-error@+1 {{requires memref and vector types of the same elemental type}} %1 = affine.vector_load %0[%i0] : memref<100xvector<8xf32>>, vector<8xf32> @@ -323,7 +323,7 @@ // ----- func @vector_store_vector_memref() { - %0 = alloc() : memref<100xvector<8xf32>> + %0 = memref.alloc() : memref<100xvector<8xf32>> %1 = constant dense<7.0> : vector<8xf32> affine.for %i0 = 0 to 4 { // expected-error@+1 {{requires memref and vector types of the same elemental type}} diff --git a/mlir/test/Dialect/Affine/load-store-invalid.mlir b/mlir/test/Dialect/Affine/load-store-invalid.mlir --- 
a/mlir/test/Dialect/Affine/load-store-invalid.mlir +++ b/mlir/test/Dialect/Affine/load-store-invalid.mlir @@ -63,7 +63,7 @@ // ----- func @load_non_affine_index(%arg0 : index) { - %0 = alloc() : memref<10xf32> + %0 = memref.alloc() : memref<10xf32> affine.for %i0 = 0 to 10 { %1 = muli %i0, %arg0 : index // expected-error@+1 {{op index must be a dimension or symbol identifier}} @@ -75,7 +75,7 @@ // ----- func @store_non_affine_index(%arg0 : index) { - %0 = alloc() : memref<10xf32> + %0 = memref.alloc() : memref<10xf32> %1 = constant 11.0 : f32 affine.for %i0 = 0 to 10 { %2 = muli %i0, %arg0 : index @@ -88,7 +88,7 @@ // ----- func @invalid_prefetch_rw(%i : index) { - %0 = alloc() : memref<10xf32> + %0 = memref.alloc() : memref<10xf32> // expected-error@+1 {{rw specifier has to be 'read' or 'write'}} affine.prefetch %0[%i], rw, locality<0>, data : memref<10xf32> return @@ -97,7 +97,7 @@ // ----- func @invalid_prefetch_cache_type(%i : index) { - %0 = alloc() : memref<10xf32> + %0 = memref.alloc() : memref<10xf32> // expected-error@+1 {{cache type has to be 'data' or 'instr'}} affine.prefetch %0[%i], read, locality<0>, false : memref<10xf32> return @@ -106,9 +106,9 @@ // ----- func @dma_start_non_affine_src_index(%arg0 : index) { - %0 = alloc() : memref<100xf32> - %1 = alloc() : memref<100xf32, 2> - %2 = alloc() : memref<1xi32, 4> + %0 = memref.alloc() : memref<100xf32> + %1 = memref.alloc() : memref<100xf32, 2> + %2 = memref.alloc() : memref<1xi32, 4> %c0 = constant 0 : index %c64 = constant 64 : index affine.for %i0 = 0 to 10 { @@ -123,9 +123,9 @@ // ----- func @dma_start_non_affine_dst_index(%arg0 : index) { - %0 = alloc() : memref<100xf32> - %1 = alloc() : memref<100xf32, 2> - %2 = alloc() : memref<1xi32, 4> + %0 = memref.alloc() : memref<100xf32> + %1 = memref.alloc() : memref<100xf32, 2> + %2 = memref.alloc() : memref<1xi32, 4> %c0 = constant 0 : index %c64 = constant 64 : index affine.for %i0 = 0 to 10 { @@ -140,9 +140,9 @@ // ----- func @dma_start_non_affine_tag_index(%arg0 : index) { - %0 = alloc() : memref<100xf32> - %1 = alloc() : memref<100xf32, 2> - %2 = alloc() : memref<1xi32, 4> + %0 = memref.alloc() : memref<100xf32> + %1 = memref.alloc() : memref<100xf32, 2> + %2 = memref.alloc() : memref<1xi32, 4> %c0 = constant 0 : index %c64 = constant 64 : index affine.for %i0 = 0 to 10 { @@ -157,9 +157,9 @@ // ----- func @dma_wait_non_affine_tag_index(%arg0 : index) { - %0 = alloc() : memref<100xf32> - %1 = alloc() : memref<100xf32, 2> - %2 = alloc() : memref<1xi32, 4> + %0 = memref.alloc() : memref<100xf32> + %1 = memref.alloc() : memref<100xf32, 2> + %2 = memref.alloc() : memref<1xi32, 4> %c0 = constant 0 : index %c64 = constant 64 : index affine.for %i0 = 0 to 10 { diff --git a/mlir/test/Dialect/Affine/load-store.mlir b/mlir/test/Dialect/Affine/load-store.mlir --- a/mlir/test/Dialect/Affine/load-store.mlir +++ b/mlir/test/Dialect/Affine/load-store.mlir @@ -4,7 +4,7 @@ // Test with just loop IVs. func @test0(%arg0 : index, %arg1 : index) { - %0 = alloc() : memref<100x100xf32> + %0 = memref.alloc() : memref<100x100xf32> affine.for %i0 = 0 to 10 { affine.for %i1 = 0 to 10 { %1 = affine.load %0[%i0, %i1] : memref<100x100xf32> @@ -18,7 +18,7 @@ // Test with loop IVs and constants. 
func @test1(%arg0 : index, %arg1 : index) { - %0 = alloc() : memref<100x100xf32> + %0 = memref.alloc() : memref<100x100xf32> affine.for %i0 = 0 to 10 { affine.for %i1 = 0 to 10 { %1 = affine.load %0[%i0 + 3, %i1 + 7] : memref<100x100xf32> @@ -35,7 +35,7 @@ // Test with loop IVs and function args without 'symbol' keyword (should // be parsed as dim identifiers). func @test2(%arg0 : index, %arg1 : index) { - %0 = alloc() : memref<100x100xf32> + %0 = memref.alloc() : memref<100x100xf32> affine.for %i0 = 0 to 10 { affine.for %i1 = 0 to 10 { %1 = affine.load %0[%i0 + %arg0, %i1 + %arg1] : memref<100x100xf32> @@ -52,7 +52,7 @@ // Test with loop IVs and function args with 'symbol' keyword (should // be parsed as symbol identifiers). func @test3(%arg0 : index, %arg1 : index) { - %0 = alloc() : memref<100x100xf32> + %0 = memref.alloc() : memref<100x100xf32> affine.for %i0 = 0 to 10 { affine.for %i1 = 0 to 10 { %1 = affine.load %0[%i0 + symbol(%arg0), %i1 + symbol(%arg1)] @@ -70,7 +70,7 @@ // Test with loop IVs, symbols and constants in nested affine expressions. func @test4(%arg0 : index, %arg1 : index) { - %0 = alloc() : memref<100x100xf32> + %0 = memref.alloc() : memref<100x100xf32> affine.for %i0 = 0 to 10 { affine.for %i1 = 0 to 10 { %1 = affine.load %0[(%i0 + symbol(%arg0)) floordiv 3 + 11, @@ -88,7 +88,7 @@ // Test with swizzled loop IVs. func @test5(%arg0 : index, %arg1 : index) { - %0 = alloc() : memref<10x10x10xf32> + %0 = memref.alloc() : memref<10x10x10xf32> affine.for %i0 = 0 to 10 { affine.for %i1 = 0 to 10 { affine.for %i2 = 0 to 10 { @@ -108,7 +108,7 @@ // Dim identifiers are assigned in parse order: // d0 = %i2, d1 = %arg0, d2 = %i0, d3 = %i1, d4 = %arg1 func @test6(%arg0 : index, %arg1 : index) { - %0 = alloc() : memref<10x10x10xf32> + %0 = memref.alloc() : memref<10x10x10xf32> affine.for %i0 = 0 to 10 { affine.for %i1 = 0 to 10 { affine.for %i2 = 0 to 10 { @@ -131,7 +131,7 @@ // d0 = %i2, d1 = %i0, d2 = %i1 // s0 = %arg0, s1 = %arg1 func @test6(%arg0 : index, %arg1 : index) { - %0 = alloc() : memref<10x10x10xf32> + %0 = memref.alloc() : memref<10x10x10xf32> affine.for %i0 = 0 to 10 { affine.for %i1 = 0 to 10 { affine.for %i2 = 0 to 10 { @@ -157,7 +157,7 @@ // Test with operands without special SSA name. func @test7() { - %0 = alloc() : memref<10xf32> + %0 = memref.alloc() : memref<10xf32> affine.for %i0 = 0 to 10 { %1 = affine.apply affine_map<(d1) -> (d1 + 1)>(%i0) %2 = affine.load %0[%1] : memref<10xf32> @@ -183,7 +183,7 @@ // Test with loop IVs and constants. func @test_prefetch(%arg0 : index, %arg1 : index) { - %0 = alloc() : memref<100x100xf32> + %0 = memref.alloc() : memref<100x100xf32> affine.for %i0 = 0 to 10 { affine.for %i1 = 0 to 10 { %1 = affine.load %0[%i0 + 3, %i1 + 7] : memref<100x100xf32> @@ -200,12 +200,12 @@ // Test with just loop IVs. func @vector_load_vector_store_iv() { - %0 = alloc() : memref<100x100xf32> + %0 = memref.alloc() : memref<100x100xf32> affine.for %i0 = 0 to 16 { affine.for %i1 = 0 to 16 step 8 { %1 = affine.vector_load %0[%i0, %i1] : memref<100x100xf32>, vector<8xf32> affine.vector_store %1, %0[%i0, %i1] : memref<100x100xf32>, vector<8xf32> -// CHECK: %[[buf:.*]] = alloc +// CHECK: %[[buf:.*]] = memref.alloc // CHECK-NEXT: affine.for %[[i0:.*]] = 0 // CHECK-NEXT: affine.for %[[i1:.*]] = 0 // CHECK-NEXT: %[[val:.*]] = affine.vector_load %[[buf]][%[[i0]], %[[i1]]] : memref<100x100xf32>, vector<8xf32> @@ -219,12 +219,12 @@ // Test with loop IVs and constants. 
func @vector_load_vector_store_iv_constant() { - %0 = alloc() : memref<100x100xf32> + %0 = memref.alloc() : memref<100x100xf32> affine.for %i0 = 0 to 10 { affine.for %i1 = 0 to 16 step 4 { %1 = affine.vector_load %0[%i0 + 3, %i1 + 7] : memref<100x100xf32>, vector<4xf32> affine.vector_store %1, %0[%i0 + 3, %i1 + 7] : memref<100x100xf32>, vector<4xf32> -// CHECK: %[[buf:.*]] = alloc +// CHECK: %[[buf:.*]] = memref.alloc // CHECK-NEXT: affine.for %[[i0:.*]] = 0 // CHECK-NEXT: affine.for %[[i1:.*]] = 0 // CHECK-NEXT: %[[val:.*]] = affine.vector_load %{{.*}}[%{{.*}} + 3, %{{.*}} + 7] : memref<100x100xf32>, vector<4xf32> @@ -237,12 +237,12 @@ // ----- func @vector_load_vector_store_2d() { - %0 = alloc() : memref<100x100xf32> + %0 = memref.alloc() : memref<100x100xf32> affine.for %i0 = 0 to 16 step 2{ affine.for %i1 = 0 to 16 step 8 { %1 = affine.vector_load %0[%i0, %i1] : memref<100x100xf32>, vector<2x8xf32> affine.vector_store %1, %0[%i0, %i1] : memref<100x100xf32>, vector<2x8xf32> -// CHECK: %[[buf:.*]] = alloc +// CHECK: %[[buf:.*]] = memref.alloc // CHECK-NEXT: affine.for %[[i0:.*]] = 0 // CHECK-NEXT: affine.for %[[i1:.*]] = 0 // CHECK-NEXT: %[[val:.*]] = affine.vector_load %[[buf]][%[[i0]], %[[i1]]] : memref<100x100xf32>, vector<2x8xf32> diff --git a/mlir/test/Dialect/Affine/loop-tiling-parametric.mlir b/mlir/test/Dialect/Affine/loop-tiling-parametric.mlir --- a/mlir/test/Dialect/Affine/loop-tiling-parametric.mlir +++ b/mlir/test/Dialect/Affine/loop-tiling-parametric.mlir @@ -73,7 +73,7 @@ #ub = affine_map<()[s0, s1] -> (s0, 4096 floordiv s1)> func @tile_loop_with_div_in_upper_bound(%t5 : index, %A : memref, %L : index, %U : index) { %c0 = constant 0 : index - %M = dim %A, %c0 : memref + %M = memref.dim %A, %c0 : memref affine.for %i = 0 to min #ub()[%M, %U] { addi %i, %i : index } @@ -93,7 +93,7 @@ #ub = affine_map<()[s0, s1] -> (s0, 4096 floordiv s1)> func @tile_loop_with_div_in_upper_bound_non_unit_step(%t5 : index, %A : memref, %L : index, %U : index) { %c0 = constant 0 : index - %M = dim %A, %c0 : memref + %M = memref.dim %A, %c0 : memref affine.for %i = 0 to min #ub()[%M, %U] step 4 { addi %i, %i : index } @@ -191,7 +191,7 @@ func @tile_with_symbolic_loop_upper_bounds(%t9 : index, %t10: index, %arg0: memref, %arg1: memref, %arg2: memref) { %cst = constant 0.000000e+00 : f32 %c0 = constant 0 : index - %0 = dim %arg0, %c0 : memref + %0 = memref.dim %arg0, %c0 : memref affine.for %i0 = 0 to %0 { affine.for %i1 = 0 to %0 { affine.store %cst, %arg2[%i0, %i1] : memref @@ -217,7 +217,7 @@ // CHECK: func @tile_with_loop_upper_bounds_in_two_symbols([[ARG0:%arg[0-9]+]]: index{{.*}}){{.*}} func @tile_with_loop_upper_bounds_in_two_symbols(%t11 : index, %arg0: memref, %limit: index) { %c0 = constant 0 : index - %dim0 = dim %arg0, %c0 : memref + %dim0 = memref.dim %arg0, %c0 : memref affine.for %i0 = 0 to affine_map<()[s0, s1] -> (s0 + s1)> ()[%dim0, %limit] { %v0 = affine.load %arg0[%i0] : memref } diff --git a/mlir/test/Dialect/Affine/loop-tiling-validity.mlir b/mlir/test/Dialect/Affine/loop-tiling-validity.mlir --- a/mlir/test/Dialect/Affine/loop-tiling-validity.mlir +++ b/mlir/test/Dialect/Affine/loop-tiling-validity.mlir @@ -9,7 +9,7 @@ // CHECK-LABEL: func @legal_loop() func @legal_loop() { - %0 = alloc() : memref<64xf32> + %0 = memref.alloc() : memref<64xf32> affine.for %i = 0 to 64 { %1 = affine.load %0[%i] : memref<64xf32> @@ -32,7 +32,7 @@ // CHECK-LABEL: func @illegal_loop_with_diag_dependence func @illegal_loop_with_diag_dependence() { - %A = alloc() : memref<64x64xf32> + %A = 
memref.alloc() : memref<64x64xf32> affine.for %i = 0 to 64 { // expected-remark@above {{tiled code is illegal due to dependences}} diff --git a/mlir/test/Dialect/Affine/loop-tiling.mlir b/mlir/test/Dialect/Affine/loop-tiling.mlir --- a/mlir/test/Dialect/Affine/loop-tiling.mlir +++ b/mlir/test/Dialect/Affine/loop-tiling.mlir @@ -67,7 +67,7 @@ // CHECK-LABEL: func @loop_max_min_bound(%{{.*}}: memref, %{{.*}}: index, %{{.*}}: index) { func @loop_max_min_bound(%A : memref, %L : index, %U : index) { %c0 = constant 0 : index - %M = dim %A, %c0 : memref + %M = memref.dim %A, %c0 : memref affine.for %i = max #lb()[%L] to min #ub()[%M, %U] { addi %i, %i : index } @@ -113,7 +113,7 @@ func @tile_with_symbolic_loop_upper_bounds(%arg0: memref, %arg1: memref, %arg2: memref) { %cst = constant 0.000000e+00 : f32 %c0 = constant 0 : index - %0 = dim %arg0, %c0 : memref + %0 = memref.dim %arg0, %c0 : memref affine.for %i0 = 0 to %0 { affine.for %i1 = 0 to %0 { affine.store %cst, %arg2[%i0, %i1] : memref @@ -130,7 +130,7 @@ return } -// CHECK: dim %{{.*}}, %c0 : memref +// CHECK: memref.dim %{{.*}}, %c0 : memref // CHECK-NEXT: affine.for %{{.*}} = 0 to %{{.*}} step 32 { // CHECK-NEXT: affine.for %{{.*}} = 0 to %{{.*}} step 32 { // CHECK-NEXT: affine.for %{{.*}} = #map0(%{{.*}}) to min [[$UBMAP]](%{{.*}})[%{{.*}}] { @@ -158,14 +158,14 @@ func @tile_with_loop_upper_bounds_in_two_symbols(%arg0: memref, %limit: index) { %c0 = constant 0 : index - %dim0 = dim %arg0, %c0 : memref + %dim0 = memref.dim %arg0, %c0 : memref affine.for %i0 = 0 to affine_map<()[s0, s1] -> (s0 + s1)> ()[%dim0, %limit] { %v0 = affine.load %arg0[%i0] : memref } return } -// CHECK: dim %{{.*}}, %c0 : memref +// CHECK: memref.dim %{{.*}}, %c0 : memref // CHECK-NEXT: affine.for %{{.*}} = 0 to [[MAP1]]()[%{{.*}}, %{{.*}}] step 32 { // CHECK-NEXT: affine.for %{{.*}} = [[MAP0]](%{{.*}}) to min [[$UBMAP]](%{{.*}})[%{{.*}}, %{{.*}}] { // CHECK-NEXT: affine.load diff --git a/mlir/test/Dialect/Affine/memref-stride-calculation.mlir b/mlir/test/Dialect/Affine/memref-stride-calculation.mlir --- a/mlir/test/Dialect/Affine/memref-stride-calculation.mlir +++ b/mlir/test/Dialect/Affine/memref-stride-calculation.mlir @@ -2,79 +2,79 @@ func @f(%0: index) { // CHECK-LABEL: Testing: f - %1 = alloc() : memref<3x4x5xf32> + %1 = memref.alloc() : memref<3x4x5xf32> // CHECK: MemRefType offset: 0 strides: 20, 5, 1 - %2 = alloc(%0) : memref<3x4x?xf32> + %2 = memref.alloc(%0) : memref<3x4x?xf32> // CHECK: MemRefType offset: 0 strides: ?, ?, 1 - %3 = alloc(%0) : memref<3x?x5xf32> + %3 = memref.alloc(%0) : memref<3x?x5xf32> // CHECK: MemRefType offset: 0 strides: ?, 5, 1 - %4 = alloc(%0) : memref + %4 = memref.alloc(%0) : memref // CHECK: MemRefType offset: 0 strides: 20, 5, 1 - %5 = alloc(%0, %0) : memref + %5 = memref.alloc(%0, %0) : memref // CHECK: MemRefType offset: 0 strides: ?, ?, 1 - %6 = alloc(%0, %0, %0) : memref + %6 = memref.alloc(%0, %0, %0) : memref // CHECK: MemRefType offset: 0 strides: ?, ?, 1 - %11 = alloc() : memref<3x4x5xf32, affine_map<(i, j, k)->(i, j, k)>> + %11 = memref.alloc() : memref<3x4x5xf32, affine_map<(i, j, k)->(i, j, k)>> // CHECK: MemRefType offset: 0 strides: 20, 5, 1 - %b11 = alloc() : memref<3x4x5xf32, offset: 0, strides: [20, 5, 1]> + %b11 = memref.alloc() : memref<3x4x5xf32, offset: 0, strides: [20, 5, 1]> // CHECK: MemRefType offset: 0 strides: 20, 5, 1 - %12 = alloc(%0) : memref<3x4x?xf32, affine_map<(i, j, k)->(i, j, k)>> + %12 = memref.alloc(%0) : memref<3x4x?xf32, affine_map<(i, j, k)->(i, j, k)>> // CHECK: MemRefType offset: 0 
strides: ?, ?, 1 - %13 = alloc(%0) : memref<3x?x5xf32, affine_map<(i, j, k)->(i, j, k)>> + %13 = memref.alloc(%0) : memref<3x?x5xf32, affine_map<(i, j, k)->(i, j, k)>> // CHECK: MemRefType offset: 0 strides: ?, 5, 1 - %14 = alloc(%0) : memref(i, j, k)>> + %14 = memref.alloc(%0) : memref(i, j, k)>> // CHECK: MemRefType offset: 0 strides: 20, 5, 1 - %15 = alloc(%0, %0) : memref(i, j, k)>> + %15 = memref.alloc(%0, %0) : memref(i, j, k)>> // CHECK: MemRefType offset: 0 strides: ?, ?, 1 - %16 = alloc(%0, %0, %0) : memref(i, j, k)>> + %16 = memref.alloc(%0, %0, %0) : memref(i, j, k)>> // CHECK: MemRefType offset: 0 strides: ?, ?, 1 - %21 = alloc()[%0] : memref<3x4x5xf32, affine_map<(i, j, k)[M]->(32 * i + 16 * j + M * k + 1)>> + %21 = memref.alloc()[%0] : memref<3x4x5xf32, affine_map<(i, j, k)[M]->(32 * i + 16 * j + M * k + 1)>> // CHECK: MemRefType offset: 1 strides: 32, 16, ? - %22 = alloc()[%0] : memref<3x4x5xf32, affine_map<(i, j, k)[M]->(32 * i + M * j + 16 * k + 3)>> + %22 = memref.alloc()[%0] : memref<3x4x5xf32, affine_map<(i, j, k)[M]->(32 * i + M * j + 16 * k + 3)>> // CHECK: MemRefType offset: 3 strides: 32, ?, 16 - %b22 = alloc(%0)[%0, %0] : memref<3x4x?xf32, offset: 0, strides: [?, ?, 1]> + %b22 = memref.alloc(%0)[%0, %0] : memref<3x4x?xf32, offset: 0, strides: [?, ?, 1]> // CHECK: MemRefType offset: 0 strides: ?, ?, 1 - %23 = alloc(%0)[%0] : memref<3x?x5xf32, affine_map<(i, j, k)[M]->(M * i + 32 * j + 16 * k + 7)>> + %23 = memref.alloc(%0)[%0] : memref<3x?x5xf32, affine_map<(i, j, k)[M]->(M * i + 32 * j + 16 * k + 7)>> // CHECK: MemRefType offset: 7 strides: ?, 32, 16 - %b23 = alloc(%0)[%0] : memref<3x?x5xf32, offset: 0, strides: [?, 5, 1]> + %b23 = memref.alloc(%0)[%0] : memref<3x?x5xf32, offset: 0, strides: [?, 5, 1]> // CHECK: MemRefType offset: 0 strides: ?, 5, 1 - %24 = alloc(%0)[%0] : memref<3x?x5xf32, affine_map<(i, j, k)[M]->(M * i + 32 * j + 16 * k + M)>> + %24 = memref.alloc(%0)[%0] : memref<3x?x5xf32, affine_map<(i, j, k)[M]->(M * i + 32 * j + 16 * k + M)>> // CHECK: MemRefType offset: ? strides: ?, 32, 16 - %b24 = alloc(%0)[%0, %0] : memref<3x?x5xf32, offset: ?, strides: [?, 32, 16]> + %b24 = memref.alloc(%0)[%0, %0] : memref<3x?x5xf32, offset: ?, strides: [?, 32, 16]> // CHECK: MemRefType offset: ? strides: ?, 32, 16 - %25 = alloc(%0, %0)[%0, %0] : memref(M * i + N * j + k + 1)>> + %25 = memref.alloc(%0, %0)[%0, %0] : memref(M * i + N * j + k + 1)>> // CHECK: MemRefType offset: 1 strides: ?, ?, 1 - %b25 = alloc(%0, %0)[%0, %0] : memref + %b25 = memref.alloc(%0, %0)[%0, %0] : memref // CHECK: MemRefType offset: 1 strides: ?, ?, 1 - %26 = alloc(%0)[] : memref(i)>> + %26 = memref.alloc(%0)[] : memref(i)>> // CHECK: MemRefType offset: 0 strides: 1 - %27 = alloc()[%0] : memref<5xf32, affine_map<(i)[M]->(M)>> + %27 = memref.alloc()[%0] : memref<5xf32, affine_map<(i)[M]->(M)>> // CHECK: MemRefType memref<5xf32, affine_map<(d0)[s0] -> (s0)>> cannot be converted to strided form - %28 = alloc()[%0] : memref<5xf32, affine_map<(i)[M]->(123)>> + %28 = memref.alloc()[%0] : memref<5xf32, affine_map<(i)[M]->(123)>> // CHECK: MemRefType memref<5xf32, affine_map<(d0)[s0] -> (123)>> cannot be converted to strided form - %29 = alloc()[%0] : memref(M)>> + %29 = memref.alloc()[%0] : memref(M)>> // CHECK: MemRefType offset: ? 
strides: - %30 = alloc()[%0] : memref(123)>> + %30 = memref.alloc()[%0] : memref(123)>> // CHECK: MemRefType offset: 123 strides: - %100 = alloc(%0, %0)[%0, %0] : memref(i + j, j, k)>, affine_map<(i, j, k)[M, N]->(M * i + N * j + k + 1)>> + %100 = memref.alloc(%0, %0)[%0, %0] : memref(i + j, j, k)>, affine_map<(i, j, k)[M, N]->(M * i + N * j + k + 1)>> // CHECK: MemRefType memref (d0 + d1, d1, d2)>, affine_map<(d0, d1, d2)[s0, s1] -> (d0 * s0 + d1 * s1 + d2 + 1)>> cannot be converted to strided form - %101 = alloc() : memref<3x4x5xf32, affine_map<(i, j, k)->(i floordiv 4 + j + k)>> + %101 = memref.alloc() : memref<3x4x5xf32, affine_map<(i, j, k)->(i floordiv 4 + j + k)>> // CHECK: MemRefType memref<3x4x5xf32, affine_map<(d0, d1, d2) -> (d0 floordiv 4 + d1 + d2)>> cannot be converted to strided form - %102 = alloc() : memref<3x4x5xf32, affine_map<(i, j, k)->(i ceildiv 4 + j + k)>> + %102 = memref.alloc() : memref<3x4x5xf32, affine_map<(i, j, k)->(i ceildiv 4 + j + k)>> // CHECK: MemRefType memref<3x4x5xf32, affine_map<(d0, d1, d2) -> (d0 ceildiv 4 + d1 + d2)>> cannot be converted to strided form - %103 = alloc() : memref<3x4x5xf32, affine_map<(i, j, k)->(i mod 4 + j + k)>> + %103 = memref.alloc() : memref<3x4x5xf32, affine_map<(i, j, k)->(i mod 4 + j + k)>> // CHECK: MemRefType memref<3x4x5xf32, affine_map<(d0, d1, d2) -> (d0 mod 4 + d1 + d2)>> cannot be converted to strided form - %200 = alloc()[%0, %0, %0] : memref<3x4x5xf32, affine_map<(i, j, k)[M, N, K]->(M * i + N * i + N * j + K * k - (M + N - 20)* i)>> + %200 = memref.alloc()[%0, %0, %0] : memref<3x4x5xf32, affine_map<(i, j, k)[M, N, K]->(M * i + N * i + N * j + K * k - (M + N - 20)* i)>> // CHECK: MemRefType offset: 0 strides: 20, ?, ? - %201 = alloc()[%0, %0, %0] : memref<3x4x5xf32, affine_map<(i, j, k)[M, N, K]->(M * i + N * i + N * K * j + K * K * k - (M + N - 20) * (i + 1))>> + %201 = memref.alloc()[%0, %0, %0] : memref<3x4x5xf32, affine_map<(i, j, k)[M, N, K]->(M * i + N * i + N * K * j + K * K * k - (M + N - 20) * (i + 1))>> // CHECK: MemRefType offset: ? strides: 20, ?, ? - %202 = alloc()[%0, %0, %0] : memref<3x4x5xf32, affine_map<(i, j, k)[M, N, K]->(M * (i + 1) + j + k - M)>> + %202 = memref.alloc()[%0, %0, %0] : memref<3x4x5xf32, affine_map<(i, j, k)[M, N, K]->(M * (i + 1) + j + k - M)>> // CHECK: MemRefType offset: 0 strides: ?, 1, 1 - %203 = alloc()[%0, %0, %0] : memref<3x4x5xf32, affine_map<(i, j, k)[M, N, K]->(M + M * (i + N * (j + K * k)))>> + %203 = memref.alloc()[%0, %0, %0] : memref<3x4x5xf32, affine_map<(i, j, k)[M, N, K]->(M + M * (i + N * (j + K * k)))>> // CHECK: MemRefType offset: ? strides: ?, ?, ? 
return diff --git a/mlir/test/Dialect/Affine/ops.mlir b/mlir/test/Dialect/Affine/ops.mlir --- a/mlir/test/Dialect/Affine/ops.mlir +++ b/mlir/test/Dialect/Affine/ops.mlir @@ -97,14 +97,14 @@ func @valid_symbols(%arg0: index, %arg1: index, %arg2: index) { %c1 = constant 1 : index %c0 = constant 0 : index - %0 = alloc(%arg0, %arg1) : memref + %0 = memref.alloc(%arg0, %arg1) : memref affine.for %arg3 = 0 to %arg2 step 768 { - %13 = dim %0, %c1 : memref + %13 = memref.dim %0, %c1 : memref affine.for %arg4 = 0 to %13 step 264 { - %18 = dim %0, %c0 : memref - %20 = std.subview %0[%c0, %c0][%18,%arg4][%c1,%c1] : memref + %18 = memref.dim %0, %c0 : memref + %20 = memref.subview %0[%c0, %c0][%18,%arg4][%c1,%c1] : memref to memref - %24 = dim %20, %c0 : memref + %24 = memref.dim %20, %c0 : memref affine.for %arg5 = 0 to %24 step 768 { "foo"() : () -> () } diff --git a/mlir/test/Dialect/Affine/parallelize.mlir b/mlir/test/Dialect/Affine/parallelize.mlir --- a/mlir/test/Dialect/Affine/parallelize.mlir +++ b/mlir/test/Dialect/Affine/parallelize.mlir @@ -4,8 +4,8 @@ // CHECK-LABEL: func @reduce_window_max() { func @reduce_window_max() { %cst = constant 0.000000e+00 : f32 - %0 = alloc() : memref<1x8x8x64xf32> - %1 = alloc() : memref<1x18x18x64xf32> + %0 = memref.alloc() : memref<1x8x8x64xf32> + %1 = memref.alloc() : memref<1x18x18x64xf32> affine.for %arg0 = 0 to 1 { affine.for %arg1 = 0 to 8 { affine.for %arg2 = 0 to 8 { @@ -40,8 +40,8 @@ } // CHECK: %[[cst:.*]] = constant 0.000000e+00 : f32 -// CHECK: %[[v0:.*]] = alloc() : memref<1x8x8x64xf32> -// CHECK: %[[v1:.*]] = alloc() : memref<1x18x18x64xf32> +// CHECK: %[[v0:.*]] = memref.alloc() : memref<1x8x8x64xf32> +// CHECK: %[[v1:.*]] = memref.alloc() : memref<1x18x18x64xf32> // CHECK: affine.parallel (%[[arg0:.*]]) = (0) to (1) { // CHECK: affine.parallel (%[[arg1:.*]]) = (0) to (8) { // CHECK: affine.parallel (%[[arg2:.*]]) = (0) to (8) { @@ -75,9 +75,9 @@ // CHECK: } func @loop_nest_3d_outer_two_parallel(%N : index) { - %0 = alloc() : memref<1024 x 1024 x vector<64xf32>> - %1 = alloc() : memref<1024 x 1024 x vector<64xf32>> - %2 = alloc() : memref<1024 x 1024 x vector<64xf32>> + %0 = memref.alloc() : memref<1024 x 1024 x vector<64xf32>> + %1 = memref.alloc() : memref<1024 x 1024 x vector<64xf32>> + %2 = memref.alloc() : memref<1024 x 1024 x vector<64xf32>> affine.for %i = 0 to %N { affine.for %j = 0 to %N { %7 = affine.load %2[%i, %j] : memref<1024x1024xvector<64xf32>> @@ -108,10 +108,10 @@ // CHECK-LABEL: non_affine_load func @non_affine_load() { - %0 = alloc() : memref<100 x f32> + %0 = memref.alloc() : memref<100 x f32> affine.for %i = 0 to 100 { // CHECK: affine.for %{{.*}} = 0 to 100 { - load %0[%i] : memref<100 x f32> + memref.load %0[%i] : memref<100 x f32> } return } diff --git a/mlir/test/Dialect/Affine/slicing-utils.mlir b/mlir/test/Dialect/Affine/slicing-utils.mlir --- a/mlir/test/Dialect/Affine/slicing-utils.mlir +++ b/mlir/test/Dialect/Affine/slicing-utils.mlir @@ -17,7 +17,7 @@ // FWDBWD-LABEL: slicing_test func @slicing_test() { // Fake 0 to align on 1 and match ASCII art. - %0 = alloc() : memref<1xi32> + %0 = memref.alloc() : memref<1xi32> // FWD: matched: %[[v1:.*]] {{.*}} forward static slice: // FWD-NEXT: %[[v5:.*]] {{.*}} -> i5 diff --git a/mlir/test/Dialect/Affine/unroll.mlir b/mlir/test/Dialect/Affine/unroll.mlir --- a/mlir/test/Dialect/Affine/unroll.mlir +++ b/mlir/test/Dialect/Affine/unroll.mlir @@ -265,9 +265,9 @@ // count threshold set to 2. 
// SHORT-LABEL: func @loop_nest_seq_long() -> i32 {
func @loop_nest_seq_long() -> i32 {
-  %A = alloc() : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
-  %B = alloc() : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
-  %C = alloc() : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
+  %A = memref.alloc() : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
+  %B = memref.alloc() : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
+  %C = memref.alloc() : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
  %zero = constant 0 : i32
  %one = constant 1 : i32
@@ -279,9 +279,9 @@
  affine.for %n0 = 0 to 512 {
    // CHECK: affine.for %arg1 = 0 to 8
    affine.for %n1 = 0 to 8 {
-      store %one, %A[%n0, %n1] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
-      store %two, %B[%n0, %n1] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
-      store %zero, %C[%n0, %n1] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
+      memref.store %one, %A[%n0, %n1] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
+      memref.store %two, %B[%n0, %n1] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
+      memref.store %zero, %C[%n0, %n1] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
    }
  }
@@ -292,27 +292,27 @@
        // CHECK-NOT: affine.for
        // CHECK: %{{[0-9]+}} = affine.apply
        %b2 = "affine.apply" (%y, %arg2) {map = affine_map<(d0, d1) -> (16*d0 + d1)>} : (index, index) -> index
-        %z = load %B[%x, %b2] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
+        %z = memref.load %B[%x, %b2] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
        "op1"(%z) : (i32) -> ()
      }
      affine.for %j1 = 0 to 8 {
        affine.for %j2 = 0 to 8 {
          %a2 = "affine.apply" (%y, %j2) {map = affine_map<(d0, d1) -> (16*d0 + d1)>} : (index, index) -> index
-          %v203 = load %A[%j1, %a2] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
+          %v203 = memref.load %A[%j1, %a2] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
          "op2"(%v203) : (i32) -> ()
        }
        affine.for %k2 = 0 to 8 {
          %s0 = "op3"() : () -> i32
          %c2 = "affine.apply" (%x, %k2) {map = affine_map<(d0, d1) -> (16*d0 + d1)>} : (index, index) -> index
-          %s1 = load %C[%j1, %c2] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
+          %s1 = memref.load %C[%j1, %c2] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
          %s2 = "addi32"(%s0, %s1) : (i32, i32) -> i32
-          store %s2, %C[%j1, %c2] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
+          memref.store %s2, %C[%j1, %c2] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
        }
      }
      "op4"() : () -> ()
    }
  }
-  %ret = load %C[%zero_idx, %zero_idx] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
+  %ret = memref.load %C[%zero_idx, %zero_idx] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
  return %ret : i32
}
diff --git a/mlir/test/Dialect/Async/async-parallel-for.mlir b/mlir/test/Dialect/Async/async-parallel-for.mlir
--- a/mlir/test/Dialect/Async/async-parallel-for.mlir
+++ b/mlir/test/Dialect/Async/async-parallel-for.mlir
@@ -6,14 +6,14 @@
  // CHECK: scf.for
  // CHECK: %[[TOKEN:.*]] = async.execute {
  // CHECK: scf.for
-  // CHECK: store
+  // CHECK: memref.store
  // CHECK: async.yield
  // CHECK: }
  // CHECK: async.add_to_group %[[TOKEN]], %[[GROUP]]
  // CHECK: async.await_all %[[GROUP]]
  scf.parallel (%i) = (%arg0) to (%arg1) step (%arg2) {
    %one = constant 1.0 : f32
-    store %one, %arg3[%i] : memref
+    memref.store %one, %arg3[%i] : memref
  }
  return
@@ -29,7 +29,7 @@
  // CHECK: %[[TOKEN:.*]] = async.execute {
  // CHECK: scf.for
  // CHECK: scf.for
-  // CHECK: store
+  // CHECK: memref.store
  // CHECK: async.yield
  // CHECK: }
  // CHECK: async.add_to_group %[[TOKEN]], %[[GROUP]]
@@ -37,7 +37,7 @@
  scf.parallel (%i0, %i1) = (%arg0, %arg3) to (%arg1, %arg4) step (%arg2, %arg5) {
    %one = constant 1.0 : f32
-    store %one, %arg6[%i0, %i1] : memref
+    memref.store %one, %arg6[%i0, %i1] : memref
  }
  return
diff --git a/mlir/test/Dialect/Async/async-to-async-runtime.mlir b/mlir/test/Dialect/Async/async-to-async-runtime.mlir
--- a/mlir/test/Dialect/Async/async-to-async-runtime.mlir
+++ b/mlir/test/Dialect/Async/async-to-async-runtime.mlir
@@ -4,7 +4,7 @@
func @execute_no_async_args(%arg0: f32, %arg1: memref<1xf32>) {
  %token = async.execute {
    %c0 = constant 0 : index
-    store %arg0, %arg1[%c0] : memref<1xf32>
+    memref.store %arg0, %arg1[%c0] : memref<1xf32>
    async.yield
  }
  async.await %token : !async.token
@@ -28,7 +28,7 @@
// Resume coroutine after suspension.
// CHECK: ^[[RESUME]]:
-// CHECK: store
+// CHECK: memref.store
// CHECK: async.runtime.set_available %[[TOKEN]]
// Delete coroutine.
@@ -50,12 +50,12 @@
    %token1 = async.execute {
      %c1 = constant 1: index
-      store %arg0, %arg2[%c0] : memref<1xf32>
+      memref.store %arg0, %arg2[%c0] : memref<1xf32>
      async.yield
    }
    async.await %token1 : !async.token
-    store %arg1, %arg2[%c0] : memref<1xf32>
+    memref.store %arg1, %arg2[%c0] : memref<1xf32>
    async.yield
  }
  // CHECK: async.runtime.await %[[TOKEN]]
@@ -77,7 +77,7 @@
// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME:.*]], ^[[CLEANUP:.*]]
// CHECK: ^[[RESUME]]:
-// CHECK: store
+// CHECK: memref.store
// CHECK: async.runtime.set_available %[[TOKEN]]
// Function outlined from the outer async.execute operation.
@@ -103,7 +103,7 @@
// Set token available after second resumption.
// CHECK: ^[[RESUME_1]]:
-// CHECK: store
+// CHECK: memref.store
// CHECK: async.runtime.set_available %[[TOKEN]]
// CHECK: ^[[CLEANUP]]:
@@ -116,13 +116,13 @@
  // CHECK: %[[TOKEN:.*]] = call @async_execute_fn
  %token = async.execute {
    %c0 = constant 0 : index
-    store %arg0, %arg1[%c0] : memref<1xf32>
+    memref.store %arg0, %arg1[%c0] : memref<1xf32>
    async.yield
  }
  // CHECK: call @async_execute_fn_0(%[[TOKEN]], %arg0, %arg1)
  %token_0 = async.execute [%token] {
    %c0 = constant 0 : index
-    store %arg0, %arg1[%c0] : memref<1xf32>
+    memref.store %arg0, %arg1[%c0] : memref<1xf32>
    async.yield
  }
  return
@@ -157,7 +157,7 @@
// Emplace result token after second resumption.
// CHECK: ^[[RESUME_1]]: -// CHECK: store +// CHECK: memref.store // CHECK: async.runtime.set_available %[[TOKEN]] // CHECK: ^[[CLEANUP]]: diff --git a/mlir/test/Dialect/GPU/all-reduce-max.mlir b/mlir/test/Dialect/GPU/all-reduce-max.mlir --- a/mlir/test/Dialect/GPU/all-reduce-max.mlir +++ b/mlir/test/Dialect/GPU/all-reduce-max.mlir @@ -121,7 +121,7 @@ // CHECK: cond_br [[VAL_84]], ^bb22, ^bb41 // CHECK: ^bb22: // CHECK: [[VAL_85:%.*]] = index_cast [[VAL_27]] : i32 to index - // CHECK: [[VAL_86:%.*]] = load [[VAL_1]]{{\[}}[[VAL_85]]] : memref<32xf32, 3> + // CHECK: [[VAL_86:%.*]] = memref.load [[VAL_1]]{{\[}}[[VAL_85]]] : memref<32xf32, 3> // CHECK: [[VAL_87:%.*]] = cmpi slt, [[VAL_83]], [[VAL_5]] : i32 // CHECK: cond_br [[VAL_87]], ^bb23, ^bb39 // CHECK: ^bb23: diff --git a/mlir/test/Dialect/GPU/all-reduce.mlir b/mlir/test/Dialect/GPU/all-reduce.mlir --- a/mlir/test/Dialect/GPU/all-reduce.mlir +++ b/mlir/test/Dialect/GPU/all-reduce.mlir @@ -111,7 +111,7 @@ // CHECK: cond_br [[VAL_74]], ^bb22, ^bb41 // CHECK: ^bb22: // CHECK: [[VAL_75:%.*]] = index_cast [[VAL_27]] : i32 to index - // CHECK: [[VAL_76:%.*]] = load [[VAL_1]]{{\[}}[[VAL_75]]] : memref<32xf32, 3> + // CHECK: [[VAL_76:%.*]] = memref.load [[VAL_1]]{{\[}}[[VAL_75]]] : memref<32xf32, 3> // CHECK: [[VAL_77:%.*]] = cmpi slt, [[VAL_73]], [[VAL_5]] : i32 // CHECK: cond_br [[VAL_77]], ^bb23, ^bb39 // CHECK: ^bb23: diff --git a/mlir/test/Dialect/GPU/multiple-all-reduce.mlir b/mlir/test/Dialect/GPU/multiple-all-reduce.mlir --- a/mlir/test/Dialect/GPU/multiple-all-reduce.mlir +++ b/mlir/test/Dialect/GPU/multiple-all-reduce.mlir @@ -1,19 +1,19 @@ // RUN: mlir-opt --gpu-kernel-outlining --convert-gpu-to-nvvm %s | FileCheck %s func @main() { - %data = alloc() : memref<2x6xf32> - %sum = alloc() : memref<2xf32> - %mul = alloc() : memref<2xf32> + %data = memref.alloc() : memref<2x6xf32> + %sum = memref.alloc() : memref<2xf32> + %mul = memref.alloc() : memref<2xf32> %c1 = constant 1 : index // ADD + MUL gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1) threads(%tx, %ty, %tz) in (%block_x = %c1, %block_y = %c1, %block_z = %c1) { - %val = load %data[%bx, %tx] : memref<2x6xf32> + %val = memref.load %data[%bx, %tx] : memref<2x6xf32> %reduced0 = "gpu.all_reduce"(%val) ({}) { op = "add" } : (f32) -> (f32) - store %reduced0, %sum[%bx] : memref<2xf32> + memref.store %reduced0, %sum[%bx] : memref<2xf32> %reduced1 = "gpu.all_reduce"(%val) ({}) { op = "mul" } : (f32) -> (f32) - store %reduced1, %mul[%bx] : memref<2xf32> + memref.store %reduced1, %mul[%bx] : memref<2xf32> gpu.terminator } diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir --- a/mlir/test/Dialect/GPU/ops.mlir +++ b/mlir/test/Dialect/GPU/ops.mlir @@ -59,7 +59,7 @@ "gpu.barrier"() : () -> () "some_op"(%bIdX, %tIdX) : (index, index) -> () - %42 = load %arg1[%bIdX] : memref + %42 = memref.load %arg1[%bIdX] : memref gpu.return } diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir --- a/mlir/test/Dialect/GPU/outlining.mlir +++ b/mlir/test/Dialect/GPU/outlining.mlir @@ -29,7 +29,7 @@ %block_z = %bDimZ) { "use"(%0): (f32) -> () "some_op"(%bx, %block_x) : (index, index) -> () - %42 = load %1[%tx] : memref + %42 = memref.load %1[%tx] : memref gpu.terminator } return @@ -55,7 +55,7 @@ // CHECK-NEXT: ^[[BLOCK]]: // CHECK-NEXT: "use"(%[[KERNEL_ARG0]]) : (f32) -> () // CHECK-NEXT: "some_op"(%[[BID]], %[[BDIM]]) : (index, index) -> () -// CHECK-NEXT: = load %[[KERNEL_ARG1]][%[[TID]]] : memref +// CHECK-NEXT: = 
memref.load %[[KERNEL_ARG1]][%[[TID]]] : memref // ----- @@ -118,7 +118,7 @@ %cst = constant 8 : index %cst2 = constant 2 : index %c0 = constant 0 : index - %cst3 = dim %arg0, %c0 : memref + %cst3 = memref.dim %arg0, %c0 : memref // CHECK: gpu.launch_func @extra_constants_kernel::@extra_constants_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]]) args(%[[ARG0]] : memref) gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, %grid_z = %cst) @@ -134,7 +134,7 @@ // CHECK-SAME: %[[KARG0:.*]]: memref // CHECK: constant 2 // CHECK: constant 0 -// CHECK: dim %[[KARG0]] +// CHECK: memref.dim %[[KARG0]] // ----- @@ -145,8 +145,8 @@ %cst = constant 8 : index %cst2 = constant 2 : index %c0 = constant 0 : index - // CHECK: dim %[[ARG1]] - %cst3 = dim %arg1, %c0 : memref + // CHECK: memref.dim %[[ARG1]] + %cst3 = memref.dim %arg1, %c0 : memref // CHECK: gpu.launch_func @extra_constants_noarg_kernel::@extra_constants_noarg_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]]) args(%[[ARG0]] : memref, {{.*}} : index) gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst, %grid_z = %cst) @@ -192,10 +192,10 @@ func @multiple_uses2(%arg0 : memref<*xf32>) { %c1 = constant 1 : index %c2 = constant 2 : index - %d = dim %arg0, %c2 : memref<*xf32> + %d = memref.dim %arg0, %c2 : memref<*xf32> // CHECK: gpu.func {{.*}} { // CHECK: %[[C2:.*]] = constant 2 : index - // CHECK: %[[D:.*]] = dim %[[ARG:.*]], %[[C2]] + // CHECK: %[[D:.*]] = memref.dim %[[ARG:.*]], %[[C2]] // CHECK: "use1"(%[[D]]) // CHECK: "use2"(%[[C2]], %[[C2]]) // CHECK: "use3"(%[[ARG]]) diff --git a/mlir/test/Dialect/GPU/promotion.mlir b/mlir/test/Dialect/GPU/promotion.mlir --- a/mlir/test/Dialect/GPU/promotion.mlir +++ b/mlir/test/Dialect/GPU/promotion.mlir @@ -26,7 +26,7 @@ // CHECK: scf.for %[[i2:.*]] = // Verify that the copy is emitted and uses only the last two loops. - // CHECK: %[[v:.*]] = load %[[arg]][%[[i1]], %[[i2]]] + // CHECK: %[[v:.*]] = memref.load %[[arg]][%[[i1]], %[[i2]]] // CHECK: store %[[v]], %[[promoted]][%[[i1]], %[[i2]]] // Verify that the use has been rewritten. @@ -42,7 +42,7 @@ // CHECK: scf.for %[[i2:.*]] = // Verify that the copy is emitted and uses only the last two loops. - // CHECK: %[[v:.*]] = load %[[promoted]][%[[i1]], %[[i2]]] + // CHECK: %[[v:.*]] = memref.load %[[promoted]][%[[i1]], %[[i2]]] // CHECK: store %[[v]], %[[arg]][%[[i1]], %[[i2]]] gpu.return } @@ -80,7 +80,7 @@ // CHECK: scf.for %[[i4:.*]] = // Verify that the copy is emitted. - // CHECK: %[[v:.*]] = load %[[arg]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]] + // CHECK: %[[v:.*]] = memref.load %[[arg]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]] // CHECK: store %[[v]], %[[promoted]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]] // Verify that the use has been rewritten. @@ -95,7 +95,7 @@ // CHECK: scf.for %[[i4:.*]] = // Verify that the copy is emitted. 
- // CHECK: %[[v:.*]] = load %[[promoted]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]] + // CHECK: %[[v:.*]] = memref.load %[[promoted]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]] // CHECK: store %[[v]], %[[arg]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]] gpu.return } diff --git a/mlir/test/Dialect/Linalg/affine.mlir b/mlir/test/Dialect/Linalg/affine.mlir --- a/mlir/test/Dialect/Linalg/affine.mlir +++ b/mlir/test/Dialect/Linalg/affine.mlir @@ -12,9 +12,9 @@ func @matmul(%arg0: memref, %M: index, %N: index, %K: index) { %c0 = constant 0 : index %c1 = constant 1 : index - %A = view %arg0[%c0][%M, %K] : memref to memref - %B = view %arg0[%c0][%K, %N] : memref to memref - %C = view %arg0[%c0][%M, %N] : memref to memref + %A = memref.view %arg0[%c0][%M, %K] : memref to memref + %B = memref.view %arg0[%c0][%K, %N] : memref to memref + %C = memref.view %arg0[%c0][%M, %N] : memref to memref linalg.matmul ins(%A, %B: memref, memref) outs(%C: memref) return @@ -24,9 +24,9 @@ // CHECK-SAME: [[M:arg[0-9]+]]: index // CHECK-SAME: [[N:arg[0-9]+]]: index // CHECK-SAME: [[K:arg[0-9]+]]: index -// CHECK: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECK: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECK: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECK: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref +// CHECK: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref +// CHECK: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref // CHECK: affine.for %{{.*}} = 0 to %{{.*}} { // CHECK: affine.for %{{.*}} = 0 to %{{.*}} { // CHECK: affine.for %{{.*}} = 0 to %{{.*}} { @@ -44,11 +44,11 @@ // CHECK-LABEL: func @conv_view3( // CHECK: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECK: %[[Z0:.*]] = dim %arg0, %c0 : memref -// CHECK: %[[Q:.*]] = dim %arg0, %c1 : memref -// CHECK: %[[K:.*]] = dim %arg0, %c2 : memref -// CHECK: %[[B:.*]] = dim %arg1, %c0 : memref -// CHECK: %[[X0:.*]] = dim %arg2, %c1 : memref +// CHECK: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref +// CHECK: %[[Q:.*]] = memref.dim %arg0, %c1 : memref +// CHECK: %[[K:.*]] = memref.dim %arg0, %c2 : memref +// CHECK: %[[B:.*]] = memref.dim %arg1, %c0 : memref +// CHECK: %[[X0:.*]] = memref.dim %arg2, %c1 : memref // CHECK: affine.for %{{.*}} = 0 to %[[B]] { // CHECK: affine.for %{{.*}} = 0 to %[[X0]] { // CHECK: affine.for %{{.*}} = 0 to %[[K]] { @@ -71,13 +71,13 @@ // CHECK-LABEL: func @conv_padding // CHECK: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { // CHECK: %[[ZERO:.*]] = constant 0.000000e+00 : f32 -// CHECK: %[[Z0:.*]] = dim %arg0, %c0 : memref -// CHECK: %[[Z1:.*]] = dim %arg0, %c1 : memref -// CHECK: %[[Q:.*]] = dim %arg0, %c2 : memref -// CHECK: %[[K:.*]] = dim %arg0, %c3 : memref -// CHECK: %[[B:.*]] = dim %arg1, %c0 : memref -// CHECK: %[[X0:.*]] = dim %arg2, %c1 : memref -// CHECK: %[[X1:.*]] = dim %arg2, %c2 : memref +// CHECK: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref +// CHECK: %[[Z1:.*]] = memref.dim %arg0, %c1 : memref +// CHECK: %[[Q:.*]] = memref.dim %arg0, %c2 : memref +// CHECK: %[[K:.*]] = memref.dim %arg0, %c3 : memref +// CHECK: %[[B:.*]] = memref.dim %arg1, %c0 : memref +// CHECK: %[[X0:.*]] = memref.dim %arg2, %c1 : memref +// CHECK: %[[X1:.*]] = memref.dim %arg2, %c2 : memref // CHECK: affine.for %{{.*}} = 0 to %[[B]] { // CHECK: affine.for %{{.*}} = 0 to %[[X0]] { // CHECK: affine.for %{{.*}} = 0 to %[[X1]] { @@ -90,8 +90,8 @@ // CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]]) // CHECK: %[[IDY:.*]] = affine.max 
#[[$clampMinMap]](%[[SUM1]]) // Padded conv involves an affine.max in the memory access and this is not -// allowed by affine.load. Use std.load in such cases. -// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref +// allowed by affine.load. Use memref.load in such cases. +// CHECK: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref // CHECK: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32 // CHECK: %{{.*}} = affine.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECK: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 @@ -111,10 +111,10 @@ // CHECK-SAME: %[[mA:[a-zA-Z0-9]+]]: memref // CHECK-SAME: %[[mB:[a-zA-Z0-9]+]]: memref // CHECK-SAME: %[[mC:[a-zA-Z0-9]+]]: memref -// CHECK: %[[B:.*]] = dim %[[mA]], %c0 : memref -// CHECK: %[[M:.*]] = dim %[[mA]], %c1 : memref -// CHECK: %[[K:.*]] = dim %[[mA]], %c2 : memref -// CHECK: %[[N:.*]] = dim %[[mB]], %c2 : memref +// CHECK: %[[B:.*]] = memref.dim %[[mA]], %c0 : memref +// CHECK: %[[M:.*]] = memref.dim %[[mA]], %c1 : memref +// CHECK: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref +// CHECK: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref // CHECK: affine.for %[[b:.*]] = 0 to %[[B]] { // CHECK: affine.for %[[m:.*]] = 0 to %[[M]] { // CHECK: affine.for %[[n:.*]] = 0 to %[[N]] { diff --git a/mlir/test/Dialect/Linalg/bufferize.mlir b/mlir/test/Dialect/Linalg/bufferize.mlir --- a/mlir/test/Dialect/Linalg/bufferize.mlir +++ b/mlir/test/Dialect/Linalg/bufferize.mlir @@ -3,7 +3,7 @@ #map0 = affine_map<(d0) -> (d0)> // In-depth checking of a basic case, this is testing -// - tensor_to_memref / tensor_load materializations are properly inserted +// - memref.buffer_cast / memref.tensor_load materializations are properly inserted // - payload is correctly carried over // - affine maps are correctly carried over // Later tests will not check all these details. 
@@ -11,8 +11,8 @@ // CHECK: #map = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @basic( // CHECK-SAME: %[[TENSOR:.*]]: tensor<4xf32>) -> tensor<4xf32> { -// CHECK: %[[MEMREF:.*]] = tensor_to_memref %[[TENSOR]] : memref<4xf32> -// CHECK: %[[RESULT_MEMREF:.*]] = alloc() : memref<4xf32> +// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] : memref<4xf32> +// CHECK: %[[RESULT_MEMREF:.*]] = memref.alloc() : memref<4xf32> // CHECK: linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} // CHECK-SAME: ins(%[[MEMREF]] : memref<4xf32>) // CHECK-SAME: outs(%[[RESULT_MEMREF]] : memref<4xf32>) { @@ -20,7 +20,7 @@ // CHECK: %[[DIM1:.*]] = math.exp %[[RESULT1]] : f32 // CHECK: linalg.yield %[[DIM1]] : f32 // CHECK: } -// CHECK: %[[RESULT:.*]] = tensor_load %[[RESULT_MEMREF]] : memref<4xf32> +// CHECK: %[[RESULT:.*]] = memref.tensor_load %[[RESULT_MEMREF]] : memref<4xf32> // CHECK: return %[[RESULT]] : tensor<4xf32> func @basic(%arg0: tensor<4xf32>) -> tensor<4xf32> { %0 = linalg.generic { @@ -45,8 +45,8 @@ // CHECK: #map = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @init_tensor( // CHECK-SAME: %[[IN:.*]]: tensor, %[[SIZE:.*]]: index) -// CHECK: %[[OUT_BUF:.*]] = alloc(%[[SIZE]]) : memref -// CHECK: %[[MEMREF:.*]] = tensor_to_memref %[[IN]] : memref +// CHECK: %[[OUT_BUF:.*]] = memref.alloc(%[[SIZE]]) : memref +// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[IN]] : memref // CHECK: linalg.generic // CHECK-SAME: ins(%[[MEMREF]] : memref) // CHECK-SAME: outs(%[[OUT_BUF]] : memref) { @@ -70,8 +70,8 @@ #map0 = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @multiple_results -// CHECK: %[[RESULT0:.*]] = alloc() : memref<4xf32> -// CHECK: %[[RESULT1:.*]] = alloc() : memref<4xf32> +// CHECK: %[[RESULT0:.*]] = memref.alloc() : memref<4xf32> +// CHECK: %[[RESULT1:.*]] = memref.alloc() : memref<4xf32> // CHECK: linalg.generic // CHECK-SAME: ins(%{{.*}} : memref<4xf32>) // CHECK-SAME: outs(%[[RESULT0]], %[[RESULT1]] : memref<4xf32>, memref<4xf32>) @@ -94,8 +94,8 @@ #map0 = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @multiple_results_indexed -// CHECK: %[[RESULT0:.*]] = alloc() : memref<4xi32> -// CHECK: %[[RESULT1:.*]] = alloc() : memref<4xi32> +// CHECK: %[[RESULT0:.*]] = memref.alloc() : memref<4xi32> +// CHECK: %[[RESULT1:.*]] = memref.alloc() : memref<4xi32> // CHECK: linalg.indexed_generic // CHECK-SAME: ins(%{{.*}} : memref<4xi32>) // CHECK-SAME: outs(%[[RESULT0]], %[[RESULT1]] : memref<4xi32>, memref<4xi32>) @@ -126,11 +126,11 @@ // CHECK-SAME: %[[ARG:.*]]: tensor // CHECK: %[[C0:.*]] = constant 0 : index // CHECK: %[[C1:.*]] = constant 1 : index -// CHECK: %[[MEMREF_ARG:.*]] = tensor_to_memref %[[ARG]] : memref -// CHECK: %[[DIM0:.*]] = dim %[[ARG]], %[[C0]] : tensor -// CHECK: %[[DIM1:.*]] = dim %[[ARG]], %[[C1]] : tensor -// CHECK: %[[RESULT0:.*]] = alloc(%[[DIM0]], %[[DIM1]]) : memref -// CHECK: %[[RESULT1:.*]] = alloc(%[[DIM0]], %[[DIM1]]) : memref +// CHECK: %[[MEMREF_ARG:.*]] = memref.buffer_cast %[[ARG]] : memref +// CHECK: %[[DIM0:.*]] = memref.dim %[[ARG]], %[[C0]] : tensor +// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : tensor +// CHECK: %[[RESULT0:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref +// CHECK: %[[RESULT1:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref // CHECK: linalg.generic // CHECK-SAME: ins(%[[MEMREF_ARG]] : memref) // CHECK-SAME: outs(%[[RESULT0]], %[[RESULT1]] : memref, memref) @@ -165,9 +165,9 @@ // CHECK-LABEL: func @generic_with_init_tensor( // CHECK-SAME: %[[ARG0_TENSOR:.*]]: tensor<2x3x4xvector<3x4xi4>>, // CHECK-SAME: 
%[[ARG1_TENSOR:.*]]: tensor<3x2xf32>) -> tensor<3x2xf32> { -// CHECK: %[[ARG0_MEMREF:.*]] = tensor_to_memref %[[ARG0_TENSOR]] : memref<2x3x4xvector<3x4xi4>> -// CHECK: %[[ARG1_MEMREF:.*]] = tensor_to_memref %[[ARG1_TENSOR]] : memref<3x2xf32> -// CHECK: %[[INIT_BUFFER:.*]] = alloc() : memref<3x2xf32> +// CHECK: %[[ARG0_MEMREF:.*]] = memref.buffer_cast %[[ARG0_TENSOR]] : memref<2x3x4xvector<3x4xi4>> +// CHECK: %[[ARG1_MEMREF:.*]] = memref.buffer_cast %[[ARG1_TENSOR]] : memref<3x2xf32> +// CHECK: %[[INIT_BUFFER:.*]] = memref.alloc() : memref<3x2xf32> // CHECK: linalg.copy(%[[ARG1_MEMREF]], %[[INIT_BUFFER]]) : memref<3x2xf32>, memref<3x2xf32> // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0_MEMREF]] : memref<2x3x4xvector<3x4xi4>>) @@ -198,20 +198,20 @@ // CHECK: %[[IDX:.*]] = call @make_index() : () -> index %i0 = call @make_index() : () -> index - // CHECK: %[[M0:.*]] = tensor_to_memref %[[T]] : memref - // CHECK-NEXT: %[[A0:.*]] = alloc() : memref<2x3xf32> - // CHECK-NEXT: %[[SM0:.*]] = subview %[[M0]][0, 0] [2, 3] [1, 1] + // CHECK: %[[M0:.*]] = memref.buffer_cast %[[T]] : memref + // CHECK-NEXT: %[[A0:.*]] = memref.alloc() : memref<2x3xf32> + // CHECK-NEXT: %[[SM0:.*]] = memref.subview %[[M0]][0, 0] [2, 3] [1, 1] // CHECK-SAME: memref to memref<2x3xf32, #[[$MAP0]]> // CHECK-NEXT: linalg.copy(%[[SM0]], %[[A0]]) : memref<2x3xf32, #[[$MAP0]]>, memref<2x3xf32> - // CHECK-NEXT: %[[RT0:.*]] = tensor_load %[[A0]] : memref<2x3xf32> + // CHECK-NEXT: %[[RT0:.*]] = memref.tensor_load %[[A0]] : memref<2x3xf32> %st0 = subtensor %t[0, 0][2, 3][1, 1] : tensor to tensor<2x3xf32> - // CHECK: %[[M1:.*]] = tensor_to_memref %[[T]] : memref - // CHECK-NEXT: %[[A1:.*]] = alloc(%[[IDX]]) : memref<2x?xf32> - // CHECK-NEXT: %[[SM1:.*]] = subview %[[M1]][0, %[[IDX]]] [2, %[[IDX]]] [1, 2] + // CHECK: %[[M1:.*]] = memref.buffer_cast %[[T]] : memref + // CHECK-NEXT: %[[A1:.*]] = memref.alloc(%[[IDX]]) : memref<2x?xf32> + // CHECK-NEXT: %[[SM1:.*]] = memref.subview %[[M1]][0, %[[IDX]]] [2, %[[IDX]]] [1, 2] // CHECK-SAME: memref to memref<2x?xf32, #[[$MAP1]]> // CHECK-NEXT: linalg.copy(%[[SM1]], %[[A1]]) : memref<2x?xf32, #[[$MAP1]]>, memref<2x?xf32> - // CHECK-NEXT: %[[RT1:.*]] = tensor_load %[[A1]] : memref<2x?xf32> + // CHECK-NEXT: %[[RT1:.*]] = memref.tensor_load %[[A1]] : memref<2x?xf32> %st1 = subtensor %t[0, %i0][2, %i0][1, 2] : tensor to tensor<2x?xf32> // CHECK-NEXT: return %[[RT0]], %[[RT1]] @@ -239,26 +239,26 @@ // CHECK: %[[IDX:.*]] = call @make_index() : () -> index - // CHECK-DAG: %[[M0:.*]] = tensor_to_memref %[[T]] : memref - // CHECK-DAG: %[[SM0:.*]] = tensor_to_memref %[[ST0]] : memref<2x3xf32> - // CHECK-NEXT: %[[DIM0:.*]] = dim %[[T]], %[[C0]] : tensor - // CHECK-NEXT: %[[DIM1:.*]] = dim %[[T]], %[[C1]] : tensor - // CHECK-NEXT: %[[M0_COPY:.*]] = alloc(%[[DIM0]], %[[DIM1]]) : memref + // CHECK-DAG: %[[M0:.*]] = memref.buffer_cast %[[T]] : memref + // CHECK-DAG: %[[SM0:.*]] = memref.buffer_cast %[[ST0]] : memref<2x3xf32> + // CHECK-NEXT: %[[DIM0:.*]] = memref.dim %[[T]], %[[C0]] : tensor + // CHECK-NEXT: %[[DIM1:.*]] = memref.dim %[[T]], %[[C1]] : tensor + // CHECK-NEXT: %[[M0_COPY:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref // CHECK-NEXT: linalg.copy(%[[M0]], %[[M0_COPY]]) : memref, memref - // CHECK-NEXT: %[[SUBVIEW0:.*]] = subview %[[M0_COPY]][0, 0] [2, 3] [1, 1] + // CHECK-NEXT: %[[SUBVIEW0:.*]] = memref.subview %[[M0_COPY]][0, 0] [2, 3] [1, 1] // CHECK-SAME: memref to memref<2x3xf32, #[[$MAP0]]> // CHECK-NEXT: linalg.copy(%[[SM0]], %[[SUBVIEW0]]) : memref<2x3xf32>, memref<2x3xf32, 
#[[$MAP0]]> - // CHECK-NEXT: %[[RT0:.*]] = tensor_load %[[M0_COPY]] : memref + // CHECK-NEXT: %[[RT0:.*]] = memref.tensor_load %[[M0_COPY]] : memref %t0 = subtensor_insert %st0 into %t[0, 0][2, 3][1, 1] : tensor<2x3xf32> into tensor - // CHECK-DAG: %[[M1:.*]] = tensor_to_memref %[[T]] : memref - // CHECK-DAG: %[[SM1:.*]] = tensor_to_memref %[[ST1]] : memref<2x?xf32> - // CHECK-NEXT: %[[M1_COPY:.*]] = alloc(%[[DIM0]], %[[DIM1]]) : memref + // CHECK-DAG: %[[M1:.*]] = memref.buffer_cast %[[T]] : memref + // CHECK-DAG: %[[SM1:.*]] = memref.buffer_cast %[[ST1]] : memref<2x?xf32> + // CHECK-NEXT: %[[M1_COPY:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref // CHECK-NEXT: linalg.copy(%[[M1]], %[[M1_COPY]]) : memref, memref - // CHECK-NEXT: %[[SUBVIEW1:.*]] = subview %[[M1_COPY]][0, %[[IDX]]] [2, %[[IDX]]] [1, 2] + // CHECK-NEXT: %[[SUBVIEW1:.*]] = memref.subview %[[M1_COPY]][0, %[[IDX]]] [2, %[[IDX]]] [1, 2] // CHECK-SAME: memref to memref<2x?xf32, #[[$MAP1]]> // CHECK-NEXT: linalg.copy(%[[SM1]], %[[SUBVIEW1]]) : memref<2x?xf32>, memref<2x?xf32, #[[$MAP1]]> - // CHECK-NEXT: %[[RT1:.*]] = tensor_load %[[M1_COPY]] : memref + // CHECK-NEXT: %[[RT1:.*]] = memref.tensor_load %[[M1_COPY]] : memref %t1 = subtensor_insert %st1 into %t[0, %i0][2, %i0][1, 2] : tensor<2x?xf32> into tensor // CHECK: return %[[RT0]], %[[RT1]] diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir --- a/mlir/test/Dialect/Linalg/canonicalize.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize.mlir @@ -6,9 +6,9 @@ %c1 = constant 1 : index %c8 = constant 8 : index %c16 = constant 16 : index - %1 = alloc (%b) : memref - %2 = view %1[%c0][] : memref to memref<16x16xf32> - %3 = memref_cast %2 : memref<16x16xf32> to memref + %1 = memref.alloc (%b) : memref + %2 = memref.view %1[%c0][] : memref to memref<16x16xf32> + %3 = memref.cast %2 : memref<16x16xf32> to memref // CHECK: linalg.matmul ins({{.*}}memref<16x16xf32>, memref<16x16xf32>) outs({{.*}}memref<16x16xf32>) linalg.matmul ins(%3, %3: memref, memref) @@ -339,11 +339,12 @@ %t = linalg.matmul ins(%a, %b : tensor, memref) outs(%c : tensor) -> tensor - // CHECK-NOT: %{{.*}} = linalg.matmul + // CHECK: linalg.matmul linalg.matmul ins(%a, %c : tensor, tensor) outs(%b : memref) return } + // ----- func @init_tensor_canonicalize() -> (tensor<4x5x?xf32>) { @@ -363,8 +364,8 @@ %c2 = constant 2 : index %c6 = constant 6 : index %0 = linalg.init_tensor [4, 5, %c6] : tensor<4x5x?xf32> - %1 = dim %0, %c2 : tensor<4x5x?xf32> - %2 = dim %0, %c0 : tensor<4x5x?xf32> + %1 = memref.dim %0, %c2 : tensor<4x5x?xf32> + %2 = memref.dim %0, %c0 : tensor<4x5x?xf32> return %1, %2 : index, index } // CHECK: func @init_tensor_static_dim @@ -377,7 +378,7 @@ func @init_tensor_dynamic_dim(%arg0 : index) -> (index) { %c2 = constant 2 : index %0 = linalg.init_tensor [4, 5, %arg0] : tensor<4x5x?xf32> - %1 = dim %0, %c2 : tensor<4x5x?xf32> + %1 = memref.dim %0, %c2 : tensor<4x5x?xf32> return %1 : index } // CHECK: func @init_tensor_dynamic_dim @@ -390,8 +391,8 @@ %c0 = constant 0 : index %c1 = constant 1 : index %0 = linalg.init_tensor [%arg0, %arg1] : tensor - %1 = dim %0, %c0 : tensor - %2 = dim %0, %c1 : tensor + %1 = memref.dim %0, %c0 : tensor + %2 = memref.dim %0, %c1 : tensor return %1, %2 : index, index } // CHECK: func @init_tensor_dynamic_dim2 @@ -417,7 +418,7 @@ %2 = addf %1, %arg5 : f32 linalg.yield %2 : f32 } -> tensor - %3 = dim %0, %c0 : tensor + %3 = memref.dim %0, %c0 : tensor return %3 : index } // CHECK: #[[MAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1)> 
@@ -427,8 +428,8 @@ // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: tensor // CHECK-DAG: %[[C0:.+]] = constant 0 : index // CHECK-DAG: %[[C1:.+]] = constant 1 : index -// CHECK-DAG: %[[T0:.+]] = dim %[[ARG0]], %[[C0]] -// CHECK-DAG: %[[T1:.+]] = dim %[[ARG1]], %[[C1]] +// CHECK-DAG: %[[T0:.+]] = memref.dim %[[ARG0]], %[[C0]] +// CHECK-DAG: %[[T1:.+]] = memref.dim %[[ARG1]], %[[C1]] // CHECK: %[[T2:.+]] = affine.apply #[[MAP]]()[%[[T0]], %[[T1]]] // CHECK: return %[[T2]] @@ -438,7 +439,7 @@ (%arg0 : tensor, %arg1 : index) -> (index) { %c0 = constant 0 : index %c1 = constant 1 : index - %d0 = dim %arg0, %c0 : tensor + %d0 = memref.dim %arg0, %c0 : tensor %0 = linalg.init_tensor [%d0, %arg1] : tensor %1 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0)>, @@ -448,7 +449,7 @@ ^bb0(%arg2: f32, %arg3: f32) : linalg.yield %arg2 : f32 } -> tensor - %2 = dim %1, %c1 : tensor + %2 = memref.dim %1, %c1 : tensor return %2 : index } // CHECK: func @remove_dim_result_uses_outs @@ -464,8 +465,8 @@ %c1 = constant 1 : index %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor) outs(%arg2 : tensor) -> tensor - %1 = dim %0, %c0 : tensor - %2 = dim %0, %c1 : tensor + %1 = memref.dim %0, %c0 : tensor + %2 = memref.dim %0, %c1 : tensor %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0)>, affine_map<(d0, d1, d2) -> (d0, d2)>, @@ -478,8 +479,8 @@ %5 = addf %4, %arg5 : f32 linalg.yield %5 : f32 } -> tensor - %6 = dim %3, %c0 : tensor - %7 = dim %3, %c1 : tensor + %6 = memref.dim %3, %c0 : tensor + %7 = memref.dim %3, %c1 : tensor return %1, %2, %6, %7 : index, index, index, index } // CHECK-LABEL: func @remove_dim_result_uses_sequence @@ -488,10 +489,10 @@ // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: tensor // CHECK-DAG: %[[C0:.+]] = constant 0 : index // CHECK-DAG: %[[C1:.+]] = constant 1 : index -// CHECK-DAG: %[[T0:.+]] = dim %[[ARG0]], %[[C0]] -// CHECK-DAG: %[[T1:.+]] = dim %[[ARG1]], %[[C1]] -// CHECK-DAG: %[[T2:.+]] = dim %[[ARG0]], %[[C1]] -// CHECK-DAG: %[[T3:.+]] = dim %[[ARG1]], %[[C1]] +// CHECK-DAG: %[[T0:.+]] = memref.dim %[[ARG0]], %[[C0]] +// CHECK-DAG: %[[T1:.+]] = memref.dim %[[ARG1]], %[[C1]] +// CHECK-DAG: %[[T2:.+]] = memref.dim %[[ARG0]], %[[C1]] +// CHECK-DAG: %[[T3:.+]] = memref.dim %[[ARG1]], %[[C1]] // CHECK: return %[[T0]], %[[T1]], %[[T2]], %[[T3]] // ----- @@ -500,7 +501,7 @@ (%arg0 : tensor, %arg1 : index) -> (index, index) { %c0 = constant 0 : index %c1 = constant 1 : index - %d0 = dim %arg0, %c0 : tensor + %d0 = memref.dim %arg0, %c0 : tensor %0 = linalg.init_tensor [%d0, %arg1] : tensor %1 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0)>, @@ -510,15 +511,15 @@ ^bb0(%arg2: f32, %arg3 : f32): linalg.yield %arg2 : f32 } -> tensor - %2 = dim %1, %c0 : tensor - %3 = dim %1, %c1 : tensor + %2 = memref.dim %1, %c0 : tensor + %3 = memref.dim %1, %c1 : tensor return %2, %3 : index, index } // CHECK: func @keep_result_dim_uses_sequence2 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index // CHECK-DAG: %[[C0:.+]] = constant 0 : index -// CHECK-DAG: %[[T0:.+]] = dim %[[ARG0]], %[[C0]] +// CHECK-DAG: %[[T0:.+]] = memref.dim %[[ARG0]], %[[C0]] // CHECK: return %[[T0]], %[[ARG1]] // ----- @@ -537,16 +538,16 @@ } -> tensor, tensor %c0 = constant 0 : index - %num_elem_0 = dim %0, %c0 : tensor + %num_elem_0 = memref.dim %0, %c0 : tensor - %num_elem_1 = dim %1, %c0 : tensor + %num_elem_1 = memref.dim %1, %c0 : tensor return %num_elem_0, %num_elem_1 : index, index } // CHECK: func @init_tensor_dim_of_linalg_result( // 
CHECK-SAME: %[[ARG_0:[a-zA-Z0-9_]+]]: tensor // CHECK-SAME: %[[ARG_1:[a-zA-Z0-9_]+]]: tensor) -// CHECK: %[[R0:.+]] = dim %[[ARG_0]] -// CHECK: %[[R1:.+]] = dim %[[ARG_0]] +// CHECK: %[[R0:.+]] = memref.dim %[[ARG_0]] +// CHECK: %[[R1:.+]] = memref.dim %[[ARG_0]] // CHECK: return %[[R0]], %[[R1]] // ----- @@ -593,9 +594,9 @@ %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %0 = dim %arg0, %c0 : tensor - %1 = dim %arg0, %c1 : tensor - %2 = dim %arg0, %c2 : tensor + %0 = memref.dim %arg0, %c0 : tensor + %1 = memref.dim %arg0, %c1 : tensor + %2 = memref.dim %arg0, %c2 : tensor %3 = linalg.init_tensor [%0, %1, %2] : tensor %4, %5 = linalg.generic { indexing_maps = [#map, #map, #map, #map], @@ -619,8 +620,8 @@ %c0 = constant 0 : index %c1 = constant 1 : index %cst = constant 1.000000e+00 : f32 - %0 = dim %arg0, %c0 : tensor - %1 = dim %arg0, %c1 : tensor + %0 = memref.dim %arg0, %c0 : tensor + %1 = memref.dim %arg0, %c1 : tensor %2 = linalg.init_tensor [%0, %1] : tensor br ^bb1(%cst : f32) @@ -645,8 +646,8 @@ %c0 = constant 0 : index %c1 = constant 1 : index %cst = constant 1.000000e+00 : f32 - %0 = dim %arg0, %c0 : tensor - %1 = dim %arg0, %c1 : tensor + %0 = memref.dim %arg0, %c0 : tensor + %1 = memref.dim %arg0, %c1 : tensor %2 = linalg.init_tensor [%0, %1] : tensor br ^bb1(%cst : f32) @@ -729,9 +730,9 @@ affine_map<(d0, d1, d2, d3, d4, d5) -> (d2)>, affine_map<(d0, d1, d2, d3, d4, d5) -> (d3, d4, d5)>] : tensor<6x5x?xf32> into tensor<2x3x5x4x?x7xf32> - %1 = dim %0, %c1 : tensor<2x3x5x4x?x7xf32> - %2 = dim %0, %c3 : tensor<2x3x5x4x?x7xf32> - %3 = dim %0, %c4 : tensor<2x3x5x4x?x7xf32> + %1 = memref.dim %0, %c1 : tensor<2x3x5x4x?x7xf32> + %2 = memref.dim %0, %c3 : tensor<2x3x5x4x?x7xf32> + %3 = memref.dim %0, %c4 : tensor<2x3x5x4x?x7xf32> return %1, %2, %3 : index, index, index } // CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 floordiv 28)> @@ -740,7 +741,7 @@ // CHECK-DAG: %[[C2:.+]] = constant 2 : index // CHECK-DAG: %[[C3:.+]] = constant 3 : index // CHECK-DAG: %[[C4:.+]] = constant 4 : index -// CHECK: %[[D0:.+]] = dim %[[ARG0]], %[[C2]] +// CHECK: %[[D0:.+]] = memref.dim %[[ARG0]], %[[C2]] // CHECK: %[[D1:.+]] = affine.apply #[[MAP]]()[%[[D0]]] // CHECK: return %[[C3]], %[[C4]], %[[D1]] @@ -755,8 +756,8 @@ affine_map<(d0, d1, d2, d3, d4, d5) -> (d2)>, affine_map<(d0, d1, d2, d3, d4, d5) -> (d3, d4, d5)>] : tensor<2x3x5x4x?x7xf32> into tensor<6x5x?xf32> - %1 = dim %0, %c1 : tensor<6x5x?xf32> - %2 = dim %0, %c2 : tensor<6x5x?xf32> + %1 = memref.dim %0, %c1 : tensor<6x5x?xf32> + %2 = memref.dim %0, %c2 : tensor<6x5x?xf32> return %1, %2 : index, index } // CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 * 28)> @@ -764,7 +765,7 @@ // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<2x3x5x4x?x7xf32> // CHECK-DAG: %[[C4:.+]] = constant 4 : index // CHECK-DAG: %[[C5:.+]] = constant 5 : index -// CHECK: %[[D0:.+]] = dim %[[ARG0]], %[[C4]] +// CHECK: %[[D0:.+]] = memref.dim %[[ARG0]], %[[C4]] // CHECK: %[[D1:.+]] = affine.apply #[[MAP]]()[%[[D0]]] // CHECK: return %[[C5]], %[[D1]] @@ -778,8 +779,8 @@ %c42 = constant 42 : index %0 = linalg.init_tensor [%c21, %c42] : tensor %1 = linalg.fill(%0, %arg1) : tensor, f32 -> tensor - %2 = dim %arg0, %c0 : tensor - %3 = dim %arg0, %c1 : tensor + %2 = memref.dim %arg0, %c0 : tensor + %3 = memref.dim %arg0, %c1 : tensor %4 = subtensor_insert %arg0 into %1[%arg2, %arg3] [%2, %3] [1, 1] : tensor into tensor return %4 : tensor } diff --git a/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir 
b/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir --- a/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir +++ b/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir @@ -93,11 +93,11 @@ // CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<4x?x?x8x2x?xf32> func @cmpf(%arg0: tensor<4x?x?x8x2x?xf32>, %arg1: tensor<4x?x?x8x2x?xf32>) -> tensor<4x?x?x8x2x?xi1> { // CHECK: %[[C1:.*]] = constant 1 : index - // CHECK: %[[D1:.*]] = dim %[[ARG0]], %[[C1]] : tensor<4x?x?x8x2x?xf32> + // CHECK: %[[D1:.*]] = memref.dim %[[ARG0]], %[[C1]] : tensor<4x?x?x8x2x?xf32> // CHECK: %[[C2:.*]] = constant 2 : index - // CHECK: %[[D2:.*]] = dim %[[ARG0]], %[[C2]] : tensor<4x?x?x8x2x?xf32> + // CHECK: %[[D2:.*]] = memref.dim %[[ARG0]], %[[C2]] : tensor<4x?x?x8x2x?xf32> // CHECK: %[[C5:.*]] = constant 5 : index - // CHECK: %[[D5:.*]] = dim %[[ARG0]], %[[C5]] : tensor<4x?x?x8x2x?xf32> + // CHECK: %[[D5:.*]] = memref.dim %[[ARG0]], %[[C5]] : tensor<4x?x?x8x2x?xf32> // CHECK: %[[INIT:.*]] = linalg.init_tensor [4, %[[D1]], %[[D2]], 8, 2, %[[D5]]] : tensor<4x?x?x8x2x?xi1> // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] diff --git a/mlir/test/Dialect/Linalg/fold-affine-min-scf.mlir b/mlir/test/Dialect/Linalg/fold-affine-min-scf.mlir --- a/mlir/test/Dialect/Linalg/fold-affine-min-scf.mlir +++ b/mlir/test/Dialect/Linalg/fold-affine-min-scf.mlir @@ -13,21 +13,21 @@ // CHECK: scf.for // CHECK-NEXT: %[[C2:.*]] = constant 2 : index // CHECK-NEXT: %[[C2I64:.*]] = index_cast %[[C2:.*]] - // CHECK-NEXT: store %[[C2I64]], %{{.*}}[] : memref + // CHECK-NEXT: memref.store %[[C2I64]], %{{.*}}[] : memref scf.for %i = %c0 to %c4 step %c2 { %1 = affine.min affine_map<(d0, d1)[] -> (2, d1 - d0)> (%i, %c4) %2 = index_cast %1: index to i64 - store %2, %A[]: memref + memref.store %2, %A[]: memref } // CHECK: scf.for // CHECK-NEXT: %[[C2:.*]] = constant 2 : index // CHECK-NEXT: %[[C2I64:.*]] = index_cast %[[C2:.*]] - // CHECK-NEXT: store %[[C2I64]], %{{.*}}[] : memref + // CHECK-NEXT: memref.store %[[C2I64]], %{{.*}}[] : memref scf.for %i = %c1 to %c7 step %c2 { %1 = affine.min affine_map<(d0)[s0] -> (s0 - d0, 2)> (%i)[%c7] %2 = index_cast %1: index to i64 - store %2, %A[]: memref + memref.store %2, %A[]: memref } // This should not canonicalize because: 4 - %i may take the value 1 < 2. @@ -37,7 +37,7 @@ scf.for %i = %c1 to %c4 step %c2 { %1 = affine.min affine_map<(d0)[s0] -> (2, s0 - d0)> (%i)[%c4] %2 = index_cast %1: index to i64 - store %2, %A[]: memref + memref.store %2, %A[]: memref } // This should not canonicalize because: 16 - %i may take the value 15 < 1024. 
@@ -47,7 +47,7 @@ scf.for %i = %c1 to %c16 step %c1024 { %1 = affine.min affine_map<(d0) -> (1024, 16 - d0)> (%i) %2 = index_cast %1: index to i64 - store %2, %A[]: memref + memref.store %2, %A[]: memref } // This example should simplify but affine_map is currently missing @@ -62,7 +62,7 @@ scf.for %i = %c0 to %ub step %step { %1 = affine.min affine_map<(d0, d1, d2) -> (d0, d1 - d2)> (%step, %ub, %i) %2 = index_cast %1: index to i64 - store %2, %A[]: memref + memref.store %2, %A[]: memref } // This example should simplify but affine_map is currently missing @@ -79,7 +79,7 @@ scf.for %i = %c0 to %ub2 step %step { %1 = affine.min affine_map<(d0, d1, d2) -> (d0, d2 - d1)> (%step, %i, %ub2) %2 = index_cast %1: index to i64 - store %2, %A[]: memref + memref.store %2, %A[]: memref } return @@ -96,21 +96,21 @@ // CHECK: scf.parallel // CHECK-NEXT: %[[C2:.*]] = constant 2 : index // CHECK-NEXT: %[[C2I64:.*]] = index_cast %[[C2:.*]] - // CHECK-NEXT: store %[[C2I64]], %{{.*}}[] : memref + // CHECK-NEXT: memref.store %[[C2I64]], %{{.*}}[] : memref scf.parallel (%i) = (%c0) to (%c4) step (%c2) { %1 = affine.min affine_map<(d0, d1)[] -> (2, d1 - d0)> (%i, %c4) %2 = index_cast %1: index to i64 - store %2, %A[]: memref + memref.store %2, %A[]: memref } // CHECK: scf.parallel // CHECK-NEXT: %[[C2:.*]] = constant 2 : index // CHECK-NEXT: %[[C2I64:.*]] = index_cast %[[C2:.*]] - // CHECK-NEXT: store %[[C2I64]], %{{.*}}[] : memref + // CHECK-NEXT: memref.store %[[C2I64]], %{{.*}}[] : memref scf.parallel (%i) = (%c1) to (%c7) step (%c2) { %1 = affine.min affine_map<(d0)[s0] -> (2, s0 - d0)> (%i)[%c7] %2 = index_cast %1: index to i64 - store %2, %A[]: memref + memref.store %2, %A[]: memref } // This example should simplify but affine_map is currently missing @@ -125,7 +125,7 @@ scf.parallel (%i) = (%c0) to (%ub) step (%step) { %1 = affine.min affine_map<(d0, d1, d2) -> (d0, d2 - d1)> (%step, %i, %ub) %2 = index_cast %1: index to i64 - store %2, %A[]: memref + memref.store %2, %A[]: memref } // This example should simplify but affine_map is currently missing @@ -140,7 +140,7 @@ scf.parallel (%i) = (%c0) to (%ub2) step (%step) { %1 = affine.min affine_map<(d0, d1, d2) -> (d0, d2 - d1)> (%step, %i, %ub2) %2 = index_cast %1: index to i64 - store %2, %A[]: memref + memref.store %2, %A[]: memref } return diff --git a/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir b/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir --- a/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir +++ b/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir @@ -4,17 +4,17 @@ // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref // CHECK-NOT: linalg.fill // CHECK-NOT: linalg.copy -// CHECK: %[[ALLOC:.*]] = alloc +// CHECK: %[[ALLOC:.*]] = memref.alloc // CHECK: vector.transfer_read %[[ARG0]] // CHECK-NOT: masked func @testAllocRead(%in: memref) -> vector<32 x f32> { %c0 = constant 0: index %f0 = constant 0.0: f32 - %alloc = alloc() : memref<32 x f32> - %subview = subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> + %alloc = memref.alloc() : memref<32 x f32> + %subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> linalg.copy(%in, %subview): memref, memref<16 x f32> %0 = vector.transfer_read %alloc[%c0], %f0 {masked = [false]} : memref<32 x f32>, vector<32 x f32> - dealloc %alloc : memref<32 x f32> + memref.dealloc %alloc : memref<32 x f32> return %0: vector<32 x f32> } @@ -22,18 +22,18 @@ // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref // CHECK-NOT: linalg.fill // CHECK-NOT: linalg.copy -// CHECK: 
%[[ALLOC:.*]] = alloc +// CHECK: %[[ALLOC:.*]] = memref.alloc // CHECK: vector.transfer_read %[[ARG0]] // CHECK-NOT: masked func @testAllocFillRead(%in: memref) -> vector<32 x f32> { %c0 = constant 0: index %f0 = constant 0.0: f32 - %alloc = alloc() : memref<32 x f32> + %alloc = memref.alloc() : memref<32 x f32> linalg.fill(%alloc, %f0): memref<32 x f32>, f32 - %subview = subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> + %subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> linalg.copy(%in, %subview): memref, memref<16 x f32> %0 = vector.transfer_read %alloc[%c0], %f0 {masked = [false]} : memref<32 x f32>, vector<32 x f32> - dealloc %alloc : memref<32 x f32> + memref.dealloc %alloc : memref<32 x f32> return %0: vector<32 x f32> } @@ -41,18 +41,18 @@ // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref // CHECK-NOT: linalg.fill // CHECK-NOT: linalg.copy -// CHECK: %[[ALLOC:.*]] = alloc +// CHECK: %[[ALLOC:.*]] = memref.alloc // CHECK: vector.transfer_read %[[ARG0]] // CHECK-NOT: masked func @testViewRead(%in: memref) -> vector<32 x f32> { %c0 = constant 0: index %f0 = constant 0.0: f32 - %alloc = alloc() : memref<128 x i8> - %view = view %alloc[%c0][] : memref<128 x i8> to memref<32 x f32> - %subview = subview %view[0][16][1] : memref<32 x f32> to memref<16 x f32> + %alloc = memref.alloc() : memref<128 x i8> + %view = memref.view %alloc[%c0][] : memref<128 x i8> to memref<32 x f32> + %subview = memref.subview %view[0][16][1] : memref<32 x f32> to memref<16 x f32> linalg.copy(%in, %subview): memref, memref<16 x f32> %0 = vector.transfer_read %view[%c0], %f0 {masked = [false]} : memref<32 x f32>, vector<32 x f32> - dealloc %alloc : memref<128 x i8> + memref.dealloc %alloc : memref<128 x i8> return %0: vector<32 x f32> } @@ -60,19 +60,19 @@ // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref // CHECK-NOT: linalg.fill // CHECK-NOT: linalg.copy -// CHECK: %[[ALLOC:.*]] = alloc +// CHECK: %[[ALLOC:.*]] = memref.alloc // CHECK: vector.transfer_read %[[ARG0]] // CHECK-NOT: masked func @testViewFillRead(%in: memref) -> vector<32 x f32> { %c0 = constant 0: index %f0 = constant 0.0: f32 - %alloc = alloc() : memref<128 x i8> - %view = view %alloc[%c0][] : memref<128 x i8> to memref<32 x f32> - %subview = subview %view[0][16][1] : memref<32 x f32> to memref<16 x f32> + %alloc = memref.alloc() : memref<128 x i8> + %view = memref.view %alloc[%c0][] : memref<128 x i8> to memref<32 x f32> + %subview = memref.subview %view[0][16][1] : memref<32 x f32> to memref<16 x f32> linalg.fill(%view, %f0): memref<32 x f32>, f32 linalg.copy(%in, %subview): memref, memref<16 x f32> %0 = vector.transfer_read %view[%c0], %f0 {masked = [false]} : memref<32 x f32>, vector<32 x f32> - dealloc %alloc : memref<128 x i8> + memref.dealloc %alloc : memref<128 x i8> return %0: vector<32 x f32> } @@ -80,17 +80,17 @@ // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: vector // CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref // CHECK-NOT: linalg.copy -// CHECK: %[[ALLOC:.*]] = alloc +// CHECK: %[[ALLOC:.*]] = memref.alloc // CHECK: vector.transfer_write %[[ARG0]], %[[ARG1]] // CHECK-NOT: masked func @testAllocWrite(%vec: vector<32 x f32>, %out: memref) { %c0 = constant 0: index %f0 = constant 0.0: f32 - %alloc = alloc() : memref<32 x f32> - %subview = subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> + %alloc = memref.alloc() : memref<32 x f32> + %subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> vector.transfer_write %vec, %alloc[%c0] {masked = [false]} : vector<32 x f32>, 
memref<32 x f32> linalg.copy(%subview, %out): memref<16 x f32>, memref - dealloc %alloc : memref<32 x f32> + memref.dealloc %alloc : memref<32 x f32> return } @@ -98,18 +98,18 @@ // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: vector // CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref // CHECK-NOT: linalg.copy -// CHECK: %[[ALLOC:.*]] = alloc +// CHECK: %[[ALLOC:.*]] = memref.alloc // CHECK: vector.transfer_write %[[ARG0]], %[[ARG1]] // CHECK-NOT: masked func @testViewWrite(%vec: vector<32 x f32>, %out: memref) { %c0 = constant 0: index %f0 = constant 0.0: f32 - %alloc = alloc() : memref<128 x i8> - %view = view %alloc[%c0][] : memref<128 x i8> to memref<32 x f32> - %subview = subview %view[0][16][1] : memref<32 x f32> to memref<16 x f32> + %alloc = memref.alloc() : memref<128 x i8> + %view = memref.view %alloc[%c0][] : memref<128 x i8> to memref<32 x f32> + %subview = memref.subview %view[0][16][1] : memref<32 x f32> to memref<16 x f32> vector.transfer_write %vec, %view[%c0] {masked = [false]} : vector<32 x f32>, memref<32 x f32> linalg.copy(%subview, %out): memref<16 x f32>, memref - dealloc %alloc : memref<128 x i8> + memref.dealloc %alloc : memref<128 x i8> return } @@ -121,20 +121,20 @@ // CHECK-LABEL: failAllocFillRead // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref // CHECK-NOT: vector.transfer_read %[[ARG0]] -// CHECK: %[[ALLOC:.*]] = alloc +// CHECK: %[[ALLOC:.*]] = memref.alloc // CHECK: linalg.copy // CHECK: vector.transfer_read %[[ALLOC]] func @failAllocFillRead(%in: memref) -> vector<32 x f32> { %c0 = constant 0: index %f0 = constant 0.0: f32 %f1 = constant 1.0: f32 - %alloc = alloc() : memref<32 x f32> + %alloc = memref.alloc() : memref<32 x f32> linalg.fill(%alloc, %f0): memref<32 x f32>, f32 - %subview = subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> + %subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> linalg.copy(%in, %subview): memref, memref<16 x f32> "some_interleaved_use"(%subview) : (memref<16 x f32>) -> () %0 = vector.transfer_read %alloc[%c0], %f1: memref<32 x f32>, vector<32 x f32> - dealloc %alloc : memref<32 x f32> + memref.dealloc %alloc : memref<32 x f32> return %0: vector<32 x f32> } @@ -143,17 +143,17 @@ // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: vector // CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref // CHECK-NOT: vector.transfer_write %[[ARG0]], %[[ARG1]] -// CHECK: %[[ALLOC:.*]] = alloc +// CHECK: %[[ALLOC:.*]] = memref.alloc // CHECK: vector.transfer_write %[[ARG0]], %[[ALLOC]] // CHECK: linalg.copy func @failAllocWrite(%vec: vector<32 x f32>, %out: memref) { %c0 = constant 0: index %f0 = constant 0.0: f32 - %alloc = alloc() : memref<32 x f32> - %subview = subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> + %alloc = memref.alloc() : memref<32 x f32> + %subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> vector.transfer_write %vec, %alloc[%c0] : vector<32 x f32>, memref<32 x f32> "some_interleaved_use"(%subview) : (memref<16 x f32>) -> () linalg.copy(%subview, %out): memref<16 x f32>, memref - dealloc %alloc : memref<32 x f32> + memref.dealloc %alloc : memref<32 x f32> return } diff --git a/mlir/test/Dialect/Linalg/fusion-2-level.mlir b/mlir/test/Dialect/Linalg/fusion-2-level.mlir --- a/mlir/test/Dialect/Linalg/fusion-2-level.mlir +++ b/mlir/test/Dialect/Linalg/fusion-2-level.mlir @@ -9,26 +9,26 @@ %c40 = constant 40 : index %c30 = constant 30 : index %c20 = constant 20 : index - %0 = dim %C, %c0 : memref - %1 = dim %C, %c1 : memref - %2 = dim %D, %c1 : memref + %0 = memref.dim %C, %c0 : 
memref + %1 = memref.dim %C, %c1 : memref + %2 = memref.dim %D, %c1 : memref linalg.matmul ins(%A, %B: memref, memref) outs(%C: memref) scf.for %arg5 = %c0 to %0 step %c20 { scf.for %arg6 = %c0 to %2 step %c30 { scf.for %arg7 = %c0 to %1 step %c40 { - %5 = std.subview %C[%arg5, %arg7][%c20, %c40][%c1, %c1] : memref to memref - %7 = std.subview %D[%arg7, %arg6][%c40, %c30][%c1, %c1]: memref to memref - %8 = std.subview %E[%arg5, %arg6][%c20, %c40][%c1, %c1] : memref to memref - %9 = dim %5, %c0 : memref - %10 = dim %5, %c1 : memref - %11 = dim %7, %c1 : memref + %5 = memref.subview %C[%arg5, %arg7][%c20, %c40][%c1, %c1] : memref to memref + %7 = memref.subview %D[%arg7, %arg6][%c40, %c30][%c1, %c1]: memref to memref + %8 = memref.subview %E[%arg5, %arg6][%c20, %c40][%c1, %c1] : memref to memref + %9 = memref.dim %5, %c0 : memref + %10 = memref.dim %5, %c1 : memref + %11 = memref.dim %7, %c1 : memref scf.for %arg8 = %c0 to %9 step %c2 { scf.for %arg9 = %c0 to %11 step %c3 { scf.for %arg10 = %c0 to %10 step %c4 { - %14 = std.subview %5[%arg8, %arg10][%c2, %c4][%c1, %c1] : memref to memref - %16 = std.subview %7[%arg10, %arg9][%c4, %c3][%c1, %c1]: memref to memref - %17 = std.subview %8[%arg8, %arg9][%c2, %c4][%c1, %c1] : memref to memref + %14 = memref.subview %5[%arg8, %arg10][%c2, %c4][%c1, %c1] : memref to memref + %16 = memref.subview %7[%arg10, %arg9][%c4, %c3][%c1, %c1]: memref to memref + %17 = memref.subview %8[%arg8, %arg9][%c2, %c4][%c1, %c1] : memref to memref linalg.matmul ins(%14, %16: memref, memref) outs(%17: memref) } diff --git a/mlir/test/Dialect/Linalg/fusion-indexed-generic.mlir b/mlir/test/Dialect/Linalg/fusion-indexed-generic.mlir --- a/mlir/test/Dialect/Linalg/fusion-indexed-generic.mlir +++ b/mlir/test/Dialect/Linalg/fusion-indexed-generic.mlir @@ -21,15 +21,15 @@ %c0 = constant 0 : index %c25 = constant 25 : index %c10 = constant 10 : index - %0 = dim %C, %c0 : memref - %1 = dim %C, %c1 : memref - %2 = dim %D, %c0 : memref - %3 = dim %D, %c1 : memref + %0 = memref.dim %C, %c0 : memref + %1 = memref.dim %C, %c1 : memref + %2 = memref.dim %D, %c0 : memref + %3 = memref.dim %D, %c1 : memref scf.for %arg2 = %c0 to %0 step %c10 { scf.for %arg3 = %c0 to %1 step %c25 { - %4 = std.subview %C[%arg2, %arg3][%c10, %c25][%c1, %c1] : + %4 = memref.subview %C[%arg2, %arg3][%c10, %c25][%c1, %c1] : memref to memref - %5 = std.subview %D[%arg2, %arg3][%c10, %c25][%c1, %c1] : + %5 = memref.subview %D[%arg2, %arg3][%c10, %c25][%c1, %c1] : memref to memref linalg.indexed_generic { indexing_maps = [#id_2d, #id_2d], @@ -89,14 +89,14 @@ %out = addf %tmp, %j_float : f32 linalg.yield %out : f32 } - %C_X = dim %C, %c0 : memref - %C_Y = dim %C, %c1 : memref - %D_X = dim %D, %c0 : memref - %D_Y = dim %D, %c1 : memref + %C_X = memref.dim %C, %c0 : memref + %C_Y = memref.dim %C, %c1 : memref + %D_X = memref.dim %D, %c0 : memref + %D_Y = memref.dim %D, %c1 : memref scf.parallel (%arg2, %arg3) = (%c0, %c0) to (%C_X, %C_Y) step (%c10, %c25) { - %C_view = std.subview %C[%arg2, %arg3][%c10, %c25][%c1, %c1] : + %C_view = memref.subview %C[%arg2, %arg3][%c10, %c25][%c1, %c1] : memref to memref - %D_view = std.subview %D[%arg2, %arg3][%c10, %c25][%c1, %c1] : + %D_view = memref.subview %D[%arg2, %arg3][%c10, %c25][%c1, %c1] : memref to memref linalg.generic { indexing_maps = [#id_2d, #id_2d], @@ -150,18 +150,18 @@ %out = addf %tmp, %j_float : f32 linalg.yield %out : f32 } - %C_X = dim %C, %c0 : memref - %C_Y = dim %C, %c1 : memref - %D_X = dim %D, %c0 : memref - %D_Y = dim %D, %c1 : memref + %C_X = 
memref.dim %C, %c0 : memref + %C_Y = memref.dim %C, %c1 : memref + %D_X = memref.dim %D, %c0 : memref + %D_Y = memref.dim %D, %c1 : memref %3 = linalg.range %c0 : %C_Y : %c3 : !linalg.range scf.parallel (%j) = (%c0) to (%C_Y) step (%c3) { %0 = affine.min affine_map<(d0, d1, d2) -> (d0, d1 - d2)>(%c3, %C_Y, %j) - %C_view = subview %C[%c0, %j] [%C_X, %0] [%c1, %c1] : + %C_view = memref.subview %C[%c0, %j] [%C_X, %0] [%c1, %c1] : memref to memref %1 = affine.min affine_map<(d0, d1, d2) -> (d0, d1 - d2)>(%c3, %D_Y, %j) - %D_view = subview %D[%c0, %j] [%D_X, %1] [%c1, %c1] : + %D_view = memref.subview %D[%c0, %j] [%D_X, %1] [%c1, %c1] : memref to memref linalg.generic { diff --git a/mlir/test/Dialect/Linalg/fusion-pattern.mlir b/mlir/test/Dialect/Linalg/fusion-pattern.mlir --- a/mlir/test/Dialect/Linalg/fusion-pattern.mlir +++ b/mlir/test/Dialect/Linalg/fusion-pattern.mlir @@ -28,35 +28,35 @@ // CHECK-DAG: %[[CST:.+]] = constant 0.0{{.*}} : f32 // CHECK-DAG: linalg.fill(%[[ARG2]], %[[CST]]) // CHECK-SAME: __internal_linalg_transform__ = "after_basic_fusion_original" -// CHECK-DAG: %[[M:.+]] = dim %[[ARG0]], %[[C0]] -// CHECK-DAG: %[[N:.+]] = dim %[[ARG1]], %[[C1]] +// CHECK-DAG: %[[M:.+]] = memref.dim %[[ARG0]], %[[C0]] +// CHECK-DAG: %[[N:.+]] = memref.dim %[[ARG1]], %[[C1]] // CHECK: scf.parallel (%[[IV0:.+]], %[[IV1:.+]]) = // CHECK-SAME: to (%[[M]], %[[N]]) // CHECK-SAME: step (%[[C32]], %[[C64]]) { // CHECK: %[[TILE_M:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[M]]] -// CHECK: %[[K:.+]] = dim %[[ARG0]], %[[C1]] -// CHECK: %[[SV1:.+]] = subview %[[ARG0]][%[[IV0]], 0] +// CHECK: %[[K:.+]] = memref.dim %[[ARG0]], %[[C1]] +// CHECK: %[[SV1:.+]] = memref.subview %[[ARG0]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M]], %[[K]]] -// CHECK: %[[K_2:.+]] = dim %[[ARG1]], %[[C0]] +// CHECK: %[[K_2:.+]] = memref.dim %[[ARG1]], %[[C0]] // CHECK: %[[TILE_N:.+]] = affine.min #[[MAP2]](%[[IV1]])[%[[N]]] -// CHECK: %[[SV2:.+]] = subview %[[ARG1]][0, %[[IV1]]] +// CHECK: %[[SV2:.+]] = memref.subview %[[ARG1]][0, %[[IV1]]] // CHECK-SAME: %[[K_2]], %[[TILE_N]] -// CHECK: %[[M_2:.+]] = dim %[[ARG2]], %[[C0]] +// CHECK: %[[M_2:.+]] = memref.dim %[[ARG2]], %[[C0]] // CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[M_2]]] -// CHECK: %[[N_2:.+]] = dim %[[ARG2]], %[[C1]] +// CHECK: %[[N_2:.+]] = memref.dim %[[ARG2]], %[[C1]] // CHECK: %[[TILE_N_2:.+]] = affine.min #[[MAP2]](%[[IV1]])[%[[N_2]]] -// CHECK: %[[SV3:.+]] = subview %[[ARG2]][%[[IV0]], %[[IV1]]] +// CHECK: %[[SV3:.+]] = memref.subview %[[ARG2]][%[[IV0]], %[[IV1]]] // CHECK-SAME: [%[[TILE_M_2]], %[[TILE_N_2]]] -// CHECK: %[[SV3_2:.+]] = subview %[[ARG2]][%[[IV0]], %[[IV1]]] +// CHECK: %[[SV3_2:.+]] = memref.subview %[[ARG2]][%[[IV0]], %[[IV1]]] // CHECK-SAME: [%[[TILE_M]], %[[TILE_N]]] // CHECK: linalg.fill(%[[SV3_2]], %[[CST]]) // CHECK-SAME: __internal_linalg_transform__ = "after_basic_fusion_producer" // CHECK: scf.for %[[IV2:.+]] = %[[C0]] to %[[K]] step %[[C16]] { // CHECK: %[[TILE_K:.+]] = affine.min #[[MAP3]](%[[IV2]])[%[[K]]] -// CHECK: %[[SV4:.+]] = subview %[[SV1]][0, %[[IV2]]] +// CHECK: %[[SV4:.+]] = memref.subview %[[SV1]][0, %[[IV2]]] // CHECK-SAME: [%[[TILE_M]], %[[TILE_K]]] // CHECK: %[[TILE_K_2:.+]] = affine.min #[[MAP3]](%[[IV2]])[%[[K_2]]] -// CHECK: %[[SV5:.+]] = subview %[[SV2]][%[[IV2]], 0] +// CHECK: %[[SV5:.+]] = memref.subview %[[SV2]][%[[IV2]], 0] // CHECK-SAME: [%[[TILE_K_2]], %[[TILE_N]]] // CHECK: linalg.matmul // CHECK-SAME: __internal_linalg_transform__ = "after_basic_fusion" @@ -99,40 +99,40 @@ // CHECK-DAG: %[[CST:.+]] = 
constant 0.0{{.*}} : f32 // CHECK-DAG: linalg.copy(%[[ARG1]], %[[ARG2]]) // CHECK-SAME: __internal_linalg_transform__ = "after_rhs_fusion_original" -// CHECK-DAG: %[[N:.+]] = dim %[[ARG2]], %[[C1]] +// CHECK-DAG: %[[N:.+]] = memref.dim %[[ARG2]], %[[C1]] // CHECK: scf.parallel (%[[IV0:.+]]) = // CHECK-SAME: (%[[C0]]) to (%[[N]]) step (%[[C64]]) { -// CHECK: %[[K:.+]] = dim %[[ARG2]], %[[C0]] +// CHECK: %[[K:.+]] = memref.dim %[[ARG2]], %[[C0]] // CHECK: %[[TILE_N:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[N]]] -// CHECK: %[[SV1:.+]] = subview %[[ARG2]][0, %[[IV0]]] +// CHECK: %[[SV1:.+]] = memref.subview %[[ARG2]][0, %[[IV0]]] // CHECK-SAME: [%[[K]], %[[TILE_N]]] -// CHECK: %[[M:.+]] = dim %[[ARG3]], %[[C0]] -// CHECK: %[[N_2:.+]] = dim %[[ARG3]], %[[C1]] +// CHECK: %[[M:.+]] = memref.dim %[[ARG3]], %[[C0]] +// CHECK: %[[N_2:.+]] = memref.dim %[[ARG3]], %[[C1]] // CHECK: %[[TILE_N_2:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[N_2]]] -// CHECK: %[[SV2:.+]] = subview %[[ARG3]][0, %[[IV0]]] +// CHECK: %[[SV2:.+]] = memref.subview %[[ARG3]][0, %[[IV0]]] // CHECK-SAME: [%[[M]], %[[TILE_N_2]]] -// CHECK: %[[K_2:.+]] = dim %[[ARG1]], %[[C0]] -// CHECK: %[[SV3:.+]] = subview %[[ARG1]][0, %[[IV0]]] +// CHECK: %[[K_2:.+]] = memref.dim %[[ARG1]], %[[C0]] +// CHECK: %[[SV3:.+]] = memref.subview %[[ARG1]][0, %[[IV0]]] // CHECK-SAME: [%[[K_2]], %[[TILE_N]]] -// CHECK: %[[SV3_2:.+]] = subview %[[ARG2]][0, %[[IV0]]] +// CHECK: %[[SV3_2:.+]] = memref.subview %[[ARG2]][0, %[[IV0]]] // CHECK-SAME: [%[[K_2]], %[[TILE_N]]] // CHECK: linalg.copy(%[[SV3]], %[[SV3_2]]) // CHECK-SAME: __internal_linalg_transform__ = "after_rhs_fusion_producer" // CHECK-NOT: linalg.fill -// CHECK-DAG: %[[M_2:.+]] = dim %[[ARG0]], %[[C0]] -// CHECK-DAG: %[[K_2:.+]] = dim %[[ARG0]], %[[C1]] +// CHECK-DAG: %[[M_2:.+]] = memref.dim %[[ARG0]], %[[C0]] +// CHECK-DAG: %[[K_2:.+]] = memref.dim %[[ARG0]], %[[C1]] // CHECK: scf.parallel (%[[IV1:.+]]) = // CHECK-SAME: (%[[C0]]) to (%[[M_2]]) step (%[[C32]]) { // CHECK-NEXT: scf.for %[[IV2:.+]] = %[[C0]] to %[[K_2]] step %[[C16]] { // CHECK: %[[TILE_M:.+]] = affine.min #[[MAP2]](%[[IV1]])[%[[M_2]]] // CHECK: %[[TILE_K:.+]] = affine.min #[[MAP3]](%[[IV2]])[%[[K_2]]] -// CHECK: %[[SV4:.+]] = subview %[[ARG0]][%[[IV1]], %[[IV2]]] +// CHECK: %[[SV4:.+]] = memref.subview %[[ARG0]][%[[IV1]], %[[IV2]]] // CHECK-SAME: [%[[TILE_M]], %[[TILE_K]]] // CHECK: %[[TILE_K_2:.+]] = affine.min #[[MAP3]](%[[IV2]])[%[[K]]] -// CHECK: %[[SV5:.+]] = subview %[[SV1]][%[[IV2]], 0] +// CHECK: %[[SV5:.+]] = memref.subview %[[SV1]][%[[IV2]], 0] // CHECK-SAME: [%[[TILE_K_2]], %[[TILE_N]]] // CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP2]](%[[IV1]])[%[[M]]] -// CHECK: %[[SV6:.+]] = subview %[[SV2]][%[[IV1]], 0] +// CHECK: %[[SV6:.+]] = memref.subview %[[SV2]][%[[IV1]], 0] // CHECK-SAME: [%[[TILE_M_2]], %[[TILE_N_2]]] // CHECK: linalg.matmul // CHECK-SAME: __internal_linalg_transform__ = "after_rhs_fusion" @@ -179,43 +179,43 @@ // CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion_original" // CHECK: linalg.fill(%[[ARG3]], %[[CST]]) // CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion_original" -// CHECK-DAG: %[[M:.+]] = dim %[[ARG1]], %[[C0]] +// CHECK-DAG: %[[M:.+]] = memref.dim %[[ARG1]], %[[C0]] // CHECK: scf.parallel (%[[IV0:.+]]) = // CHECK-SAME: (%[[C0]]) to (%[[M]]) step (%[[C32]]) { // CHECK: %[[TILE_M:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[M]]] -// CHECK: %[[K:.+]] = dim %[[ARG1]], %[[C1]] -// CHECK: %[[SV1:.+]] = subview %[[ARG1]][%[[IV0]], 0] +// CHECK: %[[K:.+]] = memref.dim 
%[[ARG1]], %[[C1]] +// CHECK: %[[SV1:.+]] = memref.subview %[[ARG1]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M]], %[[K]]] -// CHECK: %[[M_2:.+]] = dim %[[ARG3]], %[[C0]] +// CHECK: %[[M_2:.+]] = memref.dim %[[ARG3]], %[[C0]] // CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[M_2]]] -// CHECK: %[[N:.+]] = dim %[[ARG3]], %[[C1]] -// CHECK: %[[SV2:.+]] = subview %[[ARG3]][%[[IV0]], 0] +// CHECK: %[[N:.+]] = memref.dim %[[ARG3]], %[[C1]] +// CHECK: %[[SV2:.+]] = memref.subview %[[ARG3]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M_2]], %[[N]]] -// CHECK: %[[SV2_2:.+]] = subview %[[ARG3]][%[[IV0]], 0] +// CHECK: %[[SV2_2:.+]] = memref.subview %[[ARG3]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M]], %[[N]]] -// CHECK: %[[K_2:.+]] = dim %[[ARG0]], %[[C1]] -// CHECK: %[[SV3:.+]] = subview %[[ARG0]][%[[IV0]], 0] +// CHECK: %[[K_2:.+]] = memref.dim %[[ARG0]], %[[C1]] +// CHECK: %[[SV3:.+]] = memref.subview %[[ARG0]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M]], %[[K_2]]] -// CHECK: %[[SV3_2:.+]] = subview %[[ARG1]][%[[IV0]], 0] +// CHECK: %[[SV3_2:.+]] = memref.subview %[[ARG1]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M]], %[[K_2]]] // CHECK: linalg.copy(%[[SV3]], %[[SV3_2]]) // CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion_producer" // CHECK: linalg.fill(%[[SV2_2]], %[[CST]]) // CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion_producer" -// CHECK-DAG: %[[N_2:.+]] = dim %[[ARG2]], %[[C1]] +// CHECK-DAG: %[[N_2:.+]] = memref.dim %[[ARG2]], %[[C1]] // CHECK: scf.parallel (%[[IV1:.+]]) = // CHECK-SAME: (%[[C0]]) to (%[[N_2]]) step (%[[C64]]) { // CHECK-NEXT: scf.for %[[IV2:.+]] = %[[C0]] to %[[K]] step %[[C16]] { // CHECK: %[[TILE_K:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[K]]] -// CHECK: %[[SV4:.+]] = subview %[[SV1]][0, %[[IV2]]] +// CHECK: %[[SV4:.+]] = memref.subview %[[SV1]][0, %[[IV2]]] // CHECK-SAME: [%[[TILE_M]], %[[TILE_K]]] -// CHECK: %[[K_2:.+]] = dim %[[ARG2]], %[[C0]] +// CHECK: %[[K_2:.+]] = memref.dim %[[ARG2]], %[[C0]] // CHECK: %[[TILE_K_2:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[K_2]]] // CHECK: %[[TILE_N:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[N_2]]] -// CHECK: %[[SV5:.+]] = subview %[[ARG2]][%[[IV2]], %[[IV1]]] +// CHECK: %[[SV5:.+]] = memref.subview %[[ARG2]][%[[IV2]], %[[IV1]]] // CHECK-SAME: [%[[TILE_K_2]], %[[TILE_N]]] // CHECK: %[[TILE_N_2:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[N]]] -// CHECK: %[[SV6:.+]] = subview %[[SV2]][0, %[[IV1]]] +// CHECK: %[[SV6:.+]] = memref.subview %[[SV2]][0, %[[IV1]]] // CHECK-SAME: [%[[TILE_M_2]], %[[TILE_N_2]]] // CHECK: linalg.matmul // CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion" @@ -259,44 +259,44 @@ // CHECK-DAG: %[[C16:.+]] = constant 16 : index // CHECK: linalg.matmul // CHECK-SAME: __internal_linalg_transform__ = "after_lhs_fusion_original" -// CHECK-DAG: %[[M:.+]] = dim %[[ARG2]], %[[C0]] +// CHECK-DAG: %[[M:.+]] = memref.dim %[[ARG2]], %[[C0]] // CHECK: scf.parallel (%[[IV0:.+]]) = // CHECK-SAME: (%[[C0]]) to (%[[M]]) step (%[[C32]]) { // CHECK: %[[TILE_M:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[M]]] -// CHECK: %[[K2:.+]] = dim %[[ARG2]], %[[C1]] -// CHECK: %[[SV1:.+]] = subview %[[ARG2]][%[[IV0]], 0] +// CHECK: %[[K2:.+]] = memref.dim %[[ARG2]], %[[C1]] +// CHECK: %[[SV1:.+]] = memref.subview %[[ARG2]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M]], %[[K2]]] -// CHECK: %[[M_2:.+]] = dim %[[ARG4]], %[[C0]] +// CHECK: %[[M_2:.+]] = memref.dim %[[ARG4]], %[[C0]] // CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[M_2]]] -// CHECK: %[[N:.+]] = dim %[[ARG4]], %[[C1]] -// 
CHECK: %[[SV2:.+]] = subview %[[ARG4]][%[[IV0]], 0] +// CHECK: %[[N:.+]] = memref.dim %[[ARG4]], %[[C1]] +// CHECK: %[[SV2:.+]] = memref.subview %[[ARG4]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M_2]], %[[N]]] -// CHECK: %[[K2_2:.+]] = dim %[[ARG1]], %[[C1]] -// CHECK: %[[K1:.+]] = dim %[[ARG0]], %[[C1]] -// CHECK: %[[SV3:.+]] = subview %[[ARG0]][%[[IV0]], 0] +// CHECK: %[[K2_2:.+]] = memref.dim %[[ARG1]], %[[C1]] +// CHECK: %[[K1:.+]] = memref.dim %[[ARG0]], %[[C1]] +// CHECK: %[[SV3:.+]] = memref.subview %[[ARG0]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M]], %[[K1]]] -// CHECK: %[[SV4:.+]] = subview %[[ARG1]][0, 0] [%[[K1]], %[[K2_2]]] -// CHECK: %[[SV1_2:.+]] = subview %[[ARG2]][%[[IV0]], 0] +// CHECK: %[[SV4:.+]] = memref.subview %[[ARG1]][0, 0] [%[[K1]], %[[K2_2]]] +// CHECK: %[[SV1_2:.+]] = memref.subview %[[ARG2]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M]], %[[K2_2]]] // CHECK: linalg.matmul // CHECK-SAME: __internal_linalg_transform__ = "after_lhs_fusion_producer" // CHECK-SAME: ins(%[[SV3]], %[[SV4]] // CHECK-SAME: : memref, memref) // CHECK-SAME: outs(%[[SV1_2]] : memref) -// CHECK-DAG: %[[N_2:.+]] = dim %[[ARG3]], %[[C1]] +// CHECK-DAG: %[[N_2:.+]] = memref.dim %[[ARG3]], %[[C1]] // CHECK: scf.parallel (%[[IV1:.+]]) = // CHECK-SAME: (%[[C0]]) to (%[[N_2]]) step (%[[C64]]) { // CHECK-NEXT: scf.for %[[IV2:.+]] = %[[C0]] to %[[K]] step %[[C16]] { // CHECK: %[[TILE_K:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[K]]] -// CHECK: %[[SV6:.+]] = subview %[[SV1]][0, %[[IV2]]] +// CHECK: %[[SV6:.+]] = memref.subview %[[SV1]][0, %[[IV2]]] // CHECK-SAME: [%[[TILE_M]], %[[TILE_K]]] -// CHECK: %[[K_2:.+]] = dim %[[ARG3]], %[[C0]] +// CHECK: %[[K_2:.+]] = memref.dim %[[ARG3]], %[[C0]] // CHECK: %[[TILE_K_2:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[K_2]]] // CHECK: %[[TILE_N:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[N_2]]] -// CHECK: %[[SV7:.+]] = subview %[[ARG3]][%[[IV2]], %[[IV1]]] +// CHECK: %[[SV7:.+]] = memref.subview %[[ARG3]][%[[IV2]], %[[IV1]]] // CHECK-SAME: [%[[TILE_K_2]], %[[TILE_N]]] // CHECK: %[[TILE_N_2:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[N]]] -// CHECK: %[[SV8:.+]] = subview %[[SV2]][0, %[[IV1]]] +// CHECK: %[[SV8:.+]] = memref.subview %[[SV2]][0, %[[IV1]]] // CHECK-SAME: [%[[TILE_M_2]], %[[TILE_N_2]]] // CHECK: linalg.matmul // CHECK-SAME: __internal_linalg_transform__ = "after_lhs_fusion" @@ -316,9 +316,9 @@ %arg2: memref) { %c0 = constant 0 : index %c1 = constant 1 : index - %0 = dim %arg2, %c0 : memref - %1 = dim %arg2, %c1 : memref - %2 = alloc(%0, %1) : memref + %0 = memref.dim %arg2, %c0 : memref + %1 = memref.dim %arg2, %c1 : memref + %2 = memref.alloc(%0, %1) : memref linalg.matmul ins(%arg0, %arg1 : memref, memref) outs(%2 : memref) linalg.generic @@ -340,14 +340,14 @@ // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: memref // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: memref -// CHECK: %[[T2:.+]] = alloc(%{{.*}}, %{{.*}}) : memref +// CHECK: %[[T2:.+]] = memref.alloc(%{{.*}}, %{{.*}}) : memref // CHECK: linalg.matmul // CHECK-SAME: after_transpose_fusion_original // CHECK: scf.parallel (%[[ARG3:[a-zA-Z0-9_]+]], %[[ARG4:.[a-zA-Z0-9_]+]]) -// CHECK: %[[T5:.+]] = subview %[[T2]][%[[ARG3]], %[[ARG4]]] -// CHECK: %[[T6:.+]] = subview %[[ARG2]][%[[ARG3]], %[[ARG4]]] -// CHECK: %[[T8:.+]] = subview %[[ARG0]][%[[ARG3]], 0] -// CHECK: %[[T9:.+]] = subview %[[ARG1]][0, %[[ARG4]]] +// CHECK: %[[T5:.+]] = memref.subview %[[T2]][%[[ARG3]], %[[ARG4]]] +// CHECK: %[[T6:.+]] = memref.subview %[[ARG2]][%[[ARG3]], %[[ARG4]]] +// CHECK: %[[T8:.+]] = memref.subview 
%[[ARG0]][%[[ARG3]], 0] +// CHECK: %[[T9:.+]] = memref.subview %[[ARG1]][0, %[[ARG4]]] // CHECK: linalg.matmul // CHECK-SAME: after_transpose_fusion_producer // CHECK-SAME: ins(%[[T8]], %[[T9]] @@ -366,9 +366,9 @@ %arg2: memref) { %c0 = constant 0 : index %c1 = constant 1 : index - %0 = dim %arg2, %c0 : memref - %1 = dim %arg2, %c1 : memref - %2 = alloc(%0, %1) : memref + %0 = memref.dim %arg2, %c0 : memref + %1 = memref.dim %arg2, %c1 : memref + %2 = memref.alloc(%0, %1) : memref linalg.matmul ins(%arg0, %arg1 : memref, memref) outs(%2 : memref) linalg.generic @@ -413,17 +413,17 @@ %c16 = constant 16 : index %cst = constant 0.000000e+00 : f32 linalg.fill(%arg2, %cst) : memref, f32 - %0 = dim %arg0, %c0 : memref - %1 = dim %arg1, %c1 : memref - %2 = dim %arg0, %c1 : memref + %0 = memref.dim %arg0, %c0 : memref + %1 = memref.dim %arg1, %c1 : memref + %2 = memref.dim %arg0, %c1 : memref scf.parallel (%arg3, %arg4) = (%c0, %c0) to (%0, %1) step (%c32, %c64) { scf.for %arg5 = %c0 to %2 step %c16 { %3 = affine.min #map0(%arg3)[%0] %4 = affine.min #map1(%arg4)[%1] %5 = affine.min #map2(%arg5)[%2] - %6 = subview %arg0[%arg3, %arg5] [%3, %5] [1, 1] : memref to memref - %7 = subview %arg1[%arg5, %arg4] [%5, %4] [1, 1] : memref to memref - %8 = subview %arg2[%arg3, %arg4] [%3, %4] [1, 1] : memref to memref + %6 = memref.subview %arg0[%arg3, %arg5] [%3, %5] [1, 1] : memref to memref + %7 = memref.subview %arg1[%arg5, %arg4] [%5, %4] [1, 1] : memref to memref + %8 = memref.subview %arg2[%arg3, %arg4] [%3, %4] [1, 1] : memref to memref linalg.matmul {__internal_linalg_transform__ = "basic_fusion"} ins(%6, %7 : memref, memref) outs(%8 : memref) diff --git a/mlir/test/Dialect/Linalg/fusion-sequence.mlir b/mlir/test/Dialect/Linalg/fusion-sequence.mlir --- a/mlir/test/Dialect/Linalg/fusion-sequence.mlir +++ b/mlir/test/Dialect/Linalg/fusion-sequence.mlir @@ -6,9 +6,9 @@ %cst = constant 0.000000e+00 : f32 %c0 = constant 0 : index %c1 = constant 1 : index - %d0 = dim %arg0, %c0 : memref - %d1 = dim %arg1, %c1 : memref - %0 = alloc(%d0, %d1) : memref + %d0 = memref.dim %arg0, %c0 : memref + %d1 = memref.dim %arg1, %c1 : memref + %0 = memref.alloc(%d0, %d1) : memref linalg.fill(%0, %cst) : memref, f32 linalg.matmul ins(%arg0, %arg1 : memref, memref) outs(%0 : memref) @@ -34,13 +34,13 @@ // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: memref // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: memref // CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: memref -// CHECK: %[[TEMP:.+]] = alloc(%{{.*}}, %{{.*}}) : memref +// CHECK: %[[TEMP:.+]] = memref.alloc(%{{.*}}, %{{.*}}) : memref // CHECK: scf.parallel (%[[IV0:.+]], %[[IV1:.+]]) = {{.*}} { -// CHECK-DAG: %[[SV_TEMP:.+]] = subview %[[TEMP]][%[[IV0]], %[[IV1]]] -// CHECK-DAG: %[[SV_ARG2:.+]] = subview %[[ARG2]][%[[IV1]]] -// CHECK-DAG: %[[SV_ARG3:.+]] = subview %[[ARG3]][%[[IV0]], %[[IV1]]] -// CHECK-DAG: %[[SV_ARG0:.+]] = subview %[[ARG0]][%[[IV0]], 0] -// CHECK-DAG: %[[SV_ARG1:.+]] = subview %[[ARG1]][0, %[[IV1]]] +// CHECK-DAG: %[[SV_TEMP:.+]] = memref.subview %[[TEMP]][%[[IV0]], %[[IV1]]] +// CHECK-DAG: %[[SV_ARG2:.+]] = memref.subview %[[ARG2]][%[[IV1]]] +// CHECK-DAG: %[[SV_ARG3:.+]] = memref.subview %[[ARG3]][%[[IV0]], %[[IV1]]] +// CHECK-DAG: %[[SV_ARG0:.+]] = memref.subview %[[ARG0]][%[[IV0]], 0] +// CHECK-DAG: %[[SV_ARG1:.+]] = memref.subview %[[ARG1]][0, %[[IV1]]] // CHECK: linalg.fill(%[[SV_TEMP]], %{{.+}}) // CHECK: linalg.matmul // CHECK-SAME: ins(%[[SV_ARG0]], %[[SV_ARG1]] @@ -62,12 +62,12 @@ %cst = constant 0.000000e+00 : f32 %c0 = constant 0 : index %c1 = constant 1 : index - 
%m = dim %arg0, %c0 : memref - %n1 = dim %arg1, %c1 : memref - %n2 = dim %arg2, %c1 : memref - %n3 = dim %arg3, %c1 : memref - %0 = alloc(%m, %n1) : memref - %1 = alloc(%m, %n2) : memref + %m = memref.dim %arg0, %c0 : memref + %n1 = memref.dim %arg1, %c1 : memref + %n2 = memref.dim %arg2, %c1 : memref + %n3 = memref.dim %arg3, %c1 : memref + %0 = memref.alloc(%m, %n1) : memref + %1 = memref.alloc(%m, %n2) : memref linalg.fill(%0, %cst) : memref, f32 linalg.matmul ins(%arg0, %arg1 : memref, memref) outs(%0 : memref) @@ -92,30 +92,30 @@ // CHECK-DAG: %[[C0:.+]] = constant 0 : index // CHECK-DAG: %[[C1:.+]] = constant 1 : index // CHECK-DAG: %[[C16:.+]] = constant 16 : index -// CHECK-DAG: %[[M:.+]] = dim %[[ARG0]], %[[C0]] -// CHECK-DAG: %[[N1:.+]] = dim %[[ARG1]], %[[C1]] -// CHECK-DAG: %[[N2:.+]] = dim %[[ARG2]], %[[C1]] -// CHECK: %[[ALLOC1:.+]] = alloc(%[[M]], %[[N1]]) -// CHECK: %[[ALLOC2:.+]] = alloc(%[[M]], %[[N2]]) +// CHECK-DAG: %[[M:.+]] = memref.dim %[[ARG0]], %[[C0]] +// CHECK-DAG: %[[N1:.+]] = memref.dim %[[ARG1]], %[[C1]] +// CHECK-DAG: %[[N2:.+]] = memref.dim %[[ARG2]], %[[C1]] +// CHECK: %[[ALLOC1:.+]] = memref.alloc(%[[M]], %[[N1]]) +// CHECK: %[[ALLOC2:.+]] = memref.alloc(%[[M]], %[[N2]]) // CHECK: scf.parallel (%[[IV0:.+]]) = (%[[C0]]) to (%[[M]]) // CHECK-SAME: step (%[[C16]]) { // CHECK: %[[TILE_M:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[M]]] -// CHECK: %[[SV_ALLOC2:.+]] = subview %[[ALLOC2]][%[[IV0]], 0] +// CHECK: %[[SV_ALLOC2:.+]] = memref.subview %[[ALLOC2]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M]], %[[N2]]] -// CHECK: %[[M_2:.+]] = dim %[[ARG4]], %[[C0]] +// CHECK: %[[M_2:.+]] = memref.dim %[[ARG4]], %[[C0]] // CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[M_2]]] -// CHECK: %[[N3:.+]] = dim %[[ARG4]], %[[C1]] -// CHECK: %[[SV_ARG4:.+]] = subview %[[ARG4]][%[[IV0]], 0] +// CHECK: %[[N3:.+]] = memref.dim %[[ARG4]], %[[C1]] +// CHECK: %[[SV_ARG4:.+]] = memref.subview %[[ARG4]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M_2]], %[[N3]]] -// CHECK: %[[SV_ARG4_2:.+]] = subview %[[ARG4]][%[[IV0]], 0] +// CHECK: %[[SV_ARG4_2:.+]] = memref.subview %[[ARG4]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M]], %[[N3]]] -// CHECK: %[[SV_ALLOC1:.+]] = subview %[[ALLOC1]][%[[IV0]], 0] +// CHECK: %[[SV_ALLOC1:.+]] = memref.subview %[[ALLOC1]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M]], %[[N1]]] -// CHECK: %[[SV_ARG2:.+]] = subview %[[ARG2]][0, 0] [%[[N1]], %[[N2]]] -// CHECK: %[[N0:.+]] = dim %[[ARG0]], %[[C1]] -// CHECK: %[[SV_ARG0:.+]] = subview %[[ARG0]][%[[IV0]], 0] +// CHECK: %[[SV_ARG2:.+]] = memref.subview %[[ARG2]][0, 0] [%[[N1]], %[[N2]]] +// CHECK: %[[N0:.+]] = memref.dim %[[ARG0]], %[[C1]] +// CHECK: %[[SV_ARG0:.+]] = memref.subview %[[ARG0]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M:.+]], %[[N0]]] -// CHECK: %[[SV_ARG1:.+]] = subview %[[ARG1]][0, 0] [%[[N0]], %[[N1]]] +// CHECK: %[[SV_ARG1:.+]] = memref.subview %[[ARG1]][0, 0] [%[[N0]], %[[N1]]] // CHECK: linalg.fill(%[[SV_ALLOC1]], %{{.+}}) // CHECK: linalg.matmul ins(%[[SV_ARG0]], %[[SV_ARG1]] // CHECK-SAME: : memref, memref) @@ -141,8 +141,8 @@ %c1 = constant 1 : index %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor) outs(%arg2 : tensor) -> tensor - %1 = dim %0, %c0 : tensor - %2 = dim %0, %c1 : tensor + %1 = memref.dim %0, %c0 : tensor + %2 = memref.dim %0, %c1 : tensor %3 = linalg.init_tensor [%1, %2] : tensor %4 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, @@ -212,16 +212,16 @@ // CHECK-DAG: %[[C1:.+]] = constant 1 : index // CHECK: %[[R0:.+]] = scf.for %[[IV0:[a-zA-Z0-9_]+]] = // CHECK-SAME: 
iter_args(%[[ARG8:.+]] = %[[ARG6]]) -> (tensor) { -// CHECK: %[[N3:.+]] = dim %[[ARG8]], %[[C1]] +// CHECK: %[[N3:.+]] = memref.dim %[[ARG8]], %[[C1]] // CHECK: %[[STARG6:.+]] = subtensor %[[ARG8]][%[[IV0]], 0] // CHECK-SAME: [%{{[a-zA-Z0-9_]+}}, %[[N3]]] -// CHECK: %[[N2:.+]] = dim %[[ARG3]], %[[C1]] -// CHECK: %[[N1:.+]] = dim %[[ARG1]], %[[C1]] +// CHECK: %[[N2:.+]] = memref.dim %[[ARG3]], %[[C1]] +// CHECK: %[[N1:.+]] = memref.dim %[[ARG1]], %[[C1]] // CHECK: %[[STARG3:.+]] = subtensor %[[ARG3]][0, 0] // CHECK-SAME: [%[[N1]], %[[N2]]] // CHECK: %[[STARG4:.+]] = subtensor %[[ARG4]][%[[IV0]], 0] // CHECK-SAME: [%{{[a-zA-Z0-9_]+}}, %[[N2]]] -// CHECK: %[[N0:.+]] = dim %[[ARG0]], %[[C1]] +// CHECK: %[[N0:.+]] = memref.dim %[[ARG0]], %[[C1]] // CHECK: %[[STARG0:.+]] = subtensor %[[ARG0]][%[[IV0]], 0] // CHECK-SAME: [%{{[a-zA-Z0-9_]+}}, %[[N0]]] // CHECK: %[[STARG1:.+]] = subtensor %[[ARG1]][0, 0] diff --git a/mlir/test/Dialect/Linalg/fusion-tensor-pattern.mlir b/mlir/test/Dialect/Linalg/fusion-tensor-pattern.mlir --- a/mlir/test/Dialect/Linalg/fusion-tensor-pattern.mlir +++ b/mlir/test/Dialect/Linalg/fusion-tensor-pattern.mlir @@ -28,18 +28,18 @@ // CHECK-DAG: %[[C32:.+]] = constant 32 : index // CHECK-DAG: %[[C64:.+]] = constant 64 : index // CHECK-DAG: %[[C16:.+]] = constant 16 : index -// CHECK-DAG: %[[M:.+]] = dim %[[ARG0]], %[[C0]] +// CHECK-DAG: %[[M:.+]] = memref.dim %[[ARG0]], %[[C0]] // CHECK: %[[RESULT:.+]] = scf.for %[[IV0:[a-zA-Z0-9]+]] = // CHECK-SAME: %[[C0]] to %[[M]] step %[[C32]] // CHECK-SAME: iter_args(%[[ARG6:.+]] = %[[ARG4]]) -> (tensor) { // CHECK: %[[TILE_M:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[M]]] -// CHECK: %[[M_2:.+]] = dim %[[ARG6]], %[[C0]] +// CHECK: %[[M_2:.+]] = memref.dim %[[ARG6]], %[[C0]] // CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP1]](%[[M_2]], %[[IV0]]) -// CHECK: %[[N3:.+]] = dim %[[ARG6]], %[[C1]] +// CHECK: %[[N3:.+]] = memref.dim %[[ARG6]], %[[C1]] // CHECK: %[[ST_ARG6:.+]] = subtensor %[[ARG6]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M_2]], %[[N3]]] -// CHECK: %[[N2:.+]] = dim %[[ARG1]], %[[C1]] -// CHECK: %[[N1:.+]] = dim %[[ARG0]], %[[C1]] +// CHECK: %[[N2:.+]] = memref.dim %[[ARG1]], %[[C1]] +// CHECK: %[[N1:.+]] = memref.dim %[[ARG0]], %[[C1]] // CHECK: %[[ST_ARG0:.+]] = subtensor %[[ARG0]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M]], %[[N1]]] // CHECK: %[[ST_ARG1:.+]] = subtensor %[[ARG1]][0, 0] @@ -50,7 +50,7 @@ // CHECK-SAME: __internal_linalg_transform__ = "after_lhs_fusion_producer" // CHECK-SAME: ins(%[[ST_ARG0]], %[[ST_ARG1]] : tensor, tensor) // CHECK-SAME: outs(%[[ST_ARG2]] : tensor) -// CHECK: %[[N3_2:.+]] = dim %[[ARG3]], %[[C1]] +// CHECK: %[[N3_2:.+]] = memref.dim %[[ARG3]], %[[C1]] // CHECK: %[[YIELD0:.+]] = scf.for %[[IV1:[a-zA-Z0-9]+]] = // CHECK-SAME: %[[C0]] to %[[N3_2]] step %[[C64]] // CHECK-SAME: iter_args(%[[ARG8:.+]] = %[[ST_ARG6]]) -> (tensor) { @@ -60,13 +60,13 @@ // CHECK: %[[TILE_N2:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[N2]]] // CHECK: %[[ST_LHS:.+]] = subtensor %[[LHS]][0, %[[IV2]]] // CHECK-SAME: [%[[TILE_M]], %[[TILE_N2]]] -// CHECK: %[[N2_3:.+]] = dim %[[ARG3]], %[[C0]] +// CHECK: %[[N2_3:.+]] = memref.dim %[[ARG3]], %[[C0]] // CHECK: %[[TILE_N2_2:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[N2_3]]] // CHECK: %[[TILE_N3:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[N3_2]]] // CHECK: %[[ST_ARG3:.+]] = subtensor %[[ARG3]][%[[IV2]], %[[IV1]]] // CHECK-SAME: [%[[TILE_N2_2]], %[[TILE_N3]]] -// CHECK: %[[M_4:.+]] = dim %[[ARG10]], %[[C0]] -// CHECK: %[[N3_3:.+]] = dim %[[ARG10]], %[[C1]] +// CHECK: %[[M_4:.+]] = memref.dim 
%[[ARG10]], %[[C0]] +// CHECK: %[[N3_3:.+]] = memref.dim %[[ARG10]], %[[C1]] // CHECK: %[[TILE_N3_2:.+]] = affine.min #[[MAP4]](%[[N3_3]], %[[IV1]]) // CHECK: %[[ST_ARG4:.+]] = subtensor %[[ARG10]][0, %[[IV1]]] // CHECK-SAME: [%[[M_4]], %[[TILE_N3_2]]] @@ -94,12 +94,12 @@ %arg2: tensor) -> tensor{ %c0 = constant 0 : index %c1 = constant 1 : index - %0 = dim %arg2, %c0 : tensor - %1 = dim %arg2, %c1 : tensor + %0 = memref.dim %arg2, %c0 : tensor + %1 = memref.dim %arg2, %c1 : tensor %2 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor) outs(%arg2 : tensor) -> tensor - %3 = dim %2, %c0 : tensor - %4 = dim %2, %c1 : tensor + %3 = memref.dim %2, %c0 : tensor + %4 = memref.dim %2, %c1 : tensor %5 = linalg.init_tensor [%3, %4] : tensor %6 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, diff --git a/mlir/test/Dialect/Linalg/fusion-tensor.mlir b/mlir/test/Dialect/Linalg/fusion-tensor.mlir --- a/mlir/test/Dialect/Linalg/fusion-tensor.mlir +++ b/mlir/test/Dialect/Linalg/fusion-tensor.mlir @@ -8,8 +8,8 @@ { %c0 = constant 0 : index %c1 = constant 1 : index - %0 = dim %arg0, %c0 : tensor - %1 = dim %arg0, %c1 : tensor + %0 = memref.dim %arg0, %c0 : tensor + %1 = memref.dim %arg0, %c1 : tensor %2 = linalg.init_tensor [%0, %1] : tensor %3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) @@ -50,8 +50,8 @@ { %c0 = constant 0 : index %c1 = constant 1 : index - %0 = dim %arg0, %c0 : tensor - %1 = dim %arg0, %c1 : tensor + %0 = memref.dim %arg0, %c0 : tensor + %1 = memref.dim %arg0, %c1 : tensor %2 = linalg.init_tensor [%0, %1] : tensor %3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) @@ -84,8 +84,8 @@ { %c0 = constant 0 : index %c1 = constant 1 : index - %0 = dim %arg0, %c0 : tensor - %1 = dim %arg0, %c1 : tensor + %0 = memref.dim %arg0, %c0 : tensor + %1 = memref.dim %arg0, %c1 : tensor %2 = linalg.init_tensor [%0, %1] : tensor %3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) @@ -119,7 +119,7 @@ { %c0 = constant 0 : index %c1 = constant 1 : index - %0 = dim %arg0, %c0 : tensor + %0 = memref.dim %arg0, %c0 : tensor %1 = linalg.init_tensor [%0] : tensor %2 = linalg.generic {indexing_maps = [#map2, #map2, #map2], iterator_types = ["parallel"]} ins(%arg0, %arg1 : tensor, tensor) @@ -130,7 +130,7 @@ } -> tensor // CHECK: linalg.generic { // CHECK-SAME: indexing_maps = {{\[}}[[$MAP1]], [[$MAP1]], [[$MAP0]], [[$MAP0]] - %3 = dim %arg2, %c1 : tensor + %3 = memref.dim %arg2, %c1 : tensor %4 = linalg.init_tensor [%0, %3] : tensor %5 = linalg.generic {indexing_maps = [#map1, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%2, %arg2 : tensor, tensor) @@ -182,8 +182,8 @@ %c1 = constant 1 : index %c2 = constant 2 : index %cst = constant dense<42.0> : tensor<5xf32> - %0 = dim %arg0, %c1 : tensor<5x?x?xf32> - %1 = dim %arg0, %c2 : tensor<5x?x?xf32> + %0 = memref.dim %arg0, %c1 : tensor<5x?x?xf32> + %1 = memref.dim %arg0, %c2 : tensor<5x?x?xf32> %2 = linalg.init_tensor [5, %0, %1] : tensor<5x?x?xf32> %3 = linalg.generic { indexing_maps = [#map0, #map1, #map1], @@ -214,8 +214,8 @@ %c1 = constant 1 : index %c2 = constant 2 : index %cst = constant dense<42.0> : tensor<5xf32> - %0 = dim %arg0, %c1 : tensor<5x?x?xf32> - %1 = dim %arg0, %c2 : tensor<5x?x?xf32> + %0 = memref.dim %arg0, %c1 : tensor<5x?x?xf32> + %1 = memref.dim %arg0, 
%c2 : tensor<5x?x?xf32> %2 = linalg.init_tensor [5, %0, %1] : tensor<5x?x?xf32> %3 = linalg.indexed_generic { indexing_maps = [#map0, #map1, #map1], @@ -250,8 +250,8 @@ %c1 = constant 1 : index %c2 = constant 2 : index %cst = constant dense<42.0> : tensor - %0 = dim %arg0, %c1 : tensor<5x?x?xf32> - %1 = dim %arg0, %c2 : tensor<5x?x?xf32> + %0 = memref.dim %arg0, %c1 : tensor<5x?x?xf32> + %1 = memref.dim %arg0, %c2 : tensor<5x?x?xf32> %2 = linalg.init_tensor [5, %0, %1] : tensor<5x?x?xf32> %3 = linalg.generic { indexing_maps = [#map0, #map1, #map1], @@ -282,8 +282,8 @@ %c1 = constant 1 : index %c2 = constant 2 : index %cst = constant dense<42.0> : tensor - %0 = dim %arg0, %c1 : tensor<5x?x?xf32> - %1 = dim %arg0, %c2 : tensor<5x?x?xf32> + %0 = memref.dim %arg0, %c1 : tensor<5x?x?xf32> + %1 = memref.dim %arg0, %c2 : tensor<5x?x?xf32> %2 = linalg.init_tensor [5, %0, %1] : tensor<5x?x?xf32> %3 = linalg.indexed_generic { indexing_maps = [#map0, #map1, #map1], @@ -314,8 +314,8 @@ %arg1: tensor) -> tensor { %c0 = constant 0 : index %c1 = constant 1 : index - %0 = dim %arg0, %c0 : tensor - %1 = dim %arg0, %c1 : tensor + %0 = memref.dim %arg0, %c0 : tensor + %1 = memref.dim %arg0, %c1 : tensor %2 = linalg.init_tensor [%0, %1] : tensor %3 = linalg.generic { indexing_maps = [#map0, #map0, #map0], @@ -364,8 +364,8 @@ %arg1: tensor) -> tensor { %c0 = constant 0 : index %c1 = constant 1 : index - %0 = dim %arg0, %c0 : tensor - %1 = dim %arg0, %c1 : tensor + %0 = memref.dim %arg0, %c0 : tensor + %1 = memref.dim %arg0, %c1 : tensor %2 = linalg.init_tensor [%0, %1] : tensor %3 = linalg.indexed_generic { indexing_maps = [#map0, #map0], @@ -415,8 +415,8 @@ func @indexed_generic_op_fusion(%arg0: tensor) -> tensor { %c0 = constant 0 : index %c1 = constant 1 : index - %0 = dim %arg0, %c0 : tensor - %1 = dim %arg0, %c1 : tensor + %0 = memref.dim %arg0, %c0 : tensor + %1 = memref.dim %arg0, %c1 : tensor %2 = linalg.init_tensor [%0, %1] : tensor %3 = linalg.indexed_generic { indexing_maps = [#map0, #map0], @@ -468,7 +468,7 @@ func @scalar_indexed_generic_fusion (%arg0: tensor<5x1x1xf32>, %arg1 : tensor) -> tensor<10xf32> { - %c0 = constant 0 : index + %c0 = constant 0 : index %cst = constant dense<1.000000e+00> : tensor<10xf32> %0 = linalg.init_tensor [] : tensor %1 = linalg.indexed_generic diff --git a/mlir/test/Dialect/Linalg/fusion.mlir b/mlir/test/Dialect/Linalg/fusion.mlir --- a/mlir/test/Dialect/Linalg/fusion.mlir +++ b/mlir/test/Dialect/Linalg/fusion.mlir @@ -11,22 +11,22 @@ %c3 = constant 3 : index %c2 = constant 2 : index %c1 = constant 1 : index - %0 = dim %A, %c0 : memref - %1 = dim %A, %c1 : memref - %2 = dim %B, %c1 : memref + %0 = memref.dim %A, %c0 : memref + %1 = memref.dim %A, %c1 : memref + %2 = memref.dim %B, %c1 : memref linalg.matmul ins(%A, %B : memref, memref) outs(%C : memref) scf.for %arg5 = %c0 to %0 step %c2 { scf.for %arg6 = %c0 to %2 step %c3 { scf.for %arg7 = %c0 to %1 step %c4 { - %5 = std.subview %A[%arg5, %arg7][%c2, %c4][%c1, %c1] : + %5 = memref.subview %A[%arg5, %arg7][%c2, %c4][%c1, %c1] : memref to memref - %7 = std.subview %B[%arg7, %arg6][%c4, %c3][%c1, %c1] : + %7 = memref.subview %B[%arg7, %arg6][%c4, %c3][%c1, %c1] : memref to memref - %8 = std.subview %C[%arg5, %arg6][%c2, %c3][%c1, %c1] : + %8 = memref.subview %C[%arg5, %arg6][%c2, %c3][%c1, %c1] : memref to memref linalg.matmul ins(%5, %7 : memref, @@ -62,19 +62,19 @@ linalg.matmul ins(%A, %B : memref, memref) outs(%C: memref) - %0 = dim %C, %c0 : memref - %1 = dim %C, %c1 : memref - %2 = dim %D, %c1 : memref + %0 = 
memref.dim %C, %c0 : memref + %1 = memref.dim %C, %c1 : memref + %2 = memref.dim %D, %c1 : memref scf.for %arg5 = %c0 to %0 step %c2 { scf.for %arg6 = %c0 to %2 step %c3 { scf.for %arg7 = %c0 to %1 step %c4 { - %5 = std.subview %C[%arg5, %arg7][%c2, %c4][%c1, %c1] : + %5 = memref.subview %C[%arg5, %arg7][%c2, %c4][%c1, %c1] : memref to memref - %7 = std.subview %D[%arg7, %arg6][%c4, %c3][%c1, %c1] : + %7 = memref.subview %D[%arg7, %arg6][%c4, %c3][%c1, %c1] : memref to memref - %8 = std.subview %E[%arg5, %arg6][%c2, %c3][%c1, %c1] : + %8 = memref.subview %E[%arg5, %arg6][%c2, %c3][%c1, %c1] : memref to memref linalg.matmul ins(%5, %7 : memref, @@ -87,9 +87,9 @@ } // CHECK-LABEL: func @f2 // CHECK: (%[[A:.*]]:{{.*}}, %[[B:.*]]:{{.*}}, %[[C:.*]]:{{.*}}, %[[D:.*]]:{{.*}}, %[[E:.*]]:{{.*}}) -// CHECK-DAG: %[[C_0:.*]] = dim %[[C]], %c0{{[_0-9]*}} : memref -// CHECK-DAG: %[[C_1:.*]] = dim %[[C]], %c1{{[_0-9]*}} : memref -// CHECK-DAG: %[[D_1:.*]] = dim %[[D]], %c1{{[_0-9]*}} : memref +// CHECK-DAG: %[[C_0:.*]] = memref.dim %[[C]], %c0{{[_0-9]*}} : memref +// CHECK-DAG: %[[C_1:.*]] = memref.dim %[[C]], %c1{{[_0-9]*}} : memref +// CHECK-DAG: %[[D_1:.*]] = memref.dim %[[D]], %c1{{[_0-9]*}} : memref // CHECK: scf.for %{{.*}} = %{{.*}} to %[[C_0]] step %{{.*}} { // CHECK: scf.for %{{.*}} = %{{.*}} to %[[D_1]] step %{{.*}} { // CHECK: scf.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} { @@ -114,19 +114,19 @@ linalg.matmul ins(%A, %B : memref, memref) outs(%C : memref) - %0 = dim %D, %c0 : memref - %1 = dim %D, %c1 : memref - %2 = dim %C, %c1 : memref + %0 = memref.dim %D, %c0 : memref + %1 = memref.dim %D, %c1 : memref + %2 = memref.dim %C, %c1 : memref scf.for %arg5 = %c0 to %0 step %c2 { scf.for %arg6 = %c0 to %2 step %c3 { scf.for %arg7 = %c0 to %1 step %c4 { - %5 = std.subview %D[%arg5, %arg7][%c2, %c4][%c1, %c1] : + %5 = memref.subview %D[%arg5, %arg7][%c2, %c4][%c1, %c1] : memref to memref - %7 = std.subview %C[%arg7, %arg6][%c4, %c3][%c1, %c1] : + %7 = memref.subview %C[%arg7, %arg6][%c4, %c3][%c1, %c1] : memref to memref - %8 = std.subview %E[%arg5, %arg6][%c2, %c3][%c1, %c1] : + %8 = memref.subview %E[%arg5, %arg6][%c2, %c3][%c1, %c1] : memref to memref linalg.matmul ins(%5, %7 : memref, @@ -141,9 +141,9 @@ // CHECK: (%[[A:.*]]:{{.*}}, %[[B:.*]]:{{.*}}, %[[C:.*]]:{{.*}}, %[[D:.*]]:{{.*}}, %[[E:.*]]:{{.*}}) // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index -// CHECK: %[[D_0:.*]] = dim %[[D]], %[[C0]] : memref -// CHECK: %[[D_1:.*]] = dim %[[D]], %[[C1]] : memref -// CHECK: %[[C_1:.*]] = dim %[[C]], %[[C1]] : memref +// CHECK: %[[D_0:.*]] = memref.dim %[[D]], %[[C0]] : memref +// CHECK: %[[D_1:.*]] = memref.dim %[[D]], %[[C1]] : memref +// CHECK: %[[C_1:.*]] = memref.dim %[[C]], %[[C1]] : memref // CHECK: scf.for %{{.*}} = %{{.*}} to %[[D_0]] step %{{.*}} { // CHECK: scf.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} { // CHECK: scf.for %{{.*}} = %{{.*}} to %[[D_1]] step %{{.*}} { @@ -171,19 +171,19 @@ linalg.matmul ins(%A, %B : memref, memref) outs(%D : memref) - %0 = dim %C, %c0 : memref - %1 = dim %C, %c1 : memref - %2 = dim %D, %c1 : memref + %0 = memref.dim %C, %c0 : memref + %1 = memref.dim %C, %c1 : memref + %2 = memref.dim %D, %c1 : memref scf.for %arg5 = %c0 to %0 step %c2 { scf.for %arg6 = %c0 to %2 step %c3 { scf.for %arg7 = %c0 to %1 step %c4 { - %5 = std.subview %C[%arg5, %arg7][%c2, %c4][%c1, %c1] : + %5 = memref.subview %C[%arg5, %arg7][%c2, %c4][%c1, %c1] : memref to memref - %7 = std.subview %D[%arg7, %arg6][%c4, %c3][%c1, %c1] : + %7 = 
memref.subview %D[%arg7, %arg6][%c4, %c3][%c1, %c1] : memref to memref - %8 = std.subview %E[%arg5, %arg6][%c2, %c3][%c1, %c1] : + %8 = memref.subview %E[%arg5, %arg6][%c2, %c3][%c1, %c1] : memref to memref linalg.matmul ins(%5, %7 : memref, @@ -198,9 +198,9 @@ // CHECK: (%[[A:.*]]:{{.*}}, %[[B:.*]]:{{.*}}, %[[C:.*]]:{{.*}}, %[[D:.*]]:{{.*}}, %[[E:.*]]:{{.*}}) // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index -// CHECK: %[[C_0:.*]] = dim %[[C]], %[[C0:.*]] : memref -// CHECK: %[[C_1:.*]] = dim %[[C]], %[[C1:.*]] : memref -// CHECK: %[[D_1:.*]] = dim %[[D]], %[[C1:.*]] : memref +// CHECK: %[[C_0:.*]] = memref.dim %[[C]], %[[C0:.*]] : memref +// CHECK: %[[C_1:.*]] = memref.dim %[[C]], %[[C1:.*]] : memref +// CHECK: %[[D_1:.*]] = memref.dim %[[D]], %[[C1:.*]] : memref // CHECK: scf.for %{{.*}} = %{{.*}} to %[[C_0]] step %{{.*}} { // CHECK: scf.for %{{.*}} = %{{.*}} to %[[D_1]] step %{{.*}} { // CHECK: scf.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} { @@ -223,9 +223,9 @@ %c4 = constant 4 : index %c3 = constant 3 : index %c2 = constant 2 : index - %0 = dim %B, %c1 : memref - %1 = dim %D, %c0 : memref - %2 = dim %D, %c1 : memref + %0 = memref.dim %B, %c1 : memref + %1 = memref.dim %D, %c0 : memref + %2 = memref.dim %D, %c1 : memref linalg.matmul ins(%A, %B : memref, memref) outs(%C : memref) @@ -235,13 +235,13 @@ scf.for %arg5 = %c0 to %1 step %c2 { scf.for %arg6 = %c0 to %0 step %c3 { scf.for %arg7 = %c0 to %2 step %c4 { - %5 = std.subview %D[%arg5, %arg7][%c2, %c4][%c1, %c1] : + %5 = memref.subview %D[%arg5, %arg7][%c2, %c4][%c1, %c1] : memref to memref - %7 = std.subview %B[%arg7, %arg6][%c4, %c3][%c1, %c1] : + %7 = memref.subview %B[%arg7, %arg6][%c4, %c3][%c1, %c1] : memref to memref - %8 = std.subview %E[%arg5, %arg6][%c2, %c3][%c1, %c1] : + %8 = memref.subview %E[%arg5, %arg6][%c2, %c3][%c1, %c1] : memref to memref linalg.matmul ins(%5, %7 : memref, @@ -256,19 +256,19 @@ // CHECK: (%[[A:.*]]:{{.*}}, %[[B:.*]]:{{.*}}, %[[C:.*]]:{{.*}}, %[[D:.*]]:{{.*}}, %[[E:.*]]:{{.*}}) // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index -// CHECK-DAG: %[[B_1:.*]] = dim %[[B]], %[[C1:.*]] : memref -// CHECK-DAG: %[[D_0:.*]] = dim %[[D]], %[[C0:.*]] : memref -// CHECK-DAG: %[[D_1:.*]] = dim %[[D]], %[[C1:.*]] : memref -// CHECK-DAG: %[[B_00:.*]] = subview %[[B]][0, 0]{{.*}} +// CHECK-DAG: %[[B_1:.*]] = memref.dim %[[B]], %[[C1:.*]] : memref +// CHECK-DAG: %[[D_0:.*]] = memref.dim %[[D]], %[[C0:.*]] : memref +// CHECK-DAG: %[[D_1:.*]] = memref.dim %[[D]], %[[C1:.*]] : memref +// CHECK-DAG: %[[B_00:.*]] = memref.subview %[[B]][0, 0]{{.*}} // CHECK: scf.for %[[I:.*]] = %{{.*}} to %[[D_0]] step %{{.*}} { -// CHECK-DAG: %[[A_I0:.*]] = subview %[[A]][%[[I]], 0] -// CHECK-DAG: %[[C_I0:.*]] = subview %[[C]][%[[I]], 0] +// CHECK-DAG: %[[A_I0:.*]] = memref.subview %[[A]][%[[I]], 0] +// CHECK-DAG: %[[C_I0:.*]] = memref.subview %[[C]][%[[I]], 0] // CHECK: scf.for %[[J:.*]] = %{{.*}} to %[[B_1]] step %{{.*}} { -// CHECK: %[[E_IJ:.*]] = subview %[[E]][%[[I]], %[[J]]] +// CHECK: %[[E_IJ:.*]] = memref.subview %[[E]][%[[I]], %[[J]]] // CHECK: scf.for %[[K:.*]] = %{{.*}} to %[[D_1]] step %{{.*}} { -// CHECK-DAG: %[[D_IK:.*]] = subview %[[D]][%[[I]], %[[K]]] -// CHECK-DAG: %[[B_0K:.*]] = subview %[[B]][0, %[[K]]] -// CHECK-DAG: %[[B_KJ:.*]] = subview %[[B]][%[[K]], %[[J]]] +// CHECK-DAG: %[[D_IK:.*]] = memref.subview %[[D]][%[[I]], %[[K]]] +// CHECK-DAG: %[[B_0K:.*]] = memref.subview %[[B]][0, %[[K]]] +// CHECK-DAG: %[[B_KJ:.*]] = 
memref.subview %[[B]][%[[K]], %[[J]]] // CHECK: linalg.matmul ins(%[[A_I0]], %[[B_00]]{{.*}} outs(%[[C_I0]] // CHECK: linalg.matmul ins(%[[C_I0]], %[[B_0K]]{{.*}} outs(%[[D_IK]] // CHECK: linalg.matmul ins(%[[D_IK]], %[[B_KJ]]{{.*}} outs(%[[E_IJ]] @@ -290,28 +290,28 @@ %c4 = constant 4 : index %c3 = constant 3 : index %c2 = constant 2 : index - %0 = dim %C, %c1 : memref + %0 = memref.dim %C, %c1 : memref linalg.matmul ins(%A, %B : memref, memref) outs(%C : memref) linalg.matmul ins(%A, %C : memref, memref) outs(%E : memref) - %1 = dim %C, %c0 : memref - %2 = dim %D, %c1 : memref + %1 = memref.dim %C, %c0 : memref + %2 = memref.dim %D, %c1 : memref scf.for %arg5 = %c0 to %1 step %c2 { scf.for %arg6 = %c0 to %2 step %c3 { scf.for %arg7 = %c0 to %0 step %c4 { %3 = affine.apply #map0(%arg5) %4 = affine.apply #map1(%arg7) - %5 = std.subview %C[%arg5, %arg7][%c2, %c4][%c1, %c1] : + %5 = memref.subview %C[%arg5, %arg7][%c2, %c4][%c1, %c1] : memref to memref %6 = affine.apply #map2(%arg6) - %7 = std.subview %D[%arg7, %arg6][%c4, %c3][%c1, %c1] : + %7 = memref.subview %D[%arg7, %arg6][%c4, %c3][%c1, %c1] : memref to memref - %8 = std.subview %E[%arg5, %arg6][%c2, %c3][%c1, %c1] : + %8 = memref.subview %E[%arg5, %arg6][%c2, %c3][%c1, %c1] : memref to memref linalg.matmul ins(%5, %7 : memref, @@ -345,11 +345,11 @@ %c4 = constant 4 : index %c3 = constant 3 : index %c2 = constant 2 : index - %0 = dim %A, %c0 : memref - %1 = dim %A, %c1 : memref - %2 = dim %C, %c1 : memref - %3 = dim %C, %c0 : memref - %4 = dim %D, %c1 : memref + %0 = memref.dim %A, %c0 : memref + %1 = memref.dim %A, %c1 : memref + %2 = memref.dim %C, %c1 : memref + %3 = memref.dim %C, %c0 : memref + %4 = memref.dim %D, %c1 : memref linalg.matmul ins(%A, %C : memref, memref) outs(%E : memref) @@ -359,13 +359,13 @@ scf.for %arg5 = %c0 to %0 step %c2 { scf.for %arg6 = %c0 to %2 step %c3 { scf.for %arg7 = %c0 to %1 step %c4 { - %7 = std.subview %A[%arg5, %arg7][%c2, %c4][%c1, %c1] : + %7 = memref.subview %A[%arg5, %arg7][%c2, %c4][%c1, %c1] : memref to memref - %9 = std.subview %C[%arg7, %arg6][%c4, %c3][%c1, %c1] : + %9 = memref.subview %C[%arg7, %arg6][%c4, %c3][%c1, %c1] : memref to memref - %10 = std.subview %E[%arg5, %arg6][%c2, %c3][%c1, %c1] : + %10 = memref.subview %E[%arg5, %arg6][%c2, %c3][%c1, %c1] : memref to memref linalg.matmul ins(%7, %9 : memref, @@ -377,13 +377,13 @@ scf.for %arg5 = %c0 to %3 step %c2 { scf.for %arg6 = %c0 to %4 step %c3 { scf.for %arg7 = %c0 to %2 step %c4 { - %7 = std.subview %C[%arg5, %arg7][%c2, %c4][%c1, %c1] : + %7 = memref.subview %C[%arg5, %arg7][%c2, %c4][%c1, %c1] : memref to memref - %9 = std.subview %D[%arg7, %arg6][%c4, %c3][%c1, %c1] : + %9 = memref.subview %D[%arg7, %arg6][%c4, %c3][%c1, %c1] : memref to memref - %10 = std.subview %E[%arg5, %arg6][%c2, %c3][%c1, %c1] : + %10 = memref.subview %E[%arg5, %arg6][%c2, %c3][%c1, %c1] : memref to memref linalg.matmul ins(%7, %9 : memref, @@ -398,11 +398,11 @@ // CHECK: (%[[A:.*]]:{{.*}}, %[[B:.*]]:{{.*}}, %[[C:.*]]:{{.*}}, %[[D:.*]]:{{.*}}, %[[E:.*]]:{{.*}}) // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index -// CHECK: %[[A_0:.*]] = dim %[[A]], %[[C0:.*]] : memref -// CHECK: %[[A_1:.*]] = dim %[[A]], %[[C1:.*]] : memref -// CHECK: %[[C_1:.*]] = dim %[[C]], %[[C1:.*]] : memref -// CHECK: %[[C_0:.*]] = dim %[[C]], %[[C0:.*]] : memref -// CHECK: %[[D_1:.*]] = dim %[[D]], %[[C1:.*]] : memref +// CHECK: %[[A_0:.*]] = memref.dim %[[A]], %[[C0:.*]] : memref +// CHECK: %[[A_1:.*]] = memref.dim %[[A]], %[[C1:.*]] : 
memref +// CHECK: %[[C_1:.*]] = memref.dim %[[C]], %[[C1:.*]] : memref +// CHECK: %[[C_0:.*]] = memref.dim %[[C]], %[[C0:.*]] : memref +// CHECK: %[[D_1:.*]] = memref.dim %[[D]], %[[C1:.*]] : memref // CHECK: linalg.matmul ins(%[[A]], %[[C]]{{.*}} outs(%[[E]] // CHECK: scf.for %{{.*}} = %{{.*}} to %[[A_0]] step %{{.*}} { // CHECK: scf.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} { @@ -432,28 +432,28 @@ %c4 = constant 4 : index %c3 = constant 3 : index %c2 = constant 2 : index - %0 = dim %A, %c0 : memref - %1 = dim %A, %c1 : memref + %0 = memref.dim %A, %c0 : memref + %1 = memref.dim %A, %c1 : memref linalg.matmul ins(%A, %C : memref, memref) outs(%D : memref) linalg.matmul ins(%A, %B : memref, memref) outs(%C : memref) - %2 = dim %D, %c1 : memref + %2 = memref.dim %D, %c1 : memref scf.for %arg5 = %c0 to %0 step %c2 { scf.for %arg6 = %c0 to %2 step %c3 { scf.for %arg7 = %c0 to %1 step %c4 { %3 = affine.apply #map0(%arg5) %4 = affine.apply #map1(%arg7) - %5 = std.subview %A[%arg5, %arg7][%c2, %c4][%c1, %c1] : + %5 = memref.subview %A[%arg5, %arg7][%c2, %c4][%c1, %c1] : memref to memref %6 = affine.apply #map2(%arg6) - %7 = std.subview %D[%arg7, %arg6][%c4, %c3][%c1, %c1] : + %7 = memref.subview %D[%arg7, %arg6][%c4, %c3][%c1, %c1] : memref to memref - %8 = std.subview %E[%arg5, %arg6][%c2, %c3][%c1, %c1] : + %8 = memref.subview %E[%arg5, %arg6][%c2, %c3][%c1, %c1] : memref to memref linalg.matmul ins(%5, %7 : memref, @@ -497,17 +497,17 @@ %2 = addf %E, %arg5 : f32 linalg.yield %2 : f32 } - %0 = dim %B, %c0 : memref - %1 = dim %B, %c1 : memref + %0 = memref.dim %B, %c0 : memref + %1 = memref.dim %B, %c1 : memref scf.for %arg4 = %c0 to %0 step %c2 { scf.for %arg5 = %c0 to %1 step %c3 { - %4 = std.subview %B[%arg4, %arg5][%c2, %c3][%c1, %c1] : + %4 = memref.subview %B[%arg4, %arg5][%c2, %c3][%c1, %c1] : memref to memref - %5 = std.subview %C[%arg4, %arg5][%c2, %c3][%c1, %c1] : + %5 = memref.subview %C[%arg4, %arg5][%c2, %c3][%c1, %c1] : memref to memref - %6 = std.subview %D[%arg4, %arg5][%c2, %c3][%c1, %c1] : + %6 = memref.subview %D[%arg4, %arg5][%c2, %c3][%c1, %c1] : memref to memref linalg.generic #pointwise_2d_trait @@ -543,11 +543,11 @@ %c0 = constant 0 : index %c3 = constant 3 : index %c2 = constant 2 : index - %A = alloc (%M, %N): memref - %B = alloc (%M, %N): memref - %C = alloc (%M, %N): memref - %D = alloc (%M, %N): memref - %E = alloc (%M, %N): memref + %A = memref.alloc (%M, %N): memref + %B = memref.alloc (%M, %N): memref + %C = memref.alloc (%M, %N): memref + %D = memref.alloc (%M, %N): memref + %E = memref.alloc (%M, %N): memref linalg.generic #pointwise_2d_trait ins(%A, %A : memref, memref) outs(%B : memref) { @@ -555,17 +555,17 @@ %2 = addf %e, %arg5 : f32 linalg.yield %2 : f32 } - %0 = dim %B, %c0 : memref - %1 = dim %B, %c1 : memref + %0 = memref.dim %B, %c0 : memref + %1 = memref.dim %B, %c1 : memref scf.for %arg4 = %c0 to %0 step %c2 { scf.for %arg5 = %c0 to %1 step %c3 { - %4 = std.subview %B[%arg4, %arg5][%c2, %c3][%c1, %c1] : + %4 = memref.subview %B[%arg4, %arg5][%c2, %c3][%c1, %c1] : memref to memref - %5 = std.subview %C[%arg4, %arg5][%c2, %c3][%c1, %c1] : + %5 = memref.subview %C[%arg4, %arg5][%c2, %c3][%c1, %c1] : memref to memref - %6 = std.subview %D[%arg4, %arg5][%c2, %c3][%c1, %c1] : + %6 = memref.subview %D[%arg4, %arg5][%c2, %c3][%c1, %c1] : memref to memref linalg.generic #pointwise_2d_trait @@ -601,7 +601,7 @@ %arg2: memref<100x10xf32>) { %c0 = constant 0 : index %c1 = constant 1 : index - %0 = alloc() {temp = true} : memref<100x10xf32> + %0 = 
memref.alloc() {temp = true} : memref<100x10xf32> linalg.generic { indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel"]} @@ -610,7 +610,7 @@ ^bb0(%arg3: f32, %arg4: f32): // no predecessors linalg.yield %arg3 : f32 } - %1 = alloc() {temp = true} : memref<100x10xf32> + %1 = memref.alloc() {temp = true} : memref<100x10xf32> linalg.generic { indexing_maps = [#map1, #map1, #map1], iterator_types = ["parallel", "parallel"]} @@ -620,16 +620,16 @@ %2 = subf %arg3, %arg4 : f32 linalg.yield %2 : f32 } - dealloc %0 : memref<100x10xf32> - %2 = dim %1, %c0 : memref<100x10xf32> - %3 = dim %1, %c1 : memref<100x10xf32> - %4 = dim %arg2, %c0 : memref<100x10xf32> - %5 = dim %arg2, %c1 : memref<100x10xf32> + memref.dealloc %0 : memref<100x10xf32> + %2 = memref.dim %1, %c0 : memref<100x10xf32> + %3 = memref.dim %1, %c1 : memref<100x10xf32> + %4 = memref.dim %arg2, %c0 : memref<100x10xf32> + %5 = memref.dim %arg2, %c1 : memref<100x10xf32> scf.for %i = %c0 to %2 step %c1 { scf.for %j = %c0 to %3 step %c1 { - %6 = std.subview %1[%i, %j][%c1, %c1][%c1, %c1] : + %6 = memref.subview %1[%i, %j][%c1, %c1][%c1, %c1] : memref<100x10xf32> to memref - %7 = std.subview %arg2[%i, %j][%c1, %c1][%c1, %c1] : + %7 = memref.subview %arg2[%i, %j][%c1, %c1][%c1, %c1] : memref<100x10xf32> to memref linalg.generic { indexing_maps = [#map1, #map1], @@ -642,7 +642,7 @@ } } } - dealloc %1 : memref<100x10xf32> + memref.dealloc %1 : memref<100x10xf32> return } // CHECK-LABEL: func @fusion @@ -674,15 +674,15 @@ %c0 = constant 0 : index %c2 = constant 2 : index %c3 = constant 3 : index - %4 = dim %arg1, %c0 : memref<2x3x1x1xf32> - %5 = dim %arg1, %c1 : memref<2x3x1x1xf32> - %6 = dim %arg0, %c0 : memref - %7 = dim %arg0, %c1 : memref - %8 = dim %arg0, %c3 : memref - %9 = dim %arg2, %c0 : memref - %10 = dim %arg2, %c1 : memref - %11 = dim %arg2, %c2 : memref - %12 = dim %arg2, %c3 : memref + %4 = memref.dim %arg1, %c0 : memref<2x3x1x1xf32> + %5 = memref.dim %arg1, %c1 : memref<2x3x1x1xf32> + %6 = memref.dim %arg0, %c0 : memref + %7 = memref.dim %arg0, %c1 : memref + %8 = memref.dim %arg0, %c3 : memref + %9 = memref.dim %arg2, %c0 : memref + %10 = memref.dim %arg2, %c1 : memref + %11 = memref.dim %arg2, %c2 : memref + %12 = memref.dim %arg2, %c3 : memref %13 = linalg.range %c0 : %6 : %c2 : !linalg.range %14 = linalg.range %c0 : %10 : %c3 : !linalg.range scf.for %arg3 = %c0 to %6 step %c2 { @@ -690,14 +690,14 @@ %15 = affine.min #map0(%c2, %c1, %arg3) %16 = affine.apply #map2()[%7] %17 = affine.min #map0(%16, %c4, %arg4) - %18 = dim %arg0, %c2 : memref - %19 = dim %arg0, %c3 : memref - %20 = subview %arg0[%arg3, %arg4, %c0, %c0] [%15, %17, %18, %19] [%c1, %c1, %c1, %c1] : memref to memref + %18 = memref.dim %arg0, %c2 : memref + %19 = memref.dim %arg0, %c3 : memref + %20 = memref.subview %arg0[%arg3, %arg4, %c0, %c0] [%15, %17, %18, %19] [%c1, %c1, %c1, %c1] : memref to memref %21 = affine.min #map0(%c2, %c1, %arg3) %22 = affine.min #map0(%c3, %c4, %arg4) - %23 = dim %arg2, %c2 : memref - %24 = dim %arg2, %c3 : memref - %25 = subview %arg2[%arg3, %arg4, %c0, %c0] [%21, %22, %23, %24] [%c1, %c1, %c1, %c1] : memref to memref + %23 = memref.dim %arg2, %c2 : memref + %24 = memref.dim %arg2, %c3 : memref + %25 = memref.subview %arg2[%arg3, %arg4, %c0, %c0] [%21, %22, %23, %24] [%c1, %c1, %c1, %c1] : memref to memref linalg.conv(%arg1, %20, %25) {dilations = [1, 1], strides = [1, 1]} : memref<2x3x1x1xf32>, memref, memref } } @@ -719,9 +719,9 @@ %c3 = constant 3 : index %c4 = constant 4 : index - %A = alloca(%dim, %dim)[%s0, 
%s1] : memref - %B = alloca(%dim, %dim)[%s0, %s1] : memref - %C = alloc(%dim, %dim)[%s0, %s1] : memref + %A = memref.alloca(%dim, %dim)[%s0, %s1] : memref + %B = memref.alloca(%dim, %dim)[%s0, %s1] : memref + %C = memref.alloc(%dim, %dim)[%s0, %s1] : memref linalg.matmul ins(%A, %B : memref, memref) @@ -730,13 +730,13 @@ scf.for %i = %c0 to %dim step %c2 { scf.for %j = %c0 to %dim step %c3 { scf.for %k = %c0 to %dim step %c4 { - %0 = std.subview %A[%i, %k][%c2, %c4][%c1, %c1] : + %0 = memref.subview %A[%i, %k][%c2, %c4][%c1, %c1] : memref to memref - %1 = std.subview %B[%k, %j][%c4, %c3][%c1, %c1] : + %1 = memref.subview %B[%k, %j][%c4, %c3][%c1, %c1] : memref to memref - %2 = std.subview %C[%i, %j][%c2, %c3][%c1, %c1] : + %2 = memref.subview %C[%i, %j][%c2, %c3][%c1, %c1] : memref to memref linalg.matmul ins(%0, %1 : memref, diff --git a/mlir/test/Dialect/Linalg/hoist-padding.mlir b/mlir/test/Dialect/Linalg/hoist-padding.mlir --- a/mlir/test/Dialect/Linalg/hoist-padding.mlir +++ b/mlir/test/Dialect/Linalg/hoist-padding.mlir @@ -29,12 +29,12 @@ %c0 = constant 0 : index %c1 = constant 1 : index - // CHECK-DAG: %[[dM:.*]] = dim %[[TA]], %[[C0]] : tensor - // CHECK-DAG: %[[dK:.*]] = dim %[[TA]], %[[C1]] : tensor - // CHECK-DAG: %[[dN:.*]] = dim %[[TB]], %[[C1]] : tensor - %0 = dim %arg0, %c0 : tensor - %1 = dim %arg0, %c1 : tensor - %2 = dim %arg1, %c1 : tensor + // CHECK-DAG: %[[dM:.*]] = memref.dim %[[TA]], %[[C0]] : tensor + // CHECK-DAG: %[[dK:.*]] = memref.dim %[[TA]], %[[C1]] : tensor + // CHECK-DAG: %[[dN:.*]] = memref.dim %[[TB]], %[[C1]] : tensor + %0 = memref.dim %arg0, %c0 : tensor + %1 = memref.dim %arg0, %c1 : tensor + %2 = memref.dim %arg1, %c1 : tensor // CHECK: scf.for %[[I:[0-9a-z]+]] = // First padded tensor is MxKx2x4 under loop M so Kx2x4 @@ -85,19 +85,19 @@ %3 = scf.for %arg3 = %c0 to %0 step %c2 iter_args(%arg4 = %arg2) -> (tensor) { %4 = scf.for %arg5 = %c0 to %2 step %c3 iter_args(%arg6 = %arg4) -> (tensor) { %5 = scf.for %arg7 = %c0 to %1 step %c4 iter_args(%arg8 = %arg6) -> (tensor) { - %6 = dim %arg0, %c0 : tensor + %6 = memref.dim %arg0, %c0 : tensor %7 = affine.min #map0(%arg3)[%6] - %8 = dim %arg0, %c1 : tensor + %8 = memref.dim %arg0, %c1 : tensor %9 = affine.min #map1(%arg7)[%8] %10 = subtensor %arg0[%arg3, %arg7] [%7, %9] [1, 1] : tensor to tensor - %11 = dim %arg1, %c0 : tensor + %11 = memref.dim %arg1, %c0 : tensor %12 = affine.min #map1(%arg7)[%11] - %13 = dim %arg1, %c1 : tensor + %13 = memref.dim %arg1, %c1 : tensor %14 = affine.min #map2(%arg5)[%13] %15 = subtensor %arg1[%arg7, %arg5] [%12, %14] [1, 1] : tensor to tensor - %16 = dim %arg8, %c0 : tensor + %16 = memref.dim %arg8, %c0 : tensor %17 = affine.min #map3(%16, %arg3) - %18 = dim %arg8, %c1 : tensor + %18 = memref.dim %arg8, %c1 : tensor %19 = affine.min #map4(%18, %arg5) %20 = subtensor %arg8[%arg3, %arg5] [%17, %19] [1, 1] : tensor to tensor %21 = subi %c2, %7 : index diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir --- a/mlir/test/Dialect/Linalg/invalid.mlir +++ b/mlir/test/Dialect/Linalg/invalid.mlir @@ -3,7 +3,7 @@ func @load_number_of_indices(%v : memref) { // expected-error @+2 {{incorrect number of indices for load}} %c0 = constant 0 : index - load %v[%c0] : memref + memref.load %v[%c0] : memref } // ----- @@ -12,7 +12,7 @@ // expected-error @+3 {{store index operand count not equal to memref rank}} %c0 = constant 0 : index %f0 = constant 0.0 : f32 - store %f0, %v[%c0] : memref + memref.store %f0, %v[%c0] : memref } // ----- diff --git 
a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir --- a/mlir/test/Dialect/Linalg/loops.mlir +++ b/mlir/test/Dialect/Linalg/loops.mlir @@ -34,9 +34,9 @@ func @matmul(%arg0: memref, %M: index, %N: index, %K: index) { %c0 = constant 0 : index %c1 = constant 1 : index - %A = view %arg0[%c0][%M, %K] : memref to memref - %B = view %arg0[%c0][%K, %N] : memref to memref - %C = view %arg0[%c0][%M, %N] : memref to memref + %A = memref.view %arg0[%c0][%M, %K] : memref to memref + %B = memref.view %arg0[%c0][%K, %N] : memref to memref + %C = memref.view %arg0[%c0][%M, %N] : memref to memref linalg.matmul ins(%A, %B: memref, memref) outs(%C: memref) return @@ -45,16 +45,16 @@ // CHECKLOOP-SAME: [[M:arg[0-9]+]]: index // CHECKLOOP-SAME: [[N:arg[0-9]+]]: index // CHECKLOOP-SAME: [[K:arg[0-9]+]]: index -// CHECKLOOP: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKLOOP: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKLOOP: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKLOOP: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref +// CHECKLOOP: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref +// CHECKLOOP: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[N]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref -// CHECKLOOP-DAG: %[[b:.*]] = load %[[B]][%{{.*}}, %{{.*}}] : memref +// CHECKLOOP-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref +// CHECKLOOP-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}, %{{.*}}] : memref // CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKLOOP-DAG: %[[c:.*]] = load %[[C]][%{{.*}}, %{{.*}}] : memref +// CHECKLOOP-DAG: %[[c:.*]] = memref.load %[[C]][%{{.*}}, %{{.*}}] : memref // CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 // CHECKLOOP: store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref @@ -62,15 +62,15 @@ // CHECKPARALLEL-SAME: [[M:arg[0-9]+]]: index // CHECKPARALLEL-SAME: [[N:arg[0-9]+]]: index // CHECKPARALLEL-SAME: [[K:arg[0-9]+]]: index -// CHECKPARALLEL: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKPARALLEL: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKPARALLEL: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKPARALLEL: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref +// CHECKPARALLEL: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref +// CHECKPARALLEL: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref // CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[M]], %[[N]]) step (%{{.*}}, %{{.*}} { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKPARALLEL-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL-DAG: %[[b:.*]] = load %[[B]][%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}, %{{.*}}] : memref // CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKPARALLEL-DAG: %[[c:.*]] = load %[[C]][%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[c:.*]] = memref.load %[[C]][%{{.*}}, %{{.*}}] : memref // CHECKPARALLEL-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 // CHECKPARALLEL: store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref @@ -79,9 +79,9 
@@ func @matvec(%arg0: memref, %M: index, %N: index) { %c0 = constant 0 : index %c1 = constant 1 : index - %2 = view %arg0[%c0][%M, %N] : memref to memref - %3 = view %arg0[%c0][%M] : memref to memref - %4 = view %arg0[%c0][%N] : memref to memref + %2 = memref.view %arg0[%c0][%M, %N] : memref to memref + %3 = memref.view %arg0[%c0][%M] : memref to memref + %4 = memref.view %arg0[%c0][%N] : memref to memref linalg.matvec ins(%2, %3: memref, memref) outs(%4 : memref) return @@ -89,30 +89,30 @@ // CHECKLOOP-LABEL: func @matvec(%{{.*}}: memref, // CHECKLOOP-SAME: [[M:arg[0-9]+]]: index // CHECKLOOP-SAME: [[K:arg[0-9]+]]: index -// CHECKLOOP: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKLOOP: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKLOOP: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKLOOP: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref +// CHECKLOOP: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref +// CHECKLOOP: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref -// CHECKLOOP-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref +// CHECKLOOP-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref +// CHECKLOOP-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref // CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKLOOP-DAG: %[[c:.*]] = load %[[C]][%{{.*}}] : memref +// CHECKLOOP-DAG: %[[c:.*]] = memref.load %[[C]][%{{.*}}] : memref // CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 // CHECKLOOP: store %[[res]], %[[C]][%{{.*}}] : memref // CHECKPARALLEL-LABEL: func @matvec(%{{.*}}: memref, // CHECKPARALLEL-SAME: [[M:arg[0-9]+]]: index // CHECKPARALLEL-SAME: [[K:arg[0-9]+]]: index -// CHECKPARALLEL: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKPARALLEL: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKPARALLEL: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref +// CHECKPARALLEL: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref +// CHECKPARALLEL: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref +// CHECKPARALLEL: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref to memref // CHECKPARALLEL: scf.parallel (%{{.*}}) = (%{{.*}}) to (%[[M]]) step (%{{.*}}) { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKPARALLEL-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref // CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKPARALLEL-DAG: %[[c:.*]] = load %[[C]][%{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[c:.*]] = memref.load %[[C]][%{{.*}}] : memref // CHECKPARALLEL-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 // CHECKPARALLEL: store %[[res]], %[[C]][%{{.*}}] : memref @@ -120,36 +120,36 @@ func @dot(%arg0: memref, %M: index) { %c0 = constant 0 : index %c1 = constant 1 : index - %1 = view %arg0[%c0][%M] : memref to memref - %2 = view %arg0[%c0][%M] : memref to memref - %3 = view %arg0[%c0][] : memref to memref + %1 = memref.view %arg0[%c0][%M] : memref to memref + %2 = memref.view %arg0[%c0][%M] : memref to memref + %3 = memref.view %arg0[%c0][] : memref to memref 
linalg.dot ins(%1, %2 : memref, memref) outs(%3 : memref) return } // CHECKLOOP-LABEL: func @dot(%{{.*}}: memref, // CHECKLOOP-SAME: [[K:arg[0-9]+]]: index -// CHECKLOOP: %[[A:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref -// CHECKLOOP: %[[B:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref -// CHECKLOOP: %[[C:.*]] = std.view %{{.*}}[{{.*}}][] : memref to memref +// CHECKLOOP: %[[A:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref +// CHECKLOOP: %[[B:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref +// CHECKLOOP: %[[C:.*]] = memref.view %{{.*}}[{{.*}}][] : memref to memref // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP-DAG: %[[a:.*]] = load %[[A]][%{{.*}}] : memref -// CHECKLOOP-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref +// CHECKLOOP-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}] : memref +// CHECKLOOP-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref // CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKLOOP-DAG: %[[c:.*]] = load %[[C]][] : memref +// CHECKLOOP-DAG: %[[c:.*]] = memref.load %[[C]][] : memref // CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 // CHECKLOOP: store %[[res]], %[[C]][] : memref // CHECKPARALLEL-LABEL: func @dot(%{{.*}}: memref, // CHECKPARALLEL-SAME: [[K:arg[0-9]+]]: index -// CHECKPARALLEL: %[[A:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref -// CHECKPARALLEL: %[[B:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref -// CHECKPARALLEL: %[[C:.*]] = std.view %{{.*}}[{{.*}}][] : memref to memref +// CHECKPARALLEL: %[[A:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref +// CHECKPARALLEL: %[[B:.*]] = memref.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref +// CHECKPARALLEL: %[[C:.*]] = memref.view %{{.*}}[{{.*}}][] : memref to memref // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKPARALLEL-DAG: %[[a:.*]] = load %[[A]][%{{.*}}] : memref -// CHECKPARALLEL-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[a:.*]] = memref.load %[[A]][%{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[b:.*]] = memref.load %[[B]][%{{.*}}] : memref // CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKPARALLEL-DAG: %[[c:.*]] = load %[[C]][] : memref +// CHECKPARALLEL-DAG: %[[c:.*]] = memref.load %[[C]][] : memref // CHECKPARALLEL-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 // CHECKPARALLEL: store %[[res]], %[[C]][] : memref @@ -162,23 +162,23 @@ } // CHECKLOOP-LABEL: func @dot_view( // CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECKLOOP: %[[K:.*]] = dim %arg0, %c0 : memref +// CHECKLOOP: %[[K:.*]] = memref.dim %arg0, %c0 : memref // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP-DAG: %[[a:.*]] = load %arg0[%{{.*}}] : memref -// CHECKLOOP-DAG: %[[b:.*]] = load %{{.*}}[%{{.*}}] : memref +// CHECKLOOP-DAG: %[[a:.*]] = memref.load %arg0[%{{.*}}] : memref +// CHECKLOOP-DAG: %[[b:.*]] = memref.load %{{.*}}[%{{.*}}] : memref // CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKLOOP-DAG: %[[c:.*]] = load %{{.*}}[] : memref +// CHECKLOOP-DAG: %[[c:.*]] = memref.load %{{.*}}[] : memref // CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 // CHECKLOOP: store %[[res]], %{{.*}}[] : memref // CHECKPARALLEL-LABEL: func @dot_view( // CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECKPARALLEL: %[[K:.*]] = dim %arg0, %c0 : memref +// CHECKPARALLEL: %[[K:.*]] = memref.dim %arg0, %c0 : memref // 
CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKPARALLEL-DAG: %[[a:.*]] = load %arg0[%{{.*}}] : memref -// CHECKPARALLEL-DAG: %[[b:.*]] = load %{{.*}}[%{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[a:.*]] = memref.load %arg0[%{{.*}}] : memref +// CHECKPARALLEL-DAG: %[[b:.*]] = memref.load %{{.*}}[%{{.*}}] : memref // CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKPARALLEL-DAG: %[[c:.*]] = load %{{.*}}[] : memref +// CHECKPARALLEL-DAG: %[[c:.*]] = memref.load %{{.*}}[] : memref // CHECKPARALLEL-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 // CHECKPARALLEL: store %[[res]], %{{.*}}[] : memref @@ -229,13 +229,13 @@ // CHECKLOOP-LABEL: func @copy_view( // CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref) { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECKLOOP: %[[L:.*]] = load %{{.*}}[%{{.*}}] : memref +// CHECKLOOP: %[[L:.*]] = memref.load %{{.*}}[%{{.*}}] : memref // CHECKLOOP: store %[[L]], %{{.*}}[%{{.*}}] : memref // CHECKPARALLEL-LABEL: func @copy_view( // CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref) { // CHECKPARALLEL: scf.parallel (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { -// CHECKPARALLEL: %[[L:.*]] = load %{{.*}}[%{{.*}}] : memref +// CHECKPARALLEL: %[[L:.*]] = memref.load %{{.*}}[%{{.*}}] : memref // CHECKPARALLEL: store %[[L]], %{{.*}}[%{{.*}}] : memref func @copy_view0(%arg0: memref, %arg1: memref) { @@ -243,11 +243,11 @@ return } // CHECKLOOP-LABEL: func @copy_view0(%{{.*}}: memref, %{{.*}}: memref) { -// CHECKLOOP: %{{.*}} = load %{{.*}}[] : memref +// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[] : memref // CHECKLOOP: store %{{.*}}, %{{.*}}[] : memref // CHECKPARALLEL-LABEL: func @copy_view0(%{{.*}}: memref, %{{.*}}: memref) { -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[] : memref +// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[] : memref // CHECKPARALLEL: store %{{.*}}, %{{.*}}[] : memref func @copy_view3(%arg0: memref, %arg1: memref) { @@ -261,13 +261,13 @@ // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECKLOOP: %[[L:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %[[L:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECKLOOP: store %[[L]], %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECKPARALLEL-LABEL: func @copy_view3 // CHECKPARALLEL: (%{{.*}}: memref, %{{.*}}: memref) { // CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) { -// CHECKPARALLEL: %[[L:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %[[L:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECKPARALLEL: store %[[L]], %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref func @conv_view3(%arg0: memref, %arg1: memref, %arg2: memref) { @@ -276,39 +276,39 @@ } // CHECKLOOP-LABEL: func @conv_view3( // CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECKLOOP: %[[Z0:.*]] = dim %arg0, %c0 : memref -// CHECKLOOP: %[[Q:.*]] = dim %arg0, %c1 : memref -// CHECKLOOP: %[[K:.*]] = dim %arg0, %c2 : memref -// CHECKLOOP: %[[B:.*]] = dim %arg1, %c0 : memref -// CHECKLOOP: %[[X0:.*]] = dim %arg2, %c1 : memref +// CHECKLOOP: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref +// CHECKLOOP: %[[Q:.*]] = memref.dim %arg0, %c1 : memref +// CHECKLOOP: %[[K:.*]] = memref.dim %arg0, %c2 : memref +// CHECKLOOP: %[[B:.*]] 
= memref.dim %arg1, %c0 : memref +// CHECKLOOP: %[[X0:.*]] = memref.dim %arg2, %c1 : memref // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { // CHECKLOOP: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECKLOOP: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECKLOOP: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 // CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECKPARALLEL-LABEL: func @conv_view3( // CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECKPARALLEL: %[[Z0:.*]] = dim %arg0, %c0 : memref -// CHECKPARALLEL: %[[Q:.*]] = dim %arg0, %c1 : memref -// CHECKPARALLEL: %[[K:.*]] = dim %arg0, %c2 : memref -// CHECKPARALLEL: %[[B:.*]] = dim %arg1, %c0 : memref -// CHECKPARALLEL: %[[X0:.*]] = dim %arg2, %c1 : memref +// CHECKPARALLEL: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref +// CHECKPARALLEL: %[[Q:.*]] = memref.dim %arg0, %c1 : memref +// CHECKPARALLEL: %[[K:.*]] = memref.dim %arg0, %c2 : memref +// CHECKPARALLEL: %[[B:.*]] = memref.dim %arg1, %c0 : memref +// CHECKPARALLEL: %[[X0:.*]] = memref.dim %arg2, %c1 : memref // CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}) { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { // CHECKPARALLEL: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 // CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref @@ -318,13 +318,13 @@ } // CHECKLOOP-LABEL: func @conv_view4( // CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECKLOOP: %[[Z0:.*]] = dim %arg0, %c0 : memref -// CHECKLOOP: %[[Z1:.*]] = dim %arg0, %c1 : memref -// CHECKLOOP: %[[Q:.*]] = dim %arg0, %c2 : memref -// CHECKLOOP: %[[K:.*]] = dim %arg0, %c3 : memref -// CHECKLOOP: %[[B:.*]] = dim %arg1, %c0 : memref -// CHECKLOOP: %[[X0:.*]] = dim %arg2, %c1 : memref -// CHECKLOOP: %[[X1:.*]] = dim %arg2, %c2 : memref +// CHECKLOOP: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref +// CHECKLOOP: %[[Z1:.*]] = memref.dim %arg0, %c1 : memref +// CHECKLOOP: %[[Q:.*]] = memref.dim 
%arg0, %c2 : memref +// CHECKLOOP: %[[K:.*]] = memref.dim %arg0, %c3 : memref +// CHECKLOOP: %[[B:.*]] = memref.dim %arg1, %c0 : memref +// CHECKLOOP: %[[X0:.*]] = memref.dim %arg2, %c1 : memref +// CHECKLOOP: %[[X1:.*]] = memref.dim %arg2, %c2 : memref // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} { @@ -334,32 +334,32 @@ // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { // CHECKLOOP: %[[SUM0:.*]] = affine.apply #[[$stride2Dilation4]](%{{.*}}, %{{.*}}) // CHECKLOOP: %[[SUM1:.*]] = affine.apply #[[$stride3Dilation5]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECKLOOP: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECKLOOP: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 // CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECKPARALLEL-LABEL: func @conv_view4( // CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECKPARALLEL: %[[Z0:.*]] = dim %arg0, %c0 : memref -// CHECKPARALLEL: %[[Z1:.*]] = dim %arg0, %c1 : memref -// CHECKPARALLEL: %[[Q:.*]] = dim %arg0, %c2 : memref -// CHECKPARALLEL: %[[K:.*]] = dim %arg0, %c3 : memref -// CHECKPARALLEL: %[[B:.*]] = dim %arg1, %c0 : memref -// CHECKPARALLEL: %[[X0:.*]] = dim %arg2, %c1 : memref -// CHECKPARALLEL: %[[X1:.*]] = dim %arg2, %c2 : memref +// CHECKPARALLEL: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref +// CHECKPARALLEL: %[[Z1:.*]] = memref.dim %arg0, %c1 : memref +// CHECKPARALLEL: %[[Q:.*]] = memref.dim %arg0, %c2 : memref +// CHECKPARALLEL: %[[K:.*]] = memref.dim %arg0, %c3 : memref +// CHECKPARALLEL: %[[B:.*]] = memref.dim %arg1, %c0 : memref +// CHECKPARALLEL: %[[X0:.*]] = memref.dim %arg2, %c1 : memref +// CHECKPARALLEL: %[[X1:.*]] = memref.dim %arg2, %c2 : memref // CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { // CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #[[$stride2Dilation4]](%{{.*}}, %{{.*}}) // CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #[[$stride3Dilation5]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECKPARALLEL: 
%{{.*}} = addf %{{.*}}, %{{.*}} : f32 // CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref @@ -375,13 +375,13 @@ // CHECKLOOP-LABEL: func @conv_padding // CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { // CHECKLOOP: %[[ZERO:.*]] = constant 0.000000e+00 : f32 -// CHECKLOOP: %[[Z0:.*]] = dim %arg0, %c0 : memref -// CHECKLOOP: %[[Z1:.*]] = dim %arg0, %c1 : memref -// CHECKLOOP: %[[Q:.*]] = dim %arg0, %c2 : memref -// CHECKLOOP: %[[K:.*]] = dim %arg0, %c3 : memref -// CHECKLOOP: %[[B:.*]] = dim %arg1, %c0 : memref -// CHECKLOOP: %[[X0:.*]] = dim %arg2, %c1 : memref -// CHECKLOOP: %[[X1:.*]] = dim %arg2, %c2 : memref +// CHECKLOOP: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref +// CHECKLOOP: %[[Z1:.*]] = memref.dim %arg0, %c1 : memref +// CHECKLOOP: %[[Q:.*]] = memref.dim %arg0, %c2 : memref +// CHECKLOOP: %[[K:.*]] = memref.dim %arg0, %c3 : memref +// CHECKLOOP: %[[B:.*]] = memref.dim %arg1, %c0 : memref +// CHECKLOOP: %[[X0:.*]] = memref.dim %arg2, %c1 : memref +// CHECKLOOP: %[[X1:.*]] = memref.dim %arg2, %c2 : memref // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} { @@ -393,24 +393,24 @@ // CHECKLOOP: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) // CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]]) // CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[SUM1]]) -// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref // CHECKLOOP: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32 -// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECKLOOP: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECKLOOP: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 // CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECKPARALLEL-LABEL: func @conv_padding // CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { // CHECKPARALLEL: %[[ZERO:.*]] = constant 0.000000e+00 : f32 -// CHECKPARALLEL: %[[Z0:.*]] = dim %arg0, %c0 : memref -// CHECKPARALLEL: %[[Z1:.*]] = dim %arg0, %c1 : memref -// CHECKPARALLEL: %[[Q:.*]] = dim %arg0, %c2 : memref -// CHECKPARALLEL: %[[K:.*]] = dim %arg0, %c3 : memref -// CHECKPARALLEL: %[[B:.*]] = dim %arg1, %c0 : memref -// CHECKPARALLEL: %[[X0:.*]] = dim %arg2, %c1 : memref -// CHECKPARALLEL: %[[X1:.*]] = dim %arg2, %c2 : memref +// CHECKPARALLEL: %[[Z0:.*]] = memref.dim %arg0, %c0 : memref +// CHECKPARALLEL: %[[Z1:.*]] = memref.dim %arg0, %c1 : memref +// CHECKPARALLEL: %[[Q:.*]] = memref.dim %arg0, %c2 : memref +// CHECKPARALLEL: %[[K:.*]] = memref.dim %arg0, %c3 : memref +// CHECKPARALLEL: %[[B:.*]] = memref.dim %arg1, %c0 : memref +// CHECKPARALLEL: %[[X0:.*]] = memref.dim %arg2, %c1 : memref +// CHECKPARALLEL: %[[X1:.*]] = memref.dim %arg2, %c2 : memref // CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { // CHECKPARALLEL: 
scf.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { @@ -419,11 +419,11 @@ // CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) // CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]]) // CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[SUM1]]) -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref // CHECKPARALLEL: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref // CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 // CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref @@ -435,33 +435,33 @@ return } // CHECKLOOP-LABEL: func @pooling_max -// CHECKLOOP: %[[WX:.*]] = dim %arg1, %c0 : memref -// CHECKLOOP: %[[WY:.*]] = dim %arg1, %c1 : memref -// CHECKLOOP: %[[OX:.*]] = dim %arg2, %c0 : memref -// CHECKLOOP: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref +// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref +// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref +// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { // CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}}) // CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = load %{{.*}}[%[[IX]], %[[IY]]] : memref +// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref // CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 // CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref // CHECKPARALLEL-LABEL: func @pooling_max -// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, %c0 : memref -// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, %c1 : memref -// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, %c0 : memref -// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref +// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref +// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref +// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref // CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { // CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}}) // CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%[[IX]], %[[IY]]] : memref +// CHECKPARALLEL: 
%{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref // CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 // CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref @@ -474,20 +474,20 @@ } // CHECKLOOP-LABEL: func @pooling_max_padding // CHECKLOOP: %[[PAD:.*]] = constant 0xFF800000 : f32 -// CHECKLOOP: %[[WX:.*]] = dim %arg1, %c0 : memref -// CHECKLOOP: %[[WY:.*]] = dim %arg1, %c1 : memref -// CHECKLOOP: %[[OX:.*]] = dim %arg2, %c0 : memref -// CHECKLOOP: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref +// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref +// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref +// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { // CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) // CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref // CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) // CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) -// CHECKLOOP: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKLOOP: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref // CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32 // CHECKLOOP: %[[CMP:.*]] = cmpf ogt, %[[RHS]], %[[SEL]] : f32 // CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32 @@ -495,19 +495,19 @@ // CHECKPARALLEL-LABEL: func @pooling_max_padding // CHECKPARALLEL: %[[PAD:.*]] = constant 0xFF800000 : f32 -// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, %c0 : memref -// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, %c1 : memref -// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, %c0 : memref -// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref +// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref +// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref +// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref // CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { // CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) // CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref // CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) // CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) -// CHECKPARALLEL: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref // CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32 // CHECKPARALLEL: %[[CMP:.*]] = cmpf ogt, %[[RHS]], %[[SEL]] : f32 // 
CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32 @@ -522,20 +522,20 @@ } // CHECKLOOP-LABEL: func @pooling_max_padding_i32 // CHECKLOOP: %[[PAD:.*]] = constant -2147483648 : i32 -// CHECKLOOP: %[[WX:.*]] = dim %arg1, %c0 : memref -// CHECKLOOP: %[[WY:.*]] = dim %arg1, %c1 : memref -// CHECKLOOP: %[[OX:.*]] = dim %arg2, %c0 : memref -// CHECKLOOP: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref +// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref +// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref +// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { // CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) // CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref // CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) // CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) -// CHECKLOOP: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKLOOP: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref // CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32 // CHECKLOOP: %[[CMP:.*]] = cmpi sgt, %[[RHS]], %[[SEL]] : i32 // CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32 @@ -543,19 +543,19 @@ // CHECKPARALLEL-LABEL: func @pooling_max_padding_i32 // CHECKPARALLEL: %[[PAD:.*]] = constant -2147483648 : i32 -// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, %c0 : memref -// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, %c1 : memref -// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, %c0 : memref -// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref +// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref +// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref +// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref // CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { // CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) // CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref // CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) // CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) -// CHECKPARALLEL: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref // CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32 // CHECKPARALLEL: %[[CMP:.*]] = cmpi sgt, %[[RHS]], %[[SEL]] : i32 // CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32 @@ -569,33 +569,33 @@ return } // CHECKLOOP-LABEL: func @pooling_min -// CHECKLOOP: %[[WX:.*]] = dim %arg1, %c0 : memref 
-// CHECKLOOP: %[[WY:.*]] = dim %arg1, %c1 : memref -// CHECKLOOP: %[[OX:.*]] = dim %arg2, %c0 : memref -// CHECKLOOP: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref +// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref +// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref +// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { // CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}}) // CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = load %{{.*}}[%[[IX]], %[[IY]]] : memref +// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %{{.*}} = memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref // CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 // CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref // CHECKPARALLEL-LABEL: func @pooling_min -// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, %c0 : memref -// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, %c1 : memref -// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, %c0 : memref -// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref +// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref +// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref +// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref // CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { // CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}}) // CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%[[IX]], %[[IY]]] : memref +// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %{{.*}} = memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref // CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 // CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref @@ -608,20 +608,20 @@ } // CHECKLOOP-LABEL: func @pooling_min_padding // CHECKLOOP: %[[PAD:.*]] = constant 0x7F800000 : f32 -// CHECKLOOP: %[[WX:.*]] = dim %arg1, %c0 : memref -// CHECKLOOP: %[[WY:.*]] = dim %arg1, %c1 : memref -// CHECKLOOP: %[[OX:.*]] = dim %arg2, %c0 : memref -// CHECKLOOP: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref +// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref +// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref +// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { // CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) // CHECKLOOP: %[[IY:.*]] = 
affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref // CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) // CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) -// CHECKLOOP: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKLOOP: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref // CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32 // CHECKLOOP: %[[CMP:.*]] = cmpf olt, %[[RHS]], %[[SEL]] : f32 // CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32 @@ -629,19 +629,19 @@ // CHECKPARALLEL-LABEL: func @pooling_min_padding // CHECKPARALLEL: %[[PAD:.*]] = constant 0x7F800000 : f32 -// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, %c0 : memref -// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, %c1 : memref -// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, %c0 : memref -// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref +// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref +// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref +// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref // CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { // CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) // CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref // CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) // CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) -// CHECKPARALLEL: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref // CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32 // CHECKPARALLEL: %[[CMP:.*]] = cmpf olt, %[[RHS]], %[[SEL]] : f32 // CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : f32 @@ -656,20 +656,20 @@ } // CHECKLOOP-LABEL: func @pooling_min_padding_i32 // CHECKLOOP: %[[PAD:.*]] = constant 2147483647 : i32 -// CHECKLOOP: %[[WX:.*]] = dim %arg1, %c0 : memref -// CHECKLOOP: %[[WY:.*]] = dim %arg1, %c1 : memref -// CHECKLOOP: %[[OX:.*]] = dim %arg2, %c0 : memref -// CHECKLOOP: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref +// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref +// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref +// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { // CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) // CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : 
memref // CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) // CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) -// CHECKLOOP: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKLOOP: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref // CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32 // CHECKLOOP: %[[CMP:.*]] = cmpi slt, %[[RHS]], %[[SEL]] : i32 // CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32 @@ -677,19 +677,19 @@ // CHECKPARALLEL-LABEL: func @pooling_min_padding_i32 // CHECKPARALLEL: %[[PAD:.*]] = constant 2147483647 : i32 -// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, %c0 : memref -// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, %c1 : memref -// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, %c0 : memref -// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref +// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref +// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref +// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref // CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { // CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride1Dilation1Padding2]](%{{.*}}, %{{.*}}) // CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref // CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) // CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) -// CHECKPARALLEL: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref // CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32 // CHECKPARALLEL: %[[CMP:.*]] = cmpi slt, %[[RHS]], %[[SEL]] : i32 // CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %[[RHS]], %[[SEL]] : i32 @@ -703,33 +703,33 @@ return } // CHECKLOOP-LABEL: func @pooling_sum -// CHECKLOOP: %[[WX:.*]] = dim %arg1, %c0 : memref -// CHECKLOOP: %[[WY:.*]] = dim %arg1, %c1 : memref -// CHECKLOOP: %[[OX:.*]] = dim %arg2, %c0 : memref -// CHECKLOOP: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref +// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref +// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref +// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { // CHECKLOOP: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}}) // CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %[[RHS:.*]] = load %{{.*}}[%[[IX]], %[[IY]]] : memref -// CHECKLOOP: %[[LHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %[[RHS:.*]] = memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref +// CHECKLOOP: %[[LHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref // CHECKLOOP: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32 // CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : 
memref // CHECKPARALLEL-LABEL: func @pooling_sum -// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, %c0 : memref -// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, %c1 : memref -// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, %c0 : memref -// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref +// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref +// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref +// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref // CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { // CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}}) // CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %[[RHS:.*]] = load %{{.*}}[%[[IX]], %[[IY]]] : memref -// CHECKPARALLEL: %[[LHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %[[RHS:.*]] = memref.load %{{.*}}[%[[IX]], %[[IY]]] : memref +// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref // CHECKPARALLEL: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32 // CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref @@ -742,10 +742,10 @@ } // CHECKLOOP-LABEL: func @pooling_sum_padding // CHECKLOOP: %[[PAD:.*]] = constant 0.000000e+00 : f32 -// CHECKLOOP: %[[WX:.*]] = dim %arg1, %c0 : memref -// CHECKLOOP: %[[WY:.*]] = dim %arg1, %c1 : memref -// CHECKLOOP: %[[OX:.*]] = dim %arg2, %c0 : memref -// CHECKLOOP: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref +// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref +// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref +// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { @@ -754,18 +754,18 @@ // CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) // CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) // CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) -// CHECKLOOP: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKLOOP: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref // CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32 -// CHECKLOOP: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref // CHECKLOOP: %[[RES:.*]] = addf %[[RHS]], %[[SEL]] : f32 // CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref // CHECKPARALLEL-LABEL: func @pooling_sum_padding // CHECKPARALLEL: %[[PAD:.*]] = constant 0.000000e+00 : f32 -// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, %c0 : memref -// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, %c1 : memref -// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, %c0 : memref -// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref +// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref +// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref +// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref // CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to 
(%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { @@ -773,9 +773,9 @@ // CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) // CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) // CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) -// CHECKPARALLEL: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref // CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : f32 -// CHECKPARALLEL: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref // CHECKPARALLEL: %[[RES:.*]] = addf %[[RHS]], %[[SEL]] : f32 // CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref @@ -788,10 +788,10 @@ } // CHECKLOOP-LABEL: func @pooling_sum_padding_i32 // CHECKLOOP: %[[PAD:.*]] = constant 0 : i32 -// CHECKLOOP: %[[WX:.*]] = dim %arg1, %c0 : memref -// CHECKLOOP: %[[WY:.*]] = dim %arg1, %c1 : memref -// CHECKLOOP: %[[OX:.*]] = dim %arg2, %c0 : memref -// CHECKLOOP: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKLOOP: %[[WX:.*]] = memref.dim %arg1, %c0 : memref +// CHECKLOOP: %[[WY:.*]] = memref.dim %arg1, %c1 : memref +// CHECKLOOP: %[[OX:.*]] = memref.dim %arg2, %c0 : memref +// CHECKLOOP: %[[OY:.*]] = memref.dim %arg2, %c1 : memref // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { // CHECKLOOP: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { @@ -800,18 +800,18 @@ // CHECKLOOP: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) // CHECKLOOP: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) // CHECKLOOP: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) -// CHECKLOOP: %[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKLOOP: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref // CHECKLOOP: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32 -// CHECKLOOP: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKLOOP: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref // CHECKLOOP: %[[RES:.*]] = addi %[[RHS]], %[[SEL]] : i32 // CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref // CHECKPARALLEL-LABEL: func @pooling_sum_padding_i32 // CHECKPARALLEL: %[[PAD:.*]] = constant 0 : i32 -// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, %c0 : memref -// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, %c1 : memref -// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, %c0 : memref -// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, %c1 : memref +// CHECKPARALLEL: %[[WX:.*]] = memref.dim %arg1, %c0 : memref +// CHECKPARALLEL: %[[WY:.*]] = memref.dim %arg1, %c1 : memref +// CHECKPARALLEL: %[[OX:.*]] = memref.dim %arg2, %c0 : memref +// CHECKPARALLEL: %[[OY:.*]] = memref.dim %arg2, %c1 : memref // CHECKPARALLEL: scf.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { // CHECKPARALLEL: scf.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { @@ -819,9 +819,9 @@ // CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[$stride1Dilation1Padding1]](%{{.*}}, %{{.*}}) // CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[IX]]) // CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[IY]]) -// CHECKPARALLEL: 
%[[LHS:.*]] = load %{{.*}}[%[[IDX]], %[[IDY]]] : memref +// CHECKPARALLEL: %[[LHS:.*]] = memref.load %{{.*}}[%[[IDX]], %[[IDY]]] : memref // CHECKPARALLEL: %[[SEL:.*]] = select %{{.*}}, %[[PAD]], %[[LHS]] : i32 -// CHECKPARALLEL: %[[RHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref +// CHECKPARALLEL: %[[RHS:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref // CHECKPARALLEL: %[[RES:.*]] = addi %[[RHS]], %[[SEL]] : i32 // CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref @@ -854,9 +854,9 @@ // CHECKLOOP: scf.for %[[i:.*]] = {{.*}} // CHECKLOOP: scf.for %[[j:.*]] = {{.*}} // CHECKLOOP: scf.for %[[k:.*]] = {{.*}} -// CHECKLOOP: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref -// CHECKLOOP: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECKLOOP: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref +// CHECKLOOP: %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]] : memref +// CHECKLOOP: %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKLOOP: %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref // CHECKLOOP: %[[d:.*]] = mulf %[[a]], %[[b]] : f32 // CHECKLOOP: %[[e:.*]] = addf %[[c]], %[[d]] : f32 // CHECKLOOP: store %[[d]], %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref @@ -864,9 +864,9 @@ // CHECKPARALLEL-LABEL: @generic_region // CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]]) -// CHECKPARALLEL: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref -// CHECKPARALLEL: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECKPARALLEL: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref +// CHECKPARALLEL: %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]] : memref +// CHECKPARALLEL: %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref +// CHECKPARALLEL: %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref // CHECKPARALLEL: %[[d:.*]] = mulf %[[a]], %[[b]] : f32 // CHECKPARALLEL: %[[e:.*]] = addf %[[c]], %[[d]] : f32 // CHECKPARALLEL: store %[[d]], %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref @@ -906,9 +906,9 @@ // CHECKLOOP: scf.for %[[i:.*]] = {{.*}} // CHECKLOOP: scf.for %[[j:.*]] = {{.*}} // CHECKLOOP: scf.for %[[k:.*]] = {{.*}} -// CHECKLOOP: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] -// CHECKLOOP: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] -// CHECKLOOP: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] +// CHECKLOOP: %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]] +// CHECKLOOP: %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]] +// CHECKLOOP: %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]] // CHECKLOOP: %[[result_1:.*]] = mulf %[[a]], %[[b]] : f32 // CHECKLOOP: %[[ij:.*]] = addi %[[i]], %[[j]] : index // CHECKLOOP: %[[ijk:.*]] = addi %[[ij]], %[[k]] : index @@ -920,9 +920,9 @@ // CHECKPARALLEL-LABEL: @indexed_generic_region // CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]]) -// CHECKPARALLEL: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] -// CHECKPARALLEL: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] -// CHECKPARALLEL: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] +// CHECKPARALLEL: %[[a:.*]] = memref.load %{{.*}}[%[[i]], %[[j]]] +// CHECKPARALLEL: %[[b:.*]] = memref.load %{{.*}}[%[[i]], %[[j]], %[[k]]] +// CHECKPARALLEL: %[[c:.*]] = memref.load %{{.*}}[%[[i]], %[[k]], %[[j]]] // CHECKPARALLEL: %[[result_1:.*]] = mulf %[[a]], %[[b]] : f32 // CHECKPARALLEL: %[[ij:.*]] = addi %[[i]], %[[j]] : index // CHECKPARALLEL: %[[ijk:.*]] = addi %[[ij]], %[[k]] : index @@ -963,14 +963,14 @@ // 
CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32> // CHECKLOOP: scf.for %[[i:.*]] = {{.*}} // CHECKLOOP: scf.for %[[j:.*]] = {{.*}} -// CHECKLOOP: %[[a:.*]] = load %[[ARG0]][] +// CHECKLOOP: %[[a:.*]] = memref.load %[[ARG0]][] // CHECKLOOP: store %[[a]], %[[ARG1]][%[[i]], %[[j]]] // CHECKPARALLEL-LABEL: @generic_op_zero_rank // CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref // CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32> // CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]]) -// CHECKPARALLEL: %[[a:.*]] = load %[[ARG0]][] +// CHECKPARALLEL: %[[a:.*]] = memref.load %[[ARG0]][] // CHECKPARALLEL: store %[[a]], %[[ARG1]][%[[i]], %[[j]]] func @indexed_generic_op_zero_rank(%arg0: memref, %arg1: memref<3x4xi32>) @@ -992,7 +992,7 @@ // CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32> // CHECKLOOP: scf.for %[[i:.*]] = {{.*}} // CHECKLOOP: scf.for %[[j:.*]] = {{.*}} -// CHECKLOOP: %[[a:.*]] = load %[[ARG0]][ +// CHECKLOOP: %[[a:.*]] = memref.load %[[ARG0]][ // CHECKLOOP: %[[ij:.*]] = addi %[[i]], %[[j]] : index // CHECKLOOP: %[[ij_int:.*]] = index_cast %[[ij]] : index to i32 // CHECKLOOP: %[[result:.*]] = addi %[[a]], %[[ij_int]] : i32 @@ -1002,7 +1002,7 @@ // CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref // CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32> // CHECKPARALLEL: scf.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]]) -// CHECKPARALLEL: %[[a:.*]] = load %[[ARG0]][ +// CHECKPARALLEL: %[[a:.*]] = memref.load %[[ARG0]][ // CHECKPARALLEL: %[[ij:.*]] = addi %[[i]], %[[j]] : index // CHECKPARALLEL: %[[ij_int:.*]] = index_cast %[[ij]] : index to i32 // CHECKPARALLEL: %[[result:.*]] = addi %[[a]], %[[ij_int]] : i32 @@ -1036,8 +1036,8 @@ // CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref // CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref // CHECKLOOP: scf.for %[[i:.*]] = {{.*}} -// CHECKLOOP: %[[a:.*]] = load %[[ARG0]][%[[i]]] -// CHECKLOOP: %[[b:.*]] = load %[[ARG1]][] +// CHECKLOOP: %[[a:.*]] = memref.load %[[ARG0]][%[[i]]] +// CHECKLOOP: %[[b:.*]] = memref.load %[[ARG1]][] // CHECKLOOP: %[[c:.*]] = addf %[[a]], %[[b]] : f32 // CHECKLOOP: store %[[c]], %[[ARG1]][] @@ -1045,8 +1045,8 @@ // CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref // CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref // CHECKPARALLEL: scf.for %[[i:.*]] = {{.*}} -// CHECKPARALLEL: %[[a:.*]] = load %[[ARG0]][%[[i]]] -// CHECKPARALLEL: %[[b:.*]] = load %[[ARG1]][] +// CHECKPARALLEL: %[[a:.*]] = memref.load %[[ARG0]][%[[i]]] +// CHECKPARALLEL: %[[b:.*]] = memref.load %[[ARG1]][] // CHECKPARALLEL: %[[c:.*]] = addf %[[a]], %[[b]] : f32 // CHECKPARALLEL: store %[[c]], %[[ARG1]][] @@ -1086,9 +1086,9 @@ // CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref // CHECKLOOP-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref // CHECKLOOP: scf.for %[[i:.*]] = {{.*}} -// CHECKLOOP: %[[a:.*]] = load %[[ARG0]][%[[i]]] -// CHECKLOOP: %[[b:.*]] = load %[[ARG1]][] -// CHECKLOOP: %[[c:.*]] = load %[[ARG2]][] +// CHECKLOOP: %[[a:.*]] = memref.load %[[ARG0]][%[[i]]] +// CHECKLOOP: %[[b:.*]] = memref.load %[[ARG1]][] +// CHECKLOOP: %[[c:.*]] = memref.load %[[ARG2]][] // CHECKLOOP: %[[d:.*]] = select %{{.*}}, %[[b]], %[[c]] // CHECKLOOP: %[[e:.*]] = addf %[[a]], %[[d]] // CHECKLOOP: store %[[e]], %[[ARG2]][] @@ -1098,9 +1098,9 @@ // CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref // CHECKPARALLEL-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref // CHECKPARALLEL: scf.for %[[i:.*]] = {{.*}} -// CHECKPARALLEL: %[[a:.*]] = load %[[ARG0]][%[[i]]] -// CHECKPARALLEL: %[[b:.*]] = load %[[ARG1]][] -// 
CHECKPARALLEL: %[[c:.*]] = load %[[ARG2]][] +// CHECKPARALLEL: %[[a:.*]] = memref.load %[[ARG0]][%[[i]]] +// CHECKPARALLEL: %[[b:.*]] = memref.load %[[ARG1]][] +// CHECKPARALLEL: %[[c:.*]] = memref.load %[[ARG2]][] // CHECKPARALLEL: %[[d:.*]] = select %{{.*}}, %[[b]], %[[c]] // CHECKPARALLEL: %[[e:.*]] = addf %[[a]], %[[d]] // CHECKPARALLEL: store %[[e]], %[[ARG2]][] @@ -1164,8 +1164,8 @@ // CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref // CHECKLOOP-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref // CHECKLOOP-NOT: scf.for -// CHECKLOOP: load %[[ARG0]][] -// CHECKLOOP: load %[[ARG1]][] +// CHECKLOOP: memref.load %[[ARG0]][] +// CHECKLOOP: memref.load %[[ARG1]][] // CHECKLOOP: scf.if // CHECKLOOP: scf.yield // CHECKLOOP: else @@ -1177,8 +1177,8 @@ // CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref // CHECKPARALLEL-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref // CHECKPARALLEL-NOT: scf.for -// CHECKPARALLEL: load %[[ARG0]][] -// CHECKPARALLEL: load %[[ARG1]][] +// CHECKPARALLEL: memref.load %[[ARG0]][] +// CHECKPARALLEL: memref.load %[[ARG1]][] // CHECKPARALLEL: scf.if // CHECKPARALLEL: scf.yield // CHECKPARALLEL: else @@ -1197,17 +1197,17 @@ // CHECKLOOP-SAME: %[[mA:[a-zA-Z0-9]+]]: memref // CHECKLOOP-SAME: %[[mB:[a-zA-Z0-9]+]]: memref // CHECKLOOP-SAME: %[[mC:[a-zA-Z0-9]+]]: memref -// CHECKLOOP: %[[B:.*]] = dim %[[mA]], %c0 : memref -// CHECKLOOP: %[[M:.*]] = dim %[[mA]], %c1 : memref -// CHECKLOOP: %[[K:.*]] = dim %[[mA]], %c2 : memref -// CHECKLOOP: %[[N:.*]] = dim %[[mB]], %c2 : memref +// CHECKLOOP: %[[B:.*]] = memref.dim %[[mA]], %c0 : memref +// CHECKLOOP: %[[M:.*]] = memref.dim %[[mA]], %c1 : memref +// CHECKLOOP: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref +// CHECKLOOP: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref // CHECKLOOP: scf.for %[[b:.*]] = %{{.*}} to %[[B]] step %{{.*}} { // CHECKLOOP: scf.for %[[m:.*]] = %{{.*}} to %[[M]] step %{{.*}} { // CHECKLOOP: scf.for %[[n:.*]] = %{{.*}} to %[[N]] step %{{.*}} { // CHECKLOOP: scf.for %[[k:.*]] = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP: %[[va:.*]] = load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref -// CHECKLOOP: %[[vb:.*]] = load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref -// CHECKLOOP: %[[vc:.*]] = load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref +// CHECKLOOP: %[[va:.*]] = memref.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref +// CHECKLOOP: %[[vb:.*]] = memref.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref +// CHECKLOOP: %[[vc:.*]] = memref.load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref // CHECKLOOP: %[[inc:.*]] = mulf %[[va]], %[[vb]] : f32 // CHECKLOOP: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 // CHECKLOOP: store %[[res]], %[[mC]][%[[b]], %[[m]], %[[n]]] : memref @@ -1216,15 +1216,15 @@ // CHECKPARALLEL-SAME: %[[mA:[a-zA-Z0-9]+]]: memref // CHECKPARALLEL-SAME: %[[mB:[a-zA-Z0-9]+]]: memref // CHECKPARALLEL-SAME: %[[mC:[a-zA-Z0-9]+]]: memref -// CHECKPARALLEL: %[[B:.*]] = dim %[[mA]], %c0 : memref -// CHECKPARALLEL: %[[M:.*]] = dim %[[mA]], %c1 : memref -// CHECKPARALLEL: %[[K:.*]] = dim %[[mA]], %c2 : memref -// CHECKPARALLEL: %[[N:.*]] = dim %[[mB]], %c2 : memref +// CHECKPARALLEL: %[[B:.*]] = memref.dim %[[mA]], %c0 : memref +// CHECKPARALLEL: %[[M:.*]] = memref.dim %[[mA]], %c1 : memref +// CHECKPARALLEL: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref +// CHECKPARALLEL: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref // CHECKPARALLEL: scf.parallel (%[[b:.*]], %[[m:.*]], %[[n:.*]]) = ({{.*}}) to (%[[B]], %[[M]], %[[N]]) step ({{.*}}) { // CHECKPARALLEL: scf.for %[[k:.*]] = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKPARALLEL: %[[va:.*]] = load 
%[[mA]][%[[b]], %[[m]], %[[k]]] : memref -// CHECKPARALLEL: %[[vb:.*]] = load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref -// CHECKPARALLEL: %[[vc:.*]] = load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref +// CHECKPARALLEL: %[[va:.*]] = memref.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref +// CHECKPARALLEL: %[[vb:.*]] = memref.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref +// CHECKPARALLEL: %[[vc:.*]] = memref.load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref // CHECKPARALLEL: %[[inc:.*]] = mulf %[[va]], %[[vb]] : f32 // CHECKPARALLEL: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 // CHECKPARALLEL: store %[[res]], %[[mC]][%[[b]], %[[m]], %[[n]]] : memref @@ -1242,14 +1242,14 @@ // CHECKLOOP-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref // CHECKLOOP: %[[c0:.*]] = constant 0 : index // CHECKLOOP: %[[c1:.*]] = constant 1 : index -// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref -// CHECKLOOP: %[[dim1:.*]] = dim %[[arg2]], %[[c0]] : memref +// CHECKLOOP: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref +// CHECKLOOP: %[[dim1:.*]] = memref.dim %[[arg2]], %[[c0]] : memref // CHECKLOOP: scf.for %[[b:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] { // CHECKLOOP: scf.for %[[m:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] { // CHECKLOOP: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[b]], %[[m]]) -// CHECKLOOP: %[[vb:.*]] = load %[[arg0]][%[[aff]]] : memref -// CHECKLOOP: %[[va:.*]] = load %[[arg1]][%[[m]]] : memref -// CHECKLOOP: %[[vc:.*]] = load %[[arg2]][%[[b]]] : memref +// CHECKLOOP: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]]] : memref +// CHECKLOOP: %[[va:.*]] = memref.load %[[arg1]][%[[m]]] : memref +// CHECKLOOP: %[[vc:.*]] = memref.load %[[arg2]][%[[b]]] : memref // CHECKLOOP: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 // CHECKLOOP: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 // CHECKLOOP: store %[[res]], %[[arg2]][%[[b]]] : memref @@ -1260,14 +1260,14 @@ // CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref // CHECKPARALLEL: %[[c0:.*]] = constant 0 : index // CHECKPARALLEL: %[[c1:.*]] = constant 1 : index -// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg2]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim1:.*]] = memref.dim %[[arg2]], %[[c0]] : memref // CHECKPARALLEL: scf.parallel (%[[b:.*]]) = (%[[c0]]) to (%[[dim1]]) step (%[[c1]]) { // CHECKPARALLEL: scf.for %[[m:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] { // CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[b]], %[[m]]) -// CHECKPARALLEL: %[[vb:.*]] = load %[[arg0]][%[[aff]]] : memref -// CHECKPARALLEL: %[[va:.*]] = load %[[arg1]][%[[m]]] : memref -// CHECKPARALLEL: %[[vc:.*]] = load %[[arg2]][%[[b]]] : memref +// CHECKPARALLEL: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]]] : memref +// CHECKPARALLEL: %[[va:.*]] = memref.load %[[arg1]][%[[m]]] : memref +// CHECKPARALLEL: %[[vc:.*]] = memref.load %[[arg2]][%[[b]]] : memref // CHECKPARALLEL: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 // CHECKPARALLEL: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 // CHECKPARALLEL: store %[[res]], %[[arg2]][%[[b]]] : memref @@ -1284,20 +1284,20 @@ // CHECKLOOP-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref // CHECKLOOP: %[[c0:.*]] = constant 0 : index // CHECKLOOP: %[[c1:.*]] = constant 1 : index -// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref -// CHECKLOOP: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref -// CHECKLOOP: %[[dim2:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKLOOP: %[[dim3:.*]] = dim %[[arg2]], %[[c1]] : 
memref +// CHECKLOOP: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref +// CHECKLOOP: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref +// CHECKLOOP: %[[dim2:.*]] = memref.dim %[[arg2]], %[[c0]] : memref +// CHECKLOOP: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c1]] : memref // CHECKLOOP: scf.for %[[arg3:.*]] = %[[c0]] to %[[dim2]] step %[[c1]] { // CHECKLOOP: scf.for %[[arg4:.*]] = %[[c0]] to %[[dim3]] step %[[c1]] { // CHECKLOOP: scf.for %[[arg5:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] { // CHECKLOOP: scf.for %[[arg6:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] { // CHECKLOOP: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg5]]) // CHECKLOOP: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg6]]) -// CHECKLOOP: %[[vb:.*]] = load %[[arg0]][%[[aff]], %[[aff2]]] : memref +// CHECKLOOP: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]]] : memref -// CHECKLOOP: %[[va:.*]] = load %[[arg1]][%[[arg5]], %[[arg6]]] : memref -// CHECKLOOP: %[[vc:.*]] = load %[[arg2]][%[[arg3]], %[[arg4]]] : memref +// CHECKLOOP: %[[va:.*]] = memref.load %[[arg1]][%[[arg5]], %[[arg6]]] : memref +// CHECKLOOP: %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]]] : memref // CHECKLOOP: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 // CHECKLOOP: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 @@ -1309,18 +1309,18 @@ // CHECKPARALLEL-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref // CHECKPARALLEL: %[[c0:.*]] = constant 0 : index // CHECKPARALLEL: %[[c1:.*]] = constant 1 : index -// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref -// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg2]], %[[c1]] : memref +// CHECKPARALLEL: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref +// CHECKPARALLEL: %[[dim2:.*]] = memref.dim %[[arg2]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c1]] : memref // CHECKPARALLEL: scf.parallel (%[[arg3:.*]], %[[arg4:.*]]) = (%[[c0]], %[[c0]]) to (%[[dim2]], %[[dim3]]) step (%[[c1]], %[[c1]]) { // CHECKPARALLEL: scf.for %[[arg5:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] { // CHECKPARALLEL: scf.for %[[arg6:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] { // CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg5]]) // CHECKPARALLEL: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg6]]) -// CHECKPARALLEL: %[[vb:.*]] = load %[[arg0]][%[[aff]], %[[aff2]]] : memref -// CHECKPARALLEL: %[[va:.*]] = load %[[arg1]][%[[arg5]], %[[arg6]]] : memref -// CHECKPARALLEL: %[[vc:.*]] = load %[[arg2]][%[[arg3]], %[[arg4]]] : memref +// CHECKPARALLEL: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]]] : memref +// CHECKPARALLEL: %[[va:.*]] = memref.load %[[arg1]][%[[arg5]], %[[arg6]]] : memref +// CHECKPARALLEL: %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]]] : memref // CHECKPARALLEL: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 // CHECKPARALLEL: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 // CHECKPARALLEL: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]]] : memref @@ -1339,12 +1339,12 @@ // CHECKLOOP: %[[c2:.*]] = constant 2 : index // CHECKLOOP: %[[c0:.*]] = constant 0 : index // CHECKLOOP: %[[c1:.*]] = constant 1 : index -// CHECKLOOP: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref -// CHECKLOOP: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref -// CHECKLOOP: %[[dim2:.*]] = dim %[[arg1]], %[[c2]] : 
memref -// CHECKLOOP: %[[dim3:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKLOOP: %[[dim4:.*]] = dim %[[arg2]], %[[c1]] : memref -// CHECKLOOP: %[[dim5:.*]] = dim %[[arg2]], %[[c2]] : memref +// CHECKLOOP: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref +// CHECKLOOP: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref +// CHECKLOOP: %[[dim2:.*]] = memref.dim %[[arg1]], %[[c2]] : memref +// CHECKLOOP: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c0]] : memref +// CHECKLOOP: %[[dim4:.*]] = memref.dim %[[arg2]], %[[c1]] : memref +// CHECKLOOP: %[[dim5:.*]] = memref.dim %[[arg2]], %[[c2]] : memref // CHECKLOOP: scf.for %[[arg3:.*]] = %[[c0]] to %[[dim3]] step %[[c1]] { // CHECKLOOP: scf.for %[[arg4:.*]] = %[[c0]] to %[[dim4]] step %[[c1]] { // CHECKLOOP: scf.for %[[arg5:.*]] = %[[c0]] to %[[dim5]] step %[[c1]] { @@ -1354,10 +1354,10 @@ // CHECKLOOP: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg6]]) // CHECKLOOP: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg7]]) // CHECKLOOP: %[[aff3:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg5]], %[[arg8]]) -// CHECKLOOP: %[[vb:.*]] = load %[[arg0]][%[[aff]], %[[aff2]], %[[aff3]]] : memref +// CHECKLOOP: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]], %[[aff3]]] : memref -// CHECKLOOP: %[[va:.*]] = load %[[arg1]][%[[arg6]], %[[arg7]], %[[arg8]]] : memref -// CHECKLOOP: %[[vc:.*]] = load %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref +// CHECKLOOP: %[[va:.*]] = memref.load %[[arg1]][%[[arg6]], %[[arg7]], %[[arg8]]] : memref +// CHECKLOOP: %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref // CHECKLOOP: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 // CHECKLOOP: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 @@ -1370,12 +1370,12 @@ // CHECKPARALLEL: %[[c2:.*]] = constant 2 : index // CHECKPARALLEL: %[[c0:.*]] = constant 0 : index // CHECKPARALLEL: %[[c1:.*]] = constant 1 : index -// CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref -// CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg1]], %[[c2]] : memref -// CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg2]], %[[c1]] : memref -// CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg2]], %[[c2]] : memref +// CHECKPARALLEL: %[[dim0:.*]] = memref.dim %[[arg1]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim1:.*]] = memref.dim %[[arg1]], %[[c1]] : memref +// CHECKPARALLEL: %[[dim2:.*]] = memref.dim %[[arg1]], %[[c2]] : memref +// CHECKPARALLEL: %[[dim3:.*]] = memref.dim %[[arg2]], %[[c0]] : memref +// CHECKPARALLEL: %[[dim4:.*]] = memref.dim %[[arg2]], %[[c1]] : memref +// CHECKPARALLEL: %[[dim5:.*]] = memref.dim %[[arg2]], %[[c2]] : memref // CHECKPARALLEL: scf.parallel (%[[arg3:.*]], %[[arg4:.*]], %[[arg5:.*]]) = (%[[c0]], %[[c0]], %[[c0]]) to (%[[dim3]], %[[dim4]], %[[dim5]]) step (%[[c1]], %[[c1]], %[[c1]]) { // CHECKPARALLEL: scf.for %[[arg6:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] { // CHECKPARALLEL: scf.for %[[arg7:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] { @@ -1383,9 +1383,9 @@ // CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg6]]) // CHECKPARALLEL: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg7]]) // CHECKPARALLEL: %[[aff3:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg5]], %[[arg8]]) -// CHECKPARALLEL: %[[vb:.*]] = load %[[arg0]][%[[aff]], %[[aff2]], %[[aff3]]] : memref -// CHECKPARALLEL: %[[va:.*]] = load %[[arg1]][%[[arg6]], %[[arg7]], %[[arg8]]] : 
memref -// CHECKPARALLEL: %[[vc:.*]] = load %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref +// CHECKPARALLEL: %[[vb:.*]] = memref.load %[[arg0]][%[[aff]], %[[aff2]], %[[aff3]]] : memref +// CHECKPARALLEL: %[[va:.*]] = memref.load %[[arg1]][%[[arg6]], %[[arg7]], %[[arg8]]] : memref +// CHECKPARALLEL: %[[vc:.*]] = memref.load %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref // CHECKPARALLEL: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 // CHECKPARALLEL: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 // CHECKPARALLEL: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref diff --git a/mlir/test/Dialect/Linalg/parallel-loops.mlir b/mlir/test/Dialect/Linalg/parallel-loops.mlir --- a/mlir/test/Dialect/Linalg/parallel-loops.mlir +++ b/mlir/test/Dialect/Linalg/parallel-loops.mlir @@ -21,8 +21,8 @@ // CHECK-DAG: %[[C0:.*]] = constant 0 // CHECK-DAG: %[[C1:.*]] = constant 1 // CHECK: scf.parallel (%[[I:.*]], %[[J:.*]]) = {{.*}} -// CHECK: %[[LHS_ELEM:.*]] = load %[[LHS]][%[[I]], %[[J]]] -// CHECK: %[[RHS_ELEM:.*]] = load %[[RHS]][%[[I]], %[[J]]] +// CHECK: %[[LHS_ELEM:.*]] = memref.load %[[LHS]][%[[I]], %[[J]]] +// CHECK: %[[RHS_ELEM:.*]] = memref.load %[[RHS]][%[[I]], %[[J]]] // CHECK: %[[SUM:.*]] = addf %[[LHS_ELEM]], %[[RHS_ELEM]] : f32 // CHECK: store %[[SUM]], %{{.*}}[%[[I]], %[[J]]] // CHECK: scf.yield @@ -50,14 +50,14 @@ // CHECK-LABEL: @lower_outer_parallel // CHECK-DAG: %[[C0:.*]] = constant 0 // CHECK-DAG: %[[C1:.*]] = constant 1 -// CHECK-DAG: %[[D0:.*]] = dim %{{.*}}, %c0 -// CHECK-DAG: %[[D1:.*]] = dim %{{.*}}, %c1 -// CHECK-DAG: %[[D2:.*]] = dim %{{.*}}, %c2 -// CHECK-DAG: %[[D3:.*]] = dim %{{.*}}, %c3 +// CHECK-DAG: %[[D0:.*]] = memref.dim %{{.*}}, %c0 +// CHECK-DAG: %[[D1:.*]] = memref.dim %{{.*}}, %c1 +// CHECK-DAG: %[[D2:.*]] = memref.dim %{{.*}}, %c2 +// CHECK-DAG: %[[D3:.*]] = memref.dim %{{.*}}, %c3 // CHECK: scf.parallel (%[[IV0:.*]], %[[IV1:.*]]) = (%[[C0]], %[[C0]]) to (%[[D0]], %[[D1]]) step (%[[C1]], %[[C1]]) // CHECK: scf.for %[[IV2:.*]] = %[[C0]] to %[[D2]] step %[[C1]] // CHECK: scf.parallel (%[[IV3:.*]]) = (%[[C0]]) to (%[[D3]]) step (%[[C1]]) -// CHECK: load %{{.*}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] +// CHECK: memref.load %{{.*}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] // CHECK: store %{{.*}}, %{{.*}}[%[[IV0]], %[[IV1]], %[[IV3]]] // ----- @@ -83,15 +83,15 @@ // CHECK-LABEL: @lower_mixed_parallel // CHECK-DAG: %[[C0:.*]] = constant 0 // CHECK-DAG: %[[C1:.*]] = constant 1 -// CHECK-DAG: %[[D0:.*]] = dim %{{.*}}, %c0 -// CHECK-DAG: %[[D1:.*]] = dim %{{.*}}, %c1 -// CHECK-DAG: %[[D2:.*]] = dim %{{.*}}, %c2 -// CHECK-DAG: %[[D3:.*]] = dim %{{.*}}, %c3 -// CHECK-DAG: %[[D4:.*]] = dim %{{.*}}, %c4 -// CHECK-DAG: %[[D5:.*]] = dim %{{.*}}, %c5 +// CHECK-DAG: %[[D0:.*]] = memref.dim %{{.*}}, %c0 +// CHECK-DAG: %[[D1:.*]] = memref.dim %{{.*}}, %c1 +// CHECK-DAG: %[[D2:.*]] = memref.dim %{{.*}}, %c2 +// CHECK-DAG: %[[D3:.*]] = memref.dim %{{.*}}, %c3 +// CHECK-DAG: %[[D4:.*]] = memref.dim %{{.*}}, %c4 +// CHECK-DAG: %[[D5:.*]] = memref.dim %{{.*}}, %c5 // CHECK: scf.parallel (%[[IV0:.*]], %[[IV1:.*]]) = (%[[C0]], %[[C0]]) to (%[[D0]], %[[D1]]) step (%[[C1]], %[[C1]]) // CHECK: scf.for %[[IV2:.*]] = %[[C0]] to %[[D2]] step %[[C1]] // CHECK: scf.parallel (%[[IV3:.*]], %[[IV4:.*]]) = (%[[C0]], %[[C0]]) to (%[[D3]], %[[D4]]) step (%[[C1]], %[[C1]]) // CHECK: scf.for %[[IV5:.*]] = %[[C0]] to %[[D5]] step %[[C1]] -// CHECK: load %{{.*}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]], %[[IV4]], %[[IV5]]] +// CHECK: memref.load %{{.*}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]], 
%[[IV4]], %[[IV5]]] // CHECK: store %{{.*}}, %{{.*}}[%[[IV0]], %[[IV1]], %[[IV4]], %[[IV3]]] diff --git a/mlir/test/Dialect/Linalg/promote.mlir b/mlir/test/Dialect/Linalg/promote.mlir --- a/mlir/test/Dialect/Linalg/promote.mlir +++ b/mlir/test/Dialect/Linalg/promote.mlir @@ -14,18 +14,18 @@ %c2 = constant 2 : index %c0 = constant 0 : index %c1 = constant 1 : index - %3 = view %A[%c0][%M, %K] : memref to memref - %4 = view %A[%c0][%K, %N] : memref to memref - %5 = view %A[%c0][%M, %N] : memref to memref - %6 = dim %3, %c0 : memref - %7 = dim %3, %c1 : memref - %8 = dim %4, %c1 : memref + %3 = memref.view %A[%c0][%M, %K] : memref to memref + %4 = memref.view %A[%c0][%K, %N] : memref to memref + %5 = memref.view %A[%c0][%M, %N] : memref to memref + %6 = memref.dim %3, %c0 : memref + %7 = memref.dim %3, %c1 : memref + %8 = memref.dim %4, %c1 : memref scf.for %arg4 = %c0 to %6 step %c2 { scf.for %arg5 = %c0 to %8 step %c3 { scf.for %arg6 = %c0 to %7 step %c4 { - %11 = std.subview %3[%arg4, %arg6][%c2, %c4][1, 1] : memref to memref - %14 = std.subview %4[%arg6, %arg5][%c4, %c3][1, 1] : memref to memref - %17 = std.subview %5[%arg4, %arg5][%c2, %c3][1, 1] : memref to memref + %11 = memref.subview %3[%arg4, %arg6][%c2, %c4][1, 1] : memref to memref + %14 = memref.subview %4[%arg6, %arg5][%c4, %c3][1, 1] : memref to memref + %17 = memref.subview %5[%arg4, %arg5][%c2, %c3][1, 1] : memref to memref linalg.matmul ins(%11, %14: memref, memref) @@ -40,27 +40,27 @@ // CHECK: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { // CHECK: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { // CHECK: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK: %[[vA:.*]] = subview {{.*}} : memref -// CHECK: %[[vB:.*]] = subview {{.*}} : memref -// CHECK: %[[vC:.*]] = subview {{.*}} : memref +// CHECK: %[[vA:.*]] = memref.subview {{.*}} : memref +// CHECK: %[[vB:.*]] = memref.subview {{.*}} : memref +// CHECK: %[[vC:.*]] = memref.subview {{.*}} : memref /// -// CHECK: %[[tmpA:.*]] = alloc() : memref<32xi8> -// ALLOCA: %[[tmpA:.*]] = alloca() : memref<32xi8> -// CHECK: %[[fullA:.*]] = std.view %[[tmpA]][{{.*}}][{{.*}}] : memref<32xi8> to memref -// DYNAMIC: std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref -// CHECK: %[[partialA:.*]] = subview %[[fullA]]{{.*}} : memref to memref +// CHECK: %[[tmpA:.*]] = memref.alloc() : memref<32xi8> +// ALLOCA: %[[tmpA:.*]] = memref.alloca() : memref<32xi8> +// CHECK: %[[fullA:.*]] = memref.view %[[tmpA]][{{.*}}][{{.*}}] : memref<32xi8> to memref +// DYNAMIC: memref.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref +// CHECK: %[[partialA:.*]] = memref.subview %[[fullA]]{{.*}} : memref to memref /// -// CHECK: %[[tmpB:.*]] = alloc() : memref<48xi8> -// ALLOCA: %[[tmpB:.*]] = alloca() : memref<48xi8> -// CHECK: %[[fullB:.*]] = std.view %[[tmpB]][{{.*}}][{{.*}}] : memref<48xi8> to memref -// DYNAMIC: std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref -// CHECK: %[[partialB:.*]] = subview %[[fullB]]{{.*}} : memref to memref +// CHECK: %[[tmpB:.*]] = memref.alloc() : memref<48xi8> +// ALLOCA: %[[tmpB:.*]] = memref.alloca() : memref<48xi8> +// CHECK: %[[fullB:.*]] = memref.view %[[tmpB]][{{.*}}][{{.*}}] : memref<48xi8> to memref +// DYNAMIC: memref.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref +// CHECK: %[[partialB:.*]] = memref.subview %[[fullB]]{{.*}} : memref to memref /// -// CHECK: %[[tmpC:.*]] = alloc() : memref<24xi8> -// ALLOCA: %[[tmpC:.*]] = alloca() : memref<24xi8> -// CHECK: %[[fullC:.*]] = std.view %[[tmpC]][{{.*}}][{{.*}}] : memref<24xi8> to memref -// DYNAMIC: 
std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref -// CHECK: %[[partialC:.*]] = subview %[[fullC]]{{.*}} : memref to memref +// CHECK: %[[tmpC:.*]] = memref.alloc() : memref<24xi8> +// ALLOCA: %[[tmpC:.*]] = memref.alloca() : memref<24xi8> +// CHECK: %[[fullC:.*]] = memref.view %[[tmpC]][{{.*}}][{{.*}}] : memref<24xi8> to memref +// DYNAMIC: memref.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref +// CHECK: %[[partialC:.*]] = memref.subview %[[fullC]]{{.*}} : memref to memref // CHECK: linalg.copy(%[[vA]], %[[partialA]]) : memref, memref // CHECK: linalg.copy(%[[vB]], %[[partialB]]) : memref, memref @@ -72,12 +72,12 @@ // CHECK: memref, // CHECK: memref // -// CHECK: dealloc %[[tmpA]] : memref<32xi8> -// CHECK: dealloc %[[tmpB]] : memref<48xi8> -// CHECK: dealloc %[[tmpC]] : memref<24xi8> -// ALLOCA-NOT: dealloc %[[tmpA]] : memref<32xi8> -// ALLOCA-NOT: dealloc %[[tmpB]] : memref<48xi8> -// ALLOCA-NOT: dealloc %[[tmpC]] : memref<24xi8> +// CHECK: memref.dealloc %[[tmpA]] : memref<32xi8> +// CHECK: memref.dealloc %[[tmpB]] : memref<48xi8> +// CHECK: memref.dealloc %[[tmpC]] : memref<24xi8> +// ALLOCA-NOT: memref.dealloc %[[tmpA]] : memref<32xi8> +// ALLOCA-NOT: memref.dealloc %[[tmpB]] : memref<48xi8> +// ALLOCA-NOT: memref.dealloc %[[tmpC]] : memref<24xi8> // ----- @@ -87,18 +87,18 @@ %c2 = constant 2 : index %c0 = constant 0 : index %c1 = constant 1 : index - %3 = view %A[%c0][%M, %K] : memref to memref - %4 = view %A[%c0][%K, %N] : memref to memref - %5 = view %A[%c0][%M, %N] : memref to memref - %6 = dim %3, %c0 : memref - %7 = dim %3, %c1 : memref - %8 = dim %4, %c1 : memref + %3 = memref.view %A[%c0][%M, %K] : memref to memref + %4 = memref.view %A[%c0][%K, %N] : memref to memref + %5 = memref.view %A[%c0][%M, %N] : memref to memref + %6 = memref.dim %3, %c0 : memref + %7 = memref.dim %3, %c1 : memref + %8 = memref.dim %4, %c1 : memref scf.for %arg4 = %c0 to %6 step %c2 { scf.for %arg5 = %c0 to %8 step %c3 { scf.for %arg6 = %c0 to %7 step %c4 { - %11 = std.subview %3[%arg4, %arg6][%c2, %c4][1, 1] : memref to memref - %14 = std.subview %4[%arg6, %arg5][%c4, %c3][1, 1] : memref to memref - %17 = std.subview %5[%arg4, %arg5][%c2, %c3][1, 1] : memref to memref + %11 = memref.subview %3[%arg4, %arg6][%c2, %c4][1, 1] : memref to memref + %14 = memref.subview %4[%arg6, %arg5][%c4, %c3][1, 1] : memref to memref + %17 = memref.subview %5[%arg4, %arg5][%c2, %c3][1, 1] : memref to memref linalg.matmul ins(%11, %14: memref, memref) @@ -113,24 +113,24 @@ // CHECK: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { // CHECK: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { // CHECK: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK: %[[vA_f64:.*]] = subview {{.*}} : memref -// CHECK: %[[vB_f64:.*]] = subview {{.*}} : memref -// CHECK: %[[vC_f64:.*]] = subview {{.*}} : memref +// CHECK: %[[vA_f64:.*]] = memref.subview {{.*}} : memref +// CHECK: %[[vB_f64:.*]] = memref.subview {{.*}} : memref +// CHECK: %[[vC_f64:.*]] = memref.subview {{.*}} : memref /// -// CHECK: %[[tmpA_f64:.*]] = alloc() : memref<64xi8> -// CHECK: %[[fullA_f64:.*]] = std.view %[[tmpA_f64]][{{.*}}][{{.*}}] : memref<64xi8> to memref -// DYNAMIC: std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref -// CHECK: %[[partialA_f64:.*]] = subview %[[fullA_f64]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref +// CHECK: %[[tmpA_f64:.*]] = memref.alloc() : memref<64xi8> +// CHECK: %[[fullA_f64:.*]] = memref.view %[[tmpA_f64]][{{.*}}][{{.*}}] : memref<64xi8> to memref +// DYNAMIC: memref.view %{{.*}}[{{.*}}][{{.*}}] : 
memref to memref +// CHECK: %[[partialA_f64:.*]] = memref.subview %[[fullA_f64]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref /// -// CHECK: %[[tmpB_f64:.*]] = alloc() : memref<96xi8> -// CHECK: %[[fullB_f64:.*]] = std.view %[[tmpB_f64]][{{.*}}][{{.*}}] : memref<96xi8> to memref -// DYNAMIC: std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref -// CHECK: %[[partialB_f64:.*]] = subview %[[fullB_f64]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref +// CHECK: %[[tmpB_f64:.*]] = memref.alloc() : memref<96xi8> +// CHECK: %[[fullB_f64:.*]] = memref.view %[[tmpB_f64]][{{.*}}][{{.*}}] : memref<96xi8> to memref +// DYNAMIC: memref.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref +// CHECK: %[[partialB_f64:.*]] = memref.subview %[[fullB_f64]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref /// -// CHECK: %[[tmpC_f64:.*]] = alloc() : memref<48xi8> -// CHECK: %[[fullC_f64:.*]] = std.view %[[tmpC_f64]][{{.*}}][{{.*}}] : memref<48xi8> to memref -// DYNAMIC: std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref -// CHECK: %[[partialC_f64:.*]] = subview %[[fullC_f64]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref +// CHECK: %[[tmpC_f64:.*]] = memref.alloc() : memref<48xi8> +// CHECK: %[[fullC_f64:.*]] = memref.view %[[tmpC_f64]][{{.*}}][{{.*}}] : memref<48xi8> to memref +// DYNAMIC: memref.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref +// CHECK: %[[partialC_f64:.*]] = memref.subview %[[fullC_f64]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref // CHECK: linalg.copy(%[[vA_f64]], %[[partialA_f64]]) : memref, memref // CHECK: linalg.copy(%[[vB_f64]], %[[partialB_f64]]) : memref, memref @@ -142,6 +142,6 @@ // CHECK: memref, // CHECK: memref // -// CHECK: dealloc %[[tmpA_f64]] : memref<64xi8> -// CHECK: dealloc %[[tmpB_f64]] : memref<96xi8> -// CHECK: dealloc %[[tmpC_f64]] : memref<48xi8> +// CHECK: memref.dealloc %[[tmpA_f64]] : memref<64xi8> +// CHECK: memref.dealloc %[[tmpB_f64]] : memref<96xi8> +// CHECK: memref.dealloc %[[tmpC_f64]] : memref<48xi8> diff --git a/mlir/test/Dialect/Linalg/promotion_options.mlir b/mlir/test/Dialect/Linalg/promotion_options.mlir --- a/mlir/test/Dialect/Linalg/promotion_options.mlir +++ b/mlir/test/Dialect/Linalg/promotion_options.mlir @@ -16,13 +16,13 @@ // CHECK: scf.for // CHECK: scf.for // CHECK: scf.for -// CHECK: %[[T7:.+]] = subview %[[ARG0]] -// CHECK: %[[T12:.+]] = subview %[[ARG1]] -// CHECK: %[[T17:.+]] = subview %[[ARG2]] -// CHECK: %[[T18:.+]] = alloc(%{{.*}}, %{{.*}}) : memref -// CHECK: %[[T19:.+]] = subview %[[T18]] -// CHECK: %[[T20:.+]] = alloc(%{{.*}}, %{{.*}}) : memref -// CHECK: %[[T21:.+]] = subview %[[T20]] +// CHECK: %[[T7:.+]] = memref.subview %[[ARG0]] +// CHECK: %[[T12:.+]] = memref.subview %[[ARG1]] +// CHECK: %[[T17:.+]] = memref.subview %[[ARG2]] +// CHECK: %[[T18:.+]] = memref.alloc(%{{.*}}, %{{.*}}) : memref +// CHECK: %[[T19:.+]] = memref.subview %[[T18]] +// CHECK: %[[T20:.+]] = memref.alloc(%{{.*}}, %{{.*}}) : memref +// CHECK: %[[T21:.+]] = memref.subview %[[T20]] // CHECK: linalg.fill(%[[T19]], %[[C42]]) // CHECK: linalg.copy(%[[T7]], %[[T19]]) // CHECK: linalg.fill(%[[T21]], %[[C42]]) @@ -30,5 +30,5 @@ // CHECK: linalg.matmul ins(%[[T19]], %[[T12]]{{.*}} outs(%[[T21]] // CHECK-NOT: linalg.fill // CHECK: linalg.copy(%[[T21]], %[[T17]]) -// CHECK: dealloc %[[T18]] -// CHECK: dealloc %[[T20]] +// CHECK: memref.dealloc %[[T18]] +// CHECK: memref.dealloc %[[T20]] diff --git a/mlir/test/Dialect/Linalg/reshape_fusion.mlir b/mlir/test/Dialect/Linalg/reshape_fusion.mlir --- a/mlir/test/Dialect/Linalg/reshape_fusion.mlir +++ 
b/mlir/test/Dialect/Linalg/reshape_fusion.mlir @@ -512,7 +512,7 @@ affine_map<(d0, d1, d2, d3, d4, d5) -> (d3, d4)>, affine_map<(d0, d1, d2, d3, d4, d5) -> (d5)>] : tensor<1x?x1x2x1x4xf32> into tensor - %1 = dim %arg0, %c1 : tensor<1x?x1x2x1x4xf32> + %1 = memref.dim %arg0, %c1 : tensor<1x?x1x2x1x4xf32> %2 = linalg.init_tensor [%1, 2, 4] : tensor %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir --- a/mlir/test/Dialect/Linalg/roundtrip.mlir +++ b/mlir/test/Dialect/Linalg/roundtrip.mlir @@ -110,22 +110,22 @@ func @views(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index) { %c0 = constant 0 : index %0 = muli %arg0, %arg0 : index - %1 = alloc (%0) : memref + %1 = memref.alloc (%0) : memref %2 = linalg.range %arg0:%arg1:%arg2 : !linalg.range - %3 = view %1[%c0][%arg0, %arg0] : memref to memref - %4 = view %1[%c0][%arg0, %arg0] : memref to memref> - dealloc %1 : memref + %3 = memref.view %1[%c0][%arg0, %arg0] : memref to memref + %4 = memref.view %1[%c0][%arg0, %arg0] : memref to memref> + memref.dealloc %1 : memref return } // CHECK-LABEL: func @views // CHECK: muli %{{.*}}, %{{.*}} : index -// CHECK-NEXT: alloc(%{{.*}}) : memref +// CHECK-NEXT: memref.alloc(%{{.*}}) : memref // CHECK-NEXT: range -// CHECK-NEXT: std.view %{{.*}}[%{{.*}}][%{{.*}}] : +// CHECK-NEXT: memref.view %{{.*}}[%{{.*}}][%{{.*}}] : // CHECK-SAME: memref to memref -// CHECK-NEXT: view %{{.*}}[%{{.*}}][%{{.*}}] : +// CHECK-NEXT: memref.view %{{.*}}[%{{.*}}][%{{.*}}] : // CHECK-SAME: memref to memref> -// CHECK-NEXT: dealloc %{{.*}} : memref +// CHECK-NEXT: memref.dealloc %{{.*}} : memref // ----- @@ -172,11 +172,11 @@ // ----- func @transpose(%arg0: memref) { - %0 = transpose %arg0 (i, j, k) -> (k, j, i) : memref to memref (d2 * s1 + s0 + d1 * s2 + d0)>> + %0 = memref.transpose %arg0 (i, j, k) -> (k, j, i) : memref to memref (d2 * s1 + s0 + d1 * s2 + d0)>> return } // CHECK-LABEL: func @transpose -// CHECK: transpose %{{.*}} ([[i:.*]], [[j:.*]], [[k:.*]]) -> ([[k]], [[j]], [[i]]) : +// CHECK: memref.transpose %{{.*}} ([[i:.*]], [[j:.*]], [[k:.*]]) -> ([[k]], [[j]], [[i]]) : // CHECK-SAME: memref to memref // ----- @@ -847,9 +847,9 @@ %c2 = constant 2 : index %c4 = constant 4 : index %c8 = constant 8 : index - %X = dim %input_3d, %c0 : tensor<16x24x32xf32> - %Y = dim %input_3d, %c1 : tensor<16x24x32xf32> - %Z = dim %input_3d, %c2 : tensor<16x24x32xf32> + %X = memref.dim %input_3d, %c0 : tensor<16x24x32xf32> + %Y = memref.dim %input_3d, %c1 : tensor<16x24x32xf32> + %Z = memref.dim %input_3d, %c2 : tensor<16x24x32xf32> %result = linalg.tiled_loop (%i, %j, %k) = (%c0, %c0, %c0) to (%X, %Y, %Z) step (%c2, %c4, %c8) ins(%input_3d, %input_2d: tensor<16x24x32xf32>, tensor<16x32xf32>) diff --git a/mlir/test/Dialect/Linalg/sparse_1d.mlir b/mlir/test/Dialect/Linalg/sparse_1d.mlir --- a/mlir/test/Dialect/Linalg/sparse_1d.mlir +++ b/mlir/test/Dialect/Linalg/sparse_1d.mlir @@ -21,16 +21,16 @@ // CHECK: %[[VAL_3:.*]] = constant 32 : index // CHECK: %[[VAL_4:.*]] = constant 0 : index // CHECK: %[[VAL_5:.*]] = constant 1 : index -// CHECK: %[[VAL_6:.*]] = tensor_to_memref %[[VAL_0]] : memref<32xf32> -// CHECK: %[[VAL_7:.*]] = tensor_to_memref %[[VAL_2]] : memref<32xf32> -// CHECK: %[[VAL_8:.*]] = alloc() : memref<32xf32> +// CHECK: %[[VAL_6:.*]] = memref.buffer_cast %[[VAL_0]] : memref<32xf32> +// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32> +// CHECK: %[[VAL_8:.*]] = 
memref.alloc() : memref<32xf32> // CHECK: linalg.copy(%[[VAL_7]], %[[VAL_8]]) : memref<32xf32>, memref<32xf32> // CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { -// CHECK: %[[VAL_10:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_9]]] : memref<32xf32> +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_9]]] : memref<32xf32> // CHECK: %[[VAL_11:.*]] = addf %[[VAL_10]], %[[VAL_1]] : f32 -// CHECK: store %[[VAL_11]], %[[VAL_8]]{{\[}}%[[VAL_9]]] : memref<32xf32> +// CHECK: memref.store %[[VAL_11]], %[[VAL_8]]{{\[}}%[[VAL_9]]] : memref<32xf32> // CHECK: } -// CHECK: %[[VAL_12:.*]] = tensor_load %[[VAL_8]] : memref<32xf32> +// CHECK: %[[VAL_12:.*]] = memref.tensor_load %[[VAL_8]] : memref<32xf32> // CHECK: return %[[VAL_12]] : tensor<32xf32> // CHECK: } func @add_d(%arga: tensor<32xf32>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> { @@ -51,16 +51,16 @@ // CHECK: %[[VAL_3:.*]] = constant 32 : index // CHECK: %[[VAL_4:.*]] = constant 0 : index // CHECK: %[[VAL_5:.*]] = constant 1 : index -// CHECK: %[[VAL_6:.*]] = tensor_to_memref %[[VAL_0]] : memref<32xf32> -// CHECK: %[[VAL_7:.*]] = tensor_to_memref %[[VAL_2]] : memref<32xf32> -// CHECK: %[[VAL_8:.*]] = alloc() : memref<32xf32> +// CHECK: %[[VAL_6:.*]] = memref.buffer_cast %[[VAL_0]] : memref<32xf32> +// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32> +// CHECK: %[[VAL_8:.*]] = memref.alloc() : memref<32xf32> // CHECK: linalg.copy(%[[VAL_7]], %[[VAL_8]]) : memref<32xf32>, memref<32xf32> // CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { -// CHECK: %[[VAL_10:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_9]]] : memref<32xf32> +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_9]]] : memref<32xf32> // CHECK: %[[VAL_11:.*]] = mulf %[[VAL_10]], %[[VAL_1]] : f32 -// CHECK: store %[[VAL_11]], %[[VAL_8]]{{\[}}%[[VAL_9]]] : memref<32xf32> +// CHECK: memref.store %[[VAL_11]], %[[VAL_8]]{{\[}}%[[VAL_9]]] : memref<32xf32> // CHECK: } -// CHECK: %[[VAL_12:.*]] = tensor_load %[[VAL_8]] : memref<32xf32> +// CHECK: %[[VAL_12:.*]] = memref.tensor_load %[[VAL_8]] : memref<32xf32> // CHECK: return %[[VAL_12]] : tensor<32xf32> // CHECK: } func @mul_d(%arga: tensor<32xf32>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> { @@ -98,25 +98,25 @@ // CHECK: %[[VAL_7:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_4]] : tensor<32xf32> to memref // CHECK: %[[VAL_8:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_4]] : tensor<32xf32> to memref // CHECK: %[[VAL_9:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32xf32> to memref -// CHECK: %[[VAL_10:.*]] = tensor_to_memref %[[VAL_2]] : memref<32xf32> -// CHECK: %[[VAL_11:.*]] = alloc() : memref<32xf32> +// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32> +// CHECK: %[[VAL_11:.*]] = memref.alloc() : memref<32xf32> // CHECK: linalg.copy(%[[VAL_10]], %[[VAL_11]]) : memref<32xf32>, memref<32xf32> -// CHECK: %[[VAL_12:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_13:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref // CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_17:.*]] = cmpi ult, %[[VAL_15]], %[[VAL_13]] : index // CHECK: scf.condition(%[[VAL_17]]) %[[VAL_15]], %[[VAL_16]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_18:.*]]: index, 
%[[VAL_19:.*]]: index): -// CHECK: %[[VAL_20:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_18]]] : memref +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_18]]] : memref // CHECK: %[[VAL_21:.*]] = cmpi eq, %[[VAL_20]], %[[VAL_19]] : index // CHECK: scf.if %[[VAL_21]] { -// CHECK: %[[VAL_22:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_18]]] : memref +// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_18]]] : memref // CHECK: %[[VAL_23:.*]] = addf %[[VAL_22]], %[[VAL_1]] : f32 -// CHECK: store %[[VAL_23]], %[[VAL_11]]{{\[}}%[[VAL_19]]] : memref<32xf32> +// CHECK: memref.store %[[VAL_23]], %[[VAL_11]]{{\[}}%[[VAL_19]]] : memref<32xf32> // CHECK: } else { // CHECK: scf.if %[[VAL_5]] { -// CHECK: store %[[VAL_1]], %[[VAL_11]]{{\[}}%[[VAL_19]]] : memref<32xf32> +// CHECK: memref.store %[[VAL_1]], %[[VAL_11]]{{\[}}%[[VAL_19]]] : memref<32xf32> // CHECK: } else { // CHECK: } // CHECK: } @@ -127,9 +127,9 @@ // CHECK: scf.yield %[[VAL_26]], %[[VAL_27]] : index, index // CHECK: } // CHECK: scf.for %[[VAL_28:.*]] = %[[VAL_29:.*]]#1 to %[[VAL_3]] step %[[VAL_6]] { -// CHECK: store %[[VAL_1]], %[[VAL_11]]{{\[}}%[[VAL_28]]] : memref<32xf32> +// CHECK: memref.store %[[VAL_1]], %[[VAL_11]]{{\[}}%[[VAL_28]]] : memref<32xf32> // CHECK: } -// CHECK: %[[VAL_30:.*]] = tensor_load %[[VAL_11]] : memref<32xf32> +// CHECK: %[[VAL_30:.*]] = memref.tensor_load %[[VAL_11]] : memref<32xf32> // CHECK: return %[[VAL_30]] : tensor<32xf32> // CHECK: } func @add_s(%arga: tensor<32xf32>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> { @@ -151,23 +151,23 @@ // CHECK: %[[VAL_4:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_2]] : tensor<32xf32> to memref // CHECK: %[[VAL_5:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_2]] : tensor<32xf32> to memref // CHECK: %[[VAL_6:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32xf32> to memref -// CHECK: %[[VAL_7:.*]] = tensor_to_memref %[[VAL_1]] : memref<32xf32> -// CHECK: %[[VAL_8:.*]] = alloc() : memref<32xf32> +// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf32> +// CHECK: %[[VAL_8:.*]] = memref.alloc() : memref<32xf32> // CHECK: linalg.copy(%[[VAL_7]], %[[VAL_8]]) : memref<32xf32>, memref<32xf32> -// CHECK: %[[VAL_9:.*]] = load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref -// CHECK: %[[VAL_10:.*]] = load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_3]] { -// CHECK: %[[VAL_12:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_11]]] : memref -// CHECK: %[[VAL_13:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref -// CHECK: %[[VAL_14:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_11]]] : memref +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref // CHECK: %[[VAL_15:.*]] = addf %[[VAL_13]], %[[VAL_14]] : f32 -// CHECK: %[[VAL_16:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref -// CHECK: %[[VAL_17:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref // CHECK: %[[VAL_18:.*]] = addf %[[VAL_16]], %[[VAL_17]] : f32 // CHECK: %[[VAL_19:.*]] = addf %[[VAL_15]], %[[VAL_18]] : f32 -// CHECK: store %[[VAL_19]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : 
memref<32xf32> +// CHECK: memref.store %[[VAL_19]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<32xf32> // CHECK: } -// CHECK: %[[VAL_20:.*]] = tensor_load %[[VAL_8]] : memref<32xf32> +// CHECK: %[[VAL_20:.*]] = memref.tensor_load %[[VAL_8]] : memref<32xf32> // CHECK: return %[[VAL_20]] : tensor<32xf32> // CHECK: } func @repeated_add_s(%arga: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> { @@ -192,18 +192,18 @@ // CHECK: %[[VAL_5:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_3]] : tensor<32xf32> to memref // CHECK: %[[VAL_6:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_3]] : tensor<32xf32> to memref // CHECK: %[[VAL_7:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32xf32> to memref -// CHECK: %[[VAL_8:.*]] = tensor_to_memref %[[VAL_2]] : memref<32xf32> -// CHECK: %[[VAL_9:.*]] = alloc() : memref<32xf32> +// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32> +// CHECK: %[[VAL_9:.*]] = memref.alloc() : memref<32xf32> // CHECK: linalg.copy(%[[VAL_8]], %[[VAL_9]]) : memref<32xf32>, memref<32xf32> -// CHECK: %[[VAL_10:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_11:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_4]] { -// CHECK: %[[VAL_13:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref -// CHECK: %[[VAL_14:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_12]]] : memref +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_12]]] : memref // CHECK: %[[VAL_15:.*]] = mulf %[[VAL_14]], %[[VAL_1]] : f32 -// CHECK: store %[[VAL_15]], %[[VAL_9]]{{\[}}%[[VAL_13]]] : memref<32xf32> +// CHECK: memref.store %[[VAL_15]], %[[VAL_9]]{{\[}}%[[VAL_13]]] : memref<32xf32> // CHECK: } -// CHECK: %[[VAL_16:.*]] = tensor_load %[[VAL_9]] : memref<32xf32> +// CHECK: %[[VAL_16:.*]] = memref.tensor_load %[[VAL_9]] : memref<32xf32> // CHECK: return %[[VAL_16]] : tensor<32xf32> // CHECK: } func @mul_s(%arga: tensor<32xf32>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> { @@ -239,18 +239,18 @@ // CHECK: %[[VAL_3:.*]] = constant 32 : index // CHECK: %[[VAL_4:.*]] = constant 0 : index // CHECK: %[[VAL_5:.*]] = constant 1 : index -// CHECK: %[[VAL_6:.*]] = tensor_to_memref %[[VAL_0]] : memref<32xf32> -// CHECK: %[[VAL_7:.*]] = tensor_to_memref %[[VAL_1]] : memref<32xf32> -// CHECK: %[[VAL_8:.*]] = tensor_to_memref %[[VAL_2]] : memref<32xf32> -// CHECK: %[[VAL_9:.*]] = alloc() : memref<32xf32> +// CHECK: %[[VAL_6:.*]] = memref.buffer_cast %[[VAL_0]] : memref<32xf32> +// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf32> +// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32> +// CHECK: %[[VAL_9:.*]] = memref.alloc() : memref<32xf32> // CHECK: linalg.copy(%[[VAL_8]], %[[VAL_9]]) : memref<32xf32>, memref<32xf32> // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { -// CHECK: %[[VAL_11:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref<32xf32> -// CHECK: %[[VAL_12:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_10]]] : memref<32xf32> +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref<32xf32> +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_10]]] : memref<32xf32> // CHECK: %[[VAL_13:.*]] = addf %[[VAL_11]], %[[VAL_12]] : f32 -// CHECK: store %[[VAL_13]], %[[VAL_9]]{{\[}}%[[VAL_10]]] : 
memref<32xf32> +// CHECK: memref.store %[[VAL_13]], %[[VAL_9]]{{\[}}%[[VAL_10]]] : memref<32xf32> // CHECK: } -// CHECK: %[[VAL_14:.*]] = tensor_load %[[VAL_9]] : memref<32xf32> +// CHECK: %[[VAL_14:.*]] = memref.tensor_load %[[VAL_9]] : memref<32xf32> // CHECK: return %[[VAL_14]] : tensor<32xf32> // CHECK: } func @add_dd(%arga: tensor<32xf32>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> { @@ -271,18 +271,18 @@ // CHECK: %[[VAL_3:.*]] = constant 32 : index // CHECK: %[[VAL_4:.*]] = constant 0 : index // CHECK: %[[VAL_5:.*]] = constant 1 : index -// CHECK: %[[VAL_6:.*]] = tensor_to_memref %[[VAL_0]] : memref<32xf32> -// CHECK: %[[VAL_7:.*]] = tensor_to_memref %[[VAL_1]] : memref<32xf32> -// CHECK: %[[VAL_8:.*]] = tensor_to_memref %[[VAL_2]] : memref<32xf32> -// CHECK: %[[VAL_9:.*]] = alloc() : memref<32xf32> +// CHECK: %[[VAL_6:.*]] = memref.buffer_cast %[[VAL_0]] : memref<32xf32> +// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf32> +// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32> +// CHECK: %[[VAL_9:.*]] = memref.alloc() : memref<32xf32> // CHECK: linalg.copy(%[[VAL_8]], %[[VAL_9]]) : memref<32xf32>, memref<32xf32> // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { -// CHECK: %[[VAL_11:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref<32xf32> -// CHECK: %[[VAL_12:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_10]]] : memref<32xf32> +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref<32xf32> +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_10]]] : memref<32xf32> // CHECK: %[[VAL_13:.*]] = mulf %[[VAL_11]], %[[VAL_12]] : f32 -// CHECK: store %[[VAL_13]], %[[VAL_9]]{{\[}}%[[VAL_10]]] : memref<32xf32> +// CHECK: memref.store %[[VAL_13]], %[[VAL_9]]{{\[}}%[[VAL_10]]] : memref<32xf32> // CHECK: } -// CHECK: %[[VAL_14:.*]] = tensor_load %[[VAL_9]] : memref<32xf32> +// CHECK: %[[VAL_14:.*]] = memref.tensor_load %[[VAL_9]] : memref<32xf32> // CHECK: return %[[VAL_14]] : tensor<32xf32> // CHECK: } func @mul_dd(%arga: tensor<32xf32>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> { @@ -319,31 +319,31 @@ // CHECK: %[[VAL_4:.*]] = constant 0 : index // CHECK: %[[VAL_5:.*]] = constant true // CHECK: %[[VAL_6:.*]] = constant 1 : index -// CHECK: %[[VAL_7:.*]] = tensor_to_memref %[[VAL_0]] : memref<32xf32> +// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_0]] : memref<32xf32> // CHECK: %[[VAL_8:.*]] = linalg.sparse_pointers %[[VAL_1]], %[[VAL_4]] : tensor<32xf32> to memref // CHECK: %[[VAL_9:.*]] = linalg.sparse_indices %[[VAL_1]], %[[VAL_4]] : tensor<32xf32> to memref // CHECK: %[[VAL_10:.*]] = linalg.sparse_values %[[VAL_1]] : tensor<32xf32> to memref -// CHECK: %[[VAL_11:.*]] = tensor_to_memref %[[VAL_2]] : memref<32xf32> -// CHECK: %[[VAL_12:.*]] = alloc() : memref<32xf32> +// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32> +// CHECK: %[[VAL_12:.*]] = memref.alloc() : memref<32xf32> // CHECK: linalg.copy(%[[VAL_11]], %[[VAL_12]]) : memref<32xf32>, memref<32xf32> -// CHECK: %[[VAL_13:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_14:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref // CHECK: %[[VAL_15:.*]]:2 = scf.while (%[[VAL_16:.*]] = %[[VAL_13]], %[[VAL_17:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_18:.*]] = cmpi ult, %[[VAL_16]], 
%[[VAL_14]] : index // CHECK: scf.condition(%[[VAL_18]]) %[[VAL_16]], %[[VAL_17]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_19:.*]]: index, %[[VAL_20:.*]]: index): -// CHECK: %[[VAL_21:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_19]]] : memref +// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_19]]] : memref // CHECK: %[[VAL_22:.*]] = cmpi eq, %[[VAL_21]], %[[VAL_20]] : index // CHECK: scf.if %[[VAL_22]] { -// CHECK: %[[VAL_23:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_20]]] : memref<32xf32> -// CHECK: %[[VAL_24:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_19]]] : memref +// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_20]]] : memref<32xf32> +// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_19]]] : memref // CHECK: %[[VAL_25:.*]] = addf %[[VAL_23]], %[[VAL_24]] : f32 -// CHECK: store %[[VAL_25]], %[[VAL_12]]{{\[}}%[[VAL_20]]] : memref<32xf32> +// CHECK: memref.store %[[VAL_25]], %[[VAL_12]]{{\[}}%[[VAL_20]]] : memref<32xf32> // CHECK: } else { // CHECK: scf.if %[[VAL_5]] { -// CHECK: %[[VAL_26:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_20]]] : memref<32xf32> -// CHECK: store %[[VAL_26]], %[[VAL_12]]{{\[}}%[[VAL_20]]] : memref<32xf32> +// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_20]]] : memref<32xf32> +// CHECK: memref.store %[[VAL_26]], %[[VAL_12]]{{\[}}%[[VAL_20]]] : memref<32xf32> // CHECK: } else { // CHECK: } // CHECK: } @@ -354,10 +354,10 @@ // CHECK: scf.yield %[[VAL_29]], %[[VAL_30]] : index, index // CHECK: } // CHECK: scf.for %[[VAL_31:.*]] = %[[VAL_32:.*]]#1 to %[[VAL_3]] step %[[VAL_6]] { -// CHECK: %[[VAL_33:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_31]]] : memref<32xf32> -// CHECK: store %[[VAL_33]], %[[VAL_12]]{{\[}}%[[VAL_31]]] : memref<32xf32> +// CHECK: %[[VAL_33:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_31]]] : memref<32xf32> +// CHECK: memref.store %[[VAL_33]], %[[VAL_12]]{{\[}}%[[VAL_31]]] : memref<32xf32> // CHECK: } -// CHECK: %[[VAL_34:.*]] = tensor_load %[[VAL_12]] : memref<32xf32> +// CHECK: %[[VAL_34:.*]] = memref.tensor_load %[[VAL_12]] : memref<32xf32> // CHECK: return %[[VAL_34]] : tensor<32xf32> // CHECK: } func @add_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> { @@ -377,23 +377,23 @@ // CHECK-SAME: %[[VAL_2:.*2]]: tensor<32xf32>) -> tensor<32xf32> { // CHECK: %[[VAL_3:.*]] = constant 0 : index // CHECK: %[[VAL_4:.*]] = constant 1 : index -// CHECK: %[[VAL_5:.*]] = tensor_to_memref %[[VAL_0]] : memref<32xf32> +// CHECK: %[[VAL_5:.*]] = memref.buffer_cast %[[VAL_0]] : memref<32xf32> // CHECK: %[[VAL_6:.*]] = linalg.sparse_pointers %[[VAL_1]], %[[VAL_3]] : tensor<32xf32> to memref // CHECK: %[[VAL_7:.*]] = linalg.sparse_indices %[[VAL_1]], %[[VAL_3]] : tensor<32xf32> to memref // CHECK: %[[VAL_8:.*]] = linalg.sparse_values %[[VAL_1]] : tensor<32xf32> to memref -// CHECK: %[[VAL_9:.*]] = tensor_to_memref %[[VAL_2]] : memref<32xf32> -// CHECK: %[[VAL_10:.*]] = alloc() : memref<32xf32> +// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32> +// CHECK: %[[VAL_10:.*]] = memref.alloc() : memref<32xf32> // CHECK: linalg.copy(%[[VAL_9]], %[[VAL_10]]) : memref<32xf32>, memref<32xf32> -// CHECK: %[[VAL_11:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_12:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_4]] { -// CHECK: %[[VAL_14:.*]] = 
load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref -// CHECK: %[[VAL_15:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_14]]] : memref<32xf32> -// CHECK: %[[VAL_16:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_13]]] : memref +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_14]]] : memref<32xf32> +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_13]]] : memref // CHECK: %[[VAL_17:.*]] = mulf %[[VAL_15]], %[[VAL_16]] : f32 -// CHECK: store %[[VAL_17]], %[[VAL_10]]{{\[}}%[[VAL_14]]] : memref<32xf32> +// CHECK: memref.store %[[VAL_17]], %[[VAL_10]]{{\[}}%[[VAL_14]]] : memref<32xf32> // CHECK: } -// CHECK: %[[VAL_18:.*]] = tensor_load %[[VAL_10]] : memref<32xf32> +// CHECK: %[[VAL_18:.*]] = memref.tensor_load %[[VAL_10]] : memref<32xf32> // CHECK: return %[[VAL_18]] : tensor<32xf32> // CHECK: } func @mul_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> { @@ -433,28 +433,28 @@ // CHECK: %[[VAL_7:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_4]] : tensor<32xf32> to memref // CHECK: %[[VAL_8:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_4]] : tensor<32xf32> to memref // CHECK: %[[VAL_9:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32xf32> to memref -// CHECK: %[[VAL_10:.*]] = tensor_to_memref %[[VAL_1]] : memref<32xf32> -// CHECK: %[[VAL_11:.*]] = tensor_to_memref %[[VAL_2]] : memref<32xf32> -// CHECK: %[[VAL_12:.*]] = alloc() : memref<32xf32> +// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf32> +// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32> +// CHECK: %[[VAL_12:.*]] = memref.alloc() : memref<32xf32> // CHECK: linalg.copy(%[[VAL_11]], %[[VAL_12]]) : memref<32xf32>, memref<32xf32> -// CHECK: %[[VAL_13:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_14:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref // CHECK: %[[VAL_15:.*]]:2 = scf.while (%[[VAL_16:.*]] = %[[VAL_13]], %[[VAL_17:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_18:.*]] = cmpi ult, %[[VAL_16]], %[[VAL_14]] : index // CHECK: scf.condition(%[[VAL_18]]) %[[VAL_16]], %[[VAL_17]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_19:.*]]: index, %[[VAL_20:.*]]: index): -// CHECK: %[[VAL_21:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_19]]] : memref +// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_19]]] : memref // CHECK: %[[VAL_22:.*]] = cmpi eq, %[[VAL_21]], %[[VAL_20]] : index // CHECK: scf.if %[[VAL_22]] { -// CHECK: %[[VAL_23:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_19]]] : memref -// CHECK: %[[VAL_24:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_20]]] : memref<32xf32> +// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_19]]] : memref +// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_20]]] : memref<32xf32> // CHECK: %[[VAL_25:.*]] = addf %[[VAL_23]], %[[VAL_24]] : f32 -// CHECK: store %[[VAL_25]], %[[VAL_12]]{{\[}}%[[VAL_20]]] : memref<32xf32> +// CHECK: memref.store %[[VAL_25]], %[[VAL_12]]{{\[}}%[[VAL_20]]] : memref<32xf32> // CHECK: } else { // CHECK: scf.if %[[VAL_5]] { -// CHECK: %[[VAL_26:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_20]]] : memref<32xf32> -// CHECK: store %[[VAL_26]], %[[VAL_12]]{{\[}}%[[VAL_20]]] : memref<32xf32> +// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_20]]] : memref<32xf32> +// CHECK: memref.store %[[VAL_26]], 
%[[VAL_12]]{{\[}}%[[VAL_20]]] : memref<32xf32> // CHECK: } else { // CHECK: } // CHECK: } @@ -465,10 +465,10 @@ // CHECK: scf.yield %[[VAL_29]], %[[VAL_30]] : index, index // CHECK: } // CHECK: scf.for %[[VAL_31:.*]] = %[[VAL_32:.*]]#1 to %[[VAL_3]] step %[[VAL_6]] { -// CHECK: %[[VAL_33:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_31]]] : memref<32xf32> -// CHECK: store %[[VAL_33]], %[[VAL_12]]{{\[}}%[[VAL_31]]] : memref<32xf32> +// CHECK: %[[VAL_33:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_31]]] : memref<32xf32> +// CHECK: memref.store %[[VAL_33]], %[[VAL_12]]{{\[}}%[[VAL_31]]] : memref<32xf32> // CHECK: } -// CHECK: %[[VAL_34:.*]] = tensor_load %[[VAL_12]] : memref<32xf32> +// CHECK: %[[VAL_34:.*]] = memref.tensor_load %[[VAL_12]] : memref<32xf32> // CHECK: return %[[VAL_34]] : tensor<32xf32> // CHECK: } func @add_sd(%arga: tensor<32xf32>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> { @@ -491,20 +491,20 @@ // CHECK: %[[VAL_5:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_3]] : tensor<32xf32> to memref // CHECK: %[[VAL_6:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_3]] : tensor<32xf32> to memref // CHECK: %[[VAL_7:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32xf32> to memref -// CHECK: %[[VAL_8:.*]] = tensor_to_memref %[[VAL_1]] : memref<32xf32> -// CHECK: %[[VAL_9:.*]] = tensor_to_memref %[[VAL_2]] : memref<32xf32> -// CHECK: %[[VAL_10:.*]] = alloc() : memref<32xf32> +// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf32> +// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32> +// CHECK: %[[VAL_10:.*]] = memref.alloc() : memref<32xf32> // CHECK: linalg.copy(%[[VAL_9]], %[[VAL_10]]) : memref<32xf32>, memref<32xf32> -// CHECK: %[[VAL_11:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_12:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_4]] { -// CHECK: %[[VAL_14:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_13]]] : memref -// CHECK: %[[VAL_15:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref -// CHECK: %[[VAL_16:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_14]]] : memref<32xf32> +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_13]]] : memref +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_14]]] : memref<32xf32> // CHECK: %[[VAL_17:.*]] = mulf %[[VAL_15]], %[[VAL_16]] : f32 -// CHECK: store %[[VAL_17]], %[[VAL_10]]{{\[}}%[[VAL_14]]] : memref<32xf32> +// CHECK: memref.store %[[VAL_17]], %[[VAL_10]]{{\[}}%[[VAL_14]]] : memref<32xf32> // CHECK: } -// CHECK: %[[VAL_18:.*]] = tensor_load %[[VAL_10]] : memref<32xf32> +// CHECK: %[[VAL_18:.*]] = memref.tensor_load %[[VAL_10]] : memref<32xf32> // CHECK: return %[[VAL_18]] : tensor<32xf32> // CHECK: } func @mul_sd(%arga: tensor<32xf32>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> { @@ -545,13 +545,13 @@ // CHECK: %[[VAL_8:.*]] = linalg.sparse_pointers %[[VAL_1]], %[[VAL_3]] : tensor<32xf32> to memref // CHECK: %[[VAL_9:.*]] = linalg.sparse_indices %[[VAL_1]], %[[VAL_3]] : tensor<32xf32> to memref // CHECK: %[[VAL_10:.*]] = linalg.sparse_values %[[VAL_1]] : tensor<32xf32> to memref -// CHECK: %[[VAL_11:.*]] = tensor_to_memref %[[VAL_2]] : memref<32xf32> -// CHECK: %[[VAL_12:.*]] = alloc() : memref<32xf32> +// CHECK: %[[VAL_11:.*]] = 
memref.buffer_cast %[[VAL_2]] : memref<32xf32> +// CHECK: %[[VAL_12:.*]] = memref.alloc() : memref<32xf32> // CHECK: linalg.copy(%[[VAL_11]], %[[VAL_12]]) : memref<32xf32>, memref<32xf32> -// CHECK: %[[VAL_13:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_14:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_15:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_16:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_17:.*]]:2 = scf.while (%[[VAL_18:.*]] = %[[VAL_13]], %[[VAL_19:.*]] = %[[VAL_15]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_20:.*]] = cmpi ult, %[[VAL_18]], %[[VAL_14]] : index // CHECK: %[[VAL_21:.*]] = cmpi ult, %[[VAL_19]], %[[VAL_16]] : index @@ -559,28 +559,28 @@ // CHECK: scf.condition(%[[VAL_22]]) %[[VAL_18]], %[[VAL_19]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_23:.*]]: index, %[[VAL_24:.*]]: index): -// CHECK: %[[VAL_25:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_23]]] : memref -// CHECK: %[[VAL_26:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_24]]] : memref +// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_23]]] : memref +// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_24]]] : memref // CHECK: %[[VAL_27:.*]] = cmpi ult, %[[VAL_26]], %[[VAL_25]] : index // CHECK: %[[VAL_28:.*]] = select %[[VAL_27]], %[[VAL_26]], %[[VAL_25]] : index // CHECK: %[[VAL_29:.*]] = cmpi eq, %[[VAL_25]], %[[VAL_28]] : index // CHECK: %[[VAL_30:.*]] = cmpi eq, %[[VAL_26]], %[[VAL_28]] : index // CHECK: %[[VAL_31:.*]] = and %[[VAL_29]], %[[VAL_30]] : i1 // CHECK: scf.if %[[VAL_31]] { -// CHECK: %[[VAL_32:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_23]]] : memref -// CHECK: %[[VAL_33:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_24]]] : memref +// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_23]]] : memref +// CHECK: %[[VAL_33:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_24]]] : memref // CHECK: %[[VAL_34:.*]] = addf %[[VAL_32]], %[[VAL_33]] : f32 -// CHECK: store %[[VAL_34]], %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref<32xf32> +// CHECK: memref.store %[[VAL_34]], %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref<32xf32> // CHECK: } else { // CHECK: %[[VAL_35:.*]] = cmpi eq, %[[VAL_25]], %[[VAL_28]] : index // CHECK: scf.if %[[VAL_35]] { -// CHECK: %[[VAL_36:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_23]]] : memref -// CHECK: store %[[VAL_36]], %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref<32xf32> +// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_23]]] : memref +// CHECK: memref.store %[[VAL_36]], %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref<32xf32> // CHECK: } else { // CHECK: %[[VAL_37:.*]] = cmpi eq, %[[VAL_26]], %[[VAL_28]] : index // CHECK: scf.if %[[VAL_37]] { -// CHECK: %[[VAL_38:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_24]]] : memref -// CHECK: store %[[VAL_38]], %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref<32xf32> +// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_24]]] : memref +// CHECK: memref.store %[[VAL_38]], %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref<32xf32> // CHECK: } else { // CHECK: } // CHECK: } @@ -594,16 +594,16 @@ // CHECK: scf.yield %[[VAL_41]], %[[VAL_44]] : index, index // CHECK: } // CHECK: scf.for %[[VAL_45:.*]] = %[[VAL_46:.*]]#0 to %[[VAL_14]] step %[[VAL_4]] { -// CHECK: %[[VAL_47:.*]] = load 
%[[VAL_6]]{{\[}}%[[VAL_45]]] : memref -// CHECK: %[[VAL_48:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_45]]] : memref -// CHECK: store %[[VAL_48]], %[[VAL_12]]{{\[}}%[[VAL_47]]] : memref<32xf32> +// CHECK: %[[VAL_47:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_45]]] : memref +// CHECK: %[[VAL_48:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_45]]] : memref +// CHECK: memref.store %[[VAL_48]], %[[VAL_12]]{{\[}}%[[VAL_47]]] : memref<32xf32> // CHECK: } // CHECK: scf.for %[[VAL_49:.*]] = %[[VAL_50:.*]]#1 to %[[VAL_16]] step %[[VAL_4]] { -// CHECK: %[[VAL_51:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_49]]] : memref -// CHECK: %[[VAL_52:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_49]]] : memref -// CHECK: store %[[VAL_52]], %[[VAL_12]]{{\[}}%[[VAL_51]]] : memref<32xf32> +// CHECK: %[[VAL_51:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_49]]] : memref +// CHECK: %[[VAL_52:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_49]]] : memref +// CHECK: memref.store %[[VAL_52]], %[[VAL_12]]{{\[}}%[[VAL_51]]] : memref<32xf32> // CHECK: } -// CHECK: %[[VAL_53:.*]] = tensor_load %[[VAL_12]] : memref<32xf32> +// CHECK: %[[VAL_53:.*]] = memref.tensor_load %[[VAL_12]] : memref<32xf32> // CHECK: return %[[VAL_53]] : tensor<32xf32> // CHECK: } func @add_ss(%arga: tensor<32xf32>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> { @@ -629,13 +629,13 @@ // CHECK: %[[VAL_8:.*]] = linalg.sparse_pointers %[[VAL_1]], %[[VAL_3]] : tensor<32xf32> to memref // CHECK: %[[VAL_9:.*]] = linalg.sparse_indices %[[VAL_1]], %[[VAL_3]] : tensor<32xf32> to memref // CHECK: %[[VAL_10:.*]] = linalg.sparse_values %[[VAL_1]] : tensor<32xf32> to memref -// CHECK: %[[VAL_11:.*]] = tensor_to_memref %[[VAL_2]] : memref<32xf32> -// CHECK: %[[VAL_12:.*]] = alloc() : memref<32xf32> +// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32> +// CHECK: %[[VAL_12:.*]] = memref.alloc() : memref<32xf32> // CHECK: linalg.copy(%[[VAL_11]], %[[VAL_12]]) : memref<32xf32>, memref<32xf32> -// CHECK: %[[VAL_13:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_14:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_15:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_16:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_17:.*]]:2 = scf.while (%[[VAL_18:.*]] = %[[VAL_13]], %[[VAL_19:.*]] = %[[VAL_15]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_20:.*]] = cmpi ult, %[[VAL_18]], %[[VAL_14]] : index // CHECK: %[[VAL_21:.*]] = cmpi ult, %[[VAL_19]], %[[VAL_16]] : index @@ -643,18 +643,18 @@ // CHECK: scf.condition(%[[VAL_22]]) %[[VAL_18]], %[[VAL_19]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_23:.*]]: index, %[[VAL_24:.*]]: index): -// CHECK: %[[VAL_25:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_23]]] : memref -// CHECK: %[[VAL_26:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_24]]] : memref +// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_23]]] : memref +// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_24]]] : memref // CHECK: %[[VAL_27:.*]] = cmpi ult, %[[VAL_26]], %[[VAL_25]] : index // CHECK: %[[VAL_28:.*]] = select %[[VAL_27]], %[[VAL_26]], %[[VAL_25]] : index // CHECK: %[[VAL_29:.*]] = cmpi eq, %[[VAL_25]], %[[VAL_28]] : index // CHECK: %[[VAL_30:.*]] = cmpi eq, 
%[[VAL_26]], %[[VAL_28]] : index // CHECK: %[[VAL_31:.*]] = and %[[VAL_29]], %[[VAL_30]] : i1 // CHECK: scf.if %[[VAL_31]] { -// CHECK: %[[VAL_32:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_23]]] : memref -// CHECK: %[[VAL_33:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_24]]] : memref +// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_23]]] : memref +// CHECK: %[[VAL_33:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_24]]] : memref // CHECK: %[[VAL_34:.*]] = mulf %[[VAL_32]], %[[VAL_33]] : f32 -// CHECK: store %[[VAL_34]], %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref<32xf32> +// CHECK: memref.store %[[VAL_34]], %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref<32xf32> // CHECK: } else { // CHECK: } // CHECK: %[[VAL_35:.*]] = cmpi eq, %[[VAL_25]], %[[VAL_28]] : index @@ -665,7 +665,7 @@ // CHECK: %[[VAL_40:.*]] = select %[[VAL_38]], %[[VAL_39]], %[[VAL_24]] : index // CHECK: scf.yield %[[VAL_37]], %[[VAL_40]] : index, index // CHECK: } -// CHECK: %[[VAL_41:.*]] = tensor_load %[[VAL_12]] : memref<32xf32> +// CHECK: %[[VAL_41:.*]] = memref.tensor_load %[[VAL_12]] : memref<32xf32> // CHECK: return %[[VAL_41]] : tensor<32xf32> // CHECK: } func @mul_ss(%arga: tensor<32xf32>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> { @@ -707,13 +707,13 @@ // CHECK: %[[VAL_9:.*]] = linalg.sparse_pointers %[[VAL_1]], %[[VAL_4]] : tensor<16xf32> to memref // CHECK: %[[VAL_10:.*]] = linalg.sparse_indices %[[VAL_1]], %[[VAL_4]] : tensor<16xf32> to memref // CHECK: %[[VAL_11:.*]] = linalg.sparse_values %[[VAL_1]] : tensor<16xf32> to memref -// CHECK: %[[VAL_12:.*]] = tensor_to_memref %[[VAL_3]] : memref<16xf32> -// CHECK: %[[VAL_13:.*]] = alloc() : memref<16xf32> +// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_3]] : memref<16xf32> +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<16xf32> // CHECK: linalg.copy(%[[VAL_12]], %[[VAL_13]]) : memref<16xf32>, memref<16xf32> -// CHECK: %[[VAL_14:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_15:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref -// CHECK: %[[VAL_16:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_17:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref // CHECK: %[[VAL_18:.*]]:2 = scf.while (%[[VAL_19:.*]] = %[[VAL_14]], %[[VAL_20:.*]] = %[[VAL_16]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_21:.*]] = cmpi ult, %[[VAL_19]], %[[VAL_15]] : index // CHECK: %[[VAL_22:.*]] = cmpi ult, %[[VAL_20]], %[[VAL_17]] : index @@ -721,32 +721,32 @@ // CHECK: scf.condition(%[[VAL_23]]) %[[VAL_19]], %[[VAL_20]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_24:.*]]: index, %[[VAL_25:.*]]: index): -// CHECK: %[[VAL_26:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_24]]] : memref -// CHECK: %[[VAL_27:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_25]]] : memref +// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_24]]] : memref +// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_25]]] : memref // CHECK: %[[VAL_28:.*]] = cmpi ult, %[[VAL_27]], %[[VAL_26]] : index // CHECK: %[[VAL_29:.*]] = select %[[VAL_28]], %[[VAL_27]], %[[VAL_26]] : index // CHECK: %[[VAL_30:.*]] = cmpi eq, %[[VAL_26]], %[[VAL_29]] : index // CHECK: %[[VAL_31:.*]] = cmpi eq, %[[VAL_27]], %[[VAL_29]] : index // CHECK: %[[VAL_32:.*]] = and %[[VAL_30]], 
%[[VAL_31]] : i1 // CHECK: scf.if %[[VAL_32]] { -// CHECK: %[[VAL_33:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_24]]] : memref +// CHECK: %[[VAL_33:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_24]]] : memref // CHECK: %[[VAL_34:.*]] = mulf %[[VAL_33]], %[[VAL_2]] : f32 -// CHECK: %[[VAL_35:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_25]]] : memref +// CHECK: %[[VAL_35:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_25]]] : memref // CHECK: %[[VAL_36:.*]] = mulf %[[VAL_35]], %[[VAL_2]] : f32 // CHECK: %[[VAL_37:.*]] = addf %[[VAL_34]], %[[VAL_36]] : f32 -// CHECK: store %[[VAL_37]], %[[VAL_13]]{{\[}}%[[VAL_29]]] : memref<16xf32> +// CHECK: memref.store %[[VAL_37]], %[[VAL_13]]{{\[}}%[[VAL_29]]] : memref<16xf32> // CHECK: } else { // CHECK: %[[VAL_38:.*]] = cmpi eq, %[[VAL_26]], %[[VAL_29]] : index // CHECK: scf.if %[[VAL_38]] { -// CHECK: %[[VAL_39:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_24]]] : memref +// CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_24]]] : memref // CHECK: %[[VAL_40:.*]] = mulf %[[VAL_39]], %[[VAL_2]] : f32 -// CHECK: store %[[VAL_40]], %[[VAL_13]]{{\[}}%[[VAL_29]]] : memref<16xf32> +// CHECK: memref.store %[[VAL_40]], %[[VAL_13]]{{\[}}%[[VAL_29]]] : memref<16xf32> // CHECK: } else { // CHECK: %[[VAL_41:.*]] = cmpi eq, %[[VAL_27]], %[[VAL_29]] : index // CHECK: scf.if %[[VAL_41]] { -// CHECK: %[[VAL_42:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_25]]] : memref +// CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_25]]] : memref // CHECK: %[[VAL_43:.*]] = mulf %[[VAL_42]], %[[VAL_2]] : f32 -// CHECK: store %[[VAL_43]], %[[VAL_13]]{{\[}}%[[VAL_29]]] : memref<16xf32> +// CHECK: memref.store %[[VAL_43]], %[[VAL_13]]{{\[}}%[[VAL_29]]] : memref<16xf32> // CHECK: } else { // CHECK: } // CHECK: } @@ -760,18 +760,18 @@ // CHECK: scf.yield %[[VAL_46]], %[[VAL_49]] : index, index // CHECK: } // CHECK: scf.for %[[VAL_50:.*]] = %[[VAL_51:.*]]#0 to %[[VAL_15]] step %[[VAL_5]] { -// CHECK: %[[VAL_52:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_50]]] : memref -// CHECK: %[[VAL_53:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_50]]] : memref +// CHECK: %[[VAL_52:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_50]]] : memref +// CHECK: %[[VAL_53:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_50]]] : memref // CHECK: %[[VAL_54:.*]] = mulf %[[VAL_53]], %[[VAL_2]] : f32 -// CHECK: store %[[VAL_54]], %[[VAL_13]]{{\[}}%[[VAL_52]]] : memref<16xf32> +// CHECK: memref.store %[[VAL_54]], %[[VAL_13]]{{\[}}%[[VAL_52]]] : memref<16xf32> // CHECK: } // CHECK: scf.for %[[VAL_55:.*]] = %[[VAL_56:.*]]#1 to %[[VAL_17]] step %[[VAL_5]] { -// CHECK: %[[VAL_57:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_55]]] : memref -// CHECK: %[[VAL_58:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_55]]] : memref +// CHECK: %[[VAL_57:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_55]]] : memref +// CHECK: %[[VAL_58:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_55]]] : memref // CHECK: %[[VAL_59:.*]] = mulf %[[VAL_58]], %[[VAL_2]] : f32 -// CHECK: store %[[VAL_59]], %[[VAL_13]]{{\[}}%[[VAL_57]]] : memref<16xf32> +// CHECK: memref.store %[[VAL_59]], %[[VAL_13]]{{\[}}%[[VAL_57]]] : memref<16xf32> // CHECK: } -// CHECK: %[[VAL_60:.*]] = tensor_load %[[VAL_13]] : memref<16xf32> +// CHECK: %[[VAL_60:.*]] = memref.tensor_load %[[VAL_13]] : memref<16xf32> // CHECK: return %[[VAL_60]] : tensor<16xf32> // CHECK: } func @two_way_inv(%arga: tensor<16xf32>, %argb: tensor<16xf32>, %argc: f32, %argx: tensor<16xf32>) -> tensor<16xf32> { @@ -800,13 +800,13 @@ // CHECK: %[[VAL_9:.*]] = linalg.sparse_pointers %[[VAL_1]], %[[VAL_4]] : tensor<16xf32> to memref // CHECK: %[[VAL_10:.*]] = linalg.sparse_indices 
%[[VAL_1]], %[[VAL_4]] : tensor<16xf32> to memref // CHECK: %[[VAL_11:.*]] = linalg.sparse_values %[[VAL_1]] : tensor<16xf32> to memref -// CHECK: %[[VAL_12:.*]] = tensor_to_memref %[[VAL_3]] : memref<16xf32> -// CHECK: %[[VAL_13:.*]] = alloc() : memref<16xf32> +// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_3]] : memref<16xf32> +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<16xf32> // CHECK: linalg.copy(%[[VAL_12]], %[[VAL_13]]) : memref<16xf32>, memref<16xf32> -// CHECK: %[[VAL_14:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_15:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref -// CHECK: %[[VAL_16:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_17:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref // CHECK: %[[VAL_18:.*]]:2 = scf.while (%[[VAL_19:.*]] = %[[VAL_14]], %[[VAL_20:.*]] = %[[VAL_16]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_21:.*]] = cmpi ult, %[[VAL_19]], %[[VAL_15]] : index // CHECK: %[[VAL_22:.*]] = cmpi ult, %[[VAL_20]], %[[VAL_17]] : index @@ -814,31 +814,31 @@ // CHECK: scf.condition(%[[VAL_23]]) %[[VAL_19]], %[[VAL_20]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_24:.*]]: index, %[[VAL_25:.*]]: index): -// CHECK: %[[VAL_26:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_24]]] : memref -// CHECK: %[[VAL_27:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_25]]] : memref +// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_24]]] : memref +// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_25]]] : memref // CHECK: %[[VAL_28:.*]] = cmpi ult, %[[VAL_27]], %[[VAL_26]] : index // CHECK: %[[VAL_29:.*]] = select %[[VAL_28]], %[[VAL_27]], %[[VAL_26]] : index // CHECK: %[[VAL_30:.*]] = cmpi eq, %[[VAL_26]], %[[VAL_29]] : index // CHECK: %[[VAL_31:.*]] = cmpi eq, %[[VAL_27]], %[[VAL_29]] : index // CHECK: %[[VAL_32:.*]] = and %[[VAL_30]], %[[VAL_31]] : i1 // CHECK: scf.if %[[VAL_32]] { -// CHECK: %[[VAL_33:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_24]]] : memref -// CHECK: %[[VAL_34:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_25]]] : memref +// CHECK: %[[VAL_33:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_24]]] : memref +// CHECK: %[[VAL_34:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_25]]] : memref // CHECK: %[[VAL_35:.*]] = addf %[[VAL_33]], %[[VAL_34]] : f32 // CHECK: %[[VAL_36:.*]] = mulf %[[VAL_35]], %[[VAL_2]] : f32 -// CHECK: store %[[VAL_36]], %[[VAL_13]]{{\[}}%[[VAL_29]]] : memref<16xf32> +// CHECK: memref.store %[[VAL_36]], %[[VAL_13]]{{\[}}%[[VAL_29]]] : memref<16xf32> // CHECK: } else { // CHECK: %[[VAL_37:.*]] = cmpi eq, %[[VAL_26]], %[[VAL_29]] : index // CHECK: scf.if %[[VAL_37]] { -// CHECK: %[[VAL_38:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_24]]] : memref +// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_24]]] : memref // CHECK: %[[VAL_39:.*]] = mulf %[[VAL_38]], %[[VAL_2]] : f32 -// CHECK: store %[[VAL_39]], %[[VAL_13]]{{\[}}%[[VAL_29]]] : memref<16xf32> +// CHECK: memref.store %[[VAL_39]], %[[VAL_13]]{{\[}}%[[VAL_29]]] : memref<16xf32> // CHECK: } else { // CHECK: %[[VAL_40:.*]] = cmpi eq, %[[VAL_27]], %[[VAL_29]] : index // CHECK: scf.if %[[VAL_40]] { -// CHECK: %[[VAL_41:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_25]]] : memref +// CHECK: %[[VAL_41:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_25]]] : memref // 
CHECK: %[[VAL_42:.*]] = mulf %[[VAL_41]], %[[VAL_2]] : f32 -// CHECK: store %[[VAL_42]], %[[VAL_13]]{{\[}}%[[VAL_29]]] : memref<16xf32> +// CHECK: memref.store %[[VAL_42]], %[[VAL_13]]{{\[}}%[[VAL_29]]] : memref<16xf32> // CHECK: } else { // CHECK: } // CHECK: } @@ -852,18 +852,18 @@ // CHECK: scf.yield %[[VAL_45]], %[[VAL_48]] : index, index // CHECK: } // CHECK: scf.for %[[VAL_49:.*]] = %[[VAL_50:.*]]#0 to %[[VAL_15]] step %[[VAL_5]] { -// CHECK: %[[VAL_51:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_49]]] : memref -// CHECK: %[[VAL_52:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_49]]] : memref +// CHECK: %[[VAL_51:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_49]]] : memref +// CHECK: %[[VAL_52:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_49]]] : memref // CHECK: %[[VAL_53:.*]] = mulf %[[VAL_52]], %[[VAL_2]] : f32 -// CHECK: store %[[VAL_53]], %[[VAL_13]]{{\[}}%[[VAL_51]]] : memref<16xf32> +// CHECK: memref.store %[[VAL_53]], %[[VAL_13]]{{\[}}%[[VAL_51]]] : memref<16xf32> // CHECK: } // CHECK: scf.for %[[VAL_54:.*]] = %[[VAL_55:.*]]#1 to %[[VAL_17]] step %[[VAL_5]] { -// CHECK: %[[VAL_56:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_54]]] : memref -// CHECK: %[[VAL_57:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_54]]] : memref +// CHECK: %[[VAL_56:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_54]]] : memref +// CHECK: %[[VAL_57:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_54]]] : memref // CHECK: %[[VAL_58:.*]] = mulf %[[VAL_57]], %[[VAL_2]] : f32 -// CHECK: store %[[VAL_58]], %[[VAL_13]]{{\[}}%[[VAL_56]]] : memref<16xf32> +// CHECK: memref.store %[[VAL_58]], %[[VAL_13]]{{\[}}%[[VAL_56]]] : memref<16xf32> // CHECK: } -// CHECK: %[[VAL_59:.*]] = tensor_load %[[VAL_13]] : memref<16xf32> +// CHECK: %[[VAL_59:.*]] = memref.tensor_load %[[VAL_13]] : memref<16xf32> // CHECK: return %[[VAL_59]] : tensor<16xf32> // CHECK: } func @two_way_inv_alt(%arga: tensor<16xf32>, @@ -900,19 +900,19 @@ // CHECK: %[[VAL_3:.*]] = constant 1 : index // CHECK: %[[VAL_4:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_2]] : tensor to memref // CHECK: %[[VAL_5:.*]] = linalg.sparse_values %[[VAL_0]] : tensor to memref -// CHECK: %[[VAL_6:.*]] = tensor_to_memref %[[VAL_1]] : memref -// CHECK: %[[VAL_7:.*]] = alloc() : memref +// CHECK: %[[VAL_6:.*]] = memref.buffer_cast %[[VAL_1]] : memref +// CHECK: %[[VAL_7:.*]] = memref.alloc() : memref // CHECK: linalg.copy(%[[VAL_6]], %[[VAL_7]]) : memref, memref -// CHECK: %[[VAL_8:.*]] = load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref -// CHECK: %[[VAL_9:.*]] = load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_10:.*]] = load %[[VAL_7]][] : memref +// CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_7]][] : memref // CHECK: %[[VAL_11:.*]] = scf.for %[[VAL_12:.*]] = %[[VAL_8]] to %[[VAL_9]] step %[[VAL_3]] iter_args(%[[VAL_13:.*]] = %[[VAL_10]]) -> (f32) { -// CHECK: %[[VAL_14:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_12]]] : memref +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_12]]] : memref // CHECK: %[[VAL_15:.*]] = addf %[[VAL_13]], %[[VAL_14]] : f32 // CHECK: scf.yield %[[VAL_15]] : f32 // CHECK: } -// CHECK: store %[[VAL_16:.*]], %[[VAL_7]][] : memref -// CHECK: %[[VAL_17:.*]] = tensor_load %[[VAL_7]] : memref +// CHECK: memref.store %[[VAL_16:.*]], %[[VAL_7]][] : memref +// CHECK: %[[VAL_17:.*]] = memref.tensor_load %[[VAL_7]] : memref // CHECK: return %[[VAL_17]] : tensor // CHECK: } func @sum_reduction(%arga: tensor, %argx: tensor) -> tensor { @@ -953,13 
+953,13 @@ // CHECK: %[[VAL_8:.*]] = linalg.sparse_pointers %[[VAL_1]], %[[VAL_3]] : tensor<16xf32> to memref // CHECK: %[[VAL_9:.*]] = linalg.sparse_indices %[[VAL_1]], %[[VAL_3]] : tensor<16xf32> to memref // CHECK: %[[VAL_10:.*]] = linalg.sparse_values %[[VAL_1]] : tensor<16xf32> to memref -// CHECK: %[[VAL_11:.*]] = tensor_to_memref %[[VAL_2]] : memref -// CHECK: %[[VAL_12:.*]] = alloc() : memref +// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref +// CHECK: %[[VAL_12:.*]] = memref.alloc() : memref // CHECK: linalg.copy(%[[VAL_11]], %[[VAL_12]]) : memref, memref -// CHECK: %[[VAL_13:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_14:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_15:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_16:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_17:.*]]:2 = scf.while (%[[VAL_18:.*]] = %[[VAL_13]], %[[VAL_19:.*]] = %[[VAL_15]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_20:.*]] = cmpi ult, %[[VAL_18]], %[[VAL_14]] : index // CHECK: %[[VAL_21:.*]] = cmpi ult, %[[VAL_19]], %[[VAL_16]] : index @@ -967,34 +967,34 @@ // CHECK: scf.condition(%[[VAL_22]]) %[[VAL_18]], %[[VAL_19]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_23:.*]]: index, %[[VAL_24:.*]]: index): -// CHECK: %[[VAL_25:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_23]]] : memref -// CHECK: %[[VAL_26:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_24]]] : memref +// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_23]]] : memref +// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_24]]] : memref // CHECK: %[[VAL_27:.*]] = cmpi ult, %[[VAL_26]], %[[VAL_25]] : index // CHECK: %[[VAL_28:.*]] = select %[[VAL_27]], %[[VAL_26]], %[[VAL_25]] : index // CHECK: %[[VAL_29:.*]] = cmpi eq, %[[VAL_25]], %[[VAL_28]] : index // CHECK: %[[VAL_30:.*]] = cmpi eq, %[[VAL_26]], %[[VAL_28]] : index // CHECK: %[[VAL_31:.*]] = and %[[VAL_29]], %[[VAL_30]] : i1 // CHECK: scf.if %[[VAL_31]] { -// CHECK: %[[VAL_32:.*]] = load %[[VAL_12]][] : memref -// CHECK: %[[VAL_33:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_23]]] : memref -// CHECK: %[[VAL_34:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_24]]] : memref +// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_12]][] : memref +// CHECK: %[[VAL_33:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_23]]] : memref +// CHECK: %[[VAL_34:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_24]]] : memref // CHECK: %[[VAL_35:.*]] = addf %[[VAL_33]], %[[VAL_34]] : f32 // CHECK: %[[VAL_36:.*]] = addf %[[VAL_32]], %[[VAL_35]] : f32 -// CHECK: store %[[VAL_36]], %[[VAL_12]][] : memref +// CHECK: memref.store %[[VAL_36]], %[[VAL_12]][] : memref // CHECK: } else { // CHECK: %[[VAL_37:.*]] = cmpi eq, %[[VAL_25]], %[[VAL_28]] : index // CHECK: scf.if %[[VAL_37]] { -// CHECK: %[[VAL_38:.*]] = load %[[VAL_12]][] : memref -// CHECK: %[[VAL_39:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_23]]] : memref +// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_12]][] : memref +// CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_23]]] : memref // CHECK: %[[VAL_40:.*]] = addf %[[VAL_38]], %[[VAL_39]] : f32 -// CHECK: store %[[VAL_40]], %[[VAL_12]][] : memref +// CHECK: memref.store %[[VAL_40]], %[[VAL_12]][] : memref // CHECK: } else { // 
CHECK: %[[VAL_41:.*]] = cmpi eq, %[[VAL_26]], %[[VAL_28]] : index // CHECK: scf.if %[[VAL_41]] { -// CHECK: %[[VAL_42:.*]] = load %[[VAL_12]][] : memref -// CHECK: %[[VAL_43:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_24]]] : memref +// CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_12]][] : memref +// CHECK: %[[VAL_43:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_24]]] : memref // CHECK: %[[VAL_44:.*]] = addf %[[VAL_42]], %[[VAL_43]] : f32 -// CHECK: store %[[VAL_44]], %[[VAL_12]][] : memref +// CHECK: memref.store %[[VAL_44]], %[[VAL_12]][] : memref // CHECK: } else { // CHECK: } // CHECK: } @@ -1007,19 +1007,19 @@ // CHECK: %[[VAL_50:.*]] = select %[[VAL_48]], %[[VAL_49]], %[[VAL_24]] : index // CHECK: scf.yield %[[VAL_47]], %[[VAL_50]] : index, index // CHECK: } -// CHECK: %[[VAL_51:.*]] = load %[[VAL_12]][] : memref +// CHECK: %[[VAL_51:.*]] = memref.load %[[VAL_12]][] : memref // CHECK: %[[VAL_52:.*]] = scf.for %[[VAL_53:.*]] = %[[VAL_54:.*]]#0 to %[[VAL_14]] step %[[VAL_4]] iter_args(%[[VAL_55:.*]] = %[[VAL_51]]) -> (f32) { -// CHECK: %[[VAL_56:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_53]]] : memref +// CHECK: %[[VAL_56:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_53]]] : memref // CHECK: %[[VAL_57:.*]] = addf %[[VAL_55]], %[[VAL_56]] : f32 // CHECK: scf.yield %[[VAL_57]] : f32 // CHECK: } // CHECK: %[[VAL_58:.*]] = scf.for %[[VAL_59:.*]] = %[[VAL_60:.*]]#1 to %[[VAL_16]] step %[[VAL_4]] iter_args(%[[VAL_61:.*]] = %[[VAL_62:.*]]) -> (f32) { -// CHECK: %[[VAL_63:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_59]]] : memref +// CHECK: %[[VAL_63:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_59]]] : memref // CHECK: %[[VAL_64:.*]] = addf %[[VAL_61]], %[[VAL_63]] : f32 // CHECK: scf.yield %[[VAL_64]] : f32 // CHECK: } -// CHECK: store %[[VAL_65:.*]], %[[VAL_12]][] : memref -// CHECK: %[[VAL_66:.*]] = tensor_load %[[VAL_12]] : memref +// CHECK: memref.store %[[VAL_65:.*]], %[[VAL_12]][] : memref +// CHECK: %[[VAL_66:.*]] = memref.tensor_load %[[VAL_12]] : memref // CHECK: return %[[VAL_66]] : tensor // CHECK: } func @sum_reduction_ss(%arga: tensor<16xf32>, @@ -1065,18 +1065,18 @@ // CHECK: %[[VAL_6:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_4]] : tensor<16xf32> to memref // CHECK: %[[VAL_7:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_4]] : tensor<16xf32> to memref // CHECK: %[[VAL_8:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<16xf32> to memref -// CHECK: %[[VAL_9:.*]] = tensor_to_memref %[[VAL_1]] : memref +// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref // CHECK: %[[VAL_10:.*]] = linalg.sparse_pointers %[[VAL_2]], %[[VAL_4]] : tensor<16xf32> to memref // CHECK: %[[VAL_11:.*]] = linalg.sparse_indices %[[VAL_2]], %[[VAL_4]] : tensor<16xf32> to memref // CHECK: %[[VAL_12:.*]] = linalg.sparse_values %[[VAL_2]] : tensor<16xf32> to memref -// CHECK: %[[VAL_13:.*]] = tensor_to_memref %[[VAL_3]] : memref -// CHECK: %[[VAL_14:.*]] = alloc() : memref +// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_3]] : memref +// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref // CHECK: linalg.copy(%[[VAL_13]], %[[VAL_14]]) : memref, memref -// CHECK: %[[VAL_15:.*]] = load %[[VAL_9]][] : memref -// CHECK: %[[VAL_16:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_17:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref -// CHECK: %[[VAL_18:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_19:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_9]][] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref +// 
CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_5]]] : memref // CHECK: %[[VAL_20:.*]]:2 = scf.while (%[[VAL_21:.*]] = %[[VAL_16]], %[[VAL_22:.*]] = %[[VAL_18]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_23:.*]] = cmpi ult, %[[VAL_21]], %[[VAL_17]] : index // CHECK: %[[VAL_24:.*]] = cmpi ult, %[[VAL_22]], %[[VAL_19]] : index @@ -1084,36 +1084,36 @@ // CHECK: scf.condition(%[[VAL_25]]) %[[VAL_21]], %[[VAL_22]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_26:.*]]: index, %[[VAL_27:.*]]: index): -// CHECK: %[[VAL_28:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_26]]] : memref -// CHECK: %[[VAL_29:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_28:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_26]]] : memref +// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_27]]] : memref // CHECK: %[[VAL_30:.*]] = cmpi ult, %[[VAL_29]], %[[VAL_28]] : index // CHECK: %[[VAL_31:.*]] = select %[[VAL_30]], %[[VAL_29]], %[[VAL_28]] : index // CHECK: %[[VAL_32:.*]] = cmpi eq, %[[VAL_28]], %[[VAL_31]] : index // CHECK: %[[VAL_33:.*]] = cmpi eq, %[[VAL_29]], %[[VAL_31]] : index // CHECK: %[[VAL_34:.*]] = and %[[VAL_32]], %[[VAL_33]] : i1 // CHECK: scf.if %[[VAL_34]] { -// CHECK: %[[VAL_35:.*]] = load %[[VAL_14]][] : memref -// CHECK: %[[VAL_36:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_26]]] : memref +// CHECK: %[[VAL_35:.*]] = memref.load %[[VAL_14]][] : memref +// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_26]]] : memref // CHECK: %[[VAL_37:.*]] = mulf %[[VAL_36]], %[[VAL_15]] : f32 -// CHECK: %[[VAL_38:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_27]]] : memref // CHECK: %[[VAL_39:.*]] = addf %[[VAL_37]], %[[VAL_38]] : f32 // CHECK: %[[VAL_40:.*]] = addf %[[VAL_35]], %[[VAL_39]] : f32 -// CHECK: store %[[VAL_40]], %[[VAL_14]][] : memref +// CHECK: memref.store %[[VAL_40]], %[[VAL_14]][] : memref // CHECK: } else { // CHECK: %[[VAL_41:.*]] = cmpi eq, %[[VAL_28]], %[[VAL_31]] : index // CHECK: scf.if %[[VAL_41]] { -// CHECK: %[[VAL_42:.*]] = load %[[VAL_14]][] : memref -// CHECK: %[[VAL_43:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_26]]] : memref +// CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_14]][] : memref +// CHECK: %[[VAL_43:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_26]]] : memref // CHECK: %[[VAL_44:.*]] = mulf %[[VAL_43]], %[[VAL_15]] : f32 // CHECK: %[[VAL_45:.*]] = addf %[[VAL_42]], %[[VAL_44]] : f32 -// CHECK: store %[[VAL_45]], %[[VAL_14]][] : memref +// CHECK: memref.store %[[VAL_45]], %[[VAL_14]][] : memref // CHECK: } else { // CHECK: %[[VAL_46:.*]] = cmpi eq, %[[VAL_29]], %[[VAL_31]] : index // CHECK: scf.if %[[VAL_46]] { -// CHECK: %[[VAL_47:.*]] = load %[[VAL_14]][] : memref -// CHECK: %[[VAL_48:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_47:.*]] = memref.load %[[VAL_14]][] : memref +// CHECK: %[[VAL_48:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_27]]] : memref // CHECK: %[[VAL_49:.*]] = addf %[[VAL_47]], %[[VAL_48]] : f32 -// CHECK: store %[[VAL_49]], %[[VAL_14]][] : memref +// CHECK: memref.store %[[VAL_49]], %[[VAL_14]][] : memref // CHECK: } else { // CHECK: } // CHECK: } @@ -1126,20 +1126,20 @@ // CHECK: %[[VAL_55:.*]] = select %[[VAL_53]], %[[VAL_54]], %[[VAL_27]] : index // CHECK: scf.yield %[[VAL_52]], %[[VAL_55]] : index, index // CHECK: } -// CHECK: %[[VAL_56:.*]] = load %[[VAL_14]][] : memref 
+// CHECK: %[[VAL_56:.*]] = memref.load %[[VAL_14]][] : memref // CHECK: %[[VAL_57:.*]] = scf.for %[[VAL_58:.*]] = %[[VAL_59:.*]]#0 to %[[VAL_17]] step %[[VAL_5]] iter_args(%[[VAL_60:.*]] = %[[VAL_56]]) -> (f32) { -// CHECK: %[[VAL_61:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_58]]] : memref +// CHECK: %[[VAL_61:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_58]]] : memref // CHECK: %[[VAL_62:.*]] = mulf %[[VAL_61]], %[[VAL_15]] : f32 // CHECK: %[[VAL_63:.*]] = addf %[[VAL_60]], %[[VAL_62]] : f32 // CHECK: scf.yield %[[VAL_63]] : f32 // CHECK: } // CHECK: %[[VAL_64:.*]] = scf.for %[[VAL_65:.*]] = %[[VAL_66:.*]]#1 to %[[VAL_19]] step %[[VAL_5]] iter_args(%[[VAL_67:.*]] = %[[VAL_68:.*]]) -> (f32) { -// CHECK: %[[VAL_69:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_65]]] : memref +// CHECK: %[[VAL_69:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_65]]] : memref // CHECK: %[[VAL_70:.*]] = addf %[[VAL_67]], %[[VAL_69]] : f32 // CHECK: scf.yield %[[VAL_70]] : f32 // CHECK: } -// CHECK: store %[[VAL_71:.*]], %[[VAL_14]][] : memref -// CHECK: %[[VAL_72:.*]] = tensor_load %[[VAL_14]] : memref +// CHECK: memref.store %[[VAL_71:.*]], %[[VAL_14]][] : memref +// CHECK: %[[VAL_72:.*]] = memref.tensor_load %[[VAL_14]] : memref // CHECK: return %[[VAL_72]] : tensor // CHECK: } func @sum_reduction_inv(%arga: tensor<16xf32>, @@ -1188,22 +1188,22 @@ // CHECK: %[[VAL_5:.*]] = constant 0 : index // CHECK: %[[VAL_6:.*]] = constant true // CHECK: %[[VAL_7:.*]] = constant 1 : index -// CHECK: %[[VAL_8:.*]] = tensor_to_memref %[[VAL_0]] : memref +// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_0]] : memref // CHECK: %[[VAL_9:.*]] = linalg.sparse_pointers %[[VAL_1]], %[[VAL_5]] : tensor to memref // CHECK: %[[VAL_10:.*]] = linalg.sparse_indices %[[VAL_1]], %[[VAL_5]] : tensor to memref // CHECK: %[[VAL_11:.*]] = linalg.sparse_values %[[VAL_1]] : tensor to memref -// CHECK: %[[VAL_12:.*]] = tensor_to_memref %[[VAL_2]] : memref +// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_2]] : memref // CHECK: %[[VAL_13:.*]] = linalg.sparse_pointers %[[VAL_3]], %[[VAL_5]] : tensor to memref // CHECK: %[[VAL_14:.*]] = linalg.sparse_indices %[[VAL_3]], %[[VAL_5]] : tensor to memref // CHECK: %[[VAL_15:.*]] = linalg.sparse_values %[[VAL_3]] : tensor to memref -// CHECK: %[[VAL_16:.*]] = dim %[[VAL_4]], %[[VAL_5]] : tensor -// CHECK: %[[VAL_17:.*]] = tensor_to_memref %[[VAL_4]] : memref -// CHECK: %[[VAL_18:.*]] = alloc(%[[VAL_16]]) : memref +// CHECK: %[[VAL_16:.*]] = memref.dim %[[VAL_4]], %[[VAL_5]] : tensor +// CHECK: %[[VAL_17:.*]] = memref.buffer_cast %[[VAL_4]] : memref +// CHECK: %[[VAL_18:.*]] = memref.alloc(%[[VAL_16]]) : memref // CHECK: linalg.copy(%[[VAL_17]], %[[VAL_18]]) : memref, memref -// CHECK: %[[VAL_19:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref -// CHECK: %[[VAL_20:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref -// CHECK: %[[VAL_21:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_5]]] : memref -// CHECK: %[[VAL_22:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_7]]] : memref +// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref +// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_23:.*]]:3 = scf.while (%[[VAL_24:.*]] = %[[VAL_19]], %[[VAL_25:.*]] = %[[VAL_21]], %[[VAL_26:.*]] = %[[VAL_5]]) : (index, index, index) -> (index, index, index) { // CHECK: %[[VAL_27:.*]] = cmpi ult, %[[VAL_24]], %[[VAL_20]] : index // CHECK: 
%[[VAL_28:.*]] = cmpi ult, %[[VAL_25]], %[[VAL_22]] : index @@ -1211,44 +1211,44 @@ // CHECK: scf.condition(%[[VAL_29]]) %[[VAL_24]], %[[VAL_25]], %[[VAL_26]] : index, index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_30:.*]]: index, %[[VAL_31:.*]]: index, %[[VAL_32:.*]]: index): -// CHECK: %[[VAL_33:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_30]]] : memref -// CHECK: %[[VAL_34:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_31]]] : memref +// CHECK: %[[VAL_33:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_30]]] : memref +// CHECK: %[[VAL_34:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_31]]] : memref // CHECK: %[[VAL_35:.*]] = cmpi eq, %[[VAL_33]], %[[VAL_32]] : index // CHECK: %[[VAL_36:.*]] = cmpi eq, %[[VAL_34]], %[[VAL_32]] : index // CHECK: %[[VAL_37:.*]] = and %[[VAL_35]], %[[VAL_36]] : i1 // CHECK: scf.if %[[VAL_37]] { -// CHECK: %[[VAL_38:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref -// CHECK: %[[VAL_39:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_30]]] : memref +// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref +// CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_30]]] : memref // CHECK: %[[VAL_40:.*]] = addf %[[VAL_38]], %[[VAL_39]] : f64 -// CHECK: %[[VAL_41:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_32]]] : memref -// CHECK: %[[VAL_42:.*]] = load %[[VAL_15]]{{\[}}%[[VAL_31]]] : memref +// CHECK: %[[VAL_41:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_32]]] : memref +// CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_31]]] : memref // CHECK: %[[VAL_43:.*]] = addf %[[VAL_41]], %[[VAL_42]] : f64 // CHECK: %[[VAL_44:.*]] = addf %[[VAL_40]], %[[VAL_43]] : f64 -// CHECK: store %[[VAL_44]], %[[VAL_18]]{{\[}}%[[VAL_32]]] : memref +// CHECK: memref.store %[[VAL_44]], %[[VAL_18]]{{\[}}%[[VAL_32]]] : memref // CHECK: } else { // CHECK: %[[VAL_45:.*]] = cmpi eq, %[[VAL_33]], %[[VAL_32]] : index // CHECK: scf.if %[[VAL_45]] { -// CHECK: %[[VAL_46:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref -// CHECK: %[[VAL_47:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_30]]] : memref +// CHECK: %[[VAL_46:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref +// CHECK: %[[VAL_47:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_30]]] : memref // CHECK: %[[VAL_48:.*]] = addf %[[VAL_46]], %[[VAL_47]] : f64 -// CHECK: %[[VAL_49:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_32]]] : memref +// CHECK: %[[VAL_49:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_32]]] : memref // CHECK: %[[VAL_50:.*]] = addf %[[VAL_48]], %[[VAL_49]] : f64 -// CHECK: store %[[VAL_50]], %[[VAL_18]]{{\[}}%[[VAL_32]]] : memref +// CHECK: memref.store %[[VAL_50]], %[[VAL_18]]{{\[}}%[[VAL_32]]] : memref // CHECK: } else { // CHECK: %[[VAL_51:.*]] = cmpi eq, %[[VAL_34]], %[[VAL_32]] : index // CHECK: scf.if %[[VAL_51]] { -// CHECK: %[[VAL_52:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref -// CHECK: %[[VAL_53:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_32]]] : memref -// CHECK: %[[VAL_54:.*]] = load %[[VAL_15]]{{\[}}%[[VAL_31]]] : memref +// CHECK: %[[VAL_52:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref +// CHECK: %[[VAL_53:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_32]]] : memref +// CHECK: %[[VAL_54:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_31]]] : memref // CHECK: %[[VAL_55:.*]] = addf %[[VAL_53]], %[[VAL_54]] : f64 // CHECK: %[[VAL_56:.*]] = addf %[[VAL_52]], %[[VAL_55]] : f64 -// CHECK: store %[[VAL_56]], %[[VAL_18]]{{\[}}%[[VAL_32]]] : memref +// CHECK: memref.store %[[VAL_56]], %[[VAL_18]]{{\[}}%[[VAL_32]]] : memref // CHECK: } else { // CHECK: scf.if %[[VAL_6]] { -// CHECK: %[[VAL_57:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref 
-// CHECK: %[[VAL_58:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_32]]] : memref +// CHECK: %[[VAL_57:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref +// CHECK: %[[VAL_58:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_32]]] : memref // CHECK: %[[VAL_59:.*]] = addf %[[VAL_57]], %[[VAL_58]] : f64 -// CHECK: store %[[VAL_59]], %[[VAL_18]]{{\[}}%[[VAL_32]]] : memref +// CHECK: memref.store %[[VAL_59]], %[[VAL_18]]{{\[}}%[[VAL_32]]] : memref // CHECK: } else { // CHECK: } // CHECK: } @@ -1268,21 +1268,21 @@ // CHECK: scf.condition(%[[VAL_71]]) %[[VAL_68]], %[[VAL_70]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_72:.*]]: index, %[[VAL_73:.*]]: index): -// CHECK: %[[VAL_74:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_72]]] : memref +// CHECK: %[[VAL_74:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_72]]] : memref // CHECK: %[[VAL_75:.*]] = cmpi eq, %[[VAL_74]], %[[VAL_73]] : index // CHECK: scf.if %[[VAL_75]] { -// CHECK: %[[VAL_76:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_73]]] : memref -// CHECK: %[[VAL_77:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_72]]] : memref +// CHECK: %[[VAL_76:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_73]]] : memref +// CHECK: %[[VAL_77:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_72]]] : memref // CHECK: %[[VAL_78:.*]] = addf %[[VAL_76]], %[[VAL_77]] : f64 -// CHECK: %[[VAL_79:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_73]]] : memref +// CHECK: %[[VAL_79:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_73]]] : memref // CHECK: %[[VAL_80:.*]] = addf %[[VAL_78]], %[[VAL_79]] : f64 -// CHECK: store %[[VAL_80]], %[[VAL_18]]{{\[}}%[[VAL_73]]] : memref +// CHECK: memref.store %[[VAL_80]], %[[VAL_18]]{{\[}}%[[VAL_73]]] : memref // CHECK: } else { // CHECK: scf.if %[[VAL_6]] { -// CHECK: %[[VAL_81:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_73]]] : memref -// CHECK: %[[VAL_82:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_73]]] : memref +// CHECK: %[[VAL_81:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_73]]] : memref +// CHECK: %[[VAL_82:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_73]]] : memref // CHECK: %[[VAL_83:.*]] = addf %[[VAL_81]], %[[VAL_82]] : f64 -// CHECK: store %[[VAL_83]], %[[VAL_18]]{{\[}}%[[VAL_73]]] : memref +// CHECK: memref.store %[[VAL_83]], %[[VAL_18]]{{\[}}%[[VAL_73]]] : memref // CHECK: } else { // CHECK: } // CHECK: } @@ -1297,21 +1297,21 @@ // CHECK: scf.condition(%[[VAL_93]]) %[[VAL_89]], %[[VAL_91]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_94:.*]]: index, %[[VAL_95:.*]]: index): -// CHECK: %[[VAL_96:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_94]]] : memref +// CHECK: %[[VAL_96:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_94]]] : memref // CHECK: %[[VAL_97:.*]] = cmpi eq, %[[VAL_96]], %[[VAL_95]] : index // CHECK: scf.if %[[VAL_97]] { -// CHECK: %[[VAL_98:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_95]]] : memref -// CHECK: %[[VAL_99:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_95]]] : memref -// CHECK: %[[VAL_100:.*]] = load %[[VAL_15]]{{\[}}%[[VAL_94]]] : memref +// CHECK: %[[VAL_98:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_95]]] : memref +// CHECK: %[[VAL_99:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_95]]] : memref +// CHECK: %[[VAL_100:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_94]]] : memref // CHECK: %[[VAL_101:.*]] = addf %[[VAL_99]], %[[VAL_100]] : f64 // CHECK: %[[VAL_102:.*]] = addf %[[VAL_98]], %[[VAL_101]] : f64 -// CHECK: store %[[VAL_102]], %[[VAL_18]]{{\[}}%[[VAL_95]]] : memref +// CHECK: memref.store %[[VAL_102]], %[[VAL_18]]{{\[}}%[[VAL_95]]] : memref // CHECK: } else { // CHECK: scf.if %[[VAL_6]] { -// CHECK: %[[VAL_103:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_95]]] : memref -// CHECK: %[[VAL_104:.*]] = load 
%[[VAL_12]]{{\[}}%[[VAL_95]]] : memref +// CHECK: %[[VAL_103:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_95]]] : memref +// CHECK: %[[VAL_104:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_95]]] : memref // CHECK: %[[VAL_105:.*]] = addf %[[VAL_103]], %[[VAL_104]] : f64 -// CHECK: store %[[VAL_105]], %[[VAL_18]]{{\[}}%[[VAL_95]]] : memref +// CHECK: memref.store %[[VAL_105]], %[[VAL_18]]{{\[}}%[[VAL_95]]] : memref // CHECK: } else { // CHECK: } // CHECK: } @@ -1322,12 +1322,12 @@ // CHECK: scf.yield %[[VAL_108]], %[[VAL_109]] : index, index // CHECK: } // CHECK: scf.for %[[VAL_110:.*]] = %[[VAL_111:.*]]#1 to %[[VAL_16]] step %[[VAL_7]] { -// CHECK: %[[VAL_112:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_110]]] : memref -// CHECK: %[[VAL_113:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_110]]] : memref +// CHECK: %[[VAL_112:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_110]]] : memref +// CHECK: %[[VAL_113:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_110]]] : memref // CHECK: %[[VAL_114:.*]] = addf %[[VAL_112]], %[[VAL_113]] : f64 -// CHECK: store %[[VAL_114]], %[[VAL_18]]{{\[}}%[[VAL_110]]] : memref +// CHECK: memref.store %[[VAL_114]], %[[VAL_18]]{{\[}}%[[VAL_110]]] : memref // CHECK: } -// CHECK: %[[VAL_115:.*]] = tensor_load %[[VAL_18]] : memref +// CHECK: %[[VAL_115:.*]] = memref.tensor_load %[[VAL_18]] : memref // CHECK: return %[[VAL_115]] : tensor // CHECK: } func @four_tensors_op(%arga: tensor, @@ -1380,15 +1380,15 @@ // CHECK: %[[VAL_12:.*]] = linalg.sparse_pointers %[[VAL_2]], %[[VAL_4]] : tensor to memref // CHECK: %[[VAL_13:.*]] = linalg.sparse_indices %[[VAL_2]], %[[VAL_4]] : tensor to memref // CHECK: %[[VAL_14:.*]] = linalg.sparse_values %[[VAL_2]] : tensor to memref -// CHECK: %[[VAL_15:.*]] = tensor_to_memref %[[VAL_3]] : memref -// CHECK: %[[VAL_16:.*]] = alloc() : memref +// CHECK: %[[VAL_15:.*]] = memref.buffer_cast %[[VAL_3]] : memref +// CHECK: %[[VAL_16:.*]] = memref.alloc() : memref // CHECK: linalg.copy(%[[VAL_15]], %[[VAL_16]]) : memref, memref -// CHECK: %[[VAL_17:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_18:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref -// CHECK: %[[VAL_19:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_20:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref -// CHECK: %[[VAL_21:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_22:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_5]]] : memref // CHECK: %[[VAL_23:.*]]:3 = scf.while (%[[VAL_24:.*]] = %[[VAL_17]], %[[VAL_25:.*]] = %[[VAL_19]], %[[VAL_26:.*]] = %[[VAL_21]]) : (index, index, index) -> (index, index, index) { // CHECK: %[[VAL_27:.*]] = cmpi ult, %[[VAL_24]], %[[VAL_18]] : index // CHECK: %[[VAL_28:.*]] = cmpi ult, %[[VAL_25]], %[[VAL_20]] : index @@ -1398,11 +1398,11 @@ // CHECK: scf.condition(%[[VAL_31]]) %[[VAL_24]], %[[VAL_25]], %[[VAL_26]] : index, index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_32:.*]]: index, %[[VAL_33:.*]]: index, %[[VAL_34:.*]]: index): -// CHECK: %[[VAL_35:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_32]]] : memref -// CHECK: %[[VAL_36:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_33]]] : memref +// 
CHECK: %[[VAL_35:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_32]]] : memref +// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_33]]] : memref // CHECK: %[[VAL_37:.*]] = cmpi ult, %[[VAL_36]], %[[VAL_35]] : index // CHECK: %[[VAL_38:.*]] = select %[[VAL_37]], %[[VAL_36]], %[[VAL_35]] : index -// CHECK: %[[VAL_39:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_34]]] : memref +// CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_34]]] : memref // CHECK: %[[VAL_40:.*]] = cmpi ult, %[[VAL_39]], %[[VAL_38]] : index // CHECK: %[[VAL_41:.*]] = select %[[VAL_40]], %[[VAL_39]], %[[VAL_38]] : index // CHECK: %[[VAL_42:.*]] = cmpi eq, %[[VAL_35]], %[[VAL_41]] : index @@ -1411,68 +1411,68 @@ // CHECK: %[[VAL_45:.*]] = cmpi eq, %[[VAL_39]], %[[VAL_41]] : index // CHECK: %[[VAL_46:.*]] = and %[[VAL_44]], %[[VAL_45]] : i1 // CHECK: scf.if %[[VAL_46]] { -// CHECK: %[[VAL_47:.*]] = load %[[VAL_16]][] : memref -// CHECK: %[[VAL_48:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref +// CHECK: %[[VAL_47:.*]] = memref.load %[[VAL_16]][] : memref +// CHECK: %[[VAL_48:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref // CHECK: %[[VAL_49:.*]] = addf %[[VAL_47]], %[[VAL_48]] : f64 -// CHECK: %[[VAL_50:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_50:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_33]]] : memref // CHECK: %[[VAL_51:.*]] = addf %[[VAL_49]], %[[VAL_50]] : f64 -// CHECK: %[[VAL_52:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_34]]] : memref +// CHECK: %[[VAL_52:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_34]]] : memref // CHECK: %[[VAL_53:.*]] = addf %[[VAL_51]], %[[VAL_52]] : f64 -// CHECK: store %[[VAL_53]], %[[VAL_16]][] : memref +// CHECK: memref.store %[[VAL_53]], %[[VAL_16]][] : memref // CHECK: } else { // CHECK: %[[VAL_54:.*]] = cmpi eq, %[[VAL_36]], %[[VAL_41]] : index // CHECK: %[[VAL_55:.*]] = cmpi eq, %[[VAL_39]], %[[VAL_41]] : index // CHECK: %[[VAL_56:.*]] = and %[[VAL_54]], %[[VAL_55]] : i1 // CHECK: scf.if %[[VAL_56]] { -// CHECK: %[[VAL_57:.*]] = load %[[VAL_16]][] : memref -// CHECK: %[[VAL_58:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_57:.*]] = memref.load %[[VAL_16]][] : memref +// CHECK: %[[VAL_58:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_33]]] : memref // CHECK: %[[VAL_59:.*]] = addf %[[VAL_57]], %[[VAL_58]] : f64 -// CHECK: %[[VAL_60:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_34]]] : memref +// CHECK: %[[VAL_60:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_34]]] : memref // CHECK: %[[VAL_61:.*]] = addf %[[VAL_59]], %[[VAL_60]] : f64 -// CHECK: store %[[VAL_61]], %[[VAL_16]][] : memref +// CHECK: memref.store %[[VAL_61]], %[[VAL_16]][] : memref // CHECK: } else { // CHECK: %[[VAL_62:.*]] = cmpi eq, %[[VAL_35]], %[[VAL_41]] : index // CHECK: %[[VAL_63:.*]] = cmpi eq, %[[VAL_39]], %[[VAL_41]] : index // CHECK: %[[VAL_64:.*]] = and %[[VAL_62]], %[[VAL_63]] : i1 // CHECK: scf.if %[[VAL_64]] { -// CHECK: %[[VAL_65:.*]] = load %[[VAL_16]][] : memref -// CHECK: %[[VAL_66:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref +// CHECK: %[[VAL_65:.*]] = memref.load %[[VAL_16]][] : memref +// CHECK: %[[VAL_66:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref // CHECK: %[[VAL_67:.*]] = addf %[[VAL_65]], %[[VAL_66]] : f64 -// CHECK: %[[VAL_68:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_34]]] : memref +// CHECK: %[[VAL_68:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_34]]] : memref // CHECK: %[[VAL_69:.*]] = addf %[[VAL_67]], %[[VAL_68]] : f64 -// CHECK: store %[[VAL_69]], %[[VAL_16]][] : memref +// CHECK: memref.store %[[VAL_69]], %[[VAL_16]][] : memref // 
CHECK: } else { // CHECK: %[[VAL_70:.*]] = cmpi eq, %[[VAL_39]], %[[VAL_41]] : index // CHECK: scf.if %[[VAL_70]] { -// CHECK: %[[VAL_71:.*]] = load %[[VAL_16]][] : memref -// CHECK: %[[VAL_72:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_34]]] : memref +// CHECK: %[[VAL_71:.*]] = memref.load %[[VAL_16]][] : memref +// CHECK: %[[VAL_72:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_34]]] : memref // CHECK: %[[VAL_73:.*]] = addf %[[VAL_71]], %[[VAL_72]] : f64 -// CHECK: store %[[VAL_73]], %[[VAL_16]][] : memref +// CHECK: memref.store %[[VAL_73]], %[[VAL_16]][] : memref // CHECK: } else { // CHECK: %[[VAL_74:.*]] = cmpi eq, %[[VAL_35]], %[[VAL_41]] : index // CHECK: %[[VAL_75:.*]] = cmpi eq, %[[VAL_36]], %[[VAL_41]] : index // CHECK: %[[VAL_76:.*]] = and %[[VAL_74]], %[[VAL_75]] : i1 // CHECK: scf.if %[[VAL_76]] { -// CHECK: %[[VAL_77:.*]] = load %[[VAL_16]][] : memref -// CHECK: %[[VAL_78:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref +// CHECK: %[[VAL_77:.*]] = memref.load %[[VAL_16]][] : memref +// CHECK: %[[VAL_78:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref // CHECK: %[[VAL_79:.*]] = addf %[[VAL_77]], %[[VAL_78]] : f64 -// CHECK: %[[VAL_80:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_80:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_33]]] : memref // CHECK: %[[VAL_81:.*]] = addf %[[VAL_79]], %[[VAL_80]] : f64 -// CHECK: store %[[VAL_81]], %[[VAL_16]][] : memref +// CHECK: memref.store %[[VAL_81]], %[[VAL_16]][] : memref // CHECK: } else { // CHECK: %[[VAL_82:.*]] = cmpi eq, %[[VAL_36]], %[[VAL_41]] : index // CHECK: scf.if %[[VAL_82]] { -// CHECK: %[[VAL_83:.*]] = load %[[VAL_16]][] : memref -// CHECK: %[[VAL_84:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_83:.*]] = memref.load %[[VAL_16]][] : memref +// CHECK: %[[VAL_84:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_33]]] : memref // CHECK: %[[VAL_85:.*]] = addf %[[VAL_83]], %[[VAL_84]] : f64 -// CHECK: store %[[VAL_85]], %[[VAL_16]][] : memref +// CHECK: memref.store %[[VAL_85]], %[[VAL_16]][] : memref // CHECK: } else { // CHECK: %[[VAL_86:.*]] = cmpi eq, %[[VAL_35]], %[[VAL_41]] : index // CHECK: scf.if %[[VAL_86]] { -// CHECK: %[[VAL_87:.*]] = load %[[VAL_16]][] : memref -// CHECK: %[[VAL_88:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref +// CHECK: %[[VAL_87:.*]] = memref.load %[[VAL_16]][] : memref +// CHECK: %[[VAL_88:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref // CHECK: %[[VAL_89:.*]] = addf %[[VAL_87]], %[[VAL_88]] : f64 -// CHECK: store %[[VAL_89]], %[[VAL_16]][] : memref +// CHECK: memref.store %[[VAL_89]], %[[VAL_16]][] : memref // CHECK: } else { // CHECK: } // CHECK: } @@ -1499,34 +1499,34 @@ // CHECK: scf.condition(%[[VAL_105]]) %[[VAL_100]], %[[VAL_102]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_106:.*]]: index, %[[VAL_107:.*]]: index): -// CHECK: %[[VAL_108:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_106]]] : memref -// CHECK: %[[VAL_109:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_107]]] : memref +// CHECK: %[[VAL_108:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_106]]] : memref +// CHECK: %[[VAL_109:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_107]]] : memref // CHECK: %[[VAL_110:.*]] = cmpi ult, %[[VAL_109]], %[[VAL_108]] : index // CHECK: %[[VAL_111:.*]] = select %[[VAL_110]], %[[VAL_109]], %[[VAL_108]] : index // CHECK: %[[VAL_112:.*]] = cmpi eq, %[[VAL_108]], %[[VAL_111]] : index // CHECK: %[[VAL_113:.*]] = cmpi eq, %[[VAL_109]], %[[VAL_111]] : index // CHECK: %[[VAL_114:.*]] = and %[[VAL_112]], %[[VAL_113]] : i1 // CHECK: scf.if %[[VAL_114]] { -// CHECK: 
%[[VAL_115:.*]] = load %[[VAL_16]][] : memref -// CHECK: %[[VAL_116:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_106]]] : memref +// CHECK: %[[VAL_115:.*]] = memref.load %[[VAL_16]][] : memref +// CHECK: %[[VAL_116:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_106]]] : memref // CHECK: %[[VAL_117:.*]] = addf %[[VAL_115]], %[[VAL_116]] : f64 -// CHECK: %[[VAL_118:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_107]]] : memref +// CHECK: %[[VAL_118:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_107]]] : memref // CHECK: %[[VAL_119:.*]] = addf %[[VAL_117]], %[[VAL_118]] : f64 -// CHECK: store %[[VAL_119]], %[[VAL_16]][] : memref +// CHECK: memref.store %[[VAL_119]], %[[VAL_16]][] : memref // CHECK: } else { // CHECK: %[[VAL_120:.*]] = cmpi eq, %[[VAL_109]], %[[VAL_111]] : index // CHECK: scf.if %[[VAL_120]] { -// CHECK: %[[VAL_121:.*]] = load %[[VAL_16]][] : memref -// CHECK: %[[VAL_122:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_107]]] : memref +// CHECK: %[[VAL_121:.*]] = memref.load %[[VAL_16]][] : memref +// CHECK: %[[VAL_122:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_107]]] : memref // CHECK: %[[VAL_123:.*]] = addf %[[VAL_121]], %[[VAL_122]] : f64 -// CHECK: store %[[VAL_123]], %[[VAL_16]][] : memref +// CHECK: memref.store %[[VAL_123]], %[[VAL_16]][] : memref // CHECK: } else { // CHECK: %[[VAL_124:.*]] = cmpi eq, %[[VAL_108]], %[[VAL_111]] : index // CHECK: scf.if %[[VAL_124]] { -// CHECK: %[[VAL_125:.*]] = load %[[VAL_16]][] : memref -// CHECK: %[[VAL_126:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_106]]] : memref +// CHECK: %[[VAL_125:.*]] = memref.load %[[VAL_16]][] : memref +// CHECK: %[[VAL_126:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_106]]] : memref // CHECK: %[[VAL_127:.*]] = addf %[[VAL_125]], %[[VAL_126]] : f64 -// CHECK: store %[[VAL_127]], %[[VAL_16]][] : memref +// CHECK: memref.store %[[VAL_127]], %[[VAL_16]][] : memref // CHECK: } else { // CHECK: } // CHECK: } @@ -1546,34 +1546,34 @@ // CHECK: scf.condition(%[[VAL_141]]) %[[VAL_135]], %[[VAL_137]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_142:.*]]: index, %[[VAL_143:.*]]: index): -// CHECK: %[[VAL_144:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_142]]] : memref -// CHECK: %[[VAL_145:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_143]]] : memref +// CHECK: %[[VAL_144:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_142]]] : memref +// CHECK: %[[VAL_145:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_143]]] : memref // CHECK: %[[VAL_146:.*]] = cmpi ult, %[[VAL_145]], %[[VAL_144]] : index // CHECK: %[[VAL_147:.*]] = select %[[VAL_146]], %[[VAL_145]], %[[VAL_144]] : index // CHECK: %[[VAL_148:.*]] = cmpi eq, %[[VAL_144]], %[[VAL_147]] : index // CHECK: %[[VAL_149:.*]] = cmpi eq, %[[VAL_145]], %[[VAL_147]] : index // CHECK: %[[VAL_150:.*]] = and %[[VAL_148]], %[[VAL_149]] : i1 // CHECK: scf.if %[[VAL_150]] { -// CHECK: %[[VAL_151:.*]] = load %[[VAL_16]][] : memref -// CHECK: %[[VAL_152:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_142]]] : memref +// CHECK: %[[VAL_151:.*]] = memref.load %[[VAL_16]][] : memref +// CHECK: %[[VAL_152:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_142]]] : memref // CHECK: %[[VAL_153:.*]] = addf %[[VAL_151]], %[[VAL_152]] : f64 -// CHECK: %[[VAL_154:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_143]]] : memref +// CHECK: %[[VAL_154:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_143]]] : memref // CHECK: %[[VAL_155:.*]] = addf %[[VAL_153]], %[[VAL_154]] : f64 -// CHECK: store %[[VAL_155]], %[[VAL_16]][] : memref +// CHECK: memref.store %[[VAL_155]], %[[VAL_16]][] : memref // CHECK: } else { // CHECK: %[[VAL_156:.*]] = cmpi eq, %[[VAL_145]], %[[VAL_147]] : index // CHECK: scf.if 
%[[VAL_156]] { -// CHECK: %[[VAL_157:.*]] = load %[[VAL_16]][] : memref -// CHECK: %[[VAL_158:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_143]]] : memref +// CHECK: %[[VAL_157:.*]] = memref.load %[[VAL_16]][] : memref +// CHECK: %[[VAL_158:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_143]]] : memref // CHECK: %[[VAL_159:.*]] = addf %[[VAL_157]], %[[VAL_158]] : f64 -// CHECK: store %[[VAL_159]], %[[VAL_16]][] : memref +// CHECK: memref.store %[[VAL_159]], %[[VAL_16]][] : memref // CHECK: } else { // CHECK: %[[VAL_160:.*]] = cmpi eq, %[[VAL_144]], %[[VAL_147]] : index // CHECK: scf.if %[[VAL_160]] { -// CHECK: %[[VAL_161:.*]] = load %[[VAL_16]][] : memref -// CHECK: %[[VAL_162:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_142]]] : memref +// CHECK: %[[VAL_161:.*]] = memref.load %[[VAL_16]][] : memref +// CHECK: %[[VAL_162:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_142]]] : memref // CHECK: %[[VAL_163:.*]] = addf %[[VAL_161]], %[[VAL_162]] : f64 -// CHECK: store %[[VAL_163]], %[[VAL_16]][] : memref +// CHECK: memref.store %[[VAL_163]], %[[VAL_16]][] : memref // CHECK: } else { // CHECK: } // CHECK: } @@ -1586,13 +1586,13 @@ // CHECK: %[[VAL_169:.*]] = select %[[VAL_167]], %[[VAL_168]], %[[VAL_143]] : index // CHECK: scf.yield %[[VAL_166]], %[[VAL_169]] : index, index // CHECK: } -// CHECK: %[[VAL_170:.*]] = load %[[VAL_16]][] : memref +// CHECK: %[[VAL_170:.*]] = memref.load %[[VAL_16]][] : memref // CHECK: %[[VAL_171:.*]] = scf.for %[[VAL_172:.*]] = %[[VAL_173:.*]]#1 to %[[VAL_22]] step %[[VAL_5]] iter_args(%[[VAL_174:.*]] = %[[VAL_170]]) -> (f64) { -// CHECK: %[[VAL_175:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_172]]] : memref +// CHECK: %[[VAL_175:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_172]]] : memref // CHECK: %[[VAL_176:.*]] = addf %[[VAL_174]], %[[VAL_175]] : f64 // CHECK: scf.yield %[[VAL_176]] : f64 // CHECK: } -// CHECK: store %[[VAL_177:.*]], %[[VAL_16]][] : memref +// CHECK: memref.store %[[VAL_177:.*]], %[[VAL_16]][] : memref // CHECK: %[[VAL_178:.*]]:2 = scf.while (%[[VAL_179:.*]] = %[[VAL_180:.*]]#0, %[[VAL_181:.*]] = %[[VAL_182:.*]]#0) : (index, index) -> (index, index) { // CHECK: %[[VAL_183:.*]] = cmpi ult, %[[VAL_179]], %[[VAL_18]] : index // CHECK: %[[VAL_184:.*]] = cmpi ult, %[[VAL_181]], %[[VAL_20]] : index @@ -1600,34 +1600,34 @@ // CHECK: scf.condition(%[[VAL_185]]) %[[VAL_179]], %[[VAL_181]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_186:.*]]: index, %[[VAL_187:.*]]: index): -// CHECK: %[[VAL_188:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_186]]] : memref -// CHECK: %[[VAL_189:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_187]]] : memref +// CHECK: %[[VAL_188:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_186]]] : memref +// CHECK: %[[VAL_189:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_187]]] : memref // CHECK: %[[VAL_190:.*]] = cmpi ult, %[[VAL_189]], %[[VAL_188]] : index // CHECK: %[[VAL_191:.*]] = select %[[VAL_190]], %[[VAL_189]], %[[VAL_188]] : index // CHECK: %[[VAL_192:.*]] = cmpi eq, %[[VAL_188]], %[[VAL_191]] : index // CHECK: %[[VAL_193:.*]] = cmpi eq, %[[VAL_189]], %[[VAL_191]] : index // CHECK: %[[VAL_194:.*]] = and %[[VAL_192]], %[[VAL_193]] : i1 // CHECK: scf.if %[[VAL_194]] { -// CHECK: %[[VAL_195:.*]] = load %[[VAL_16]][] : memref -// CHECK: %[[VAL_196:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_186]]] : memref +// CHECK: %[[VAL_195:.*]] = memref.load %[[VAL_16]][] : memref +// CHECK: %[[VAL_196:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_186]]] : memref // CHECK: %[[VAL_197:.*]] = addf %[[VAL_195]], %[[VAL_196]] : f64 -// CHECK: %[[VAL_198:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_187]]] : memref +// 
CHECK: %[[VAL_198:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_187]]] : memref // CHECK: %[[VAL_199:.*]] = addf %[[VAL_197]], %[[VAL_198]] : f64 -// CHECK: store %[[VAL_199]], %[[VAL_16]][] : memref +// CHECK: memref.store %[[VAL_199]], %[[VAL_16]][] : memref // CHECK: } else { // CHECK: %[[VAL_200:.*]] = cmpi eq, %[[VAL_189]], %[[VAL_191]] : index // CHECK: scf.if %[[VAL_200]] { -// CHECK: %[[VAL_201:.*]] = load %[[VAL_16]][] : memref -// CHECK: %[[VAL_202:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_187]]] : memref +// CHECK: %[[VAL_201:.*]] = memref.load %[[VAL_16]][] : memref +// CHECK: %[[VAL_202:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_187]]] : memref // CHECK: %[[VAL_203:.*]] = addf %[[VAL_201]], %[[VAL_202]] : f64 -// CHECK: store %[[VAL_203]], %[[VAL_16]][] : memref +// CHECK: memref.store %[[VAL_203]], %[[VAL_16]][] : memref // CHECK: } else { // CHECK: %[[VAL_204:.*]] = cmpi eq, %[[VAL_188]], %[[VAL_191]] : index // CHECK: scf.if %[[VAL_204]] { -// CHECK: %[[VAL_205:.*]] = load %[[VAL_16]][] : memref -// CHECK: %[[VAL_206:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_186]]] : memref +// CHECK: %[[VAL_205:.*]] = memref.load %[[VAL_16]][] : memref +// CHECK: %[[VAL_206:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_186]]] : memref // CHECK: %[[VAL_207:.*]] = addf %[[VAL_205]], %[[VAL_206]] : f64 -// CHECK: store %[[VAL_207]], %[[VAL_16]][] : memref +// CHECK: memref.store %[[VAL_207]], %[[VAL_16]][] : memref // CHECK: } else { // CHECK: } // CHECK: } @@ -1640,19 +1640,19 @@ // CHECK: %[[VAL_213:.*]] = select %[[VAL_211]], %[[VAL_212]], %[[VAL_187]] : index // CHECK: scf.yield %[[VAL_210]], %[[VAL_213]] : index, index // CHECK: } -// CHECK: %[[VAL_214:.*]] = load %[[VAL_16]][] : memref +// CHECK: %[[VAL_214:.*]] = memref.load %[[VAL_16]][] : memref // CHECK: %[[VAL_215:.*]] = scf.for %[[VAL_216:.*]] = %[[VAL_217:.*]]#1 to %[[VAL_20]] step %[[VAL_5]] iter_args(%[[VAL_218:.*]] = %[[VAL_214]]) -> (f64) { -// CHECK: %[[VAL_219:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_216]]] : memref +// CHECK: %[[VAL_219:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_216]]] : memref // CHECK: %[[VAL_220:.*]] = addf %[[VAL_218]], %[[VAL_219]] : f64 // CHECK: scf.yield %[[VAL_220]] : f64 // CHECK: } // CHECK: %[[VAL_221:.*]] = scf.for %[[VAL_222:.*]] = %[[VAL_223:.*]]#0 to %[[VAL_18]] step %[[VAL_5]] iter_args(%[[VAL_224:.*]] = %[[VAL_225:.*]]) -> (f64) { -// CHECK: %[[VAL_226:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_222]]] : memref +// CHECK: %[[VAL_226:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_222]]] : memref // CHECK: %[[VAL_227:.*]] = addf %[[VAL_224]], %[[VAL_226]] : f64 // CHECK: scf.yield %[[VAL_227]] : f64 // CHECK: } -// CHECK: store %[[VAL_228:.*]], %[[VAL_16]][] : memref -// CHECK: %[[VAL_229:.*]] = tensor_load %[[VAL_16]] : memref +// CHECK: memref.store %[[VAL_228:.*]], %[[VAL_16]][] : memref +// CHECK: %[[VAL_229:.*]] = memref.tensor_load %[[VAL_16]] : memref // CHECK: return %[[VAL_229]] : tensor // CHECK: } func @red3s(%arga: tensor, diff --git a/mlir/test/Dialect/Linalg/sparse_2d.mlir b/mlir/test/Dialect/Linalg/sparse_2d.mlir --- a/mlir/test/Dialect/Linalg/sparse_2d.mlir +++ b/mlir/test/Dialect/Linalg/sparse_2d.mlir @@ -24,20 +24,20 @@ // CHECK: %[[VAL_4:.*]] = constant 16 : index // CHECK: %[[VAL_5:.*]] = constant 0 : index // CHECK: %[[VAL_6:.*]] = constant 1 : index -// CHECK: %[[VAL_7:.*]] = tensor_to_memref %[[VAL_0]] : memref<32x16xf32> -// CHECK: %[[VAL_8:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16xf32> -// CHECK: %[[VAL_9:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: %[[VAL_10:.*]] = alloc() : 
memref<32x16xf32> +// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_0]] : memref<32x16xf32> +// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16xf32> +// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32> +// CHECK: %[[VAL_10:.*]] = memref.alloc() : memref<32x16xf32> // CHECK: linalg.copy(%[[VAL_9]], %[[VAL_10]]) : memref<32x16xf32>, memref<32x16xf32> // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { -// CHECK: %[[VAL_13:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_11]], %[[VAL_12]]] : memref<32x16xf32> -// CHECK: %[[VAL_14:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_11]], %[[VAL_12]]] : memref<32x16xf32> +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_11]], %[[VAL_12]]] : memref<32x16xf32> +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_11]], %[[VAL_12]]] : memref<32x16xf32> // CHECK: %[[VAL_15:.*]] = addf %[[VAL_13]], %[[VAL_14]] : f32 // CHECK: store %[[VAL_15]], %[[VAL_10]]{{\[}}%[[VAL_11]], %[[VAL_12]]] : memref<32x16xf32> // CHECK: } // CHECK: } -// CHECK: %[[VAL_16:.*]] = tensor_load %[[VAL_10]] : memref<32x16xf32> +// CHECK: %[[VAL_16:.*]] = memref.tensor_load %[[VAL_10]] : memref<32x16xf32> // CHECK: return %[[VAL_16]] : tensor<32x16xf32> // CHECK: } func @add_dd(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { @@ -59,20 +59,20 @@ // CHECK: %[[VAL_4:.*]] = constant 16 : index // CHECK: %[[VAL_5:.*]] = constant 0 : index // CHECK: %[[VAL_6:.*]] = constant 1 : index -// CHECK: %[[VAL_7:.*]] = tensor_to_memref %[[VAL_0]] : memref<32x16xf32> -// CHECK: %[[VAL_8:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16xf32> -// CHECK: %[[VAL_9:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: %[[VAL_10:.*]] = alloc() : memref<32x16xf32> +// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_0]] : memref<32x16xf32> +// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16xf32> +// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32> +// CHECK: %[[VAL_10:.*]] = memref.alloc() : memref<32x16xf32> // CHECK: linalg.copy(%[[VAL_9]], %[[VAL_10]]) : memref<32x16xf32>, memref<32x16xf32> // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { -// CHECK: %[[VAL_13:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_11]], %[[VAL_12]]] : memref<32x16xf32> -// CHECK: %[[VAL_14:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_11]], %[[VAL_12]]] : memref<32x16xf32> +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_11]], %[[VAL_12]]] : memref<32x16xf32> +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_11]], %[[VAL_12]]] : memref<32x16xf32> // CHECK: %[[VAL_15:.*]] = mulf %[[VAL_13]], %[[VAL_14]] : f32 // CHECK: store %[[VAL_15]], %[[VAL_10]]{{\[}}%[[VAL_11]], %[[VAL_12]]] : memref<32x16xf32> // CHECK: } // CHECK: } -// CHECK: %[[VAL_16:.*]] = tensor_load %[[VAL_10]] : memref<32x16xf32> +// CHECK: %[[VAL_16:.*]] = memref.tensor_load %[[VAL_10]] : memref<32x16xf32> // CHECK: return %[[VAL_16]] : tensor<32x16xf32> // CHECK: } func @mul_dd(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { @@ -113,29 +113,29 @@ // CHECK: %[[VAL_8:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_7]] : tensor<32x16xf32> to memref // CHECK: %[[VAL_9:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_7]] : tensor<32x16xf32> to memref // 
CHECK: %[[VAL_10:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32x16xf32> to memref -// CHECK: %[[VAL_11:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16xf32> -// CHECK: %[[VAL_12:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: %[[VAL_13:.*]] = alloc() : memref<32x16xf32> +// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16xf32> +// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32> +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<32x16xf32> // CHECK: linalg.copy(%[[VAL_12]], %[[VAL_13]]) : memref<32x16xf32>, memref<32x16xf32> // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_7]] { -// CHECK: %[[VAL_15:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_14]]] : memref +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_14]]] : memref // CHECK: %[[VAL_16:.*]] = addi %[[VAL_14]], %[[VAL_7]] : index -// CHECK: %[[VAL_17:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_16]]] : memref +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_16]]] : memref // CHECK: %[[VAL_18:.*]]:2 = scf.while (%[[VAL_19:.*]] = %[[VAL_15]], %[[VAL_20:.*]] = %[[VAL_5]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_21:.*]] = cmpi ult, %[[VAL_19]], %[[VAL_17]] : index // CHECK: scf.condition(%[[VAL_21]]) %[[VAL_19]], %[[VAL_20]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_22:.*]]: index, %[[VAL_23:.*]]: index): -// CHECK: %[[VAL_24:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_22]]] : memref +// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_22]]] : memref // CHECK: %[[VAL_25:.*]] = cmpi eq, %[[VAL_24]], %[[VAL_23]] : index // CHECK: scf.if %[[VAL_25]] { -// CHECK: %[[VAL_26:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_22]]] : memref -// CHECK: %[[VAL_27:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_14]], %[[VAL_23]]] : memref<32x16xf32> +// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_22]]] : memref +// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_14]], %[[VAL_23]]] : memref<32x16xf32> // CHECK: %[[VAL_28:.*]] = addf %[[VAL_26]], %[[VAL_27]] : f32 // CHECK: store %[[VAL_28]], %[[VAL_13]]{{\[}}%[[VAL_14]], %[[VAL_23]]] : memref<32x16xf32> // CHECK: } else { // CHECK: scf.if %[[VAL_6]] { -// CHECK: %[[VAL_29:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_14]], %[[VAL_23]]] : memref<32x16xf32> +// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_14]], %[[VAL_23]]] : memref<32x16xf32> // CHECK: store %[[VAL_29]], %[[VAL_13]]{{\[}}%[[VAL_14]], %[[VAL_23]]] : memref<32x16xf32> // CHECK: } else { // CHECK: } @@ -147,11 +147,11 @@ // CHECK: scf.yield %[[VAL_32]], %[[VAL_33]] : index, index // CHECK: } // CHECK: scf.for %[[VAL_34:.*]] = %[[VAL_35:.*]]#1 to %[[VAL_4]] step %[[VAL_7]] { -// CHECK: %[[VAL_36:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_14]], %[[VAL_34]]] : memref<32x16xf32> +// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_14]], %[[VAL_34]]] : memref<32x16xf32> // CHECK: store %[[VAL_36]], %[[VAL_13]]{{\[}}%[[VAL_14]], %[[VAL_34]]] : memref<32x16xf32> // CHECK: } // CHECK: } -// CHECK: %[[VAL_37:.*]] = tensor_load %[[VAL_13]] : memref<32x16xf32> +// CHECK: %[[VAL_37:.*]] = memref.tensor_load %[[VAL_13]] : memref<32x16xf32> // CHECK: return %[[VAL_37]] : tensor<32x16xf32> // CHECK: } func @add_ds(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { @@ -175,23 +175,23 @@ // CHECK: %[[VAL_6:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_5]] : tensor<32x16xf32> to memref // CHECK: %[[VAL_7:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_5]] : 
tensor<32x16xf32> to memref // CHECK: %[[VAL_8:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32x16xf32> to memref -// CHECK: %[[VAL_9:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16xf32> -// CHECK: %[[VAL_10:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: %[[VAL_11:.*]] = alloc() : memref<32x16xf32> +// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16xf32> +// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32> +// CHECK: %[[VAL_11:.*]] = memref.alloc() : memref<32x16xf32> // CHECK: linalg.copy(%[[VAL_10]], %[[VAL_11]]) : memref<32x16xf32>, memref<32x16xf32> // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { -// CHECK: %[[VAL_13:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref // CHECK: %[[VAL_14:.*]] = addi %[[VAL_12]], %[[VAL_5]] : index -// CHECK: %[[VAL_15:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_14]]] : memref +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_14]]] : memref // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_13]] to %[[VAL_15]] step %[[VAL_5]] { -// CHECK: %[[VAL_17:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_16]]] : memref -// CHECK: %[[VAL_18:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_16]]] : memref -// CHECK: %[[VAL_19:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_12]], %[[VAL_17]]] : memref<32x16xf32> +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_16]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_16]]] : memref +// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_12]], %[[VAL_17]]] : memref<32x16xf32> // CHECK: %[[VAL_20:.*]] = mulf %[[VAL_18]], %[[VAL_19]] : f32 // CHECK: store %[[VAL_20]], %[[VAL_11]]{{\[}}%[[VAL_12]], %[[VAL_17]]] : memref<32x16xf32> // CHECK: } // CHECK: } -// CHECK: %[[VAL_21:.*]] = tensor_load %[[VAL_11]] : memref<32x16xf32> +// CHECK: %[[VAL_21:.*]] = memref.tensor_load %[[VAL_11]] : memref<32x16xf32> // CHECK: return %[[VAL_21]] : tensor<32x16xf32> // CHECK: } func @mul_ds(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { @@ -232,32 +232,32 @@ // CHECK: %[[VAL_8:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_6]] : tensor<32x16xf32> to memref // CHECK: %[[VAL_9:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_6]] : tensor<32x16xf32> to memref // CHECK: %[[VAL_10:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32x16xf32> to memref -// CHECK: %[[VAL_11:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16xf32> -// CHECK: %[[VAL_12:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: %[[VAL_13:.*]] = alloc() : memref<32x16xf32> +// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16xf32> +// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32> +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<32x16xf32> // CHECK: linalg.copy(%[[VAL_12]], %[[VAL_13]]) : memref<32x16xf32>, memref<32x16xf32> -// CHECK: %[[VAL_14:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref -// CHECK: %[[VAL_15:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_7]]] : memref +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_16:.*]]:2 = scf.while (%[[VAL_17:.*]] = %[[VAL_14]], %[[VAL_18:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_19:.*]] = cmpi ult, %[[VAL_17]], %[[VAL_15]] : index // CHECK: scf.condition(%[[VAL_19]]) %[[VAL_17]], %[[VAL_18]] : index, index 
// CHECK: } do { // CHECK: ^bb0(%[[VAL_20:.*]]: index, %[[VAL_21:.*]]: index): -// CHECK: %[[VAL_22:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_20]]] : memref +// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_20]]] : memref // CHECK: %[[VAL_23:.*]] = cmpi eq, %[[VAL_22]], %[[VAL_21]] : index // CHECK: scf.if %[[VAL_23]] { // CHECK: scf.for %[[VAL_24:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] { // CHECK: %[[VAL_25:.*]] = muli %[[VAL_20]], %[[VAL_4]] : index // CHECK: %[[VAL_26:.*]] = addi %[[VAL_25]], %[[VAL_24]] : index -// CHECK: %[[VAL_27:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_26]]] : memref -// CHECK: %[[VAL_28:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_21]], %[[VAL_24]]] : memref<32x16xf32> +// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_26]]] : memref +// CHECK: %[[VAL_28:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_21]], %[[VAL_24]]] : memref<32x16xf32> // CHECK: %[[VAL_29:.*]] = addf %[[VAL_27]], %[[VAL_28]] : f32 // CHECK: store %[[VAL_29]], %[[VAL_13]]{{\[}}%[[VAL_21]], %[[VAL_24]]] : memref<32x16xf32> // CHECK: } // CHECK: } else { // CHECK: scf.if %[[VAL_5]] { // CHECK: scf.for %[[VAL_30:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] { -// CHECK: %[[VAL_31:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_21]], %[[VAL_30]]] : memref<32x16xf32> +// CHECK: %[[VAL_31:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_21]], %[[VAL_30]]] : memref<32x16xf32> // CHECK: store %[[VAL_31]], %[[VAL_13]]{{\[}}%[[VAL_21]], %[[VAL_30]]] : memref<32x16xf32> // CHECK: } // CHECK: } else { @@ -271,11 +271,11 @@ // CHECK: } // CHECK: scf.for %[[VAL_36:.*]] = %[[VAL_37:.*]]#1 to %[[VAL_3]] step %[[VAL_7]] { // CHECK: scf.for %[[VAL_38:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] { -// CHECK: %[[VAL_39:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_36]], %[[VAL_38]]] : memref<32x16xf32> +// CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_36]], %[[VAL_38]]] : memref<32x16xf32> // CHECK: store %[[VAL_39]], %[[VAL_13]]{{\[}}%[[VAL_36]], %[[VAL_38]]] : memref<32x16xf32> // CHECK: } // CHECK: } -// CHECK: %[[VAL_40:.*]] = tensor_load %[[VAL_13]] : memref<32x16xf32> +// CHECK: %[[VAL_40:.*]] = memref.tensor_load %[[VAL_13]] : memref<32x16xf32> // CHECK: return %[[VAL_40]] : tensor<32x16xf32> // CHECK: } func @add_sd(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { @@ -299,24 +299,24 @@ // CHECK: %[[VAL_6:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_4]] : tensor<32x16xf32> to memref // CHECK: %[[VAL_7:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_4]] : tensor<32x16xf32> to memref // CHECK: %[[VAL_8:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32x16xf32> to memref -// CHECK: %[[VAL_9:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16xf32> -// CHECK: %[[VAL_10:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: %[[VAL_11:.*]] = alloc() : memref<32x16xf32> +// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16xf32> +// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32> +// CHECK: %[[VAL_11:.*]] = memref.alloc() : memref<32x16xf32> // CHECK: linalg.copy(%[[VAL_10]], %[[VAL_11]]) : memref<32x16xf32>, memref<32x16xf32> -// CHECK: %[[VAL_12:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_13:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_5]] { -// CHECK: 
%[[VAL_15:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_14]]] : memref +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_14]]] : memref // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK: %[[VAL_17:.*]] = muli %[[VAL_14]], %[[VAL_3]] : index // CHECK: %[[VAL_18:.*]] = addi %[[VAL_17]], %[[VAL_16]] : index -// CHECK: %[[VAL_19:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_18]]] : memref -// CHECK: %[[VAL_20:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_15]], %[[VAL_16]]] : memref<32x16xf32> +// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_18]]] : memref +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_15]], %[[VAL_16]]] : memref<32x16xf32> // CHECK: %[[VAL_21:.*]] = mulf %[[VAL_19]], %[[VAL_20]] : f32 // CHECK: store %[[VAL_21]], %[[VAL_11]]{{\[}}%[[VAL_15]], %[[VAL_16]]] : memref<32x16xf32> // CHECK: } // CHECK: } -// CHECK: %[[VAL_22:.*]] = tensor_load %[[VAL_11]] : memref<32x16xf32> +// CHECK: %[[VAL_22:.*]] = memref.tensor_load %[[VAL_11]] : memref<32x16xf32> // CHECK: return %[[VAL_22]] : tensor<32x16xf32> // CHECK: } func @mul_sd(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { @@ -359,38 +359,38 @@ // CHECK: %[[VAL_10:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_7]] : tensor<32x16xf32> to memref // CHECK: %[[VAL_11:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_7]] : tensor<32x16xf32> to memref // CHECK: %[[VAL_12:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32x16xf32> to memref -// CHECK: %[[VAL_13:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16xf32> -// CHECK: %[[VAL_14:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: %[[VAL_15:.*]] = alloc() : memref<32x16xf32> +// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16xf32> +// CHECK: %[[VAL_14:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32> +// CHECK: %[[VAL_15:.*]] = memref.alloc() : memref<32x16xf32> // CHECK: linalg.copy(%[[VAL_14]], %[[VAL_15]]) : memref<32x16xf32>, memref<32x16xf32> -// CHECK: %[[VAL_16:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref -// CHECK: %[[VAL_17:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_7]]] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_18:.*]]:2 = scf.while (%[[VAL_19:.*]] = %[[VAL_16]], %[[VAL_20:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_21:.*]] = cmpi ult, %[[VAL_19]], %[[VAL_17]] : index // CHECK: scf.condition(%[[VAL_21]]) %[[VAL_19]], %[[VAL_20]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_22:.*]]: index, %[[VAL_23:.*]]: index): -// CHECK: %[[VAL_24:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_22]]] : memref +// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_22]]] : memref // CHECK: %[[VAL_25:.*]] = cmpi eq, %[[VAL_24]], %[[VAL_23]] : index // CHECK: scf.if %[[VAL_25]] { -// CHECK: %[[VAL_26:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_22]]] : memref +// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_22]]] : memref // CHECK: %[[VAL_27:.*]] = addi %[[VAL_22]], %[[VAL_7]] : index -// CHECK: %[[VAL_28:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_28:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_27]]] : memref // CHECK: %[[VAL_29:.*]]:2 = scf.while (%[[VAL_30:.*]] = %[[VAL_26]], %[[VAL_31:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_32:.*]] = cmpi ult, %[[VAL_30]], %[[VAL_28]] : index // CHECK: scf.condition(%[[VAL_32]]) %[[VAL_30]], 
%[[VAL_31]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_33:.*]]: index, %[[VAL_34:.*]]: index): -// CHECK: %[[VAL_35:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_35:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_33]]] : memref // CHECK: %[[VAL_36:.*]] = cmpi eq, %[[VAL_35]], %[[VAL_34]] : index // CHECK: scf.if %[[VAL_36]] { -// CHECK: %[[VAL_37:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_33]]] : memref -// CHECK: %[[VAL_38:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_23]], %[[VAL_34]]] : memref<32x16xf32> +// CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_23]], %[[VAL_34]]] : memref<32x16xf32> // CHECK: %[[VAL_39:.*]] = addf %[[VAL_37]], %[[VAL_38]] : f32 // CHECK: store %[[VAL_39]], %[[VAL_15]]{{\[}}%[[VAL_23]], %[[VAL_34]]] : memref<32x16xf32> // CHECK: } else { // CHECK: scf.if %[[VAL_5]] { -// CHECK: %[[VAL_40:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_23]], %[[VAL_34]]] : memref<32x16xf32> +// CHECK: %[[VAL_40:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_23]], %[[VAL_34]]] : memref<32x16xf32> // CHECK: store %[[VAL_40]], %[[VAL_15]]{{\[}}%[[VAL_23]], %[[VAL_34]]] : memref<32x16xf32> // CHECK: } else { // CHECK: } @@ -402,13 +402,13 @@ // CHECK: scf.yield %[[VAL_43]], %[[VAL_44]] : index, index // CHECK: } // CHECK: scf.for %[[VAL_45:.*]] = %[[VAL_46:.*]]#1 to %[[VAL_4]] step %[[VAL_7]] { -// CHECK: %[[VAL_47:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_23]], %[[VAL_45]]] : memref<32x16xf32> +// CHECK: %[[VAL_47:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_23]], %[[VAL_45]]] : memref<32x16xf32> // CHECK: store %[[VAL_47]], %[[VAL_15]]{{\[}}%[[VAL_23]], %[[VAL_45]]] : memref<32x16xf32> // CHECK: } // CHECK: } else { // CHECK: scf.if %[[VAL_5]] { // CHECK: scf.for %[[VAL_48:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] { -// CHECK: %[[VAL_49:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_23]], %[[VAL_48]]] : memref<32x16xf32> +// CHECK: %[[VAL_49:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_23]], %[[VAL_48]]] : memref<32x16xf32> // CHECK: store %[[VAL_49]], %[[VAL_15]]{{\[}}%[[VAL_23]], %[[VAL_48]]] : memref<32x16xf32> // CHECK: } // CHECK: } else { @@ -422,11 +422,11 @@ // CHECK: } // CHECK: scf.for %[[VAL_54:.*]] = %[[VAL_55:.*]]#1 to %[[VAL_3]] step %[[VAL_7]] { // CHECK: scf.for %[[VAL_56:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] { -// CHECK: %[[VAL_57:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_54]], %[[VAL_56]]] : memref<32x16xf32> +// CHECK: %[[VAL_57:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_54]], %[[VAL_56]]] : memref<32x16xf32> // CHECK: store %[[VAL_57]], %[[VAL_15]]{{\[}}%[[VAL_54]], %[[VAL_56]]] : memref<32x16xf32> // CHECK: } // CHECK: } -// CHECK: %[[VAL_58:.*]] = tensor_load %[[VAL_15]] : memref<32x16xf32> +// CHECK: %[[VAL_58:.*]] = memref.tensor_load %[[VAL_15]] : memref<32x16xf32> // CHECK: return %[[VAL_58]] : tensor<32x16xf32> // CHECK: } func @add_ss(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { @@ -451,26 +451,26 @@ // CHECK: %[[VAL_7:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_4]] : tensor<32x16xf32> to memref // CHECK: %[[VAL_8:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_4]] : tensor<32x16xf32> to memref // CHECK: %[[VAL_9:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32x16xf32> to memref -// CHECK: %[[VAL_10:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16xf32> -// CHECK: %[[VAL_11:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: %[[VAL_12:.*]] = alloc() : memref<32x16xf32> +// CHECK: %[[VAL_10:.*]] = 
memref.buffer_cast %[[VAL_1]] : memref<32x16xf32> +// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32> +// CHECK: %[[VAL_12:.*]] = memref.alloc() : memref<32x16xf32> // CHECK: linalg.copy(%[[VAL_11]], %[[VAL_12]]) : memref<32x16xf32>, memref<32x16xf32> -// CHECK: %[[VAL_13:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_14:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_14]] step %[[VAL_4]] { -// CHECK: %[[VAL_16:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_15]]] : memref -// CHECK: %[[VAL_17:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_15]]] : memref +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref // CHECK: %[[VAL_18:.*]] = addi %[[VAL_15]], %[[VAL_4]] : index -// CHECK: %[[VAL_19:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_18]]] : memref +// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_18]]] : memref // CHECK: scf.for %[[VAL_20:.*]] = %[[VAL_17]] to %[[VAL_19]] step %[[VAL_4]] { -// CHECK: %[[VAL_21:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_20]]] : memref -// CHECK: %[[VAL_22:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_20]]] : memref -// CHECK: %[[VAL_23:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_16]], %[[VAL_21]]] : memref<32x16xf32> +// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_20]]] : memref +// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_20]]] : memref +// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_16]], %[[VAL_21]]] : memref<32x16xf32> // CHECK: %[[VAL_24:.*]] = mulf %[[VAL_22]], %[[VAL_23]] : f32 // CHECK: store %[[VAL_24]], %[[VAL_12]]{{\[}}%[[VAL_16]], %[[VAL_21]]] : memref<32x16xf32> // CHECK: } // CHECK: } -// CHECK: %[[VAL_25:.*]] = tensor_load %[[VAL_12]] : memref<32x16xf32> +// CHECK: %[[VAL_25:.*]] = memref.tensor_load %[[VAL_12]] : memref<32x16xf32> // CHECK: return %[[VAL_25]] : tensor<32x16xf32> // CHECK: } func @mul_ss(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { @@ -515,13 +515,13 @@ // CHECK: %[[VAL_12:.*]] = linalg.sparse_pointers %[[VAL_1]], %[[VAL_4]] : tensor<32x16xf32> to memref // CHECK: %[[VAL_13:.*]] = linalg.sparse_indices %[[VAL_1]], %[[VAL_4]] : tensor<32x16xf32> to memref // CHECK: %[[VAL_14:.*]] = linalg.sparse_values %[[VAL_1]] : tensor<32x16xf32> to memref -// CHECK: %[[VAL_15:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: %[[VAL_16:.*]] = alloc() : memref<32x16xf32> +// CHECK: %[[VAL_15:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32> +// CHECK: %[[VAL_16:.*]] = memref.alloc() : memref<32x16xf32> // CHECK: linalg.copy(%[[VAL_15]], %[[VAL_16]]) : memref<32x16xf32>, memref<32x16xf32> -// CHECK: %[[VAL_17:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_18:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_19:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_20:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_21:.*]]:2 = scf.while 
(%[[VAL_22:.*]] = %[[VAL_17]], %[[VAL_23:.*]] = %[[VAL_19]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_24:.*]] = cmpi ult, %[[VAL_22]], %[[VAL_18]] : index // CHECK: %[[VAL_25:.*]] = cmpi ult, %[[VAL_23]], %[[VAL_20]] : index @@ -529,20 +529,20 @@ // CHECK: scf.condition(%[[VAL_26]]) %[[VAL_22]], %[[VAL_23]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_27:.*]]: index, %[[VAL_28:.*]]: index): -// CHECK: %[[VAL_29:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_27]]] : memref -// CHECK: %[[VAL_30:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_28]]] : memref +// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_28]]] : memref // CHECK: %[[VAL_31:.*]] = cmpi ult, %[[VAL_30]], %[[VAL_29]] : index // CHECK: %[[VAL_32:.*]] = select %[[VAL_31]], %[[VAL_30]], %[[VAL_29]] : index // CHECK: %[[VAL_33:.*]] = cmpi eq, %[[VAL_29]], %[[VAL_32]] : index // CHECK: %[[VAL_34:.*]] = cmpi eq, %[[VAL_30]], %[[VAL_32]] : index // CHECK: %[[VAL_35:.*]] = and %[[VAL_33]], %[[VAL_34]] : i1 // CHECK: scf.if %[[VAL_35]] { -// CHECK: %[[VAL_36:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_27]]] : memref // CHECK: %[[VAL_37:.*]] = addi %[[VAL_27]], %[[VAL_4]] : index -// CHECK: %[[VAL_38:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_37]]] : memref -// CHECK: %[[VAL_39:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref +// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_37]]] : memref +// CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref // CHECK: %[[VAL_40:.*]] = addi %[[VAL_28]], %[[VAL_4]] : index -// CHECK: %[[VAL_41:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_40]]] : memref +// CHECK: %[[VAL_41:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_40]]] : memref // CHECK: %[[VAL_42:.*]]:2 = scf.while (%[[VAL_43:.*]] = %[[VAL_36]], %[[VAL_44:.*]] = %[[VAL_39]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_45:.*]] = cmpi ult, %[[VAL_43]], %[[VAL_38]] : index // CHECK: %[[VAL_46:.*]] = cmpi ult, %[[VAL_44]], %[[VAL_41]] : index @@ -550,27 +550,27 @@ // CHECK: scf.condition(%[[VAL_47]]) %[[VAL_43]], %[[VAL_44]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_48:.*]]: index, %[[VAL_49:.*]]: index): -// CHECK: %[[VAL_50:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_48]]] : memref -// CHECK: %[[VAL_51:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_49]]] : memref +// CHECK: %[[VAL_50:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_48]]] : memref +// CHECK: %[[VAL_51:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_49]]] : memref // CHECK: %[[VAL_52:.*]] = cmpi ult, %[[VAL_51]], %[[VAL_50]] : index // CHECK: %[[VAL_53:.*]] = select %[[VAL_52]], %[[VAL_51]], %[[VAL_50]] : index // CHECK: %[[VAL_54:.*]] = cmpi eq, %[[VAL_50]], %[[VAL_53]] : index // CHECK: %[[VAL_55:.*]] = cmpi eq, %[[VAL_51]], %[[VAL_53]] : index // CHECK: %[[VAL_56:.*]] = and %[[VAL_54]], %[[VAL_55]] : i1 // CHECK: scf.if %[[VAL_56]] { -// CHECK: %[[VAL_57:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_48]]] : memref -// CHECK: %[[VAL_58:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_49]]] : memref +// CHECK: %[[VAL_57:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_48]]] : memref +// CHECK: %[[VAL_58:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_49]]] : memref // CHECK: %[[VAL_59:.*]] = addf %[[VAL_57]], %[[VAL_58]] : f32 // CHECK: store %[[VAL_59]], %[[VAL_16]]{{\[}}%[[VAL_32]], %[[VAL_53]]] : memref<32x16xf32> // CHECK: } else { // CHECK: %[[VAL_60:.*]] = cmpi eq, %[[VAL_50]], %[[VAL_53]] : index // CHECK: scf.if %[[VAL_60]] { -// CHECK: 
%[[VAL_61:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_48]]] : memref +// CHECK: %[[VAL_61:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_48]]] : memref // CHECK: store %[[VAL_61]], %[[VAL_16]]{{\[}}%[[VAL_32]], %[[VAL_53]]] : memref<32x16xf32> // CHECK: } else { // CHECK: %[[VAL_62:.*]] = cmpi eq, %[[VAL_51]], %[[VAL_53]] : index // CHECK: scf.if %[[VAL_62]] { -// CHECK: %[[VAL_63:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_49]]] : memref +// CHECK: %[[VAL_63:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_49]]] : memref // CHECK: store %[[VAL_63]], %[[VAL_16]]{{\[}}%[[VAL_32]], %[[VAL_53]]] : memref<32x16xf32> // CHECK: } else { // CHECK: } @@ -585,35 +585,35 @@ // CHECK: scf.yield %[[VAL_66]], %[[VAL_69]] : index, index // CHECK: } // CHECK: scf.for %[[VAL_70:.*]] = %[[VAL_71:.*]]#0 to %[[VAL_38]] step %[[VAL_4]] { -// CHECK: %[[VAL_72:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_70]]] : memref -// CHECK: %[[VAL_73:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_70]]] : memref +// CHECK: %[[VAL_72:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_70]]] : memref +// CHECK: %[[VAL_73:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_70]]] : memref // CHECK: store %[[VAL_73]], %[[VAL_16]]{{\[}}%[[VAL_32]], %[[VAL_72]]] : memref<32x16xf32> // CHECK: } // CHECK: scf.for %[[VAL_74:.*]] = %[[VAL_75:.*]]#1 to %[[VAL_41]] step %[[VAL_4]] { -// CHECK: %[[VAL_76:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_74]]] : memref -// CHECK: %[[VAL_77:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_74]]] : memref +// CHECK: %[[VAL_76:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_74]]] : memref +// CHECK: %[[VAL_77:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_74]]] : memref // CHECK: store %[[VAL_77]], %[[VAL_16]]{{\[}}%[[VAL_32]], %[[VAL_76]]] : memref<32x16xf32> // CHECK: } // CHECK: } else { // CHECK: %[[VAL_78:.*]] = cmpi eq, %[[VAL_29]], %[[VAL_32]] : index // CHECK: scf.if %[[VAL_78]] { -// CHECK: %[[VAL_79:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_79:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_27]]] : memref // CHECK: %[[VAL_80:.*]] = addi %[[VAL_27]], %[[VAL_4]] : index -// CHECK: %[[VAL_81:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_80]]] : memref +// CHECK: %[[VAL_81:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_80]]] : memref // CHECK: scf.for %[[VAL_82:.*]] = %[[VAL_79]] to %[[VAL_81]] step %[[VAL_4]] { -// CHECK: %[[VAL_83:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_82]]] : memref -// CHECK: %[[VAL_84:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_82]]] : memref +// CHECK: %[[VAL_83:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_82]]] : memref +// CHECK: %[[VAL_84:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_82]]] : memref // CHECK: store %[[VAL_84]], %[[VAL_16]]{{\[}}%[[VAL_32]], %[[VAL_83]]] : memref<32x16xf32> // CHECK: } // CHECK: } else { // CHECK: %[[VAL_85:.*]] = cmpi eq, %[[VAL_30]], %[[VAL_32]] : index // CHECK: scf.if %[[VAL_85]] { -// CHECK: %[[VAL_86:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref +// CHECK: %[[VAL_86:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref // CHECK: %[[VAL_87:.*]] = addi %[[VAL_28]], %[[VAL_4]] : index -// CHECK: %[[VAL_88:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_87]]] : memref +// CHECK: %[[VAL_88:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_87]]] : memref // CHECK: scf.for %[[VAL_89:.*]] = %[[VAL_86]] to %[[VAL_88]] step %[[VAL_4]] { -// CHECK: %[[VAL_90:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_89]]] : memref -// CHECK: %[[VAL_91:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_89]]] : memref +// CHECK: %[[VAL_90:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_89]]] : memref +// CHECK: %[[VAL_91:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_89]]] : memref // CHECK: store %[[VAL_91]], 
%[[VAL_16]]{{\[}}%[[VAL_32]], %[[VAL_90]]] : memref<32x16xf32> // CHECK: } // CHECK: } else { @@ -629,28 +629,28 @@ // CHECK: scf.yield %[[VAL_94]], %[[VAL_97]] : index, index // CHECK: } // CHECK: scf.for %[[VAL_98:.*]] = %[[VAL_99:.*]]#0 to %[[VAL_18]] step %[[VAL_4]] { -// CHECK: %[[VAL_100:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_98]]] : memref -// CHECK: %[[VAL_101:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_98]]] : memref +// CHECK: %[[VAL_100:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_98]]] : memref +// CHECK: %[[VAL_101:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_98]]] : memref // CHECK: %[[VAL_102:.*]] = addi %[[VAL_98]], %[[VAL_4]] : index -// CHECK: %[[VAL_103:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_102]]] : memref +// CHECK: %[[VAL_103:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_102]]] : memref // CHECK: scf.for %[[VAL_104:.*]] = %[[VAL_101]] to %[[VAL_103]] step %[[VAL_4]] { -// CHECK: %[[VAL_105:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_104]]] : memref -// CHECK: %[[VAL_106:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_104]]] : memref +// CHECK: %[[VAL_105:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_104]]] : memref +// CHECK: %[[VAL_106:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_104]]] : memref // CHECK: store %[[VAL_106]], %[[VAL_16]]{{\[}}%[[VAL_100]], %[[VAL_105]]] : memref<32x16xf32> // CHECK: } // CHECK: } // CHECK: scf.for %[[VAL_107:.*]] = %[[VAL_108:.*]]#1 to %[[VAL_20]] step %[[VAL_4]] { -// CHECK: %[[VAL_109:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_107]]] : memref -// CHECK: %[[VAL_110:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_107]]] : memref +// CHECK: %[[VAL_109:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_107]]] : memref +// CHECK: %[[VAL_110:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_107]]] : memref // CHECK: %[[VAL_111:.*]] = addi %[[VAL_107]], %[[VAL_4]] : index -// CHECK: %[[VAL_112:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_111]]] : memref +// CHECK: %[[VAL_112:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_111]]] : memref // CHECK: scf.for %[[VAL_113:.*]] = %[[VAL_110]] to %[[VAL_112]] step %[[VAL_4]] { -// CHECK: %[[VAL_114:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_113]]] : memref -// CHECK: %[[VAL_115:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_113]]] : memref +// CHECK: %[[VAL_114:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_113]]] : memref +// CHECK: %[[VAL_115:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_113]]] : memref // CHECK: store %[[VAL_115]], %[[VAL_16]]{{\[}}%[[VAL_109]], %[[VAL_114]]] : memref<32x16xf32> // CHECK: } // CHECK: } -// CHECK: %[[VAL_116:.*]] = tensor_load %[[VAL_16]] : memref<32x16xf32> +// CHECK: %[[VAL_116:.*]] = memref.tensor_load %[[VAL_16]] : memref<32x16xf32> // CHECK: return %[[VAL_116]] : tensor<32x16xf32> // CHECK: } func @add_ss_ss(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { @@ -680,13 +680,13 @@ // CHECK: %[[VAL_12:.*]] = linalg.sparse_pointers %[[VAL_1]], %[[VAL_4]] : tensor<32x16xf32> to memref // CHECK: %[[VAL_13:.*]] = linalg.sparse_indices %[[VAL_1]], %[[VAL_4]] : tensor<32x16xf32> to memref // CHECK: %[[VAL_14:.*]] = linalg.sparse_values %[[VAL_1]] : tensor<32x16xf32> to memref -// CHECK: %[[VAL_15:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: %[[VAL_16:.*]] = alloc() : memref<32x16xf32> +// CHECK: %[[VAL_15:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32> +// CHECK: %[[VAL_16:.*]] = memref.alloc() : memref<32x16xf32> // CHECK: linalg.copy(%[[VAL_15]], %[[VAL_16]]) : memref<32x16xf32>, memref<32x16xf32> -// CHECK: %[[VAL_17:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_18:.*]] = load 
%[[VAL_5]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_19:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_20:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_21:.*]]:2 = scf.while (%[[VAL_22:.*]] = %[[VAL_17]], %[[VAL_23:.*]] = %[[VAL_19]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_24:.*]] = cmpi ult, %[[VAL_22]], %[[VAL_18]] : index // CHECK: %[[VAL_25:.*]] = cmpi ult, %[[VAL_23]], %[[VAL_20]] : index @@ -694,20 +694,20 @@ // CHECK: scf.condition(%[[VAL_26]]) %[[VAL_22]], %[[VAL_23]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_27:.*]]: index, %[[VAL_28:.*]]: index): -// CHECK: %[[VAL_29:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_27]]] : memref -// CHECK: %[[VAL_30:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_28]]] : memref +// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_28]]] : memref // CHECK: %[[VAL_31:.*]] = cmpi ult, %[[VAL_30]], %[[VAL_29]] : index // CHECK: %[[VAL_32:.*]] = select %[[VAL_31]], %[[VAL_30]], %[[VAL_29]] : index // CHECK: %[[VAL_33:.*]] = cmpi eq, %[[VAL_29]], %[[VAL_32]] : index // CHECK: %[[VAL_34:.*]] = cmpi eq, %[[VAL_30]], %[[VAL_32]] : index // CHECK: %[[VAL_35:.*]] = and %[[VAL_33]], %[[VAL_34]] : i1 // CHECK: scf.if %[[VAL_35]] { -// CHECK: %[[VAL_36:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_27]]] : memref // CHECK: %[[VAL_37:.*]] = addi %[[VAL_27]], %[[VAL_4]] : index -// CHECK: %[[VAL_38:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_37]]] : memref -// CHECK: %[[VAL_39:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref +// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_37]]] : memref +// CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref // CHECK: %[[VAL_40:.*]] = addi %[[VAL_28]], %[[VAL_4]] : index -// CHECK: %[[VAL_41:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_40]]] : memref +// CHECK: %[[VAL_41:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_40]]] : memref // CHECK: %[[VAL_42:.*]]:2 = scf.while (%[[VAL_43:.*]] = %[[VAL_36]], %[[VAL_44:.*]] = %[[VAL_39]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_45:.*]] = cmpi ult, %[[VAL_43]], %[[VAL_38]] : index // CHECK: %[[VAL_46:.*]] = cmpi ult, %[[VAL_44]], %[[VAL_41]] : index @@ -715,16 +715,16 @@ // CHECK: scf.condition(%[[VAL_47]]) %[[VAL_43]], %[[VAL_44]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_48:.*]]: index, %[[VAL_49:.*]]: index): -// CHECK: %[[VAL_50:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_48]]] : memref -// CHECK: %[[VAL_51:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_49]]] : memref +// CHECK: %[[VAL_50:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_48]]] : memref +// CHECK: %[[VAL_51:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_49]]] : memref // CHECK: %[[VAL_52:.*]] = cmpi ult, %[[VAL_51]], %[[VAL_50]] : index // CHECK: %[[VAL_53:.*]] = select %[[VAL_52]], %[[VAL_51]], %[[VAL_50]] : index // CHECK: %[[VAL_54:.*]] = cmpi eq, %[[VAL_50]], %[[VAL_53]] : index // CHECK: %[[VAL_55:.*]] = cmpi eq, %[[VAL_51]], %[[VAL_53]] : index // CHECK: %[[VAL_56:.*]] = and %[[VAL_54]], %[[VAL_55]] : i1 // CHECK: scf.if %[[VAL_56]] { -// CHECK: %[[VAL_57:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_48]]] : 
memref -// CHECK: %[[VAL_58:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_49]]] : memref +// CHECK: %[[VAL_57:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_48]]] : memref +// CHECK: %[[VAL_58:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_49]]] : memref // CHECK: %[[VAL_59:.*]] = mulf %[[VAL_57]], %[[VAL_58]] : f32 // CHECK: store %[[VAL_59]], %[[VAL_16]]{{\[}}%[[VAL_32]], %[[VAL_53]]] : memref<32x16xf32> // CHECK: } else { @@ -747,7 +747,7 @@ // CHECK: %[[VAL_71:.*]] = select %[[VAL_69]], %[[VAL_70]], %[[VAL_28]] : index // CHECK: scf.yield %[[VAL_68]], %[[VAL_71]] : index, index // CHECK: } -// CHECK: %[[VAL_72:.*]] = tensor_load %[[VAL_16]] : memref<32x16xf32> +// CHECK: %[[VAL_72:.*]] = memref.tensor_load %[[VAL_16]] : memref<32x16xf32> // CHECK: return %[[VAL_72]] : tensor<32x16xf32> // CHECK: } func @mul_ss_ss(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { @@ -792,13 +792,13 @@ // CHECK: %[[VAL_12:.*]] = linalg.sparse_pointers %[[VAL_1]], %[[VAL_4]] : tensor<32x16xf32> to memref // CHECK: %[[VAL_13:.*]] = linalg.sparse_indices %[[VAL_1]], %[[VAL_4]] : tensor<32x16xf32> to memref // CHECK: %[[VAL_14:.*]] = linalg.sparse_values %[[VAL_1]] : tensor<32x16xf32> to memref -// CHECK: %[[VAL_15:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: %[[VAL_16:.*]] = alloc() : memref<32x16xf32> +// CHECK: %[[VAL_15:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32> +// CHECK: %[[VAL_16:.*]] = memref.alloc() : memref<32x16xf32> // CHECK: linalg.copy(%[[VAL_15]], %[[VAL_16]]) : memref<32x16xf32>, memref<32x16xf32> -// CHECK: %[[VAL_17:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_18:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_19:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_20:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_21:.*]]:2 = scf.while (%[[VAL_22:.*]] = %[[VAL_17]], %[[VAL_23:.*]] = %[[VAL_19]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_24:.*]] = cmpi ult, %[[VAL_22]], %[[VAL_18]] : index // CHECK: %[[VAL_25:.*]] = cmpi ult, %[[VAL_23]], %[[VAL_20]] : index @@ -806,20 +806,20 @@ // CHECK: scf.condition(%[[VAL_26]]) %[[VAL_22]], %[[VAL_23]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_27:.*]]: index, %[[VAL_28:.*]]: index): -// CHECK: %[[VAL_29:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_27]]] : memref -// CHECK: %[[VAL_30:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_28]]] : memref +// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_28]]] : memref // CHECK: %[[VAL_31:.*]] = cmpi ult, %[[VAL_30]], %[[VAL_29]] : index // CHECK: %[[VAL_32:.*]] = select %[[VAL_31]], %[[VAL_30]], %[[VAL_29]] : index // CHECK: %[[VAL_33:.*]] = cmpi eq, %[[VAL_29]], %[[VAL_32]] : index // CHECK: %[[VAL_34:.*]] = cmpi eq, %[[VAL_30]], %[[VAL_32]] : index // CHECK: %[[VAL_35:.*]] = and %[[VAL_33]], %[[VAL_34]] : i1 // CHECK: scf.if %[[VAL_35]] { -// CHECK: %[[VAL_36:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_27]]] : memref // CHECK: %[[VAL_37:.*]] = addi %[[VAL_27]], %[[VAL_4]] : index -// CHECK: %[[VAL_38:.*]] = 
load %[[VAL_7]]{{\[}}%[[VAL_37]]] : memref -// CHECK: %[[VAL_39:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref +// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_37]]] : memref +// CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref // CHECK: %[[VAL_40:.*]] = addi %[[VAL_28]], %[[VAL_4]] : index -// CHECK: %[[VAL_41:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_40]]] : memref +// CHECK: %[[VAL_41:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_40]]] : memref // CHECK: %[[VAL_42:.*]]:2 = scf.while (%[[VAL_43:.*]] = %[[VAL_36]], %[[VAL_44:.*]] = %[[VAL_39]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_45:.*]] = cmpi ult, %[[VAL_43]], %[[VAL_38]] : index // CHECK: %[[VAL_46:.*]] = cmpi ult, %[[VAL_44]], %[[VAL_41]] : index @@ -827,27 +827,27 @@ // CHECK: scf.condition(%[[VAL_47]]) %[[VAL_43]], %[[VAL_44]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_48:.*]]: index, %[[VAL_49:.*]]: index): -// CHECK: %[[VAL_50:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_48]]] : memref -// CHECK: %[[VAL_51:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_49]]] : memref +// CHECK: %[[VAL_50:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_48]]] : memref +// CHECK: %[[VAL_51:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_49]]] : memref // CHECK: %[[VAL_52:.*]] = cmpi ult, %[[VAL_51]], %[[VAL_50]] : index // CHECK: %[[VAL_53:.*]] = select %[[VAL_52]], %[[VAL_51]], %[[VAL_50]] : index // CHECK: %[[VAL_54:.*]] = cmpi eq, %[[VAL_50]], %[[VAL_53]] : index // CHECK: %[[VAL_55:.*]] = cmpi eq, %[[VAL_51]], %[[VAL_53]] : index // CHECK: %[[VAL_56:.*]] = and %[[VAL_54]], %[[VAL_55]] : i1 // CHECK: scf.if %[[VAL_56]] { -// CHECK: %[[VAL_57:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_48]]] : memref -// CHECK: %[[VAL_58:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_49]]] : memref +// CHECK: %[[VAL_57:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_48]]] : memref +// CHECK: %[[VAL_58:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_49]]] : memref // CHECK: %[[VAL_59:.*]] = addf %[[VAL_57]], %[[VAL_58]] : f32 // CHECK: store %[[VAL_59]], %[[VAL_16]]{{\[}}%[[VAL_32]], %[[VAL_53]]] : memref<32x16xf32> // CHECK: } else { // CHECK: %[[VAL_60:.*]] = cmpi eq, %[[VAL_50]], %[[VAL_53]] : index // CHECK: scf.if %[[VAL_60]] { -// CHECK: %[[VAL_61:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_48]]] : memref +// CHECK: %[[VAL_61:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_48]]] : memref // CHECK: store %[[VAL_61]], %[[VAL_16]]{{\[}}%[[VAL_32]], %[[VAL_53]]] : memref<32x16xf32> // CHECK: } else { // CHECK: %[[VAL_62:.*]] = cmpi eq, %[[VAL_51]], %[[VAL_53]] : index // CHECK: scf.if %[[VAL_62]] { -// CHECK: %[[VAL_63:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_49]]] : memref +// CHECK: %[[VAL_63:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_49]]] : memref // CHECK: store %[[VAL_63]], %[[VAL_16]]{{\[}}%[[VAL_32]], %[[VAL_53]]] : memref<32x16xf32> // CHECK: } else { // CHECK: } @@ -862,35 +862,35 @@ // CHECK: scf.yield %[[VAL_66]], %[[VAL_69]] : index, index // CHECK: } // CHECK: scf.for %[[VAL_70:.*]] = %[[VAL_71:.*]]#0 to %[[VAL_38]] step %[[VAL_4]] { -// CHECK: %[[VAL_72:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_70]]] : memref -// CHECK: %[[VAL_73:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_70]]] : memref +// CHECK: %[[VAL_72:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_70]]] : memref +// CHECK: %[[VAL_73:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_70]]] : memref // CHECK: store %[[VAL_73]], %[[VAL_16]]{{\[}}%[[VAL_32]], %[[VAL_72]]] : memref<32x16xf32> // CHECK: } // CHECK: scf.for %[[VAL_74:.*]] = %[[VAL_75:.*]]#1 to %[[VAL_41]] step %[[VAL_4]] { -// CHECK: %[[VAL_76:.*]] = load 
%[[VAL_13]]{{\[}}%[[VAL_74]]] : memref -// CHECK: %[[VAL_77:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_74]]] : memref +// CHECK: %[[VAL_76:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_74]]] : memref +// CHECK: %[[VAL_77:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_74]]] : memref // CHECK: store %[[VAL_77]], %[[VAL_16]]{{\[}}%[[VAL_32]], %[[VAL_76]]] : memref<32x16xf32> // CHECK: } // CHECK: } else { // CHECK: %[[VAL_78:.*]] = cmpi eq, %[[VAL_29]], %[[VAL_32]] : index // CHECK: scf.if %[[VAL_78]] { -// CHECK: %[[VAL_79:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_79:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_27]]] : memref // CHECK: %[[VAL_80:.*]] = addi %[[VAL_27]], %[[VAL_4]] : index -// CHECK: %[[VAL_81:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_80]]] : memref +// CHECK: %[[VAL_81:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_80]]] : memref // CHECK: scf.for %[[VAL_82:.*]] = %[[VAL_79]] to %[[VAL_81]] step %[[VAL_4]] { -// CHECK: %[[VAL_83:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_82]]] : memref -// CHECK: %[[VAL_84:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_82]]] : memref +// CHECK: %[[VAL_83:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_82]]] : memref +// CHECK: %[[VAL_84:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_82]]] : memref // CHECK: store %[[VAL_84]], %[[VAL_16]]{{\[}}%[[VAL_32]], %[[VAL_83]]] : memref<32x16xf32> // CHECK: } // CHECK: } else { // CHECK: %[[VAL_85:.*]] = cmpi eq, %[[VAL_30]], %[[VAL_32]] : index // CHECK: scf.if %[[VAL_85]] { -// CHECK: %[[VAL_86:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref +// CHECK: %[[VAL_86:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref // CHECK: %[[VAL_87:.*]] = addi %[[VAL_28]], %[[VAL_4]] : index -// CHECK: %[[VAL_88:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_87]]] : memref +// CHECK: %[[VAL_88:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_87]]] : memref // CHECK: scf.for %[[VAL_89:.*]] = %[[VAL_86]] to %[[VAL_88]] step %[[VAL_4]] { -// CHECK: %[[VAL_90:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_89]]] : memref -// CHECK: %[[VAL_91:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_89]]] : memref +// CHECK: %[[VAL_90:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_89]]] : memref +// CHECK: %[[VAL_91:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_89]]] : memref // CHECK: store %[[VAL_91]], %[[VAL_16]]{{\[}}%[[VAL_32]], %[[VAL_90]]] : memref<32x16xf32> // CHECK: } // CHECK: } else { @@ -906,28 +906,28 @@ // CHECK: scf.yield %[[VAL_94]], %[[VAL_97]] : index, index // CHECK: } // CHECK: scf.for %[[VAL_98:.*]] = %[[VAL_99:.*]]#0 to %[[VAL_18]] step %[[VAL_4]] { -// CHECK: %[[VAL_100:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_98]]] : memref -// CHECK: %[[VAL_101:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_98]]] : memref +// CHECK: %[[VAL_100:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_98]]] : memref +// CHECK: %[[VAL_101:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_98]]] : memref // CHECK: %[[VAL_102:.*]] = addi %[[VAL_98]], %[[VAL_4]] : index -// CHECK: %[[VAL_103:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_102]]] : memref +// CHECK: %[[VAL_103:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_102]]] : memref // CHECK: scf.for %[[VAL_104:.*]] = %[[VAL_101]] to %[[VAL_103]] step %[[VAL_4]] { -// CHECK: %[[VAL_105:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_104]]] : memref -// CHECK: %[[VAL_106:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_104]]] : memref +// CHECK: %[[VAL_105:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_104]]] : memref +// CHECK: %[[VAL_106:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_104]]] : memref // CHECK: store %[[VAL_106]], %[[VAL_16]]{{\[}}%[[VAL_100]], %[[VAL_105]]] : memref<32x16xf32> // CHECK: } // CHECK: } // CHECK: scf.for 
%[[VAL_107:.*]] = %[[VAL_108:.*]]#1 to %[[VAL_20]] step %[[VAL_4]] { -// CHECK: %[[VAL_109:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_107]]] : memref -// CHECK: %[[VAL_110:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_107]]] : memref +// CHECK: %[[VAL_109:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_107]]] : memref +// CHECK: %[[VAL_110:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_107]]] : memref // CHECK: %[[VAL_111:.*]] = addi %[[VAL_107]], %[[VAL_4]] : index -// CHECK: %[[VAL_112:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_111]]] : memref +// CHECK: %[[VAL_112:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_111]]] : memref // CHECK: scf.for %[[VAL_113:.*]] = %[[VAL_110]] to %[[VAL_112]] step %[[VAL_4]] { -// CHECK: %[[VAL_114:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_113]]] : memref -// CHECK: %[[VAL_115:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_113]]] : memref +// CHECK: %[[VAL_114:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_113]]] : memref +// CHECK: %[[VAL_115:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_113]]] : memref // CHECK: store %[[VAL_115]], %[[VAL_16]]{{\[}}%[[VAL_109]], %[[VAL_114]]] : memref<32x16xf32> // CHECK: } // CHECK: } -// CHECK: %[[VAL_116:.*]] = tensor_load %[[VAL_16]] : memref<32x16xf32> +// CHECK: %[[VAL_116:.*]] = memref.tensor_load %[[VAL_16]] : memref<32x16xf32> // CHECK: return %[[VAL_116]] : tensor<32x16xf32> // CHECK: } func @add_sd_ds(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { @@ -957,13 +957,13 @@ // CHECK: %[[VAL_12:.*]] = linalg.sparse_pointers %[[VAL_1]], %[[VAL_4]] : tensor<32x16xf32> to memref // CHECK: %[[VAL_13:.*]] = linalg.sparse_indices %[[VAL_1]], %[[VAL_4]] : tensor<32x16xf32> to memref // CHECK: %[[VAL_14:.*]] = linalg.sparse_values %[[VAL_1]] : tensor<32x16xf32> to memref -// CHECK: %[[VAL_15:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: %[[VAL_16:.*]] = alloc() : memref<32x16xf32> +// CHECK: %[[VAL_15:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32> +// CHECK: %[[VAL_16:.*]] = memref.alloc() : memref<32x16xf32> // CHECK: linalg.copy(%[[VAL_15]], %[[VAL_16]]) : memref<32x16xf32>, memref<32x16xf32> -// CHECK: %[[VAL_17:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_18:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_19:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_20:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_21:.*]]:2 = scf.while (%[[VAL_22:.*]] = %[[VAL_17]], %[[VAL_23:.*]] = %[[VAL_19]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_24:.*]] = cmpi ult, %[[VAL_22]], %[[VAL_18]] : index // CHECK: %[[VAL_25:.*]] = cmpi ult, %[[VAL_23]], %[[VAL_20]] : index @@ -971,20 +971,20 @@ // CHECK: scf.condition(%[[VAL_26]]) %[[VAL_22]], %[[VAL_23]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_27:.*]]: index, %[[VAL_28:.*]]: index): -// CHECK: %[[VAL_29:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_27]]] : memref -// CHECK: %[[VAL_30:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_28]]] : memref +// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_28]]] : memref // CHECK: %[[VAL_31:.*]] = cmpi ult, %[[VAL_30]], %[[VAL_29]] : index // CHECK: 
%[[VAL_32:.*]] = select %[[VAL_31]], %[[VAL_30]], %[[VAL_29]] : index // CHECK: %[[VAL_33:.*]] = cmpi eq, %[[VAL_29]], %[[VAL_32]] : index // CHECK: %[[VAL_34:.*]] = cmpi eq, %[[VAL_30]], %[[VAL_32]] : index // CHECK: %[[VAL_35:.*]] = and %[[VAL_33]], %[[VAL_34]] : i1 // CHECK: scf.if %[[VAL_35]] { -// CHECK: %[[VAL_36:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_27]]] : memref // CHECK: %[[VAL_37:.*]] = addi %[[VAL_27]], %[[VAL_4]] : index -// CHECK: %[[VAL_38:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_37]]] : memref -// CHECK: %[[VAL_39:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref +// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_37]]] : memref +// CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref // CHECK: %[[VAL_40:.*]] = addi %[[VAL_28]], %[[VAL_4]] : index -// CHECK: %[[VAL_41:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_40]]] : memref +// CHECK: %[[VAL_41:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_40]]] : memref // CHECK: %[[VAL_42:.*]]:2 = scf.while (%[[VAL_43:.*]] = %[[VAL_36]], %[[VAL_44:.*]] = %[[VAL_39]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_45:.*]] = cmpi ult, %[[VAL_43]], %[[VAL_38]] : index // CHECK: %[[VAL_46:.*]] = cmpi ult, %[[VAL_44]], %[[VAL_41]] : index @@ -992,16 +992,16 @@ // CHECK: scf.condition(%[[VAL_47]]) %[[VAL_43]], %[[VAL_44]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_48:.*]]: index, %[[VAL_49:.*]]: index): -// CHECK: %[[VAL_50:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_48]]] : memref -// CHECK: %[[VAL_51:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_49]]] : memref +// CHECK: %[[VAL_50:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_48]]] : memref +// CHECK: %[[VAL_51:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_49]]] : memref // CHECK: %[[VAL_52:.*]] = cmpi ult, %[[VAL_51]], %[[VAL_50]] : index // CHECK: %[[VAL_53:.*]] = select %[[VAL_52]], %[[VAL_51]], %[[VAL_50]] : index // CHECK: %[[VAL_54:.*]] = cmpi eq, %[[VAL_50]], %[[VAL_53]] : index // CHECK: %[[VAL_55:.*]] = cmpi eq, %[[VAL_51]], %[[VAL_53]] : index // CHECK: %[[VAL_56:.*]] = and %[[VAL_54]], %[[VAL_55]] : i1 // CHECK: scf.if %[[VAL_56]] { -// CHECK: %[[VAL_57:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_48]]] : memref -// CHECK: %[[VAL_58:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_49]]] : memref +// CHECK: %[[VAL_57:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_48]]] : memref +// CHECK: %[[VAL_58:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_49]]] : memref // CHECK: %[[VAL_59:.*]] = mulf %[[VAL_57]], %[[VAL_58]] : f32 // CHECK: store %[[VAL_59]], %[[VAL_16]]{{\[}}%[[VAL_32]], %[[VAL_53]]] : memref<32x16xf32> // CHECK: } else { @@ -1024,7 +1024,7 @@ // CHECK: %[[VAL_71:.*]] = select %[[VAL_69]], %[[VAL_70]], %[[VAL_28]] : index // CHECK: scf.yield %[[VAL_68]], %[[VAL_71]] : index, index // CHECK: } -// CHECK: %[[VAL_72:.*]] = tensor_load %[[VAL_16]] : memref<32x16xf32> +// CHECK: %[[VAL_72:.*]] = memref.tensor_load %[[VAL_16]] : memref<32x16xf32> // CHECK: return %[[VAL_72]] : tensor<32x16xf32> // CHECK: } func @mul_sd_ds(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { @@ -1063,26 +1063,26 @@ // CHECK: %[[VAL_6:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_5]] : tensor<16x32xf32> to memref // CHECK: %[[VAL_7:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_5]] : tensor<16x32xf32> to memref // CHECK: %[[VAL_8:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<16x32xf32> to memref -// CHECK: %[[VAL_9:.*]] = tensor_to_memref %[[VAL_1]] : memref<32xf32> -// CHECK: %[[VAL_10:.*]] = 
tensor_to_memref %[[VAL_2]] : memref<16xf32> -// CHECK: %[[VAL_11:.*]] = alloc() : memref<16xf32> +// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf32> +// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<16xf32> +// CHECK: %[[VAL_11:.*]] = memref.alloc() : memref<16xf32> // CHECK: linalg.copy(%[[VAL_10]], %[[VAL_11]]) : memref<16xf32>, memref<16xf32> // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { -// CHECK: %[[VAL_13:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref // CHECK: %[[VAL_14:.*]] = addi %[[VAL_12]], %[[VAL_5]] : index -// CHECK: %[[VAL_15:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_14]]] : memref -// CHECK: %[[VAL_16:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_12]]] : memref<16xf32> +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_14]]] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_12]]] : memref<16xf32> // CHECK: %[[VAL_17:.*]] = scf.for %[[VAL_18:.*]] = %[[VAL_13]] to %[[VAL_15]] step %[[VAL_5]] iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (f32) { -// CHECK: %[[VAL_20:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_18]]] : memref -// CHECK: %[[VAL_21:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_18]]] : memref -// CHECK: %[[VAL_22:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_20]]] : memref<32xf32> +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_18]]] : memref +// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_18]]] : memref +// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_20]]] : memref<32xf32> // CHECK: %[[VAL_23:.*]] = mulf %[[VAL_21]], %[[VAL_22]] : f32 // CHECK: %[[VAL_24:.*]] = addf %[[VAL_23]], %[[VAL_19]] : f32 // CHECK: scf.yield %[[VAL_24]] : f32 // CHECK: } // CHECK: store %[[VAL_25:.*]], %[[VAL_11]]{{\[}}%[[VAL_12]]] : memref<16xf32> // CHECK: } -// CHECK: %[[VAL_26:.*]] = tensor_load %[[VAL_11]] : memref<16xf32> +// CHECK: %[[VAL_26:.*]] = memref.tensor_load %[[VAL_11]] : memref<16xf32> // CHECK: return %[[VAL_26]] : tensor<16xf32> // CHECK: } func @matvec(%argA: tensor<16x32xf32>, %argb: tensor<32xf32>, %argx: tensor<16xf32>) -> tensor<16xf32> { @@ -1118,22 +1118,22 @@ // CHECK: %[[VAL_4:.*]] = constant 1 : index // CHECK: %[[VAL_5:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_4]] : tensor<10x20xf32> to memref // CHECK: %[[VAL_6:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<10x20xf32> to memref -// CHECK: %[[VAL_7:.*]] = tensor_to_memref %[[VAL_1]] : memref -// CHECK: %[[VAL_8:.*]] = alloc() : memref +// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_1]] : memref +// CHECK: %[[VAL_8:.*]] = memref.alloc() : memref // CHECK: linalg.copy(%[[VAL_7]], %[[VAL_8]]) : memref, memref // CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_3]] to %[[VAL_2]] step %[[VAL_4]] { -// CHECK: %[[VAL_10:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_9]]] : memref +// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_9]]] : memref // CHECK: %[[VAL_11:.*]] = addi %[[VAL_9]], %[[VAL_4]] : index -// CHECK: %[[VAL_12:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_11]]] : memref -// CHECK: %[[VAL_13:.*]] = load %[[VAL_8]][] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_11]]] : memref +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]][] : memref // CHECK: %[[VAL_14:.*]] = scf.for %[[VAL_15:.*]] = %[[VAL_10]] to %[[VAL_12]] step %[[VAL_4]] iter_args(%[[VAL_16:.*]] = %[[VAL_13]]) -> (f32) { -// CHECK: %[[VAL_17:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_15]]] : memref +// CHECK: %[[VAL_17:.*]] = memref.load 
%[[VAL_6]]{{\[}}%[[VAL_15]]] : memref // CHECK: %[[VAL_18:.*]] = addf %[[VAL_16]], %[[VAL_17]] : f32 // CHECK: scf.yield %[[VAL_18]] : f32 // CHECK: } // CHECK: store %[[VAL_19:.*]], %[[VAL_8]][] : memref // CHECK: } -// CHECK: %[[VAL_20:.*]] = tensor_load %[[VAL_8]] : memref +// CHECK: %[[VAL_20:.*]] = memref.tensor_load %[[VAL_8]] : memref // CHECK: return %[[VAL_20]] : tensor // CHECK: } func @sum_reduction(%arga: tensor<10x20xf32>, %argx: tensor) -> tensor { @@ -1169,23 +1169,23 @@ // CHECK: %[[VAL_5:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_4]] : tensor to memref // CHECK: %[[VAL_6:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_4]] : tensor to memref // CHECK: %[[VAL_7:.*]] = linalg.sparse_values %[[VAL_0]] : tensor to memref -// CHECK: %[[VAL_8:.*]] = dim %[[VAL_1]], %[[VAL_3]] : tensor -// CHECK: %[[VAL_9:.*]] = dim %[[VAL_1]], %[[VAL_4]] : tensor -// CHECK: %[[VAL_10:.*]] = tensor_to_memref %[[VAL_1]] : memref -// CHECK: %[[VAL_11:.*]] = alloc(%[[VAL_8]], %[[VAL_9]]) : memref +// CHECK: %[[VAL_8:.*]] = memref.dim %[[VAL_1]], %[[VAL_3]] : tensor +// CHECK: %[[VAL_9:.*]] = memref.dim %[[VAL_1]], %[[VAL_4]] : tensor +// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_1]] : memref +// CHECK: %[[VAL_11:.*]] = memref.alloc(%[[VAL_8]], %[[VAL_9]]) : memref // CHECK: linalg.copy(%[[VAL_10]], %[[VAL_11]]) : memref, memref // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_3]] to %[[VAL_8]] step %[[VAL_4]] { -// CHECK: %[[VAL_13:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_12]]] : memref +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_12]]] : memref // CHECK: %[[VAL_14:.*]] = addi %[[VAL_12]], %[[VAL_4]] : index -// CHECK: %[[VAL_15:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_14]]] : memref +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_14]]] : memref // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_13]] to %[[VAL_15]] step %[[VAL_4]] { -// CHECK: %[[VAL_17:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_16]]] : memref -// CHECK: %[[VAL_18:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_16]]] : memref +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_16]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_16]]] : memref // CHECK: %[[VAL_19:.*]] = mulf %[[VAL_18]], %[[VAL_2]] : f64 // CHECK: store %[[VAL_19]], %[[VAL_11]]{{\[}}%[[VAL_12]], %[[VAL_17]]] : memref // CHECK: } // CHECK: } -// CHECK: %[[VAL_20:.*]] = tensor_load %[[VAL_11]] : memref +// CHECK: %[[VAL_20:.*]] = memref.tensor_load %[[VAL_11]] : memref // CHECK: return %[[VAL_20]] : tensor // CHECK: } func @scale(%arga: tensor, %argx: tensor) -> tensor { @@ -1229,28 +1229,28 @@ // CHECK: %[[VAL_8:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_5]] : tensor to memref // CHECK: %[[VAL_9:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_5]] : tensor to memref // CHECK: %[[VAL_10:.*]] = linalg.sparse_values %[[VAL_0]] : tensor to memref -// CHECK: %[[VAL_11:.*]] = tensor_to_memref %[[VAL_1]] : memref -// CHECK: %[[VAL_12:.*]] = dim %[[VAL_2]], %[[VAL_4]] : tensor -// CHECK: %[[VAL_13:.*]] = tensor_to_memref %[[VAL_2]] : memref -// CHECK: %[[VAL_14:.*]] = dim %[[VAL_3]], %[[VAL_4]] : tensor -// CHECK: %[[VAL_15:.*]] = dim %[[VAL_3]], %[[VAL_5]] : tensor -// CHECK: %[[VAL_16:.*]] = tensor_to_memref %[[VAL_3]] : memref -// CHECK: %[[VAL_17:.*]] = alloc(%[[VAL_14]], %[[VAL_15]]) : memref +// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_1]] : memref +// CHECK: %[[VAL_12:.*]] = memref.dim %[[VAL_2]], %[[VAL_4]] : tensor +// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_2]] : memref +// CHECK: %[[VAL_14:.*]] = memref.dim 
%[[VAL_3]], %[[VAL_4]] : tensor +// CHECK: %[[VAL_15:.*]] = memref.dim %[[VAL_3]], %[[VAL_5]] : tensor +// CHECK: %[[VAL_16:.*]] = memref.buffer_cast %[[VAL_3]] : memref +// CHECK: %[[VAL_17:.*]] = memref.alloc(%[[VAL_14]], %[[VAL_15]]) : memref // CHECK: linalg.copy(%[[VAL_16]], %[[VAL_17]]) : memref, memref -// CHECK: %[[VAL_18:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_19:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK: scf.for %[[VAL_20:.*]] = %[[VAL_18]] to %[[VAL_19]] step %[[VAL_5]] { -// CHECK: %[[VAL_21:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_20]]] : memref +// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_20]]] : memref // CHECK: scf.for %[[VAL_22:.*]] = %[[VAL_4]] to %[[VAL_12]] step %[[VAL_5]] { -// CHECK: %[[VAL_23:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_21]], %[[VAL_22]]] : memref -// CHECK: %[[VAL_24:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_20]]] : memref +// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_21]], %[[VAL_22]]] : memref +// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_20]]] : memref // CHECK: %[[VAL_25:.*]] = addi %[[VAL_20]], %[[VAL_5]] : index -// CHECK: %[[VAL_26:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_25]]] : memref +// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_25]]] : memref // CHECK: scf.for %[[VAL_27:.*]] = %[[VAL_24]] to %[[VAL_26]] step %[[VAL_5]] { -// CHECK: %[[VAL_28:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_27]]] : memref -// CHECK: %[[VAL_29:.*]] = load %[[VAL_17]]{{\[}}%[[VAL_21]], %[[VAL_28]]] : memref -// CHECK: %[[VAL_30:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_27]]] : memref -// CHECK: %[[VAL_31:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_22]], %[[VAL_28]]] : memref +// CHECK: %[[VAL_28:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_21]], %[[VAL_28]]] : memref +// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_31:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_22]], %[[VAL_28]]] : memref // CHECK: %[[VAL_32:.*]] = mulf %[[VAL_23]], %[[VAL_31]] : f32 // CHECK: %[[VAL_33:.*]] = mulf %[[VAL_30]], %[[VAL_32]] : f32 // CHECK: %[[VAL_34:.*]] = addf %[[VAL_29]], %[[VAL_33]] : f32 @@ -1258,7 +1258,7 @@ // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[VAL_35:.*]] = tensor_load %[[VAL_17]] : memref +// CHECK: %[[VAL_35:.*]] = memref.tensor_load %[[VAL_17]] : memref // CHECK: return %[[VAL_35]] : tensor // CHECK: } func @sampled_dense_dense(%args: tensor, @@ -1319,33 +1319,33 @@ // CHECK: %[[VAL_17:.*]] = linalg.sparse_pointers %[[VAL_2]], %[[VAL_8]] : tensor to memref // CHECK: %[[VAL_18:.*]] = linalg.sparse_indices %[[VAL_2]], %[[VAL_8]] : tensor to memref // CHECK: %[[VAL_19:.*]] = linalg.sparse_values %[[VAL_2]] : tensor to memref -// CHECK: %[[VAL_20:.*]] = tensor_to_memref %[[VAL_3]] : memref -// CHECK: %[[VAL_21:.*]] = tensor_to_memref %[[VAL_4]] : memref -// CHECK: %[[VAL_22:.*]] = dim %[[VAL_5]], %[[VAL_6]] : tensor -// CHECK: %[[VAL_23:.*]] = tensor_to_memref %[[VAL_5]] : memref -// CHECK: %[[VAL_24:.*]] = alloc(%[[VAL_22]]) : memref +// CHECK: %[[VAL_20:.*]] = memref.buffer_cast %[[VAL_3]] : memref +// CHECK: %[[VAL_21:.*]] = memref.buffer_cast %[[VAL_4]] : memref +// CHECK: %[[VAL_22:.*]] = memref.dim %[[VAL_5]], %[[VAL_6]] : tensor +// CHECK: %[[VAL_23:.*]] = memref.buffer_cast %[[VAL_5]] : memref +// CHECK: %[[VAL_24:.*]] = 
memref.alloc(%[[VAL_22]]) : memref // CHECK: linalg.copy(%[[VAL_23]], %[[VAL_24]]) : memref, memref -// CHECK: %[[VAL_25:.*]] = load %[[VAL_21]][] : memref -// CHECK: %[[VAL_26:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_6]]] : memref -// CHECK: %[[VAL_27:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_8]]] : memref +// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_21]][] : memref +// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_6]]] : memref +// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_28:.*]]:2 = scf.while (%[[VAL_29:.*]] = %[[VAL_26]], %[[VAL_30:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_31:.*]] = cmpi ult, %[[VAL_29]], %[[VAL_27]] : index // CHECK: scf.condition(%[[VAL_31]]) %[[VAL_29]], %[[VAL_30]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_32:.*]]: index, %[[VAL_33:.*]]: index): -// CHECK: %[[VAL_34:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_32]]] : memref +// CHECK: %[[VAL_34:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_32]]] : memref // CHECK: %[[VAL_35:.*]] = cmpi eq, %[[VAL_34]], %[[VAL_33]] : index // CHECK: scf.if %[[VAL_35]] { -// CHECK: %[[VAL_36:.*]] = load %[[VAL_20]]{{\[}}%[[VAL_33]]] : memref -// CHECK: %[[VAL_37:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_32]]] : memref +// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_20]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_32]]] : memref // CHECK: %[[VAL_38:.*]] = addi %[[VAL_32]], %[[VAL_8]] : index -// CHECK: %[[VAL_39:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_38]]] : memref -// CHECK: %[[VAL_40:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_38]]] : memref +// CHECK: %[[VAL_40:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_33]]] : memref // CHECK: %[[VAL_41:.*]] = addi %[[VAL_33]], %[[VAL_8]] : index -// CHECK: %[[VAL_42:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_41]]] : memref -// CHECK: %[[VAL_43:.*]] = load %[[VAL_17]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_41]]] : memref +// CHECK: %[[VAL_43:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_33]]] : memref // CHECK: %[[VAL_44:.*]] = addi %[[VAL_33]], %[[VAL_8]] : index -// CHECK: %[[VAL_45:.*]] = load %[[VAL_17]]{{\[}}%[[VAL_44]]] : memref +// CHECK: %[[VAL_45:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_44]]] : memref // CHECK: %[[VAL_46:.*]]:3 = scf.while (%[[VAL_47:.*]] = %[[VAL_37]], %[[VAL_48:.*]] = %[[VAL_40]], %[[VAL_49:.*]] = %[[VAL_43]]) : (index, index, index) -> (index, index, index) { // CHECK: %[[VAL_50:.*]] = cmpi ult, %[[VAL_47]], %[[VAL_39]] : index // CHECK: %[[VAL_51:.*]] = cmpi ult, %[[VAL_48]], %[[VAL_42]] : index @@ -1355,11 +1355,11 @@ // CHECK: scf.condition(%[[VAL_54]]) %[[VAL_47]], %[[VAL_48]], %[[VAL_49]] : index, index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_55:.*]]: index, %[[VAL_56:.*]]: index, %[[VAL_57:.*]]: index): -// CHECK: %[[VAL_58:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_55]]] : memref -// CHECK: %[[VAL_59:.*]] = load %[[VAL_15]]{{\[}}%[[VAL_56]]] : memref +// CHECK: %[[VAL_58:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_55]]] : memref +// CHECK: %[[VAL_59:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_56]]] : memref // CHECK: %[[VAL_60:.*]] = cmpi ult, %[[VAL_59]], %[[VAL_58]] : index // CHECK: %[[VAL_61:.*]] = select %[[VAL_60]], %[[VAL_59]], %[[VAL_58]] : index -// CHECK: %[[VAL_62:.*]] = load %[[VAL_18]]{{\[}}%[[VAL_57]]] : memref +// CHECK: %[[VAL_62:.*]] = memref.load %[[VAL_18]]{{\[}}%[[VAL_57]]] : memref // CHECK: %[[VAL_63:.*]] = cmpi 
ult, %[[VAL_62]], %[[VAL_61]] : index // CHECK: %[[VAL_64:.*]] = select %[[VAL_63]], %[[VAL_62]], %[[VAL_61]] : index // CHECK: %[[VAL_65:.*]] = cmpi eq, %[[VAL_58]], %[[VAL_64]] : index @@ -1368,13 +1368,13 @@ // CHECK: %[[VAL_68:.*]] = cmpi eq, %[[VAL_62]], %[[VAL_64]] : index // CHECK: %[[VAL_69:.*]] = and %[[VAL_67]], %[[VAL_68]] : i1 // CHECK: scf.if %[[VAL_69]] { -// CHECK: %[[VAL_70:.*]] = load %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref -// CHECK: %[[VAL_71:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_55]]] : memref -// CHECK: %[[VAL_72:.*]] = load %[[VAL_16]]{{\[}}%[[VAL_56]]] : memref +// CHECK: %[[VAL_70:.*]] = memref.load %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_71:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_55]]] : memref +// CHECK: %[[VAL_72:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_56]]] : memref // CHECK: %[[VAL_73:.*]] = mulf %[[VAL_71]], %[[VAL_72]] : f32 // CHECK: %[[VAL_74:.*]] = mulf %[[VAL_73]], %[[VAL_36]] : f32 // CHECK: %[[VAL_75:.*]] = mulf %[[VAL_74]], %[[VAL_25]] : f32 -// CHECK: %[[VAL_76:.*]] = load %[[VAL_19]]{{\[}}%[[VAL_57]]] : memref +// CHECK: %[[VAL_76:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_57]]] : memref // CHECK: %[[VAL_77:.*]] = addf %[[VAL_75]], %[[VAL_76]] : f32 // CHECK: %[[VAL_78:.*]] = addf %[[VAL_70]], %[[VAL_77]] : f32 // CHECK: store %[[VAL_78]], %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref @@ -1383,9 +1383,9 @@ // CHECK: %[[VAL_80:.*]] = cmpi eq, %[[VAL_59]], %[[VAL_64]] : index // CHECK: %[[VAL_81:.*]] = and %[[VAL_79]], %[[VAL_80]] : i1 // CHECK: scf.if %[[VAL_81]] { -// CHECK: %[[VAL_82:.*]] = load %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref -// CHECK: %[[VAL_83:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_55]]] : memref -// CHECK: %[[VAL_84:.*]] = load %[[VAL_16]]{{\[}}%[[VAL_56]]] : memref +// CHECK: %[[VAL_82:.*]] = memref.load %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_83:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_55]]] : memref +// CHECK: %[[VAL_84:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_56]]] : memref // CHECK: %[[VAL_85:.*]] = mulf %[[VAL_83]], %[[VAL_84]] : f32 // CHECK: %[[VAL_86:.*]] = mulf %[[VAL_85]], %[[VAL_36]] : f32 // CHECK: %[[VAL_87:.*]] = mulf %[[VAL_86]], %[[VAL_25]] : f32 @@ -1394,8 +1394,8 @@ // CHECK: } else { // CHECK: %[[VAL_89:.*]] = cmpi eq, %[[VAL_62]], %[[VAL_64]] : index // CHECK: scf.if %[[VAL_89]] { -// CHECK: %[[VAL_90:.*]] = load %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref -// CHECK: %[[VAL_91:.*]] = load %[[VAL_19]]{{\[}}%[[VAL_57]]] : memref +// CHECK: %[[VAL_90:.*]] = memref.load %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_91:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_57]]] : memref // CHECK: %[[VAL_92:.*]] = addf %[[VAL_90]], %[[VAL_91]] : f32 // CHECK: store %[[VAL_92]], %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref // CHECK: } else { @@ -1420,17 +1420,17 @@ // CHECK: scf.condition(%[[VAL_108]]) %[[VAL_103]], %[[VAL_105]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_109:.*]]: index, %[[VAL_110:.*]]: index): -// CHECK: %[[VAL_111:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_109]]] : memref -// CHECK: %[[VAL_112:.*]] = load %[[VAL_15]]{{\[}}%[[VAL_110]]] : memref +// CHECK: %[[VAL_111:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_109]]] : memref +// CHECK: %[[VAL_112:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_110]]] : memref // CHECK: %[[VAL_113:.*]] = cmpi ult, %[[VAL_112]], %[[VAL_111]] : index // CHECK: %[[VAL_114:.*]] = select %[[VAL_113]], %[[VAL_112]], %[[VAL_111]] : index // CHECK: %[[VAL_115:.*]] = cmpi eq, %[[VAL_111]], %[[VAL_114]] : index // CHECK: %[[VAL_116:.*]] = cmpi eq, %[[VAL_112]], 
%[[VAL_114]] : index // CHECK: %[[VAL_117:.*]] = and %[[VAL_115]], %[[VAL_116]] : i1 // CHECK: scf.if %[[VAL_117]] { -// CHECK: %[[VAL_118:.*]] = load %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref -// CHECK: %[[VAL_119:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_109]]] : memref -// CHECK: %[[VAL_120:.*]] = load %[[VAL_16]]{{\[}}%[[VAL_110]]] : memref +// CHECK: %[[VAL_118:.*]] = memref.load %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_119:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_109]]] : memref +// CHECK: %[[VAL_120:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_110]]] : memref // CHECK: %[[VAL_121:.*]] = mulf %[[VAL_119]], %[[VAL_120]] : f32 // CHECK: %[[VAL_122:.*]] = mulf %[[VAL_121]], %[[VAL_36]] : f32 // CHECK: %[[VAL_123:.*]] = mulf %[[VAL_122]], %[[VAL_25]] : f32 @@ -1446,21 +1446,21 @@ // CHECK: %[[VAL_130:.*]] = select %[[VAL_128]], %[[VAL_129]], %[[VAL_110]] : index // CHECK: scf.yield %[[VAL_127]], %[[VAL_130]] : index, index // CHECK: } -// CHECK: %[[VAL_131:.*]] = load %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_131:.*]] = memref.load %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref // CHECK: %[[VAL_132:.*]] = scf.for %[[VAL_133:.*]] = %[[VAL_134:.*]]#2 to %[[VAL_45]] step %[[VAL_8]] iter_args(%[[VAL_135:.*]] = %[[VAL_131]]) -> (f32) { -// CHECK: %[[VAL_136:.*]] = load %[[VAL_19]]{{\[}}%[[VAL_133]]] : memref +// CHECK: %[[VAL_136:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_133]]] : memref // CHECK: %[[VAL_137:.*]] = addf %[[VAL_135]], %[[VAL_136]] : f32 // CHECK: scf.yield %[[VAL_137]] : f32 // CHECK: } // CHECK: store %[[VAL_138:.*]], %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref // CHECK: } else { // CHECK: scf.if %[[VAL_7]] { -// CHECK: %[[VAL_139:.*]] = load %[[VAL_17]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_139:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_33]]] : memref // CHECK: %[[VAL_140:.*]] = addi %[[VAL_33]], %[[VAL_8]] : index -// CHECK: %[[VAL_141:.*]] = load %[[VAL_17]]{{\[}}%[[VAL_140]]] : memref -// CHECK: %[[VAL_142:.*]] = load %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_141:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_140]]] : memref +// CHECK: %[[VAL_142:.*]] = memref.load %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref // CHECK: %[[VAL_143:.*]] = scf.for %[[VAL_144:.*]] = %[[VAL_139]] to %[[VAL_141]] step %[[VAL_8]] iter_args(%[[VAL_145:.*]] = %[[VAL_142]]) -> (f32) { -// CHECK: %[[VAL_146:.*]] = load %[[VAL_19]]{{\[}}%[[VAL_144]]] : memref +// CHECK: %[[VAL_146:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_144]]] : memref // CHECK: %[[VAL_147:.*]] = addf %[[VAL_145]], %[[VAL_146]] : f32 // CHECK: scf.yield %[[VAL_147]] : f32 // CHECK: } @@ -1475,18 +1475,18 @@ // CHECK: scf.yield %[[VAL_151]], %[[VAL_152]] : index, index // CHECK: } // CHECK: scf.for %[[VAL_153:.*]] = %[[VAL_154:.*]]#1 to %[[VAL_22]] step %[[VAL_8]] { -// CHECK: %[[VAL_155:.*]] = load %[[VAL_17]]{{\[}}%[[VAL_153]]] : memref +// CHECK: %[[VAL_155:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_153]]] : memref // CHECK: %[[VAL_156:.*]] = addi %[[VAL_153]], %[[VAL_8]] : index -// CHECK: %[[VAL_157:.*]] = load %[[VAL_17]]{{\[}}%[[VAL_156]]] : memref -// CHECK: %[[VAL_158:.*]] = load %[[VAL_24]]{{\[}}%[[VAL_153]]] : memref +// CHECK: %[[VAL_157:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_156]]] : memref +// CHECK: %[[VAL_158:.*]] = memref.load %[[VAL_24]]{{\[}}%[[VAL_153]]] : memref // CHECK: %[[VAL_159:.*]] = scf.for %[[VAL_160:.*]] = %[[VAL_155]] to %[[VAL_157]] step %[[VAL_8]] iter_args(%[[VAL_161:.*]] = %[[VAL_158]]) -> (f32) { -// CHECK: %[[VAL_162:.*]] = load %[[VAL_19]]{{\[}}%[[VAL_160]]] : memref 
+// CHECK: %[[VAL_162:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_160]]] : memref // CHECK: %[[VAL_163:.*]] = addf %[[VAL_161]], %[[VAL_162]] : f32 // CHECK: scf.yield %[[VAL_163]] : f32 // CHECK: } // CHECK: store %[[VAL_164:.*]], %[[VAL_24]]{{\[}}%[[VAL_153]]] : memref // CHECK: } -// CHECK: %[[VAL_165:.*]] = tensor_load %[[VAL_24]] : memref +// CHECK: %[[VAL_165:.*]] = memref.tensor_load %[[VAL_24]] : memref // CHECK: return %[[VAL_165]] : tensor // CHECK: } func @sum_kernel_with_inv(%arga: tensor, diff --git a/mlir/test/Dialect/Linalg/sparse_3d.mlir b/mlir/test/Dialect/Linalg/sparse_3d.mlir --- a/mlir/test/Dialect/Linalg/sparse_3d.mlir +++ b/mlir/test/Dialect/Linalg/sparse_3d.mlir @@ -25,22 +25,22 @@ // CHECK: %[[VAL_5:.*]] = constant 8 : index // CHECK: %[[VAL_6:.*]] = constant 0 : index // CHECK: %[[VAL_7:.*]] = constant 1 : index -// CHECK: %[[VAL_8:.*]] = tensor_to_memref %[[VAL_0]] : memref<32x16x8xf32> -// CHECK: %[[VAL_9:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK: %[[VAL_10:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: %[[VAL_11:.*]] = alloc() : memref<32x16x8xf32> +// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_0]] : memref<32x16x8xf32> +// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32> +// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32> +// CHECK: %[[VAL_11:.*]] = memref.alloc() : memref<32x16x8xf32> // CHECK: linalg.copy(%[[VAL_10]], %[[VAL_11]]) : memref<32x16x8xf32>, memref<32x16x8xf32> // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] { // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] { // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_6]] to %[[VAL_5]] step %[[VAL_7]] { -// CHECK: %[[VAL_15:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_12]], %[[VAL_13]], %[[VAL_14]]] : memref<32x16x8xf32> -// CHECK: %[[VAL_16:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_12]], %[[VAL_13]], %[[VAL_14]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_12]], %[[VAL_13]], %[[VAL_14]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_12]], %[[VAL_13]], %[[VAL_14]]] : memref<32x16x8xf32> // CHECK: %[[VAL_17:.*]] = addf %[[VAL_15]], %[[VAL_16]] : f32 // CHECK: store %[[VAL_17]], %[[VAL_11]]{{\[}}%[[VAL_12]], %[[VAL_13]], %[[VAL_14]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[VAL_18:.*]] = tensor_load %[[VAL_11]] : memref<32x16x8xf32> +// CHECK: %[[VAL_18:.*]] = memref.tensor_load %[[VAL_11]] : memref<32x16x8xf32> // CHECK: return %[[VAL_18]] : tensor<32x16x8xf32> // CHECK: } func @add_ddd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { @@ -63,22 +63,22 @@ // CHECK: %[[VAL_5:.*]] = constant 8 : index // CHECK: %[[VAL_6:.*]] = constant 0 : index // CHECK: %[[VAL_7:.*]] = constant 1 : index -// CHECK: %[[VAL_8:.*]] = tensor_to_memref %[[VAL_0]] : memref<32x16x8xf32> -// CHECK: %[[VAL_9:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK: %[[VAL_10:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: %[[VAL_11:.*]] = alloc() : memref<32x16x8xf32> +// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_0]] : memref<32x16x8xf32> +// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32> +// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32> +// CHECK: %[[VAL_11:.*]] = memref.alloc() : memref<32x16x8xf32> // CHECK: linalg.copy(%[[VAL_10]], 
%[[VAL_11]]) : memref<32x16x8xf32>, memref<32x16x8xf32> // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] { // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] { // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_6]] to %[[VAL_5]] step %[[VAL_7]] { -// CHECK: %[[VAL_15:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_12]], %[[VAL_13]], %[[VAL_14]]] : memref<32x16x8xf32> -// CHECK: %[[VAL_16:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_12]], %[[VAL_13]], %[[VAL_14]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_12]], %[[VAL_13]], %[[VAL_14]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_12]], %[[VAL_13]], %[[VAL_14]]] : memref<32x16x8xf32> // CHECK: %[[VAL_17:.*]] = mulf %[[VAL_15]], %[[VAL_16]] : f32 // CHECK: store %[[VAL_17]], %[[VAL_11]]{{\[}}%[[VAL_12]], %[[VAL_13]], %[[VAL_14]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[VAL_18:.*]] = tensor_load %[[VAL_11]] : memref<32x16x8xf32> +// CHECK: %[[VAL_18:.*]] = memref.tensor_load %[[VAL_11]] : memref<32x16x8xf32> // CHECK: return %[[VAL_18]] : tensor<32x16x8xf32> // CHECK: } func @mul_ddd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { @@ -121,32 +121,32 @@ // CHECK: %[[VAL_10:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_11:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_12:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32x16x8xf32> to memref -// CHECK: %[[VAL_13:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK: %[[VAL_14:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: %[[VAL_15:.*]] = alloc() : memref<32x16x8xf32> +// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32> +// CHECK: %[[VAL_14:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32> +// CHECK: %[[VAL_15:.*]] = memref.alloc() : memref<32x16x8xf32> // CHECK: linalg.copy(%[[VAL_14]], %[[VAL_15]]) : memref<32x16x8xf32>, memref<32x16x8xf32> // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_9]] { // CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_9]] { // CHECK: %[[VAL_18:.*]] = muli %[[VAL_16]], %[[VAL_5]] : index // CHECK: %[[VAL_19:.*]] = addi %[[VAL_18]], %[[VAL_17]] : index -// CHECK: %[[VAL_20:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_19]]] : memref +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_19]]] : memref // CHECK: %[[VAL_21:.*]] = addi %[[VAL_19]], %[[VAL_9]] : index -// CHECK: %[[VAL_22:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_21]]] : memref +// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_21]]] : memref // CHECK: %[[VAL_23:.*]]:2 = scf.while (%[[VAL_24:.*]] = %[[VAL_20]], %[[VAL_25:.*]] = %[[VAL_7]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_26:.*]] = cmpi ult, %[[VAL_24]], %[[VAL_22]] : index // CHECK: scf.condition(%[[VAL_26]]) %[[VAL_24]], %[[VAL_25]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_27:.*]]: index, %[[VAL_28:.*]]: index): -// CHECK: %[[VAL_29:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_27]]] : memref // CHECK: %[[VAL_30:.*]] = cmpi eq, %[[VAL_29]], %[[VAL_28]] : index // CHECK: scf.if %[[VAL_30]] { -// CHECK: %[[VAL_31:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_27]]] : memref -// CHECK: %[[VAL_32:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_16]], %[[VAL_17]], 
%[[VAL_28]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_31:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_16]], %[[VAL_17]], %[[VAL_28]]] : memref<32x16x8xf32> // CHECK: %[[VAL_33:.*]] = addf %[[VAL_31]], %[[VAL_32]] : f32 // CHECK: store %[[VAL_33]], %[[VAL_15]]{{\[}}%[[VAL_16]], %[[VAL_17]], %[[VAL_28]]] : memref<32x16x8xf32> // CHECK: } else { // CHECK: scf.if %[[VAL_8]] { -// CHECK: %[[VAL_34:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_16]], %[[VAL_17]], %[[VAL_28]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_34:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_16]], %[[VAL_17]], %[[VAL_28]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_34]], %[[VAL_15]]{{\[}}%[[VAL_16]], %[[VAL_17]], %[[VAL_28]]] : memref<32x16x8xf32> // CHECK: } else { // CHECK: } @@ -158,12 +158,12 @@ // CHECK: scf.yield %[[VAL_37]], %[[VAL_38]] : index, index // CHECK: } // CHECK: scf.for %[[VAL_39:.*]] = %[[VAL_40:.*]]#1 to %[[VAL_6]] step %[[VAL_9]] { -// CHECK: %[[VAL_41:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_16]], %[[VAL_17]], %[[VAL_39]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_41:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_16]], %[[VAL_17]], %[[VAL_39]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_41]], %[[VAL_15]]{{\[}}%[[VAL_16]], %[[VAL_17]], %[[VAL_39]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[VAL_42:.*]] = tensor_load %[[VAL_15]] : memref<32x16x8xf32> +// CHECK: %[[VAL_42:.*]] = memref.tensor_load %[[VAL_15]] : memref<32x16x8xf32> // CHECK: return %[[VAL_42]] : tensor<32x16x8xf32> // CHECK: } func @add_dds(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { @@ -189,27 +189,27 @@ // CHECK: %[[VAL_8:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_9:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_10:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32x16x8xf32> to memref -// CHECK: %[[VAL_11:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK: %[[VAL_12:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: %[[VAL_13:.*]] = alloc() : memref<32x16x8xf32> +// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32> +// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32> +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<32x16x8xf32> // CHECK: linalg.copy(%[[VAL_12]], %[[VAL_13]]) : memref<32x16x8xf32>, memref<32x16x8xf32> // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] { // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_6]] to %[[VAL_5]] step %[[VAL_7]] { // CHECK: %[[VAL_16:.*]] = muli %[[VAL_14]], %[[VAL_5]] : index // CHECK: %[[VAL_17:.*]] = addi %[[VAL_16]], %[[VAL_15]] : index -// CHECK: %[[VAL_18:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_17]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_17]]] : memref // CHECK: %[[VAL_19:.*]] = addi %[[VAL_17]], %[[VAL_7]] : index -// CHECK: %[[VAL_20:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_19]]] : memref +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_19]]] : memref // CHECK: scf.for %[[VAL_21:.*]] = %[[VAL_18]] to %[[VAL_20]] step %[[VAL_7]] { -// CHECK: %[[VAL_22:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_21]]] : memref -// CHECK: %[[VAL_23:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_21]]] : memref -// CHECK: %[[VAL_24:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_14]], %[[VAL_15]], %[[VAL_22]]] : memref<32x16x8xf32> +// CHECK: 
%[[VAL_22:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_21]]] : memref +// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_21]]] : memref +// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_14]], %[[VAL_15]], %[[VAL_22]]] : memref<32x16x8xf32> // CHECK: %[[VAL_25:.*]] = mulf %[[VAL_23]], %[[VAL_24]] : f32 // CHECK: store %[[VAL_25]], %[[VAL_13]]{{\[}}%[[VAL_14]], %[[VAL_15]], %[[VAL_22]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[VAL_26:.*]] = tensor_load %[[VAL_13]] : memref<32x16x8xf32> +// CHECK: %[[VAL_26:.*]] = memref.tensor_load %[[VAL_13]] : memref<32x16x8xf32> // CHECK: return %[[VAL_26]] : tensor<32x16x8xf32> // CHECK: } func @mul_dds(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { @@ -251,34 +251,34 @@ // CHECK: %[[VAL_9:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_8]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_10:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_8]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_11:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32x16x8xf32> to memref -// CHECK: %[[VAL_12:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK: %[[VAL_13:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: %[[VAL_14:.*]] = alloc() : memref<32x16x8xf32> +// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32> +// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32> +// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<32x16x8xf32> // CHECK: linalg.copy(%[[VAL_13]], %[[VAL_14]]) : memref<32x16x8xf32>, memref<32x16x8xf32> // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_7]] to %[[VAL_3]] step %[[VAL_8]] { -// CHECK: %[[VAL_16:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_15]]] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_15]]] : memref // CHECK: %[[VAL_17:.*]] = addi %[[VAL_15]], %[[VAL_8]] : index -// CHECK: %[[VAL_18:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_17]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_17]]] : memref // CHECK: %[[VAL_19:.*]]:2 = scf.while (%[[VAL_20:.*]] = %[[VAL_16]], %[[VAL_21:.*]] = %[[VAL_7]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_22:.*]] = cmpi ult, %[[VAL_20]], %[[VAL_18]] : index // CHECK: scf.condition(%[[VAL_22]]) %[[VAL_20]], %[[VAL_21]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_23:.*]]: index, %[[VAL_24:.*]]: index): -// CHECK: %[[VAL_25:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_23]]] : memref +// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_23]]] : memref // CHECK: %[[VAL_26:.*]] = cmpi eq, %[[VAL_25]], %[[VAL_24]] : index // CHECK: scf.if %[[VAL_26]] { // CHECK: scf.for %[[VAL_27:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] { // CHECK: %[[VAL_28:.*]] = muli %[[VAL_23]], %[[VAL_5]] : index // CHECK: %[[VAL_29:.*]] = addi %[[VAL_28]], %[[VAL_27]] : index -// CHECK: %[[VAL_30:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_29]]] : memref -// CHECK: %[[VAL_31:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_15]], %[[VAL_24]], %[[VAL_27]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_29]]] : memref +// CHECK: %[[VAL_31:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_15]], %[[VAL_24]], %[[VAL_27]]] : memref<32x16x8xf32> // CHECK: %[[VAL_32:.*]] = addf %[[VAL_30]], %[[VAL_31]] : f32 // CHECK: store %[[VAL_32]], %[[VAL_14]]{{\[}}%[[VAL_15]], %[[VAL_24]], %[[VAL_27]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } else { // CHECK: scf.if %[[VAL_6]] { // CHECK: scf.for 
%[[VAL_33:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] { -// CHECK: %[[VAL_34:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_15]], %[[VAL_24]], %[[VAL_33]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_34:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_15]], %[[VAL_24]], %[[VAL_33]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_34]], %[[VAL_14]]{{\[}}%[[VAL_15]], %[[VAL_24]], %[[VAL_33]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } else { @@ -292,12 +292,12 @@ // CHECK: } // CHECK: scf.for %[[VAL_39:.*]] = %[[VAL_40:.*]]#1 to %[[VAL_4]] step %[[VAL_8]] { // CHECK: scf.for %[[VAL_41:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] { -// CHECK: %[[VAL_42:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_15]], %[[VAL_39]], %[[VAL_41]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_15]], %[[VAL_39]], %[[VAL_41]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_42]], %[[VAL_14]]{{\[}}%[[VAL_15]], %[[VAL_39]], %[[VAL_41]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[VAL_43:.*]] = tensor_load %[[VAL_14]] : memref<32x16x8xf32> +// CHECK: %[[VAL_43:.*]] = memref.tensor_load %[[VAL_14]] : memref<32x16x8xf32> // CHECK: return %[[VAL_43]] : tensor<32x16x8xf32> // CHECK: } func @add_dsd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { @@ -322,27 +322,27 @@ // CHECK: %[[VAL_7:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_6]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_8:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_6]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_9:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32x16x8xf32> to memref -// CHECK: %[[VAL_10:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK: %[[VAL_11:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: %[[VAL_12:.*]] = alloc() : memref<32x16x8xf32> +// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32> +// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32> +// CHECK: %[[VAL_12:.*]] = memref.alloc() : memref<32x16x8xf32> // CHECK: linalg.copy(%[[VAL_11]], %[[VAL_12]]) : memref<32x16x8xf32>, memref<32x16x8xf32> // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { -// CHECK: %[[VAL_14:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref // CHECK: %[[VAL_15:.*]] = addi %[[VAL_13]], %[[VAL_6]] : index -// CHECK: %[[VAL_16:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref // CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_14]] to %[[VAL_16]] step %[[VAL_6]] { -// CHECK: %[[VAL_18:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_17]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_17]]] : memref // CHECK: scf.for %[[VAL_19:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { // CHECK: %[[VAL_20:.*]] = muli %[[VAL_17]], %[[VAL_4]] : index // CHECK: %[[VAL_21:.*]] = addi %[[VAL_20]], %[[VAL_19]] : index -// CHECK: %[[VAL_22:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_21]]] : memref -// CHECK: %[[VAL_23:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_13]], %[[VAL_18]], %[[VAL_19]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_21]]] : memref +// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_13]], %[[VAL_18]], %[[VAL_19]]] : memref<32x16x8xf32> // CHECK: %[[VAL_24:.*]] = mulf %[[VAL_22]], %[[VAL_23]] : f32 // CHECK: store %[[VAL_24]], 
%[[VAL_12]]{{\[}}%[[VAL_13]], %[[VAL_18]], %[[VAL_19]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[VAL_25:.*]] = tensor_load %[[VAL_12]] : memref<32x16x8xf32> +// CHECK: %[[VAL_25:.*]] = memref.tensor_load %[[VAL_12]] : memref<32x16x8xf32> // CHECK: return %[[VAL_25]] : tensor<32x16x8xf32> // CHECK: } func @mul_dsd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { @@ -387,40 +387,40 @@ // CHECK: %[[VAL_12:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_13:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_14:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32x16x8xf32> to memref -// CHECK: %[[VAL_15:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK: %[[VAL_16:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: %[[VAL_17:.*]] = alloc() : memref<32x16x8xf32> +// CHECK: %[[VAL_15:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32> +// CHECK: %[[VAL_16:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32> +// CHECK: %[[VAL_17:.*]] = memref.alloc() : memref<32x16x8xf32> // CHECK: linalg.copy(%[[VAL_16]], %[[VAL_17]]) : memref<32x16x8xf32>, memref<32x16x8xf32> // CHECK: scf.for %[[VAL_18:.*]] = %[[VAL_8]] to %[[VAL_4]] step %[[VAL_9]] { -// CHECK: %[[VAL_19:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_18]]] : memref +// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_18]]] : memref // CHECK: %[[VAL_20:.*]] = addi %[[VAL_18]], %[[VAL_9]] : index -// CHECK: %[[VAL_21:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_20]]] : memref +// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_20]]] : memref // CHECK: %[[VAL_22:.*]]:2 = scf.while (%[[VAL_23:.*]] = %[[VAL_19]], %[[VAL_24:.*]] = %[[VAL_8]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_25:.*]] = cmpi ult, %[[VAL_23]], %[[VAL_21]] : index // CHECK: scf.condition(%[[VAL_25]]) %[[VAL_23]], %[[VAL_24]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_26:.*]]: index, %[[VAL_27:.*]]: index): -// CHECK: %[[VAL_28:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_26]]] : memref +// CHECK: %[[VAL_28:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_26]]] : memref // CHECK: %[[VAL_29:.*]] = cmpi eq, %[[VAL_28]], %[[VAL_27]] : index // CHECK: scf.if %[[VAL_29]] { -// CHECK: %[[VAL_30:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_26]]] : memref +// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_26]]] : memref // CHECK: %[[VAL_31:.*]] = addi %[[VAL_26]], %[[VAL_9]] : index -// CHECK: %[[VAL_32:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_31]]] : memref +// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_31]]] : memref // CHECK: %[[VAL_33:.*]]:2 = scf.while (%[[VAL_34:.*]] = %[[VAL_30]], %[[VAL_35:.*]] = %[[VAL_8]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_36:.*]] = cmpi ult, %[[VAL_34]], %[[VAL_32]] : index // CHECK: scf.condition(%[[VAL_36]]) %[[VAL_34]], %[[VAL_35]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_37:.*]]: index, %[[VAL_38:.*]]: index): -// CHECK: %[[VAL_39:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_37]]] : memref +// CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_37]]] : memref // CHECK: %[[VAL_40:.*]] = cmpi eq, %[[VAL_39]], %[[VAL_38]] : index // CHECK: scf.if %[[VAL_40]] { -// CHECK: %[[VAL_41:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_37]]] : memref -// CHECK: %[[VAL_42:.*]] = load %[[VAL_15]]{{\[}}%[[VAL_18]], %[[VAL_27]], %[[VAL_38]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_41:.*]] = 
memref.load %[[VAL_14]]{{\[}}%[[VAL_37]]] : memref +// CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_18]], %[[VAL_27]], %[[VAL_38]]] : memref<32x16x8xf32> // CHECK: %[[VAL_43:.*]] = addf %[[VAL_41]], %[[VAL_42]] : f32 // CHECK: store %[[VAL_43]], %[[VAL_17]]{{\[}}%[[VAL_18]], %[[VAL_27]], %[[VAL_38]]] : memref<32x16x8xf32> // CHECK: } else { // CHECK: scf.if %[[VAL_7]] { -// CHECK: %[[VAL_44:.*]] = load %[[VAL_15]]{{\[}}%[[VAL_18]], %[[VAL_27]], %[[VAL_38]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_44:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_18]], %[[VAL_27]], %[[VAL_38]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_44]], %[[VAL_17]]{{\[}}%[[VAL_18]], %[[VAL_27]], %[[VAL_38]]] : memref<32x16x8xf32> // CHECK: } else { // CHECK: } @@ -432,13 +432,13 @@ // CHECK: scf.yield %[[VAL_47]], %[[VAL_48]] : index, index // CHECK: } // CHECK: scf.for %[[VAL_49:.*]] = %[[VAL_50:.*]]#1 to %[[VAL_6]] step %[[VAL_9]] { -// CHECK: %[[VAL_51:.*]] = load %[[VAL_15]]{{\[}}%[[VAL_18]], %[[VAL_27]], %[[VAL_49]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_51:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_18]], %[[VAL_27]], %[[VAL_49]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_51]], %[[VAL_17]]{{\[}}%[[VAL_18]], %[[VAL_27]], %[[VAL_49]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } else { // CHECK: scf.if %[[VAL_7]] { // CHECK: scf.for %[[VAL_52:.*]] = %[[VAL_8]] to %[[VAL_6]] step %[[VAL_9]] { -// CHECK: %[[VAL_53:.*]] = load %[[VAL_15]]{{\[}}%[[VAL_18]], %[[VAL_27]], %[[VAL_52]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_53:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_18]], %[[VAL_27]], %[[VAL_52]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_53]], %[[VAL_17]]{{\[}}%[[VAL_18]], %[[VAL_27]], %[[VAL_52]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } else { @@ -452,12 +452,12 @@ // CHECK: } // CHECK: scf.for %[[VAL_58:.*]] = %[[VAL_59:.*]]#1 to %[[VAL_5]] step %[[VAL_9]] { // CHECK: scf.for %[[VAL_60:.*]] = %[[VAL_8]] to %[[VAL_6]] step %[[VAL_9]] { -// CHECK: %[[VAL_61:.*]] = load %[[VAL_15]]{{\[}}%[[VAL_18]], %[[VAL_58]], %[[VAL_60]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_61:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_18]], %[[VAL_58]], %[[VAL_60]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_61]], %[[VAL_17]]{{\[}}%[[VAL_18]], %[[VAL_58]], %[[VAL_60]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[VAL_62:.*]] = tensor_load %[[VAL_17]] : memref<32x16x8xf32> +// CHECK: %[[VAL_62:.*]] = memref.tensor_load %[[VAL_17]] : memref<32x16x8xf32> // CHECK: return %[[VAL_62]] : tensor<32x16x8xf32> // CHECK: } func @add_dss(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { @@ -484,29 +484,29 @@ // CHECK: %[[VAL_9:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_10:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_11:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32x16x8xf32> to memref -// CHECK: %[[VAL_12:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK: %[[VAL_13:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: %[[VAL_14:.*]] = alloc() : memref<32x16x8xf32> +// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32> +// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32> +// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<32x16x8xf32> // CHECK: linalg.copy(%[[VAL_13]], %[[VAL_14]]) : memref<32x16x8xf32>, memref<32x16x8xf32> // CHECK: scf.for 
%[[VAL_15:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { -// CHECK: %[[VAL_16:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref // CHECK: %[[VAL_17:.*]] = addi %[[VAL_15]], %[[VAL_6]] : index -// CHECK: %[[VAL_18:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_17]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_17]]] : memref // CHECK: scf.for %[[VAL_19:.*]] = %[[VAL_16]] to %[[VAL_18]] step %[[VAL_6]] { -// CHECK: %[[VAL_20:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_19]]] : memref -// CHECK: %[[VAL_21:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_19]]] : memref +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_19]]] : memref +// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_19]]] : memref // CHECK: %[[VAL_22:.*]] = addi %[[VAL_19]], %[[VAL_6]] : index -// CHECK: %[[VAL_23:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_22]]] : memref +// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_22]]] : memref // CHECK: scf.for %[[VAL_24:.*]] = %[[VAL_21]] to %[[VAL_23]] step %[[VAL_6]] { -// CHECK: %[[VAL_25:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_24]]] : memref -// CHECK: %[[VAL_26:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_24]]] : memref -// CHECK: %[[VAL_27:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_15]], %[[VAL_20]], %[[VAL_25]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_24]]] : memref +// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_24]]] : memref +// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_15]], %[[VAL_20]], %[[VAL_25]]] : memref<32x16x8xf32> // CHECK: %[[VAL_28:.*]] = mulf %[[VAL_26]], %[[VAL_27]] : f32 // CHECK: store %[[VAL_28]], %[[VAL_14]]{{\[}}%[[VAL_15]], %[[VAL_20]], %[[VAL_25]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[VAL_29:.*]] = tensor_load %[[VAL_14]] : memref<32x16x8xf32> +// CHECK: %[[VAL_29:.*]] = memref.tensor_load %[[VAL_14]] : memref<32x16x8xf32> // CHECK: return %[[VAL_29]] : tensor<32x16x8xf32> // CHECK: } func @mul_dss(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { @@ -548,18 +548,18 @@ // CHECK: %[[VAL_9:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_7]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_10:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_7]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_11:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32x16x8xf32> to memref -// CHECK: %[[VAL_12:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK: %[[VAL_13:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: %[[VAL_14:.*]] = alloc() : memref<32x16x8xf32> +// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32> +// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32> +// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<32x16x8xf32> // CHECK: linalg.copy(%[[VAL_13]], %[[VAL_14]]) : memref<32x16x8xf32>, memref<32x16x8xf32> -// CHECK: %[[VAL_15:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref -// CHECK: %[[VAL_16:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_8]]] : memref +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_17:.*]]:2 = scf.while (%[[VAL_18:.*]] = %[[VAL_15]], %[[VAL_19:.*]] = %[[VAL_7]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_20:.*]] = cmpi ult, %[[VAL_18]], %[[VAL_16]] : index // CHECK: 
scf.condition(%[[VAL_20]]) %[[VAL_18]], %[[VAL_19]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_21:.*]]: index, %[[VAL_22:.*]]: index): -// CHECK: %[[VAL_23:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_21]]] : memref +// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_21]]] : memref // CHECK: %[[VAL_24:.*]] = cmpi eq, %[[VAL_23]], %[[VAL_22]] : index // CHECK: scf.if %[[VAL_24]] { // CHECK: scf.for %[[VAL_25:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_8]] { @@ -568,8 +568,8 @@ // CHECK: scf.for %[[VAL_28:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] { // CHECK: %[[VAL_29:.*]] = muli %[[VAL_27]], %[[VAL_5]] : index // CHECK: %[[VAL_30:.*]] = addi %[[VAL_29]], %[[VAL_28]] : index -// CHECK: %[[VAL_31:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_30]]] : memref -// CHECK: %[[VAL_32:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_31:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_30]]] : memref +// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]] : memref<32x16x8xf32> // CHECK: %[[VAL_33:.*]] = addf %[[VAL_31]], %[[VAL_32]] : f32 // CHECK: store %[[VAL_33]], %[[VAL_14]]{{\[}}%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]] : memref<32x16x8xf32> // CHECK: } @@ -578,7 +578,7 @@ // CHECK: scf.if %[[VAL_6]] { // CHECK: scf.for %[[VAL_34:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_8]] { // CHECK: scf.for %[[VAL_35:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] { -// CHECK: %[[VAL_36:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_22]], %[[VAL_34]], %[[VAL_35]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_22]], %[[VAL_34]], %[[VAL_35]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_36]], %[[VAL_14]]{{\[}}%[[VAL_22]], %[[VAL_34]], %[[VAL_35]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } @@ -594,12 +594,12 @@ // CHECK: scf.for %[[VAL_41:.*]] = %[[VAL_42:.*]]#1 to %[[VAL_3]] step %[[VAL_8]] { // CHECK: scf.for %[[VAL_43:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_8]] { // CHECK: scf.for %[[VAL_44:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] { -// CHECK: %[[VAL_45:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_41]], %[[VAL_43]], %[[VAL_44]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_45:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_41]], %[[VAL_43]], %[[VAL_44]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_45]], %[[VAL_14]]{{\[}}%[[VAL_41]], %[[VAL_43]], %[[VAL_44]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[VAL_46:.*]] = tensor_load %[[VAL_14]] : memref<32x16x8xf32> +// CHECK: %[[VAL_46:.*]] = memref.tensor_load %[[VAL_14]] : memref<32x16x8xf32> // CHECK: return %[[VAL_46]] : tensor<32x16x8xf32> // CHECK: } func @add_sdd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { @@ -624,28 +624,28 @@ // CHECK: %[[VAL_7:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_5]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_8:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_5]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_9:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32x16x8xf32> to memref -// CHECK: %[[VAL_10:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK: %[[VAL_11:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: %[[VAL_12:.*]] = alloc() : memref<32x16x8xf32> +// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32> +// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32> +// CHECK: %[[VAL_12:.*]] = memref.alloc() : 
memref<32x16x8xf32> // CHECK: linalg.copy(%[[VAL_11]], %[[VAL_12]]) : memref<32x16x8xf32>, memref<32x16x8xf32> -// CHECK: %[[VAL_13:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref -// CHECK: %[[VAL_14:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_14]] step %[[VAL_6]] { -// CHECK: %[[VAL_16:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_15]]] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_15]]] : memref // CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: %[[VAL_18:.*]] = muli %[[VAL_15]], %[[VAL_3]] : index // CHECK: %[[VAL_19:.*]] = addi %[[VAL_18]], %[[VAL_17]] : index // CHECK: scf.for %[[VAL_20:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { // CHECK: %[[VAL_21:.*]] = muli %[[VAL_19]], %[[VAL_4]] : index // CHECK: %[[VAL_22:.*]] = addi %[[VAL_21]], %[[VAL_20]] : index -// CHECK: %[[VAL_23:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_22]]] : memref -// CHECK: %[[VAL_24:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_16]], %[[VAL_17]], %[[VAL_20]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_22]]] : memref +// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_16]], %[[VAL_17]], %[[VAL_20]]] : memref<32x16x8xf32> // CHECK: %[[VAL_25:.*]] = mulf %[[VAL_23]], %[[VAL_24]] : f32 // CHECK: store %[[VAL_25]], %[[VAL_12]]{{\[}}%[[VAL_16]], %[[VAL_17]], %[[VAL_20]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[VAL_26:.*]] = tensor_load %[[VAL_12]] : memref<32x16x8xf32> +// CHECK: %[[VAL_26:.*]] = memref.tensor_load %[[VAL_12]] : memref<32x16x8xf32> // CHECK: return %[[VAL_26]] : tensor<32x16x8xf32> // CHECK: } func @mul_sdd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { @@ -690,41 +690,41 @@ // CHECK: %[[VAL_12:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_13:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_14:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32x16x8xf32> to memref -// CHECK: %[[VAL_15:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK: %[[VAL_16:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: %[[VAL_17:.*]] = alloc() : memref<32x16x8xf32> +// CHECK: %[[VAL_15:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32> +// CHECK: %[[VAL_16:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32> +// CHECK: %[[VAL_17:.*]] = memref.alloc() : memref<32x16x8xf32> // CHECK: linalg.copy(%[[VAL_16]], %[[VAL_17]]) : memref<32x16x8xf32>, memref<32x16x8xf32> -// CHECK: %[[VAL_18:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_8]]] : memref -// CHECK: %[[VAL_19:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_9]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_8]]] : memref +// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_9]]] : memref // CHECK: %[[VAL_20:.*]]:2 = scf.while (%[[VAL_21:.*]] = %[[VAL_18]], %[[VAL_22:.*]] = %[[VAL_8]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_23:.*]] = cmpi ult, %[[VAL_21]], %[[VAL_19]] : index // CHECK: scf.condition(%[[VAL_23]]) %[[VAL_21]], %[[VAL_22]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_24:.*]]: index, %[[VAL_25:.*]]: index): -// CHECK: %[[VAL_26:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_24]]] : 
memref +// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_24]]] : memref // CHECK: %[[VAL_27:.*]] = cmpi eq, %[[VAL_26]], %[[VAL_25]] : index // CHECK: scf.if %[[VAL_27]] { // CHECK: scf.for %[[VAL_28:.*]] = %[[VAL_8]] to %[[VAL_5]] step %[[VAL_9]] { // CHECK: %[[VAL_29:.*]] = muli %[[VAL_24]], %[[VAL_5]] : index // CHECK: %[[VAL_30:.*]] = addi %[[VAL_29]], %[[VAL_28]] : index -// CHECK: %[[VAL_31:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_30]]] : memref +// CHECK: %[[VAL_31:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_30]]] : memref // CHECK: %[[VAL_32:.*]] = addi %[[VAL_30]], %[[VAL_9]] : index -// CHECK: %[[VAL_33:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_32]]] : memref +// CHECK: %[[VAL_33:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_32]]] : memref // CHECK: %[[VAL_34:.*]]:2 = scf.while (%[[VAL_35:.*]] = %[[VAL_31]], %[[VAL_36:.*]] = %[[VAL_8]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_37:.*]] = cmpi ult, %[[VAL_35]], %[[VAL_33]] : index // CHECK: scf.condition(%[[VAL_37]]) %[[VAL_35]], %[[VAL_36]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_38:.*]]: index, %[[VAL_39:.*]]: index): -// CHECK: %[[VAL_40:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_38]]] : memref +// CHECK: %[[VAL_40:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_38]]] : memref // CHECK: %[[VAL_41:.*]] = cmpi eq, %[[VAL_40]], %[[VAL_39]] : index // CHECK: scf.if %[[VAL_41]] { -// CHECK: %[[VAL_42:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_38]]] : memref -// CHECK: %[[VAL_43:.*]] = load %[[VAL_15]]{{\[}}%[[VAL_25]], %[[VAL_28]], %[[VAL_39]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_38]]] : memref +// CHECK: %[[VAL_43:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_25]], %[[VAL_28]], %[[VAL_39]]] : memref<32x16x8xf32> // CHECK: %[[VAL_44:.*]] = addf %[[VAL_42]], %[[VAL_43]] : f32 // CHECK: store %[[VAL_44]], %[[VAL_17]]{{\[}}%[[VAL_25]], %[[VAL_28]], %[[VAL_39]]] : memref<32x16x8xf32> // CHECK: } else { // CHECK: scf.if %[[VAL_7]] { -// CHECK: %[[VAL_45:.*]] = load %[[VAL_15]]{{\[}}%[[VAL_25]], %[[VAL_28]], %[[VAL_39]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_45:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_25]], %[[VAL_28]], %[[VAL_39]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_45]], %[[VAL_17]]{{\[}}%[[VAL_25]], %[[VAL_28]], %[[VAL_39]]] : memref<32x16x8xf32> // CHECK: } else { // CHECK: } @@ -736,7 +736,7 @@ // CHECK: scf.yield %[[VAL_48]], %[[VAL_49]] : index, index // CHECK: } // CHECK: scf.for %[[VAL_50:.*]] = %[[VAL_51:.*]]#1 to %[[VAL_6]] step %[[VAL_9]] { -// CHECK: %[[VAL_52:.*]] = load %[[VAL_15]]{{\[}}%[[VAL_25]], %[[VAL_28]], %[[VAL_50]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_52:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_25]], %[[VAL_28]], %[[VAL_50]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_52]], %[[VAL_17]]{{\[}}%[[VAL_25]], %[[VAL_28]], %[[VAL_50]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } @@ -744,7 +744,7 @@ // CHECK: scf.if %[[VAL_7]] { // CHECK: scf.for %[[VAL_53:.*]] = %[[VAL_8]] to %[[VAL_5]] step %[[VAL_9]] { // CHECK: scf.for %[[VAL_54:.*]] = %[[VAL_8]] to %[[VAL_6]] step %[[VAL_9]] { -// CHECK: %[[VAL_55:.*]] = load %[[VAL_15]]{{\[}}%[[VAL_25]], %[[VAL_53]], %[[VAL_54]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_55:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_25]], %[[VAL_53]], %[[VAL_54]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_55]], %[[VAL_17]]{{\[}}%[[VAL_25]], %[[VAL_53]], %[[VAL_54]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } @@ -760,12 +760,12 @@ // CHECK: scf.for %[[VAL_60:.*]] = %[[VAL_61:.*]]#1 to %[[VAL_4]] step %[[VAL_9]] { // 
CHECK: scf.for %[[VAL_62:.*]] = %[[VAL_8]] to %[[VAL_5]] step %[[VAL_9]] { // CHECK: scf.for %[[VAL_63:.*]] = %[[VAL_8]] to %[[VAL_6]] step %[[VAL_9]] { -// CHECK: %[[VAL_64:.*]] = load %[[VAL_15]]{{\[}}%[[VAL_60]], %[[VAL_62]], %[[VAL_63]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_64:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_60]], %[[VAL_62]], %[[VAL_63]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_64]], %[[VAL_17]]{{\[}}%[[VAL_60]], %[[VAL_62]], %[[VAL_63]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[VAL_65:.*]] = tensor_load %[[VAL_17]] : memref<32x16x8xf32> +// CHECK: %[[VAL_65:.*]] = memref.tensor_load %[[VAL_17]] : memref<32x16x8xf32> // CHECK: return %[[VAL_65]] : tensor<32x16x8xf32> // CHECK: } func @add_sds(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { @@ -792,30 +792,30 @@ // CHECK: %[[VAL_9:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_10:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_11:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32x16x8xf32> to memref -// CHECK: %[[VAL_12:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK: %[[VAL_13:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: %[[VAL_14:.*]] = alloc() : memref<32x16x8xf32> +// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32> +// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32> +// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<32x16x8xf32> // CHECK: linalg.copy(%[[VAL_13]], %[[VAL_14]]) : memref<32x16x8xf32>, memref<32x16x8xf32> -// CHECK: %[[VAL_15:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref -// CHECK: %[[VAL_16:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref // CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_15]] to %[[VAL_16]] step %[[VAL_6]] { -// CHECK: %[[VAL_18:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_17]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_17]]] : memref // CHECK: scf.for %[[VAL_19:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { // CHECK: %[[VAL_20:.*]] = muli %[[VAL_17]], %[[VAL_4]] : index // CHECK: %[[VAL_21:.*]] = addi %[[VAL_20]], %[[VAL_19]] : index -// CHECK: %[[VAL_22:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_21]]] : memref +// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_21]]] : memref // CHECK: %[[VAL_23:.*]] = addi %[[VAL_21]], %[[VAL_6]] : index -// CHECK: %[[VAL_24:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_23]]] : memref +// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_23]]] : memref // CHECK: scf.for %[[VAL_25:.*]] = %[[VAL_22]] to %[[VAL_24]] step %[[VAL_6]] { -// CHECK: %[[VAL_26:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_25]]] : memref -// CHECK: %[[VAL_27:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_25]]] : memref -// CHECK: %[[VAL_28:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_18]], %[[VAL_19]], %[[VAL_26]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_25]]] : memref +// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_25]]] : memref +// CHECK: %[[VAL_28:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_18]], %[[VAL_19]], %[[VAL_26]]] : memref<32x16x8xf32> // CHECK: %[[VAL_29:.*]] = mulf %[[VAL_27]], %[[VAL_28]] : f32 // CHECK: store %[[VAL_29]], %[[VAL_14]]{{\[}}%[[VAL_18]], %[[VAL_19]], 
%[[VAL_26]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[VAL_30:.*]] = tensor_load %[[VAL_14]] : memref<32x16x8xf32> +// CHECK: %[[VAL_30:.*]] = memref.tensor_load %[[VAL_14]] : memref<32x16x8xf32> // CHECK: return %[[VAL_30]] : tensor<32x16x8xf32> // CHECK: } func @mul_sds(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { @@ -859,43 +859,43 @@ // CHECK: %[[VAL_11:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_8]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_12:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_8]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_13:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32x16x8xf32> to memref -// CHECK: %[[VAL_14:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK: %[[VAL_15:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: %[[VAL_16:.*]] = alloc() : memref<32x16x8xf32> +// CHECK: %[[VAL_14:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32> +// CHECK: %[[VAL_15:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32> +// CHECK: %[[VAL_16:.*]] = memref.alloc() : memref<32x16x8xf32> // CHECK: linalg.copy(%[[VAL_15]], %[[VAL_16]]) : memref<32x16x8xf32>, memref<32x16x8xf32> -// CHECK: %[[VAL_17:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref -// CHECK: %[[VAL_18:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_8]]] : memref +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_19:.*]]:2 = scf.while (%[[VAL_20:.*]] = %[[VAL_17]], %[[VAL_21:.*]] = %[[VAL_7]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_22:.*]] = cmpi ult, %[[VAL_20]], %[[VAL_18]] : index // CHECK: scf.condition(%[[VAL_22]]) %[[VAL_20]], %[[VAL_21]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_23:.*]]: index, %[[VAL_24:.*]]: index): -// CHECK: %[[VAL_25:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_23]]] : memref +// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_23]]] : memref // CHECK: %[[VAL_26:.*]] = cmpi eq, %[[VAL_25]], %[[VAL_24]] : index // CHECK: scf.if %[[VAL_26]] { -// CHECK: %[[VAL_27:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_23]]] : memref +// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_23]]] : memref // CHECK: %[[VAL_28:.*]] = addi %[[VAL_23]], %[[VAL_8]] : index -// CHECK: %[[VAL_29:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_28]]] : memref +// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_28]]] : memref // CHECK: %[[VAL_30:.*]]:2 = scf.while (%[[VAL_31:.*]] = %[[VAL_27]], %[[VAL_32:.*]] = %[[VAL_7]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_33:.*]] = cmpi ult, %[[VAL_31]], %[[VAL_29]] : index // CHECK: scf.condition(%[[VAL_33]]) %[[VAL_31]], %[[VAL_32]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_34:.*]]: index, %[[VAL_35:.*]]: index): -// CHECK: %[[VAL_36:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_34]]] : memref +// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_34]]] : memref // CHECK: %[[VAL_37:.*]] = cmpi eq, %[[VAL_36]], %[[VAL_35]] : index // CHECK: scf.if %[[VAL_37]] { // CHECK: scf.for %[[VAL_38:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] { // CHECK: %[[VAL_39:.*]] = muli %[[VAL_34]], %[[VAL_5]] : index // CHECK: %[[VAL_40:.*]] = addi %[[VAL_39]], %[[VAL_38]] : index -// CHECK: %[[VAL_41:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_40]]] : memref -// CHECK: %[[VAL_42:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_24]], %[[VAL_35]], %[[VAL_38]]] : memref<32x16x8xf32> +// CHECK: 
%[[VAL_41:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_40]]] : memref +// CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_24]], %[[VAL_35]], %[[VAL_38]]] : memref<32x16x8xf32> // CHECK: %[[VAL_43:.*]] = addf %[[VAL_41]], %[[VAL_42]] : f32 // CHECK: store %[[VAL_43]], %[[VAL_16]]{{\[}}%[[VAL_24]], %[[VAL_35]], %[[VAL_38]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } else { // CHECK: scf.if %[[VAL_6]] { // CHECK: scf.for %[[VAL_44:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] { -// CHECK: %[[VAL_45:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_24]], %[[VAL_35]], %[[VAL_44]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_45:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_24]], %[[VAL_35]], %[[VAL_44]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_45]], %[[VAL_16]]{{\[}}%[[VAL_24]], %[[VAL_35]], %[[VAL_44]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } else { @@ -909,7 +909,7 @@ // CHECK: } // CHECK: scf.for %[[VAL_50:.*]] = %[[VAL_51:.*]]#1 to %[[VAL_4]] step %[[VAL_8]] { // CHECK: scf.for %[[VAL_52:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] { -// CHECK: %[[VAL_53:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_24]], %[[VAL_50]], %[[VAL_52]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_53:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_24]], %[[VAL_50]], %[[VAL_52]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_53]], %[[VAL_16]]{{\[}}%[[VAL_24]], %[[VAL_50]], %[[VAL_52]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } @@ -917,7 +917,7 @@ // CHECK: scf.if %[[VAL_6]] { // CHECK: scf.for %[[VAL_54:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_8]] { // CHECK: scf.for %[[VAL_55:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] { -// CHECK: %[[VAL_56:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_24]], %[[VAL_54]], %[[VAL_55]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_56:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_24]], %[[VAL_54]], %[[VAL_55]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_56]], %[[VAL_16]]{{\[}}%[[VAL_24]], %[[VAL_54]], %[[VAL_55]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } @@ -933,12 +933,12 @@ // CHECK: scf.for %[[VAL_61:.*]] = %[[VAL_62:.*]]#1 to %[[VAL_3]] step %[[VAL_8]] { // CHECK: scf.for %[[VAL_63:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_8]] { // CHECK: scf.for %[[VAL_64:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] { -// CHECK: %[[VAL_65:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_61]], %[[VAL_63]], %[[VAL_64]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_65:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_61]], %[[VAL_63]], %[[VAL_64]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_65]], %[[VAL_16]]{{\[}}%[[VAL_61]], %[[VAL_63]], %[[VAL_64]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[VAL_66:.*]] = tensor_load %[[VAL_16]] : memref<32x16x8xf32> +// CHECK: %[[VAL_66:.*]] = memref.tensor_load %[[VAL_16]] : memref<32x16x8xf32> // CHECK: return %[[VAL_66]] : tensor<32x16x8xf32> // CHECK: } func @add_ssd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { @@ -964,30 +964,30 @@ // CHECK: %[[VAL_8:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_5]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_9:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_5]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_10:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32x16x8xf32> to memref -// CHECK: %[[VAL_11:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK: %[[VAL_12:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: %[[VAL_13:.*]] = alloc() : memref<32x16x8xf32> +// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_1]] : 
memref<32x16x8xf32> +// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32> +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<32x16x8xf32> // CHECK: linalg.copy(%[[VAL_12]], %[[VAL_13]]) : memref<32x16x8xf32>, memref<32x16x8xf32> -// CHECK: %[[VAL_14:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_15:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_14]] to %[[VAL_15]] step %[[VAL_5]] { -// CHECK: %[[VAL_17:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_16]]] : memref -// CHECK: %[[VAL_18:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_16]]] : memref +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_16]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_16]]] : memref // CHECK: %[[VAL_19:.*]] = addi %[[VAL_16]], %[[VAL_5]] : index -// CHECK: %[[VAL_20:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_19]]] : memref +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_19]]] : memref // CHECK: scf.for %[[VAL_21:.*]] = %[[VAL_18]] to %[[VAL_20]] step %[[VAL_5]] { -// CHECK: %[[VAL_22:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_21]]] : memref +// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_21]]] : memref // CHECK: scf.for %[[VAL_23:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK: %[[VAL_24:.*]] = muli %[[VAL_21]], %[[VAL_3]] : index // CHECK: %[[VAL_25:.*]] = addi %[[VAL_24]], %[[VAL_23]] : index -// CHECK: %[[VAL_26:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_25]]] : memref -// CHECK: %[[VAL_27:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_17]], %[[VAL_22]], %[[VAL_23]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_25]]] : memref +// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_17]], %[[VAL_22]], %[[VAL_23]]] : memref<32x16x8xf32> // CHECK: %[[VAL_28:.*]] = mulf %[[VAL_26]], %[[VAL_27]] : f32 // CHECK: store %[[VAL_28]], %[[VAL_13]]{{\[}}%[[VAL_17]], %[[VAL_22]], %[[VAL_23]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[VAL_29:.*]] = tensor_load %[[VAL_13]] : memref<32x16x8xf32> +// CHECK: %[[VAL_29:.*]] = memref.tensor_load %[[VAL_13]] : memref<32x16x8xf32> // CHECK: return %[[VAL_29]] : tensor<32x16x8xf32> // CHECK: } func @mul_ssd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { @@ -1034,49 +1034,49 @@ // CHECK: %[[VAL_14:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_15:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_16:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32x16x8xf32> to memref -// CHECK: %[[VAL_17:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK: %[[VAL_18:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: %[[VAL_19:.*]] = alloc() : memref<32x16x8xf32> +// CHECK: %[[VAL_17:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32> +// CHECK: %[[VAL_18:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32> +// CHECK: %[[VAL_19:.*]] = memref.alloc() : memref<32x16x8xf32> // CHECK: linalg.copy(%[[VAL_18]], %[[VAL_19]]) : memref<32x16x8xf32>, memref<32x16x8xf32> -// CHECK: %[[VAL_20:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_8]]] : memref -// CHECK: %[[VAL_21:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_9]]] : memref +// CHECK: %[[VAL_20:.*]] = memref.load 
%[[VAL_10]]{{\[}}%[[VAL_8]]] : memref +// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_9]]] : memref // CHECK: %[[VAL_22:.*]]:2 = scf.while (%[[VAL_23:.*]] = %[[VAL_20]], %[[VAL_24:.*]] = %[[VAL_8]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_25:.*]] = cmpi ult, %[[VAL_23]], %[[VAL_21]] : index // CHECK: scf.condition(%[[VAL_25]]) %[[VAL_23]], %[[VAL_24]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_26:.*]]: index, %[[VAL_27:.*]]: index): -// CHECK: %[[VAL_28:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_26]]] : memref +// CHECK: %[[VAL_28:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_26]]] : memref // CHECK: %[[VAL_29:.*]] = cmpi eq, %[[VAL_28]], %[[VAL_27]] : index // CHECK: scf.if %[[VAL_29]] { -// CHECK: %[[VAL_30:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_26]]] : memref +// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_26]]] : memref // CHECK: %[[VAL_31:.*]] = addi %[[VAL_26]], %[[VAL_9]] : index -// CHECK: %[[VAL_32:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_31]]] : memref +// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_31]]] : memref // CHECK: %[[VAL_33:.*]]:2 = scf.while (%[[VAL_34:.*]] = %[[VAL_30]], %[[VAL_35:.*]] = %[[VAL_8]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_36:.*]] = cmpi ult, %[[VAL_34]], %[[VAL_32]] : index // CHECK: scf.condition(%[[VAL_36]]) %[[VAL_34]], %[[VAL_35]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_37:.*]]: index, %[[VAL_38:.*]]: index): -// CHECK: %[[VAL_39:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_37]]] : memref +// CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_37]]] : memref // CHECK: %[[VAL_40:.*]] = cmpi eq, %[[VAL_39]], %[[VAL_38]] : index // CHECK: scf.if %[[VAL_40]] { -// CHECK: %[[VAL_41:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_37]]] : memref +// CHECK: %[[VAL_41:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_37]]] : memref // CHECK: %[[VAL_42:.*]] = addi %[[VAL_37]], %[[VAL_9]] : index -// CHECK: %[[VAL_43:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_42]]] : memref +// CHECK: %[[VAL_43:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_42]]] : memref // CHECK: %[[VAL_44:.*]]:2 = scf.while (%[[VAL_45:.*]] = %[[VAL_41]], %[[VAL_46:.*]] = %[[VAL_8]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_47:.*]] = cmpi ult, %[[VAL_45]], %[[VAL_43]] : index // CHECK: scf.condition(%[[VAL_47]]) %[[VAL_45]], %[[VAL_46]] : index, index // CHECK: } do { // CHECK: ^bb0(%[[VAL_48:.*]]: index, %[[VAL_49:.*]]: index): -// CHECK: %[[VAL_50:.*]] = load %[[VAL_15]]{{\[}}%[[VAL_48]]] : memref +// CHECK: %[[VAL_50:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_48]]] : memref // CHECK: %[[VAL_51:.*]] = cmpi eq, %[[VAL_50]], %[[VAL_49]] : index // CHECK: scf.if %[[VAL_51]] { -// CHECK: %[[VAL_52:.*]] = load %[[VAL_16]]{{\[}}%[[VAL_48]]] : memref -// CHECK: %[[VAL_53:.*]] = load %[[VAL_17]]{{\[}}%[[VAL_27]], %[[VAL_38]], %[[VAL_49]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_52:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_48]]] : memref +// CHECK: %[[VAL_53:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_27]], %[[VAL_38]], %[[VAL_49]]] : memref<32x16x8xf32> // CHECK: %[[VAL_54:.*]] = addf %[[VAL_52]], %[[VAL_53]] : f32 // CHECK: store %[[VAL_54]], %[[VAL_19]]{{\[}}%[[VAL_27]], %[[VAL_38]], %[[VAL_49]]] : memref<32x16x8xf32> // CHECK: } else { // CHECK: scf.if %[[VAL_7]] { -// CHECK: %[[VAL_55:.*]] = load %[[VAL_17]]{{\[}}%[[VAL_27]], %[[VAL_38]], %[[VAL_49]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_55:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_27]], %[[VAL_38]], %[[VAL_49]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_55]], 
%[[VAL_19]]{{\[}}%[[VAL_27]], %[[VAL_38]], %[[VAL_49]]] : memref<32x16x8xf32> // CHECK: } else { // CHECK: } @@ -1088,13 +1088,13 @@ // CHECK: scf.yield %[[VAL_58]], %[[VAL_59]] : index, index // CHECK: } // CHECK: scf.for %[[VAL_60:.*]] = %[[VAL_61:.*]]#1 to %[[VAL_6]] step %[[VAL_9]] { -// CHECK: %[[VAL_62:.*]] = load %[[VAL_17]]{{\[}}%[[VAL_27]], %[[VAL_38]], %[[VAL_60]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_62:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_27]], %[[VAL_38]], %[[VAL_60]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_62]], %[[VAL_19]]{{\[}}%[[VAL_27]], %[[VAL_38]], %[[VAL_60]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } else { // CHECK: scf.if %[[VAL_7]] { // CHECK: scf.for %[[VAL_63:.*]] = %[[VAL_8]] to %[[VAL_6]] step %[[VAL_9]] { -// CHECK: %[[VAL_64:.*]] = load %[[VAL_17]]{{\[}}%[[VAL_27]], %[[VAL_38]], %[[VAL_63]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_64:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_27]], %[[VAL_38]], %[[VAL_63]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_64]], %[[VAL_19]]{{\[}}%[[VAL_27]], %[[VAL_38]], %[[VAL_63]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } else { @@ -1108,7 +1108,7 @@ // CHECK: } // CHECK: scf.for %[[VAL_69:.*]] = %[[VAL_70:.*]]#1 to %[[VAL_5]] step %[[VAL_9]] { // CHECK: scf.for %[[VAL_71:.*]] = %[[VAL_8]] to %[[VAL_6]] step %[[VAL_9]] { -// CHECK: %[[VAL_72:.*]] = load %[[VAL_17]]{{\[}}%[[VAL_27]], %[[VAL_69]], %[[VAL_71]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_72:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_27]], %[[VAL_69]], %[[VAL_71]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_72]], %[[VAL_19]]{{\[}}%[[VAL_27]], %[[VAL_69]], %[[VAL_71]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } @@ -1116,7 +1116,7 @@ // CHECK: scf.if %[[VAL_7]] { // CHECK: scf.for %[[VAL_73:.*]] = %[[VAL_8]] to %[[VAL_5]] step %[[VAL_9]] { // CHECK: scf.for %[[VAL_74:.*]] = %[[VAL_8]] to %[[VAL_6]] step %[[VAL_9]] { -// CHECK: %[[VAL_75:.*]] = load %[[VAL_17]]{{\[}}%[[VAL_27]], %[[VAL_73]], %[[VAL_74]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_75:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_27]], %[[VAL_73]], %[[VAL_74]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_75]], %[[VAL_19]]{{\[}}%[[VAL_27]], %[[VAL_73]], %[[VAL_74]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } @@ -1132,12 +1132,12 @@ // CHECK: scf.for %[[VAL_80:.*]] = %[[VAL_81:.*]]#1 to %[[VAL_4]] step %[[VAL_9]] { // CHECK: scf.for %[[VAL_82:.*]] = %[[VAL_8]] to %[[VAL_5]] step %[[VAL_9]] { // CHECK: scf.for %[[VAL_83:.*]] = %[[VAL_8]] to %[[VAL_6]] step %[[VAL_9]] { -// CHECK: %[[VAL_84:.*]] = load %[[VAL_17]]{{\[}}%[[VAL_80]], %[[VAL_82]], %[[VAL_83]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_84:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_80]], %[[VAL_82]], %[[VAL_83]]] : memref<32x16x8xf32> // CHECK: store %[[VAL_84]], %[[VAL_19]]{{\[}}%[[VAL_80]], %[[VAL_82]], %[[VAL_83]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[VAL_85:.*]] = tensor_load %[[VAL_19]] : memref<32x16x8xf32> +// CHECK: %[[VAL_85:.*]] = memref.tensor_load %[[VAL_19]] : memref<32x16x8xf32> // CHECK: return %[[VAL_85]] : tensor<32x16x8xf32> // CHECK: } func @add_sss(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { @@ -1165,32 +1165,32 @@ // CHECK: %[[VAL_10:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_11:.*]] = linalg.sparse_indices %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32> to memref // CHECK: %[[VAL_12:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<32x16x8xf32> to 
memref -// CHECK: %[[VAL_13:.*]] = tensor_to_memref %[[VAL_1]] : memref<32x16x8xf32> -// CHECK: %[[VAL_14:.*]] = tensor_to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: %[[VAL_15:.*]] = alloc() : memref<32x16x8xf32> +// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32> +// CHECK: %[[VAL_14:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32> +// CHECK: %[[VAL_15:.*]] = memref.alloc() : memref<32x16x8xf32> // CHECK: linalg.copy(%[[VAL_14]], %[[VAL_15]]) : memref<32x16x8xf32>, memref<32x16x8xf32> -// CHECK: %[[VAL_16:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_17:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK: scf.for %[[VAL_18:.*]] = %[[VAL_16]] to %[[VAL_17]] step %[[VAL_5]] { -// CHECK: %[[VAL_19:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_18]]] : memref -// CHECK: %[[VAL_20:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_18]]] : memref +// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_18]]] : memref +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_18]]] : memref // CHECK: %[[VAL_21:.*]] = addi %[[VAL_18]], %[[VAL_5]] : index -// CHECK: %[[VAL_22:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_21]]] : memref +// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_21]]] : memref // CHECK: scf.for %[[VAL_23:.*]] = %[[VAL_20]] to %[[VAL_22]] step %[[VAL_5]] { -// CHECK: %[[VAL_24:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_23]]] : memref -// CHECK: %[[VAL_25:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_23]]] : memref +// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_23]]] : memref +// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_23]]] : memref // CHECK: %[[VAL_26:.*]] = addi %[[VAL_23]], %[[VAL_5]] : index -// CHECK: %[[VAL_27:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_26]]] : memref +// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_26]]] : memref // CHECK: scf.for %[[VAL_28:.*]] = %[[VAL_25]] to %[[VAL_27]] step %[[VAL_5]] { -// CHECK: %[[VAL_29:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_28]]] : memref -// CHECK: %[[VAL_30:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref -// CHECK: %[[VAL_31:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_19]], %[[VAL_24]], %[[VAL_29]]] : memref<32x16x8xf32> +// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_28]]] : memref +// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref +// CHECK: %[[VAL_31:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_19]], %[[VAL_24]], %[[VAL_29]]] : memref<32x16x8xf32> // CHECK: %[[VAL_32:.*]] = mulf %[[VAL_30]], %[[VAL_31]] : f32 // CHECK: store %[[VAL_32]], %[[VAL_15]]{{\[}}%[[VAL_19]], %[[VAL_24]], %[[VAL_29]]] : memref<32x16x8xf32> // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[VAL_33:.*]] = tensor_load %[[VAL_15]] : memref<32x16x8xf32> +// CHECK: %[[VAL_33:.*]] = memref.tensor_load %[[VAL_15]] : memref<32x16x8xf32> // CHECK: return %[[VAL_33]] : tensor<32x16x8xf32> // CHECK: } func @mul_sss(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { @@ -1232,37 +1232,37 @@ // CHECK: %[[VAL_7:.*]] = linalg.sparse_pointers %[[VAL_1]], %[[VAL_4]] : tensor to memref // CHECK: %[[VAL_8:.*]] = linalg.sparse_indices %[[VAL_1]], %[[VAL_4]] : tensor to memref // CHECK: %[[VAL_9:.*]] = linalg.sparse_values %[[VAL_1]] : tensor to memref -// CHECK: %[[VAL_10:.*]] = dim %[[VAL_2]], %[[VAL_5]] : tensor -// CHECK: %[[VAL_11:.*]] = 
tensor_to_memref %[[VAL_2]] : memref -// CHECK: %[[VAL_12:.*]] = tensor_to_memref %[[VAL_3]] : memref -// CHECK: %[[VAL_13:.*]] = dim %[[VAL_0]], %[[VAL_5]] : tensor -// CHECK: %[[VAL_14:.*]] = dim %[[VAL_0]], %[[VAL_6]] : tensor -// CHECK: %[[VAL_15:.*]] = tensor_to_memref %[[VAL_0]] : memref -// CHECK: %[[VAL_16:.*]] = alloc(%[[VAL_13]], %[[VAL_14]]) : memref +// CHECK: %[[VAL_10:.*]] = memref.dim %[[VAL_2]], %[[VAL_5]] : tensor +// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref +// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_3]] : memref +// CHECK: %[[VAL_13:.*]] = memref.dim %[[VAL_0]], %[[VAL_5]] : tensor +// CHECK: %[[VAL_14:.*]] = memref.dim %[[VAL_0]], %[[VAL_6]] : tensor +// CHECK: %[[VAL_15:.*]] = memref.buffer_cast %[[VAL_0]] : memref +// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_13]], %[[VAL_14]]) : memref // CHECK: linalg.copy(%[[VAL_15]], %[[VAL_16]]) : memref, memref // CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_5]] to %[[VAL_13]] step %[[VAL_6]] { // CHECK: scf.for %[[VAL_18:.*]] = %[[VAL_5]] to %[[VAL_10]] step %[[VAL_6]] { // CHECK: %[[VAL_19:.*]] = muli %[[VAL_10]], %[[VAL_17]] : index // CHECK: %[[VAL_20:.*]] = addi %[[VAL_19]], %[[VAL_18]] : index -// CHECK: %[[VAL_21:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_20]]] : memref +// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_20]]] : memref // CHECK: %[[VAL_22:.*]] = addi %[[VAL_20]], %[[VAL_6]] : index -// CHECK: %[[VAL_23:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_22]]] : memref +// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_22]]] : memref // CHECK: scf.for %[[VAL_24:.*]] = %[[VAL_21]] to %[[VAL_23]] step %[[VAL_6]] { -// CHECK: %[[VAL_25:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_24]]] : memref -// CHECK: %[[VAL_26:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_24]]] : memref +// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_24]]] : memref +// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_24]]] : memref // CHECK: scf.for %[[VAL_27:.*]] = %[[VAL_5]] to %[[VAL_14]] step %[[VAL_6]] { -// CHECK: %[[VAL_28:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_18]], %[[VAL_27]]] : memref +// CHECK: %[[VAL_28:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_18]], %[[VAL_27]]] : memref // CHECK: %[[VAL_29:.*]] = mulf %[[VAL_26]], %[[VAL_28]] : f32 -// CHECK: %[[VAL_30:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_25]], %[[VAL_27]]] : memref +// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_25]], %[[VAL_27]]] : memref // CHECK: %[[VAL_31:.*]] = mulf %[[VAL_29]], %[[VAL_30]] : f32 -// CHECK: %[[VAL_32:.*]] = load %[[VAL_16]]{{\[}}%[[VAL_17]], %[[VAL_27]]] : memref +// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_17]], %[[VAL_27]]] : memref // CHECK: %[[VAL_33:.*]] = addf %[[VAL_31]], %[[VAL_32]] : f32 // CHECK: store %[[VAL_33]], %[[VAL_16]]{{\[}}%[[VAL_17]], %[[VAL_27]]] : memref // CHECK: } // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[VAL_34:.*]] = tensor_load %[[VAL_16]] : memref +// CHECK: %[[VAL_34:.*]] = memref.tensor_load %[[VAL_16]] : memref // CHECK: return %[[VAL_34]] : tensor // CHECK: } func @kernel_3d(%arga: tensor, @@ -1304,29 +1304,29 @@ // CHECK: %[[VAL_6:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_4]] : tensor<10x20x30xf32> to memref // CHECK: %[[VAL_7:.*]] = linalg.sparse_pointers %[[VAL_0]], %[[VAL_2]] : tensor<10x20x30xf32> to memref // CHECK: %[[VAL_8:.*]] = linalg.sparse_values %[[VAL_0]] : tensor<10x20x30xf32> to memref -// CHECK: %[[VAL_9:.*]] = tensor_to_memref %[[VAL_1]] : memref -// CHECK: %[[VAL_10:.*]] = alloc() : memref +// CHECK: %[[VAL_9:.*]] = 
memref.buffer_cast %[[VAL_1]] : memref +// CHECK: %[[VAL_10:.*]] = memref.alloc() : memref // CHECK: linalg.copy(%[[VAL_9]], %[[VAL_10]]) : memref, memref -// CHECK: %[[VAL_11:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_12:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_4]] { -// CHECK: %[[VAL_14:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_13]]] : memref +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_13]]] : memref // CHECK: %[[VAL_15:.*]] = addi %[[VAL_13]], %[[VAL_4]] : index -// CHECK: %[[VAL_16:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_15]]] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_15]]] : memref // CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_14]] to %[[VAL_16]] step %[[VAL_4]] { -// CHECK: %[[VAL_18:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_17]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_17]]] : memref // CHECK: %[[VAL_19:.*]] = addi %[[VAL_17]], %[[VAL_4]] : index -// CHECK: %[[VAL_20:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_19]]] : memref -// CHECK: %[[VAL_21:.*]] = load %[[VAL_10]][] : memref +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_19]]] : memref +// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_10]][] : memref // CHECK: %[[VAL_22:.*]] = scf.for %[[VAL_23:.*]] = %[[VAL_18]] to %[[VAL_20]] step %[[VAL_4]] iter_args(%[[VAL_24:.*]] = %[[VAL_21]]) -> (f32) { -// CHECK: %[[VAL_25:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_23]]] : memref +// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_23]]] : memref // CHECK: %[[VAL_26:.*]] = addf %[[VAL_24]], %[[VAL_25]] : f32 // CHECK: scf.yield %[[VAL_26]] : f32 // CHECK: } // CHECK: store %[[VAL_27:.*]], %[[VAL_10]][] : memref // CHECK: } // CHECK: } -// CHECK: %[[VAL_28:.*]] = tensor_load %[[VAL_10]] : memref +// CHECK: %[[VAL_28:.*]] = memref.tensor_load %[[VAL_10]] : memref // CHECK: return %[[VAL_28]] : tensor // CHECK: } func @sum_reduction(%arga: tensor<10x20x30xf32>, %argx: tensor) -> tensor { @@ -1362,20 +1362,20 @@ // CHECK: %[[VAL_3:.*]] = constant 2 : index // CHECK: %[[VAL_4:.*]] = constant 0 : index // CHECK: %[[VAL_5:.*]] = constant 1 : index -// CHECK: %[[VAL_6:.*]] = dim %[[VAL_0]], %[[VAL_5]] : tensor -// CHECK: %[[VAL_7:.*]] = dim %[[VAL_0]], %[[VAL_3]] : tensor -// CHECK: %[[VAL_8:.*]] = tensor_to_memref %[[VAL_0]] : memref -// CHECK: %[[VAL_9:.*]] = dim %[[VAL_1]], %[[VAL_4]] : tensor -// CHECK: %[[VAL_10:.*]] = tensor_to_memref %[[VAL_1]] : memref -// CHECK: %[[VAL_11:.*]] = tensor_to_memref %[[VAL_2]] : memref -// CHECK: %[[VAL_12:.*]] = alloc() : memref +// CHECK: %[[VAL_6:.*]] = memref.dim %[[VAL_0]], %[[VAL_5]] : tensor +// CHECK: %[[VAL_7:.*]] = memref.dim %[[VAL_0]], %[[VAL_3]] : tensor +// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_0]] : memref +// CHECK: %[[VAL_9:.*]] = memref.dim %[[VAL_1]], %[[VAL_4]] : tensor +// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_1]] : memref +// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref +// CHECK: %[[VAL_12:.*]] = memref.alloc() : memref // CHECK: linalg.copy(%[[VAL_11]], %[[VAL_12]]) : memref, memref // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_4]] to %[[VAL_9]] step %[[VAL_5]] { -// CHECK: %[[VAL_14:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_13]]] : memref +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_13]]] : memref // CHECK: scf.for 
%[[VAL_15:.*]] = %[[VAL_4]] to %[[VAL_6]] step %[[VAL_5]] { -// CHECK: %[[VAL_16:.*]] = load %[[VAL_12]][] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_12]][] : memref // CHECK: %[[VAL_17:.*]] = scf.for %[[VAL_18:.*]] = %[[VAL_4]] to %[[VAL_7]] step %[[VAL_5]] iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (f32) { -// CHECK: %[[VAL_20:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_13]], %[[VAL_15]], %[[VAL_18]]] : memref +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_13]], %[[VAL_15]], %[[VAL_18]]] : memref // CHECK: %[[VAL_21:.*]] = mulf %[[VAL_20]], %[[VAL_14]] : f32 // CHECK: %[[VAL_22:.*]] = addf %[[VAL_19]], %[[VAL_21]] : f32 // CHECK: scf.yield %[[VAL_22]] : f32 @@ -1383,7 +1383,7 @@ // CHECK: store %[[VAL_23:.*]], %[[VAL_12]][] : memref // CHECK: } // CHECK: } -// CHECK: %[[VAL_24:.*]] = tensor_load %[[VAL_12]] : memref +// CHECK: %[[VAL_24:.*]] = memref.tensor_load %[[VAL_12]] : memref // CHECK: return %[[VAL_24]] : tensor // CHECK: } func @sum_reduction_inv(%arga: tensor, @@ -1427,25 +1427,25 @@ // CHECK: %[[VAL_6:.*]] = constant 30 : index // CHECK: %[[VAL_7:.*]] = constant 0 : index // CHECK: %[[VAL_8:.*]] = constant 1 : index -// CHECK: %[[VAL_9:.*]] = tensor_to_memref %[[VAL_0]] : memref<10xf32> -// CHECK: %[[VAL_10:.*]] = tensor_to_memref %[[VAL_1]] : memref<20xf32> -// CHECK: %[[VAL_11:.*]] = tensor_to_memref %[[VAL_2]] : memref<30xf32> -// CHECK: %[[VAL_12:.*]] = tensor_to_memref %[[VAL_3]] : memref<10x20x30xf32> -// CHECK: %[[VAL_13:.*]] = alloc() : memref<10x20x30xf32> +// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_0]] : memref<10xf32> +// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_1]] : memref<20xf32> +// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<30xf32> +// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_3]] : memref<10x20x30xf32> +// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<10x20x30xf32> // CHECK: linalg.copy(%[[VAL_12]], %[[VAL_13]]) : memref<10x20x30xf32>, memref<10x20x30xf32> // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_8]] { -// CHECK: %[[VAL_15:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_14]]] : memref<10xf32> +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_14]]] : memref<10xf32> // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] { -// CHECK: %[[VAL_17:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_16]]] : memref<20xf32> +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_16]]] : memref<20xf32> // CHECK: scf.for %[[VAL_18:.*]] = %[[VAL_7]] to %[[VAL_6]] step %[[VAL_8]] { // CHECK: %[[VAL_19:.*]] = mulf %[[VAL_15]], %[[VAL_17]] : f32 -// CHECK: %[[VAL_20:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_18]]] : memref<30xf32> +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_18]]] : memref<30xf32> // CHECK: %[[VAL_21:.*]] = mulf %[[VAL_19]], %[[VAL_20]] : f32 // CHECK: store %[[VAL_21]], %[[VAL_13]]{{\[}}%[[VAL_14]], %[[VAL_16]], %[[VAL_18]]] : memref<10x20x30xf32> // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[VAL_22:.*]] = tensor_load %[[VAL_13]] : memref<10x20x30xf32> +// CHECK: %[[VAL_22:.*]] = memref.tensor_load %[[VAL_13]] : memref<10x20x30xf32> // CHECK: return %[[VAL_22]] : tensor<10x20x30xf32> // CHECK: } func @invariants(%arga: tensor<10xf32>, diff --git a/mlir/test/Dialect/Linalg/sparse_lower.mlir b/mlir/test/Dialect/Linalg/sparse_lower.mlir --- a/mlir/test/Dialect/Linalg/sparse_lower.mlir +++ b/mlir/test/Dialect/Linalg/sparse_lower.mlir @@ -45,26 +45,26 @@ // CHECK-HIR: %[[VAL_7:.*]] = linalg.sparse_pointers %[[VAL_6]], %[[VAL_5]] : 
tensor<64x64xf64> to memref // CHECK-HIR: %[[VAL_8:.*]] = linalg.sparse_indices %[[VAL_6]], %[[VAL_5]] : tensor<64x64xf64> to memref // CHECK-HIR: %[[VAL_9:.*]] = linalg.sparse_values %[[VAL_6]] : tensor<64x64xf64> to memref -// CHECK-HIR: %[[VAL_10:.*]] = tensor_to_memref %[[VAL_1]] : memref<64xf64> -// CHECK-HIR: %[[VAL_11:.*]] = tensor_to_memref %[[VAL_2]] : memref<64xf64> -// CHECK-HIR: %[[VAL_12:.*]] = alloc() : memref<64xf64> +// CHECK-HIR: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_1]] : memref<64xf64> +// CHECK-HIR: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<64xf64> +// CHECK-HIR: %[[VAL_12:.*]] = memref.alloc() : memref<64xf64> // CHECK-HIR: linalg.copy(%[[VAL_11]], %[[VAL_12]]) : memref<64xf64>, memref<64xf64> // CHECK-HIR: scf.for %[[VAL_13:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { -// CHECK-HIR: %[[VAL_14:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref +// CHECK-HIR: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref // CHECK-HIR: %[[VAL_15:.*]] = addi %[[VAL_13]], %[[VAL_5]] : index -// CHECK-HIR: %[[VAL_16:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref -// CHECK-HIR: %[[VAL_17:.*]] = load %[[VAL_12]]{{\[}}%[[VAL_13]]] : memref<64xf64> +// CHECK-HIR: %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref +// CHECK-HIR: %[[VAL_17:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_13]]] : memref<64xf64> // CHECK-HIR: %[[VAL_18:.*]] = scf.for %[[VAL_19:.*]] = %[[VAL_14]] to %[[VAL_16]] step %[[VAL_5]] iter_args(%[[VAL_20:.*]] = %[[VAL_17]]) -> (f64) { -// CHECK-HIR: %[[VAL_21:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_19]]] : memref -// CHECK-HIR: %[[VAL_22:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_19]]] : memref -// CHECK-HIR: %[[VAL_23:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_21]]] : memref<64xf64> +// CHECK-HIR: %[[VAL_21:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_19]]] : memref +// CHECK-HIR: %[[VAL_22:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_19]]] : memref +// CHECK-HIR: %[[VAL_23:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_21]]] : memref<64xf64> // CHECK-HIR: %[[VAL_24:.*]] = mulf %[[VAL_22]], %[[VAL_23]] : f64 // CHECK-HIR: %[[VAL_25:.*]] = addf %[[VAL_20]], %[[VAL_24]] : f64 // CHECK-HIR: scf.yield %[[VAL_25]] : f64 // CHECK-HIR: } // CHECK-HIR: store %[[VAL_26:.*]], %[[VAL_12]]{{\[}}%[[VAL_13]]] : memref<64xf64> // CHECK-HIR: } -// CHECK-HIR: %[[VAL_27:.*]] = tensor_load %[[VAL_12]] : memref<64xf64> +// CHECK-HIR: %[[VAL_27:.*]] = memref.tensor_load %[[VAL_12]] : memref<64xf64> // CHECK-HIR: return %[[VAL_27]] : tensor<64xf64> // CHECK-HIR: } @@ -78,29 +78,29 @@ // CHECK-MIR: %[[VAL_6:.*]] = call @sparsePointers64(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref // CHECK-MIR: %[[VAL_7:.*]] = call @sparseIndices64(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref // CHECK-MIR: %[[VAL_8:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref -// CHECK-MIR: %[[VAL_9:.*]] = tensor_to_memref %[[VAL_1]] : memref<64xf64> -// CHECK-MIR: %[[VAL_10:.*]] = tensor_to_memref %[[VAL_2]] : memref<64xf64> -// CHECK-MIR: %[[VAL_11:.*]] = alloc() : memref<64xf64> +// CHECK-MIR: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<64xf64> +// CHECK-MIR: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<64xf64> +// CHECK-MIR: %[[VAL_11:.*]] = memref.alloc() : memref<64xf64> // CHECK-MIR: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { -// CHECK-MIR: %[[VAL_13:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_12]]] : memref<64xf64> +// CHECK-MIR: %[[VAL_13:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_12]]] : memref<64xf64> // 
CHECK-MIR: store %[[VAL_13]], %[[VAL_11]]{{\[}}%[[VAL_12]]] : memref<64xf64> // CHECK-MIR: } // CHECK-MIR: scf.for %[[VAL_14:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { -// CHECK-MIR: %[[VAL_15:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_14]]] : memref +// CHECK-MIR: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_14]]] : memref // CHECK-MIR: %[[VAL_16:.*]] = addi %[[VAL_14]], %[[VAL_5]] : index -// CHECK-MIR: %[[VAL_17:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_16]]] : memref -// CHECK-MIR: %[[VAL_18:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_14]]] : memref<64xf64> +// CHECK-MIR: %[[VAL_17:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_16]]] : memref +// CHECK-MIR: %[[VAL_18:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_14]]] : memref<64xf64> // CHECK-MIR: %[[VAL_19:.*]] = scf.for %[[VAL_20:.*]] = %[[VAL_15]] to %[[VAL_17]] step %[[VAL_5]] iter_args(%[[VAL_21:.*]] = %[[VAL_18]]) -> (f64) { -// CHECK-MIR: %[[VAL_22:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_20]]] : memref -// CHECK-MIR: %[[VAL_23:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_20]]] : memref -// CHECK-MIR: %[[VAL_24:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_22]]] : memref<64xf64> +// CHECK-MIR: %[[VAL_22:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_20]]] : memref +// CHECK-MIR: %[[VAL_23:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_20]]] : memref +// CHECK-MIR: %[[VAL_24:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_22]]] : memref<64xf64> // CHECK-MIR: %[[VAL_25:.*]] = mulf %[[VAL_23]], %[[VAL_24]] : f64 // CHECK-MIR: %[[VAL_26:.*]] = addf %[[VAL_21]], %[[VAL_25]] : f64 // CHECK-MIR: scf.yield %[[VAL_26]] : f64 // CHECK-MIR: } // CHECK-MIR: store %[[VAL_27:.*]], %[[VAL_11]]{{\[}}%[[VAL_14]]] : memref<64xf64> // CHECK-MIR: } -// CHECK-MIR: %[[VAL_28:.*]] = tensor_load %[[VAL_11]] : memref<64xf64> +// CHECK-MIR: %[[VAL_28:.*]] = memref.tensor_load %[[VAL_11]] : memref<64xf64> // CHECK-MIR: return %[[VAL_28]] : tensor<64xf64> // CHECK-MIR: } @@ -114,20 +114,20 @@ // CHECK-LIR: %[[VAL_6:.*]] = call @sparsePointers64(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref // CHECK-LIR: %[[VAL_7:.*]] = call @sparseIndices64(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref // CHECK-LIR: %[[VAL_8:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref -// CHECK-LIR: %[[VAL_9:.*]] = alloc() : memref<64xf64> +// CHECK-LIR: %[[VAL_9:.*]] = memref.alloc() : memref<64xf64> // CHECK-LIR: scf.for %[[VAL_10:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { -// CHECK-LIR: %[[VAL_11:.*]] = load %[[VAL_2]]{{\[}}%[[VAL_10]]] : memref<64xf64> +// CHECK-LIR: %[[VAL_11:.*]] = memref.load %[[VAL_2]]{{\[}}%[[VAL_10]]] : memref<64xf64> // CHECK-LIR: store %[[VAL_11]], %[[VAL_9]]{{\[}}%[[VAL_10]]] : memref<64xf64> // CHECK-LIR: } // CHECK-LIR: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { -// CHECK-LIR: %[[VAL_13:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref +// CHECK-LIR: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref // CHECK-LIR: %[[VAL_14:.*]] = addi %[[VAL_12]], %[[VAL_5]] : index -// CHECK-LIR: %[[VAL_15:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_14]]] : memref -// CHECK-LIR: %[[VAL_16:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_12]]] : memref<64xf64> +// CHECK-LIR: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_14]]] : memref +// CHECK-LIR: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_12]]] : memref<64xf64> // CHECK-LIR: %[[VAL_17:.*]] = scf.for %[[VAL_18:.*]] = %[[VAL_13]] to %[[VAL_15]] step %[[VAL_5]] iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (f64) { -// CHECK-LIR: %[[VAL_20:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_18]]] : memref -// CHECK-LIR: 
%[[VAL_21:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_18]]] : memref -// CHECK-LIR: %[[VAL_22:.*]] = load %[[VAL_1]]{{\[}}%[[VAL_20]]] : memref<64xf64> +// CHECK-LIR: %[[VAL_20:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_18]]] : memref +// CHECK-LIR: %[[VAL_21:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_18]]] : memref +// CHECK-LIR: %[[VAL_22:.*]] = memref.load %[[VAL_1]]{{\[}}%[[VAL_20]]] : memref<64xf64> // CHECK-LIR: %[[VAL_23:.*]] = mulf %[[VAL_21]], %[[VAL_22]] : f64 // CHECK-LIR: %[[VAL_24:.*]] = addf %[[VAL_19]], %[[VAL_23]] : f64 // CHECK-LIR: scf.yield %[[VAL_24]] : f64 @@ -148,14 +148,14 @@ // CHECK-FAST: %[[VAL_7:.*]] = call @sparseIndices64(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref // CHECK-FAST: %[[VAL_8:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref // CHECK-FAST: scf.for %[[VAL_9:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { -// CHECK-FAST: %[[VAL_10:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_9]]] : memref +// CHECK-FAST: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_9]]] : memref // CHECK-FAST: %[[VAL_11:.*]] = addi %[[VAL_9]], %[[VAL_5]] : index -// CHECK-FAST: %[[VAL_12:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref -// CHECK-FAST: %[[VAL_13:.*]] = load %[[VAL_2]]{{\[}}%[[VAL_9]]] : memref<64xf64> +// CHECK-FAST: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref +// CHECK-FAST: %[[VAL_13:.*]] = memref.load %[[VAL_2]]{{\[}}%[[VAL_9]]] : memref<64xf64> // CHECK-FAST: %[[VAL_14:.*]] = scf.for %[[VAL_15:.*]] = %[[VAL_10]] to %[[VAL_12]] step %[[VAL_5]] iter_args(%[[VAL_16:.*]] = %[[VAL_13]]) -> (f64) { -// CHECK-FAST: %[[VAL_17:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref -// CHECK-FAST: %[[VAL_18:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_15]]] : memref -// CHECK-FAST: %[[VAL_19:.*]] = load %[[VAL_1]]{{\[}}%[[VAL_17]]] : memref<64xf64> +// CHECK-FAST: %[[VAL_17:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref +// CHECK-FAST: %[[VAL_18:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_15]]] : memref +// CHECK-FAST: %[[VAL_19:.*]] = memref.load %[[VAL_1]]{{\[}}%[[VAL_17]]] : memref<64xf64> // CHECK-FAST: %[[VAL_20:.*]] = mulf %[[VAL_18]], %[[VAL_19]] : f64 // CHECK-FAST: %[[VAL_21:.*]] = addf %[[VAL_16]], %[[VAL_20]] : f64 // CHECK-FAST: scf.yield %[[VAL_21]] : f64 diff --git a/mlir/test/Dialect/Linalg/sparse_nd.mlir b/mlir/test/Dialect/Linalg/sparse_nd.mlir --- a/mlir/test/Dialect/Linalg/sparse_nd.mlir +++ b/mlir/test/Dialect/Linalg/sparse_nd.mlir @@ -33,14 +33,14 @@ // CHECK: %[[VAL_10:.*]] = constant 80 : index // CHECK: %[[VAL_11:.*]] = constant 0 : index // CHECK: %[[VAL_12:.*]] = constant 1 : index -// CHECK: %[[VAL_13:.*]] = tensor_to_memref %[[VAL_0]] : memref<10x20x30x40x50x60x70x80xf32> +// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_0]] : memref<10x20x30x40x50x60x70x80xf32> // CHECK: %[[VAL_14:.*]] = linalg.sparse_pointers %[[VAL_1]], %[[VAL_3]] : tensor<10x20x30x40x50x60x70x80xf32> to memref // CHECK: %[[VAL_15:.*]] = linalg.sparse_indices %[[VAL_1]], %[[VAL_3]] : tensor<10x20x30x40x50x60x70x80xf32> to memref // CHECK: %[[VAL_16:.*]] = linalg.sparse_pointers %[[VAL_1]], %[[VAL_4]] : tensor<10x20x30x40x50x60x70x80xf32> to memref // CHECK: %[[VAL_17:.*]] = linalg.sparse_indices %[[VAL_1]], %[[VAL_4]] : tensor<10x20x30x40x50x60x70x80xf32> to memref // CHECK: %[[VAL_18:.*]] = linalg.sparse_values %[[VAL_1]] : tensor<10x20x30x40x50x60x70x80xf32> to memref -// CHECK: %[[VAL_19:.*]] = tensor_to_memref %[[VAL_2]] : memref<10x20x30x40x50x60x70x80xf32> -// CHECK: %[[VAL_20:.*]] = alloc() : memref<10x20x30x40x50x60x70x80xf32> +// 
CHECK: %[[VAL_19:.*]] = memref.buffer_cast %[[VAL_2]] : memref<10x20x30x40x50x60x70x80xf32> +// CHECK: %[[VAL_20:.*]] = memref.alloc() : memref<10x20x30x40x50x60x70x80xf32> // CHECK: linalg.copy(%[[VAL_19]], %[[VAL_20]]) : memref<10x20x30x40x50x60x70x80xf32>, memref<10x20x30x40x50x60x70x80xf32> // CHECK: scf.for %[[VAL_21:.*]] = %[[VAL_11]] to %[[VAL_10]] step %[[VAL_12]] { // CHECK: scf.for %[[VAL_22:.*]] = %[[VAL_11]] to %[[VAL_9]] step %[[VAL_12]] { @@ -49,16 +49,16 @@ // CHECK: scf.for %[[VAL_25:.*]] = %[[VAL_11]] to %[[VAL_8]] step %[[VAL_12]] { // CHECK: %[[VAL_26:.*]] = muli %[[VAL_24]], %[[VAL_8]] : index // CHECK: %[[VAL_27:.*]] = addi %[[VAL_26]], %[[VAL_25]] : index -// CHECK: %[[VAL_28:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_28:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_27]]] : memref // CHECK: %[[VAL_29:.*]] = addi %[[VAL_27]], %[[VAL_12]] : index -// CHECK: %[[VAL_30:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_29]]] : memref +// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_29]]] : memref // CHECK: scf.for %[[VAL_31:.*]] = %[[VAL_28]] to %[[VAL_30]] step %[[VAL_12]] { -// CHECK: %[[VAL_32:.*]] = load %[[VAL_15]]{{\[}}%[[VAL_31]]] : memref -// CHECK: %[[VAL_33:.*]] = load %[[VAL_16]]{{\[}}%[[VAL_31]]] : memref +// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_31]]] : memref +// CHECK: %[[VAL_33:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_31]]] : memref // CHECK: %[[VAL_34:.*]] = addi %[[VAL_31]], %[[VAL_12]] : index -// CHECK: %[[VAL_35:.*]] = load %[[VAL_16]]{{\[}}%[[VAL_34]]] : memref +// CHECK: %[[VAL_35:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_34]]] : memref // CHECK: scf.for %[[VAL_36:.*]] = %[[VAL_33]] to %[[VAL_35]] step %[[VAL_12]] { -// CHECK: %[[VAL_37:.*]] = load %[[VAL_17]]{{\[}}%[[VAL_36]]] : memref +// CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_36]]] : memref // CHECK: scf.for %[[VAL_38:.*]] = %[[VAL_11]] to %[[VAL_7]] step %[[VAL_12]] { // CHECK: %[[VAL_39:.*]] = muli %[[VAL_36]], %[[VAL_7]] : index // CHECK: %[[VAL_40:.*]] = addi %[[VAL_39]], %[[VAL_38]] : index @@ -68,10 +68,10 @@ // CHECK: scf.for %[[VAL_44:.*]] = %[[VAL_11]] to %[[VAL_5]] step %[[VAL_12]] { // CHECK: %[[VAL_45:.*]] = muli %[[VAL_43]], %[[VAL_5]] : index // CHECK: %[[VAL_46:.*]] = addi %[[VAL_45]], %[[VAL_44]] : index -// CHECK: %[[VAL_47:.*]] = load %[[VAL_13]]{{\[}}%[[VAL_44]], %[[VAL_41]], %[[VAL_38]], %[[VAL_37]], %[[VAL_32]], %[[VAL_25]], %[[VAL_22]], %[[VAL_21]]] : memref<10x20x30x40x50x60x70x80xf32> -// CHECK: %[[VAL_48:.*]] = load %[[VAL_18]]{{\[}}%[[VAL_46]]] : memref +// CHECK: %[[VAL_47:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_44]], %[[VAL_41]], %[[VAL_38]], %[[VAL_37]], %[[VAL_32]], %[[VAL_25]], %[[VAL_22]], %[[VAL_21]]] : memref<10x20x30x40x50x60x70x80xf32> +// CHECK: %[[VAL_48:.*]] = memref.load %[[VAL_18]]{{\[}}%[[VAL_46]]] : memref // CHECK: %[[VAL_49:.*]] = mulf %[[VAL_47]], %[[VAL_48]] : f32 -// CHECK: store %[[VAL_49]], %[[VAL_20]]{{\[}}%[[VAL_44]], %[[VAL_41]], %[[VAL_38]], %[[VAL_37]], %[[VAL_32]], %[[VAL_25]], %[[VAL_22]], %[[VAL_21]]] : memref<10x20x30x40x50x60x70x80xf32> +// CHECK: memref.store %[[VAL_49]], %[[VAL_20]]{{\[}}%[[VAL_44]], %[[VAL_41]], %[[VAL_38]], %[[VAL_37]], %[[VAL_32]], %[[VAL_25]], %[[VAL_22]], %[[VAL_21]]] : memref<10x20x30x40x50x60x70x80xf32> // CHECK: } // CHECK: } // CHECK: } @@ -80,7 +80,7 @@ // CHECK: } // CHECK: } // CHECK: } -// CHECK: %[[VAL_50:.*]] = tensor_load %[[VAL_20]] : memref<10x20x30x40x50x60x70x80xf32> +// CHECK: %[[VAL_50:.*]] = memref.tensor_load %[[VAL_20]] : 
memref<10x20x30x40x50x60x70x80xf32> // CHECK: return %[[VAL_50]] : tensor<10x20x30x40x50x60x70x80xf32> // CHECK: } func @mul(%arga: tensor<10x20x30x40x50x60x70x80xf32>, diff --git a/mlir/test/Dialect/Linalg/sparse_storage.mlir b/mlir/test/Dialect/Linalg/sparse_storage.mlir --- a/mlir/test/Dialect/Linalg/sparse_storage.mlir +++ b/mlir/test/Dialect/Linalg/sparse_storage.mlir @@ -29,15 +29,15 @@ // CHECK-TYPE0-LABEL: func @mul_dd( // CHECK-TYPE0: %[[C0:.*]] = constant 0 : index // CHECK-TYPE0: %[[C1:.*]] = constant 1 : index -// CHECK-TYPE0: %[[P0:.*]] = load %{{.*}}[%[[C0]]] : memref +// CHECK-TYPE0: %[[P0:.*]] = memref.load %{{.*}}[%[[C0]]] : memref // CHECK-TYPE0: %[[B0:.*]] = index_cast %[[P0]] : i64 to index -// CHECK-TYPE0: %[[P1:.*]] = load %{{.*}}[%[[C1]]] : memref +// CHECK-TYPE0: %[[P1:.*]] = memref.load %{{.*}}[%[[C1]]] : memref // CHECK-TYPE0: %[[B1:.*]] = index_cast %[[P1]] : i64 to index // CHECK-TYPE0: scf.for %[[I:.*]] = %[[B0]] to %[[B1]] step %[[C1]] { -// CHECK-TYPE0: %[[IND0:.*]] = load %{{.*}}[%[[I]]] : memref +// CHECK-TYPE0: %[[IND0:.*]] = memref.load %{{.*}}[%[[I]]] : memref // CHECK-TYPE0: %[[INDC:.*]] = index_cast %[[IND0]] : i64 to index -// CHECK-TYPE0: %[[VAL0:.*]] = load %{{.*}}[%[[I]]] : memref -// CHECK-TYPE0: %[[VAL1:.*]] = load %{{.*}}[%[[INDC]]] : memref<32xf64> +// CHECK-TYPE0: %[[VAL0:.*]] = memref.load %{{.*}}[%[[I]]] : memref +// CHECK-TYPE0: %[[VAL1:.*]] = memref.load %{{.*}}[%[[INDC]]] : memref<32xf64> // CHECK-TYPE0: %[[MUL:.*]] = mulf %[[VAL0]], %[[VAL1]] : f64 // CHECK-TYPE0: store %[[MUL]], %{{.*}}[%[[INDC]]] : memref<32xf64> // CHECK-TYPE0: } @@ -45,15 +45,15 @@ // CHECK-TYPE1-LABEL: func @mul_dd( // CHECK-TYPE1: %[[C0:.*]] = constant 0 : index // CHECK-TYPE1: %[[C1:.*]] = constant 1 : index -// CHECK-TYPE1: %[[P0:.*]] = load %{{.*}}[%[[C0]]] : memref +// CHECK-TYPE1: %[[P0:.*]] = memref.load %{{.*}}[%[[C0]]] : memref // CHECK-TYPE1: %[[B0:.*]] = index_cast %[[P0]] : i64 to index -// CHECK-TYPE1: %[[P1:.*]] = load %{{.*}}[%[[C1]]] : memref +// CHECK-TYPE1: %[[P1:.*]] = memref.load %{{.*}}[%[[C1]]] : memref // CHECK-TYPE1: %[[B1:.*]] = index_cast %[[P1]] : i64 to index // CHECK-TYPE1: scf.for %[[I:.*]] = %[[B0]] to %[[B1]] step %[[C1]] { -// CHECK-TYPE1: %[[IND0:.*]] = load %{{.*}}[%[[I]]] : memref +// CHECK-TYPE1: %[[IND0:.*]] = memref.load %{{.*}}[%[[I]]] : memref // CHECK-TYPE1: %[[INDC:.*]] = index_cast %[[IND0]] : i32 to index -// CHECK-TYPE1: %[[VAL0:.*]] = load %{{.*}}[%[[I]]] : memref -// CHECK-TYPE1: %[[VAL1:.*]] = load %{{.*}}[%[[INDC]]] : memref<32xf64> +// CHECK-TYPE1: %[[VAL0:.*]] = memref.load %{{.*}}[%[[I]]] : memref +// CHECK-TYPE1: %[[VAL1:.*]] = memref.load %{{.*}}[%[[INDC]]] : memref<32xf64> // CHECK-TYPE1: %[[MUL:.*]] = mulf %[[VAL0]], %[[VAL1]] : f64 // CHECK-TYPE1: store %[[MUL]], %{{.*}}[%[[INDC]]] : memref<32xf64> // CHECK-TYPE1: } @@ -61,15 +61,15 @@ // CHECK-TYPE2-LABEL: func @mul_dd( // CHECK-TYPE2: %[[C0:.*]] = constant 0 : index // CHECK-TYPE2: %[[C1:.*]] = constant 1 : index -// CHECK-TYPE2: %[[P0:.*]] = load %{{.*}}[%[[C0]]] : memref +// CHECK-TYPE2: %[[P0:.*]] = memref.load %{{.*}}[%[[C0]]] : memref // CHECK-TYPE2: %[[B0:.*]] = index_cast %[[P0]] : i32 to index -// CHECK-TYPE2: %[[P1:.*]] = load %{{.*}}[%[[C1]]] : memref +// CHECK-TYPE2: %[[P1:.*]] = memref.load %{{.*}}[%[[C1]]] : memref // CHECK-TYPE2: %[[B1:.*]] = index_cast %[[P1]] : i32 to index // CHECK-TYPE2: scf.for %[[I:.*]] = %[[B0]] to %[[B1]] step %[[C1]] { -// CHECK-TYPE2: %[[IND0:.*]] = load %{{.*}}[%[[I]]] : memref +// CHECK-TYPE2: %[[IND0:.*]] = 
memref.load %{{.*}}[%[[I]]] : memref // CHECK-TYPE2: %[[INDC:.*]] = index_cast %[[IND0]] : i64 to index -// CHECK-TYPE2: %[[VAL0:.*]] = load %{{.*}}[%[[I]]] : memref -// CHECK-TYPE2: %[[VAL1:.*]] = load %{{.*}}[%[[INDC]]] : memref<32xf64> +// CHECK-TYPE2: %[[VAL0:.*]] = memref.load %{{.*}}[%[[I]]] : memref +// CHECK-TYPE2: %[[VAL1:.*]] = memref.load %{{.*}}[%[[INDC]]] : memref<32xf64> // CHECK-TYPE2: %[[MUL:.*]] = mulf %[[VAL0]], %[[VAL1]] : f64 // CHECK-TYPE2: store %[[MUL]], %{{.*}}[%[[INDC]]] : memref<32xf64> // CHECK-TYPE2: } @@ -77,15 +77,15 @@ // CHECK-TYPE3-LABEL: func @mul_dd( // CHECK-TYPE3: %[[C0:.*]] = constant 0 : index // CHECK-TYPE3: %[[C1:.*]] = constant 1 : index -// CHECK-TYPE3: %[[P0:.*]] = load %{{.*}}[%[[C0]]] : memref +// CHECK-TYPE3: %[[P0:.*]] = memref.load %{{.*}}[%[[C0]]] : memref // CHECK-TYPE3: %[[B0:.*]] = index_cast %[[P0]] : i32 to index -// CHECK-TYPE3: %[[P1:.*]] = load %{{.*}}[%[[C1]]] : memref +// CHECK-TYPE3: %[[P1:.*]] = memref.load %{{.*}}[%[[C1]]] : memref // CHECK-TYPE3: %[[B1:.*]] = index_cast %[[P1]] : i32 to index // CHECK-TYPE3: scf.for %[[I:.*]] = %[[B0]] to %[[B1]] step %[[C1]] { -// CHECK-TYPE3: %[[IND0:.*]] = load %{{.*}}[%[[I]]] : memref +// CHECK-TYPE3: %[[IND0:.*]] = memref.load %{{.*}}[%[[I]]] : memref // CHECK-TYPE3: %[[INDC:.*]] = index_cast %[[IND0]] : i32 to index -// CHECK-TYPE3: %[[VAL0:.*]] = load %{{.*}}[%[[I]]] : memref -// CHECK-TYPE3: %[[VAL1:.*]] = load %{{.*}}[%[[INDC]]] : memref<32xf64> +// CHECK-TYPE3: %[[VAL0:.*]] = memref.load %{{.*}}[%[[I]]] : memref +// CHECK-TYPE3: %[[VAL1:.*]] = memref.load %{{.*}}[%[[INDC]]] : memref<32xf64> // CHECK-TYPE3: %[[MUL:.*]] = mulf %[[VAL0]], %[[VAL1]] : f64 // CHECK-TYPE3: store %[[MUL]], %{{.*}}[%[[INDC]]] : memref<32xf64> // CHECK-TYPE3: } @@ -93,15 +93,15 @@ // CHECK-TYPE4-LABEL: func @mul_dd( // CHECK-TYPE4: %[[C0:.*]] = constant 0 : index // CHECK-TYPE4: %[[C1:.*]] = constant 1 : index -// CHECK-TYPE4: %[[P0:.*]] = load %{{.*}}[%[[C0]]] : memref +// CHECK-TYPE4: %[[P0:.*]] = memref.load %{{.*}}[%[[C0]]] : memref // CHECK-TYPE4: %[[B0:.*]] = index_cast %[[P0]] : i16 to index -// CHECK-TYPE4: %[[P1:.*]] = load %{{.*}}[%[[C1]]] : memref +// CHECK-TYPE4: %[[P1:.*]] = memref.load %{{.*}}[%[[C1]]] : memref // CHECK-TYPE4: %[[B1:.*]] = index_cast %[[P1]] : i16 to index // CHECK-TYPE4: scf.for %[[I:.*]] = %[[B0]] to %[[B1]] step %[[C1]] { -// CHECK-TYPE4: %[[IND0:.*]] = load %{{.*}}[%[[I]]] : memref +// CHECK-TYPE4: %[[IND0:.*]] = memref.load %{{.*}}[%[[I]]] : memref // CHECK-TYPE4: %[[INDC:.*]] = index_cast %[[IND0]] : i16 to index -// CHECK-TYPE4: %[[VAL0:.*]] = load %{{.*}}[%[[I]]] : memref -// CHECK-TYPE4: %[[VAL1:.*]] = load %{{.*}}[%[[INDC]]] : memref<32xf64> +// CHECK-TYPE4: %[[VAL0:.*]] = memref.load %{{.*}}[%[[I]]] : memref +// CHECK-TYPE4: %[[VAL1:.*]] = memref.load %{{.*}}[%[[INDC]]] : memref<32xf64> // CHECK-TYPE4: %[[MUL:.*]] = mulf %[[VAL0]], %[[VAL1]] : f64 // CHECK-TYPE4: store %[[MUL]], %{{.*}}[%[[INDC]]] : memref<32xf64> // CHECK-TYPE4: } @@ -109,15 +109,15 @@ // CHECK-TYPE5-LABEL: func @mul_dd( // CHECK-TYPE5: %[[C0:.*]] = constant 0 : index // CHECK-TYPE5: %[[C1:.*]] = constant 1 : index -// CHECK-TYPE5: %[[P0:.*]] = load %{{.*}}[%[[C0]]] : memref +// CHECK-TYPE5: %[[P0:.*]] = memref.load %{{.*}}[%[[C0]]] : memref // CHECK-TYPE5: %[[B0:.*]] = index_cast %[[P0]] : i8 to index -// CHECK-TYPE5: %[[P1:.*]] = load %{{.*}}[%[[C1]]] : memref +// CHECK-TYPE5: %[[P1:.*]] = memref.load %{{.*}}[%[[C1]]] : memref // CHECK-TYPE5: %[[B1:.*]] = index_cast %[[P1]] : i8 to index // 
CHECK-TYPE5: scf.for %[[I:.*]] = %[[B0]] to %[[B1]] step %[[C1]] { -// CHECK-TYPE5: %[[IND0:.*]] = load %{{.*}}[%[[I]]] : memref +// CHECK-TYPE5: %[[IND0:.*]] = memref.load %{{.*}}[%[[I]]] : memref // CHECK-TYPE5: %[[INDC:.*]] = index_cast %[[IND0]] : i8 to index -// CHECK-TYPE5: %[[VAL0:.*]] = load %{{.*}}[%[[I]]] : memref -// CHECK-TYPE5: %[[VAL1:.*]] = load %{{.*}}[%[[INDC]]] : memref<32xf64> +// CHECK-TYPE5: %[[VAL0:.*]] = memref.load %{{.*}}[%[[I]]] : memref +// CHECK-TYPE5: %[[VAL1:.*]] = memref.load %{{.*}}[%[[INDC]]] : memref<32xf64> // CHECK-TYPE5: %[[MUL:.*]] = mulf %[[VAL0]], %[[VAL1]] : f64 // CHECK-TYPE5: store %[[MUL]], %{{.*}}[%[[INDC]]] : memref<32xf64> // CHECK-TYPE5: } diff --git a/mlir/test/Dialect/Linalg/sparse_vector.mlir b/mlir/test/Dialect/Linalg/sparse_vector.mlir --- a/mlir/test/Dialect/Linalg/sparse_vector.mlir +++ b/mlir/test/Dialect/Linalg/sparse_vector.mlir @@ -24,7 +24,7 @@ // CHECK-VEC0-DAG: %[[c1:.*]] = constant 1 : index // CHECK-VEC0-DAG: %[[c1024:.*]] = constant 1024 : index // CHECK-VEC0: scf.for %[[i:.*]] = %[[c0]] to %[[c1024]] step %[[c1]] { -// CHECK-VEC0: %[[l:.*]] = load %{{.*}}[%[[i]]] : memref<1024xf32> +// CHECK-VEC0: %[[l:.*]] = memref.load %{{.*}}[%[[i]]] : memref<1024xf32> // CHECK-VEC0: %[[m:.*]] = mulf %[[l]], %{{.*}} : f32 // CHECK-VEC0: store %[[m]], %{{.*}}[%[[i]]] : memref<1024xf32> // CHECK-VEC0: } @@ -84,15 +84,15 @@ // CHECK-VEC0-LABEL: func @mul_s // CHECK-VEC0-DAG: %[[c0:.*]] = constant 0 : index // CHECK-VEC0-DAG: %[[c1:.*]] = constant 1 : index -// CHECK-VEC0: %[[p:.*]] = load %{{.*}}[%[[c0]]] : memref +// CHECK-VEC0: %[[p:.*]] = memref.load %{{.*}}[%[[c0]]] : memref // CHECK-VEC0: %[[q:.*]] = index_cast %[[p]] : i32 to index -// CHECK-VEC0: %[[r:.*]] = load %{{.*}}[%[[c1]]] : memref +// CHECK-VEC0: %[[r:.*]] = memref.load %{{.*}}[%[[c1]]] : memref // CHECK-VEC0: %[[s:.*]] = index_cast %[[r]] : i32 to index // CHECK-VEC0: scf.for %[[i:.*]] = %[[q]] to %[[s]] step %[[c1]] { -// CHECK-VEC0: %[[li:.*]] = load %{{.*}}[%[[i]]] : memref +// CHECK-VEC0: %[[li:.*]] = memref.load %{{.*}}[%[[i]]] : memref // CHECK-VEC0: %[[ci:.*]] = index_cast %[[li]] : i32 to index -// CHECK-VEC0: %[[la:.*]] = load %{{.*}}[%[[i]]] : memref -// CHECK-VEC0: %[[lb:.*]] = load %{{.*}}[%[[ci]]] : memref<1024xf32> +// CHECK-VEC0: %[[la:.*]] = memref.load %{{.*}}[%[[i]]] : memref +// CHECK-VEC0: %[[lb:.*]] = memref.load %{{.*}}[%[[ci]]] : memref<1024xf32> // CHECK-VEC0: %[[m:.*]] = mulf %[[la]], %[[lb]] : f32 // CHECK-VEC0: store %[[m]], %{{.*}}[%[[ci]]] : memref<1024xf32> // CHECK-VEC0: } @@ -101,15 +101,15 @@ // CHECK-VEC1-LABEL: func @mul_s // CHECK-VEC1-DAG: %[[c0:.*]] = constant 0 : index // CHECK-VEC1-DAG: %[[c1:.*]] = constant 1 : index -// CHECK-VEC1: %[[p:.*]] = load %{{.*}}[%[[c0]]] : memref +// CHECK-VEC1: %[[p:.*]] = memref.load %{{.*}}[%[[c0]]] : memref // CHECK-VEC1: %[[q:.*]] = index_cast %[[p]] : i32 to index -// CHECK-VEC1: %[[r:.*]] = load %{{.*}}[%[[c1]]] : memref +// CHECK-VEC1: %[[r:.*]] = memref.load %{{.*}}[%[[c1]]] : memref // CHECK-VEC1: %[[s:.*]] = index_cast %[[r]] : i32 to index // CHECK-VEC1: scf.for %[[i:.*]] = %[[q]] to %[[s]] step %[[c1]] { -// CHECK-VEC1: %[[li:.*]] = load %{{.*}}[%[[i]]] : memref +// CHECK-VEC1: %[[li:.*]] = memref.load %{{.*}}[%[[i]]] : memref // CHECK-VEC1: %[[ci:.*]] = index_cast %[[li]] : i32 to index -// CHECK-VEC1: %[[la:.*]] = load %{{.*}}[%[[i]]] : memref -// CHECK-VEC1: %[[lb:.*]] = load %{{.*}}[%[[ci]]] : memref<1024xf32> +// CHECK-VEC1: %[[la:.*]] = memref.load %{{.*}}[%[[i]]] : memref +// CHECK-VEC1: 
%[[lb:.*]] = memref.load %{{.*}}[%[[ci]]] : memref<1024xf32> // CHECK-VEC1: %[[m:.*]] = mulf %[[la]], %[[lb]] : f32 // CHECK-VEC1: store %[[m]], %{{.*}}[%[[ci]]] : memref<1024xf32> // CHECK-VEC1: } @@ -119,9 +119,9 @@ // CHECK-VEC2-DAG: %[[c0:.*]] = constant 0 : index // CHECK-VEC2-DAG: %[[c1:.*]] = constant 1 : index // CHECK-VEC2-DAG: %[[c16:.*]] = constant 16 : index -// CHECK-VEC2: %[[p:.*]] = load %{{.*}}[%[[c0]]] : memref +// CHECK-VEC2: %[[p:.*]] = memref.load %{{.*}}[%[[c0]]] : memref // CHECK-VEC2: %[[q:.*]] = index_cast %[[p]] : i32 to index -// CHECK-VEC2: %[[r:.*]] = load %{{.*}}[%[[c1]]] : memref +// CHECK-VEC2: %[[r:.*]] = memref.load %{{.*}}[%[[c1]]] : memref // CHECK-VEC2: %[[s:.*]] = index_cast %[[r]] : i32 to index // CHECK-VEC2: scf.for %[[i:.*]] = %[[q]] to %[[s]] step %[[c16]] { // CHECK-VEC2: %[[sub:.*]] = subi %[[s]], %[[i]] : index @@ -150,9 +150,9 @@ // CHECK-VEC2-DAG: %[[c0:.*]] = constant 0 : index // CHECK-VEC2-DAG: %[[c1:.*]] = constant 1 : index // CHECK-VEC2-DAG: %[[c16:.*]] = constant 16 : index -// CHECK-VEC2: %[[p:.*]] = load %{{.*}}[%[[c0]]] : memref +// CHECK-VEC2: %[[p:.*]] = memref.load %{{.*}}[%[[c0]]] : memref // CHECK-VEC2: %[[q:.*]] = index_cast %[[p]] : i32 to index -// CHECK-VEC2: %[[r:.*]] = load %{{.*}}[%[[c1]]] : memref +// CHECK-VEC2: %[[r:.*]] = memref.load %{{.*}}[%[[c1]]] : memref // CHECK-VEC2: %[[s:.*]] = index_cast %[[r]] : i32 to index // CHECK-VEC2: scf.for %[[i:.*]] = %[[q]] to %[[s]] step %[[c16]] { // CHECK-VEC2: %[[sub:.*]] = subi %[[s]], %[[i]] : index @@ -200,8 +200,8 @@ // CHECK-VEC0-DAG: %[[c1:.*]] = constant 1 : index // CHECK-VEC0-DAG: %[[c1024:.*]] = constant 1024 : index // CHECK-VEC0: %[[red:.*]] = scf.for %[[i:.*]] = %[[c0]] to %[[c1024]] step %[[c1]] iter_args(%[[red_in:.*]] = %{{.*}}) -> (f32) { -// CHECK-VEC0: %[[la:.*]] = load %{{.*}}[%[[i]]] : memref<1024xf32> -// CHECK-VEC0: %[[lb:.*]] = load %{{.*}}[%[[i]]] : memref<1024xf32> +// CHECK-VEC0: %[[la:.*]] = memref.load %{{.*}}[%[[i]]] : memref<1024xf32> +// CHECK-VEC0: %[[lb:.*]] = memref.load %{{.*}}[%[[i]]] : memref<1024xf32> // CHECK-VEC0: %[[m:.*]] = mulf %[[la]], %[[lb]] : f32 // CHECK-VEC0: %[[a:.*]] = addf %[[red_in]], %[[m]] : f32 // CHECK-VEC0: scf.yield %[[a]] : f32 @@ -302,16 +302,16 @@ // CHECK-VEC0-DAG: %[[c1:.*]] = constant 1 : index // CHECK-VEC0-DAG: %[[c512:.*]] = constant 512 : index // CHECK-VEC0: scf.for %[[i:.*]] = %[[c0]] to %[[c512]] step %[[c1]] { -// CHECK-VEC0: %[[p:.*]] = load %{{.*}}[%[[i]]] : memref +// CHECK-VEC0: %[[p:.*]] = memref.load %{{.*}}[%[[i]]] : memref // CHECK-VEC0: %[[q:.*]] = index_cast %[[p]] : i32 to index // CHECK-VEC0: %[[a:.*]] = addi %[[i]], %[[c1]] : index -// CHECK-VEC0: %[[r:.*]] = load %{{.*}}[%[[a]]] : memref +// CHECK-VEC0: %[[r:.*]] = memref.load %{{.*}}[%[[a]]] : memref // CHECK-VEC0: %[[s:.*]] = index_cast %[[r]] : i32 to index // CHECK-VEC0: scf.for %[[j:.*]] = %[[q]] to %[[s]] step %[[c1]] { -// CHECK-VEC0: %[[lj:.*]] = load %{{.*}}[%[[j]]] : memref +// CHECK-VEC0: %[[lj:.*]] = memref.load %{{.*}}[%[[j]]] : memref // CHECK-VEC0: %[[cj:.*]] = index_cast %[[lj]] : i32 to index -// CHECK-VEC0: %[[la:.*]] = load %{{.*}}[%[[j]]] : memref -// CHECK-VEC0: %[[lb:.*]] = load %{{.*}}[%[[i]], %[[cj]]] : memref<512x1024xf32> +// CHECK-VEC0: %[[la:.*]] = memref.load %{{.*}}[%[[j]]] : memref +// CHECK-VEC0: %[[lb:.*]] = memref.load %{{.*}}[%[[i]], %[[cj]]] : memref<512x1024xf32> // CHECK-VEC0: %[[m:.*]] = mulf %[[la]], %[[lb]] : f32 // CHECK-VEC0: store %[[m]], %{{.*}}[%[[i]], %[[cj]]] : memref<512x1024xf32> // 
CHECK-VEC0: } @@ -323,16 +323,16 @@ // CHECK-VEC1-DAG: %[[c1:.*]] = constant 1 : index // CHECK-VEC1-DAG: %[[c512:.*]] = constant 512 : index // CHECK-VEC1: scf.for %[[i:.*]] = %[[c0]] to %[[c512]] step %[[c1]] { -// CHECK-VEC1: %[[p:.*]] = load %{{.*}}[%[[i]]] : memref +// CHECK-VEC1: %[[p:.*]] = memref.load %{{.*}}[%[[i]]] : memref // CHECK-VEC1: %[[q:.*]] = index_cast %[[p]] : i32 to index // CHECK-VEC1: %[[a:.*]] = addi %[[i]], %[[c1]] : index -// CHECK-VEC1: %[[r:.*]] = load %{{.*}}[%[[a]]] : memref +// CHECK-VEC1: %[[r:.*]] = memref.load %{{.*}}[%[[a]]] : memref // CHECK-VEC1: %[[s:.*]] = index_cast %[[r]] : i32 to index // CHECK-VEC1: scf.for %[[j:.*]] = %[[q]] to %[[s]] step %[[c1]] { -// CHECK-VEC1: %[[lj:.*]] = load %{{.*}}[%[[j]]] : memref +// CHECK-VEC1: %[[lj:.*]] = memref.load %{{.*}}[%[[j]]] : memref // CHECK-VEC1: %[[cj:.*]] = index_cast %[[lj]] : i32 to index -// CHECK-VEC1: %[[la:.*]] = load %{{.*}}[%[[j]]] : memref -// CHECK-VEC1: %[[lb:.*]] = load %{{.*}}[%[[i]], %[[cj]]] : memref<512x1024xf32> +// CHECK-VEC1: %[[la:.*]] = memref.load %{{.*}}[%[[j]]] : memref +// CHECK-VEC1: %[[lb:.*]] = memref.load %{{.*}}[%[[i]], %[[cj]]] : memref<512x1024xf32> // CHECK-VEC1: %[[m:.*]] = mulf %[[la]], %[[lb]] : f32 // CHECK-VEC1: store %[[m]], %{{.*}}[%[[i]], %[[cj]]] : memref<512x1024xf32> // CHECK-VEC1: } @@ -345,10 +345,10 @@ // CHECK-VEC2-DAG: %[[c16:.*]] = constant 16 : index // CHECK-VEC2-DAG: %[[c512:.*]] = constant 512 : index // CHECK-VEC2: scf.for %[[i:.*]] = %[[c0]] to %[[c512]] step %[[c1]] { -// CHECK-VEC2: %[[p:.*]] = load %{{.*}}[%[[i]]] : memref +// CHECK-VEC2: %[[p:.*]] = memref.load %{{.*}}[%[[i]]] : memref // CHECK-VEC2: %[[q:.*]] = index_cast %[[p]] : i32 to index // CHECK-VEC2: %[[a:.*]] = addi %[[i]], %[[c1]] : index -// CHECK-VEC2: %[[r:.*]] = load %{{.*}}[%[[a]]] : memref +// CHECK-VEC2: %[[r:.*]] = memref.load %{{.*}}[%[[a]]] : memref // CHECK-VEC2: %[[s:.*]] = index_cast %[[r]] : i32 to index // CHECK-VEC2: scf.for %[[j:.*]] = %[[q]] to %[[s]] step %[[c16]] { // CHECK-VEC2: %[[sub:.*]] = subi %[[s]], %[[j]] : index diff --git a/mlir/test/Dialect/Linalg/standard.mlir b/mlir/test/Dialect/Linalg/standard.mlir --- a/mlir/test/Dialect/Linalg/standard.mlir +++ b/mlir/test/Dialect/Linalg/standard.mlir @@ -20,11 +20,11 @@ // CHECK-SAME: %[[arg0:[a-zA-z0-9]*]]: memref, // CHECK-SAME: %[[arg1:[a-zA-z0-9]*]]: memref, // CHECK-SAME: %[[arg2:[a-zA-z0-9]*]]: memref) { -// CHECK: %[[o0:.*]] = memref_cast %[[arg0]] : +// CHECK: %[[o0:.*]] = memref.cast %[[arg0]] : // CHECK-SAME: memref to memref -// CHECK: %[[o1:.*]] = memref_cast %[[arg1]] : +// CHECK: %[[o1:.*]] = memref.cast %[[arg1]] : // CHECK-SAME: memref to memref -// CHECK: %[[o2:.*]] = memref_cast %[[arg2]] : +// CHECK: %[[o2:.*]] = memref.cast %[[arg2]] : // CHECK-SAME: memref to memref // CHECK: call @linalg_dot_viewsxf32_viewsxf32_viewf32( // CHECK-SAME: %[[o0]], %[[o1]], %[[o2]]) : @@ -37,9 +37,9 @@ // CHECK-LABEL: func @copy( // CHECK-SAME: %[[arg0:[a-zA-z0-9]*]]: memref, // CHECK-SAME: %[[arg1:[a-zA-z0-9]*]]: memref) { -// CHECK: %[[o0:.*]] = memref_cast %[[arg0]] : +// CHECK: %[[o0:.*]] = memref.cast %[[arg0]] : // CHECK-SAME: memref to memref -// CHECK: %[[o1:.*]] = memref_cast %[[arg1]] : +// CHECK: %[[o1:.*]] = memref.cast %[[arg1]] : // CHECK-SAME: memref to memref // CHECK: call @linalg_copy_viewsxsxsxf32_viewsxsxsxf32(%[[o0]], %[[o1]]) : // CHECK-SAME: memref, memref @@ -53,13 +53,13 @@ // CHECK-LABEL: func @copy_transpose( // CHECK-SAME: %[[arg0:[a-zA-z0-9]*]]: memref, // CHECK-SAME: 
%[[arg1:[a-zA-z0-9]*]]: memref) { -// CHECK: %[[t0:.*]] = transpose %[[arg0]] +// CHECK: %[[t0:.*]] = memref.transpose %[[arg0]] // CHECK-SAME: (d0, d1, d2) -> (d0, d2, d1) : memref -// CHECK: %[[t1:.*]] = transpose %[[arg1]] +// CHECK: %[[t1:.*]] = memref.transpose %[[arg1]] // CHECK-SAME: (d0, d1, d2) -> (d2, d1, d0) : memref -// CHECK: %[[o0:.*]] = memref_cast %[[t0]] : +// CHECK: %[[o0:.*]] = memref.cast %[[t0]] : // CHECK-SAME: memref to memref -// CHECK: %[[o1:.*]] = memref_cast %[[t1]] : +// CHECK: %[[o1:.*]] = memref.cast %[[t1]] : // CHECK-SAME: memref to memref // CHECK: call @linalg_copy_viewsxsxsxf32_viewsxsxsxf32(%[[o0]], %[[o1]]) : // CHECK-SAME: memref, memref diff --git a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir b/mlir/test/Dialect/Linalg/tile-and-distribute.mlir --- a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir +++ b/mlir/test/Dialect/Linalg/tile-and-distribute.mlir @@ -16,12 +16,12 @@ // CHECK: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"} // CHECK: scf.for %[[ARG3:.*]] = // CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]] +// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]] // CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]] +// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]] // CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] // CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX]]] +// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX]]] // CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] // ----- @@ -48,12 +48,12 @@ // CHECK: scf.if %[[INBOUNDS]] // CHECK: scf.for %[[ARG3:.*]] = // CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]] +// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]] // CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]] +// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]] // CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] // CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]] +// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]] // CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] // ----- @@ -80,9 +80,9 @@ // CHECK: %[[STEPX:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSX]]] // CHECK: scf.parallel (%[[ARG3:.*]], %[[ARG4:.*]]) = (%[[LBY]], %[[LBX]]) to (%{{.*}}, %{{.*}}) step (%[[STEPY]], %[[STEPX]]) // CHECK: scf.for %[[ARG5:.*]] = -// CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[ARG3]], %[[ARG5]]] -// CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG5]], %[[ARG4]]] -// CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[ARG3]], %[[ARG4]]] +// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG5]]] +// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG5]], %[[ARG4]]] +// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[ARG3]], %[[ARG4]]] // CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] // ----- @@ -106,12 +106,12 @@ // CHECK: scf.if %[[INBOUNDS]] // CHECK: scf.for %[[ARG3:.*]] = // CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: 
%[[SV1:.*]] = subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]] +// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]] // CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]] +// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]] // CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] // CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]] +// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]] // CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] // ----- @@ -139,10 +139,10 @@ // CHECK: scf.parallel (%[[ARG3.*]]) = (%[[LBX]]) to (%{{.*}}) step (%[[STEPX]]) // CHECK: scf.for %[[ARG4:.*]] = // CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[OFFSETY]], %[[ARG4]]] -// CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG4]], %[[ARG3]]] +// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG4]]] +// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG4]], %[[ARG3]]] // CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[OFFSETY_2]], %[[ARG3]]] +// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[ARG3]]] // CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] // ----- @@ -166,11 +166,11 @@ // CHECK: %[[STEPY:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSY]]] // CHECK: scf.parallel (%[[ARG3.*]]) = (%[[LBY]]) to (%{{.*}}) step (%[[STEPY]]) // CHECK: scf.for %[[ARG4:.*]] = -// CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[ARG3]], %[[ARG4]]] +// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG4]]] // CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG4]], %[[OFFSETX]]] +// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG4]], %[[OFFSETX]]] // CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[ARG3]], %[[OFFSETX_2]]] +// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[ARG3]], %[[OFFSETX_2]]] // CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] // ----- diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir --- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir @@ -16,9 +16,9 @@ %c0 = constant 0 : index %c3 = constant 3 : index %c1 = constant 1 : index - %0 = dim %t0, %c0 : tensor - %1 = dim %t0, %c1 : tensor - %2 = dim %arg1, %c1 : tensor + %0 = memref.dim %t0, %c0 : tensor + %1 = memref.dim %t0, %c1 : tensor + %2 = memref.dim %arg1, %c1 : tensor %3 = scf.for %arg3 = %c0 to %0 step %c2 iter_args(%arg4 = %arg2) -> (tensor) { %4 = scf.for %arg5 = %c0 to %2 step %c3 iter_args(%arg6 = %arg4) -> (tensor) { %5 = scf.for %arg7 = %c0 to %1 step %c4 iter_args(%arg8 = %arg6) -> (tensor) { @@ -42,7 +42,7 @@ // CHECK-SAME: %[[C:[0-9a-z]*]]: tensor // CHECK-DAG: %[[C0:.*]] = constant 0 : index // CHECK-DAG: %[[C1:.*]] = constant 1 : index -// CHECK-DAG: %[[dA1:.*]] = dim %[[A]], %[[C1]] : tensor +// CHECK-DAG: %[[dA1:.*]] = memref.dim %[[A]], %[[C1]] : tensor // CHECK: scf.for %[[I:[0-9a-z]*]] // CHECK: %[[stA:.*]] = subtensor %[[A]][%[[I]], 0] [2, %[[dA1]]] [1, 1] : tensor to tensor<2x?xf32> // CHECK-NEXT: scf.for %[[J:[0-9a-z]*]] diff --git 
a/mlir/test/Dialect/Linalg/tile-conv-padding.mlir b/mlir/test/Dialect/Linalg/tile-conv-padding.mlir --- a/mlir/test/Dialect/Linalg/tile-conv-padding.mlir +++ b/mlir/test/Dialect/Linalg/tile-conv-padding.mlir @@ -21,18 +21,18 @@ // TILE-20000-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref) // TILE-20000-DAG: %[[C0:.*]] = constant 0 : index // TILE-20000-DAG: %[[C2:.*]] = constant 2 : index -// TILE-20000: %[[B:.*]] = dim %[[ARG1]], %c0 +// TILE-20000: %[[B:.*]] = memref.dim %[[ARG1]], %c0 // TILE-20000: scf.for %[[ivI:.*]] = %[[C0]] to %[[B]] step %[[C2]] { -// TILE-20000: %[[DIM10:.*]] = dim %[[ARG1]], %c0 +// TILE-20000: %[[DIM10:.*]] = memref.dim %[[ARG1]], %c0 // TILE-20000: %[[EXTENT:.*]] = affine.min #[[$minmap]](%[[ivI]])[%[[DIM10]]] -// TILE-20000: %[[DIM11:.*]] = dim %[[ARG1]], %c1 -// TILE-20000: %[[DIM12:.*]] = dim %[[ARG1]], %c2 -// TILE-20000: %[[DIM13:.*]] = dim %[[ARG1]], %c3 -// TILE-20000: %[[SUBVIEW1:.*]] = subview %[[ARG1]][%[[ivI]], 0, 0, 0] [%[[EXTENT]], %[[DIM11]], %[[DIM12]], %[[DIM13]]] -// TILE-20000: %[[DIM20:.*]] = dim %[[ARG2]], %c0 +// TILE-20000: %[[DIM11:.*]] = memref.dim %[[ARG1]], %c1 +// TILE-20000: %[[DIM12:.*]] = memref.dim %[[ARG1]], %c2 +// TILE-20000: %[[DIM13:.*]] = memref.dim %[[ARG1]], %c3 +// TILE-20000: %[[SUBVIEW1:.*]] = memref.subview %[[ARG1]][%[[ivI]], 0, 0, 0] [%[[EXTENT]], %[[DIM11]], %[[DIM12]], %[[DIM13]]] +// TILE-20000: %[[DIM20:.*]] = memref.dim %[[ARG2]], %c0 // TILE-20000: %[[EXTENT:.*]] = affine.min #[[$minmap]](%[[ivI]])[%[[DIM20]]] -// TILE-20000: %[[DIM21:.*]] = dim %[[ARG2]], %c1 -// TILE-20000: %[[DIM22:.*]] = dim %[[ARG2]], %c2 -// TILE-20000: %[[DIM23:.*]] = dim %[[ARG2]], %c3 -// TILE-20000: %[[SUBVIEW2:.*]] = subview %[[ARG2]][%[[ivI]], 0, 0, 0] [%[[EXTENT]], %[[DIM21]], %[[DIM22]], %[[DIM23]]] +// TILE-20000: %[[DIM21:.*]] = memref.dim %[[ARG2]], %c1 +// TILE-20000: %[[DIM22:.*]] = memref.dim %[[ARG2]], %c2 +// TILE-20000: %[[DIM23:.*]] = memref.dim %[[ARG2]], %c3 +// TILE-20000: %[[SUBVIEW2:.*]] = memref.subview %[[ARG2]][%[[ivI]], 0, 0, 0] [%[[EXTENT]], %[[DIM21]], %[[DIM22]], %[[DIM23]]] // TILE-20000: linalg.conv(%[[ARG0]], %[[SUBVIEW1]], %[[SUBVIEW2]]) diff --git a/mlir/test/Dialect/Linalg/tile-conv.mlir b/mlir/test/Dialect/Linalg/tile-conv.mlir --- a/mlir/test/Dialect/Linalg/tile-conv.mlir +++ b/mlir/test/Dialect/Linalg/tile-conv.mlir @@ -17,30 +17,30 @@ // TILE-23004-DAG: %[[C2:.*]] = constant 2 : index // TILE-23004-DAG: %[[C3:.*]] = constant 3 : index // TILE-23004-DAG: %[[C4:.*]] = constant 4 : index -// TILE-23004: %[[Z0:.*]] = dim %[[ARG0]], %c0 : memref -// TILE-23004: %[[Q:.*]] = dim %[[ARG0]], %c2 : memref -// TILE-23004: %[[B:.*]] = dim %[[ARG1]], %c0 : memref -// TILE-23004: %[[X0:.*]] = dim %[[ARG2]], %c1 : memref +// TILE-23004: %[[Z0:.*]] = memref.dim %[[ARG0]], %c0 : memref +// TILE-23004: %[[Q:.*]] = memref.dim %[[ARG0]], %c2 : memref +// TILE-23004: %[[B:.*]] = memref.dim %[[ARG1]], %c0 : memref +// TILE-23004: %[[X0:.*]] = memref.dim %[[ARG2]], %c1 : memref // TILE-23004: scf.for %[[ivI:.*]] = %{{.*}} to %[[B]] step %{{.*}} { // TILE-23004: scf.for %[[ivJ:.*]] = %{{.*}} to %[[X0]] step %{{.*}} { // TILE-23004: scf.for %[[ivK:.*]] = %{{.*}} to %[[Q]] step %{{.*}} { -// TILE-23004: %[[Z0_1:.*]] = dim %[[ARG0]], %c0 : memref -// TILE-23004: %[[Z1:.*]] = dim %[[ARG0]], %c1 : memref -// TILE-23004: %[[Z2:.*]] = dim %[[ARG0]], %c2 : memref +// TILE-23004: %[[Z0_1:.*]] = memref.dim %[[ARG0]], %c0 : memref +// TILE-23004: %[[Z1:.*]] = memref.dim %[[ARG0]], %c1 : memref +// TILE-23004: %[[Z2:.*]] = memref.dim 
%[[ARG0]], %c2 : memref // TILE-23004: %[[szK:.*]] = affine.min #[[$bound_map_4]](%[[ivK]])[%[[Z2]]] -// TILE-23004: %[[K:.*]] = dim %[[ARG0]], %c3 : memref -// TILE-23004: %[[FilterView:.*]] = subview %{{.*}}[0, 0, %[[ivK]], 0] [%[[Z0_1]], %[[Z1]], %[[szK]], %[[K]]] [1, 1, 1, 1] : memref to memref +// TILE-23004: %[[K:.*]] = memref.dim %[[ARG0]], %c3 : memref +// TILE-23004: %[[FilterView:.*]] = memref.subview %{{.*}}[0, 0, %[[ivK]], 0] [%[[Z0_1]], %[[Z1]], %[[szK]], %[[K]]] [1, 1, 1, 1] : memref to memref // // TILE-23004: %[[J1:.*]] = affine.apply #[[$D0x30pS0x10]](%[[ivJ]]) -// TILE-23004: %[[PaddedInput0b:.*]] = dim %[[ARG1]], %c1 : memref +// TILE-23004: %[[PaddedInput0b:.*]] = memref.dim %[[ARG1]], %c1 : memref // TILE-23004: %[[I1pStep:.*]] = affine.min #[[$S0x10p90D0x30pS1]](%[[ivJ]])[%[[Z0]], %[[PaddedInput0b]]] -// TILE-23004: %[[SZ2:.*]] = dim %[[ARG1]], %c2 : memref -// TILE-23004: %[[dim3:.*]] = dim %[[ARG1]], %c3 +// TILE-23004: %[[SZ2:.*]] = memref.dim %[[ARG1]], %c2 : memref +// TILE-23004: %[[dim3:.*]] = memref.dim %[[ARG1]], %c3 // TILE-23004: %[[sz3:.*]] = affine.min #[[$bound_map_4]](%[[ivK]])[%[[dim3]]] -// TILE-23004: %[[InputView:.*]] = subview %{{.*}}[%[[ivI]], %[[J1]], 0, %[[ivK]]] [%{{.*}}, %{{.*}}, %[[SZ2]], %[[sz3]]] [1, 1, 1, 1] : memref to memref +// TILE-23004: %[[InputView:.*]] = memref.subview %{{.*}}[%[[ivI]], %[[J1]], 0, %[[ivK]]] [%{{.*}}, %{{.*}}, %[[SZ2]], %[[sz3]]] [1, 1, 1, 1] : memref to memref // -// TILE-23004: %[[X0:.*]] = dim %[[ARG2]], %c2 : memref -// TILE-23004: %[[X1:.*]] = dim %[[ARG2]], %c3 : memref -// TILE-23004: %[[OutputView:.*]] = subview %{{.*}}[%[[ivI]], %[[ivJ]], 0, 0] [%{{.*}}, %{{.*}}, %[[X0]], %[[X1]]] [1, 1, 1, 1] : memref to memref +// TILE-23004: %[[X0:.*]] = memref.dim %[[ARG2]], %c2 : memref +// TILE-23004: %[[X1:.*]] = memref.dim %[[ARG2]], %c3 : memref +// TILE-23004: %[[OutputView:.*]] = memref.subview %{{.*}}[%[[ivI]], %[[ivJ]], 0, 0] [%{{.*}}, %{{.*}}, %[[X0]], %[[X1]]] [1, 1, 1, 1] : memref to memref // // TILE-23004: linalg.conv(%[[FilterView]], %[[InputView]], %[[OutputView]]) {dilations = [10, 20], strides = [30, 40]} : memref, memref, memref diff --git a/mlir/test/Dialect/Linalg/tile-parallel-reduce.mlir b/mlir/test/Dialect/Linalg/tile-parallel-reduce.mlir --- a/mlir/test/Dialect/Linalg/tile-parallel-reduce.mlir +++ b/mlir/test/Dialect/Linalg/tile-parallel-reduce.mlir @@ -18,18 +18,18 @@ // CHECK-SAME: step (%[[C2]], %[[C4]]) // CHECK: scf.for %[[ARG5:.*]] = // CHECK-SAME: step %[[C8]] -// CHECK: %[[SV1:.*]] = subview %{{.*}}[%[[ARG3]], %[[ARG5]]] -// CHECK: %[[SV2:.*]] = subview %{{.*}}[%[[ARG5]], %[[ARG4]]] -// CHECK: %[[SV3:.*]] = subview %{{.*}}[%[[ARG3]], %[[ARG4]]] +// CHECK: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG5]]] +// CHECK: %[[SV2:.*]] = memref.subview %{{.*}}[%[[ARG5]], %[[ARG4]]] +// CHECK: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG4]]] // CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] // TILE1-LABEL: func @gemm // TILE1-DAG: %[[C2:.*]] = constant 2 : index // TILE1: scf.parallel (%[[ARG3:.*]]) = // TILE1-SAME: step (%[[C2]]) -// TILE1: %[[SV1:.*]] = subview %{{.*}}[%[[ARG3]], 0] -// TILE1: %[[SV3:.*]] = subview %{{.*}}[%[[ARG3]], 0] -// TILE1-NOT: subview +// TILE1: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0] +// TILE1: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0] +// TILE1-NOT: memref.subview // TILE1: linalg.matmul ins(%[[SV1]], %{{.*}} outs(%[[SV3]] // TILE2-LABEL: func @gemm @@ -37,9 +37,9 @@ // TILE2-DAG: %[[C4:.*]] = constant 4 : index 
// TILE2: scf.parallel (%[[ARG3:.*]], %[[ARG4:.*]]) = // TILE2-SAME: step (%[[C2]], %[[C4]]) -// TILE2: %[[SV1:.*]] = subview %{{.*}}[%[[ARG3]], 0] -// TILE2: %[[SV2:.*]] = subview %{{.*}}[0, %[[ARG4]]] -// TILE2: %[[SV3:.*]] = subview %{{.*}}[%[[ARG3]], %[[ARG4]]] +// TILE2: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0] +// TILE2: %[[SV2:.*]] = memref.subview %{{.*}}[0, %[[ARG4]]] +// TILE2: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG4]]] // TILE2: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] // ----- @@ -80,9 +80,9 @@ // CHECK-SAME: step (%[[C4]]) // CHECK: scf.for %[[ARG5:.*]] = // CHECK-SAME: step %[[C8]] -// CHECK: %[[SV1:.*]] = subview %{{.*}}[%[[ARG3]], %[[ARG4]], %[[ARG5]]] -// CHECK: %[[SV2:.*]] = subview %{{.*}}[%[[ARG3]], %[[ARG5]]] -// CHECK: %[[SV3:.*]] = subview %{{.*}}[%[[ARG4]]] +// CHECK: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG4]], %[[ARG5]]] +// CHECK: %[[SV2:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG5]]] +// CHECK: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG4]]] // CHECK: linalg.generic // CHECK-SAME: ins(%[[SV1]], %[[SV2]] // CHECK-SAME: outs(%[[SV3]] @@ -91,9 +91,9 @@ // TILE1-DAG: %[[C2:.*]] = constant 2 : index // TILE1: scf.for %[[ARG3:.*]] = // TILE1-SAME: step %[[C2]] -// TILE1: %[[SV1:.*]] = subview %{{.*}}[%[[ARG3]], 0, 0] -// TILE1: %[[SV2:.*]] = subview %{{.*}}[%[[ARG3]], 0] -// TILE1-NOT: subview +// TILE1: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0, 0] +// TILE1: %[[SV2:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0] +// TILE1-NOT: memref.subview // TILE1: linalg.generic // TILE1-SAME: ins(%[[SV1]], %[[SV2]] // TILE1-SAME: outs(%{{.*}} @@ -105,9 +105,9 @@ // TILE2-SAME: step %[[C2]] // TILE2: scf.parallel (%[[ARG4:.*]]) = // TILE2-SAME: step (%[[C4]]) -// TILE2: %[[SV1:.*]] = subview %{{.*}}[%[[ARG3]], %[[ARG4]], 0] -// TILE2: %[[SV2:.*]] = subview %{{.*}}[%[[ARG3]], 0] -// TILE2: %[[SV3:.*]] = subview %{{.*}}[%[[ARG4]]] +// TILE2: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG4]], 0] +// TILE2: %[[SV2:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0] +// TILE2: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG4]]] // TILE2: linalg.generic // TILE2-SAME: ins(%[[SV1]], %[[SV2]] // TILE2-SAME: outs(%[[SV3]] diff --git a/mlir/test/Dialect/Linalg/tile-parallel.mlir b/mlir/test/Dialect/Linalg/tile-parallel.mlir --- a/mlir/test/Dialect/Linalg/tile-parallel.mlir +++ b/mlir/test/Dialect/Linalg/tile-parallel.mlir @@ -28,24 +28,24 @@ // TILE-2-SAME: [[LHS:%.*]]: {{.*}}, [[RHS:%.*]]: {{.*}}, [[SUM:%.*]]: {{.*}}) { // TILE-2-DAG: [[C0:%.*]] = constant 0 : index // TILE-2-DAG: [[C2:%.*]] = constant 2 : index -// TILE-2: [[LHS_ROWS:%.*]] = dim [[LHS]], %c0 +// TILE-2: [[LHS_ROWS:%.*]] = memref.dim [[LHS]], %c0 // TILE-2: scf.parallel ([[I:%.*]]) = ([[C0]]) to ([[LHS_ROWS]]) step ([[C2]]) { // TILE-2-NO: scf.parallel -// TILE-2: [[LHS_SUBVIEW:%.*]] = subview [[LHS]] -// TILE-2: [[RHS_SUBVIEW:%.*]] = subview [[RHS]] -// TILE-2: [[SUM_SUBVIEW:%.*]] = subview [[SUM]] +// TILE-2: [[LHS_SUBVIEW:%.*]] = memref.subview [[LHS]] +// TILE-2: [[RHS_SUBVIEW:%.*]] = memref.subview [[RHS]] +// TILE-2: [[SUM_SUBVIEW:%.*]] = memref.subview [[SUM]] // TILE-2: linalg.generic {{.*}} ins([[LHS_SUBVIEW]], [[RHS_SUBVIEW]]{{.*}} outs([[SUM_SUBVIEW]] // TILE-02-LABEL: func @sum( // TILE-02-SAME: [[LHS:%.*]]: {{.*}}, [[RHS:%.*]]: {{.*}}, [[SUM:%.*]]: {{.*}}) { // TILE-02-DAG: [[C0:%.*]] = constant 0 : index // TILE-02-DAG: [[C2:%.*]] = constant 2 : index -// TILE-02: [[LHS_COLS:%.*]] = dim [[LHS]], %c1 +// TILE-02: [[LHS_COLS:%.*]] = memref.dim 
[[LHS]], %c1 // TILE-02: scf.parallel ([[I:%.*]]) = ([[C0]]) to ([[LHS_COLS]]) step ([[C2]]) { // TILE-02-NO: scf.parallel -// TILE-02: [[LHS_SUBVIEW:%.*]] = subview [[LHS]] -// TILE-02: [[RHS_SUBVIEW:%.*]] = subview [[RHS]] -// TILE-02: [[SUM_SUBVIEW:%.*]] = subview [[SUM]] +// TILE-02: [[LHS_SUBVIEW:%.*]] = memref.subview [[LHS]] +// TILE-02: [[RHS_SUBVIEW:%.*]] = memref.subview [[RHS]] +// TILE-02: [[SUM_SUBVIEW:%.*]] = memref.subview [[SUM]] // TILE-02: linalg.generic {{.*}} ins([[LHS_SUBVIEW]], [[RHS_SUBVIEW]]{{.*}} outs([[SUM_SUBVIEW]] // TILE-002-LABEL: func @sum( @@ -58,11 +58,11 @@ // TILE-234-DAG: [[C0:%.*]] = constant 0 : index // TILE-234-DAG: [[C2:%.*]] = constant 2 : index // TILE-234-DAG: [[C3:%.*]] = constant 3 : index -// TILE-234: [[LHS_ROWS:%.*]] = dim [[LHS]], %c0 -// TILE-234: [[LHS_COLS:%.*]] = dim [[LHS]], %c1 +// TILE-234: [[LHS_ROWS:%.*]] = memref.dim [[LHS]], %c0 +// TILE-234: [[LHS_COLS:%.*]] = memref.dim [[LHS]], %c1 // TILE-234: scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]]) to ([[LHS_ROWS]], [[LHS_COLS]]) step ([[C2]], [[C3]]) { // TILE-234-NO: scf.parallel -// TILE-234: [[LHS_SUBVIEW:%.*]] = subview [[LHS]] -// TILE-234: [[RHS_SUBVIEW:%.*]] = subview [[RHS]] -// TILE-234: [[SUM_SUBVIEW:%.*]] = subview [[SUM]] +// TILE-234: [[LHS_SUBVIEW:%.*]] = memref.subview [[LHS]] +// TILE-234: [[RHS_SUBVIEW:%.*]] = memref.subview [[RHS]] +// TILE-234: [[SUM_SUBVIEW:%.*]] = memref.subview [[SUM]] // TILE-234: linalg.generic {{.*}} ins([[LHS_SUBVIEW]], [[RHS_SUBVIEW]]{{.*}} outs([[SUM_SUBVIEW]] diff --git a/mlir/test/Dialect/Linalg/tile-simple-conv.mlir b/mlir/test/Dialect/Linalg/tile-simple-conv.mlir --- a/mlir/test/Dialect/Linalg/tile-simple-conv.mlir +++ b/mlir/test/Dialect/Linalg/tile-simple-conv.mlir @@ -20,30 +20,30 @@ // CHECK-DAG: %[[C2:.*]] = constant 2 : index // CHECK-DAG: %[[C3:.*]] = constant 3 : index // CHECK-DAG: %[[C4:.*]] = constant 4 : index -// CHECK: %[[T0:.*]] = dim %[[ARG0]], %[[C0]] -// CHECK: %[[T1:.*]] = dim %[[ARG0]], %[[C1]] -// CHECK: %[[T2:.*]] = dim %[[ARG1]], %[[C0]] -// CHECK: %[[T3:.*]] = dim %[[ARG2]], %[[C1]] -// CHECK: %[[T4:.*]] = dim %[[ARG2]], %[[C2]] +// CHECK: %[[T0:.*]] = memref.dim %[[ARG0]], %[[C0]] +// CHECK: %[[T1:.*]] = memref.dim %[[ARG0]], %[[C1]] +// CHECK: %[[T2:.*]] = memref.dim %[[ARG1]], %[[C0]] +// CHECK: %[[T3:.*]] = memref.dim %[[ARG2]], %[[C1]] +// CHECK: %[[T4:.*]] = memref.dim %[[ARG2]], %[[C2]] // CHECK: scf.for %[[ARG3:.*]] = %[[C0]] to %[[T2]] step %[[C2]] // CHECK: scf.for %[[ARG4:.*]] = %[[C0]] to %[[T3]] step %[[C3]] // CHECK: scf.for %[[ARG5:.*]] = %[[C0]] to %[[T4]] step %[[C4]] -// CHECK: %[[T5:.*]] = dim %[[ARG1]], %[[C0]] +// CHECK: %[[T5:.*]] = memref.dim %[[ARG1]], %[[C0]] // CHECK: %[[T6:.*]] = affine.min #[[MAP0]](%[[ARG3]])[%[[T5]]] -// CHECK: %[[T7:.*]] = dim %[[ARG1]], %[[C1]] +// CHECK: %[[T7:.*]] = memref.dim %[[ARG1]], %[[C1]] // CHECK: %[[T8:.*]] = affine.min #[[MAP1]](%[[ARG4]])[%[[T0]], %[[T7]]] -// CHECK: %[[T9:.*]] = dim %[[ARG1]], %[[C2]] +// CHECK: %[[T9:.*]] = memref.dim %[[ARG1]], %[[C2]] // CHECK: %[[T10:.*]] = affine.min #[[MAP2]](%[[ARG5]])[%[[T1]], %[[T9]]] -// CHECK: %[[T11:.*]] = dim %[[ARG1]], %[[C3]] -// CHECK: %[[SV1:.*]] = subview %[[ARG1]][%[[ARG3]], %[[ARG4]], %[[ARG5]], 0] +// CHECK: %[[T11:.*]] = memref.dim %[[ARG1]], %[[C3]] +// CHECK: %[[SV1:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[ARG4]], %[[ARG5]], 0] // CHECK-SAME: [%[[T6]], %[[T8]], %[[T10]], %[[T11]]] -// CHECK: %[[T13:.*]] = dim %[[ARG2]], %[[C0]] +// CHECK: %[[T13:.*]] = memref.dim %[[ARG2]], 
%[[C0]] // CHECK: %[[T14:.*]] = affine.min #[[MAP0]](%[[ARG3]])[%[[T13]]] -// CHECK: %[[T15:.*]] = dim %[[ARG2]], %[[C1]] +// CHECK: %[[T15:.*]] = memref.dim %[[ARG2]], %[[C1]] // CHECK: %[[T16:.*]] = affine.min #[[MAP4]](%[[ARG4]])[%[[T15]]] -// CHECK: %[[T17:.*]] = dim %[[ARG2]], %[[C2]] +// CHECK: %[[T17:.*]] = memref.dim %[[ARG2]], %[[C2]] // CHECK: %[[T18:.*]] = affine.min #[[MAP5]](%[[ARG5]])[%[[T17]]] -// CHECK: %[[T19:.*]] = dim %[[ARG2]], %[[C3]] -// CHECK: %[[SV2:.*]] = subview %[[ARG2]][%[[ARG3]], %[[ARG4]], %[[ARG5]], 0] +// CHECK: %[[T19:.*]] = memref.dim %[[ARG2]], %[[C3]] +// CHECK: %[[SV2:.*]] = memref.subview %[[ARG2]][%[[ARG3]], %[[ARG4]], %[[ARG5]], 0] // CHECK-SAME: [%[[T14]], %[[T16]], %[[T18]], %[[T19]]] // CHECK: linalg.conv(%[[ARG0]], %[[SV1]], %[[SV2]]) diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir --- a/mlir/test/Dialect/Linalg/tile-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir @@ -34,9 +34,9 @@ %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %0 = dim %arg0, %c0 : tensor - %1 = dim %arg0, %c1 : tensor - %2 = dim %arg0, %c2 : tensor + %0 = memref.dim %arg0, %c0 : tensor + %1 = memref.dim %arg0, %c1 : tensor + %2 = memref.dim %arg0, %c2 : tensor %3 = linalg.init_tensor [%0, %1, %2] : tensor %4 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, @@ -81,9 +81,9 @@ %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %0 = dim %arg0, %c0 : tensor - %1 = dim %arg0, %c1 : tensor - %2 = dim %arg0, %c2 : tensor + %0 = memref.dim %arg0, %c0 : tensor + %1 = memref.dim %arg0, %c1 : tensor + %2 = memref.dim %arg0, %c2 : tensor %3 = linalg.init_tensor [%0, %1, %2] : tensor %4 = linalg.indexed_generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, diff --git a/mlir/test/Dialect/Linalg/tile.mlir b/mlir/test/Dialect/Linalg/tile.mlir --- a/mlir/test/Dialect/Linalg/tile.mlir +++ b/mlir/test/Dialect/Linalg/tile.mlir @@ -40,46 +40,46 @@ // TILE-2-LABEL: func @matmul( // TILE-2-DAG: %[[C0:.*]] = constant 0 : index // TILE-2-DAG: %[[C2:.*]] = constant 2 : index -// TILE-2: %[[M:.*]] = dim %{{.*}}, %c0 : memref +// TILE-2: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref // TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} { -// TILE-2: %[[localM:.*]] = dim %{{.*}}, %c0 +// TILE-2: %[[localM:.*]] = memref.dim %{{.*}}, %c0 // TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[localM]]] -// TILE-2: %[[K:.*]] = dim %{{.*}}, %c1 : memref -// TILE-2: %[[sAi:.*]] = subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[K]]] [1, 1] : memref to memref -// TILE-2: %[[localK:.*]] = dim %{{.*}}, %c0 +// TILE-2: %[[K:.*]] = memref.dim %{{.*}}, %c1 : memref +// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[K]]] [1, 1] : memref to memref +// TILE-2: %[[localK:.*]] = memref.dim %{{.*}}, %c0 // TILE-2: %[[szK:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[localK]]] -// TILE-2: %[[N:.*]] = dim %{{.*}}, %c1 : memref -// TILE-2: %[[sCi:.*]] = subview %{{.*}}[%[[I]], 0] [%[[szK]], %[[N]]] [1, 1] : memref to memref +// TILE-2: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref +// TILE-2: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szK]], %[[N]]] [1, 1] : memref to memref // TILE-2: linalg.matmul ins(%[[sAi]]{{.*}} outs(%[[sCi]] // TILE-02-LABEL: func @matmul( // TILE-02-DAG: %[[C0:.*]] = constant 0 : index // TILE-02-DAG: %[[C2:.*]] = constant 2 : index -// TILE-02: %[[N:.*]] = dim %arg1, %c1 : memref +// TILE-02: 
%[[N:.*]] = memref.dim %arg1, %c1 : memref // TILE-02: scf.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} { -// TILE-02: %[[K:.*]] = dim %{{.*}}, %c0 : memref -// TILE-02: %[[localN:.*]] = dim %{{.*}}, %c1 +// TILE-02: %[[K:.*]] = memref.dim %{{.*}}, %c0 : memref +// TILE-02: %[[localN:.*]] = memref.dim %{{.*}}, %c1 // TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[localN]]] -// TILE-02: %[[sBj:.*]] = subview %{{.*}}[0, %[[J]]] [%[[K]], %[[szN]]] [1, 1] : memref to memref -// TILE-02: %[[M:.*]] = dim %{{.*}}, %c0 : memref -// TILE-02: %[[localK:.*]] = dim %{{.*}}, %c1 +// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[K]], %[[szN]]] [1, 1] : memref to memref +// TILE-02: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref +// TILE-02: %[[localK:.*]] = memref.dim %{{.*}}, %c1 // TILE-02: %[[szK:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[localK]]] -// TILE-02: %[[sCj:.*]] = subview %{{.*}}[0, %[[J]]] [%[[M]], %[[szK]]] [1, 1] : memref to memref +// TILE-02: %[[sCj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[M]], %[[szK]]] [1, 1] : memref to memref // TILE-02: linalg.matmul ins(%{{.*}}, %[[sBj]]{{.*}} outs(%[[sCj]] // TILE-002-LABEL: func @matmul( // TILE-002-DAG: %[[C0:.*]] = constant 0 : index // TILE-002-DAG: %[[C2:.*]] = constant 2 : index -// TILE-002: %[[ubK:.*]] = dim %{{.*}}, %c1 : memref +// TILE-002: %[[ubK:.*]] = memref.dim %{{.*}}, %c1 : memref // TILE-002: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} { -// TILE-002: %[[M:.*]] = dim %{{.*}}, %c0 : memref -// TILE-002: %[[localK:.*]] = dim %{{.*}}, %c1 +// TILE-002: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref +// TILE-002: %[[localK:.*]] = memref.dim %{{.*}}, %c1 // TILE-002: %[[szK:.*]] = affine.min #[[$bound_map]](%[[K]])[%[[localK]]] -// TILE-002: %[[sAj:.*]] = subview %{{.*}}[0, %[[K]]] [%[[M]], %[[szK]]] [1, 1] : memref to memref -// TILE-002: %[[localK:.*]] = dim %{{.*}}, %c0 +// TILE-002: %[[sAj:.*]] = memref.subview %{{.*}}[0, %[[K]]] [%[[M]], %[[szK]]] [1, 1] : memref to memref +// TILE-002: %[[localK:.*]] = memref.dim %{{.*}}, %c0 // TILE-002: %[[szK:.*]] = affine.min #[[$bound_map]](%[[K]])[%[[localK]]] -// TILE-002: %[[N:.*]] = dim %{{.*}}, %c1 : memref -// TILE-002: %[[sBj:.*]] = subview %{{.*}}[%[[K]], 0] [%[[szK]], %[[N]]] [1, 1] : memref to memref +// TILE-002: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref +// TILE-002: %[[sBj:.*]] = memref.subview %{{.*}}[%[[K]], 0] [%[[szK]], %[[N]]] [1, 1] : memref to memref // TILE-002: linalg.matmul ins(%[[sAj]], %[[sBj]]{{.*}} outs(%{{.*}} // TILE-234-LABEL: func @matmul( @@ -87,27 +87,27 @@ // TILE-234-DAG: %[[C2:.*]] = constant 2 : index // TILE-234-DAG: %[[C3:.*]] = constant 3 : index // TILE-234-DAG: %[[C4:.*]] = constant 4 : index -// TILE-234: %[[ubM:.*]] = dim %{{.*}}, %c0 : memref -// TILE-234: %[[ubK:.*]] = dim %{{.*}}, %c1 : memref -// TILE-234: %[[ubN:.*]] = dim %{{.*}}, %c1 : memref +// TILE-234: %[[ubM:.*]] = memref.dim %{{.*}}, %c0 : memref +// TILE-234: %[[ubK:.*]] = memref.dim %{{.*}}, %c1 : memref +// TILE-234: %[[ubN:.*]] = memref.dim %{{.*}}, %c1 : memref // TILE-234: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[ubM]] step %{{.*}} { // TILE-234: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[ubN]] step %{{.*}} { // TILE-234: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} { -// TILE-234: %[[localM:.*]] = dim %{{.*}}, %c0 +// TILE-234: %[[localM:.*]] = memref.dim %{{.*}}, %c0 // TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[localM]]] -// TILE-234: %[[localK:.*]] = dim %{{.*}}, %c1 +// TILE-234: 
%[[localK:.*]] = memref.dim %{{.*}}, %c1 // TILE-234: %[[szK:.*]] = affine.min #[[$bound_map_4]](%[[K]])[%[[localK]]] -// TILE-234: %[[sAik:.*]] = subview %{{.*}}[%[[I]], %[[K]]] [%[[szM]], %[[szK]]] [1, 1] : memref to memref -// TILE-234: %[[localK:.*]] = dim %{{.*}}, %c0 +// TILE-234: %[[sAik:.*]] = memref.subview %{{.*}}[%[[I]], %[[K]]] [%[[szM]], %[[szK]]] [1, 1] : memref to memref +// TILE-234: %[[localK:.*]] = memref.dim %{{.*}}, %c0 // TILE-234: %[[szK:.*]] = affine.min #[[$bound_map_4]](%[[K]])[%[[localK]]] -// TILE-234: %[[localN:.*]] = dim %{{.*}}, %c1 +// TILE-234: %[[localN:.*]] = memref.dim %{{.*}}, %c1 // TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[localN]]] -// TILE-234: %[[sBkj:.*]] = subview %{{.*}}[%[[K]], %[[J]]] [%[[szK]], %[[szN]]] [1, 1] : memref to memref -// TILE-234: %[[localM:.*]] = dim %{{.*}}, %c0 +// TILE-234: %[[sBkj:.*]] = memref.subview %{{.*}}[%[[K]], %[[J]]] [%[[szK]], %[[szN]]] [1, 1] : memref to memref +// TILE-234: %[[localM:.*]] = memref.dim %{{.*}}, %c0 // TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[localM]]] -// TILE-234: %[[localN:.*]] = dim %{{.*}}, %c1 +// TILE-234: %[[localN:.*]] = memref.dim %{{.*}}, %c1 // TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[localN]]] -// TILE-234: %[[sCij:.*]] = subview %{{.*}}[%[[I]], %[[J]]] [%[[szM]], %[[szN]]] [1, 1] : memref to memref +// TILE-234: %[[sCij:.*]] = memref.subview %{{.*}}[%[[I]], %[[J]]] [%[[szM]], %[[szN]]] [1, 1] : memref to memref // // TILE-234: linalg.matmul ins(%[[sAik]], %[[sBkj]]{{.*}} outs(%[[sCij]] @@ -133,9 +133,9 @@ // TILE-2-DAG: %[[M:.*]] = constant 10 : index // TILE-2: scf.for %[[I:.*]] = %{{.*}} to %[[M]] step %{{.*}} { // TILE-2: %[[MIN2:.*]] = affine.min #[[$bound_map_static]](%[[I]]) -// TILE-2: %[[sAi:.*]] = subview %{{.*}}[%[[I]], 0] [%[[MIN2]], 16] [1, 1] : memref<10x16xf32, #[[$strided2D]]> to memref +// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[MIN2]], 16] [1, 1] : memref<10x16xf32, #[[$strided2D]]> to memref // TILE-2: %[[MIN22:.*]] = affine.min #[[$bound_map_static]](%[[I]]) -// TILE-2: %[[sCi:.*]] = subview %{{.*}}[%[[I]], 0] [%[[MIN22]], 12] [1, 1] : memref<10x12xf32, #[[$strided2D]]> to memref +// TILE-2: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[MIN22]], 12] [1, 1] : memref<10x12xf32, #[[$strided2D]]> to memref // TILE-2: linalg.matmul ins(%[[sAi]], %{{.*}}{{.*}} outs(%[[sCi]] // TILE-02-LABEL: func @matmul_static( @@ -144,9 +144,9 @@ // TILE-02-DAG: %[[N:.*]] = constant 12 : index // TILE-02: scf.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} { // TILE-02: %[[MIN2:.*]] = affine.min #[[$bound_map_static]](%[[J]]) -// TILE-02: %[[sBj:.*]] = subview %{{.*}}[0, %[[J]]] [16, %[[MIN2]]] [1, 1] : memref<16x12xf32, #[[$strided2D]]> to memref<16x?xf32, #[[$strided2D]]> +// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [16, %[[MIN2]]] [1, 1] : memref<16x12xf32, #[[$strided2D]]> to memref<16x?xf32, #[[$strided2D]]> // TILE-02: %[[MIN22:.*]] = affine.min #[[$bound_map_static]](%[[J]]) -// TILE-02: %[[sCj:.*]] = subview %{{.*}}[0, %[[J]]] [10, %[[MIN22]]] [1, 1] : memref<10x12xf32, #[[$strided2D]]> to memref<10x?xf32, #[[$strided2D]]> +// TILE-02: %[[sCj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [10, %[[MIN22]]] [1, 1] : memref<10x12xf32, #[[$strided2D]]> to memref<10x?xf32, #[[$strided2D]]> // TILE-02: linalg.matmul ins(%{{.*}}, %[[sBj]]{{.*}} outs(%[[sCj]] // TILE-002-LABEL: func @matmul_static( @@ -155,9 +155,9 @@ // TILE-002-DAG: %[[C16:.*]] = constant 16 : index // TILE-002: 
scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[C16]] step %{{.*}} { // TILE-002: %[[MIN2:.*]] = affine.min #[[$bound_map_static]](%[[K]]) -// TILE-002: %[[sAj:.*]] = subview %{{.*}}[0, %[[K]]] [10, %[[MIN2]]] [1, 1] : memref<10x16xf32, #[[$strided2D]]> to memref<10x?xf32, #[[$strided2D]]> +// TILE-002: %[[sAj:.*]] = memref.subview %{{.*}}[0, %[[K]]] [10, %[[MIN2]]] [1, 1] : memref<10x16xf32, #[[$strided2D]]> to memref<10x?xf32, #[[$strided2D]]> // TILE-002: %[[MIN22:.*]] = affine.min #[[$bound_map_static]](%[[K]]) -// TILE-002: %[[sBj:.*]] = subview %{{.*}}[%[[K]], 0] [%[[MIN22]], 12] [1, 1] : memref<16x12xf32, #[[$strided2D]]> to memref +// TILE-002: %[[sBj:.*]] = memref.subview %{{.*}}[%[[K]], 0] [%[[MIN22]], 12] [1, 1] : memref<16x12xf32, #[[$strided2D]]> to memref // TILE-002: linalg.matmul ins(%[[sAj]], %[[sBj]]{{.*}} outs(%{{.*}} // TILE-234-LABEL: func @matmul_static( @@ -171,9 +171,9 @@ // TILE-234: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[C10]] step %{{.*}} { // TILE-234: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[C12]] step %{{.*}} { // TILE-234: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[C16]] step %{{.*}} { -// TILE-234: %[[sAik:.*]] = subview %{{.*}}[%[[I]], %[[K]]] [%{{.*}}, %{{.*}}] [1, 1] : memref<10x16xf32, #[[$strided2D]]> to memref -// TILE-234: %[[sBkj:.*]] = subview %{{.*}}[%[[K]], %[[J]]] [%{{.*}}, %{{.*}}] [1, 1] : memref<16x12xf32, #[[$strided2D]]> to memref -// TILE-234: %[[sCij:.*]] = subview %{{.*}}[%[[I]], %[[J]]] [%{{.*}}, %{{.*}}] [1, 1] : memref<10x12xf32, #[[$strided2D]]> to memref +// TILE-234: %[[sAik:.*]] = memref.subview %{{.*}}[%[[I]], %[[K]]] [%{{.*}}, %{{.*}}] [1, 1] : memref<10x16xf32, #[[$strided2D]]> to memref +// TILE-234: %[[sBkj:.*]] = memref.subview %{{.*}}[%[[K]], %[[J]]] [%{{.*}}, %{{.*}}] [1, 1] : memref<16x12xf32, #[[$strided2D]]> to memref +// TILE-234: %[[sCij:.*]] = memref.subview %{{.*}}[%[[I]], %[[J]]] [%{{.*}}, %{{.*}}] [1, 1] : memref<10x12xf32, #[[$strided2D]]> to memref // // TILE-234: linalg.matmul ins(%[[sAik]], %[[sBkj]]{{.*}} outs(%[[sCij]] @@ -190,15 +190,15 @@ // TILE-2-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref // TILE-2-DAG: %[[C0:.*]] = constant 0 : index // TILE-2-DAG: %[[C2:.*]] = constant 2 : index -// TILE-2: %[[M:.*]] = dim %{{.*}}, %c0 : memref +// TILE-2: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref // TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} { -// TILE-2: %[[localM:.*]] = dim %[[ARG0]], %c0 +// TILE-2: %[[localM:.*]] = memref.dim %[[ARG0]], %c0 // TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[localM]]] -// TILE-2: %[[N:.*]] = dim %{{.*}}, %c1 : memref -// TILE-2: %[[sAi:.*]] = subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[N]]] [1, 1] : memref to memref -// TILE-2: %[[localN:.*]] = dim %{{.*}}, %c0 +// TILE-2: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref +// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[N]]] [1, 1] : memref to memref +// TILE-2: %[[localN:.*]] = memref.dim %{{.*}}, %c0 // TILE-2: %[[szN:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[localN]]] -// TILE-2: %[[sCi:.*]] = subview %{{.*}}[%[[I]]] [%[[szN]]] [1] : memref to memref +// TILE-2: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szN]]] [1] : memref to memref // TILE-2: linalg.matvec ins(%[[sAi]], %{{.*}} outs(%[[sCi]] // TILE-02-LABEL: func @matvec( @@ -207,15 +207,15 @@ // TILE-02-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref // TILE-02-DAG: %[[C0:.*]] = constant 0 : index // TILE-02-DAG: %[[C2:.*]] = constant 2 : index -// TILE-02: %[[K:.*]] = dim %{{.*}}, %c1 : memref +// TILE-02: %[[K:.*]] = memref.dim %{{.*}}, 
%c1 : memref // TILE-02: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} { -// TILE-02: %[[M:.*]] = dim %{{.*}}, %c0 : memref -// TILE-02: %[[localN:.*]] = dim %{{.*}}, %c1 +// TILE-02: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref +// TILE-02: %[[localN:.*]] = memref.dim %{{.*}}, %c1 // TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[localN]]] -// TILE-02: %[[sAj:.*]] = subview %{{.*}}[0, %[[J]]] [%[[M]], %[[szN]]] [1, 1] : memref to memref -// TILE-02: %[[localN:.*]] = dim %{{.*}}, %c0 +// TILE-02: %[[sAj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[M]], %[[szN]]] [1, 1] : memref to memref +// TILE-02: %[[localN:.*]] = memref.dim %{{.*}}, %c0 // TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[localN]]] -// TILE-02: %[[sBj:.*]] = subview %{{.*}}[%[[J]]] [%[[szN]]] [1] : memref to memref +// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[%[[J]]] [%[[szN]]] [1] : memref to memref // TILE-02: linalg.matvec ins(%[[sAj]], %[[sBj]]{{.*}} outs(%{{.*}} // TILE-002-LABEL: func @matvec( @@ -231,21 +231,21 @@ // TILE-234-DAG: %[[C0:.*]] = constant 0 : index // TILE-234-DAG: %[[C2:.*]] = constant 2 : index // TILE-234-DAG: %[[C3:.*]] = constant 3 : index -// TILE-234: %[[M:.*]] = dim %{{.*}}, %c0 : memref -// TILE-234: %[[K:.*]] = dim %{{.*}}, %c1 : memref +// TILE-234: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref +// TILE-234: %[[K:.*]] = memref.dim %{{.*}}, %c1 : memref // TILE-234: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} { // TILE-234: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} { -// TILE-234: %[[localM:.*]] = dim %{{.*}}, %c0 +// TILE-234: %[[localM:.*]] = memref.dim %{{.*}}, %c0 // TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[localM]]] -// TILE-234: %[[localN:.*]] = dim %{{.*}}, %c1 +// TILE-234: %[[localN:.*]] = memref.dim %{{.*}}, %c1 // TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[localN]]] -// TILE-234: %[[sAij:.*]] = subview %{{.*}}[%[[I]], %[[J]]] [%[[szM]], %[[szN]]] [1, 1] : memref to memref -// TILE-234: %[[localN:.*]] = dim %{{.*}}, %c0 +// TILE-234: %[[sAij:.*]] = memref.subview %{{.*}}[%[[I]], %[[J]]] [%[[szM]], %[[szN]]] [1, 1] : memref to memref +// TILE-234: %[[localN:.*]] = memref.dim %{{.*}}, %c0 // TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[localN]]] -// TILE-234: %[[sBj:.*]] = subview %{{.*}}[%[[J]]] [%[[szN]]] [1] : memref to memref -// TILE-234: %[[localM:.*]] = dim %{{.*}}, %c0 +// TILE-234: %[[sBj:.*]] = memref.subview %{{.*}}[%[[J]]] [%[[szN]]] [1] : memref to memref +// TILE-234: %[[localM:.*]] = memref.dim %{{.*}}, %c0 // TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[localM]]] -// TILE-234: %[[sCi:.*]] = subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref to memref +// TILE-234: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref to memref // // TILE-234: linalg.matvec ins(%[[sAij]], %[[sBj]]{{.*}} outs(%[[sCi]] @@ -258,14 +258,14 @@ // TILE-2-LABEL: func @dot( // TILE-2-DAG: %[[C0:.*]] = constant 0 : index // TILE-2-DAG: %[[C2:.*]] = constant 2 : index -// TILE-2: %[[M:.*]] = dim %{{.*}}, %c0 : memref +// TILE-2: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref // TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} { -// TILE-2: %[[localM:.*]] = dim %{{.*}}, %c0 +// TILE-2: %[[localM:.*]] = memref.dim %{{.*}}, %c0 // TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[localM]]] -// TILE-2: %[[sAi:.*]] = subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref to memref -// TILE-2: %[[localM:.*]] = dim %{{.*}}, 
%c0 +// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref to memref +// TILE-2: %[[localM:.*]] = memref.dim %{{.*}}, %c0 // TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[localM]]] -// TILE-2: %[[sBi:.*]] = subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref to memref +// TILE-2: %[[sBi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref to memref // TILE-2: linalg.dot ins(%[[sAi]], %[[sBi]]{{.*}} outs( // TILE-02-LABEL: func @dot( @@ -277,14 +277,14 @@ // TILE-234-LABEL: func @dot( // TILE-234-DAG: %[[C0:.*]] = constant 0 : index // TILE-234-DAG: %[[C2:.*]] = constant 2 : index -// TILE-234: %[[ubK:.*]] = dim %{{.*}}, %c0 : memref +// TILE-234: %[[ubK:.*]] = memref.dim %{{.*}}, %c0 : memref // TILE-234: scf.for %[[I:.*]] = %{{.*}} to %[[ubK]] step %{{.*}} { -// TILE-234: %[[localM:.*]] = dim %{{.*}}, %c0 +// TILE-234: %[[localM:.*]] = memref.dim %{{.*}}, %c0 // TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[localM]]] -// TILE-234: %[[sAi:.*]] = subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref to memref -// TILE-234: %[[localM:.*]] = dim %{{.*}}, %c0 +// TILE-234: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref to memref +// TILE-234: %[[localM:.*]] = memref.dim %{{.*}}, %c0 // TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[localM]]] -// TILE-234: %[[sBi:.*]] = subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref to memref +// TILE-234: %[[sBi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref to memref // TILE-234: linalg.dot ins(%[[sAi]], %[[sBi]]{{.*}} outs( func @fill_static(%arg0: memref<127x99xf32>, %arg1: f32) { @@ -294,13 +294,13 @@ // TILE-2-LABEL: func @fill_static // TILE-2: for // TILE-2-NOT: for -// TILE-2: subview{{.*}} : memref<127x99xf32> +// TILE-2: memref.subview{{.*}} : memref<127x99xf32> // TILE-2: linalg.fill{{.*}} : memref, f32 // TILE-02-LABEL: func @fill_static // TILE-02: for // TILE-02-NOT: for -// TILE-02: subview{{.*}} : memref<127x99xf32> +// TILE-02: memref.subview{{.*}} : memref<127x99xf32> // TILE-02: linalg.fill{{.*}} : memref<127x?xf32, #[[$stride_99_1_layout_map]]>, f32 // TILE-002-LABEL: func @fill_static @@ -311,7 +311,7 @@ // TILE-234: for // TILE-234: for // TILE-234-NOT: for -// TILE-234: subview{{.*}} : memref<127x99xf32> +// TILE-234: memref.subview{{.*}} : memref<127x99xf32> // TILE-234: linalg.fill{{.*}} : memref, f32 diff --git a/mlir/test/Dialect/Linalg/transform-patterns.mlir b/mlir/test/Dialect/Linalg/transform-patterns.mlir --- a/mlir/test/Dialect/Linalg/transform-patterns.mlir +++ b/mlir/test/Dialect/Linalg/transform-patterns.mlir @@ -216,17 +216,17 @@ %c4000 = constant 4000 : index %c0 = constant 0 : index %c1 = constant 1 : index - %0 = dim %arg0, %c0 : memref - %1 = dim %arg0, %c1 : memref - %2 = dim %arg1, %c1 : memref + %0 = memref.dim %arg0, %c0 : memref + %1 = memref.dim %arg0, %c1 : memref + %2 = memref.dim %arg1, %c1 : memref scf.for %arg3 = %c0 to %0 step %c2000 { scf.for %arg4 = %c0 to %2 step %c3000 { scf.for %arg5 = %c0 to %1 step %c4000 { - %3 = subview %arg0[%arg3, %arg5][%c2000, %c4000][%c1, %c1] : + %3 = memref.subview %arg0[%arg3, %arg5][%c2000, %c4000][%c1, %c1] : memref to memref - %4 = subview %arg1[%arg5, %arg4][%c4000, %c3000][%c1, %c1] : + %4 = memref.subview %arg1[%arg5, %arg4][%c4000, %c3000][%c1, %c1] : memref to memref - %5 = subview %arg2[%arg3, %arg4][%c2000, %c3000][%c1, %c1] : + %5 = memref.subview %arg2[%arg3, %arg4][%c2000, %c3000][%c1, %c1] : memref to memref linalg.matmul 
{__internal_linalg_transform__ = "_promote_views_"} ins(%3, %4: memref, @@ -245,20 +245,20 @@ // CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c2000]] { // CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c3000]] { // CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c4000]] { -// CHECK: %[[s0:.*]] = subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref to memref -// CHECK: %[[s1:.*]] = subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref to memref -// CHECK: %[[s2:.*]] = subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref to memref -// CHECK: %[[a0:.*]] = alloc({{%.*}}) : memref -// CHECK: %[[v0:.*]] = std.view %[[a0]][{{.*}}][{{%.*}}, {{%.*}}] : memref to memref -// CHECK: %[[l0:.*]] = subview %[[v0]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] +// CHECK: %[[s0:.*]] = memref.subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref to memref +// CHECK: %[[s1:.*]] = memref.subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref to memref +// CHECK: %[[s2:.*]] = memref.subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref to memref +// CHECK: %[[a0:.*]] = memref.alloc({{%.*}}) : memref +// CHECK: %[[v0:.*]] = memref.view %[[a0]][{{.*}}][{{%.*}}, {{%.*}}] : memref to memref +// CHECK: %[[l0:.*]] = memref.subview %[[v0]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] // CHECK-SAME: memref to memref -// CHECK: %[[a1:.*]] = alloc({{%.*}}) : memref -// CHECK: %[[v1:.*]] = std.view %[[a1]][{{.*}}][{{%.*}}, {{%.*}}] : memref to memref -// CHECK: %[[l1:.*]] = subview %[[v1]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] +// CHECK: %[[a1:.*]] = memref.alloc({{%.*}}) : memref +// CHECK: %[[v1:.*]] = memref.view %[[a1]][{{.*}}][{{%.*}}, {{%.*}}] : memref to memref +// CHECK: %[[l1:.*]] = memref.subview %[[v1]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] // CHECK-SAME: memref to memref -// CHECK: %[[a2:.*]] = alloc({{%.*}}) : memref -// CHECK: %[[v2:.*]] = std.view %[[a2]][{{.*}}][{{%.*}}, {{%.*}}] : memref to memref -// CHECK: %[[l2:.*]] = subview %[[v2]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] +// CHECK: %[[a2:.*]] = memref.alloc({{%.*}}) : memref +// CHECK: %[[v2:.*]] = memref.view %[[a2]][{{.*}}][{{%.*}}, {{%.*}}] : memref to memref +// CHECK: %[[l2:.*]] = memref.subview %[[v2]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] // CHECK-SAME: memref to memref // CHECK: linalg.copy(%[[s0]], %[[l0]]) : memref, memref // CHECK: linalg.copy(%[[s1]], %[[l1]]) : memref, memref @@ -275,17 +275,17 @@ %c4000 = constant 4000 : index %c0 = constant 0 : index %c1 = constant 1 : index - %0 = dim %arg0, %c0 : memref - %1 = dim %arg0, %c1 : memref - %2 = dim %arg1, %c1 : memref + %0 = memref.dim %arg0, %c0 : memref + %1 = memref.dim %arg0, %c1 : memref + %2 = memref.dim %arg1, %c1 : memref scf.for %arg3 = %c0 to %0 step %c2000 { scf.for %arg4 = %c0 to %2 step %c3000 { scf.for %arg5 = %c0 to %1 step %c4000 { - %3 = std.subview %arg0[%arg3, %arg5][%c2000, %c4000][%c1, %c1] : + %3 = memref.subview %arg0[%arg3, %arg5][%c2000, %c4000][%c1, %c1] : memref to memref - %4 = std.subview %arg1[%arg5, %arg4][%c4000, %c3000][%c1, %c1] : + %4 = memref.subview %arg1[%arg5, %arg4][%c4000, %c3000][%c1, %c1] : memref to memref - %5 = std.subview %arg2[%arg3, %arg4][%c2000, %c3000][%c1, %c1] : + %5 = memref.subview %arg2[%arg3, %arg4][%c2000, %c3000][%c1, %c1] : memref to memref linalg.matmul {__internal_linalg_transform__ = "_promote_first_view_"} ins(%3, %4: memref, @@ -304,18 +304,18 @@ // CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step 
%[[c2000]] { // CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c3000]] { // CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c4000]] { -// CHECK: %[[s0:.*]] = subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref to memref -// CHECK: %[[s1:.*]] = subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref to memref -// CHECK: %[[s2:.*]] = subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref to memref -// CHECK: %[[a0:.*]] = alloc({{%.*}}) : memref -// CHECK: %[[v0:.*]] = std.view %[[a0]][{{.*}}][{{%.*}}, {{%.*}}] : memref to memref -// CHECK: %[[l0:.*]] = subview %[[v0]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref -// CHECK-NOT: %[[a1:.*]] = alloc({{%.*}}) : memref -// CHECK-NOT: %[[v1:.*]] = std.view %[[a1]][{{.*}}][{{%.*}}, {{%.*}}] : memref to memref -// CHECK-NOT: %[[l0:.*]] = subview %[[v1]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref -// CHECK-NOT: %[[a2:.*]] = alloc({{%.*}}) : memref -// CHECK-NOT: %[[v2:.*]] = std.view %[[a2]][{{.*}}][{{%.*}}, {{%.*}}] : memref to memref -// CHECK-NOT: %[[l0:.*]] = subview %[[v2]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref +// CHECK: %[[s0:.*]] = memref.subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref to memref +// CHECK: %[[s1:.*]] = memref.subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref to memref +// CHECK: %[[s2:.*]] = memref.subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref to memref +// CHECK: %[[a0:.*]] = memref.alloc({{%.*}}) : memref +// CHECK: %[[v0:.*]] = memref.view %[[a0]][{{.*}}][{{%.*}}, {{%.*}}] : memref to memref +// CHECK: %[[l0:.*]] = memref.subview %[[v0]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref +// CHECK-NOT: %[[a1:.*]] = memref.alloc({{%.*}}) : memref +// CHECK-NOT: %[[v1:.*]] = memref.view %[[a1]][{{.*}}][{{%.*}}, {{%.*}}] : memref to memref +// CHECK-NOT: %[[l0:.*]] = memref.subview %[[v1]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref +// CHECK-NOT: %[[a2:.*]] = memref.alloc({{%.*}}) : memref +// CHECK-NOT: %[[v2:.*]] = memref.view %[[a2]][{{.*}}][{{%.*}}, {{%.*}}] : memref to memref +// CHECK-NOT: %[[l0:.*]] = memref.subview %[[v2]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref // CHECK: linalg.copy(%[[s0]], %[[l0]]) : memref, memref // CHECK-NOT: linalg.copy(%[[s1]], %[[l1]]) : memref, memref // CHECK-NOT: linalg.copy(%[[s2]], %[[l2]]) : memref, memref^ @@ -329,7 +329,7 @@ %c0 = constant 0 : index %c1 = constant 1 : index %cf = constant 1.0 : f32 - %3 = std.subview %arg0[%c0, %c0][%c2000, %c4000][%c1, %c1] : + %3 = memref.subview %arg0[%c0, %c0][%c2000, %c4000][%c1, %c1] : memref to memref linalg.fill(%3, %cf) { __internal_linalg_transform__ = "_promote_views_aligned_"} : memref, f32 @@ -337,10 +337,10 @@ } // CHECK-LABEL: func @aligned_promote_fill // CHECK: %[[cf:.*]] = constant {{.*}} : f32 -// CHECK: %[[s0:.*]] = subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref to memref -// CHECK: %[[a0:.*]] = alloc({{%.*}}) {alignment = 32 : i64} : memref -// CHECK: %[[v0:.*]] = std.view %[[a0]][{{.*}}][{{%.*}}, {{%.*}}] : memref to memref -// CHECK: %[[l0:.*]] = subview %[[v0]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref +// CHECK: %[[s0:.*]] = memref.subview {{%.*}}[{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] [{{%.*}}, {{%.*}}] : memref to memref +// CHECK: %[[a0:.*]] = memref.alloc({{%.*}}) {alignment = 32 : i64} : memref +// CHECK: %[[v0:.*]] = memref.view %[[a0]][{{.*}}][{{%.*}}, 
{{%.*}}] : memref to memref +// CHECK: %[[l0:.*]] = memref.subview %[[v0]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref // CHECK: linalg.fill(%[[v0]], {{%.*}}) : memref, f32 // CHECK: linalg.copy(%[[s0]], %[[l0]]) : memref, memref // CHECK: linalg.fill(%[[v0]], %[[cf]]) : memref, f32 @@ -361,9 +361,9 @@ // CHECK-DAG: %[[C8:.*]] = constant 8 : index // CHECK-DAG: %[[C4:.*]] = constant 4 : index // CHECK-DAG: %[[C0:.*]] = constant 0 : index -// CHECK-DAG: %[[D0:.*]] = dim %[[ARG0]], %c0 -// CHECK-DAG: %[[D1:.*]] = dim %[[ARG0]], %c1 -// CHECK-DAG: %[[D2:.*]] = dim %[[ARG1]], %c1 +// CHECK-DAG: %[[D0:.*]] = memref.dim %[[ARG0]], %c0 +// CHECK-DAG: %[[D1:.*]] = memref.dim %[[ARG0]], %c1 +// CHECK-DAG: %[[D2:.*]] = memref.dim %[[ARG1]], %c1 // CHECK: scf.parallel (%{{.*}}) = (%[[C0]]) to (%[[D2]]) step (%[[C8]]) // CHECK: scf.for %{{.*}} = %[[C0]] to %[[D1]] step %[[C4]] // CHECK: scf.parallel (%{{.*}}) = (%[[C0]]) to (%[[D0]]) step (%[[C16]]) diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -166,7 +166,7 @@ // CHECK-LABEL: func @test_vectorize_copy_scalar func @test_vectorize_copy_scalar(%A : memref, %B : memref) { - // CHECK: %[[V:.*]] = load {{.*}} : memref + // CHECK: %[[V:.*]] = memref.load {{.*}} : memref // CHECK: store %[[V]], {{.*}} : memref linalg.copy(%A, %B) : memref, memref return diff --git a/mlir/test/Dialect/OpenACC/invalid.mlir b/mlir/test/Dialect/OpenACC/invalid.mlir --- a/mlir/test/Dialect/OpenACC/invalid.mlir +++ b/mlir/test/Dialect/OpenACC/invalid.mlir @@ -82,21 +82,21 @@ // ----- %cst = constant 1 : index -%value = alloc() : memref<10xf32> +%value = memref.alloc() : memref<10xf32> // expected-error@+1 {{wait_devnum cannot appear without waitOperands}} acc.update wait_devnum(%cst: index) host(%value: memref<10xf32>) // ----- %cst = constant 1 : index -%value = alloc() : memref<10xf32> +%value = memref.alloc() : memref<10xf32> // expected-error@+1 {{async attribute cannot appear with asyncOperand}} acc.update async(%cst: index) host(%value: memref<10xf32>) attributes {async} // ----- %cst = constant 1 : index -%value = alloc() : memref<10xf32> +%value = memref.alloc() : memref<10xf32> // expected-error@+1 {{wait attribute cannot appear with waitOperands}} acc.update wait(%cst: index) host(%value: memref<10xf32>) attributes {wait} @@ -162,14 +162,14 @@ // ----- %cst = constant 1 : index -%value = alloc() : memref<10xf32> +%value = memref.alloc() : memref<10xf32> // expected-error@+1 {{async attribute cannot appear with asyncOperand}} acc.exit_data async(%cst: index) delete(%value : memref<10xf32>) attributes {async} // ----- %cst = constant 1 : index -%value = alloc() : memref<10xf32> +%value = memref.alloc() : memref<10xf32> // expected-error@+1 {{wait_devnum cannot appear without waitOperands}} acc.exit_data wait_devnum(%cst: index) delete(%value : memref<10xf32>) @@ -181,20 +181,20 @@ // ----- %cst = constant 1 : index -%value = alloc() : memref<10xf32> +%value = memref.alloc() : memref<10xf32> // expected-error@+1 {{async attribute cannot appear with asyncOperand}} acc.enter_data async(%cst: index) create(%value : memref<10xf32>) attributes {async} // ----- %cst = constant 1 : index -%value = alloc() : memref<10xf32> +%value = memref.alloc() : memref<10xf32> // expected-error@+1 {{wait attribute cannot appear with waitOperands}} acc.enter_data wait(%cst: index) create(%value : memref<10xf32>) attributes {wait} 
// ----- %cst = constant 1 : index -%value = alloc() : memref<10xf32> +%value = memref.alloc() : memref<10xf32> // expected-error@+1 {{wait_devnum cannot appear without waitOperands}} acc.enter_data wait_devnum(%cst: index) create(%value : memref<10xf32>) diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -15,12 +15,12 @@ scf.for %arg3 = %c0 to %c10 step %c1 { scf.for %arg4 = %c0 to %c10 step %c1 { scf.for %arg5 = %c0 to %c10 step %c1 { - %a = load %A[%arg3, %arg5] : memref<10x10xf32> - %b = load %B[%arg5, %arg4] : memref<10x10xf32> - %cij = load %C[%arg3, %arg4] : memref<10x10xf32> + %a = memref.load %A[%arg3, %arg5] : memref<10x10xf32> + %b = memref.load %B[%arg5, %arg4] : memref<10x10xf32> + %cij = memref.load %C[%arg3, %arg4] : memref<10x10xf32> %p = mulf %a, %b : f32 %co = addf %cij, %p : f32 - store %co, %C[%arg3, %arg4] : memref<10x10xf32> + memref.store %co, %C[%arg3, %arg4] : memref<10x10xf32> } } } @@ -42,12 +42,12 @@ // CHECK-NEXT: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { // CHECK-NEXT: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { // CHECK-NEXT: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK-NEXT: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> -// CHECK-NEXT: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> -// CHECK-NEXT: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> +// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> +// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> +// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> // CHECK-NEXT: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 // CHECK-NEXT: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECK-NEXT: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> +// CHECK-NEXT: memref.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } @@ -70,12 +70,12 @@ scf.for %arg3 = %c0 to %c10 step %c1 { scf.for %arg4 = %c0 to %c10 step %c1 { scf.for %arg5 = %c0 to %c10 step %c1 { - %a = load %A[%arg3, %arg5] : memref<10x10xf32> - %b = load %B[%arg5, %arg4] : memref<10x10xf32> - %cij = load %C[%arg3, %arg4] : memref<10x10xf32> + %a = memref.load %A[%arg3, %arg5] : memref<10x10xf32> + %b = memref.load %B[%arg5, %arg4] : memref<10x10xf32> + %cij = memref.load %C[%arg3, %arg4] : memref<10x10xf32> %p = mulf %a, %b : f32 %co = addf %cij, %p : f32 - store %co, %C[%arg3, %arg4] : memref<10x10xf32> + memref.store %co, %C[%arg3, %arg4] : memref<10x10xf32> } } } @@ -96,12 +96,12 @@ // CHECK-NEXT: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { // CHECK-NEXT: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { // CHECK-NEXT: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK-NEXT: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> -// CHECK-NEXT: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> -// CHECK-NEXT: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> +// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> +// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> +// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> // CHECK-NEXT: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 // CHECK-NEXT: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECK-NEXT: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] : 
memref<10x10xf32> +// CHECK-NEXT: memref.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } @@ -127,10 +127,10 @@ scf.for %x = %lb to %c10 step %st { acc.loop worker { scf.for %y = %lb to %c10 step %st { - %axy = load %a[%x, %y] : memref<10x10xf32> - %bxy = load %b[%x, %y] : memref<10x10xf32> + %axy = memref.load %a[%x, %y] : memref<10x10xf32> + %bxy = memref.load %b[%x, %y] : memref<10x10xf32> %tmp = addf %axy, %bxy : f32 - store %tmp, %c[%y] : memref<10xf32> + memref.store %tmp, %c[%y] : memref<10xf32> } acc.yield } @@ -139,10 +139,10 @@ // for i = 0 to 10 step 1 // d[x] += c[i] scf.for %i = %lb to %c10 step %st { - %ci = load %c[%i] : memref<10xf32> - %dx = load %d[%x] : memref<10xf32> + %ci = memref.load %c[%i] : memref<10xf32> + %dx = memref.load %d[%x] : memref<10xf32> %z = addf %ci, %dx : f32 - store %z, %d[%x] : memref<10xf32> + memref.store %z, %d[%x] : memref<10xf32> } acc.yield } attributes {seq} @@ -169,19 +169,19 @@ // CHECK-NEXT: scf.for %{{.*}} = [[C0]] to [[C10]] step [[C1]] { // CHECK-NEXT: acc.loop worker { // CHECK-NEXT: scf.for %{{.*}} = [[C0]] to [[C10]] step [[C1]] { -// CHECK-NEXT: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> -// CHECK-NEXT: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> +// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> +// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> // CHECK-NEXT: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECK-NEXT: store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32> +// CHECK-NEXT: memref.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } // CHECK-NEXT: acc.loop { // CHECK-NEXT: scf.for %{{.*}} = [[C0]] to [[C10]] step [[C1]] { -// CHECK-NEXT: %{{.*}} = load %{{.*}}[%{{.*}}] : memref<10xf32> -// CHECK-NEXT: %{{.*}} = load %{{.*}}[%{{.*}}] : memref<10xf32> +// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}] : memref<10xf32> +// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}] : memref<10xf32> // CHECK-NEXT: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECK-NEXT: store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32> +// CHECK-NEXT: memref.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } attributes {seq} diff --git a/mlir/test/Dialect/SCF/bufferize.mlir b/mlir/test/Dialect/SCF/bufferize.mlir --- a/mlir/test/Dialect/SCF/bufferize.mlir +++ b/mlir/test/Dialect/SCF/bufferize.mlir @@ -5,13 +5,13 @@ // CHECK-SAME: %[[TRUE_TENSOR:.*]]: tensor, // CHECK-SAME: %[[FALSE_TENSOR:.*]]: tensor) -> tensor { // CHECK: %[[RESULT_MEMREF:.*]] = scf.if %[[PRED]] -> (memref) { -// CHECK: %[[TRUE_MEMREF:.*]] = tensor_to_memref %[[TRUE_TENSOR]] : memref +// CHECK: %[[TRUE_MEMREF:.*]] = memref.buffer_cast %[[TRUE_TENSOR]] : memref // CHECK: scf.yield %[[TRUE_MEMREF]] : memref // CHECK: } else { -// CHECK: %[[FALSE_MEMREF:.*]] = tensor_to_memref %[[FALSE_TENSOR]] : memref +// CHECK: %[[FALSE_MEMREF:.*]] = memref.buffer_cast %[[FALSE_TENSOR]] : memref // CHECK: scf.yield %[[FALSE_MEMREF]] : memref // CHECK: } -// CHECK: %[[RESULT_TENSOR:.*]] = tensor_load %[[RESULT_MEMREF:.*]] : memref +// CHECK: %[[RESULT_TENSOR:.*]] = memref.tensor_load %[[RESULT_MEMREF:.*]] : memref // CHECK: return %[[RESULT_TENSOR]] : tensor // CHECK: } func @if(%pred: i1, %true_val: tensor, %false_val: tensor) -> tensor { @@ -27,13 +27,13 @@ // CHECK-SAME: %[[TENSOR:.*]]: tensor, // CHECK-SAME: %[[LB:.*]]: index, 
%[[UB:.*]]: index, // CHECK-SAME: %[[STEP:.*]]: index) -> tensor { -// CHECK: %[[MEMREF:.*]] = tensor_to_memref %[[TENSOR]] : memref +// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] : memref // CHECK: %[[RESULT_MEMREF:.*]] = scf.for %[[VAL_6:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args(%[[ITER:.*]] = %[[MEMREF]]) -> (memref) { -// CHECK: %[[TENSOR_ITER:.*]] = tensor_load %[[ITER]] : memref -// CHECK: %[[MEMREF_YIELDED:.*]] = tensor_to_memref %[[TENSOR_ITER]] : memref +// CHECK: %[[TENSOR_ITER:.*]] = memref.tensor_load %[[ITER]] : memref +// CHECK: %[[MEMREF_YIELDED:.*]] = memref.buffer_cast %[[TENSOR_ITER]] : memref // CHECK: scf.yield %[[MEMREF_YIELDED]] : memref // CHECK: } -// CHECK: %[[VAL_8:.*]] = tensor_load %[[VAL_9:.*]] : memref +// CHECK: %[[VAL_8:.*]] = memref.tensor_load %[[VAL_9:.*]] : memref // CHECK: return %[[VAL_8]] : tensor // CHECK: } func @for(%arg0: tensor, %lb: index, %ub: index, %step: index) -> tensor { @@ -62,14 +62,14 @@ // CHECK-LABEL: func @for_correct_recursive_legalization_behavior( // CHECK-SAME: %[[TENSOR:.*]]: tensor, // CHECK-SAME: %[[INDEX:.*]]: index) -> tensor { -// CHECK: %[[MEMREF:.*]] = tensor_to_memref %[[TENSOR]] : memref +// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] : memref // CHECK: %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[INDEX]] to %[[INDEX]] step %[[INDEX]] iter_args(%[[MEMREF_ITER:.*]] = %[[MEMREF]]) -> (memref) { -// CHECK: %[[TENSOR_ITER:.*]] = tensor_load %[[MEMREF_ITER]] : memref +// CHECK: %[[TENSOR_ITER:.*]] = memref.tensor_load %[[MEMREF_ITER]] : memref // CHECK: %[[TENSOR_MUNGED:.*]] = "test.munge_tensor"(%[[TENSOR_ITER]]) : (tensor) -> tensor -// CHECK: %[[MEMREF_MUNGED:.*]] = tensor_to_memref %[[TENSOR_MUNGED]] : memref +// CHECK: %[[MEMREF_MUNGED:.*]] = memref.buffer_cast %[[TENSOR_MUNGED]] : memref // CHECK: scf.yield %[[MEMREF_MUNGED]] : memref // CHECK: } -// CHECK: %[[TENSOR:.*]] = tensor_load %[[RESULT:.*]] : memref +// CHECK: %[[TENSOR:.*]] = memref.tensor_load %[[RESULT:.*]] : memref // CHECK: return %[[TENSOR]] : tensor // CHECK: } func @for_correct_recursive_legalization_behavior(%arg0: tensor, %index: index) -> tensor { diff --git a/mlir/test/Dialect/SCF/canonicalize.mlir b/mlir/test/Dialect/SCF/canonicalize.mlir --- a/mlir/test/Dialect/SCF/canonicalize.mlir +++ b/mlir/test/Dialect/SCF/canonicalize.mlir @@ -13,7 +13,7 @@ %c10 = constant 10 : index scf.parallel (%i0, %i1, %i2) = (%c0, %c3, %c7) to (%c1, %c6, %c10) step (%c1, %c2, %c3) { %c42 = constant 42 : i32 - store %c42, %A[%i0, %i1, %i2] : memref + memref.store %c42, %A[%i0, %i1, %i2] : memref scf.yield } return @@ -28,7 +28,7 @@ // CHECK: [[C7:%.*]] = constant 7 : index // CHECK: [[C42:%.*]] = constant 42 : i32 // CHECK: scf.parallel ([[V0:%.*]]) = ([[C3]]) to ([[C6]]) step ([[C2]]) { -// CHECK: store [[C42]], [[ARG0]]{{\[}}[[C0]], [[V0]], [[C7]]] : memref +// CHECK: memref.store [[C42]], [[ARG0]]{{\[}}[[C0]], [[V0]], [[C7]]] : memref // CHECK: scf.yield // CHECK: } // CHECK: return @@ -348,12 +348,12 @@ %lb : index, %ub : index, %step : index) -> (tensor<128x128xf32>, tensor<128x128xf32>, tensor<128x128xf32>) { - // CHECK-NEXT: %[[M1:.*]] = tensor_to_memref %[[T1]] : memref<128x128xf32> + // CHECK-NEXT: %[[M1:.*]] = memref.buffer_cast %[[T1]] : memref<128x128xf32> // CHECK-NEXT: %[[FOR_RES:.*]] = scf.for {{.*}} iter_args(%[[BBARG_T2:.*]] = %[[T2]]) -> (tensor<128x128xf32>) { %0:3 = scf.for %arg0 = %lb to %ub step %step iter_args(%arg1 = %t0, %arg2 = %t1, %arg3 = %t2) -> (tensor<128x128xf32>, tensor<128x128xf32>, tensor<128x128xf32>) { - 
%m1 = tensor_to_memref %arg2 : memref<128x128xf32> + %m1 = memref.buffer_cast %arg2 : memref<128x128xf32> // CHECK-NEXT: call @process(%[[M0]]) : (memref<128x128xf32>) -> () call @process(%m0) : (memref<128x128xf32>) -> () @@ -363,13 +363,13 @@ // This does not hoist (fails the bbArg has at most a single check). // CHECK-NEXT: %[[T:.*]] = call @process_tensor(%[[BBARG_T2]]) : (tensor<128x128xf32>) -> memref<128x128xf32> - // CHECK-NEXT: %[[YIELD_T:.*]] = tensor_load %[[T:.*]] + // CHECK-NEXT: %[[YIELD_T:.*]] = memref.tensor_load %[[T:.*]] %m2 = call @process_tensor(%arg3): (tensor<128x128xf32>) -> memref<128x128xf32> - %3 = tensor_load %m2 : memref<128x128xf32> + %3 = memref.tensor_load %m2 : memref<128x128xf32> // All this stuff goes away, incrementally - %1 = tensor_load %m0 : memref<128x128xf32> - %2 = tensor_load %m1 : memref<128x128xf32> + %1 = memref.tensor_load %m0 : memref<128x128xf32> + %2 = memref.tensor_load %m1 : memref<128x128xf32> // CHECK-NEXT: scf.yield %[[YIELD_T]] : tensor<128x128xf32> scf.yield %1, %2, %3 : tensor<128x128xf32>, tensor<128x128xf32>, tensor<128x128xf32> @@ -377,8 +377,8 @@ // CHECK-NEXT: } } - // CHECK-NEXT: %[[R0:.*]] = tensor_load %[[M0]] : memref<128x128xf32> - // CHECK-NEXT: %[[R1:.*]] = tensor_load %[[M1]] : memref<128x128xf32> + // CHECK-NEXT: %[[R0:.*]] = memref.tensor_load %[[M0]] : memref<128x128xf32> + // CHECK-NEXT: %[[R1:.*]] = memref.tensor_load %[[M1]] : memref<128x128xf32> // CHECK-NEXT: return %[[R0]], %[[R1]], %[[FOR_RES]] : tensor<128x128xf32>, tensor<128x128xf32>, tensor<128x128xf32> return %0#0, %0#1, %0#2 : tensor<128x128xf32>, tensor<128x128xf32>, tensor<128x128xf32> } diff --git a/mlir/test/Dialect/SCF/for-loop-specialization.mlir b/mlir/test/Dialect/SCF/for-loop-specialization.mlir --- a/mlir/test/Dialect/SCF/for-loop-specialization.mlir +++ b/mlir/test/Dialect/SCF/for-loop-specialization.mlir @@ -7,13 +7,13 @@ %C: memref, %result: memref) { %c0 = constant 0 : index %c1 = constant 1 : index - %d0 = dim %A, %c0 : memref + %d0 = memref.dim %A, %c0 : memref %b0 = affine.min #map0()[%d0, %outer] scf.for %i0 = %c0 to %b0 step %c1 { - %B_elem = load %B[%i0] : memref - %C_elem = load %C[%i0] : memref + %B_elem = memref.load %B[%i0] : memref + %C_elem = memref.load %C[%i0] : memref %sum_elem = addf %B_elem, %C_elem : f32 - store %sum_elem, %result[%i0] : memref + memref.store %sum_elem, %result[%i0] : memref } return } @@ -22,17 +22,17 @@ // CHECK-SAME: [[ARG0:%.*]]: index, [[ARG1:%.*]]: memref, [[ARG2:%.*]]: memref, [[ARG3:%.*]]: memref, [[ARG4:%.*]]: memref) { // CHECK: [[CST_0:%.*]] = constant 0 : index // CHECK: [[CST_1:%.*]] = constant 1 : index -// CHECK: [[DIM_0:%.*]] = dim [[ARG1]], [[CST_0]] : memref +// CHECK: [[DIM_0:%.*]] = memref.dim [[ARG1]], [[CST_0]] : memref // CHECK: [[MIN:%.*]] = affine.min #map(){{\[}}[[DIM_0]], [[ARG0]]] // CHECK: [[CST_1024:%.*]] = constant 1024 : index // CHECK: [[PRED:%.*]] = cmpi eq, [[MIN]], [[CST_1024]] : index // CHECK: scf.if [[PRED]] { // CHECK: scf.for [[IDX0:%.*]] = [[CST_0]] to [[CST_1024]] step [[CST_1]] { -// CHECK: store +// CHECK: memref.store // CHECK: } // CHECK: } else { // CHECK: scf.for [[IDX0:%.*]] = [[CST_0]] to [[MIN]] step [[CST_1]] { -// CHECK: store +// CHECK: memref.store // CHECK: } // CHECK: } // CHECK: return diff --git a/mlir/test/Dialect/SCF/loop-unroll.mlir b/mlir/test/Dialect/SCF/loop-unroll.mlir --- a/mlir/test/Dialect/SCF/loop-unroll.mlir +++ b/mlir/test/Dialect/SCF/loop-unroll.mlir @@ -8,7 +8,7 @@ %arg3: memref) { %0 = constant 7.0 : f32 scf.for %i0 = %arg0 to %arg1 
step %arg2 { - store %0, %arg3[%i0] : memref + memref.store %0, %arg3[%i0] : memref } return } @@ -34,14 +34,14 @@ // Compute step of unrolled loop in V8. // UNROLL-BY-2-DAG: %[[V8:.*]] = muli %[[STEP]], %[[C2]] : index // UNROLL-BY-2: scf.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] { -// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-2-NEXT: memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref // UNROLL-BY-2-NEXT: %[[C1_IV:.*]] = constant 1 : index // UNROLL-BY-2-NEXT: %[[V9:.*]] = muli %[[STEP]], %[[C1_IV]] : index // UNROLL-BY-2-NEXT: %[[V10:.*]] = addi %[[IV]], %[[V9]] : index -// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[V10]]] : memref +// UNROLL-BY-2-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V10]]] : memref // UNROLL-BY-2-NEXT: } // UNROLL-BY-2-NEXT: scf.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] { -// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-2-NEXT: memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref // UNROLL-BY-2-NEXT: } // UNROLL-BY-2-NEXT: return @@ -67,18 +67,18 @@ // Compute step of unrolled loop in V8. // UNROLL-BY-3-DAG: %[[V8:.*]] = muli %[[STEP]], %[[C3]] : index // UNROLL-BY-3: scf.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] { -// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-3-NEXT: memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref // UNROLL-BY-3-NEXT: %[[C1_IV:.*]] = constant 1 : index // UNROLL-BY-3-NEXT: %[[V9:.*]] = muli %[[STEP]], %[[C1_IV]] : index // UNROLL-BY-3-NEXT: %[[V10:.*]] = addi %[[IV]], %[[V9]] : index -// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V10]]] : memref +// UNROLL-BY-3-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V10]]] : memref // UNROLL-BY-3-NEXT: %[[C2_IV:.*]] = constant 2 : index // UNROLL-BY-3-NEXT: %[[V11:.*]] = muli %[[STEP]], %[[C2_IV]] : index // UNROLL-BY-3-NEXT: %[[V12:.*]] = addi %[[IV]], %[[V11]] : index -// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V12]]] : memref +// UNROLL-BY-3-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V12]]] : memref // UNROLL-BY-3-NEXT: } // UNROLL-BY-3-NEXT: scf.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] { -// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-3-NEXT: memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref // UNROLL-BY-3-NEXT: } // UNROLL-BY-3-NEXT: return @@ -88,7 +88,7 @@ %0 = constant 7.0 : f32 scf.for %i0 = %arg0 to %arg1 step %arg2 { scf.for %i1 = %arg3 to %arg4 step %arg5 { - store %0, %arg6[%i1] : memref + memref.store %0, %arg6[%i1] : memref } } return @@ -104,15 +104,15 @@ // // UNROLL-OUTER-BY-2: scf.for %[[IV0:.*]] = %[[LB0]] to %{{.*}} step %{{.*}} { // UNROLL-OUTER-BY-2-NEXT: scf.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] { -// UNROLL-OUTER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref +// UNROLL-OUTER-BY-2-NEXT: memref.store %{{.*}}, %[[MEM]][%[[IV1]]] : memref // UNROLL-OUTER-BY-2-NEXT: } // UNROLL-OUTER-BY-2-NEXT: scf.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] { -// UNROLL-OUTER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref +// UNROLL-OUTER-BY-2-NEXT: memref.store %{{.*}}, %[[MEM]][%[[IV1]]] : memref // UNROLL-OUTER-BY-2-NEXT: } // UNROLL-OUTER-BY-2-NEXT: } // UNROLL-OUTER-BY-2-NEXT: scf.for %[[IV0:.*]] = %{{.*}} to %[[UB0]] step %[[STEP0]] { // UNROLL-OUTER-BY-2-NEXT: scf.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] { -// UNROLL-OUTER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref +// UNROLL-OUTER-BY-2-NEXT: memref.store %{{.*}}, %[[MEM]][%[[IV1]]] : memref // UNROLL-OUTER-BY-2-NEXT: } // 
UNROLL-OUTER-BY-2-NEXT: } // UNROLL-OUTER-BY-2-NEXT: return @@ -123,7 +123,7 @@ %0 = constant 7.0 : f32 scf.for %i0 = %arg0 to %arg1 step %arg2 { scf.for %i1 = %arg3 to %arg4 step %arg5 { - store %0, %arg6[%i1] : memref + memref.store %0, %arg6[%i1] : memref } } return @@ -139,14 +139,14 @@ // // UNROLL-INNER-BY-2: scf.for %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] { // UNROLL-INNER-BY-2: scf.for %[[IV1:.*]] = %[[LB1]] to %{{.*}} step %{{.*}} { -// UNROLL-INNER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref +// UNROLL-INNER-BY-2-NEXT: memref.store %{{.*}}, %[[MEM]][%[[IV1]]] : memref // UNROLL-INNER-BY-2-NEXT: %[[C1_IV:.*]] = constant 1 : index // UNROLL-INNER-BY-2-NEXT: %[[V0:.*]] = muli %[[STEP1]], %[[C1_IV]] : index // UNROLL-INNER-BY-2-NEXT: %[[V1:.*]] = addi %[[IV1]], %[[V0]] : index -// UNROLL-INNER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[V1]]] : memref +// UNROLL-INNER-BY-2-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V1]]] : memref // UNROLL-INNER-BY-2-NEXT: } // UNROLL-INNER-BY-2-NEXT: scf.for %[[IV1:.*]] = %{{.*}} to %[[UB1]] step %[[STEP1]] { -// UNROLL-INNER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref +// UNROLL-INNER-BY-2-NEXT: memref.store %{{.*}}, %[[MEM]][%[[IV1]]] : memref // UNROLL-INNER-BY-2-NEXT: } // UNROLL-INNER-BY-2-NEXT: } // UNROLL-INNER-BY-2-NEXT: return @@ -159,7 +159,7 @@ %ub = constant 20 : index %step = constant 1 : index scf.for %i0 = %lb to %ub step %step { - store %0, %arg0[%i0] : memref + memref.store %0, %arg0[%i0] : memref } return } @@ -171,11 +171,11 @@ // UNROLL-BY-2-DAG: %[[C20:.*]] = constant 20 : index // UNROLL-BY-2-DAG: %[[C2:.*]] = constant 2 : index // UNROLL-BY-2: scf.for %[[IV:.*]] = %[[C0]] to %[[C20]] step %[[C2]] { -// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-2-NEXT: memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref // UNROLL-BY-2-NEXT: %[[C1_IV:.*]] = constant 1 : index // UNROLL-BY-2-NEXT: %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index // UNROLL-BY-2-NEXT: %[[V1:.*]] = addi %[[IV]], %[[V0]] : index -// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[V1]]] : memref +// UNROLL-BY-2-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V1]]] : memref // UNROLL-BY-2-NEXT: } // UNROLL-BY-2-NEXT: return @@ -187,7 +187,7 @@ %ub = constant 20 : index %step = constant 1 : index scf.for %i0 = %lb to %ub step %step { - store %0, %arg0[%i0] : memref + memref.store %0, %arg0[%i0] : memref } return } @@ -201,18 +201,18 @@ // UNROLL-BY-3-DAG: %[[C18:.*]] = constant 18 : index // UNROLL-BY-3-DAG: %[[C3:.*]] = constant 3 : index // UNROLL-BY-3: scf.for %[[IV:.*]] = %[[C0]] to %[[C18]] step %[[C3]] { -// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-3-NEXT: memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref // UNROLL-BY-3-NEXT: %[[C1_IV:.*]] = constant 1 : index // UNROLL-BY-3-NEXT: %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index // UNROLL-BY-3-NEXT: %[[V1:.*]] = addi %[[IV]], %[[V0]] : index -// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V1]]] : memref +// UNROLL-BY-3-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V1]]] : memref // UNROLL-BY-3-NEXT: %[[C2_IV:.*]] = constant 2 : index // UNROLL-BY-3-NEXT: %[[V2:.*]] = muli %[[C1]], %[[C2_IV]] : index // UNROLL-BY-3-NEXT: %[[V3:.*]] = addi %[[IV]], %[[V2]] : index -// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V3]]] : memref +// UNROLL-BY-3-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V3]]] : memref // UNROLL-BY-3-NEXT: } // UNROLL-BY-3-NEXT: scf.for %[[IV:.*]] = %[[C18]] to %[[C20]] step %[[C1]] { -// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : 
memref +// UNROLL-BY-3-NEXT: memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref // UNROLL-BY-3-NEXT: } // UNROLL-BY-3-NEXT: return @@ -224,7 +224,7 @@ %ub = constant 10 : index %step = constant 1 : index scf.for %i0 = %lb to %ub step %step { - store %0, %arg0[%i0] : memref + memref.store %0, %arg0[%i0] : memref } return } @@ -237,17 +237,17 @@ // UNROLL-BY-3-DAG: %[[C9:.*]] = constant 9 : index // UNROLL-BY-3-DAG: %[[C3:.*]] = constant 3 : index // UNROLL-BY-3: scf.for %[[IV:.*]] = %[[C0]] to %[[C9]] step %[[C3]] { -// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref +// UNROLL-BY-3-NEXT: memref.store %{{.*}}, %[[MEM]][%[[IV]]] : memref // UNROLL-BY-3-NEXT: %[[C1_IV:.*]] = constant 1 : index // UNROLL-BY-3-NEXT: %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index // UNROLL-BY-3-NEXT: %[[V1:.*]] = addi %[[IV]], %[[V0]] : index -// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V1]]] : memref +// UNROLL-BY-3-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V1]]] : memref // UNROLL-BY-3-NEXT: %[[C2_IV:.*]] = constant 2 : index // UNROLL-BY-3-NEXT: %[[V2:.*]] = muli %[[C1]], %[[C2_IV]] : index // UNROLL-BY-3-NEXT: %[[V3:.*]] = addi %[[IV]], %[[V2]] : index -// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V3]]] : memref +// UNROLL-BY-3-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V3]]] : memref // UNROLL-BY-3-NEXT: } -// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[C9]]] : memref +// UNROLL-BY-3-NEXT: memref.store %{{.*}}, %[[MEM]][%[[C9]]] : memref // UNROLL-BY-3-NEXT: return // Test unroll-up-to functionality. diff --git a/mlir/test/Dialect/SCF/ops.mlir b/mlir/test/Dialect/SCF/ops.mlir --- a/mlir/test/Dialect/SCF/ops.mlir +++ b/mlir/test/Dialect/SCF/ops.mlir @@ -208,7 +208,7 @@ %sum_0 = constant 0.0 : f32 %c0 = constant 0.0 : f32 %sum = scf.for %iv = %lb to %ub step %step iter_args(%sum_iter = %sum_0) -> (f32) { - %t = load %buffer[%iv] : memref<1024xf32> + %t = memref.load %buffer[%iv] : memref<1024xf32> %cond = cmpf ugt, %t, %c0 : f32 %sum_next = scf.if %cond -> (f32) { %new_sum = addf %sum_iter, %t : f32 @@ -229,7 +229,7 @@ // CHECK-NEXT: %[[ZERO:.*]] = constant // CHECK-NEXT: %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[ARG1]] to %[[ARG2]] step %[[ARG3]] // CHECK-SAME: iter_args(%[[ITER:.*]] = %[[INIT]]) -> (f32) { -// CHECK-NEXT: %[[T:.*]] = load %[[ARG0]][%[[IV]]] +// CHECK-NEXT: %[[T:.*]] = memref.load %[[ARG0]][%[[IV]]] // CHECK-NEXT: %[[COND:.*]] = cmpf ugt, %[[T]], %[[ZERO]] // CHECK-NEXT: %[[IFRES:.*]] = scf.if %[[COND]] -> (f32) { // CHECK-NEXT: %[[THENRES:.*]] = addf %[[ITER]], %[[T]] diff --git a/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir b/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir --- a/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir +++ b/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir @@ -29,22 +29,22 @@ %c2 = constant 2 : index %c0 = constant 0 : index %c1 = constant 1 : index - %sum = alloc() : memref<2x2xf32> + %sum = memref.alloc() : memref<2x2xf32> scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) { - %B_elem = load %B[%i, %j] : memref<2x2xf32> - %C_elem = load %C[%i, %j] : memref<2x2xf32> + %B_elem = memref.load %B[%i, %j] : memref<2x2xf32> + %C_elem = memref.load %C[%i, %j] : memref<2x2xf32> %sum_elem = addf %B_elem, %C_elem : f32 - store %sum_elem, %sum[%i, %j] : memref<2x2xf32> + memref.store %sum_elem, %sum[%i, %j] : memref<2x2xf32> scf.yield } scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) { - %sum_elem = load %sum[%i, %j] : memref<2x2xf32> - %A_elem = load %A[%i, %j] : memref<2x2xf32> + %sum_elem = memref.load %sum[%i, %j] : 
memref<2x2xf32> + %A_elem = memref.load %A[%i, %j] : memref<2x2xf32> %product_elem = mulf %sum_elem, %A_elem : f32 - store %product_elem, %result[%i, %j] : memref<2x2xf32> + memref.store %product_elem, %result[%i, %j] : memref<2x2xf32> scf.yield } - dealloc %sum : memref<2x2xf32> + memref.dealloc %sum : memref<2x2xf32> return } // CHECK-LABEL: func @fuse_two @@ -53,20 +53,20 @@ // CHECK: [[C2:%.*]] = constant 2 : index // CHECK: [[C0:%.*]] = constant 0 : index // CHECK: [[C1:%.*]] = constant 1 : index -// CHECK: [[SUM:%.*]] = alloc() +// CHECK: [[SUM:%.*]] = memref.alloc() // CHECK: scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]]) // CHECK-SAME: to ([[C2]], [[C2]]) step ([[C1]], [[C1]]) { -// CHECK: [[B_ELEM:%.*]] = load [[B]]{{\[}}[[I]], [[J]]] -// CHECK: [[C_ELEM:%.*]] = load [[C]]{{\[}}[[I]], [[J]]] +// CHECK: [[B_ELEM:%.*]] = memref.load [[B]]{{\[}}[[I]], [[J]]] +// CHECK: [[C_ELEM:%.*]] = memref.load [[C]]{{\[}}[[I]], [[J]]] // CHECK: [[SUM_ELEM:%.*]] = addf [[B_ELEM]], [[C_ELEM]] -// CHECK: store [[SUM_ELEM]], [[SUM]]{{\[}}[[I]], [[J]]] -// CHECK: [[SUM_ELEM_:%.*]] = load [[SUM]]{{\[}}[[I]], [[J]]] -// CHECK: [[A_ELEM:%.*]] = load [[A]]{{\[}}[[I]], [[J]]] +// CHECK: memref.store [[SUM_ELEM]], [[SUM]]{{\[}}[[I]], [[J]]] +// CHECK: [[SUM_ELEM_:%.*]] = memref.load [[SUM]]{{\[}}[[I]], [[J]]] +// CHECK: [[A_ELEM:%.*]] = memref.load [[A]]{{\[}}[[I]], [[J]]] // CHECK: [[PRODUCT_ELEM:%.*]] = mulf [[SUM_ELEM_]], [[A_ELEM]] -// CHECK: store [[PRODUCT_ELEM]], [[RESULT]]{{\[}}[[I]], [[J]]] +// CHECK: memref.store [[PRODUCT_ELEM]], [[RESULT]]{{\[}}[[I]], [[J]]] // CHECK: scf.yield // CHECK: } -// CHECK: dealloc [[SUM]] +// CHECK: memref.dealloc [[SUM]] // ----- @@ -76,28 +76,28 @@ %c10 = constant 10 : index %c0 = constant 0 : index %c1 = constant 1 : index - %broadcast_rhs = alloc() : memref<100x10xf32> - %diff = alloc() : memref<100x10xf32> + %broadcast_rhs = memref.alloc() : memref<100x10xf32> + %diff = memref.alloc() : memref<100x10xf32> scf.parallel (%i, %j) = (%c0, %c0) to (%c100, %c10) step (%c1, %c1) { - %rhs_elem = load %rhs[%i] : memref<100xf32> - store %rhs_elem, %broadcast_rhs[%i, %j] : memref<100x10xf32> + %rhs_elem = memref.load %rhs[%i] : memref<100xf32> + memref.store %rhs_elem, %broadcast_rhs[%i, %j] : memref<100x10xf32> scf.yield } scf.parallel (%i, %j) = (%c0, %c0) to (%c100, %c10) step (%c1, %c1) { - %lhs_elem = load %lhs[%i, %j] : memref<100x10xf32> - %broadcast_rhs_elem = load %broadcast_rhs[%i, %j] : memref<100x10xf32> + %lhs_elem = memref.load %lhs[%i, %j] : memref<100x10xf32> + %broadcast_rhs_elem = memref.load %broadcast_rhs[%i, %j] : memref<100x10xf32> %diff_elem = subf %lhs_elem, %broadcast_rhs_elem : f32 - store %diff_elem, %diff[%i, %j] : memref<100x10xf32> + memref.store %diff_elem, %diff[%i, %j] : memref<100x10xf32> scf.yield } scf.parallel (%i, %j) = (%c0, %c0) to (%c100, %c10) step (%c1, %c1) { - %diff_elem = load %diff[%i, %j] : memref<100x10xf32> + %diff_elem = memref.load %diff[%i, %j] : memref<100x10xf32> %exp_elem = math.exp %diff_elem : f32 - store %exp_elem, %result[%i, %j] : memref<100x10xf32> + memref.store %exp_elem, %result[%i, %j] : memref<100x10xf32> scf.yield } - dealloc %broadcast_rhs : memref<100x10xf32> - dealloc %diff : memref<100x10xf32> + memref.dealloc %broadcast_rhs : memref<100x10xf32> + memref.dealloc %diff : memref<100x10xf32> return } // CHECK-LABEL: func @fuse_three @@ -107,23 +107,23 @@ // CHECK: [[C10:%.*]] = constant 10 : index // CHECK: [[C0:%.*]] = constant 0 : index // CHECK: [[C1:%.*]] = constant 1 : index -// CHECK: 
[[BROADCAST_RHS:%.*]] = alloc() -// CHECK: [[DIFF:%.*]] = alloc() +// CHECK: [[BROADCAST_RHS:%.*]] = memref.alloc() +// CHECK: [[DIFF:%.*]] = memref.alloc() // CHECK: scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]]) // CHECK-SAME: to ([[C100]], [[C10]]) step ([[C1]], [[C1]]) { -// CHECK: [[RHS_ELEM:%.*]] = load [[RHS]]{{\[}}[[I]]] -// CHECK: store [[RHS_ELEM]], [[BROADCAST_RHS]]{{\[}}[[I]], [[J]]] -// CHECK: [[LHS_ELEM:%.*]] = load [[LHS]]{{\[}}[[I]], [[J]]] -// CHECK: [[BROADCAST_RHS_ELEM:%.*]] = load [[BROADCAST_RHS]] +// CHECK: [[RHS_ELEM:%.*]] = memref.load [[RHS]]{{\[}}[[I]]] +// CHECK: memref.store [[RHS_ELEM]], [[BROADCAST_RHS]]{{\[}}[[I]], [[J]]] +// CHECK: [[LHS_ELEM:%.*]] = memref.load [[LHS]]{{\[}}[[I]], [[J]]] +// CHECK: [[BROADCAST_RHS_ELEM:%.*]] = memref.load [[BROADCAST_RHS]] // CHECK: [[DIFF_ELEM:%.*]] = subf [[LHS_ELEM]], [[BROADCAST_RHS_ELEM]] -// CHECK: store [[DIFF_ELEM]], [[DIFF]]{{\[}}[[I]], [[J]]] -// CHECK: [[DIFF_ELEM_:%.*]] = load [[DIFF]]{{\[}}[[I]], [[J]]] +// CHECK: memref.store [[DIFF_ELEM]], [[DIFF]]{{\[}}[[I]], [[J]]] +// CHECK: [[DIFF_ELEM_:%.*]] = memref.load [[DIFF]]{{\[}}[[I]], [[J]]] // CHECK: [[EXP_ELEM:%.*]] = math.exp [[DIFF_ELEM_]] -// CHECK: store [[EXP_ELEM]], [[RESULT]]{{\[}}[[I]], [[J]]] +// CHECK: memref.store [[EXP_ELEM]], [[RESULT]]{{\[}}[[I]], [[J]]] // CHECK: scf.yield // CHECK: } -// CHECK: dealloc [[BROADCAST_RHS]] -// CHECK: dealloc [[DIFF]] +// CHECK: memref.dealloc [[BROADCAST_RHS]] +// CHECK: memref.dealloc [[DIFF]] // ----- @@ -196,7 +196,7 @@ scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) { scf.yield } - %buffer = alloc() : memref<2x2xf32> + %buffer = memref.alloc() : memref<2x2xf32> scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) { scf.yield } @@ -233,23 +233,23 @@ %c2 = constant 2 : index %c0 = constant 0 : index %c1 = constant 1 : index - %common_buf = alloc() : memref<2x2xf32> + %common_buf = memref.alloc() : memref<2x2xf32> scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) { - %B_elem = load %B[%i, %j] : memref<2x2xf32> - %C_elem = load %C[%i, %j] : memref<2x2xf32> + %B_elem = memref.load %B[%i, %j] : memref<2x2xf32> + %C_elem = memref.load %C[%i, %j] : memref<2x2xf32> %sum_elem = addf %B_elem, %C_elem : f32 - store %sum_elem, %common_buf[%i, %j] : memref<2x2xf32> + memref.store %sum_elem, %common_buf[%i, %j] : memref<2x2xf32> scf.yield } scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) { %k = addi %i, %c1 : index - %sum_elem = load %common_buf[%k, %j] : memref<2x2xf32> - %A_elem = load %A[%i, %j] : memref<2x2xf32> + %sum_elem = memref.load %common_buf[%k, %j] : memref<2x2xf32> + %A_elem = memref.load %A[%i, %j] : memref<2x2xf32> %product_elem = mulf %sum_elem, %A_elem : f32 - store %product_elem, %result[%i, %j] : memref<2x2xf32> + memref.store %product_elem, %result[%i, %j] : memref<2x2xf32> scf.yield } - dealloc %common_buf : memref<2x2xf32> + memref.dealloc %common_buf : memref<2x2xf32> return } // CHECK-LABEL: func @do_not_fuse_unmatching_write_read_patterns @@ -263,23 +263,23 @@ %c2 = constant 2 : index %c0 = constant 0 : index %c1 = constant 1 : index - %sum = alloc() : memref<2x2xf32> + %sum = memref.alloc() : memref<2x2xf32> scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) { - %B_elem = load %B[%i, %j] : memref<2x2xf32> - %C_elem = load %common_buf[%i, %j] : memref<2x2xf32> + %B_elem = memref.load %B[%i, %j] : memref<2x2xf32> + %C_elem = memref.load %common_buf[%i, %j] : memref<2x2xf32> %sum_elem = addf %B_elem, %C_elem : f32 - 
store %sum_elem, %sum[%i, %j] : memref<2x2xf32> + memref.store %sum_elem, %sum[%i, %j] : memref<2x2xf32> scf.yield } scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) { %k = addi %i, %c1 : index - %sum_elem = load %sum[%k, %j] : memref<2x2xf32> - %A_elem = load %A[%i, %j] : memref<2x2xf32> + %sum_elem = memref.load %sum[%k, %j] : memref<2x2xf32> + %A_elem = memref.load %A[%i, %j] : memref<2x2xf32> %product_elem = mulf %sum_elem, %A_elem : f32 - store %product_elem, %common_buf[%j, %i] : memref<2x2xf32> + memref.store %product_elem, %common_buf[%j, %i] : memref<2x2xf32> scf.yield } - dealloc %sum : memref<2x2xf32> + memref.dealloc %sum : memref<2x2xf32> return } // CHECK-LABEL: func @do_not_fuse_unmatching_read_write_patterns @@ -292,14 +292,14 @@ %c2 = constant 2 : index %c0 = constant 0 : index %c1 = constant 1 : index - %buffer = alloc() : memref<2x2xf32> + %buffer = memref.alloc() : memref<2x2xf32> scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) { scf.yield } scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) { - %A = subview %buffer[%c0, %c0][%c2, %c2][%c1, %c1] + %A = memref.subview %buffer[%c0, %c0][%c2, %c2][%c1, %c1] : memref<2x2xf32> to memref - %A_elem = load %A[%i, %j] : memref + %A_elem = memref.load %A[%i, %j] : memref scf.yield } return @@ -315,24 +315,24 @@ %c2 = constant 2 : index %c0 = constant 0 : index %c1 = constant 1 : index - %sum = alloc() : memref<2x2xf32> + %sum = memref.alloc() : memref<2x2xf32> scf.parallel (%k) = (%c0) to (%c2) step (%c1) { scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) { - %B_elem = load %B[%i, %j] : memref<2x2xf32> - %C_elem = load %C[%i, %j] : memref<2x2xf32> + %B_elem = memref.load %B[%i, %j] : memref<2x2xf32> + %C_elem = memref.load %C[%i, %j] : memref<2x2xf32> %sum_elem = addf %B_elem, %C_elem : f32 - store %sum_elem, %sum[%i, %j] : memref<2x2xf32> + memref.store %sum_elem, %sum[%i, %j] : memref<2x2xf32> scf.yield } scf.parallel (%i, %j) = (%c0, %c0) to (%c2, %c2) step (%c1, %c1) { - %sum_elem = load %sum[%i, %j] : memref<2x2xf32> - %A_elem = load %A[%i, %j] : memref<2x2xf32> + %sum_elem = memref.load %sum[%i, %j] : memref<2x2xf32> + %A_elem = memref.load %A[%i, %j] : memref<2x2xf32> %product_elem = mulf %sum_elem, %A_elem : f32 - store %product_elem, %result[%i, %j] : memref<2x2xf32> + memref.store %product_elem, %result[%i, %j] : memref<2x2xf32> scf.yield } } - dealloc %sum : memref<2x2xf32> + memref.dealloc %sum : memref<2x2xf32> return } // CHECK-LABEL: func @nested_fuse @@ -341,19 +341,19 @@ // CHECK: [[C2:%.*]] = constant 2 : index // CHECK: [[C0:%.*]] = constant 0 : index // CHECK: [[C1:%.*]] = constant 1 : index -// CHECK: [[SUM:%.*]] = alloc() +// CHECK: [[SUM:%.*]] = memref.alloc() // CHECK: scf.parallel // CHECK: scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]]) // CHECK-SAME: to ([[C2]], [[C2]]) step ([[C1]], [[C1]]) { -// CHECK: [[B_ELEM:%.*]] = load [[B]]{{\[}}[[I]], [[J]]] -// CHECK: [[C_ELEM:%.*]] = load [[C]]{{\[}}[[I]], [[J]]] +// CHECK: [[B_ELEM:%.*]] = memref.load [[B]]{{\[}}[[I]], [[J]]] +// CHECK: [[C_ELEM:%.*]] = memref.load [[C]]{{\[}}[[I]], [[J]]] // CHECK: [[SUM_ELEM:%.*]] = addf [[B_ELEM]], [[C_ELEM]] -// CHECK: store [[SUM_ELEM]], [[SUM]]{{\[}}[[I]], [[J]]] -// CHECK: [[SUM_ELEM_:%.*]] = load [[SUM]]{{\[}}[[I]], [[J]]] -// CHECK: [[A_ELEM:%.*]] = load [[A]]{{\[}}[[I]], [[J]]] +// CHECK: memref.store [[SUM_ELEM]], [[SUM]]{{\[}}[[I]], [[J]]] +// CHECK: [[SUM_ELEM_:%.*]] = memref.load [[SUM]]{{\[}}[[I]], [[J]]] +// CHECK: [[A_ELEM:%.*]] = 
memref.load [[A]]{{\[}}[[I]], [[J]]] // CHECK: [[PRODUCT_ELEM:%.*]] = mulf [[SUM_ELEM_]], [[A_ELEM]] -// CHECK: store [[PRODUCT_ELEM]], [[RESULT]]{{\[}}[[I]], [[J]]] +// CHECK: memref.store [[PRODUCT_ELEM]], [[RESULT]]{{\[}}[[I]], [[J]]] // CHECK: scf.yield // CHECK: } // CHECK: } -// CHECK: dealloc [[SUM]] +// CHECK: memref.dealloc [[SUM]] diff --git a/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir b/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir --- a/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir +++ b/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir @@ -7,15 +7,15 @@ %C: memref, %result: memref) { %c0 = constant 0 : index %c1 = constant 1 : index - %d0 = dim %A, %c0 : memref - %d1 = dim %A, %c1 : memref + %d0 = memref.dim %A, %c0 : memref + %d1 = memref.dim %A, %c1 : memref %b0 = affine.min #map0()[%d0, %outer_i0] %b1 = affine.min #map1()[%d1, %outer_i1] scf.parallel (%i0, %i1) = (%c0, %c0) to (%b0, %b1) step (%c1, %c1) { - %B_elem = load %B[%i0, %i1] : memref - %C_elem = load %C[%i0, %i1] : memref + %B_elem = memref.load %B[%i0, %i1] : memref + %C_elem = memref.load %C[%i0, %i1] : memref %sum_elem = addf %B_elem, %C_elem : f32 - store %sum_elem, %result[%i0, %i1] : memref + memref.store %sum_elem, %result[%i0, %i1] : memref } return } @@ -24,8 +24,8 @@ // CHECK-SAME: [[VAL_0:%.*]]: index, [[VAL_1:%.*]]: index, [[VAL_2:%.*]]: memref, [[VAL_3:%.*]]: memref, [[VAL_4:%.*]]: memref, [[VAL_5:%.*]]: memref) { // CHECK: [[VAL_6:%.*]] = constant 0 : index // CHECK: [[VAL_7:%.*]] = constant 1 : index -// CHECK: [[VAL_8:%.*]] = dim [[VAL_2]], [[VAL_6]] : memref -// CHECK: [[VAL_9:%.*]] = dim [[VAL_2]], [[VAL_7]] : memref +// CHECK: [[VAL_8:%.*]] = memref.dim [[VAL_2]], [[VAL_6]] : memref +// CHECK: [[VAL_9:%.*]] = memref.dim [[VAL_2]], [[VAL_7]] : memref // CHECK: [[VAL_10:%.*]] = affine.min #map0(){{\[}}[[VAL_8]], [[VAL_0]]] // CHECK: [[VAL_11:%.*]] = affine.min #map1(){{\[}}[[VAL_9]], [[VAL_1]]] // CHECK: [[VAL_12:%.*]] = constant 1024 : index @@ -35,11 +35,11 @@ // CHECK: [[VAL_16:%.*]] = and [[VAL_13]], [[VAL_15]] : i1 // CHECK: scf.if [[VAL_16]] { // CHECK: scf.parallel ([[VAL_17:%.*]], [[VAL_18:%.*]]) = ([[VAL_6]], [[VAL_6]]) to ([[VAL_12]], [[VAL_14]]) step ([[VAL_7]], [[VAL_7]]) { -// CHECK: store +// CHECK: memref.store // CHECK: } // CHECK: } else { // CHECK: scf.parallel ([[VAL_22:%.*]], [[VAL_23:%.*]]) = ([[VAL_6]], [[VAL_6]]) to ([[VAL_10]], [[VAL_11]]) step ([[VAL_7]], [[VAL_7]]) { -// CHECK: store +// CHECK: memref.store // CHECK: } // CHECK: } // CHECK: return diff --git a/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir b/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir --- a/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir +++ b/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir @@ -5,10 +5,10 @@ %A: memref, %B: memref, %C: memref, %result: memref) { scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { - %B_elem = load %B[%i0, %i1] : memref - %C_elem = load %C[%i0, %i1] : memref + %B_elem = memref.load %B[%i0, %i1] : memref + %C_elem = memref.load %C[%i0, %i1] : memref %sum_elem = addf %B_elem, %C_elem : f32 - store %sum_elem, %result[%i0, %i1] : memref + memref.store %sum_elem, %result[%i0, %i1] : memref } return } @@ -27,10 +27,10 @@ // CHECK: scf.parallel ([[V7:%.*]], [[V8:%.*]]) = ([[C0]], [[C0]]) to ([[V5]], [[V6]]) step ([[ARG5]], [[ARG6]]) { // CHECK: [[V9:%.*]] = addi [[V7]], [[V3]] : index // CHECK: [[V10:%.*]] = addi [[V8]], [[V4]] : index -// CHECK: [[V11:%.*]] = load [[ARG8]]{{\[}}[[V9]], [[V10]]] : memref -// 
CHECK: [[V12:%.*]] = load [[ARG9]]{{\[}}[[V9]], [[V10]]] : memref +// CHECK: [[V11:%.*]] = memref.load [[ARG8]]{{\[}}[[V9]], [[V10]]] : memref +// CHECK: [[V12:%.*]] = memref.load [[ARG9]]{{\[}}[[V9]], [[V10]]] : memref // CHECK: [[V13:%.*]] = addf [[V11]], [[V12]] : f32 -// CHECK: store [[V13]], [[ARG10]]{{\[}}[[V9]], [[V10]]] : memref +// CHECK: memref.store [[V13]], [[ARG10]]{{\[}}[[V9]], [[V10]]] : memref // CHECK: } // CHECK: } // CHECK: return diff --git a/mlir/test/Dialect/Shape/bufferize.mlir b/mlir/test/Dialect/Shape/bufferize.mlir --- a/mlir/test/Dialect/Shape/bufferize.mlir +++ b/mlir/test/Dialect/Shape/bufferize.mlir @@ -6,10 +6,10 @@ // CHECK: %[[WTRUE:.*]] = shape.const_witness true // CHECK: %[[MEMREF:.*]] = shape.assuming %[[WTRUE]] -> (memref<2xf16>) { // CHECK: %[[TENSOR_VAL:.*]] = "test.source"() : () -> tensor<2xf16> -// CHECK: %[[YIELDED_MEMREF:.*]] = tensor_to_memref %[[TENSOR_VAL]] : memref<2xf16> +// CHECK: %[[YIELDED_MEMREF:.*]] = memref.buffer_cast %[[TENSOR_VAL]] : memref<2xf16> // CHECK: shape.assuming_yield %[[YIELDED_MEMREF]] : memref<2xf16> // CHECK: } -// CHECK: %[[TENSOR:.*]] = tensor_load %[[MEMREF:.*]] : memref<2xf16> +// CHECK: %[[TENSOR:.*]] = memref.tensor_load %[[MEMREF:.*]] : memref<2xf16> // CHECK: "test.sink"(%[[TENSOR]]) : (tensor<2xf16>) -> () // CHECK: return // CHECK: } diff --git a/mlir/test/Dialect/Standard/bufferize.mlir b/mlir/test/Dialect/Standard/bufferize.mlir --- a/mlir/test/Dialect/Standard/bufferize.mlir +++ b/mlir/test/Dialect/Standard/bufferize.mlir @@ -3,11 +3,11 @@ // CHECK-LABEL: func @dim( // CHECK-SAME: %[[TENSOR:.*]]: tensor, // CHECK-SAME: %[[INDEX:.*]]: index) -> index { -// CHECK: %[[MEMREF:.*]] = tensor_to_memref %[[TENSOR]] : memref -// CHECK: %[[EXTENT:.*]] = dim %[[MEMREF]], %[[INDEX]] : memref +// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] : memref +// CHECK: %[[EXTENT:.*]] = memref.dim %[[MEMREF]], %[[INDEX]] : memref // CHECK: return %[[EXTENT]] : index func @dim(%arg0: tensor, %arg1: index) -> index { - %0 = dim %arg0, %arg1 : tensor + %0 = memref.dim %arg0, %arg1 : tensor return %0 : index } @@ -15,10 +15,10 @@ // CHECK-SAME: %[[PRED:.*]]: i1, // CHECK-SAME: %[[TRUE_VAL:.*]]: tensor, // CHECK-SAME: %[[FALSE_VAL:.*]]: tensor) -> tensor { -// CHECK: %[[TRUE_VAL_MEMREF:.*]] = tensor_to_memref %[[TRUE_VAL]] : memref -// CHECK: %[[FALSE_VAL_MEMREF:.*]] = tensor_to_memref %[[FALSE_VAL]] : memref +// CHECK: %[[TRUE_VAL_MEMREF:.*]] = memref.buffer_cast %[[TRUE_VAL]] : memref +// CHECK: %[[FALSE_VAL_MEMREF:.*]] = memref.buffer_cast %[[FALSE_VAL]] : memref // CHECK: %[[RET_MEMREF:.*]] = select %[[PRED]], %[[TRUE_VAL_MEMREF]], %[[FALSE_VAL_MEMREF]] : memref -// CHECK: %[[RET:.*]] = tensor_load %[[RET_MEMREF]] : memref +// CHECK: %[[RET:.*]] = memref.tensor_load %[[RET_MEMREF]] : memref // CHECK: return %[[RET]] : tensor func @select(%arg0: i1, %arg1: tensor, %arg2: tensor) -> tensor { %0 = select %arg0, %arg1, %arg2 : tensor diff --git a/mlir/test/Dialect/Standard/canonicalize.mlir b/mlir/test/Dialect/Standard/canonicalize.mlir --- a/mlir/test/Dialect/Standard/canonicalize.mlir +++ b/mlir/test/Dialect/Standard/canonicalize.mlir @@ -2,25 +2,25 @@ // ----- -// Test case: Basic folding of tensor_load(tensor_to_memref(t)) -> t -// CHECK-LABEL: func @tensor_load_of_tensor_to_memref( +// Test case: Basic folding of memref.tensor_load(memref.buffer_cast(t)) -> t +// CHECK-LABEL: func @tensor_load_of_buffer_cast( // CHECK-SAME: %[[TENSOR:.*]]: tensor) -> tensor { // CHECK: return %[[TENSOR]] -func 
@tensor_load_of_tensor_to_memref(%arg0: tensor) -> tensor { - %0 = tensor_to_memref %arg0 : memref - %1 = tensor_load %0 : memref +func @tensor_load_of_buffer_cast(%arg0: tensor) -> tensor { + %0 = memref.buffer_cast %arg0 : memref + %1 = memref.tensor_load %0 : memref return %1 : tensor } // ----- -// Test case: Basic folding of tensor_to_memref(tensor_load(m)) -> m -// CHECK-LABEL: func @tensor_to_memref_of_tensor_load( +// Test case: Basic folding of memref.buffer_cast(memref.tensor_load(m)) -> m +// CHECK-LABEL: func @buffer_cast_of_tensor_load( // CHECK-SAME: %[[MEMREF:.*]]: memref) -> memref { // CHECK: return %[[MEMREF]] -func @tensor_to_memref_of_tensor_load(%arg0: memref) -> memref { - %0 = tensor_load %arg0 : memref - %1 = tensor_to_memref %0 : memref +func @buffer_cast_of_tensor_load(%arg0: memref) -> memref { + %0 = memref.tensor_load %arg0 : memref + %1 = memref.buffer_cast %0 : memref return %1 : memref } @@ -29,14 +29,14 @@ // Test case: If the memrefs are not the same type, don't fold them. // Test case: If the memrefs are not cast-compatible (e.g. different address space), // don't canonicalize them either. -// CHECK-LABEL: func @no_fold_tensor_to_memref_of_tensor_load( +// CHECK-LABEL: func @no_fold_buffer_cast_of_tensor_load( // CHECK-SAME: %[[MEMREF_ADDRSPACE2:.*]]: memref) -> memref { -// CHECK: %[[TENSOR:.*]] = tensor_load %[[MEMREF_ADDRSPACE2]] : memref -// CHECK: %[[MEMREF_ADDRSPACE7:.*]] = tensor_to_memref %[[TENSOR]] : memref +// CHECK: %[[TENSOR:.*]] = memref.tensor_load %[[MEMREF_ADDRSPACE2]] : memref +// CHECK: %[[MEMREF_ADDRSPACE7:.*]] = memref.buffer_cast %[[TENSOR]] : memref // CHECK: return %[[MEMREF_ADDRSPACE7]] -func @no_fold_tensor_to_memref_of_tensor_load(%arg0: memref) -> memref { - %0 = tensor_load %arg0 : memref - %1 = tensor_to_memref %0 : memref +func @no_fold_buffer_cast_of_tensor_load(%arg0: memref) -> memref { + %0 = memref.tensor_load %arg0 : memref + %1 = memref.buffer_cast %0 : memref return %1 : memref } @@ -46,57 +46,57 @@ // CHECK-DAG: #[[$OFF_UNK:[a-z0-9]+]] = affine_map<(d0)[s0] -> (d0 + s0)> // Test case: If the memrefs are cast-compatible, canonicalize. -// CHECK-LABEL: func @canonicalize_tensor_to_memref_of_tensor_load( +// CHECK-LABEL: func @canonicalize_buffer_cast_of_tensor_load( // CHECK-SAME: %[[M:.*]]: memref) -> memref { -// CHECK-NOT: tensor_load -// CHECK-NOT: tensor_to_memref -// CHECK: %[[R:.*]] = memref_cast %[[M]] : memref to memref +// CHECK-NOT: memref.tensor_load +// CHECK-NOT: memref.buffer_cast +// CHECK: %[[R:.*]] = memref.cast %[[M]] : memref to memref // CHECK: return %[[R]] -func @canonicalize_tensor_to_memref_of_tensor_load(%arg0: memref) +func @canonicalize_buffer_cast_of_tensor_load(%arg0: memref) -> memref { - %0 = tensor_load %arg0 : memref - %1 = tensor_to_memref %0 : memref + %0 = memref.tensor_load %arg0 : memref + %1 = memref.buffer_cast %0 : memref return %1 : memref } // ----- -// Test case: Basic folding of dim(tensor_load(m)) -> dim(m). +// Test case: Basic folding of memref.dim(memref.tensor_load(m)) -> memref.dim(m). 
 // CHECK-LABEL: func @dim_of_tensor_load(
 // CHECK-SAME: %[[MEMREF:[0-9a-z]*]]: memref
 // CHECK: %[[C0:.*]] = constant 0
-// CHECK: %[[D:.*]] = dim %[[MEMREF]], %[[C0]]
+// CHECK: %[[D:.*]] = memref.dim %[[MEMREF]], %[[C0]]
 // CHECK: return %[[D]] : index
 func @dim_of_tensor_load(%arg0: memref) -> index {
   %c0 = constant 0 : index
-  %0 = tensor_load %arg0 : memref
-  %1 = dim %0, %c0 : tensor
+  %0 = memref.tensor_load %arg0 : memref
+  %1 = memref.dim %0, %c0 : tensor
   return %1 : index
 }
 // -----
-// Test case: Folding of load(tensor_to_memref(%v, %idxs))
+// Test case: Folding of memref.load(memref.buffer_cast(%v, %idxs))
 // -> tensor.extract(%v, %idx)
-// CHECK-LABEL: func @load_from_tensor_to_memref(
+// CHECK-LABEL: func @load_from_buffer_cast(
 // CHECK-SAME: %[[IDX0:[0-9a-z]+]]: index, %[[IDX1:[0-9a-z]+]]: index
 // CHECK-SAME: %[[TENSOR:[0-9a-z]+]]: tensor
 // CHECK: %[[RES:.*]] = tensor.extract %[[TENSOR]][%[[IDX0]], %[[IDX1]]]
-// CHECK-NOT: load
+// CHECK-NOT: memref.load
 // CHECK: return %[[RES]] : f32
-func @load_from_tensor_to_memref(%arg0: index, %arg1: index, %arg2: tensor) -> f32 {
-  %0 = tensor_to_memref %arg2 : memref
-  %1 = load %0[%arg0, %arg1] : memref
+func @load_from_buffer_cast(%arg0: index, %arg1: index, %arg2: tensor) -> f32 {
+  %0 = memref.buffer_cast %arg2 : memref
+  %1 = memref.load %0[%arg0, %arg1] : memref
   return %1 : f32
 }
 // -----
-// Test case: Folding of dim(tensor.generate %idx) -> %idx
+// Test case: Folding of memref.dim(tensor.generate %idx) -> %idx
 // CHECK-LABEL: func @dim_of_tensor.generate(
 // CHECK-SAME: %[[IDX0:[0-9a-z]+]]: index, %[[IDX1:[0-9a-z]+]]: index
-// CHECK-NOT: dim
+// CHECK-NOT: memref.dim
 // CHECK: return %[[IDX1]] : index
 func @dim_of_tensor.generate(%arg0: index, %arg1: index) -> index {
   %c3 = constant 3 : index
@@ -104,7 +104,7 @@
   ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index):
     tensor.yield %c3 : index
   } : tensor<2x?x4x?x5xindex>
-  %1 = dim %0, %c3 : tensor<2x?x4x?x5xindex>
+  %1 = memref.dim %0, %c3 : tensor<2x?x4x?x5xindex>
   return %1 : index
 }
@@ -134,41 +134,41 @@
 // -----
-// Test case: Folding of dim(memref_reshape %v %shp, %idx) -> load %shp[%idx]
+// Test case: Folding of memref.dim(memref.reshape %v %shp, %idx) -> memref.load %shp[%idx]
 // CHECK-LABEL: func @dim_of_memref_reshape(
 // CHECK-SAME: %[[MEM:[0-9a-z]+]]: memref<*xf32>,
 // CHECK-SAME: %[[SHP:[0-9a-z]+]]: memref
 // CHECK-NEXT: %[[IDX:.*]] = constant 3
-// CHECK-NEXT: %[[DIM:.*]] = load %[[SHP]][%[[IDX]]]
-// CHECK-NEXT: store
-// CHECK-NOT: dim
+// CHECK-NEXT: %[[DIM:.*]] = memref.load %[[SHP]][%[[IDX]]]
+// CHECK-NEXT: memref.store
+// CHECK-NOT: memref.dim
 // CHECK: return %[[DIM]] : index
 func @dim_of_memref_reshape(%arg0: memref<*xf32>, %arg1: memref) -> index {
   %c3 = constant 3 : index
-  %0 = memref_reshape %arg0(%arg1)
+  %0 = memref.reshape %arg0(%arg1)
       : (memref<*xf32>, memref) -> memref<*xf32>
   // Update the shape to test that the load ends up in the right place.
- store %c3, %arg1[%c3] : memref - %1 = dim %0, %c3 : memref<*xf32> + memref.store %c3, %arg1[%c3] : memref + %1 = memref.dim %0, %c3 : memref<*xf32> return %1 : index } // ----- -// Test case: Folding dim(tensor.cast %0, %idx) -> dim %0, %idx +// Test case: Folding memref.dim(tensor.cast %0, %idx) -> memref.dim %0, %idx // CHECK-LABEL: func @fold_dim_of_tensor.cast // CHECK-SAME: %[[ARG0:.[a-z0-9A-Z_]+]]: tensor<4x?xf32> // CHECK-DAG: %[[C1:.+]] = constant 1 : index // CHECK-DAG: %[[C4:.+]] = constant 4 : index -// CHECK: %[[T0:.+]] = dim %[[ARG0]], %[[C1]] +// CHECK: %[[T0:.+]] = memref.dim %[[ARG0]], %[[C1]] // CHECK-NEXT: return %[[C4]], %[[T0]] func @fold_dim_of_tensor.cast(%arg0 : tensor<4x?xf32>) -> (index, index) { %c0 = constant 0 : index %c1 = constant 1 : index %0 = tensor.cast %arg0 : tensor<4x?xf32> to tensor - %1 = dim %0, %c0 : tensor - %2 = dim %0, %c1 : tensor + %1 = memref.dim %0, %c0 : tensor + %2 = memref.dim %0, %c1 : tensor return %1, %2: index, index } @@ -176,13 +176,13 @@ // CHECK-LABEL: func @tensor_cast_to_memref // CHECK-SAME: %[[ARG0:.+]]: tensor<4x6x16x32xi8> -// CHECK: %[[M:.+]] = tensor_to_memref %[[ARG0]] : memref<4x6x16x32xi8> -// CHECK: %[[M1:.+]] = memref_cast %[[M]] : memref<4x6x16x32xi8> to memref +// CHECK: %[[M:.+]] = memref.buffer_cast %[[ARG0]] : memref<4x6x16x32xi8> +// CHECK: %[[M1:.+]] = memref.cast %[[M]] : memref<4x6x16x32xi8> to memref // CHECK: return %[[M1]] : memref func @tensor_cast_to_memref(%arg0 : tensor<4x6x16x32xi8>) -> memref { %0 = tensor.cast %arg0 : tensor<4x6x16x32xi8> to tensor - %1 = tensor_to_memref %0 : memref + %1 = memref.buffer_cast %0 : memref return %1 : memref } @@ -190,13 +190,13 @@ // CHECK-LABEL: func @subview_of_memcast // CHECK-SAME: %[[ARG0:.[a-z0-9A-Z_]+]]: memref<4x6x16x32xi8> -// CHECK: %[[S:.+]] = subview %arg0[0, 1, 0] [1, 1, 16] [1, 1, 1] : memref<4x6x16x32xi8> to memref<16x32xi8, #{{.*}}> -// CHECK: %[[M:.+]] = memref_cast %[[S]] : memref<16x32xi8, #{{.*}}> to memref<16x32xi8, #{{.*}}> +// CHECK: %[[S:.+]] = memref.subview %arg0[0, 1, 0] [1, 1, 16] [1, 1, 1] : memref<4x6x16x32xi8> to memref<16x32xi8, #{{.*}}> +// CHECK: %[[M:.+]] = memref.cast %[[S]] : memref<16x32xi8, #{{.*}}> to memref<16x32xi8, #{{.*}}> // CHECK: return %[[M]] : memref<16x32xi8, #{{.*}}> func @subview_of_memcast(%arg : memref<4x6x16x32xi8>) -> memref<16x32xi8, affine_map<(d0, d1)[s0] -> (d0 * 32 + d1 + s0)>>{ - %0 = memref_cast %arg : memref<4x6x16x32xi8> to memref - %1 = subview %0[0, 1, 0] [1, 1, 16] [1, 1, 1] : + %0 = memref.cast %arg : memref<4x6x16x32xi8> to memref + %1 = memref.subview %0[0, 1, 0] [1, 1, 16] [1, 1, 1] : memref to memref<16x32xi8, affine_map<(d0, d1)[s0] -> (d0 * 32 + d1 + s0)>> return %1 : memref<16x32xi8, affine_map<(d0, d1)[s0] -> (d0 * 32 + d1 + s0)>> @@ -206,10 +206,10 @@ // CHECK-LABEL: func @subview_of_static_full_size // CHECK-SAME: %[[ARG0:.+]]: memref<4x6x16x32xi8> -// CHECK-NOT: subview +// CHECK-NOT: memref.subview // CHECK: return %[[ARG0]] : memref<4x6x16x32xi8> func @subview_of_static_full_size(%arg0 : memref<4x6x16x32xi8>) -> memref<4x6x16x32xi8> { - %0 = subview %arg0[0, 0, 0, 0] [4, 6, 16, 32] [1, 1, 1, 1] : memref<4x6x16x32xi8> to memref<4x6x16x32xi8> + %0 = memref.subview %arg0[0, 0, 0, 0] [4, 6, 16, 32] [1, 1, 1, 1] : memref<4x6x16x32xi8> to memref<4x6x16x32xi8> return %0 : memref<4x6x16x32xi8> } @@ -272,7 +272,7 @@ %c1 = constant 1 : index %c2 = constant 2 : index %c8 = constant 8 : index - %0 = dim %arg0, %c1 : tensor<2x?xi32> + %0 = memref.dim %arg0, %c1 : tensor<2x?xi32> %1 = tensor.extract 
%arg1[] : tensor %2 = tensor.generate %arg2, %c8 { ^bb0(%arg4: index, %arg5: index): diff --git a/mlir/test/Dialect/Standard/expand-ops.mlir b/mlir/test/Dialect/Standard/expand-ops.mlir --- a/mlir/test/Dialect/Standard/expand-ops.mlir +++ b/mlir/test/Dialect/Standard/expand-ops.mlir @@ -85,7 +85,7 @@ // CHECK-LABEL: func @memref_reshape( func @memref_reshape(%input: memref<*xf32>, %shape: memref<3xi32>) -> memref { - %result = memref_reshape %input(%shape) + %result = memref.reshape %input(%shape) : (memref<*xf32>, memref<3xi32>) -> memref return %result : memref } @@ -94,20 +94,20 @@ // CHECK: [[C1:%.*]] = constant 1 : index // CHECK: [[C2:%.*]] = constant 2 : index -// CHECK: [[DIM_2:%.*]] = load [[SHAPE]]{{\[}}[[C2]]] : memref<3xi32> +// CHECK: [[DIM_2:%.*]] = memref.load [[SHAPE]]{{\[}}[[C2]]] : memref<3xi32> // CHECK: [[SIZE_2:%.*]] = index_cast [[DIM_2]] : i32 to index // CHECK: [[STRIDE_1:%.*]] = muli [[C1]], [[SIZE_2]] : index // CHECK: [[C1_:%.*]] = constant 1 : index -// CHECK: [[DIM_1:%.*]] = load [[SHAPE]]{{\[}}[[C1_]]] : memref<3xi32> +// CHECK: [[DIM_1:%.*]] = memref.load [[SHAPE]]{{\[}}[[C1_]]] : memref<3xi32> // CHECK: [[SIZE_1:%.*]] = index_cast [[DIM_1]] : i32 to index // CHECK: [[STRIDE_0:%.*]] = muli [[STRIDE_1]], [[SIZE_1]] : index // CHECK: [[C0:%.*]] = constant 0 : index -// CHECK: [[DIM_0:%.*]] = load [[SHAPE]]{{\[}}[[C0]]] : memref<3xi32> +// CHECK: [[DIM_0:%.*]] = memref.load [[SHAPE]]{{\[}}[[C0]]] : memref<3xi32> // CHECK: [[SIZE_0:%.*]] = index_cast [[DIM_0]] : i32 to index -// CHECK: [[RESULT:%.*]] = memref_reinterpret_cast [[SRC]] +// CHECK: [[RESULT:%.*]] = memref.reinterpret_cast [[SRC]] // CHECK-SAME: to offset: [0], sizes: {{\[}}[[SIZE_0]], [[SIZE_1]], [[SIZE_2]]], // CHECK-SAME: strides: {{\[}}[[STRIDE_0]], [[STRIDE_1]], [[C1]]] // CHECK-SAME: : memref<*xf32> to memref diff --git a/mlir/test/Dialect/Standard/func-bufferize.mlir b/mlir/test/Dialect/Standard/func-bufferize.mlir --- a/mlir/test/Dialect/Standard/func-bufferize.mlir +++ b/mlir/test/Dialect/Standard/func-bufferize.mlir @@ -2,8 +2,8 @@ // CHECK-LABEL: func @identity( // CHECK-SAME: %[[ARG:.*]]: memref) -> memref { -// CHECK: %[[TENSOR:.*]] = tensor_load %[[ARG]] : memref -// CHECK: %[[MEMREF:.*]] = tensor_to_memref %[[TENSOR]] : memref +// CHECK: %[[TENSOR:.*]] = memref.tensor_load %[[ARG]] : memref +// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] : memref // CHECK: return %[[MEMREF]] : memref func @identity(%arg0: tensor) -> tensor { return %arg0 : tensor @@ -11,12 +11,12 @@ // CHECK-LABEL: func @block_arguments( // CHECK-SAME: %[[ARG:.*]]: memref) -> memref { -// CHECK: %[[T1:.*]] = tensor_load %[[ARG]] : memref -// CHECK: %[[M1:.*]] = tensor_to_memref %[[T1]] : memref +// CHECK: %[[T1:.*]] = memref.tensor_load %[[ARG]] : memref +// CHECK: %[[M1:.*]] = memref.buffer_cast %[[T1]] : memref // CHECK: br ^bb1(%[[M1]] : memref) // CHECK: ^bb1(%[[BBARG:.*]]: memref): -// CHECK: %[[T2:.*]] = tensor_load %[[BBARG]] : memref -// CHECK: %[[M2:.*]] = tensor_to_memref %[[T2]] : memref +// CHECK: %[[T2:.*]] = memref.tensor_load %[[BBARG]] : memref +// CHECK: %[[M2:.*]] = memref.buffer_cast %[[T2]] : memref // CHECK: return %[[M2]] : memref func @block_arguments(%arg0: tensor) -> tensor { br ^bb1(%arg0: tensor) @@ -35,8 +35,8 @@ } // CHECK-LABEL: func @call_sink( // CHECK-SAME: %[[ARG:.*]]: memref) { -// CHECK: %[[TENSOR:.*]] = tensor_load %[[ARG]] : memref -// CHECK: %[[MEMREF:.*]] = tensor_to_memref %[[TENSOR]] : memref +// CHECK: %[[TENSOR:.*]] = memref.tensor_load %[[ARG]] : memref +// CHECK: 
%[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] : memref // CHECK: call @sink(%[[MEMREF]]) : (memref) -> () // CHECK: return func private @sink(tensor) @@ -47,7 +47,7 @@ // CHECK-LABEL: func @unconverted_op_in_body() -> memref { // CHECK: %[[TENSOR:.*]] = "test.source"() : () -> tensor -// CHECK: %[[MEMREF:.*]] = tensor_to_memref %[[TENSOR]] : memref +// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] : memref // CHECK: return %[[MEMREF]] : memref func @unconverted_op_in_body() -> tensor { %0 = "test.source"() : () -> tensor diff --git a/mlir/test/Dialect/Standard/invalid.mlir b/mlir/test/Dialect/Standard/invalid.mlir --- a/mlir/test/Dialect/Standard/invalid.mlir +++ b/mlir/test/Dialect/Standard/invalid.mlir @@ -18,28 +18,28 @@ func @transpose_not_permutation(%v : memref(off + M * i + j)>>) { // expected-error @+1 {{expected a permutation map}} - transpose %v (i, j) -> (i, i) : memref(off + M * i + j)>> to memref(off + M * i + j)>> + memref.transpose %v (i, j) -> (i, i) : memref(off + M * i + j)>> to memref(off + M * i + j)>> } // ----- func @transpose_bad_rank(%v : memref(off + M * i + j)>>) { // expected-error @+1 {{expected a permutation map of same rank as the input}} - transpose %v (i) -> (i) : memref(off + M * i + j)>> to memref(off + M * i + j)>> + memref.transpose %v (i) -> (i) : memref(off + M * i + j)>> to memref(off + M * i + j)>> } // ----- func @transpose_wrong_type(%v : memref(off + M * i + j)>>) { // expected-error @+1 {{output type 'memref (d0 * s1 + s0 + d1)>>' does not match transposed input type 'memref (d0 * s1 + s0 + d1)>>'}} - transpose %v (i, j) -> (j, i) : memref(off + M * i + j)>> to memref(off + M * i + j)>> + memref.transpose %v (i, j) -> (j, i) : memref(off + M * i + j)>> to memref(off + M * i + j)>> } // ----- func @memref_reinterpret_cast_too_many_offsets(%in: memref) { // expected-error @+1 {{expected <= 1 offset values}} - %out = memref_reinterpret_cast %in to + %out = memref.reinterpret_cast %in to offset: [0, 0], sizes: [10, 10], strides: [10, 1] : memref to memref<10x10xf32, offset: 0, strides: [10, 1]> return @@ -49,7 +49,7 @@ func @memref_reinterpret_cast_incompatible_element_types(%in: memref<*xf32>) { // expected-error @+1 {{different element types specified}} - %out = memref_reinterpret_cast %in to + %out = memref.reinterpret_cast %in to offset: [0], sizes: [10], strides: [1] : memref<*xf32> to memref<10xi32, offset: 0, strides: [1]> return @@ -59,7 +59,7 @@ func @memref_reinterpret_cast_incompatible_memory_space(%in: memref<*xf32>) { // expected-error @+1 {{different memory spaces specified}} - %out = memref_reinterpret_cast %in to + %out = memref.reinterpret_cast %in to offset: [0], sizes: [10], strides: [1] : memref<*xf32> to memref<10xi32, offset: 0, strides: [1], 2> return @@ -69,7 +69,7 @@ func @memref_reinterpret_cast_offset_mismatch(%in: memref) { // expected-error @+1 {{expected result type with offset = 2 instead of 1}} - %out = memref_reinterpret_cast %in to + %out = memref.reinterpret_cast %in to offset: [1], sizes: [10], strides: [1] : memref to memref<10xf32, offset: 2, strides: [1]> return @@ -79,7 +79,7 @@ func @memref_reinterpret_cast_size_mismatch(%in: memref<*xf32>) { // expected-error @+1 {{expected result type with size = 10 instead of 1 in dim = 0}} - %out = memref_reinterpret_cast %in to + %out = memref.reinterpret_cast %in to offset: [0], sizes: [10], strides: [1] : memref<*xf32> to memref<1xf32, offset: 0, strides: [1]> return @@ -89,7 +89,7 @@ func @memref_reinterpret_cast_offset_mismatch(%in: memref) { // 
expected-error @+1 {{expected result type with stride = 2 instead of 1 in dim = 0}} - %out = memref_reinterpret_cast %in to + %out = memref.reinterpret_cast %in to offset: [2], sizes: [10], strides: [2] : memref to memref<10xf32, offset: 2, strides: [1]> return @@ -101,7 +101,7 @@ %c0 = constant 0 : index %c10 = constant 10 : index // expected-error @+1 {{expected result type with size = 10 instead of -1 in dim = 0}} - %out = memref_reinterpret_cast %in to + %out = memref.reinterpret_cast %in to offset: [%c0], sizes: [10, %c10], strides: [%c10, 1] : memref to memref return @@ -112,7 +112,7 @@ func @memref_reshape_element_type_mismatch( %buf: memref<*xf32>, %shape: memref<1xi32>) { // expected-error @+1 {{element types of source and destination memref types should be the same}} - memref_reshape %buf(%shape) : (memref<*xf32>, memref<1xi32>) -> memref + memref.reshape %buf(%shape) : (memref<*xf32>, memref<1xi32>) -> memref } // ----- @@ -120,7 +120,7 @@ func @memref_reshape_dst_ranked_shape_unranked( %buf: memref<*xf32>, %shape: memref) { // expected-error @+1 {{cannot use shape operand with dynamic length to reshape to statically-ranked memref type}} - memref_reshape %buf(%shape) : (memref<*xf32>, memref) -> memref + memref.reshape %buf(%shape) : (memref<*xf32>, memref) -> memref } // ----- @@ -128,7 +128,7 @@ func @memref_reshape_dst_shape_rank_mismatch( %buf: memref<*xf32>, %shape: memref<1xi32>) { // expected-error @+1 {{length of shape operand differs from the result's memref rank}} - memref_reshape %buf(%shape) + memref.reshape %buf(%shape) : (memref<*xf32>, memref<1xi32>) -> memref } @@ -138,7 +138,7 @@ %buf: memref<4x4xf32, offset: 0, strides: [3, 2]>, %shape: memref<1xi32>) { // expected-error @+1 {{source memref type should have identity affine map}} - memref_reshape %buf(%shape) + memref.reshape %buf(%shape) : (memref<4x4xf32, offset: 0, strides: [3, 2]>, memref<1xi32>) -> memref<8xf32> } @@ -148,67 +148,67 @@ func @memref_reshape_result_affine_map_is_not_identity( %buf: memref<4x4xf32>, %shape: memref<1xi32>) { // expected-error @+1 {{result memref type should have identity affine map}} - memref_reshape %buf(%shape) + memref.reshape %buf(%shape) : (memref<4x4xf32>, memref<1xi32>) -> memref<8xf32, offset: 0, strides: [2]> } // ----- // expected-error @+1 {{type should be static shaped memref}} -global_memref @foo : i32 +memref.global @foo : i32 // ----- // expected-error @+1 {{type should be static shaped memref}} -global_memref @foo : i32 = 5 +memref.global @foo : i32 = 5 // ----- // expected-error @+1 {{type should be static shaped memref}} -global_memref @foo : memref<*xf32> +memref.global @foo : memref<*xf32> // ----- // expected-error @+1 {{type should be static shaped memref}} -global_memref @foo : memref +memref.global @foo : memref // ----- // expected-error @+1 {{initial value should be a unit or elements attribute}} -global_memref @foo : memref<2x2xf32> = "foo" +memref.global @foo : memref<2x2xf32> = "foo" // ----- // expected-error @+1 {{inferred shape of elements literal ([2]) does not match type ([2, 2])}} -global_memref @foo : memref<2x2xf32> = dense<[0.0, 1.0]> +memref.global @foo : memref<2x2xf32> = dense<[0.0, 1.0]> // ----- // expected-error @+1 {{expected valid '@'-identifier for symbol name}} -global_memref "private" "public" @foo : memref<2x2xf32> = "foo" +memref.global "private" "public" @foo : memref<2x2xf32> = "foo" // ----- // expected-error @+1 {{expected valid '@'-identifier for symbol name}} -global_memref constant external @foo : memref<2x2xf32> = "foo" 
+memref.global constant external @foo : memref<2x2xf32> = "foo" // ----- // constant qualifier must be after visibility. // expected-error @+1 {{expected valid '@'-identifier for symbol name}} -global_memref constant "private" @foo : memref<2x2xf32> = "foo" +memref.global constant "private" @foo : memref<2x2xf32> = "foo" // ----- // expected-error @+1 {{op visibility expected to be one of ["public", "private", "nested"], but got "priate"}} -global_memref "priate" constant @memref5 : memref<2xf32> = uninitialized +memref.global "priate" constant @memref5 : memref<2xf32> = uninitialized // ----- func @nonexistent_global_memref() { // expected-error @+1 {{'gv' does not reference a valid global memref}} - %0 = get_global_memref @gv : memref<3xf32> + %0 = memref.get_global @gv : memref<3xf32> return } @@ -218,17 +218,17 @@ func @nonexistent_global_memref() { // expected-error @+1 {{'foo' does not reference a valid global memref}} - %0 = get_global_memref @foo : memref<3xf32> + %0 = memref.get_global @foo : memref<3xf32> return } // ----- -global_memref @gv : memref<3xi32> +memref.global @gv : memref<3xi32> func @mismatched_types() { // expected-error @+1 {{result type 'memref<3xf32>' does not match type 'memref<3xi32>' of the global memref @gv}} - %0 = get_global_memref @gv : memref<3xf32> + %0 = memref.get_global @gv : memref<3xf32> return } diff --git a/mlir/test/Dialect/Standard/ops.mlir b/mlir/test/Dialect/Standard/ops.mlir --- a/mlir/test/Dialect/Standard/ops.mlir +++ b/mlir/test/Dialect/Standard/ops.mlir @@ -19,10 +19,10 @@ return %0 : tensor } -// CHECK-LABEL: test_tensor_to_memref -func @test_tensor_to_memref(%arg0: tensor, %arg1: tensor<*xi64>) -> (memref (d0 + 7)>>, memref<*xi64, 1>) { - %0 = tensor_to_memref %arg0 : memref (d0 + 7)>> - %1 = tensor_to_memref %arg1 : memref<*xi64, 1> +// CHECK-LABEL: test_buffer_cast +func @test_buffer_cast(%arg0: tensor, %arg1: tensor<*xi64>) -> (memref (d0 + 7)>>, memref<*xi64, 1>) { + %0 = memref.buffer_cast %arg0 : memref (d0 + 7)>> + %1 = memref.buffer_cast %arg1 : memref<*xi64, 1> return %0, %1 : memref (d0 + 7)>>, memref<*xi64, 1> } @@ -49,7 +49,7 @@ -> memref<10x?xf32, offset: ?, strides: [?, 1]> { %c0 = constant 0 : index %c10 = constant 10 : index - %out = memref_reinterpret_cast %in to + %out = memref.reinterpret_cast %in to offset: [%c0], sizes: [10, %c10], strides: [%c10, 1] : memref to memref<10x?xf32, offset: ?, strides: [?, 1]> return %out : memref<10x?xf32, offset: ?, strides: [?, 1]> @@ -58,41 +58,41 @@ // CHECK-LABEL: func @memref_reshape( func @memref_reshape(%unranked: memref<*xf32>, %shape1: memref<1xi32>, %shape2: memref<2xi32>, %shape3: memref) -> memref<*xf32> { - %dyn_vec = memref_reshape %unranked(%shape1) + %dyn_vec = memref.reshape %unranked(%shape1) : (memref<*xf32>, memref<1xi32>) -> memref - %dyn_mat = memref_reshape %dyn_vec(%shape2) + %dyn_mat = memref.reshape %dyn_vec(%shape2) : (memref, memref<2xi32>) -> memref - %new_unranked = memref_reshape %dyn_mat(%shape3) + %new_unranked = memref.reshape %dyn_mat(%shape3) : (memref, memref) -> memref<*xf32> return %new_unranked : memref<*xf32> } -// CHECK-LABEL: global_memref @memref0 : memref<2xf32> -global_memref @memref0 : memref<2xf32> +// CHECK-LABEL: memref.global @memref0 : memref<2xf32> +memref.global @memref0 : memref<2xf32> -// CHECK-LABEL: global_memref constant @memref1 : memref<2xf32> = dense<[0.000000e+00, 1.000000e+00]> -global_memref constant @memref1 : memref<2xf32> = dense<[0.0, 1.0]> +// CHECK-LABEL: memref.global constant @memref1 : memref<2xf32> = 
dense<[0.000000e+00, 1.000000e+00]> +memref.global constant @memref1 : memref<2xf32> = dense<[0.0, 1.0]> -// CHECK-LABEL: global_memref @memref2 : memref<2xf32> = uninitialized -global_memref @memref2 : memref<2xf32> = uninitialized +// CHECK-LABEL: memref.global @memref2 : memref<2xf32> = uninitialized +memref.global @memref2 : memref<2xf32> = uninitialized -// CHECK-LABEL: global_memref "private" @memref3 : memref<2xf32> = uninitialized -global_memref "private" @memref3 : memref<2xf32> = uninitialized +// CHECK-LABEL: memref.global "private" @memref3 : memref<2xf32> = uninitialized +memref.global "private" @memref3 : memref<2xf32> = uninitialized -// CHECK-LABEL: global_memref "private" constant @memref4 : memref<2xf32> = uninitialized -global_memref "private" constant @memref4 : memref<2xf32> = uninitialized +// CHECK-LABEL: memref.global "private" constant @memref4 : memref<2xf32> = uninitialized +memref.global "private" constant @memref4 : memref<2xf32> = uninitialized // CHECK-LABEL: func @write_global_memref func @write_global_memref() { - %0 = get_global_memref @memref0 : memref<2xf32> + %0 = memref.get_global @memref0 : memref<2xf32> %1 = constant dense<[1.0, 2.0]> : tensor<2xf32> - tensor_store %1, %0 : memref<2xf32> + memref.tensor_store %1, %0 : memref<2xf32> return } // CHECK-LABEL: func @read_global_memref func @read_global_memref() { - %0 = get_global_memref @memref0 : memref<2xf32> - %1 = tensor_load %0 : memref<2xf32> + %0 = memref.get_global @memref0 : memref<2xf32> + %1 = memref.tensor_load %0 : memref<2xf32> return } diff --git a/mlir/test/Dialect/Standard/tensor-constant-bufferize.mlir b/mlir/test/Dialect/Standard/tensor-constant-bufferize.mlir --- a/mlir/test/Dialect/Standard/tensor-constant-bufferize.mlir +++ b/mlir/test/Dialect/Standard/tensor-constant-bufferize.mlir @@ -1,13 +1,13 @@ -// RUN: mlir-opt %s -tensor-constant-bufferize -split-input-file +// RUN: mlir-opt %s -tensor-constant-bufferize -split-input-file | FileCheck %s // CHECK-LABEL: module { // We check the debug name too since we put some effort into making that readable. // The name isn't load-bearing though. -// CHECK: global_memref "private" constant @__constant_3x4xf32 : memref<3x4xf32> = dense<7.000000e+00> +// CHECK: memref.global "private" constant @__constant_3x4xf32 : memref<3x4xf32> = dense<7.000000e+00> // CHECK: @basic func @basic() -> tensor<3x4xf32> { - // CHECK: %[[MEMREF:.*]] = get_global_memref @__constant_3x4xf32 : memref<3x4xf32> - // CHECK: %[[TENSOR:.*]] = tensor_load %[[MEMREF]] + // CHECK: %[[MEMREF:.*]] = memref.get_global @__constant_3x4xf32 : memref<3x4xf32> + // CHECK: %[[TENSOR:.*]] = memref.tensor_load %[[MEMREF]] %0 = constant dense<7.0> : tensor<3x4xf32> // CHECK: return %[[TENSOR]] return %0 : tensor<3x4xf32> @@ -20,8 +20,8 @@ // CHECK-LABEL: module { // Only one global is created. -// CHECK: global_memref -// CHECK-NOT: global_memref +// CHECK: memref.global +// CHECK-NOT: memref.global func @duplicate_constants() -> (tensor<3x4xf32>, tensor<3x4xf32>) { %0 = constant dense<7.0> : tensor<3x4xf32> %1 = constant dense<7.0> : tensor<3x4xf32> @@ -35,9 +35,9 @@ // CHECK-LABEL: module { // Two globals are created. 
-// CHECK: global_memref -// CHECK: global_memref -// CHECK-NOT: global_memref +// CHECK: memref.global +// CHECK: memref.global +// CHECK-NOT: memref.global func @multiple_constants() -> (tensor<3x4xf32>, tensor<3x4xf32>) { %0 = constant dense<7.0> : tensor<3x4xf32> %1 = constant dense<8.0> : tensor<3x4xf32> @@ -50,7 +50,7 @@ // CHECK-LABEL: module { // We don't convert non-tensor globals. -// CHECK-NOT: global_memref +// CHECK-NOT: memref.global func @non_tensor() { %0 = constant 7 : i32 return diff --git a/mlir/test/Dialect/Tensor/bufferize.mlir b/mlir/test/Dialect/Tensor/bufferize.mlir --- a/mlir/test/Dialect/Tensor/bufferize.mlir +++ b/mlir/test/Dialect/Tensor/bufferize.mlir @@ -2,9 +2,9 @@ // CHECK-LABEL: func @tensor.cast( // CHECK-SAME: %[[TENSOR:.*]]: tensor) -> tensor<2xindex> { -// CHECK: %[[MEMREF:.*]] = tensor_to_memref %[[TENSOR]] -// CHECK: %[[CASTED:.*]] = memref_cast %[[MEMREF]] : memref to memref<2xindex> -// CHECK: %[[RET:.*]] = tensor_load %[[CASTED]] +// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] +// CHECK: %[[CASTED:.*]] = memref.cast %[[MEMREF]] : memref to memref<2xindex> +// CHECK: %[[RET:.*]] = memref.tensor_load %[[CASTED]] // CHECK: return %[[RET]] : tensor<2xindex> func @tensor.cast(%arg0: tensor) -> tensor<2xindex> { %0 = tensor.cast %arg0 : tensor to tensor<2xindex> @@ -13,9 +13,9 @@ // CHECK-LABEL: func @tensor.cast_from_unranked( // CHECK-SAME: %[[TENSOR:.*]]: tensor<*xf32>) -> tensor<2xf32> { -// CHECK: %[[MEMREF:.*]] = tensor_to_memref %[[TENSOR]] : memref<*xf32> -// CHECK: %[[CASTED_MEMREF:.*]] = memref_cast %[[MEMREF]] : memref<*xf32> to memref<2xf32> -// CHECK: %[[RET:.*]] = tensor_load %[[CASTED_MEMREF]] : memref<2xf32> +// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] : memref<*xf32> +// CHECK: %[[CASTED_MEMREF:.*]] = memref.cast %[[MEMREF]] : memref<*xf32> to memref<2xf32> +// CHECK: %[[RET:.*]] = memref.tensor_load %[[CASTED_MEMREF]] : memref<2xf32> // CHECK: return %[[RET]] : tensor<2xf32> func @tensor.cast_from_unranked(%arg0: tensor<*xf32>) -> tensor<2xf32> { %0 = tensor.cast %arg0 : tensor<*xf32> to tensor<2xf32> @@ -24,9 +24,9 @@ // CHECK-LABEL: func @tensor.cast_to_unranked( // CHECK-SAME: %[[TENSOR:.*]]: tensor<2xf32>) -> tensor<*xf32> { -// CHECK: %[[MEMREF:.*]] = tensor_to_memref %[[TENSOR]] : memref<2xf32> -// CHECK: %[[CASTED_MEMREF:.*]] = memref_cast %[[MEMREF]] : memref<2xf32> to memref<*xf32> -// CHECK: %[[RET:.*]] = tensor_load %[[CASTED_MEMREF]] : memref<*xf32> +// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] : memref<2xf32> +// CHECK: %[[CASTED_MEMREF:.*]] = memref.cast %[[MEMREF]] : memref<2xf32> to memref<*xf32> +// CHECK: %[[RET:.*]] = memref.tensor_load %[[CASTED_MEMREF]] : memref<*xf32> // CHECK: return %[[RET]] : tensor<*xf32> func @tensor.cast_to_unranked(%arg0: tensor<2xf32>) -> tensor<*xf32> { %0 = tensor.cast %arg0 : tensor<2xf32> to tensor<*xf32> @@ -36,8 +36,8 @@ // CHECK-LABEL: func @tensor.extract( // CHECK-SAME: %[[TENSOR:.*]]: tensor, // CHECK-SAME: %[[IDX:.*]]: index) -> f32 { -// CHECK: %[[MEMREF:.*]] = tensor_to_memref %[[TENSOR]] : memref -// CHECK: %[[RET:.*]] = load %[[MEMREF]][%[[IDX]]] : memref +// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] : memref +// CHECK: %[[RET:.*]] = memref.load %[[MEMREF]][%[[IDX]]] : memref // CHECK: return %[[RET]] : f32 // CHECK: } func @tensor.extract(%arg0: tensor, %arg1: index) -> f32 { @@ -48,12 +48,12 @@ // CHECK-LABEL: func @tensor.from_elements( // CHECK-SAME: %[[ELEM0:.*]]: index, // CHECK-SAME: %[[ELEM1:.*]]: index) -> 
tensor<2xindex> { -// CHECK: %[[MEMREF:.*]] = alloc() +// CHECK: %[[MEMREF:.*]] = memref.alloc() // CHECK: %[[C0:.*]] = constant 0 : index // CHECK: store %[[ELEM0]], %[[MEMREF]][%[[C0]]] // CHECK: %[[C1:.*]] = constant 1 : index // CHECK: store %[[ELEM1]], %[[MEMREF]][%[[C1]]] -// CHECK: %[[RET:.*]] = tensor_load %[[MEMREF]] +// CHECK: %[[RET:.*]] = memref.tensor_load %[[MEMREF]] // CHECK: return %[[RET]] : tensor<2xindex> func @tensor.from_elements(%arg0: index, %arg1: index) -> tensor<2xindex> { %0 = tensor.from_elements %arg0, %arg1 : tensor<2xindex> @@ -63,21 +63,21 @@ // CHECK-LABEL: func @tensor.generate( // CHECK-SAME: %[[ARG:.*]]: tensor<*xf32>, // CHECK-SAME: %[[DYNAMIC_EXTENT:.*]]: index) -> tensor { -// CHECK: %[[MEMREF:.*]] = alloc(%[[DYNAMIC_EXTENT]]) : memref +// CHECK: %[[MEMREF:.*]] = memref.alloc(%[[DYNAMIC_EXTENT]]) : memref // CHECK: %[[C0:.*]] = constant 0 : index // CHECK: %[[C1:.*]] = constant 1 : index // CHECK: scf.parallel (%[[I:.*]]) = (%[[C0]]) to (%[[DYNAMIC_EXTENT]]) step (%[[C1]]) { -// CHECK: %[[ELEM:.*]] = dim %[[ARG]], %[[I]] : tensor<*xf32> +// CHECK: %[[ELEM:.*]] = memref.dim %[[ARG]], %[[I]] : tensor<*xf32> // CHECK: store %[[ELEM]], %[[MEMREF]][%[[I]]] : memref // CHECK: scf.yield // CHECK: } -// CHECK: %[[RET:.*]] = tensor_load %[[MEMREF]] : memref +// CHECK: %[[RET:.*]] = memref.tensor_load %[[MEMREF]] : memref // CHECK: return %[[RET]] : tensor // CHECK: } func @tensor.generate(%arg: tensor<*xf32>, %dynamic_extent: index) -> tensor { %result = tensor.generate %dynamic_extent { ^bb0(%i : index): - %elem = dim %arg, %i : tensor<*xf32> + %elem = memref.dim %arg, %i : tensor<*xf32> tensor.yield %elem : index } : tensor return %result : tensor @@ -88,7 +88,7 @@ // // CHECK-LABEL: func @tensor.generate_static_and_dynamic( // CHECK-SAME: %[[DYNAMIC_EXTENT:.*]]: index) -> tensor<16x?xindex> { -// CHECK: %[[MEMREF:.*]] = alloc(%[[DYNAMIC_EXTENT]]) : memref<16x?xindex> +// CHECK: %[[MEMREF:.*]] = memref.alloc(%[[DYNAMIC_EXTENT]]) : memref<16x?xindex> // CHECK: %[[C0:.*]] = constant 0 : index // CHECK: %[[C1:.*]] = constant 1 : index // CHECK: %[[C16:.*]] = constant 16 : index @@ -97,7 +97,7 @@ // CHECK: store %[[VAL_7]], %[[MEMREF]][%[[I]], %[[J]]] : memref<16x?xindex> // CHECK: scf.yield // CHECK: } -// CHECK: %[[RET:.*]] = tensor_load %[[MEMREF]] : memref<16x?xindex> +// CHECK: %[[RET:.*]] = memref.tensor_load %[[MEMREF]] : memref<16x?xindex> // CHECK: return %[[RET]] : tensor<16x?xindex> // CHECK: } func @tensor.generate_static_and_dynamic(%arg0: index) -> tensor<16x?xindex> { diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir --- a/mlir/test/Dialect/Tensor/canonicalize.mlir +++ b/mlir/test/Dialect/Tensor/canonicalize.mlir @@ -171,10 +171,10 @@ // CHECK-SAME: %[[IDX:.*]]: index, %[[TENSOR:.*]]: tensor<*xf32> func @extract_from_tensor.generate(%idx: index, %tensor: tensor<*xf32>) -> index { %size = rank %tensor : tensor<*xf32> - // CHECK-NEXT: %[[RES:.*]] = dim %[[TENSOR]], %[[IDX]] + // CHECK-NEXT: %[[RES:.*]] = memref.dim %[[TENSOR]], %[[IDX]] %0 = tensor.generate %size { ^bb0(%arg0: index): - %1 = dim %tensor, %arg0 : tensor<*xf32> + %1 = memref.dim %tensor, %arg0 : tensor<*xf32> tensor.yield %1 : index } : tensor %1 = tensor.extract %0[%idx] : tensor @@ -188,13 +188,13 @@ // CHECK-SAME: %[[IDX0:.*]]: index, %[[IDX1:.*]]: index, %[[TENSOR:.*]]: tensor<*xf32> func @extract_from_tensor.generate_2d(%idx0: index, %idx1: index, %tensor: tensor<*xf32>) -> index { %size = rank %tensor : tensor<*xf32> - // 
CHECK-NEXT: %[[DIM0:.*]] = dim %[[TENSOR]], %[[IDX0]] - // CHECK-NEXT: %[[DIM1:.*]] = dim %[[TENSOR]], %[[IDX1]] + // CHECK-NEXT: %[[DIM0:.*]] = memref.dim %[[TENSOR]], %[[IDX0]] + // CHECK-NEXT: %[[DIM1:.*]] = memref.dim %[[TENSOR]], %[[IDX1]] // CHECK-NEXT: %[[RES:.*]] = addi %[[DIM0]], %[[DIM1]] %0 = tensor.generate %size, %size { ^bb0(%arg0: index, %arg1: index): - %1 = dim %tensor, %arg0 : tensor<*xf32> - %2 = dim %tensor, %arg1 : tensor<*xf32> + %1 = memref.dim %tensor, %arg0 : tensor<*xf32> + %2 = memref.dim %tensor, %arg1 : tensor<*xf32> %3 = addi %1, %2 : index tensor.yield %3 : index } : tensor @@ -209,12 +209,12 @@ // CHECK-SAME: %[[IDX:.*]]: index func @extract_from_tensor.generate_sideeffects(%idx: index, %tensor: tensor<*xf32>) -> index { %size = rank %tensor : tensor<*xf32> - %mem = alloc(%size) : memref + %mem = memref.alloc(%size) : memref // CHECK: %[[DTENSOR:.*]] = tensor.generate %0 = tensor.generate %size { ^bb0(%arg0: index): - %1 = dim %tensor, %arg0 : tensor<*xf32> - store %1, %mem[%arg0] : memref + %1 = memref.dim %tensor, %arg0 : tensor<*xf32> + memref.store %1, %mem[%arg0] : memref tensor.yield %1 : index } : tensor // CHECK: %[[RES:.*]] = tensor.extract %[[DTENSOR]][%[[IDX]]] diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir --- a/mlir/test/Dialect/Vector/canonicalize.mlir +++ b/mlir/test/Dialect/Vector/canonicalize.mlir @@ -255,7 +255,7 @@ func @cast_transfers(%A: memref<4x8xf32>) -> (vector<4x8xf32>) { %c0 = constant 0 : index %f0 = constant 0.0 : f32 - %0 = memref_cast %A : memref<4x8xf32> to memref + %0 = memref.cast %A : memref<4x8xf32> to memref // CHECK: vector.transfer_read %{{.*}} {masked = [false, false]} : memref<4x8xf32>, vector<4x8xf32> %1 = vector.transfer_read %0[%c0, %c0], %f0 : memref, vector<4x8xf32> diff --git a/mlir/test/Dialect/Vector/vector-contract-matvec-transforms.mlir b/mlir/test/Dialect/Vector/vector-contract-matvec-transforms.mlir --- a/mlir/test/Dialect/Vector/vector-contract-matvec-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-contract-matvec-transforms.mlir @@ -49,9 +49,9 @@ // CHECK-SAME: %[[A:.*0]]: memref> // CHECK-SAME: %[[B:.*1]]: memref> // CHECK-SAME: %[[C:.*2]]: memref> -// CHECK: %[[T0:.*]] = load %[[A]][] : memref> -// CHECK: %[[T1:.*]] = load %[[B]][] : memref> -// CHECK: %[[T2:.*]] = load %[[C]][] : memref> +// CHECK: %[[T0:.*]] = memref.load %[[A]][] : memref> +// CHECK: %[[T1:.*]] = memref.load %[[B]][] : memref> +// CHECK: %[[T2:.*]] = memref.load %[[C]][] : memref> // CHECK: %[[T3:.*]] = vector.transpose %[[T0]], [1, 0] : vector<2x2xf32> to vector<2x2xf32> // CHECK: %[[T4:.*]] = vector.extract %[[T3]][0] : vector<2x2xf32> // CHECK: %[[T5:.*]] = vector.extract %[[T1]][0] : vector<2xf32> @@ -59,15 +59,15 @@ // CHECK: %[[T7:.*]] = vector.extract %[[T3]][1] : vector<2x2xf32> // CHECK: %[[T8:.*]] = vector.extract %[[T1]][1] : vector<2xf32> // CHECK: %[[T9:.*]] = vector.outerproduct %[[T7]], %[[T8]], %[[T6]] {kind = #vector.kind} : vector<2xf32>, f32 -// CHECK: store %[[T9]], %[[C]][] : memref> +// CHECK: memref.store %[[T9]], %[[C]][] : memref> // CHECK: return func @matvec2x2(%arg0: memref>, %arg1: memref>, %arg2: memref>) { - %A = load %arg0[] : memref> - %x = load %arg1[] : memref> - %b = load %arg2[] : memref> + %A = memref.load %arg0[] : memref> + %x = memref.load %arg1[] : memref> + %b = memref.load %arg2[] : memref> %0 = vector.contract #matvec_trait %A, %x, %b : vector<2x2xf32>, vector<2xf32> into vector<2xf32> - store %0, %arg2[] : memref> + memref.store 
%0, %arg2[] : memref> return } @@ -75,9 +75,9 @@ // CHECK-SAME: %[[A:.*0]]: memref> // CHECK-SAME: %[[B:.*1]]: memref> // CHECK-SAME: %[[C:.*2]]: memref> -// CHECK: %[[T0:.*]] = load %[[A]][] : memref> -// CHECK: %[[T1:.*]] = load %[[B]][] : memref> -// CHECK: %[[T2:.*]] = load %[[C]][] : memref> +// CHECK: %[[T0:.*]] = memref.load %[[A]][] : memref> +// CHECK: %[[T1:.*]] = memref.load %[[B]][] : memref> +// CHECK: %[[T2:.*]] = memref.load %[[C]][] : memref> // CHECK: %[[T3:.*]] = vector.transpose %[[T0]], [1, 0] : vector<2x2xf32> to vector<2x2xf32> // CHECK: %[[T4:.*]] = vector.extract %[[T3]][0] : vector<2x2xf32> // CHECK: %[[T5:.*]] = vector.extract %[[T1]][0] : vector<2xf32> @@ -85,15 +85,15 @@ // CHECK: %[[T7:.*]] = vector.extract %[[T3]][1] : vector<2x2xf32> // CHECK: %[[T8:.*]] = vector.extract %[[T1]][1] : vector<2xf32> // CHECK: %[[T9:.*]] = vector.outerproduct %[[T7]], %[[T8]], %[[T6]] {kind = #vector.kind} : vector<2xf32>, f32 -// CHECK: store %[[T9]], %[[C]][] : memref> +// CHECK: memref.store %[[T9]], %[[C]][] : memref> // CHECK: return func @matvecmax2x2(%arg0: memref>, %arg1: memref>, %arg2: memref>) { - %A = load %arg0[] : memref> - %x = load %arg1[] : memref> - %b = load %arg2[] : memref> + %A = memref.load %arg0[] : memref> + %x = memref.load %arg1[] : memref> + %b = memref.load %arg2[] : memref> %0 = vector.contract #matvecmax_trait %A, %x, %b : vector<2x2xf32>, vector<2xf32> into vector<2xf32> - store %0, %arg2[] : memref> + memref.store %0, %arg2[] : memref> return } @@ -101,24 +101,24 @@ // CHECK-SAME: %[[A:.*0]]: memref> // CHECK-SAME: %[[B:.*1]]: memref> // CHECK-SAME: %[[C:.*2]]: memref> -// CHECK: %[[T0:.*]] = load %[[A]][] : memref> -// CHECK: %[[T1:.*]] = load %[[B]][] : memref> -// CHECK: %[[T2:.*]] = load %[[C]][] : memref> +// CHECK: %[[T0:.*]] = memref.load %[[A]][] : memref> +// CHECK: %[[T1:.*]] = memref.load %[[B]][] : memref> +// CHECK: %[[T2:.*]] = memref.load %[[C]][] : memref> // CHECK: %[[T3:.*]] = vector.extract %[[T0]][0] : vector<2x2xf32> // CHECK: %[[T4:.*]] = vector.extract %[[T1]][0] : vector<2xf32> // CHECK: %[[T5:.*]] = vector.outerproduct %[[T3]], %[[T4]], %[[T2]] {kind = #vector.kind} : vector<2xf32>, f32 // CHECK: %[[T6:.*]] = vector.extract %[[T0]][1] : vector<2x2xf32> // CHECK: %[[T7:.*]] = vector.extract %[[T1]][1] : vector<2xf32> // CHECK: %[[T8:.*]] = vector.outerproduct %[[T6]], %[[T7]], %[[T5]] {kind = #vector.kind} : vector<2xf32>, f32 -// CHECK: store %[[T8]], %[[C]][] : memref> +// CHECK: memref.store %[[T8]], %[[C]][] : memref> // CHECK: return func @mattransvec2x2(%arg0: memref>, %arg1: memref>, %arg2: memref>) { - %A = load %arg0[] : memref> - %x = load %arg1[] : memref> - %b = load %arg2[] : memref> + %A = memref.load %arg0[] : memref> + %x = memref.load %arg1[] : memref> + %b = memref.load %arg2[] : memref> %0 = vector.contract #mattransvec_trait %A, %x, %b : vector<2x2xf32>, vector<2xf32> into vector<2xf32> - store %0, %arg2[] : memref> + memref.store %0, %arg2[] : memref> return } @@ -126,9 +126,9 @@ // CHECK-SAME: %[[A:.*0]]: memref> // CHECK-SAME: %[[B:.*1]]: memref> // CHECK-SAME: %[[C:.*2]]: memref> -// CHECK: %[[T0:.*]] = load %[[A]][] : memref> -// CHECK: %[[T1:.*]] = load %[[B]][] : memref> -// CHECK: %[[T2:.*]] = load %[[C]][] : memref> +// CHECK: %[[T0:.*]] = memref.load %[[A]][] : memref> +// CHECK: %[[T1:.*]] = memref.load %[[B]][] : memref> +// CHECK: %[[T2:.*]] = memref.load %[[C]][] : memref> // CHECK: %[[T3:.*]] = vector.transpose %[[T0]], [1, 0] : vector<2x2xf32> to vector<2x2xf32> // CHECK: %[[T4:.*]] = 
vector.extract %[[T3]][0] : vector<2x2xf32> // CHECK: %[[T5:.*]] = vector.extract %[[T1]][0] : vector<2xf32> @@ -136,15 +136,15 @@ // CHECK: %[[T7:.*]] = vector.extract %[[T3]][1] : vector<2x2xf32> // CHECK: %[[T8:.*]] = vector.extract %[[T1]][1] : vector<2xf32> // CHECK: %[[T9:.*]] = vector.outerproduct %[[T7]], %[[T8]], %[[T6]] {kind = #vector.kind} : vector<2xf32>, f32 -// CHECK: store %[[T9]], %[[C]][] : memref> +// CHECK: memref.store %[[T9]], %[[C]][] : memref> // CHECK: return func @vecmat2x2(%arg0: memref>, %arg1: memref>, %arg2: memref>) { - %A = load %arg0[] : memref> - %x = load %arg1[] : memref> - %b = load %arg2[] : memref> + %A = memref.load %arg0[] : memref> + %x = memref.load %arg1[] : memref> + %b = memref.load %arg2[] : memref> %0 = vector.contract #vecmat_trait %x, %A, %b : vector<2xf32>, vector<2x2xf32> into vector<2xf32> - store %0, %arg2[] : memref> + memref.store %0, %arg2[] : memref> return } @@ -152,23 +152,23 @@ // CHECK-SAME: %[[A:.*0]]: memref> // CHECK-SAME: %[[B:.*1]]: memref> // CHECK-SAME: %[[C:.*2]]: memref> -// CHECK: %[[T0:.*]] = load %[[A]][] : memref> -// CHECK: %[[T1:.*]] = load %[[B]][] : memref> -// CHECK: %[[T2:.*]] = load %[[C]][] : memref> +// CHECK: %[[T0:.*]] = memref.load %[[A]][] : memref> +// CHECK: %[[T1:.*]] = memref.load %[[B]][] : memref> +// CHECK: %[[T2:.*]] = memref.load %[[C]][] : memref> // CHECK: %[[T3:.*]] = vector.extract %[[T0]][0] : vector<2x2xf32> // CHECK: %[[T4:.*]] = vector.extract %[[T1]][0] : vector<2xf32> // CHECK: %[[T5:.*]] = vector.outerproduct %[[T3]], %[[T4]], %[[T2]] {kind = #vector.kind} : vector<2xf32>, f32 // CHECK: %[[T6:.*]] = vector.extract %[[T0]][1] : vector<2x2xf32> // CHECK: %[[T7:.*]] = vector.extract %[[T1]][1] : vector<2xf32> // CHECK: %[[T8:.*]] = vector.outerproduct %[[T6]], %[[T7]], %[[T5]] {kind = #vector.kind} : vector<2xf32>, f32 -// CHECK: store %[[T8]], %[[C]][] : memref> +// CHECK: memref.store %[[T8]], %[[C]][] : memref> // CHECK: return func @vecmattrans2x2(%arg0: memref>, %arg1: memref>, %arg2: memref>) { - %A = load %arg0[] : memref> - %x = load %arg1[] : memref> - %b = load %arg2[] : memref> + %A = memref.load %arg0[] : memref> + %x = memref.load %arg1[] : memref> + %b = memref.load %arg2[] : memref> %0 = vector.contract #vecmattrans_trait %x, %A, %b : vector<2xf32>, vector<2x2xf32> into vector<2xf32> - store %0, %arg2[] : memref> + memref.store %0, %arg2[] : memref> return } diff --git a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir --- a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir +++ b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir @@ -29,10 +29,10 @@ // CHECK-DAG: %[[c8:.*]] = constant 8 : index // CHECK-DAG: %[[cst:.*]] = constant 0.000000e+00 : f32 // alloca for boundary full tile - // CHECK: %[[alloc:.*]] = alloca() {alignment = 32 : i64} : memref<4x8xf32> + // CHECK: %[[alloc:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32> // %i + 4 <= dim(%A, 0) // CHECK: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]] - // CHECK: %[[d0:.*]] = dim %[[A]], %[[c0]] : memref + // CHECK: %[[d0:.*]] = memref.dim %[[A]], %[[c0]] : memref // CHECK: %[[cmp0:.*]] = cmpi sle, %[[idx0]], %[[d0]] : index // %j + 8 <= dim(%A, 1) // CHECK: %[[idx1:.*]] = affine.apply #[[$map_p8]]()[%[[j]]] @@ -49,7 +49,7 @@ // CHECK: %[[cast_alloc:.*]] = vector.type_cast %[[alloc]] : // CHECK-SAME: memref<4x8xf32> to memref> // CHECK: store %[[slow]], %[[cast_alloc]][] : memref> - // 
CHECK: %[[yielded:.*]] = memref_cast %[[alloc]] : + // CHECK: %[[yielded:.*]] = memref.cast %[[alloc]] : // CHECK-SAME: memref<4x8xf32> to memref // CHECK: scf.yield %[[yielded]], %[[c0]], %[[c0]] : // CHECK-SAME: memref, index, index @@ -62,10 +62,10 @@ // LINALG-DAG: %[[c8:.*]] = constant 8 : index // LINALG-DAG: %[[cst:.*]] = constant 0.000000e+00 : f32 // alloca for boundary full tile - // LINALG: %[[alloc:.*]] = alloca() {alignment = 32 : i64} : memref<4x8xf32> + // LINALG: %[[alloc:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32> // %i + 4 <= dim(%A, 0) // LINALG: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]] - // LINALG: %[[d0:.*]] = dim %[[A]], %[[c0]] : memref + // LINALG: %[[d0:.*]] = memref.dim %[[A]], %[[c0]] : memref // LINALG: %[[cmp0:.*]] = cmpi sle, %[[idx0]], %[[d0]] : index // %j + 8 <= dim(%A, 1) // LINALG: %[[idx1:.*]] = affine.apply #[[$map_p8]]()[%[[j]]] @@ -78,13 +78,13 @@ // LINALG: } else { // slow path, fill tmp alloc and yield a memref_casted version of it // LINALG: linalg.fill(%[[alloc]], %[[cst]]) : memref<4x8xf32>, f32 - // LINALG: %[[d0:.*]] = dim %[[A]], %[[c0]] : memref + // LINALG: %[[d0:.*]] = memref.dim %[[A]], %[[c0]] : memref // LINALG: %[[sv0:.*]] = affine.min #[[$bounds_map_4]](%[[d0]], %[[i]], %[[c4]]) // LINALG: %[[sv1:.*]] = affine.min #[[$bounds_map_8]](%[[c8]], %[[j]], %[[c8]]) - // LINALG: %[[sv:.*]] = subview %[[A]][%[[i]], %[[j]]] [%[[sv0]], %[[sv1]]] [1, 1] + // LINALG: %[[sv:.*]] = memref.subview %[[A]][%[[i]], %[[j]]] [%[[sv0]], %[[sv1]]] [1, 1] // LINALG-SAME: memref to memref // LINALG: linalg.copy(%[[sv]], %[[alloc]]) : memref, memref<4x8xf32> - // LINALG: %[[yielded:.*]] = memref_cast %[[alloc]] : + // LINALG: %[[yielded:.*]] = memref.cast %[[alloc]] : // LINALG-SAME: memref<4x8xf32> to memref // LINALG: scf.yield %[[yielded]], %[[c0]], %[[c0]] : // LINALG-SAME: memref, index, index @@ -117,7 +117,7 @@ // CHECK-DAG: %[[c8:.*]] = constant 8 : index // CHECK-DAG: %[[cst:.*]] = constant 0.000000e+00 : f32 // alloca for boundary full tile - // CHECK: %[[alloc:.*]] = alloca() {alignment = 32 : i64} : memref<4x8xf32> + // CHECK: %[[alloc:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32> // %i + 4 <= dim(%A, 0) // CHECK: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]] // CHECK: %[[cmp0:.*]] = cmpi sle, %[[idx0]], %[[c7]] : index @@ -128,7 +128,7 @@ // CHECK: %[[cond:.*]] = and %[[cmp0]], %[[cmp1]] : i1 // CHECK: %[[ifres:.*]]:3 = scf.if %[[cond]] -> (memref, index, index) { // inBounds but not cast-compatible: yield a memref_casted form of %A - // CHECK: %[[casted:.*]] = memref_cast %arg0 : + // CHECK: %[[casted:.*]] = memref.cast %arg0 : // CHECK-SAME: memref<7x8xf32, #[[$map_2d_stride_1]]> to memref // CHECK: scf.yield %[[casted]], %[[i]], %[[j]] : // CHECK-SAME: memref, index, index @@ -140,7 +140,7 @@ // CHECK-SAME: memref<4x8xf32> to memref> // CHECK: store %[[slow]], %[[cast_alloc]][] : // CHECK-SAME: memref> - // CHECK: %[[yielded:.*]] = memref_cast %[[alloc]] : + // CHECK: %[[yielded:.*]] = memref.cast %[[alloc]] : // CHECK-SAME: memref<4x8xf32> to memref // CHECK: scf.yield %[[yielded]], %[[c0]], %[[c0]] : // CHECK-SAME: memref, index, index @@ -154,7 +154,7 @@ // LINALG-DAG: %[[c8:.*]] = constant 8 : index // LINALG-DAG: %[[cst:.*]] = constant 0.000000e+00 : f32 // alloca for boundary full tile - // LINALG: %[[alloc:.*]] = alloca() {alignment = 32 : i64} : memref<4x8xf32> + // LINALG: %[[alloc:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32> // %i + 4 <= dim(%A, 0) // LINALG: 
%[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]] // LINALG: %[[cmp0:.*]] = cmpi sle, %[[idx0]], %[[c7]] : index @@ -165,7 +165,7 @@ // LINALG: %[[cond:.*]] = and %[[cmp0]], %[[cmp1]] : i1 // LINALG: %[[ifres:.*]]:3 = scf.if %[[cond]] -> (memref, index, index) { // inBounds but not cast-compatible: yield a memref_casted form of %A - // LINALG: %[[casted:.*]] = memref_cast %arg0 : + // LINALG: %[[casted:.*]] = memref.cast %arg0 : // LINALG-SAME: memref<7x8xf32, #[[$map_2d_stride_1]]> to memref // LINALG: scf.yield %[[casted]], %[[i]], %[[j]] : // LINALG-SAME: memref, index, index @@ -174,10 +174,10 @@ // LINALG: linalg.fill(%[[alloc]], %[[cst]]) : memref<4x8xf32>, f32 // LINALG: %[[sv0:.*]] = affine.min #[[$bounds_map_4]](%[[c7]], %[[i]], %[[c4]]) // LINALG: %[[sv1:.*]] = affine.min #[[$bounds_map_8]](%[[c8]], %[[j]], %[[c8]]) - // LINALG: %[[sv:.*]] = subview %[[A]][%[[i]], %[[j]]] [%[[sv0]], %[[sv1]]] [1, 1] + // LINALG: %[[sv:.*]] = memref.subview %[[A]][%[[i]], %[[j]]] [%[[sv0]], %[[sv1]]] [1, 1] // LINALG-SAME: memref<7x8xf32, #[[$map_2d_stride_1]]> to memref // LINALG: linalg.copy(%[[sv]], %[[alloc]]) : memref, memref<4x8xf32> - // LINALG: %[[yielded:.*]] = memref_cast %[[alloc]] : + // LINALG: %[[yielded:.*]] = memref.cast %[[alloc]] : // LINALG-SAME: memref<4x8xf32> to memref // LINALG: scf.yield %[[yielded]], %[[c0]], %[[c0]] : // LINALG-SAME: memref, index, index diff --git a/mlir/test/Dialect/Vector/vector-transfer-lowering.mlir b/mlir/test/Dialect/Vector/vector-transfer-lowering.mlir --- a/mlir/test/Dialect/Vector/vector-transfer-lowering.mlir +++ b/mlir/test/Dialect/Vector/vector-transfer-lowering.mlir @@ -159,7 +159,7 @@ // CHECK-LABEL: func @transfer_broadcasting( // CHECK-SAME: %[[MEM:.*]]: memref<8x8xf32>, // CHECK-SAME: %[[IDX:.*]]: index) -> vector<4xf32> { -// CHECK-NEXT: %[[LOAD:.*]] = load %[[MEM]][%[[IDX]], %[[IDX]]] : memref<8x8xf32> +// CHECK-NEXT: %[[LOAD:.*]] = memref.load %[[MEM]][%[[IDX]], %[[IDX]]] : memref<8x8xf32> // CHECK-NEXT: %[[RES:.*]] = vector.broadcast %[[LOAD]] : f32 to vector<4xf32> // CHECK-NEXT: return %[[RES]] : vector<4xf32> // CHECK-NEXT: } @@ -177,7 +177,7 @@ // CHECK-LABEL: func @transfer_broadcasting_2D( // CHECK-SAME: %[[MEM:.*]]: memref<8x8xf32>, // CHECK-SAME: %[[IDX:.*]]: index) -> vector<4x4xf32> { -// CHECK-NEXT: %[[LOAD:.*]] = load %[[MEM]][%[[IDX]], %[[IDX]]] : memref<8x8xf32> +// CHECK-NEXT: %[[LOAD:.*]] = memref.load %[[MEM]][%[[IDX]], %[[IDX]]] : memref<8x8xf32> // CHECK-NEXT: %[[RES:.*]] = vector.broadcast %[[LOAD]] : f32 to vector<4x4xf32> // CHECK-NEXT: return %[[RES]] : vector<4x4xf32> // CHECK-NEXT: } diff --git a/mlir/test/Dialect/Vector/vector-transforms.mlir b/mlir/test/Dialect/Vector/vector-transforms.mlir --- a/mlir/test/Dialect/Vector/vector-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-transforms.mlir @@ -287,9 +287,9 @@ func @vector_transfers(%arg0: index, %arg1: index) { %cst = constant 0.000000e+00 : f32 - %0 = alloc(%arg0, %arg1) : memref - %1 = alloc(%arg0, %arg1) : memref - %2 = alloc(%arg0, %arg1) : memref + %0 = memref.alloc(%arg0, %arg1) : memref + %1 = memref.alloc(%arg0, %arg1) : memref + %2 = memref.alloc(%arg0, %arg1) : memref %cst_0 = constant 1.000000e+00 : f32 %cst_1 = constant 2.000000e+00 : f32 affine.for %arg2 = 0 to %arg0 step 4 { @@ -434,7 +434,7 @@ %cf0 = constant 0.000000e+00 : f32 %vf0 = splat %cf0 : vector<2x4xf32> - %0 = alloc() : memref<6x2x1xvector<2x4xf32>> + %0 = memref.alloc() : memref<6x2x1xvector<2x4xf32>> %1 = vector.transfer_read %0[%c0, %c0, %c0], %vf0 {permutation_map = 
affine_map<(d0, d1, d2) -> (d1, d2)>} diff --git a/mlir/test/EDSC/CMakeLists.txt b/mlir/test/EDSC/CMakeLists.txt --- a/mlir/test/EDSC/CMakeLists.txt +++ b/mlir/test/EDSC/CMakeLists.txt @@ -16,6 +16,7 @@ MLIRIR MLIRLinalg MLIRLinalgEDSC + MLIRMemRef MLIRSCF MLIRStandard MLIRTransforms diff --git a/mlir/test/EDSC/builder-api-test.cpp b/mlir/test/EDSC/builder-api-test.cpp --- a/mlir/test/EDSC/builder-api-test.cpp +++ b/mlir/test/EDSC/builder-api-test.cpp @@ -12,6 +12,7 @@ #include "mlir/Dialect/Linalg/EDSC/Builders.h" #include "mlir/Dialect/Linalg/EDSC/Intrinsics.h" #include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h" #include "mlir/Dialect/SCF/EDSC/Intrinsics.h" #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" #include "mlir/Dialect/Vector/EDSC/Intrinsics.h" @@ -44,6 +45,7 @@ scf::SCFDialect, linalg::LinalgDialect, math::MathDialect, + memref::MemRefDialect, StandardOpsDialect, vector::VectorDialect>(); // clang-format on @@ -762,9 +764,9 @@ // clang-format off // CHECK-LABEL: func @tile_2d // CHECK: %[[ZERO:.*]] = constant 0 : index - // CHECK: %[[M:[0-9]+]] = dim %arg2, %c0{{[_0-9]*}} : memref - // CHECK: %[[N:[0-9]+]] = dim %arg2, %c1{{[_0-9]*}} : memref - // CHECK: %[[P:[0-9]+]] = dim %arg2, %c2{{[_0-9]*}} : memref + // CHECK: %[[M:[0-9]+]] = memref.dim %arg2, %c0{{[_0-9]*}} : memref + // CHECK: %[[N:[0-9]+]] = memref.dim %arg2, %c1{{[_0-9]*}} : memref + // CHECK: %[[P:[0-9]+]] = memref.dim %arg2, %c2{{[_0-9]*}} : memref // CHECK: affine.for %{{.*}} = affine_map<(d0) -> (d0)>(%[[ZERO]]) to affine_map<(d0) -> (d0)>(%[[M]]) step 512 { // CHECK-NEXT: affine.for %{{.*}} = affine_map<(d0) -> (d0)>(%[[ZERO]]) to affine_map<(d0) -> (d0)>(%[[N]]) step 1024 { // CHECK-NEXT: affine.for %{{.*}} = affine_map<(d0) -> (d0)>(%[[ZERO]]) to affine_map<(d0) -> (d0)>(%[[P]]) { @@ -806,7 +808,7 @@ Value zero = std_constant_index(0); MemRefBoundsCapture vC(f.getArgument(2)); AffineIndexedValue B(f.getArgument(1)), D(f.getArgument(3)); - StdIndexedValue A(f.getArgument(0)), C(f.getArgument(2)); + MemRefIndexedValue A(f.getArgument(0)), C(f.getArgument(2)); Value N(vC.ub(0)); // clang-format off diff --git a/mlir/test/Examples/Toy/Ch5/affine-lowering.mlir b/mlir/test/Examples/Toy/Ch5/affine-lowering.mlir --- a/mlir/test/Examples/Toy/Ch5/affine-lowering.mlir +++ b/mlir/test/Examples/Toy/Ch5/affine-lowering.mlir @@ -16,9 +16,9 @@ // CHECK: [[VAL_3:%.*]] = constant 4.000000e+00 : f64 // CHECK: [[VAL_4:%.*]] = constant 5.000000e+00 : f64 // CHECK: [[VAL_5:%.*]] = constant 6.000000e+00 : f64 -// CHECK: [[VAL_6:%.*]] = alloc() : memref<3x2xf64> -// CHECK: [[VAL_7:%.*]] = alloc() : memref<3x2xf64> -// CHECK: [[VAL_8:%.*]] = alloc() : memref<2x3xf64> +// CHECK: [[VAL_6:%.*]] = memref.alloc() : memref<3x2xf64> +// CHECK: [[VAL_7:%.*]] = memref.alloc() : memref<3x2xf64> +// CHECK: [[VAL_8:%.*]] = memref.alloc() : memref<2x3xf64> // CHECK: affine.store [[VAL_0]], [[VAL_8]][0, 0] : memref<2x3xf64> // CHECK: affine.store [[VAL_1]], [[VAL_8]][0, 1] : memref<2x3xf64> // CHECK: affine.store [[VAL_2]], [[VAL_8]][0, 2] : memref<2x3xf64> @@ -36,9 +36,9 @@ // CHECK: [[VAL_16:%.*]] = mulf [[VAL_14]], [[VAL_15]] : f64 // CHECK: affine.store [[VAL_16]], [[VAL_6]]{{\[}}[[VAL_12]], [[VAL_13]]] : memref<3x2xf64> // CHECK: toy.print [[VAL_6]] : memref<3x2xf64> -// CHECK: dealloc [[VAL_8]] : memref<2x3xf64> -// CHECK: dealloc [[VAL_7]] : memref<3x2xf64> -// CHECK: dealloc [[VAL_6]] : memref<3x2xf64> +// CHECK: memref.dealloc [[VAL_8]] : memref<2x3xf64> +// CHECK: memref.dealloc [[VAL_7]] : 
memref<3x2xf64> +// CHECK: memref.dealloc [[VAL_6]] : memref<3x2xf64> // OPT-LABEL: func @main() // OPT: [[VAL_0:%.*]] = constant 1.000000e+00 : f64 @@ -47,8 +47,8 @@ // OPT: [[VAL_3:%.*]] = constant 4.000000e+00 : f64 // OPT: [[VAL_4:%.*]] = constant 5.000000e+00 : f64 // OPT: [[VAL_5:%.*]] = constant 6.000000e+00 : f64 -// OPT: [[VAL_6:%.*]] = alloc() : memref<3x2xf64> -// OPT: [[VAL_7:%.*]] = alloc() : memref<2x3xf64> +// OPT: [[VAL_6:%.*]] = memref.alloc() : memref<3x2xf64> +// OPT: [[VAL_7:%.*]] = memref.alloc() : memref<2x3xf64> // OPT: affine.store [[VAL_0]], [[VAL_7]][0, 0] : memref<2x3xf64> // OPT: affine.store [[VAL_1]], [[VAL_7]][0, 1] : memref<2x3xf64> // OPT: affine.store [[VAL_2]], [[VAL_7]][0, 2] : memref<2x3xf64> @@ -61,5 +61,5 @@ // OPT: [[VAL_11:%.*]] = mulf [[VAL_10]], [[VAL_10]] : f64 // OPT: affine.store [[VAL_11]], [[VAL_6]]{{\[}}[[VAL_8]], [[VAL_9]]] : memref<3x2xf64> // OPT: toy.print [[VAL_6]] : memref<3x2xf64> -// OPT: dealloc [[VAL_7]] : memref<2x3xf64> -// OPT: dealloc [[VAL_6]] : memref<3x2xf64> +// OPT: memref.dealloc [[VAL_7]] : memref<2x3xf64> +// OPT: memref.dealloc [[VAL_6]] : memref<3x2xf64> diff --git a/mlir/test/Examples/Toy/Ch6/affine-lowering.mlir b/mlir/test/Examples/Toy/Ch6/affine-lowering.mlir --- a/mlir/test/Examples/Toy/Ch6/affine-lowering.mlir +++ b/mlir/test/Examples/Toy/Ch6/affine-lowering.mlir @@ -16,9 +16,9 @@ // CHECK: [[VAL_3:%.*]] = constant 4.000000e+00 : f64 // CHECK: [[VAL_4:%.*]] = constant 5.000000e+00 : f64 // CHECK: [[VAL_5:%.*]] = constant 6.000000e+00 : f64 -// CHECK: [[VAL_6:%.*]] = alloc() : memref<3x2xf64> -// CHECK: [[VAL_7:%.*]] = alloc() : memref<3x2xf64> -// CHECK: [[VAL_8:%.*]] = alloc() : memref<2x3xf64> +// CHECK: [[VAL_6:%.*]] = memref.alloc() : memref<3x2xf64> +// CHECK: [[VAL_7:%.*]] = memref.alloc() : memref<3x2xf64> +// CHECK: [[VAL_8:%.*]] = memref.alloc() : memref<2x3xf64> // CHECK: affine.store [[VAL_0]], [[VAL_8]][0, 0] : memref<2x3xf64> // CHECK: affine.store [[VAL_1]], [[VAL_8]][0, 1] : memref<2x3xf64> // CHECK: affine.store [[VAL_2]], [[VAL_8]][0, 2] : memref<2x3xf64> @@ -36,9 +36,9 @@ // CHECK: [[VAL_16:%.*]] = mulf [[VAL_14]], [[VAL_15]] : f64 // CHECK: affine.store [[VAL_16]], [[VAL_6]]{{\[}}[[VAL_12]], [[VAL_13]]] : memref<3x2xf64> // CHECK: toy.print [[VAL_6]] : memref<3x2xf64> -// CHECK: dealloc [[VAL_8]] : memref<2x3xf64> -// CHECK: dealloc [[VAL_7]] : memref<3x2xf64> -// CHECK: dealloc [[VAL_6]] : memref<3x2xf64> +// CHECK: memref.dealloc [[VAL_8]] : memref<2x3xf64> +// CHECK: memref.dealloc [[VAL_7]] : memref<3x2xf64> +// CHECK: memref.dealloc [[VAL_6]] : memref<3x2xf64> // OPT-LABEL: func @main() // OPT: [[VAL_0:%.*]] = constant 1.000000e+00 : f64 @@ -47,8 +47,8 @@ // OPT: [[VAL_3:%.*]] = constant 4.000000e+00 : f64 // OPT: [[VAL_4:%.*]] = constant 5.000000e+00 : f64 // OPT: [[VAL_5:%.*]] = constant 6.000000e+00 : f64 -// OPT: [[VAL_6:%.*]] = alloc() : memref<3x2xf64> -// OPT: [[VAL_7:%.*]] = alloc() : memref<2x3xf64> +// OPT: [[VAL_6:%.*]] = memref.alloc() : memref<3x2xf64> +// OPT: [[VAL_7:%.*]] = memref.alloc() : memref<2x3xf64> // OPT: affine.store [[VAL_0]], [[VAL_7]][0, 0] : memref<2x3xf64> // OPT: affine.store [[VAL_1]], [[VAL_7]][0, 1] : memref<2x3xf64> // OPT: affine.store [[VAL_2]], [[VAL_7]][0, 2] : memref<2x3xf64> @@ -61,5 +61,5 @@ // OPT: [[VAL_11:%.*]] = mulf [[VAL_10]], [[VAL_10]] : f64 // OPT: affine.store [[VAL_11]], [[VAL_6]]{{\[}}[[VAL_8]], [[VAL_9]]] : memref<3x2xf64> // OPT: toy.print [[VAL_6]] : memref<3x2xf64> -// OPT: dealloc [[VAL_7]] : memref<2x3xf64> -// OPT: dealloc 
[[VAL_6]] : memref<3x2xf64> +// OPT: memref.dealloc [[VAL_7]] : memref<2x3xf64> +// OPT: memref.dealloc [[VAL_6]] : memref<3x2xf64> diff --git a/mlir/test/Examples/Toy/Ch7/affine-lowering.mlir b/mlir/test/Examples/Toy/Ch7/affine-lowering.mlir --- a/mlir/test/Examples/Toy/Ch7/affine-lowering.mlir +++ b/mlir/test/Examples/Toy/Ch7/affine-lowering.mlir @@ -16,9 +16,9 @@ // CHECK: [[VAL_3:%.*]] = constant 4.000000e+00 : f64 // CHECK: [[VAL_4:%.*]] = constant 5.000000e+00 : f64 // CHECK: [[VAL_5:%.*]] = constant 6.000000e+00 : f64 -// CHECK: [[VAL_6:%.*]] = alloc() : memref<3x2xf64> -// CHECK: [[VAL_7:%.*]] = alloc() : memref<3x2xf64> -// CHECK: [[VAL_8:%.*]] = alloc() : memref<2x3xf64> +// CHECK: [[VAL_6:%.*]] = memref.alloc() : memref<3x2xf64> +// CHECK: [[VAL_7:%.*]] = memref.alloc() : memref<3x2xf64> +// CHECK: [[VAL_8:%.*]] = memref.alloc() : memref<2x3xf64> // CHECK: affine.store [[VAL_0]], [[VAL_8]][0, 0] : memref<2x3xf64> // CHECK: affine.store [[VAL_1]], [[VAL_8]][0, 1] : memref<2x3xf64> // CHECK: affine.store [[VAL_2]], [[VAL_8]][0, 2] : memref<2x3xf64> @@ -36,9 +36,9 @@ // CHECK: [[VAL_16:%.*]] = mulf [[VAL_14]], [[VAL_15]] : f64 // CHECK: affine.store [[VAL_16]], [[VAL_6]]{{\[}}[[VAL_12]], [[VAL_13]]] : memref<3x2xf64> // CHECK: toy.print [[VAL_6]] : memref<3x2xf64> -// CHECK: dealloc [[VAL_8]] : memref<2x3xf64> -// CHECK: dealloc [[VAL_7]] : memref<3x2xf64> -// CHECK: dealloc [[VAL_6]] : memref<3x2xf64> +// CHECK: memref.dealloc [[VAL_8]] : memref<2x3xf64> +// CHECK: memref.dealloc [[VAL_7]] : memref<3x2xf64> +// CHECK: memref.dealloc [[VAL_6]] : memref<3x2xf64> // OPT-LABEL: func @main() // OPT: [[VAL_0:%.*]] = constant 1.000000e+00 : f64 @@ -47,8 +47,8 @@ // OPT: [[VAL_3:%.*]] = constant 4.000000e+00 : f64 // OPT: [[VAL_4:%.*]] = constant 5.000000e+00 : f64 // OPT: [[VAL_5:%.*]] = constant 6.000000e+00 : f64 -// OPT: [[VAL_6:%.*]] = alloc() : memref<3x2xf64> -// OPT: [[VAL_7:%.*]] = alloc() : memref<2x3xf64> +// OPT: [[VAL_6:%.*]] = memref.alloc() : memref<3x2xf64> +// OPT: [[VAL_7:%.*]] = memref.alloc() : memref<2x3xf64> // OPT: affine.store [[VAL_0]], [[VAL_7]][0, 0] : memref<2x3xf64> // OPT: affine.store [[VAL_1]], [[VAL_7]][0, 1] : memref<2x3xf64> // OPT: affine.store [[VAL_2]], [[VAL_7]][0, 2] : memref<2x3xf64> @@ -61,5 +61,5 @@ // OPT: [[VAL_11:%.*]] = mulf [[VAL_10]], [[VAL_10]] : f64 // OPT: affine.store [[VAL_11]], [[VAL_6]]{{\[}}[[VAL_8]], [[VAL_9]]] : memref<3x2xf64> // OPT: toy.print [[VAL_6]] : memref<3x2xf64> -// OPT: dealloc [[VAL_7]] : memref<2x3xf64> -// OPT: dealloc [[VAL_6]] : memref<3x2xf64> +// OPT: memref.dealloc [[VAL_7]] : memref<2x3xf64> +// OPT: memref.dealloc [[VAL_6]] : memref<3x2xf64> diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir --- a/mlir/test/IR/core-ops.mlir +++ b/mlir/test/IR/core-ops.mlir @@ -35,9 +35,9 @@ %t = "getTensor"() : () -> tensor<4x4x?xf32> // CHECK: %[[C2:.*]] = constant 2 : index - // CHECK-NEXT: %{{.*}} = dim %[[T]], %[[C2]] : tensor<4x4x?xf32> + // CHECK-NEXT: %{{.*}} = memref.dim %[[T]], %[[C2]] : tensor<4x4x?xf32> %c2 = constant 2 : index - %t2 = "std.dim"(%t, %c2) : (tensor<4x4x?xf32>, index) -> index + %t2 = "memref.dim"(%t, %c2) : (tensor<4x4x?xf32>, index) -> index // CHECK: %{{.*}} = addf %[[ARG]], %[[ARG]] : f32 %x = "std.addf"(%a, %a) : (f32,f32) -> (f32) @@ -50,9 +50,9 @@ func @standard_instrs(tensor<4x4x?xf32>, f32, i32, index, i64, f16) { ^bb42(%t: tensor<4x4x?xf32>, %f: f32, %i: i32, %idx : index, %j: i64, %half: f16): // CHECK: %[[C2:.*]] = constant 2 : index - // CHECK: %[[A2:.*]] = dim %arg0, 
%[[C2]] : tensor<4x4x?xf32> + // CHECK: %[[A2:.*]] = memref.dim %arg0, %[[C2]] : tensor<4x4x?xf32> %c2 = constant 2 : index - %a2 = dim %t, %c2 : tensor<4x4x?xf32> + %a2 = memref.dim %t, %c2 : tensor<4x4x?xf32> // CHECK: %[[F2:.*]] = addf %arg1, %arg1 : f32 %f2 = "std.addf"(%f, %f) : (f32,f32) -> f32 @@ -540,17 +540,17 @@ // CHECK-LABEL: func @load_store_prefetch func @load_store_prefetch(memref<4x4xi32>, index) { ^bb0(%0: memref<4x4xi32>, %1: index): - // CHECK: %0 = load %arg0[%arg1, %arg1] : memref<4x4xi32> - %2 = "std.load"(%0, %1, %1) : (memref<4x4xi32>, index, index)->i32 + // CHECK: %0 = memref.load %arg0[%arg1, %arg1] : memref<4x4xi32> + %2 = "memref.load"(%0, %1, %1) : (memref<4x4xi32>, index, index)->i32 - // CHECK: %{{.*}} = load %arg0[%arg1, %arg1] : memref<4x4xi32> - %3 = load %0[%1, %1] : memref<4x4xi32> + // CHECK: %{{.*}} = memref.load %arg0[%arg1, %arg1] : memref<4x4xi32> + %3 = memref.load %0[%1, %1] : memref<4x4xi32> - // CHECK: prefetch %arg0[%arg1, %arg1], write, locality<1>, data : memref<4x4xi32> - prefetch %0[%1, %1], write, locality<1>, data : memref<4x4xi32> + // CHECK: memref.prefetch %arg0[%arg1, %arg1], write, locality<1>, data : memref<4x4xi32> + memref.prefetch %0[%1, %1], write, locality<1>, data : memref<4x4xi32> - // CHECK: prefetch %arg0[%arg1, %arg1], read, locality<3>, instr : memref<4x4xi32> - prefetch %0[%1, %1], read, locality<3>, instr : memref<4x4xi32> + // CHECK: memref.prefetch %arg0[%arg1, %arg1], read, locality<3>, instr : memref<4x4xi32> + memref.prefetch %0[%1, %1], read, locality<3>, instr : memref<4x4xi32> return } @@ -558,11 +558,11 @@ // Test with zero-dimensional operands using no index in load/store. // CHECK-LABEL: func @zero_dim_no_idx func @zero_dim_no_idx(%arg0 : memref, %arg1 : memref, %arg2 : memref) { - %0 = std.load %arg0[] : memref - std.store %0, %arg1[] : memref + %0 = memref.load %arg0[] : memref + memref.store %0, %arg1[] : memref return - // CHECK: %0 = load %{{.*}}[] : memref - // CHECK: store %{{.*}}, %{{.*}}[] : memref + // CHECK: %0 = memref.load %{{.*}}[] : memref + // CHECK: memref.store %{{.*}}, %{{.*}}[] : memref } // CHECK-LABEL: func @return_op(%arg0: i32) -> i32 { @@ -600,23 +600,23 @@ // CHECK-LABEL: func @memref_cast(%arg0 func @memref_cast(%arg0: memref<4xf32>, %arg1 : memref, %arg2 : memref<64x16x4xf32, offset: 0, strides: [64, 4, 1]>) { - // CHECK: %0 = memref_cast %arg0 : memref<4xf32> to memref - %0 = memref_cast %arg0 : memref<4xf32> to memref + // CHECK: %0 = memref.cast %arg0 : memref<4xf32> to memref + %0 = memref.cast %arg0 : memref<4xf32> to memref - // CHECK: %1 = memref_cast %arg1 : memref to memref<4xf32> - %1 = memref_cast %arg1 : memref to memref<4xf32> + // CHECK: %1 = memref.cast %arg1 : memref to memref<4xf32> + %1 = memref.cast %arg1 : memref to memref<4xf32> - // CHECK: {{%.*}} = memref_cast %arg2 : memref<64x16x4xf32, #[[$BASE_MAP0]]> to memref<64x16x4xf32, #[[$BASE_MAP3]]> - %2 = memref_cast %arg2 : memref<64x16x4xf32, offset: 0, strides: [64, 4, 1]> to memref<64x16x4xf32, offset: ?, strides: [?, ?, ?]> + // CHECK: {{%.*}} = memref.cast %arg2 : memref<64x16x4xf32, #[[$BASE_MAP0]]> to memref<64x16x4xf32, #[[$BASE_MAP3]]> + %2 = memref.cast %arg2 : memref<64x16x4xf32, offset: 0, strides: [64, 4, 1]> to memref<64x16x4xf32, offset: ?, strides: [?, ?, ?]> - // CHECK: {{%.*}} = memref_cast {{%.*}} : memref<64x16x4xf32, #[[$BASE_MAP3]]> to memref<64x16x4xf32, #[[$BASE_MAP0]]> - %3 = memref_cast %2 : memref<64x16x4xf32, offset: ?, strides: [?, ?, ?]> to memref<64x16x4xf32, offset: 0, strides: 
[64, 4, 1]> + // CHECK: {{%.*}} = memref.cast {{%.*}} : memref<64x16x4xf32, #[[$BASE_MAP3]]> to memref<64x16x4xf32, #[[$BASE_MAP0]]> + %3 = memref.cast %2 : memref<64x16x4xf32, offset: ?, strides: [?, ?, ?]> to memref<64x16x4xf32, offset: 0, strides: [64, 4, 1]> - // CHECK: memref_cast %{{.*}} : memref<4xf32> to memref<*xf32> - %4 = memref_cast %1 : memref<4xf32> to memref<*xf32> + // CHECK: memref.cast %{{.*}} : memref<4xf32> to memref<*xf32> + %4 = memref.cast %1 : memref<4xf32> to memref<*xf32> - // CHECK: memref_cast %{{.*}} : memref<*xf32> to memref<4xf32> - %5 = memref_cast %4 : memref<*xf32> to memref<4xf32> + // CHECK: memref.cast %{{.*}} : memref<*xf32> to memref<4xf32> + %5 = memref.cast %4 : memref<*xf32> to memref<4xf32> return } @@ -627,19 +627,19 @@ // CHECK-LABEL: func @memref_view(%arg0 func @memref_view(%arg0 : index, %arg1 : index, %arg2 : index) { - %0 = alloc() : memref<2048xi8> + %0 = memref.alloc() : memref<2048xi8> // Test two dynamic sizes and dynamic offset. - // CHECK: %{{.*}} = std.view %0[%arg2][%arg0, %arg1] : memref<2048xi8> to memref - %1 = view %0[%arg2][%arg0, %arg1] : memref<2048xi8> to memref + // CHECK: %{{.*}} = memref.view %0[%arg2][%arg0, %arg1] : memref<2048xi8> to memref + %1 = memref.view %0[%arg2][%arg0, %arg1] : memref<2048xi8> to memref // Test one dynamic size and dynamic offset. - // CHECK: %{{.*}} = std.view %0[%arg2][%arg1] : memref<2048xi8> to memref<4x?xf32> - %3 = view %0[%arg2][%arg1] : memref<2048xi8> to memref<4x?xf32> + // CHECK: %{{.*}} = memref.view %0[%arg2][%arg1] : memref<2048xi8> to memref<4x?xf32> + %3 = memref.view %0[%arg2][%arg1] : memref<2048xi8> to memref<4x?xf32> // Test static sizes and static offset. - // CHECK: %{{.*}} = std.view %0[{{.*}}][] : memref<2048xi8> to memref<64x4xf32> + // CHECK: %{{.*}} = memref.view %0[{{.*}}][] : memref<2048xi8> to memref<64x4xf32> %c0 = constant 0: index - %5 = view %0[%c0][] : memref<2048xi8> to memref<64x4xf32> + %5 = memref.view %0[%c0][] : memref<2048xi8> to memref<64x4xf32> return } @@ -648,107 +648,107 @@ %c0 = constant 0 : index %c1 = constant 1 : index - %0 = alloc() : memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>> + %0 = memref.alloc() : memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>> // CHECK: subview %0[%c0, %c0, %c0] [%arg0, %arg1, %arg2] [%c1, %c1, %c1] : // CHECK-SAME: memref<8x16x4xf32, #[[$BASE_MAP0]]> // CHECK-SAME: to memref - %1 = subview %0[%c0, %c0, %c0][%arg0, %arg1, %arg2][%c1, %c1, %c1] + %1 = memref.subview %0[%c0, %c0, %c0][%arg0, %arg1, %arg2][%c1, %c1, %c1] : memref<8x16x4xf32, offset:0, strides: [64, 4, 1]> to memref - %2 = alloc()[%arg2] : memref<64xf32, affine_map<(d0)[s0] -> (d0 + s0)>> - // CHECK: subview %2[%c1] [%arg0] [%c1] : + %2 = memref.alloc()[%arg2] : memref<64xf32, affine_map<(d0)[s0] -> (d0 + s0)>> + // CHECK: memref.subview %2[%c1] [%arg0] [%c1] : // CHECK-SAME: memref<64xf32, #[[$BASE_MAP1]]> // CHECK-SAME: to memref - %3 = subview %2[%c1][%arg0][%c1] + %3 = memref.subview %2[%c1][%arg0][%c1] : memref<64xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref (d0 * s1 + s0)>> - %4 = alloc() : memref<64x22xf32, affine_map<(d0, d1) -> (d0 * 22 + d1)>> - // CHECK: subview %4[%c0, %c1] [%arg0, %arg1] [%c1, %c0] : + %4 = memref.alloc() : memref<64x22xf32, affine_map<(d0, d1) -> (d0 * 22 + d1)>> + // CHECK: memref.subview %4[%c0, %c1] [%arg0, %arg1] [%c1, %c0] : // CHECK-SAME: memref<64x22xf32, #[[$BASE_MAP2]]> // CHECK-SAME: to memref - %5 = subview %4[%c0, %c1][%arg0, %arg1][%c1, %c0] + %5 = 
memref.subview %4[%c0, %c1][%arg0, %arg1][%c1, %c0] : memref<64x22xf32, offset:0, strides: [22, 1]> to memref - // CHECK: subview %0[0, 2, 0] [4, 4, 4] [1, 1, 1] : + // CHECK: memref.subview %0[0, 2, 0] [4, 4, 4] [1, 1, 1] : // CHECK-SAME: memref<8x16x4xf32, #[[$BASE_MAP0]]> // CHECK-SAME: to memref<4x4x4xf32, #[[$SUBVIEW_MAP3]]> - %6 = subview %0[0, 2, 0][4, 4, 4][1, 1, 1] + %6 = memref.subview %0[0, 2, 0][4, 4, 4][1, 1, 1] : memref<8x16x4xf32, offset:0, strides: [64, 4, 1]> to memref<4x4x4xf32, offset:8, strides: [64, 4, 1]> - %7 = alloc(%arg1, %arg2) : memref - // CHECK: subview {{%.*}}[0, 0] [4, 4] [1, 1] : + %7 = memref.alloc(%arg1, %arg2) : memref + // CHECK: memref.subview {{%.*}}[0, 0] [4, 4] [1, 1] : // CHECK-SAME: memref // CHECK-SAME: to memref<4x4xf32, #[[$SUBVIEW_MAP4]]> - %8 = subview %7[0, 0][4, 4][1, 1] + %8 = memref.subview %7[0, 0][4, 4][1, 1] : memref to memref<4x4xf32, offset: ?, strides:[?, 1]> - %9 = alloc() : memref<16x4xf32> - // CHECK: subview {{%.*}}[{{%.*}}, {{%.*}}] [4, 4] [{{%.*}}, {{%.*}}] : + %9 = memref.alloc() : memref<16x4xf32> + // CHECK: memref.subview {{%.*}}[{{%.*}}, {{%.*}}] [4, 4] [{{%.*}}, {{%.*}}] : // CHECK-SAME: memref<16x4xf32> // CHECK-SAME: to memref<4x4xf32, #[[$SUBVIEW_MAP2]] - %10 = subview %9[%arg1, %arg1][4, 4][%arg2, %arg2] + %10 = memref.subview %9[%arg1, %arg1][4, 4][%arg2, %arg2] : memref<16x4xf32> to memref<4x4xf32, offset: ?, strides:[?, ?]> - // CHECK: subview {{%.*}}[{{%.*}}, {{%.*}}] [4, 4] [2, 2] : + // CHECK: memref.subview {{%.*}}[{{%.*}}, {{%.*}}] [4, 4] [2, 2] : // CHECK-SAME: memref<16x4xf32> // CHECK-SAME: to memref<4x4xf32, #[[$SUBVIEW_MAP5]] - %11 = subview %9[%arg1, %arg2][4, 4][2, 2] + %11 = memref.subview %9[%arg1, %arg2][4, 4][2, 2] : memref<16x4xf32> to memref<4x4xf32, offset: ?, strides:[8, 2]> - %12 = alloc() : memref<1x9x1x4x1xf32, affine_map<(d0, d1, d2, d3, d4) -> (36 * d0 + 36 * d1 + 4 * d2 + 4 * d3 + d4)>> - // CHECK: subview %12[%arg1, %arg1, %arg1, %arg1, %arg1] + %12 = memref.alloc() : memref<1x9x1x4x1xf32, affine_map<(d0, d1, d2, d3, d4) -> (36 * d0 + 36 * d1 + 4 * d2 + 4 * d3 + d4)>> + // CHECK: memref.subview %12[%arg1, %arg1, %arg1, %arg1, %arg1] // CHECK-SAME: [1, 9, 1, 4, 1] [%arg2, %arg2, %arg2, %arg2, %arg2] : // CHECK-SAME: memref<1x9x1x4x1xf32, #[[$SUBVIEW_MAP6]]> to memref<9x4xf32, #[[$SUBVIEW_MAP2]]> - %13 = subview %12[%arg1, %arg1, %arg1, %arg1, %arg1][1, 9, 1, 4, 1][%arg2, %arg2, %arg2, %arg2, %arg2] : memref<1x9x1x4x1xf32, offset: 0, strides: [36, 36, 4, 4, 1]> to memref<9x4xf32, offset: ?, strides: [?, ?]> - // CHECK: subview %12[%arg1, %arg1, %arg1, %arg1, %arg1] + %13 = memref.subview %12[%arg1, %arg1, %arg1, %arg1, %arg1][1, 9, 1, 4, 1][%arg2, %arg2, %arg2, %arg2, %arg2] : memref<1x9x1x4x1xf32, offset: 0, strides: [36, 36, 4, 4, 1]> to memref<9x4xf32, offset: ?, strides: [?, ?]> + // CHECK: memref.subview %12[%arg1, %arg1, %arg1, %arg1, %arg1] // CHECK-SAME: [1, 9, 1, 4, 1] [%arg2, %arg2, %arg2, %arg2, %arg2] : // CHECK-SAME: memref<1x9x1x4x1xf32, #[[$SUBVIEW_MAP6]]> to memref<1x9x4xf32, #[[$BASE_MAP3]]> - %14 = subview %12[%arg1, %arg1, %arg1, %arg1, %arg1][1, 9, 1, 4, 1][%arg2, %arg2, %arg2, %arg2, %arg2] : memref<1x9x1x4x1xf32, offset: 0, strides: [36, 36, 4, 4, 1]> to memref<1x9x4xf32, offset: ?, strides: [?, ?, ?]> + %14 = memref.subview %12[%arg1, %arg1, %arg1, %arg1, %arg1][1, 9, 1, 4, 1][%arg2, %arg2, %arg2, %arg2, %arg2] : memref<1x9x1x4x1xf32, offset: 0, strides: [36, 36, 4, 4, 1]> to memref<1x9x4xf32, offset: ?, strides: [?, ?, ?]> - %15 = alloc(%arg1, %arg2)[%c0, %c1, %arg1, 
%arg0, %arg0, %arg2, %arg2] : memref<1x?x5x1x?x1xf32, affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6] -> (s0 + s1 * d0 + s2 * d1 + s3 * d2 + s4 * d3 + s5 * d4 + s6 * d5)>> - // CHECK: subview %15[0, 0, 0, 0, 0, 0] [1, %arg1, 5, 1, %arg2, 1] [1, 1, 1, 1, 1, 1] : + %15 = memref.alloc(%arg1, %arg2)[%c0, %c1, %arg1, %arg0, %arg0, %arg2, %arg2] : memref<1x?x5x1x?x1xf32, affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6] -> (s0 + s1 * d0 + s2 * d1 + s3 * d2 + s4 * d3 + s5 * d4 + s6 * d5)>> + // CHECK: memref.subview %15[0, 0, 0, 0, 0, 0] [1, %arg1, 5, 1, %arg2, 1] [1, 1, 1, 1, 1, 1] : // CHECK-SAME: memref<1x?x5x1x?x1xf32, #[[$SUBVIEW_MAP7]]> to memref - %16 = subview %15[0, 0, 0, 0, 0, 0][1, %arg1, 5, 1, %arg2, 1][1, 1, 1, 1, 1, 1] : memref<1x?x5x1x?x1xf32, offset: ?, strides: [?, ?, ?, ?, ?, ?]> to memref - // CHECK: subview %15[%arg1, %arg1, %arg1, %arg1, %arg1, %arg1] [1, %arg1, 5, 1, %arg2, 1] [1, 1, 1, 1, 1, 1] : + %16 = memref.subview %15[0, 0, 0, 0, 0, 0][1, %arg1, 5, 1, %arg2, 1][1, 1, 1, 1, 1, 1] : memref<1x?x5x1x?x1xf32, offset: ?, strides: [?, ?, ?, ?, ?, ?]> to memref + // CHECK: memref.subview %15[%arg1, %arg1, %arg1, %arg1, %arg1, %arg1] [1, %arg1, 5, 1, %arg2, 1] [1, 1, 1, 1, 1, 1] : // CHECK-SAME: memref<1x?x5x1x?x1xf32, #[[$SUBVIEW_MAP7]]> to memref - %17 = subview %15[%arg1, %arg1, %arg1, %arg1, %arg1, %arg1][1, %arg1, 5, 1, %arg2, 1][1, 1, 1, 1, 1, 1] : memref<1x?x5x1x?x1xf32, offset: ?, strides: [?, ?, ?, ?, ?, ?]> to memref + %17 = memref.subview %15[%arg1, %arg1, %arg1, %arg1, %arg1, %arg1][1, %arg1, 5, 1, %arg2, 1][1, 1, 1, 1, 1, 1] : memref<1x?x5x1x?x1xf32, offset: ?, strides: [?, ?, ?, ?, ?, ?]> to memref - %18 = alloc() : memref<1x8xf32> - // CHECK: subview %18[0, 0] [1, 8] [1, 1] : memref<1x8xf32> to memref<8xf32> - %19 = subview %18[0, 0][1, 8][1, 1] : memref<1x8xf32> to memref<8xf32> + %18 = memref.alloc() : memref<1x8xf32> + // CHECK: memref.subview %18[0, 0] [1, 8] [1, 1] : memref<1x8xf32> to memref<8xf32> + %19 = memref.subview %18[0, 0][1, 8][1, 1] : memref<1x8xf32> to memref<8xf32> - %20 = alloc() : memref<8x16x4xf32> - // CHECK: subview %20[0, 0, 0] [1, 16, 4] [1, 1, 1] : memref<8x16x4xf32> to memref<16x4xf32> - %21 = subview %20[0, 0, 0][1, 16, 4][1, 1, 1] : memref<8x16x4xf32> to memref<16x4xf32> + %20 = memref.alloc() : memref<8x16x4xf32> + // CHECK: memref.subview %20[0, 0, 0] [1, 16, 4] [1, 1, 1] : memref<8x16x4xf32> to memref<16x4xf32> + %21 = memref.subview %20[0, 0, 0][1, 16, 4][1, 1, 1] : memref<8x16x4xf32> to memref<16x4xf32> - %22 = subview %20[3, 4, 2][1, 6, 3][1, 1, 1] : memref<8x16x4xf32> to memref<6x3xf32, offset: 210, strides: [4, 1]> + %22 = memref.subview %20[3, 4, 2][1, 6, 3][1, 1, 1] : memref<8x16x4xf32> to memref<6x3xf32, offset: 210, strides: [4, 1]> - %23 = alloc() : memref - %78 = subview %23[] [] [] : memref to memref + %23 = memref.alloc() : memref + %78 = memref.subview %23[] [] [] : memref to memref /// Subview with only leading operands. - %24 = alloc() : memref<5x3xf32> - // CHECK: subview %{{.*}}[2] [3] [1] : memref<5x3xf32> to memref<3x3xf32, #[[$SUBVIEW_MAP9]]> - %25 = subview %24[2][3][1]: memref<5x3xf32> to memref<3x3xf32, offset: 6, strides: [3, 1]> + %24 = memref.alloc() : memref<5x3xf32> + // CHECK: memref.subview %{{.*}}[2] [3] [1] : memref<5x3xf32> to memref<3x3xf32, #[[$SUBVIEW_MAP9]]> + %25 = memref.subview %24[2][3][1]: memref<5x3xf32> to memref<3x3xf32, offset: 6, strides: [3, 1]> /// Rank-reducing subview with only leading operands. 
- // CHECK: subview %{{.*}}[1] [1] [1] : memref<5x3xf32> to memref<3xf32, #[[$SUBVIEW_MAP10]]> - %26 = subview %24[1][1][1]: memref<5x3xf32> to memref<3xf32, offset: 3, strides: [1]> + // CHECK: memref.subview %{{.*}}[1] [1] [1] : memref<5x3xf32> to memref<3xf32, #[[$SUBVIEW_MAP10]]> + %26 = memref.subview %24[1][1][1]: memref<5x3xf32> to memref<3xf32, offset: 3, strides: [1]> // Corner-case of 0-D rank-reducing subview with an offset. - // CHECK: subview %{{.*}}[1, 1] [1, 1] [1, 1] : memref<5x3xf32> to memref - %27 = subview %24[1, 1] [1, 1] [1, 1] : memref<5x3xf32> to memref (4)>> + // CHECK: memref.subview %{{.*}}[1, 1] [1, 1] [1, 1] : memref<5x3xf32> to memref + %27 = memref.subview %24[1, 1] [1, 1] [1, 1] : memref<5x3xf32> to memref (4)>> - // CHECK: subview %{{.*}}[%{{.*}}, 1] [1, 1] [1, 1] : memref<5x3xf32> to memref - %28 = subview %24[%arg0, 1] [1, 1] [1, 1] : memref<5x3xf32> to memref (s0)>> + // CHECK: memref.subview %{{.*}}[%{{.*}}, 1] [1, 1] [1, 1] : memref<5x3xf32> to memref + %28 = memref.subview %24[%arg0, 1] [1, 1] [1, 1] : memref<5x3xf32> to memref (s0)>> - // CHECK: subview %{{.*}}[0, %{{.*}}] [%{{.*}}, 1] [1, 1] : memref to memref - %a30 = alloc(%arg0, %arg0) : memref - %30 = subview %a30[0, %arg1][%arg2, 1][1, 1] : memref to memref (d0 * s1 + s0)>> + // CHECK: memref.subview %{{.*}}[0, %{{.*}}] [%{{.*}}, 1] [1, 1] : memref to memref + %a30 = memref.alloc(%arg0, %arg0) : memref + %30 = memref.subview %a30[0, %arg1][%arg2, 1][1, 1] : memref to memref (d0 * s1 + s0)>> return } @@ -757,9 +757,9 @@ // CHECK-SAME: %[[ARG:.*]]: tensor<4x4x?xf32> func @test_dimop(%arg0: tensor<4x4x?xf32>) { // CHECK: %[[C2:.*]] = constant 2 : index - // CHECK: %{{.*}} = dim %[[ARG]], %[[C2]] : tensor<4x4x?xf32> + // CHECK: %{{.*}} = memref.dim %[[ARG]], %[[C2]] : tensor<4x4x?xf32> %c2 = constant 2 : index - %0 = dim %arg0, %c2 : tensor<4x4x?xf32> + %0 = memref.dim %arg0, %c2 : tensor<4x4x?xf32> // use dim as an index to ensure type correctness %1 = affine.apply affine_map<(d0) -> (d0)>(%0) return @@ -779,19 +779,19 @@ // CHECK-LABEL: func @tensor_load_store func @tensor_load_store(%0 : memref<4x4xi32>) { - // CHECK: %[[TENSOR:.*]] = tensor_load %[[MEMREF:.*]] : memref<4x4xi32> - %1 = tensor_load %0 : memref<4x4xi32> - // CHECK: tensor_store %[[TENSOR]], %[[MEMREF]] : memref<4x4xi32> - tensor_store %1, %0 : memref<4x4xi32> + // CHECK: %[[TENSOR:.*]] = memref.tensor_load %[[MEMREF:.*]] : memref<4x4xi32> + %1 = memref.tensor_load %0 : memref<4x4xi32> + // CHECK: memref.tensor_store %[[TENSOR]], %[[MEMREF]] : memref<4x4xi32> + memref.tensor_store %1, %0 : memref<4x4xi32> return } // CHECK-LABEL: func @unranked_tensor_load_store func @unranked_tensor_load_store(%0 : memref<*xi32>) { - // CHECK: %[[TENSOR:.*]] = tensor_load %[[MEMREF:.*]] : memref<*xi32> - %1 = tensor_load %0 : memref<*xi32> - // CHECK: tensor_store %[[TENSOR]], %[[MEMREF]] : memref<*xi32> - tensor_store %1, %0 : memref<*xi32> + // CHECK: %[[TENSOR:.*]] = memref.tensor_load %[[MEMREF:.*]] : memref<*xi32> + %1 = memref.tensor_load %0 : memref<*xi32> + // CHECK: memref.tensor_store %[[TENSOR]], %[[MEMREF]] : memref<*xi32> + memref.tensor_store %1, %0 : memref<*xi32> return } @@ -820,8 +820,8 @@ // CHECK-LABEL: func @assume_alignment // CHECK-SAME: %[[MEMREF:.*]]: memref<4x4xf16> func @assume_alignment(%0: memref<4x4xf16>) { - // CHECK: assume_alignment %[[MEMREF]], 16 : memref<4x4xf16> - assume_alignment %0, 16 : memref<4x4xf16> + // CHECK: memref.assume_alignment %[[MEMREF]], 16 : memref<4x4xf16> + memref.assume_alignment %0, 16 : 
memref<4x4xf16> return } diff --git a/mlir/test/IR/invalid-ops.mlir b/mlir/test/IR/invalid-ops.mlir --- a/mlir/test/IR/invalid-ops.mlir +++ b/mlir/test/IR/invalid-ops.mlir @@ -2,7 +2,7 @@ func @dim(%arg : tensor<1x?xf32>) { %c2 = constant 2 : index - dim %arg, %c2 : tensor<1x?xf32> // expected-error {{'std.dim' op index is out of range}} + memref.dim %arg, %c2 : tensor<1x?xf32> // expected-error {{'memref.dim' op index is out of range}} return } @@ -10,7 +10,8 @@ func @rank(f32) { ^bb(%0: f32): - "std.rank"(%0): (f32)->index // expected-error {{'std.rank' op operand #0 must be any tensor or memref type}} + "std.rank"(%0): (f32)->index // expected-error {{'std.rank' op operand #0 must be any memref or tensor type}} + return } @@ -88,7 +89,7 @@ %0 = constant 7 : index // Test alloc with wrong number of dynamic dimensions. // expected-error@+1 {{dimension operand count does not equal memref dynamic dimension count}} - %1 = alloc(%0)[%0] : memref<2x4xf32, affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>, 1> + %1 = memref.alloc(%0)[%0] : memref<2x4xf32, affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>, 1> return } @@ -99,7 +100,7 @@ %0 = constant 7 : index // Test alloc with wrong number of symbols // expected-error@+1 {{symbol operand count does not equal memref symbol count}} - %1 = alloc(%0) : memref<2x?xf32, affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>, 1> + %1 = memref.alloc(%0) : memref<2x?xf32, affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>, 1> return } @@ -107,19 +108,19 @@ func @test_store_zero_results() { ^bb0: - %0 = alloc() : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1> + %0 = memref.alloc() : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1> %1 = constant 0 : index %2 = constant 1 : index - %3 = load %0[%1, %2] : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1> + %3 = memref.load %0[%1, %2] : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1> // Test that store returns zero results. 
- %4 = store %3, %0[%1, %2] : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1> // expected-error {{cannot name an operation with no results}} + %4 = memref.store %3, %0[%1, %2] : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1> // expected-error {{cannot name an operation with no results}} return } // ----- func @test_store_zero_results2(%x: i32, %p: memref) { - "std.store"(%x,%p) : (i32, memref) -> i32 // expected-error {{'std.store' op requires zero results}} + "memref.store"(%x,%p) : (i32, memref) -> i32 // expected-error {{'memref.store' op requires zero results}} return } @@ -127,7 +128,7 @@ func @test_alloc_memref_map_rank_mismatch() { ^bb0: - %0 = alloc() : memref<1024x64xf32, affine_map<(d0) -> (d0)>, 1> // expected-error {{memref affine map dimension mismatch}} + %0 = memref.alloc() : memref<1024x64xf32, affine_map<(d0) -> (d0)>, 1> // expected-error {{memref affine map dimension mismatch}} return } @@ -291,14 +292,14 @@ func @dma_start_not_enough_operands() { // expected-error@+1 {{expected at least 4 operands}} - "std.dma_start"() : () -> () + "memref.dma_start"() : () -> () } // ----- func @dma_no_src_memref(%m : f32, %tag : f32, %c0 : index) { // expected-error@+1 {{expected source to be of memref type}} - dma_start %m[%c0], %m[%c0], %c0, %tag[%c0] : f32, f32, f32 + memref.dma_start %m[%c0], %m[%c0], %c0, %tag[%c0] : f32, f32, f32 } // ----- @@ -306,7 +307,7 @@ func @dma_start_not_enough_operands_for_src( %src: memref<2x2x2xf32>, %idx: index) { // expected-error@+1 {{expected at least 7 operands}} - "std.dma_start"(%src, %idx, %idx, %idx) : (memref<2x2x2xf32>, index, index, index) -> () + "memref.dma_start"(%src, %idx, %idx, %idx) : (memref<2x2x2xf32>, index, index, index) -> () } // ----- @@ -315,16 +316,16 @@ %src: memref<2x2xf32>, %idx: index, %dst: memref<2xf32,1>, %tag: memref, %flt: f32) { // expected-error@+1 {{expected source indices to be of index type}} - "std.dma_start"(%src, %idx, %flt, %dst, %idx, %tag, %idx) + "memref.dma_start"(%src, %idx, %flt, %dst, %idx, %tag, %idx) : (memref<2x2xf32>, index, f32, memref<2xf32,1>, index, memref, index) -> () } // ----- func @dma_no_dst_memref(%m : f32, %tag : f32, %c0 : index) { - %mref = alloc() : memref<8 x f32> + %mref = memref.alloc() : memref<8 x f32> // expected-error@+1 {{expected destination to be of memref type}} - dma_start %mref[%c0], %m[%c0], %c0, %tag[%c0] : memref<8 x f32>, f32, f32 + memref.dma_start %mref[%c0], %m[%c0], %c0, %tag[%c0] : memref<8 x f32>, f32, f32 } // ----- @@ -333,7 +334,7 @@ %src: memref<2x2xf32>, %idx: index, %dst: memref<2xf32,1>, %tag: memref) { // expected-error@+1 {{expected at least 7 operands}} - "std.dma_start"(%src, %idx, %idx, %dst, %idx, %idx) + "memref.dma_start"(%src, %idx, %idx, %dst, %idx, %idx) : (memref<2x2xf32>, index, index, memref<2xf32,1>, index, index) -> () } @@ -343,7 +344,7 @@ %src: memref<2x2xf32>, %idx: index, %dst: memref<2xf32,1>, %tag: memref, %flt: f32) { // expected-error@+1 {{expected destination indices to be of index type}} - "std.dma_start"(%src, %idx, %idx, %dst, %flt, %tag, %idx) + "memref.dma_start"(%src, %idx, %idx, %dst, %flt, %tag, %idx) : (memref<2x2xf32>, index, index, memref<2xf32,1>, f32, memref, index) -> () } @@ -353,16 +354,16 @@ %src: memref<2x2xf32>, %idx: index, %dst: memref<2xf32,1>, %tag: memref, %flt: f32) { // expected-error@+1 {{expected num elements to be of index type}} - "std.dma_start"(%src, %idx, %idx, %dst, %idx, %flt, %tag) + "memref.dma_start"(%src, %idx, %idx, %dst, %idx, %flt, %tag) : (memref<2x2xf32>, index, 
index, memref<2xf32,1>, index, f32, memref) -> () } // ----- func @dma_no_tag_memref(%tag : f32, %c0 : index) { - %mref = alloc() : memref<8 x f32> + %mref = memref.alloc() : memref<8 x f32> // expected-error@+1 {{expected tag to be of memref type}} - dma_start %mref[%c0], %mref[%c0], %c0, %tag[%c0] : memref<8 x f32>, memref<8 x f32>, f32 + memref.dma_start %mref[%c0], %mref[%c0], %c0, %tag[%c0] : memref<8 x f32>, memref<8 x f32>, f32 } // ----- @@ -371,7 +372,7 @@ %src: memref<2x2xf32>, %idx: index, %dst: memref<2xf32,1>, %tag: memref<2xi32,2>) { // expected-error@+1 {{expected at least 8 operands}} - "std.dma_start"(%src, %idx, %idx, %dst, %idx, %idx, %tag) + "memref.dma_start"(%src, %idx, %idx, %dst, %idx, %idx, %tag) : (memref<2x2xf32>, index, index, memref<2xf32,1>, index, index, memref<2xi32,2>) -> () } @@ -381,7 +382,7 @@ %src: memref<2x2xf32>, %idx: index, %dst: memref<2xf32,1>, %tag: memref<2xi32,2>, %flt: f32) { // expected-error@+1 {{expected tag indices to be of index type}} - "std.dma_start"(%src, %idx, %idx, %dst, %idx, %idx, %tag, %flt) + "memref.dma_start"(%src, %idx, %idx, %dst, %idx, %idx, %tag, %flt) : (memref<2x2xf32>, index, index, memref<2xf32,1>, index, index, memref<2xi32,2>, f32) -> () } @@ -391,7 +392,7 @@ %src: memref<2x2xf32>, %idx: index, %dst: memref<2xf32>, %tag: memref) { // expected-error@+1 {{DMA should be between different memory spaces}} - dma_start %src[%idx, %idx], %dst[%idx], %idx, %tag[] : memref<2x2xf32>, memref<2xf32>, memref + memref.dma_start %src[%idx, %idx], %dst[%idx], %idx, %tag[] : memref<2x2xf32>, memref<2xf32>, memref } // ----- @@ -400,7 +401,7 @@ %src: memref<2x2xf32>, %idx: index, %dst: memref<2xf32,1>, %tag: memref) { // expected-error@+1 {{incorrect number of operands}} - "std.dma_start"(%src, %idx, %idx, %dst, %idx, %idx, %tag, %idx, %idx, %idx) + "memref.dma_start"(%src, %idx, %idx, %dst, %idx, %idx, %tag, %idx, %idx, %idx) : (memref<2x2xf32>, index, index, memref<2xf32,1>, index, index, memref, index, index, index) -> () } @@ -411,7 +412,7 @@ %src: memref<2x2xf32>, %idx: index, %dst: memref<2xf32,1>, %tag: memref, %flt: f32) { // expected-error@+1 {{expected stride and num elements per stride to be of type index}} - "std.dma_start"(%src, %idx, %idx, %dst, %idx, %idx, %tag, %idx, %flt) + "memref.dma_start"(%src, %idx, %idx, %dst, %idx, %idx, %tag, %idx, %flt) : (memref<2x2xf32>, index, index, memref<2xf32,1>, index, index, memref, index, f32) -> () } @@ -419,28 +420,28 @@ func @dma_wait_not_enough_operands() { // expected-error@+1 {{expected at least 2 operands}} - "std.dma_wait"() : () -> () + "memref.dma_wait"() : () -> () } // ----- func @dma_wait_no_tag_memref(%tag : f32, %c0 : index) { // expected-error@+1 {{expected tag to be of memref type}} - "std.dma_wait"(%tag, %c0, %c0) : (f32, index, index) -> () + "memref.dma_wait"(%tag, %c0, %c0) : (f32, index, index) -> () } // ----- func @dma_wait_wrong_index_type(%tag : memref<2xi32>, %idx: index, %flt: f32) { // expected-error@+1 {{expected tag indices to be of index type}} - "std.dma_wait"(%tag, %flt, %idx) : (memref<2xi32>, f32, index) -> () + "memref.dma_wait"(%tag, %flt, %idx) : (memref<2xi32>, f32, index) -> () } // ----- func @dma_wait_wrong_num_elements_type(%tag : memref<2xi32>, %idx: index, %flt: f32) { // expected-error@+1 {{expected the number of elements to be of index type}} - "std.dma_wait"(%tag, %idx, %flt) : (memref<2xi32>, index, f32) -> () + "memref.dma_wait"(%tag, %idx, %flt) : (memref<2xi32>, index, f32) -> () } // ----- @@ -856,9 +857,9 @@ // ----- func 
@invalid_view(%arg0 : index, %arg1 : index, %arg2 : index) {
-  %0 = alloc() : memref<2048xi8>
+  %0 = memref.alloc() : memref<2048xi8>
  // expected-error@+1 {{expects 1 offset operand}}
-  %1 = view %0[][%arg0, %arg1]
+  %1 = memref.view %0[][%arg0, %arg1]
    : memref<2048xi8> to memref
  return
}
@@ -866,9 +867,9 @@
// -----
func @invalid_view(%arg0 : index, %arg1 : index, %arg2 : index) {
-  %0 = alloc() : memref<2048xi8, affine_map<(d0) -> (d0 floordiv 8, d0 mod 8)>>
+  %0 = memref.alloc() : memref<2048xi8, affine_map<(d0) -> (d0 floordiv 8, d0 mod 8)>>
  // expected-error@+1 {{unsupported map for base memref type}}
-  %1 = view %0[%arg2][%arg0, %arg1]
+  %1 = memref.view %0[%arg2][%arg0, %arg1]
    : memref<2048xi8, affine_map<(d0) -> (d0 floordiv 8, d0 mod 8)>> to memref (d0 * 4 + d1 + s0)>>
  return
@@ -877,9 +878,9 @@
// -----
func @invalid_view(%arg0 : index, %arg1 : index, %arg2 : index) {
-  %0 = alloc() : memref<2048xi8>
+  %0 = memref.alloc() : memref<2048xi8>
  // expected-error@+1 {{unsupported map for result memref type}}
-  %1 = view %0[%arg2][%arg0, %arg1]
+  %1 = memref.view %0[%arg2][%arg0, %arg1]
    : memref<2048xi8> to memref (d0, d1, s0)>>
  return
}
@@ -887,18 +888,18 @@
// -----
func @invalid_view(%arg0 : index, %arg1 : index, %arg2 : index) {
-  %0 = alloc() : memref<2048xi8, 2>
+  %0 = memref.alloc() : memref<2048xi8, 2>
  // expected-error@+1 {{different memory spaces}}
-  %1 = view %0[%arg2][%arg0, %arg1] : memref<2048xi8, 2> to memref
+  %1 = memref.view %0[%arg2][%arg0, %arg1] : memref<2048xi8, 2> to memref
  return
}
// -----
func @invalid_view(%arg0 : index, %arg1 : index, %arg2 : index) {
-  %0 = alloc() : memref<2048xi8>
+  %0 = memref.alloc() : memref<2048xi8>
  // expected-error@+1 {{incorrect number of size operands for type}}
-  %1 = view %0[%arg2][%arg0]
+  %1 = memref.view %0[%arg2][%arg0]
    : memref<2048xi8> to memref
  return
}
@@ -906,9 +907,9 @@
// -----
func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) {
-  %0 = alloc() : memref<8x16x4xf32>
+  %0 = memref.alloc() : memref<8x16x4xf32>
  // expected-error@+1 {{expected mixed offsets rank to match mixed sizes rank (2 vs 3) so the rank of the result type is well-formed}}
-  %1 = subview %0[0, 0][2, 2, 2][1, 1, 1]
+  %1 = memref.subview %0[0, 0][2, 2, 2][1, 1, 1]
    : memref<8x16x4xf32> to memref<8x16x4xf32>
  return
}
@@ -916,9 +917,9 @@
// -----
func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) {
-  %0 = alloc() : memref<8x16x4xf32>
+  %0 = memref.alloc() : memref<8x16x4xf32>
  // expected-error@+1 {{expected mixed sizes rank to match mixed strides rank (3 vs 2) so the rank of the result type is well-formed}}
-  %1 = subview %0[0, 0, 0][2, 2, 2][1, 1]
+  %1 = memref.subview %0[0, 0, 0][2, 2, 2][1, 1]
    : memref<8x16x4xf32> to memref<8x16x4xf32>
  return
}
@@ -926,9 +927,9 @@
// -----
func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) {
-  %0 = alloc() : memref<8x16x4xf32>
+  %0 = memref.alloc() : memref<8x16x4xf32>
  // expected-error@+1 {{expected mixed sizes rank to match mixed strides rank (3 vs 2) so the rank of the result type is well-formed}}
-  %1 = memref_reinterpret_cast %0 to offset: [0], sizes: [2, 2, 2], strides:[1, 1]
+  %1 = memref.reinterpret_cast %0 to offset: [0], sizes: [2, 2, 2], strides:[1, 1]
    : memref<8x16x4xf32> to memref<8x16x4xf32>
  return
}
@@ -936,9 +937,9 @@
// -----
func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) {
-  %0 = alloc() : memref<8x16x4xf32, offset: 0, strides: [64, 4, 1], 2>
+  %0 = memref.alloc() : memref<8x16x4xf32, offset: 0, strides: [64, 4, 1], 2>
  // expected-error@+1 {{different memory spaces}}
-  %1 = subview %0[0, 0, 0][%arg2, %arg2, %arg2][1, 1, 1]
+  %1 = memref.subview %0[0, 0, 0][%arg2, %arg2, %arg2][1, 1, 1]
    : memref<8x16x4xf32, offset: 0, strides: [64, 4, 1], 2> to memref<8x?x4xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * s0 + d1 * 4 + d2)>>
  return
@@ -947,9 +948,9 @@
// -----
func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) {
-  %0 = alloc() : memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 + d1, d1 + d2, d2)>>
+  %0 = memref.alloc() : memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 + d1, d1 + d2, d2)>>
  // expected-error@+1 {{is not strided}}
-  %1 = subview %0[0, 0, 0][%arg2, %arg2, %arg2][1, 1, 1]
+  %1 = memref.subview %0[0, 0, 0][%arg2, %arg2, %arg2][1, 1, 1]
    : memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 + d1, d1 + d2, d2)>> to memref<8x?x4xf32, offset: 0, strides: [?, 4, 1]>
  return
@@ -958,9 +959,9 @@
// -----
func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) {
-  %0 = alloc() : memref<8x16x4xf32>
+  %0 = memref.alloc() : memref<8x16x4xf32>
  // expected-error@+1 {{expected <= 3 offset values}}
-  %1 = subview %0[%arg0, %arg1, 0, 0][%arg2, 0, 0, 0][1, 1, 1, 1]
+  %1 = memref.subview %0[%arg0, %arg1, 0, 0][%arg2, 0, 0, 0][1, 1, 1, 1]
    : memref<8x16x4xf32> to memref<8x?x4xf32, offset: 0, strides:[?, ?, 4]>
  return
@@ -969,9 +970,9 @@
// -----
func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) {
-  %0 = alloc() : memref<8x16x4xf32>
+  %0 = memref.alloc() : memref<8x16x4xf32>
  // expected-error@+1 {{expected result type to be 'memref (d0 * s1 + s0 + d1 * s2 + d2 * s3)>>' or a rank-reduced version. (mismatch of result affine map)}}
-  %1 = subview %0[%arg0, %arg1, %arg2][%arg0, %arg1, %arg2][%arg0, %arg1, %arg2]
+  %1 = memref.subview %0[%arg0, %arg1, %arg2][%arg0, %arg1, %arg2][%arg0, %arg1, %arg2]
    : memref<8x16x4xf32> to memref
  return
@@ -980,9 +981,9 @@
// -----
func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) {
-  %0 = alloc() : memref<8x16x4xf32>
+  %0 = memref.alloc() : memref<8x16x4xf32>
  // expected-error@+1 {{expected result element type to be 'f32'}}
-  %1 = subview %0[0, 0, 0][8, 16, 4][1, 1, 1]
+  %1 = memref.subview %0[0, 0, 0][8, 16, 4][1, 1, 1]
    : memref<8x16x4xf32> to memref<8x16x4xi32>
  return
@@ -991,9 +992,9 @@
// -----
func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) {
-  %0 = alloc() : memref<8x16x4xf32>
+  %0 = memref.alloc() : memref<8x16x4xf32>
  // expected-error@+1 {{expected result rank to be smaller or equal to the source rank.}}
-  %1 = subview %0[0, 0, 0][8, 16, 4][1, 1, 1]
+  %1 = memref.subview %0[0, 0, 0][8, 16, 4][1, 1, 1]
    : memref<8x16x4xf32> to memref<8x16x4x3xi32>
  return
@@ -1002,9 +1003,9 @@
// -----
func @invalid_rank_reducing_subview(%arg0 : index, %arg1 : index, %arg2 : index) {
-  %0 = alloc() : memref<8x16x4xf32>
+  %0 = memref.alloc() : memref<8x16x4xf32>
  // expected-error@+1 {{expected result type to be 'memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>>' or a rank-reduced version. (mismatch of result sizes)}}
-  %1 = subview %0[0, 0, 0][8, 16, 4][1, 1, 1]
+  %1 = memref.subview %0[0, 0, 0][8, 16, 4][1, 1, 1]
    : memref<8x16x4xf32> to memref<16x4xf32>
  return
}
@@ -1012,9 +1013,9 @@
// -----
func @invalid_rank_reducing_subview(%arg0 : index, %arg1 : index, %arg2 : index) {
-  %0 = alloc() : memref<8x16x4xf32>
+  %0 = memref.alloc() : memref<8x16x4xf32>
  // expected-error@+1 {{expected result type to be 'memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2 + 8)>>' or a rank-reduced version. (mismatch of result sizes)}}
-  %1 = subview %0[0, 2, 0][8, 16, 4][1, 1, 1]
+  %1 = memref.subview %0[0, 2, 0][8, 16, 4][1, 1, 1]
    : memref<8x16x4xf32> to memref<16x4xf32>
  return
}
@@ -1023,7 +1024,7 @@
func @invalid_rank_reducing_subview(%arg0 : memref, %arg1 : index, %arg2 : index) {
  // expected-error@+1 {{expected result type to be 'memref (d0 * s1 + s0 + d1)>>' or a rank-reduced version. (mismatch of result affine map)}}
-  %0 = subview %arg0[0, %arg1][%arg2, 1][1, 1] : memref to memref
+  %0 = memref.subview %arg0[0, %arg1][%arg2, 1][1, 1] : memref to memref
  return
}
@@ -1032,7 +1033,7 @@
// The affine map affine_map<(d0)[s0, s1, s2] -> (d0 * s1 + s0)> has an extra unused symbol.
func @invalid_rank_reducing_subview(%arg0 : memref, %arg1 : index, %arg2 : index) {
  // expected-error@+1 {{expected result type to be 'memref (d0 * s1 + s0 + d1)>>' or a rank-reduced version. (mismatch of result affine map) inferred type: (d0)[s0, s1] -> (d0 * s1 + s0)}}
-  %0 = subview %arg0[0, %arg1][%arg2, 1][1, 1] : memref to memref (d0 * s1 + s0)>>
+  %0 = memref.subview %arg0[0, %arg1][%arg2, 1][1, 1] : memref to memref (d0 * s1 + s0)>>
  return
}
@@ -1040,7 +1041,7 @@
func @invalid_memref_cast(%arg0 : memref<12x4x16xf32, offset:0, strides:[64, 16, 1]>) {
  // expected-error@+1{{operand type 'memref<12x4x16xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 16 + d2)>>' and result type 'memref<12x4x16xf32, affine_map<(d0, d1, d2) -> (d0 * 128 + d1 * 32 + d2 * 2)>>' are cast incompatible}}
-  %0 = memref_cast %arg0 : memref<12x4x16xf32, offset:0, strides:[64, 16, 1]> to memref<12x4x16xf32, offset:0, strides:[128, 32, 2]>
+  %0 = memref.cast %arg0 : memref<12x4x16xf32, offset:0, strides:[64, 16, 1]> to memref<12x4x16xf32, offset:0, strides:[128, 32, 2]>
  return
}
@@ -1048,7 +1049,7 @@
func @invalid_memref_cast(%arg0 : memref<12x4x16xf32, offset:0, strides:[64, 16, 1]>) {
  // expected-error@+1{{operand type 'memref<12x4x16xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 16 + d2)>>' and result type 'memref<12x4x16xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 16 + d2 + 16)>>' are cast incompatible}}
-  %0 = memref_cast %arg0 : memref<12x4x16xf32, offset:0, strides:[64, 16, 1]> to memref<12x4x16xf32, offset:16, strides:[64, 16, 1]>
+  %0 = memref.cast %arg0 : memref<12x4x16xf32, offset:0, strides:[64, 16, 1]> to memref<12x4x16xf32, offset:16, strides:[64, 16, 1]>
  return
}
@@ -1056,36 +1057,36 @@
// incompatible element types
func @invalid_memref_cast() {
-  %0 = alloc() : memref<2x5xf32, 0>
+  %0 = memref.alloc() : memref<2x5xf32, 0>
  // expected-error@+1 {{operand type 'memref<2x5xf32>' and result type 'memref<*xi32>' are cast incompatible}}
-  %1 = memref_cast %0 : memref<2x5xf32, 0> to memref<*xi32>
+  %1 = memref.cast %0 : memref<2x5xf32, 0> to memref<*xi32>
  return
}
// -----
func @invalid_prefetch_rw(%i : index) {
-  %0 = alloc() : memref<10xf32>
+  %0 = memref.alloc() : memref<10xf32>
  // expected-error@+1 {{rw specifier has to be 'read' or 'write'}}
-  prefetch %0[%i], rw, locality<0>, data : memref<10xf32>
+  memref.prefetch %0[%i], rw, locality<0>, data : memref<10xf32>
  return
}
// -----
func @invalid_prefetch_cache_type(%i : index) {
-  %0 = alloc() : memref<10xf32>
+  %0 = memref.alloc() : memref<10xf32>
  // expected-error@+1 {{cache type has to be 'data' or 'instr'}}
-  prefetch %0[%i], read, locality<0>, false : memref<10xf32>
+  memref.prefetch %0[%i], read, locality<0>, false : memref<10xf32>
  return
}
// -----
func @invalid_prefetch_locality_hint(%i : index) {
-  %0 = alloc() : memref<10xf32>
+  %0 = memref.alloc() : memref<10xf32>
  // expected-error@+1 {{32-bit signless integer attribute whose minimum value is 0 whose maximum value is 3}}
-  prefetch %0[%i], read, locality<5>, data : memref<10xf32>
+  memref.prefetch %0[%i], read, locality<5>, data : memref<10xf32>
  return
}
@@ -1093,9 +1094,9 @@
// incompatible memory space
func @invalid_memref_cast() {
-  %0 = alloc() : memref<2x5xf32, 0>
+  %0 = memref.alloc() : memref<2x5xf32, 0>
  // expected-error@+1 {{operand type 'memref<2x5xf32>' and result type 'memref<*xf32, 1>' are cast incompatible}}
-  %1 = memref_cast %0 : memref<2x5xf32, 0> to memref<*xf32, 1>
+  %1 = memref.cast %0 : memref<2x5xf32, 0> to memref<*xf32, 1>
  return
}
@@ -1103,10 +1104,10 @@
// unranked to unranked
func @invalid_memref_cast() {
-  %0 = alloc() : memref<2x5xf32, 0>
-  %1 = memref_cast %0 : memref<2x5xf32, 0> to memref<*xf32, 0>
+  %0 = memref.alloc() : memref<2x5xf32, 0>
+  %1 = memref.cast %0 : memref<2x5xf32, 0> to memref<*xf32, 0>
  // expected-error@+1 {{operand type 'memref<*xf32>' and result type 'memref<*xf32>' are cast incompatible}}
-  %2 = memref_cast %1 : memref<*xf32, 0> to memref<*xf32, 0>
+  %2 = memref.cast %1 : memref<*xf32, 0> to memref<*xf32, 0>
  return
}
@@ -1177,7 +1178,7 @@
  %x = generic_atomic_rmw %I[%i] : memref<10xf32> {
    ^bb0(%old_value : f32):
      %c1 = constant 1.0 : f32
-      %buf = alloc() : memref<2048xf32>
+      %buf = memref.alloc() : memref<2048xf32>
      atomic_yield %c1 : f32
  }
}
@@ -1199,7 +1200,7 @@
// alignment is not power of 2.
func @assume_alignment(%0: memref<4x4xf16>) {
  // expected-error@+1 {{alignment must be power of 2}}
-  std.assume_alignment %0, 12 : memref<4x4xf16>
+  memref.assume_alignment %0, 12 : memref<4x4xf16>
  return
}
@@ -1208,14 +1209,14 @@
// 0 alignment value.
func @assume_alignment(%0: memref<4x4xf16>) {
  // expected-error@+1 {{attribute 'alignment' failed to satisfy constraint: 32-bit signless integer attribute whose value is positive}}
-  std.assume_alignment %0, 0 : memref<4x4xf16>
+  memref.assume_alignment %0, 0 : memref<4x4xf16>
  return
}
// -----
"alloca_without_scoped_alloc_parent"() ( {
-  std.alloca() : memref<1xf32>
+  memref.alloca() : memref<1xf32>
  // expected-error@-1 {{requires an ancestor op with AutomaticAllocationScope trait}}
  return
}) : () -> ()
diff --git a/mlir/test/IR/invalid.mlir b/mlir/test/IR/invalid.mlir
--- a/mlir/test/IR/invalid.mlir
+++ b/mlir/test/IR/invalid.mlir
@@ -821,7 +821,7 @@
func @f(%m : memref) {
  affine.for %i0 = 0 to 42 {
    // expected-note@+1 {{previously referenced here}}
-    %x = load %m[%i0, %i1] : memref
+    %x = memref.load %m[%i0, %i1] : memref
  }
  // expected-error@+1 {{region entry argument '%i1' is already in use}}
  affine.for %i1 = 0 to 42 {
diff --git a/mlir/test/IR/memory-ops.mlir b/mlir/test/IR/memory-ops.mlir
--- a/mlir/test/IR/memory-ops.mlir
+++ b/mlir/test/IR/memory-ops.mlir
@@ -6,28 +6,28 @@
func @alloc() {
^bb0:
  // Test simple alloc.
-  // CHECK: %0 = alloc() : memref<1024x64xf32, 1>
-  %0 = alloc() : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1>
+  // CHECK: %0 = memref.alloc() : memref<1024x64xf32, 1>
+  %0 = memref.alloc() : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1>
  %c0 = "std.constant"() {value = 0: index} : () -> index
  %c1 = "std.constant"() {value = 1: index} : () -> index
  // Test alloc with dynamic dimensions.
-  // CHECK: %1 = alloc(%c0, %c1) : memref
-  %1 = alloc(%c0, %c1) : memref (d0, d1)>, 1>
+  // CHECK: %1 = memref.alloc(%c0, %c1) : memref
+  %1 = memref.alloc(%c0, %c1) : memref (d0, d1)>, 1>
  // Test alloc with no dynamic dimensions and one symbol.
-  // CHECK: %2 = alloc()[%c0] : memref<2x4xf32, #map, 1>
-  %2 = alloc()[%c0] : memref<2x4xf32, affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>, 1>
+  // CHECK: %2 = memref.alloc()[%c0] : memref<2x4xf32, #map, 1>
+  %2 = memref.alloc()[%c0] : memref<2x4xf32, affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>, 1>
  // Test alloc with dynamic dimensions and one symbol.
-  // CHECK: %3 = alloc(%c1)[%c0] : memref<2x?xf32, #map, 1>
-  %3 = alloc(%c1)[%c0] : memref<2x?xf32, affine_map<(d0, d1)[s0] -> (d0 + s0, d1)>, 1>
+  // CHECK: %3 = memref.alloc(%c1)[%c0] : memref<2x?xf32, #map, 1>
+  %3 = memref.alloc(%c1)[%c0] : memref<2x?xf32, affine_map<(d0, d1)[s0] -> (d0 + s0, d1)>, 1>
  // Alloc with no mappings.
  // b/116054838 Parser crash while parsing ill-formed AllocOp
-  // CHECK: %4 = alloc() : memref<2xi32>
-  %4 = alloc() : memref<2 x i32>
+  // CHECK: %4 = memref.alloc() : memref<2xi32>
+  %4 = memref.alloc() : memref<2 x i32>
  // CHECK: return
  return
@@ -37,27 +37,27 @@
func @alloca() {
^bb0:
  // Test simple alloc.
-  // CHECK: %0 = alloca() : memref<1024x64xf32, 1>
-  %0 = alloca() : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1>
+  // CHECK: %0 = memref.alloca() : memref<1024x64xf32, 1>
+  %0 = memref.alloca() : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1>
  %c0 = "std.constant"() {value = 0: index} : () -> index
  %c1 = "std.constant"() {value = 1: index} : () -> index
  // Test alloca with dynamic dimensions.
-  // CHECK: %1 = alloca(%c0, %c1) : memref
-  %1 = alloca(%c0, %c1) : memref (d0, d1)>, 1>
+  // CHECK: %1 = memref.alloca(%c0, %c1) : memref
+  %1 = memref.alloca(%c0, %c1) : memref (d0, d1)>, 1>
  // Test alloca with no dynamic dimensions and one symbol.
-  // CHECK: %2 = alloca()[%c0] : memref<2x4xf32, #map, 1>
-  %2 = alloca()[%c0] : memref<2x4xf32, affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>, 1>
+  // CHECK: %2 = memref.alloca()[%c0] : memref<2x4xf32, #map, 1>
+  %2 = memref.alloca()[%c0] : memref<2x4xf32, affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>, 1>
  // Test alloca with dynamic dimensions and one symbol.
-  // CHECK: %3 = alloca(%c1)[%c0] : memref<2x?xf32, #map, 1>
-  %3 = alloca(%c1)[%c0] : memref<2x?xf32, affine_map<(d0, d1)[s0] -> (d0 + s0, d1)>, 1>
+  // CHECK: %3 = memref.alloca(%c1)[%c0] : memref<2x?xf32, #map, 1>
+  %3 = memref.alloca(%c1)[%c0] : memref<2x?xf32, affine_map<(d0, d1)[s0] -> (d0 + s0, d1)>, 1>
  // Alloca with no mappings, but with alignment.
-  // CHECK: %4 = alloca() {alignment = 64 : i64} : memref<2xi32>
-  %4 = alloca() {alignment = 64} : memref<2 x i32>
+  // CHECK: %4 = memref.alloca() {alignment = 64 : i64} : memref<2xi32>
+  %4 = memref.alloca() {alignment = 64} : memref<2 x i32>
  return
}
@@ -65,28 +65,28 @@
// CHECK-LABEL: func @dealloc() {
func @dealloc() {
^bb0:
-  // CHECK: %0 = alloc() : memref<1024x64xf32>
-  %0 = alloc() : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 0>
+  // CHECK: %0 = memref.alloc() : memref<1024x64xf32>
+  %0 = memref.alloc() : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 0>
-  // CHECK: dealloc %0 : memref<1024x64xf32>
-  dealloc %0 : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 0>
+  // CHECK: memref.dealloc %0 : memref<1024x64xf32>
+  memref.dealloc %0 : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 0>
  return
}
// CHECK-LABEL: func @load_store
func @load_store() {
^bb0:
-  // CHECK: %0 = alloc() : memref<1024x64xf32, 1>
-  %0 = alloc() : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1>
+  // CHECK: %0 = memref.alloc() : memref<1024x64xf32, 1>
+  %0 = memref.alloc() : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1>
  %1 = constant 0 : index
  %2 = constant 1 : index
-  // CHECK: %1 = load %0[%c0, %c1] : memref<1024x64xf32, 1>
-  %3 = load %0[%1, %2] : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1>
+  // CHECK: %1 = memref.load %0[%c0, %c1] : memref<1024x64xf32, 1>
+  %3 = memref.load %0[%1, %2] : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1>
-  // CHECK: store %1, %0[%c0, %c1] : memref<1024x64xf32, 1>
-  store %3, %0[%1, %2] : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1>
+  // CHECK: memref.store %1, %0[%c0, %c1] : memref<1024x64xf32, 1>
+  memref.store %3, %0[%1, %2] : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1>
  return
}
@@ -97,20 +97,20 @@
  %stride = constant 32 : index
  %elt_per_stride = constant 16 : index
-  %A = alloc() : memref<256 x f32, affine_map<(d0) -> (d0)>, 0>
-  %Ah = alloc() : memref<256 x f32, affine_map<(d0) -> (d0)>, 1>
-  %tag = alloc() : memref<1 x f32>
+  %A = memref.alloc() : memref<256 x f32, affine_map<(d0) -> (d0)>, 0>
+  %Ah = memref.alloc() : memref<256 x f32, affine_map<(d0) -> (d0)>, 1>
+  %tag = memref.alloc() : memref<1 x f32>
  %num_elements = constant 256 : index
-  dma_start %A[%c0], %Ah[%c0], %num_elements, %tag[%c0] : memref<256 x f32>, memref<256 x f32, 1>, memref<1 x f32>
-  dma_wait %tag[%c0], %num_elements : memref<1 x f32>
+  memref.dma_start %A[%c0], %Ah[%c0], %num_elements, %tag[%c0] : memref<256 x f32>, memref<256 x f32, 1>, memref<1 x f32>
+  memref.dma_wait %tag[%c0], %num_elements : memref<1 x f32>
  // CHECK: dma_start %0[%c0], %1[%c0], %c256, %2[%c0] : memref<256xf32>, memref<256xf32, 1>, memref<1xf32>
  // CHECK-NEXT: dma_wait %2[%c0], %c256 : memref<1xf32>
  // DMA with strides
-  dma_start %A[%c0], %Ah[%c0], %num_elements, %tag[%c0], %stride, %elt_per_stride : memref<256 x f32>, memref<256 x f32, 1>, memref<1 x f32>
-  dma_wait %tag[%c0], %num_elements : memref<1 x f32>
+  memref.dma_start %A[%c0], %Ah[%c0], %num_elements, %tag[%c0], %stride, %elt_per_stride : memref<256 x f32>, memref<256 x f32, 1>, memref<1 x f32>
+  memref.dma_wait %tag[%c0], %num_elements : memref<1 x f32>
  // CHECK-NEXT: dma_start %0[%c0], %1[%c0], %c256, %2[%c0], %c32, %c16 : memref<256xf32>, memref<256xf32, 1>, memref<1xf32>
  // CHECK-NEXT: dma_wait %2[%c0], %c256 : memref<1xf32>
diff --git a/mlir/test/IR/parser.mlir b/mlir/test/IR/parser.mlir
--- a/mlir/test/IR/parser.mlir
+++ b/mlir/test/IR/parser.mlir
@@ -311,7 +311,7
@@ %c = constant 0 : i32 // CHECK: %{{.*}} = constant 0 : i32 affine.for %i0 = 1 to %arg0 { // CHECK: affine.for %{{.*}} = 1 to %{{.*}} { affine.for %i1 = affine_map<(d0)[]->(d0)>(%i0)[] to %arg0 { // CHECK: affine.for %{{.*}} = #map{{[0-9]+}}(%{{.*}}) to %{{.*}} { - store %c, %arg1[%i0, %i1] : memref // CHECK: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] + memref.store %c, %arg1[%i0, %i1] : memref // CHECK: memref.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] } // CHECK: } } // CHECK: } return // CHECK: return diff --git a/mlir/test/IR/slice.mlir b/mlir/test/IR/slice.mlir --- a/mlir/test/IR/slice.mlir +++ b/mlir/test/IR/slice.mlir @@ -1,18 +1,18 @@ // RUN: mlir-opt -slice-analysis-test %s | FileCheck %s func @slicing_linalg_op(%arg0 : index, %arg1 : index, %arg2 : index) { - %a = alloc(%arg0, %arg2) : memref - %b = alloc(%arg2, %arg1) : memref - %c = alloc(%arg0, %arg1) : memref - %d = alloc(%arg0, %arg1) : memref + %a = memref.alloc(%arg0, %arg2) : memref + %b = memref.alloc(%arg2, %arg1) : memref + %c = memref.alloc(%arg0, %arg1) : memref + %d = memref.alloc(%arg0, %arg1) : memref linalg.matmul ins(%a, %b : memref, memref) outs(%c : memref) linalg.matmul ins(%a, %b : memref, memref) outs(%d : memref) - dealloc %c : memref - dealloc %b : memref - dealloc %a : memref - dealloc %d : memref + memref.dealloc %c : memref + memref.dealloc %b : memref + memref.dealloc %a : memref + memref.dealloc %d : memref return } @@ -20,16 +20,16 @@ // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: index -// CHECK-DAG: %[[A:.+]] = alloc(%[[ARG0]], %[[ARG2]]) : memref -// CHECK-DAG: %[[B:.+]] = alloc(%[[ARG2]], %[[ARG1]]) : memref -// CHECK-DAG: %[[C:.+]] = alloc(%[[ARG0]], %[[ARG1]]) : memref +// CHECK-DAG: %[[A:.+]] = memref.alloc(%[[ARG0]], %[[ARG2]]) : memref +// CHECK-DAG: %[[B:.+]] = memref.alloc(%[[ARG2]], %[[ARG1]]) : memref +// CHECK-DAG: %[[C:.+]] = memref.alloc(%[[ARG0]], %[[ARG1]]) : memref // CHECK: return // CHECK-LABEL: func @slicing_linalg_op__backward_slice__1 // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: index -// CHECK-DAG: %[[A:.+]] = alloc(%[[ARG0]], %[[ARG2]]) : memref -// CHECK-DAG: %[[B:.+]] = alloc(%[[ARG2]], %[[ARG1]]) : memref -// CHECK-DAG: %[[C:.+]] = alloc(%[[ARG0]], %[[ARG1]]) : memref +// CHECK-DAG: %[[A:.+]] = memref.alloc(%[[ARG0]], %[[ARG2]]) : memref +// CHECK-DAG: %[[B:.+]] = memref.alloc(%[[ARG2]], %[[ARG1]]) : memref +// CHECK-DAG: %[[C:.+]] = memref.alloc(%[[ARG0]], %[[ARG1]]) : memref // CHECK: return diff --git a/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir b/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir --- a/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir +++ b/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir @@ -60,39 +60,39 @@ // Sanity check for the function under test. 
// - %LHS10 = alloc() {alignment = 64} : memref<1x10xf32> - %RHS10 = alloc() {alignment = 64} : memref<1x10xf32> - %DST10 = alloc() {alignment = 64} : memref<1x10xf32> + %LHS10 = memref.alloc() {alignment = 64} : memref<1x10xf32> + %RHS10 = memref.alloc() {alignment = 64} : memref<1x10xf32> + %DST10 = memref.alloc() {alignment = 64} : memref<1x10xf32> linalg.fill(%LHS10, %f1) : memref<1x10xf32>, f32 linalg.fill(%RHS10, %f1) : memref<1x10xf32>, f32 - %LHS = memref_cast %LHS10 : memref<1x10xf32> to memref - %RHS = memref_cast %RHS10 : memref<1x10xf32> to memref - %DST = memref_cast %DST10 : memref<1x10xf32> to memref + %LHS = memref.cast %LHS10 : memref<1x10xf32> to memref + %RHS = memref.cast %RHS10 : memref<1x10xf32> to memref + %DST = memref.cast %DST10 : memref<1x10xf32> to memref call @linalg_generic(%LHS, %RHS, %DST) : (memref, memref, memref) -> () // CHECK: [2, 2, 2, 2, 2, 2, 2, 2, 2, 2] - %U = memref_cast %DST10 : memref<1x10xf32> to memref<*xf32> + %U = memref.cast %DST10 : memref<1x10xf32> to memref<*xf32> call @print_memref_f32(%U): (memref<*xf32>) -> () - dealloc %LHS10: memref<1x10xf32> - dealloc %RHS10: memref<1x10xf32> - dealloc %DST10: memref<1x10xf32> + memref.dealloc %LHS10: memref<1x10xf32> + memref.dealloc %RHS10: memref<1x10xf32> + memref.dealloc %DST10: memref<1x10xf32> // // Allocate data for microbenchmarks. // - %LHS1024 = alloc() {alignment = 64} : memref<1024x1024xf32> - %RHS1024 = alloc() {alignment = 64} : memref<1024x1024xf32> - %DST1024 = alloc() {alignment = 64} : memref<1024x1024xf32> + %LHS1024 = memref.alloc() {alignment = 64} : memref<1024x1024xf32> + %RHS1024 = memref.alloc() {alignment = 64} : memref<1024x1024xf32> + %DST1024 = memref.alloc() {alignment = 64} : memref<1024x1024xf32> - %LHS0 = memref_cast %LHS1024 : memref<1024x1024xf32> to memref - %RHS0 = memref_cast %RHS1024 : memref<1024x1024xf32> to memref - %DST0 = memref_cast %DST1024 : memref<1024x1024xf32> to memref + %LHS0 = memref.cast %LHS1024 : memref<1024x1024xf32> to memref + %RHS0 = memref.cast %RHS1024 : memref<1024x1024xf32> to memref + %DST0 = memref.cast %DST1024 : memref<1024x1024xf32> to memref // // Warm up. @@ -117,9 +117,9 @@ vector.print %t1024 : f64 // Free. - dealloc %LHS1024: memref<1024x1024xf32> - dealloc %RHS1024: memref<1024x1024xf32> - dealloc %DST1024: memref<1024x1024xf32> + memref.dealloc %LHS1024: memref<1024x1024xf32> + memref.dealloc %RHS1024: memref<1024x1024xf32> + memref.dealloc %DST1024: memref<1024x1024xf32> return } diff --git a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir --- a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir +++ b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir @@ -19,33 +19,33 @@ %lb = constant 0 : index %ub = constant 9 : index - %A = alloc() : memref<9xf32> - %U = memref_cast %A : memref<9xf32> to memref<*xf32> + %A = memref.alloc() : memref<9xf32> + %U = memref.cast %A : memref<9xf32> to memref<*xf32> // 1. %i = (0) to (9) step (1) scf.parallel (%i) = (%lb) to (%ub) step (%c1) { %0 = index_cast %i : index to i32 %1 = sitofp %0 : i32 to f32 - store %1, %A[%i] : memref<9xf32> + memref.store %1, %A[%i] : memref<9xf32> } // CHECK: [0, 1, 2, 3, 4, 5, 6, 7, 8] call @print_memref_f32(%U): (memref<*xf32>) -> () scf.parallel (%i) = (%lb) to (%ub) step (%c1) { - store %c0, %A[%i] : memref<9xf32> + memref.store %c0, %A[%i] : memref<9xf32> } // 2. 
%i = (0) to (9) step (2) scf.parallel (%i) = (%lb) to (%ub) step (%c2) { %0 = index_cast %i : index to i32 %1 = sitofp %0 : i32 to f32 - store %1, %A[%i] : memref<9xf32> + memref.store %1, %A[%i] : memref<9xf32> } // CHECK: [0, 0, 2, 0, 4, 0, 6, 0, 8] call @print_memref_f32(%U): (memref<*xf32>) -> () scf.parallel (%i) = (%lb) to (%ub) step (%c1) { - store %c0, %A[%i] : memref<9xf32> + memref.store %c0, %A[%i] : memref<9xf32> } // 3. %i = (-20) to (-11) step (3) @@ -56,12 +56,12 @@ %1 = sitofp %0 : i32 to f32 %2 = constant 20 : index %3 = addi %i, %2 : index - store %1, %A[%3] : memref<9xf32> + memref.store %1, %A[%3] : memref<9xf32> } // CHECK: [-20, 0, 0, -17, 0, 0, -14, 0, 0] call @print_memref_f32(%U): (memref<*xf32>) -> () - dealloc %A : memref<9xf32> + memref.dealloc %A : memref<9xf32> return } diff --git a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir --- a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir +++ b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir @@ -19,8 +19,8 @@ %lb = constant 0 : index %ub = constant 8 : index - %A = alloc() : memref<8x8xf32> - %U = memref_cast %A : memref<8x8xf32> to memref<*xf32> + %A = memref.alloc() : memref<8x8xf32> + %U = memref.cast %A : memref<8x8xf32> to memref<*xf32> // 1. (%i, %i) = (0, 8) to (8, 8) step (1, 1) scf.parallel (%i, %j) = (%lb, %lb) to (%ub, %ub) step (%c1, %c1) { @@ -28,7 +28,7 @@ %1 = addi %j, %0 : index %2 = index_cast %1 : index to i32 %3 = sitofp %2 : i32 to f32 - store %3, %A[%i, %j] : memref<8x8xf32> + memref.store %3, %A[%i, %j] : memref<8x8xf32> } // CHECK: [0, 1, 2, 3, 4, 5, 6, 7] @@ -42,7 +42,7 @@ call @print_memref_f32(%U): (memref<*xf32>) -> () scf.parallel (%i, %j) = (%lb, %lb) to (%ub, %ub) step (%c1, %c1) { - store %c0, %A[%i, %j] : memref<8x8xf32> + memref.store %c0, %A[%i, %j] : memref<8x8xf32> } // 2. (%i, %i) = (0, 8) to (8, 8) step (2, 1) @@ -51,7 +51,7 @@ %1 = addi %j, %0 : index %2 = index_cast %1 : index to i32 %3 = sitofp %2 : i32 to f32 - store %3, %A[%i, %j] : memref<8x8xf32> + memref.store %3, %A[%i, %j] : memref<8x8xf32> } // CHECK: [0, 1, 2, 3, 4, 5, 6, 7] @@ -65,7 +65,7 @@ call @print_memref_f32(%U): (memref<*xf32>) -> () scf.parallel (%i, %j) = (%lb, %lb) to (%ub, %ub) step (%c1, %c1) { - store %c0, %A[%i, %j] : memref<8x8xf32> + memref.store %c0, %A[%i, %j] : memref<8x8xf32> } // 3. 
(%i, %i) = (0, 8) to (8, 8) step (1, 2) @@ -74,7 +74,7 @@ %1 = addi %j, %0 : index %2 = index_cast %1 : index to i32 %3 = sitofp %2 : i32 to f32 - store %3, %A[%i, %j] : memref<8x8xf32> + memref.store %3, %A[%i, %j] : memref<8x8xf32> } // CHECK: [0, 0, 2, 0, 4, 0, 6, 0] @@ -87,7 +87,7 @@ // CHECK-NEXT: [56, 0, 58, 0, 60, 0, 62, 0] call @print_memref_f32(%U): (memref<*xf32>) -> () - dealloc %A : memref<8x8xf32> + memref.dealloc %A : memref<8x8xf32> return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir @@ -55,9 +55,9 @@ %v0 = constant 0.0 : !elem_type_a %v1 = constant 1.0 : !elem_type_a - %A = alloc() : !row_major_A - %B = alloc() : !row_major_B - %C = alloc() : !row_major_C + %A = memref.alloc() : !row_major_A + %B = memref.alloc() : !row_major_B + %C = memref.alloc() : !row_major_C linalg.fill(%A, %v1) : !row_major_A, !elem_type_a linalg.fill(%B, %v1) : !row_major_B, !elem_type_b @@ -89,19 +89,19 @@ call @print_perf(%iters, %tmatmul) : (index, f64) -> () // CHECK: {{^0$}} - %C_ref = alloc() : !row_major_C + %C_ref = memref.alloc() : !row_major_C linalg.fill(%C_ref, %v0) : !row_major_C, !elem_type_c linalg.matmul ins(%A, %B : !row_major_A, !row_major_B) outs(%C_ref: !row_major_C) - %act = memref_cast %C : !row_major_C to memref<*xf32> - %exp = memref_cast %C_ref : !row_major_C to memref<*xf32> + %act = memref.cast %C : !row_major_C to memref<*xf32> + %exp = memref.cast %C_ref : !row_major_C to memref<*xf32> %errors = call @verifyMemRefF32(%act, %exp) : (memref<*xf32>, memref<*xf32>) -> i64 vector.print %errors : i64 - dealloc %C_ref : !row_major_C + memref.dealloc %C_ref : !row_major_C - dealloc %A : !row_major_A - dealloc %B : !row_major_B - dealloc %C : !row_major_C + memref.dealloc %A : !row_major_A + memref.dealloc %B : !row_major_B + memref.dealloc %C : !row_major_C return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major.mlir @@ -59,9 +59,9 @@ %f0 = constant 0.0 : !elem_type_c %f1 = constant 1.0 : !elem_type_a - %cA = alloc() : !column_major_A - %cB = alloc() : !column_major_B - %cC = alloc() : !column_major_C + %cA = memref.alloc() : !column_major_A + %cB = memref.alloc() : !column_major_B + %cC = memref.alloc() : !column_major_C linalg.fill(%cA, %f1) : !column_major_A, !elem_type_a linalg.fill(%cB, %f1) : !column_major_B, !elem_type_b @@ -86,19 +86,19 @@ call @print_perf(%iters, %tmatmul_column_major) : (index, f64) -> () // CHECK: {{^0$}} - %cC_ref = alloc() : !column_major_C + %cC_ref = memref.alloc() : !column_major_C linalg.fill(%cC_ref, %f0) : !column_major_C, !elem_type_c linalg.matmul_column_major ins(%cA, %cB : !column_major_A, !column_major_B) outs(%cC_ref: !column_major_C) - %act = memref_cast %cC : !column_major_C to memref<*xf32> - %exp = memref_cast %cC_ref : !column_major_C to memref<*xf32> + %act = memref.cast %cC : !column_major_C to memref<*xf32> + %exp = memref.cast %cC_ref : !column_major_C to memref<*xf32> %errors = call @verifyMemRefF32(%act, %exp) : (memref<*xf32>, memref<*xf32>) -> i64 vector.print %errors : i64 - dealloc %cC_ref : !column_major_C + 
memref.dealloc %cC_ref : !column_major_C - dealloc %cA : !column_major_A - dealloc %cB : !column_major_B - dealloc %cC : !column_major_C + memref.dealloc %cA : !column_major_A + memref.dealloc %cB : !column_major_B + memref.dealloc %cC : !column_major_C return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir @@ -65,9 +65,9 @@ %f0 = constant 0.0 : !elem_type_c %f1 = constant 1.0 : !elem_type_a - %cA = alloc() : !column_major_A - %cB = alloc() : !column_major_B - %cC = alloc() : !column_major_C + %cA = memref.alloc() : !column_major_A + %cB = memref.alloc() : !column_major_B + %cC = memref.alloc() : !column_major_C linalg.fill(%cA, %f1) : !column_major_A, !elem_type_a linalg.fill(%cB, %f1) : !column_major_B, !elem_type_b @@ -78,9 +78,9 @@ %iters = constant ${ITERS}: index /// Run and dump performance for matmul_column_major as a row-major - %A = alloc() : !row_major_A - %B = alloc() : !row_major_B - %C = alloc() : !row_major_C + %A = memref.alloc() : !row_major_A + %B = memref.alloc() : !row_major_B + %C = memref.alloc() : !row_major_C %t_start_matmul_column_major_as_row_major = call @rtclock() : () -> f64 scf.for %arg0 = %c0 to %iters step %c1 { // linalg.matmul writes %C in place, need to reset it to zero every time. @@ -97,34 +97,34 @@ call @print_perf(%iters, %tmatmul_column_major_as_row_major) : (index, f64) -> () // CHECK: {{^0$}} - %cC_ref = alloc() : !column_major_C + %cC_ref = memref.alloc() : !column_major_C linalg.fill(%cC_ref, %f0) : !column_major_C, !elem_type_c linalg.matmul_column_major ins(%cA, %cB : !column_major_A, !column_major_B) outs(%cC_ref: !column_major_C) - %act1 = memref_cast %cC : !column_major_C to memref<*xf32> - %exp1 = memref_cast %cC_ref : !column_major_C to memref<*xf32> + %act1 = memref.cast %cC : !column_major_C to memref<*xf32> + %exp1 = memref.cast %cC_ref : !column_major_C to memref<*xf32> %errors1 = call @verifyMemRefF32(%act1, %exp1) : (memref<*xf32>, memref<*xf32>) -> i64 vector.print %errors1 : i64 - dealloc %cC_ref : !column_major_C + memref.dealloc %cC_ref : !column_major_C // CHECK: {{^0$}} - %C_ref = alloc() : !row_major_C + %C_ref = memref.alloc() : !row_major_C linalg.fill(%C_ref, %f0) : !row_major_C, !elem_type_c linalg.matmul ins(%A, %B : !row_major_A, !row_major_B) outs(%C_ref: !row_major_C) - %act2 = memref_cast %C : !row_major_C to memref<*xf32> - %exp2 = memref_cast %C_ref : !row_major_C to memref<*xf32> + %act2 = memref.cast %C : !row_major_C to memref<*xf32> + %exp2 = memref.cast %C_ref : !row_major_C to memref<*xf32> %errors2 = call @verifyMemRefF32(%act2, %exp2) : (memref<*xf32>, memref<*xf32>) -> i64 vector.print %errors2 : i64 - dealloc %C_ref : !row_major_C + memref.dealloc %C_ref : !row_major_C - dealloc %A : !row_major_A - dealloc %B : !row_major_B - dealloc %C : !row_major_C + memref.dealloc %A : !row_major_A + memref.dealloc %B : !row_major_B + memref.dealloc %C : !row_major_C - dealloc %cA : !column_major_A - dealloc %cB : !column_major_B - dealloc %cC : !column_major_C + memref.dealloc %cA : !column_major_A + memref.dealloc %cB : !column_major_B + memref.dealloc %cC : !column_major_C return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_i8_i8_i32.mlir 
b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_i8_i8_i32.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_i8_i8_i32.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_i8_i8_i32.mlir @@ -55,9 +55,9 @@ %v0 = constant 0 : !elem_type_c %v1 = constant 1 : !elem_type_a - %A = alloc() : !row_major_A - %B = alloc() : !row_major_B - %C = alloc() : !row_major_C + %A = memref.alloc() : !row_major_A + %B = memref.alloc() : !row_major_B + %C = memref.alloc() : !row_major_C linalg.fill(%A, %v1) : !row_major_A, !elem_type_a linalg.fill(%B, %v1) : !row_major_B, !elem_type_b @@ -87,19 +87,19 @@ call @print_perf(%iters, %tmatmul) : (index, f64) -> () // CHECK: {{^0$}} - %C_ref = alloc() : !row_major_C + %C_ref = memref.alloc() : !row_major_C linalg.fill(%C_ref, %v0) : !row_major_C, !elem_type_c linalg.matmul_i8_i8_i32 ins(%A, %B : !row_major_A, !row_major_B) outs(%C_ref: !row_major_C) - %res = memref_cast %C : !row_major_C to memref<*xi32> - %exp = memref_cast %C_ref : !row_major_C to memref<*xi32> + %res = memref.cast %C : !row_major_C to memref<*xi32> + %exp = memref.cast %C_ref : !row_major_C to memref<*xi32> %errors = call @verifyMemRefI32(%res, %exp) : (memref<*xi32>, memref<*xi32>) -> i64 vector.print %errors : i64 - dealloc %C_ref : !row_major_C + memref.dealloc %C_ref : !row_major_C - dealloc %A : !row_major_A - dealloc %B : !row_major_B - dealloc %C : !row_major_C + memref.dealloc %A : !row_major_A + memref.dealloc %B : !row_major_B + memref.dealloc %C : !row_major_C return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir @@ -9,9 +9,9 @@ %c0 = constant 0 : index %c1 = constant 1 : index %f0 = constant 0.0 : f32 - %x = dim %A, %c0 : memref - %y = dim %B, %c1 : memref - %C = alloc(%x, %y) : memref + %x = memref.dim %A, %c0 : memref + %y = memref.dim %B, %c1 : memref + %C = memref.alloc(%x, %y) : memref linalg.fill(%C, %f0) : memref, f32 linalg.matmul ins(%A, %B: memref, memref) outs(%C: memref) @@ -22,14 +22,14 @@ %c0 = constant 0 : index %c1 = constant 1 : index %f0 = constant 0.0 : f32 - %m = dim %A, %c0 : memref - %x = dim %A, %c1 : memref - %n = dim %B, %c1 : memref - %C = alloc(%m, %n) : memref + %m = memref.dim %A, %c0 : memref + %x = memref.dim %A, %c1 : memref + %n = memref.dim %B, %c1 : memref + %C = memref.alloc(%m, %n) : memref linalg.fill(%C, %f0) : memref, f32 scf.for %i = %c0 to %n step %c1 { - %b = subview %B[0, %i][%x, 1][1, 1] : memref to memref - %c = subview %C[0, %i][%m, 1][1, 1] : memref to memref + %b = memref.subview %B[0, %i][%x, 1][1, 1] : memref to memref + %c = memref.subview %C[0, %i][%m, 1][1, 1] : memref to memref linalg.matvec ins(%A, %b: memref, memref) outs(%c: memref) } @@ -44,22 +44,22 @@ %n = constant 2 : index %val1 = constant 13.0 : f32 %val2 = constant 17.0 : f32 - %A = alloc(%m, %x) : memref - %B = alloc(%x, %n) : memref + %A = memref.alloc(%m, %x) : memref + %B = memref.alloc(%x, %n) : memref linalg.fill(%A, %val1) : memref, f32 linalg.fill(%B, %val2) : memref, f32 - store %val1, %B[%c0, %c0] : memref + memref.store %val1, %B[%c0, %c0] : memref %C1 = call @matmul(%A, %B) : (memref, memref) -> memref %C2 = call @matvec(%A, %B) : (memref, memref) -> memref scf.for %i = %c0 to %m step %c1 { scf.for %j = %c0 to %n step %c1 { - %e1 = load %C1[%i, %j] : memref - %e2 = load %C2[%i, %j] : 
memref + %e1 = memref.load %C1[%i, %j] : memref + %e2 = memref.load %C2[%i, %j] : memref %c = cmpf oeq, %e1, %e2 : f32 assert %c, "Matmul does not produce same output as matvec" } } - %C2_ = memref_cast %C2 : memref to memref<*xf32> + %C2_ = memref.cast %C2 : memref to memref<*xf32> call @print_memref_f32(%C2_) : (memref<*xf32>) -> () return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/rank-reducing-subview.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/rank-reducing-subview.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/rank-reducing-subview.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/rank-reducing-subview.mlir @@ -13,18 +13,18 @@ %f1 = constant 1.0 : f32 %f2 = constant 2.0 : f32 %f3 = constant 3.0 : f32 - %A = alloc(%c2, %c2) : memref - store %f0, %A[%c0, %c0] : memref - store %f1, %A[%c0, %c1] : memref - store %f2, %A[%c1, %c0] : memref - store %f3, %A[%c1, %c1] : memref - %B = subview %A[%c1, 0][1, %c2][1, 1] : memref to memref - %C = subview %A[0, %c1][%c2, 1][1, 1] : memref to memref - %A_ = memref_cast %A : memref to memref<*xf32> + %A = memref.alloc(%c2, %c2) : memref + memref.store %f0, %A[%c0, %c0] : memref + memref.store %f1, %A[%c0, %c1] : memref + memref.store %f2, %A[%c1, %c0] : memref + memref.store %f3, %A[%c1, %c1] : memref + %B = memref.subview %A[%c1, 0][1, %c2][1, 1] : memref to memref + %C = memref.subview %A[0, %c1][%c2, 1][1, 1] : memref to memref + %A_ = memref.cast %A : memref to memref<*xf32> call @print_memref_f32(%A_) : (memref<*xf32>) -> () - %B_ = memref_cast %B : memref to memref<*xf32> + %B_ = memref.cast %B : memref to memref<*xf32> call @print_memref_f32(%B_) : (memref<*xf32>) -> () - %C_ = memref_cast %C : memref to memref<*xf32> + %C_ = memref.cast %C : memref to memref<*xf32> call @print_memref_f32(%C_) : (memref<*xf32>) -> () return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir @@ -24,7 +24,7 @@ // Creates and returns a 1-D buffer of size %s1 filled with the value %f func @alloc_1d_filled_f32(%s1 : index, %f : f32) -> memref { - %buf = alloc(%s1) : memref + %buf = memref.alloc(%s1) : memref linalg.fill(%buf, %f) : memref, f32 return %buf : memref } @@ -47,14 +47,14 @@ %in1D = call @alloc_1d_filled_f32(%c8, %val) : (index, f32) -> (memref) %out1D = call @alloc_1d_filled_f32(%c6, %zero) : (index, f32) -> (memref) - store %f10, %in1D[%c3] : memref + memref.store %f10, %in1D[%c3] : memref call @conv_1d(%in1D, %filter1D, %out1D) : (memref, memref, memref) -> () - %out1D_ = memref_cast %out1D : memref to memref<*xf32> + %out1D_ = memref.cast %out1D : memref to memref<*xf32> call @print_memref_f32(%out1D_): (memref<*xf32>) -> () - dealloc %filter1D : memref - dealloc %in1D : memref - dealloc %out1D : memref + memref.dealloc %filter1D : memref + memref.dealloc %in1D : memref + memref.dealloc %out1D : memref return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-ncw-filter-wcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-ncw-filter-wcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-ncw-filter-wcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-ncw-filter-wcf-call.mlir @@ -24,7 +24,7 @@ // Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f func 
@alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref { - %buf = alloc(%s1, %s2, %s3) : memref + %buf = memref.alloc(%s1, %s2, %s3) : memref linalg.fill(%buf, %f) : memref, f32 return %buf : memref } @@ -51,14 +51,14 @@ %in1D_ncw = call @alloc_3d_filled_f32(%c1, %c1, %c8, %val) : (index, index, index, f32) -> (memref) %out1D_ncw = call @alloc_3d_filled_f32(%c1, %c1, %c6, %zero) : (index, index, index, f32) -> (memref) - store %f10, %in1D_ncw[%c0, %c0, %c3] : memref + memref.store %f10, %in1D_ncw[%c0, %c0, %c3] : memref call @conv_1d_input_ncw_filter_wcf(%in1D_ncw, %filter1D_ncw, %out1D_ncw) : (memref, memref, memref) -> () - %out1D_ncw_ = memref_cast %out1D_ncw : memref to memref<*xf32> + %out1D_ncw_ = memref.cast %out1D_ncw : memref to memref<*xf32> call @print_memref_f32(%out1D_ncw_): (memref<*xf32>) -> () - dealloc %filter1D_ncw : memref - dealloc %in1D_ncw : memref - dealloc %out1D_ncw : memref + memref.dealloc %filter1D_ncw : memref + memref.dealloc %in1D_ncw : memref + memref.dealloc %out1D_ncw : memref return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-nwc-filter-wcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-nwc-filter-wcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-nwc-filter-wcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-nwc-filter-wcf-call.mlir @@ -24,7 +24,7 @@ // Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref { - %buf = alloc(%s1, %s2, %s3) : memref + %buf = memref.alloc(%s1, %s2, %s3) : memref linalg.fill(%buf, %f) : memref, f32 return %buf : memref } @@ -51,14 +51,14 @@ %in1D_nwc = call @alloc_3d_filled_f32(%c3, %c8, %c1, %val) : (index, index, index, f32) -> (memref) %out1D_nwc = call @alloc_3d_filled_f32(%c3, %c6, %c1, %zero) : (index, index, index, f32) -> (memref) - store %f10, %in1D_nwc[%c0, %c3, %c0] : memref + memref.store %f10, %in1D_nwc[%c0, %c3, %c0] : memref call @conv_1d_input_nwc_filter_wcf(%in1D_nwc, %filter1D_nwc, %out1D_nwc) : (memref, memref, memref) -> () - %out1D_nwc_ = memref_cast %out1D_nwc : memref to memref<*xf32> + %out1D_nwc_ = memref.cast %out1D_nwc : memref to memref<*xf32> call @print_memref_f32(%out1D_nwc_): (memref<*xf32>) -> () - dealloc %filter1D_nwc : memref - dealloc %in1D_nwc : memref - dealloc %out1D_nwc : memref + memref.dealloc %filter1D_nwc : memref + memref.dealloc %in1D_nwc : memref + memref.dealloc %out1D_nwc : memref return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir @@ -24,7 +24,7 @@ // Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref { - %buf = alloc(%s1, %s2, %s3) : memref + %buf = memref.alloc(%s1, %s2, %s3) : memref linalg.fill(%buf, %f) : memref, f32 return %buf : memref } @@ -49,14 +49,14 @@ %in1D_ncw = call @alloc_3d_filled_f32(%c1, %c1, %c8, %val) : (index, index, index, f32) -> (memref) %out1D_ncw = call @alloc_3d_filled_f32(%c1, %c1, %c6, %zero) : (index, index, index, f32) -> (memref) - store %f10, %in1D_ncw[%c0, %c0, %c3] : memref + memref.store %f10, %in1D_ncw[%c0, %c0, %c3] : memref call 
@conv_1d_ncw(%in1D_ncw, %filter1D_ncw, %out1D_ncw) : (memref, memref, memref) -> () - %out1D_ncw_ = memref_cast %out1D_ncw : memref to memref<*xf32> + %out1D_ncw_ = memref.cast %out1D_ncw : memref to memref<*xf32> call @print_memref_f32(%out1D_ncw_): (memref<*xf32>) -> () - dealloc %filter1D_ncw : memref - dealloc %in1D_ncw : memref - dealloc %out1D_ncw : memref + memref.dealloc %filter1D_ncw : memref + memref.dealloc %in1D_ncw : memref + memref.dealloc %out1D_ncw : memref return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir @@ -24,7 +24,7 @@ // Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref { - %buf = alloc(%s1, %s2, %s3) : memref + %buf = memref.alloc(%s1, %s2, %s3) : memref linalg.fill(%buf, %f) : memref, f32 return %buf : memref } @@ -49,14 +49,14 @@ %in1D_nwc = call @alloc_3d_filled_f32(%c3, %c8, %c1, %val) : (index, index, index, f32) -> (memref) %out1D_nwc = call @alloc_3d_filled_f32(%c3, %c6, %c1, %zero) : (index, index, index, f32) -> (memref) - store %f10, %in1D_nwc[%c0, %c3, %c0] : memref + memref.store %f10, %in1D_nwc[%c0, %c3, %c0] : memref call @conv_1d_nwc(%in1D_nwc, %filter1D_nwc, %out1D_nwc) : (memref, memref, memref) -> () - %out1D_nwc_ = memref_cast %out1D_nwc : memref to memref<*xf32> + %out1D_nwc_ = memref.cast %out1D_nwc : memref to memref<*xf32> call @print_memref_f32(%out1D_nwc_): (memref<*xf32>) -> () - dealloc %filter1D_nwc : memref - dealloc %in1D_nwc : memref - dealloc %out1D_nwc : memref + memref.dealloc %filter1D_nwc : memref + memref.dealloc %in1D_nwc : memref + memref.dealloc %out1D_nwc : memref return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir @@ -24,7 +24,7 @@ // Creates and returns a 2-D buffer of size (%s1, %s2) filled with the value %f func @alloc_2d_filled_f32(%s1 : index, %s2 : index, %f : f32) -> memref { - %buf = alloc(%s1, %s2) : memref + %buf = memref.alloc(%s1, %s2) : memref linalg.fill(%buf, %f) : memref, f32 return %buf : memref } @@ -49,14 +49,14 @@ %in2D = call @alloc_2d_filled_f32(%c8, %c8, %val) : (index, index, f32) -> (memref) %out2D = call @alloc_2d_filled_f32(%c6, %c6, %zero) : (index, index, f32) -> (memref) - store %f10, %in2D[%c0, %c3] : memref + memref.store %f10, %in2D[%c0, %c3] : memref call @conv_2d(%in2D, %filter2D, %out2D) : (memref, memref, memref) -> () - %out2D_ = memref_cast %out2D : memref to memref<*xf32> + %out2D_ = memref.cast %out2D : memref to memref<*xf32> call @print_memref_f32(%out2D_): (memref<*xf32>) -> () - dealloc %filter2D : memref - dealloc %in2D : memref - dealloc %out2D : memref + memref.dealloc %filter2D : memref + memref.dealloc %in2D : memref + memref.dealloc %out2D : memref return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nchw-filter-hwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nchw-filter-hwcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nchw-filter-hwcf-call.mlir +++ 
b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nchw-filter-hwcf-call.mlir @@ -24,7 +24,7 @@ // Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref { - %buf = alloc(%s1, %s2, %s3, %s4) : memref + %buf = memref.alloc(%s1, %s2, %s3, %s4) : memref linalg.fill(%buf, %f) : memref, f32 return %buf : memref } @@ -51,14 +51,14 @@ %in2D_nchw = call @alloc_4d_filled_f32(%c3, %c1, %c8, %c8, %val) : (index, index, index, index, f32) -> (memref) %out2D_nchw = call @alloc_4d_filled_f32(%c3, %c1, %c6, %c6, %zero) : (index, index, index, index, f32) -> (memref) - store %f10, %in2D_nchw[%c0, %c0, %c0, %c3] : memref + memref.store %f10, %in2D_nchw[%c0, %c0, %c0, %c3] : memref call @conv_2d_input_nchw_filter_hwcf(%in2D_nchw, %filter2D_nchw, %out2D_nchw) : (memref, memref, memref) -> () - %out2D_nchw_ = memref_cast %out2D_nchw : memref to memref<*xf32> + %out2D_nchw_ = memref.cast %out2D_nchw : memref to memref<*xf32> call @print_memref_f32(%out2D_nchw_): (memref<*xf32>) -> () - dealloc %filter2D_nchw : memref - dealloc %in2D_nchw : memref - dealloc %out2D_nchw : memref + memref.dealloc %filter2D_nchw : memref + memref.dealloc %in2D_nchw : memref + memref.dealloc %out2D_nchw : memref return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nhwc-filter-hwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nhwc-filter-hwcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nhwc-filter-hwcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nhwc-filter-hwcf-call.mlir @@ -24,7 +24,7 @@ // Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref { - %buf = alloc(%s1, %s2, %s3, %s4) : memref + %buf = memref.alloc(%s1, %s2, %s3, %s4) : memref linalg.fill(%buf, %f) : memref, f32 return %buf : memref } @@ -51,14 +51,14 @@ %in2D_nhwc = call @alloc_4d_filled_f32(%c3, %c8, %c8, %c3, %val) : (index, index, index, index, f32) -> (memref) %out2D_nhwc = call @alloc_4d_filled_f32(%c3, %c6, %c6, %c1, %zero) : (index, index, index, index, f32) -> (memref) - store %f10, %in2D_nhwc[%c0, %c0, %c3, %c0] : memref + memref.store %f10, %in2D_nhwc[%c0, %c0, %c3, %c0] : memref call @conv_2d_input_nhwc_filter_hwcf(%in2D_nhwc, %filter2D_nhwc, %out2D_nhwc) : (memref, memref, memref) -> () - %out2D_nhwc_ = memref_cast %out2D_nhwc : memref to memref<*xf32> + %out2D_nhwc_ = memref.cast %out2D_nhwc : memref to memref<*xf32> call @print_memref_f32(%out2D_nhwc_): (memref<*xf32>) -> () - dealloc %filter2D_nhwc : memref - dealloc %in2D_nhwc : memref - dealloc %out2D_nhwc : memref + memref.dealloc %filter2D_nhwc : memref + memref.dealloc %in2D_nhwc : memref + memref.dealloc %out2D_nhwc : memref return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir @@ -24,7 +24,7 @@ // Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref { - %buf = alloc(%s1, %s2, %s3, %s4) : memref + %buf = memref.alloc(%s1, %s2, %s3, %s4) 
: memref linalg.fill(%buf, %f) : memref, f32 return %buf : memref } @@ -49,14 +49,14 @@ %in2D_nchw = call @alloc_4d_filled_f32(%c3, %c1, %c8, %c8, %val) : (index, index, index, index, f32) -> (memref) %out2D_nchw = call @alloc_4d_filled_f32(%c3, %c1, %c6, %c6, %zero) : (index, index, index, index, f32) -> (memref) - store %f10, %in2D_nchw[%c0, %c0, %c0, %c3] : memref + memref.store %f10, %in2D_nchw[%c0, %c0, %c0, %c3] : memref call @conv_2d_nchw(%in2D_nchw, %filter2D_nchw, %out2D_nchw) : (memref, memref, memref) -> () - %out2D_nchw_ = memref_cast %out2D_nchw : memref to memref<*xf32> + %out2D_nchw_ = memref.cast %out2D_nchw : memref to memref<*xf32> call @print_memref_f32(%out2D_nchw_): (memref<*xf32>) -> () - dealloc %filter2D_nchw : memref - dealloc %in2D_nchw : memref - dealloc %out2D_nchw : memref + memref.dealloc %filter2D_nchw : memref + memref.dealloc %in2D_nchw : memref + memref.dealloc %out2D_nchw : memref return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir @@ -24,7 +24,7 @@ // Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref { - %buf = alloc(%s1, %s2, %s3, %s4) : memref + %buf = memref.alloc(%s1, %s2, %s3, %s4) : memref linalg.fill(%buf, %f) : memref, f32 return %buf : memref } @@ -49,14 +49,14 @@ %in2D_nhwc = call @alloc_4d_filled_f32(%c3, %c8, %c8, %c3, %val) : (index, index, index, index, f32) -> (memref) %out2D_nhwc = call @alloc_4d_filled_f32(%c3, %c6, %c6, %c1, %zero) : (index, index, index, index, f32) -> (memref) - store %f10, %in2D_nhwc[%c0, %c0, %c3, %c0] : memref + memref.store %f10, %in2D_nhwc[%c0, %c0, %c3, %c0] : memref call @conv_2d_nhwc(%in2D_nhwc, %filter2D_nhwc, %out2D_nhwc) : (memref, memref, memref) -> () - %out2D_nhwc_ = memref_cast %out2D_nhwc : memref to memref<*xf32> + %out2D_nhwc_ = memref.cast %out2D_nhwc : memref to memref<*xf32> call @print_memref_f32(%out2D_nhwc_): (memref<*xf32>) -> () - dealloc %filter2D_nhwc : memref - dealloc %in2D_nhwc : memref - dealloc %out2D_nhwc : memref + memref.dealloc %filter2D_nhwc : memref + memref.dealloc %in2D_nhwc : memref + memref.dealloc %out2D_nhwc : memref return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir @@ -24,7 +24,7 @@ // Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref { - %buf = alloc(%s1, %s2, %s3) : memref + %buf = memref.alloc(%s1, %s2, %s3) : memref linalg.fill(%buf, %f) : memref, f32 return %buf : memref } @@ -49,14 +49,14 @@ %in3D = call @alloc_3d_filled_f32(%c8, %c8, %c8, %val) : (index, index, index, f32) -> (memref) %out3D = call @alloc_3d_filled_f32(%c6, %c6, %c6, %zero) : (index, index, index, f32) -> (memref) - store %f10, %in3D[%c0, %c0, %c3] : memref + memref.store %f10, %in3D[%c0, %c0, %c3] : memref call @conv_3d(%in3D, %filter3D, %out3D) : (memref, memref, memref) -> () - %out3D_ = memref_cast %out3D : memref to memref<*xf32> + %out3D_ = memref.cast 
%out3D : memref to memref<*xf32> call @print_memref_f32(%out3D_): (memref<*xf32>) -> () - dealloc %filter3D : memref - dealloc %in3D : memref - dealloc %out3D : memref + memref.dealloc %filter3D : memref + memref.dealloc %in3D : memref + memref.dealloc %out3D : memref return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ncdhw-filter-dhwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ncdhw-filter-dhwcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ncdhw-filter-dhwcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ncdhw-filter-dhwcf-call.mlir @@ -24,7 +24,7 @@ // Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> memref { - %buf = alloc(%s1, %s2, %s3, %s4, %s5) : memref + %buf = memref.alloc(%s1, %s2, %s3, %s4, %s5) : memref linalg.fill(%buf, %f) : memref, f32 return %buf : memref } @@ -51,14 +51,14 @@ %in3D_ncdhw = call @alloc_5d_filled_f32(%c1, %c1, %c8, %c8, %c8, %val) : (index, index, index, index, index, f32) -> (memref) %out3D_ncdhw = call @alloc_5d_filled_f32(%c1, %c1, %c6, %c6, %c6, %zero) : (index, index, index, index, index, f32) -> (memref) - store %f10, %in3D_ncdhw[%c0, %c0, %c0, %c0, %c3] : memref + memref.store %f10, %in3D_ncdhw[%c0, %c0, %c0, %c0, %c3] : memref call @conv_3d_input_ncdhw_filter_dhwcf(%in3D_ncdhw, %filter3D_ncdhw, %out3D_ncdhw) : (memref, memref, memref) -> () - %out3D_ncdhw_ = memref_cast %out3D_ncdhw : memref to memref<*xf32> + %out3D_ncdhw_ = memref.cast %out3D_ncdhw : memref to memref<*xf32> call @print_memref_f32(%out3D_ncdhw_): (memref<*xf32>) -> () - dealloc %filter3D_ncdhw : memref - dealloc %in3D_ncdhw : memref - dealloc %out3D_ncdhw : memref + memref.dealloc %filter3D_ncdhw : memref + memref.dealloc %in3D_ncdhw : memref + memref.dealloc %out3D_ncdhw : memref return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ndhwc-filter-dhwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ndhwc-filter-dhwcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ndhwc-filter-dhwcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ndhwc-filter-dhwcf-call.mlir @@ -24,7 +24,7 @@ // Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> memref { - %buf = alloc(%s1, %s2, %s3, %s4, %s5) : memref + %buf = memref.alloc(%s1, %s2, %s3, %s4, %s5) : memref linalg.fill(%buf, %f) : memref, f32 return %buf : memref } @@ -52,14 +52,14 @@ %in3D_ndhwc = call @alloc_5d_filled_f32(%c1, %c8, %c8, %c8, %c1, %val) : (index, index, index, index, index, f32) -> (memref) %out3D_ndhwc = call @alloc_5d_filled_f32(%c1, %c6, %c6, %c6, %c1, %zero) : (index, index, index, index, index, f32) -> (memref) - store %f10, %in3D_ndhwc[%c0, %c0, %c0, %c3, %c0] : memref + memref.store %f10, %in3D_ndhwc[%c0, %c0, %c0, %c3, %c0] : memref call @conv_3d_input_ndhwc_filter_dhwcf(%in3D_ndhwc, %filter3D_ndhwc, %out3D_ndhwc) : (memref, memref, memref) -> () - %out3D_ndhwc_ = memref_cast %out3D_ndhwc : memref to memref<*xf32> + %out3D_ndhwc_ = memref.cast %out3D_ndhwc : memref to memref<*xf32> call @print_memref_f32(%out3D_ndhwc_): (memref<*xf32>) -> () - dealloc %filter3D_ndhwc : memref - dealloc %in3D_ndhwc : memref - dealloc 
%out3D_ndhwc : memref + memref.dealloc %filter3D_ndhwc : memref + memref.dealloc %in3D_ndhwc : memref + memref.dealloc %out3D_ndhwc : memref return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir @@ -24,7 +24,7 @@ // Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> memref { - %buf = alloc(%s1, %s2, %s3, %s4, %s5) : memref + %buf = memref.alloc(%s1, %s2, %s3, %s4, %s5) : memref linalg.fill(%buf, %f) : memref, f32 return %buf : memref } @@ -49,14 +49,14 @@ %in3D_ncdhw = call @alloc_5d_filled_f32(%c1, %c1, %c8, %c8, %c8, %val) : (index, index, index, index, index, f32) -> (memref) %out3D_ncdhw = call @alloc_5d_filled_f32(%c1, %c1, %c6, %c6, %c6, %zero) : (index, index, index, index, index, f32) -> (memref) - store %f10, %in3D_ncdhw[%c0, %c0, %c0, %c0, %c3] : memref + memref.store %f10, %in3D_ncdhw[%c0, %c0, %c0, %c0, %c3] : memref call @conv_3d_ncdhw(%in3D_ncdhw, %filter3D_ncdhw, %out3D_ncdhw) : (memref, memref, memref) -> () - %out3D_ncdhw_ = memref_cast %out3D_ncdhw : memref to memref<*xf32> + %out3D_ncdhw_ = memref.cast %out3D_ncdhw : memref to memref<*xf32> call @print_memref_f32(%out3D_ncdhw_): (memref<*xf32>) -> () - dealloc %filter3D_ncdhw : memref - dealloc %in3D_ncdhw : memref - dealloc %out3D_ncdhw : memref + memref.dealloc %filter3D_ncdhw : memref + memref.dealloc %in3D_ncdhw : memref + memref.dealloc %out3D_ncdhw : memref return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir @@ -24,7 +24,7 @@ // Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> memref { - %buf = alloc(%s1, %s2, %s3, %s4, %s5) : memref + %buf = memref.alloc(%s1, %s2, %s3, %s4, %s5) : memref linalg.fill(%buf, %f) : memref, f32 return %buf : memref } @@ -50,14 +50,14 @@ %in3D_ndhwc = call @alloc_5d_filled_f32(%c1, %c8, %c8, %c8, %c1, %val) : (index, index, index, index, index, f32) -> (memref) %out3D_ndhwc = call @alloc_5d_filled_f32(%c1, %c6, %c6, %c6, %c1, %zero) : (index, index, index, index, index, f32) -> (memref) - store %f10, %in3D_ndhwc[%c0, %c0, %c0, %c3, %c0] : memref + memref.store %f10, %in3D_ndhwc[%c0, %c0, %c0, %c3, %c0] : memref call @conv_3d_ndhwc(%in3D_ndhwc, %filter3D_ndhwc, %out3D_ndhwc) : (memref, memref, memref) -> () - %out3D_ndhwc_ = memref_cast %out3D_ndhwc : memref to memref<*xf32> + %out3D_ndhwc_ = memref.cast %out3D_ndhwc : memref to memref<*xf32> call @print_memref_f32(%out3D_ndhwc_): (memref<*xf32>) -> () - dealloc %filter3D_ndhwc : memref - dealloc %in3D_ndhwc : memref - dealloc %out3D_ndhwc : memref + memref.dealloc %filter3D_ndhwc : memref + memref.dealloc %in3D_ndhwc : memref + memref.dealloc %out3D_ndhwc : memref return } diff --git a/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir b/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir --- 
a/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir +++ b/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir @@ -17,13 +17,13 @@ %c20 = constant 20: i32 %c10 = constant 10: i32 %cmin10 = constant -10: i32 - %A = alloc() : memref<40xi32> + %A = memref.alloc() : memref<40xi32> // print numerator affine.for %i = 0 to 40 { %ii = index_cast %i: index to i32 %ii30 = subi %ii, %c20 : i32 - store %ii30, %A[%i] : memref<40xi32> + memref.store %ii30, %A[%i] : memref<40xi32> } call @transfer_read_2d(%A, %c0) : (memref<40xi32>, index) -> () @@ -32,7 +32,7 @@ %ii = index_cast %i: index to i32 %ii30 = subi %ii, %c20 : i32 %val = ceildivi_signed %ii30, %c10 : i32 - store %val, %A[%i] : memref<40xi32> + memref.store %val, %A[%i] : memref<40xi32> } call @transfer_read_2d(%A, %c0) : (memref<40xi32>, index) -> () @@ -41,7 +41,7 @@ %ii = index_cast %i: index to i32 %ii30 = subi %ii, %c20 : i32 %val = floordivi_signed %ii30, %c10 : i32 - store %val, %A[%i] : memref<40xi32> + memref.store %val, %A[%i] : memref<40xi32> } call @transfer_read_2d(%A, %c0) : (memref<40xi32>, index) -> () @@ -51,7 +51,7 @@ %ii = index_cast %i: index to i32 %ii30 = subi %ii, %c20 : i32 %val = ceildivi_signed %ii30, %cmin10 : i32 - store %val, %A[%i] : memref<40xi32> + memref.store %val, %A[%i] : memref<40xi32> } call @transfer_read_2d(%A, %c0) : (memref<40xi32>, index) -> () @@ -60,7 +60,7 @@ %ii = index_cast %i: index to i32 %ii30 = subi %ii, %c20 : i32 %val = floordivi_signed %ii30, %cmin10 : i32 - store %val, %A[%i] : memref<40xi32> + memref.store %val, %A[%i] : memref<40xi32> } call @transfer_read_2d(%A, %c0) : (memref<40xi32>, index) -> () diff --git a/mlir/test/Integration/Dialect/Standard/CPU/test_subview.mlir b/mlir/test/Integration/Dialect/Standard/CPU/test_subview.mlir --- a/mlir/test/Integration/Dialect/Standard/CPU/test_subview.mlir +++ b/mlir/test/Integration/Dialect/Standard/CPU/test_subview.mlir @@ -2,7 +2,7 @@ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext | FileCheck %s -global_memref "private" constant @__constant_5x3xf32 : memref<5x3xf32> = +memref.global "private" constant @__constant_5x3xf32 : memref<5x3xf32> = dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0], @@ -10,11 +10,11 @@ [12.0, 13.0, 14.0]]> func @main() { - %0 = get_global_memref @__constant_5x3xf32 : memref<5x3xf32> + %0 = memref.get_global @__constant_5x3xf32 : memref<5x3xf32> /// Subview with only leading operands. - %1 = subview %0[2][3][1]: memref<5x3xf32> to memref<3x3xf32, offset: 6, strides: [3, 1]> - %unranked = memref_cast %1 : memref<3x3xf32, offset: 6, strides: [3, 1]> to memref<*xf32> + %1 = memref.subview %0[2][3][1]: memref<5x3xf32> to memref<3x3xf32, offset: 6, strides: [3, 1]> + %unranked = memref.cast %1 : memref<3x3xf32, offset: 6, strides: [3, 1]> to memref<*xf32> call @print_memref_f32(%unranked) : (memref<*xf32>) -> () // CHECK: Unranked Memref base@ = {{0x[-9a-f]*}} @@ -26,8 +26,8 @@ // CHECK-SAME: ] /// Regular subview. 
- %2 = subview %0[0, 2][5, 1][1, 1]: memref<5x3xf32> to memref<5x1xf32, offset: 2, strides: [3, 1]> - %unranked2 = memref_cast %2 : memref<5x1xf32, offset: 2, strides: [3, 1]> to memref<*xf32> + %2 = memref.subview %0[0, 2][5, 1][1, 1]: memref<5x3xf32> to memref<5x1xf32, offset: 2, strides: [3, 1]> + %unranked2 = memref.cast %2 : memref<5x1xf32, offset: 2, strides: [3, 1]> to memref<*xf32> call @print_memref_f32(%unranked2) : (memref<*xf32>) -> () // CHECK: Unranked Memref base@ = {{0x[-9a-f]*}} @@ -41,8 +41,8 @@ // CHECK-SAME: ] /// Rank-reducing subview. - %3 = subview %0[0, 2][5, 1][1, 1]: memref<5x3xf32> to memref<5xf32, offset: 2, strides: [3]> - %unranked3 = memref_cast %3 : memref<5xf32, offset: 2, strides: [3]> to memref<*xf32> + %3 = memref.subview %0[0, 2][5, 1][1, 1]: memref<5x3xf32> to memref<5xf32, offset: 2, strides: [3]> + %unranked3 = memref.cast %3 : memref<5xf32, offset: 2, strides: [3]> to memref<*xf32> call @print_memref_f32(%unranked3) : (memref<*xf32>) -> () // CHECK: Unranked Memref base@ = {{0x[-9a-f]*}} @@ -50,8 +50,8 @@ // CHECK-NEXT: [2, 5, 8, 11, 14] /// Rank-reducing subview with only leading operands. - %4 = subview %0[1][1][1]: memref<5x3xf32> to memref<3xf32, offset: 3, strides: [1]> - %unranked4 = memref_cast %4 : memref<3xf32, offset: 3, strides: [1]> to memref<*xf32> + %4 = memref.subview %0[1][1][1]: memref<5x3xf32> to memref<3xf32, offset: 3, strides: [1]> + %unranked4 = memref.cast %4 : memref<3xf32, offset: 3, strides: [1]> to memref<*xf32> call @print_memref_f32(%unranked4) : (memref<*xf32>) -> () // CHECK: Unranked Memref base@ = {{0x[-9a-f]*}} // CHECK-SAME: rank = 1 offset = 3 sizes = [3] strides = [1] data = diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-compress.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-compress.mlir --- a/mlir/test/Integration/Dialect/Vector/CPU/test-compress.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-compress.mlir @@ -27,7 +27,7 @@ %m = vector.broadcast %z : f32 to vector<16xf32> %mem = scf.for %i = %c0 to %c16 step %c1 iter_args(%m_iter = %m) -> (vector<16xf32>) { - %c = load %A[%i] : memref + %c = memref.load %A[%i] : memref %i32 = index_cast %i : index to i32 %m_new = vector.insertelement %c, %m_iter[%i32 : i32] : vector<16xf32> scf.yield %m_new : vector<16xf32> @@ -41,12 +41,12 @@ %c0 = constant 0: index %c1 = constant 1: index %c16 = constant 16: index - %A = alloc(%c16) : memref + %A = memref.alloc(%c16) : memref %z = constant 0.0: f32 %v = vector.broadcast %z : f32 to vector<16xf32> %value = scf.for %i = %c0 to %c16 step %c1 iter_args(%v_iter = %v) -> (vector<16xf32>) { - store %z, %A[%i] : memref + memref.store %z, %A[%i] : memref %i32 = index_cast %i : index to i32 %fi = sitofp %i32 : i32 to f32 %v_new = vector.insertelement %fi, %v_iter[%i32 : i32] : vector<16xf32> diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-expand.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-expand.mlir --- a/mlir/test/Integration/Dialect/Vector/CPU/test-expand.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-expand.mlir @@ -26,11 +26,11 @@ %c0 = constant 0: index %c1 = constant 1: index %c16 = constant 16: index - %A = alloc(%c16) : memref + %A = memref.alloc(%c16) : memref scf.for %i = %c0 to %c16 step %c1 { %i32 = index_cast %i : index to i32 %fi = sitofp %i32 : i32 to f32 - store %fi, %A[%i] : memref + memref.store %fi, %A[%i] : memref } // Set up pass thru vector. 
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-gather.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-gather.mlir --- a/mlir/test/Integration/Dialect/Vector/CPU/test-gather.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-gather.mlir @@ -16,11 +16,11 @@ %c0 = constant 0: index %c1 = constant 1: index %c10 = constant 10: index - %A = alloc(%c10) : memref + %A = memref.alloc(%c10) : memref scf.for %i = %c0 to %c10 step %c1 { %i32 = index_cast %i : index to i32 %fi = sitofp %i32 : i32 to f32 - store %fi, %A[%i] : memref + memref.store %fi, %A[%i] : memref } // Set up idx vector. diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-maskedload.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-maskedload.mlir --- a/mlir/test/Integration/Dialect/Vector/CPU/test-maskedload.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-maskedload.mlir @@ -24,11 +24,11 @@ %c0 = constant 0: index %c1 = constant 1: index %c16 = constant 16: index - %A = alloc(%c16) : memref + %A = memref.alloc(%c16) : memref scf.for %i = %c0 to %c16 step %c1 { %i32 = index_cast %i : index to i32 %fi = sitofp %i32 : i32 to f32 - store %fi, %A[%i] : memref + memref.store %fi, %A[%i] : memref } // Set up pass thru vector. diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-maskedstore.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-maskedstore.mlir --- a/mlir/test/Integration/Dialect/Vector/CPU/test-maskedstore.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-maskedstore.mlir @@ -27,7 +27,7 @@ %m = vector.broadcast %z : f32 to vector<16xf32> %mem = scf.for %i = %c0 to %c16 step %c1 iter_args(%m_iter = %m) -> (vector<16xf32>) { - %c = load %A[%i] : memref + %c = memref.load %A[%i] : memref %i32 = index_cast %i : index to i32 %m_new = vector.insertelement %c, %m_iter[%i32 : i32] : vector<16xf32> scf.yield %m_new : vector<16xf32> @@ -42,9 +42,9 @@ %c0 = constant 0: index %c1 = constant 1: index %c16 = constant 16: index - %A = alloc(%c16) : memref + %A = memref.alloc(%c16) : memref scf.for %i = %c0 to %c16 step %c1 { - store %f0, %A[%i] : memref + memref.store %f0, %A[%i] : memref } // Set up value vector. diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-scatter.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-scatter.mlir --- a/mlir/test/Integration/Dialect/Vector/CPU/test-scatter.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-scatter.mlir @@ -20,7 +20,7 @@ %m = vector.broadcast %z : f32 to vector<8xf32> %mem = scf.for %i = %c0 to %c8 step %c1 iter_args(%m_iter = %m) -> (vector<8xf32>) { - %c = load %A[%i] : memref + %c = memref.load %A[%i] : memref %i32 = index_cast %i : index to i32 %m_new = vector.insertelement %c, %m_iter[%i32 : i32] : vector<8xf32> scf.yield %m_new : vector<8xf32> @@ -34,11 +34,11 @@ %c0 = constant 0: index %c1 = constant 1: index %c8 = constant 8: index - %A = alloc(%c8) : memref + %A = memref.alloc(%c8) : memref scf.for %i = %c0 to %c8 step %c1 { %i32 = index_cast %i : index to i32 %fi = sitofp %i32 : i32 to f32 - store %fi, %A[%i] : memref + memref.store %fi, %A[%i] : memref } // Set up idx vector. 
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-dot-matvec.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-dot-matvec.mlir --- a/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-dot-matvec.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-dot-matvec.mlir @@ -62,12 +62,12 @@ %mask = vector.constant_mask [4] : vector<4xi1> %pass = vector.broadcast %f0 : f32 to vector<4xf32> scf.for %i = %c0 to %cn step %c1 { - %aval = load %AVAL[%i] : memref<8xvector<4xf32>> - %aidx = load %AIDX[%i] : memref<8xvector<4xi32>> + %aval = memref.load %AVAL[%i] : memref<8xvector<4xf32>> + %aidx = memref.load %AIDX[%i] : memref<8xvector<4xi32>> %0 = vector.gather %X[%c0][%aidx], %mask, %pass : memref, vector<4xi32>, vector<4xi1>, vector<4xf32> into vector<4xf32> %1 = vector.contract #dot_trait %aval, %0, %f0 : vector<4xf32>, vector<4xf32> into f32 - store %1, %B[%i] : memref + memref.store %1, %B[%i] : memref } return } @@ -106,10 +106,10 @@ // Allocate. // - %AVAL = alloc() {alignment = 64} : memref<8xvector<4xf32>> - %AIDX = alloc() {alignment = 64} : memref<8xvector<4xi32>> - %X = alloc(%c8) {alignment = 64} : memref - %B = alloc(%c8) {alignment = 64} : memref + %AVAL = memref.alloc() {alignment = 64} : memref<8xvector<4xf32>> + %AIDX = memref.alloc() {alignment = 64} : memref<8xvector<4xi32>> + %X = memref.alloc(%c8) {alignment = 64} : memref + %B = memref.alloc(%c8) {alignment = 64} : memref // // Initialize. @@ -118,88 +118,88 @@ %vf1 = vector.broadcast %f1 : f32 to vector<4xf32> %0 = vector.insert %f2, %vf1[1] : f32 into vector<4xf32> - store %0, %AVAL[%c0] : memref<8xvector<4xf32>> + memref.store %0, %AVAL[%c0] : memref<8xvector<4xf32>> %1 = vector.insert %f8, %vf1[1] : f32 into vector<4xf32> %2 = vector.insert %f3, %1[2] : f32 into vector<4xf32> - store %2, %AVAL[%c1] : memref<8xvector<4xf32>> + memref.store %2, %AVAL[%c1] : memref<8xvector<4xf32>> %3 = vector.insert %f2, %vf1[1] : f32 into vector<4xf32> %4 = vector.insert %f6, %3[2] : f32 into vector<4xf32> %5 = vector.insert %f2, %4[3] : f32 into vector<4xf32> - store %5, %AVAL[%c2] : memref<8xvector<4xf32>> + memref.store %5, %AVAL[%c2] : memref<8xvector<4xf32>> %6 = vector.insert %f3, %vf1[0] : f32 into vector<4xf32> - store %6, %AVAL[%c3] : memref<8xvector<4xf32>> + memref.store %6, %AVAL[%c3] : memref<8xvector<4xf32>> %7 = vector.insert %f5, %vf1[0] : f32 into vector<4xf32> - store %7, %AVAL[%c4] : memref<8xvector<4xf32>> + memref.store %7, %AVAL[%c4] : memref<8xvector<4xf32>> %8 = vector.insert %f3, %vf1[0] : f32 into vector<4xf32> %9 = vector.insert %f2, %8[1] : f32 into vector<4xf32> %10 = vector.insert %f2, %9[3] : f32 into vector<4xf32> - store %10, %AVAL[%c5] : memref<8xvector<4xf32>> + memref.store %10, %AVAL[%c5] : memref<8xvector<4xf32>> %11 = vector.insert %f4, %vf1[0] : f32 into vector<4xf32> %12 = vector.insert %f7, %11[1] : f32 into vector<4xf32> - store %12, %AVAL[%c6] : memref<8xvector<4xf32>> + memref.store %12, %AVAL[%c6] : memref<8xvector<4xf32>> %13 = vector.insert %f3, %vf1[0] : f32 into vector<4xf32> %14 = vector.insert %f2, %13[1] : f32 into vector<4xf32> - store %14, %AVAL[%c7] : memref<8xvector<4xf32>> + memref.store %14, %AVAL[%c7] : memref<8xvector<4xf32>> %vi0 = vector.broadcast %i0 : i32 to vector<4xi32> %20 = vector.insert %i2, %vi0[1] : i32 into vector<4xi32> %21 = vector.insert %i5, %20[2] : i32 into vector<4xi32> %22 = vector.insert %i7, %21[3] : i32 into vector<4xi32> - store %22, %AIDX[%c0] : memref<8xvector<4xi32>> + memref.store %22, %AIDX[%c0] : 
memref<8xvector<4xi32>> %23 = vector.insert %i1, %vi0[1] : i32 into vector<4xi32> %24 = vector.insert %i4, %23[2] : i32 into vector<4xi32> %25 = vector.insert %i6, %24[3] : i32 into vector<4xi32> - store %25, %AIDX[%c1] : memref<8xvector<4xi32>> + memref.store %25, %AIDX[%c1] : memref<8xvector<4xi32>> %26 = vector.insert %i2, %vi0[0] : i32 into vector<4xi32> %27 = vector.insert %i5, %26[1] : i32 into vector<4xi32> %28 = vector.insert %i6, %27[2] : i32 into vector<4xi32> %29 = vector.insert %i7, %28[3] : i32 into vector<4xi32> - store %29, %AIDX[%c2] : memref<8xvector<4xi32>> + memref.store %29, %AIDX[%c2] : memref<8xvector<4xi32>> %30 = vector.insert %i1, %vi0[0] : i32 into vector<4xi32> %31 = vector.insert %i3, %30[1] : i32 into vector<4xi32> %32 = vector.insert %i5, %31[2] : i32 into vector<4xi32> %33 = vector.insert %i7, %32[3] : i32 into vector<4xi32> - store %33, %AIDX[%c3] : memref<8xvector<4xi32>> + memref.store %33, %AIDX[%c3] : memref<8xvector<4xi32>> %34 = vector.insert %i3, %vi0[1] : i32 into vector<4xi32> %35 = vector.insert %i4, %34[2] : i32 into vector<4xi32> %36 = vector.insert %i5, %35[3] : i32 into vector<4xi32> - store %36, %AIDX[%c4] : memref<8xvector<4xi32>> + memref.store %36, %AIDX[%c4] : memref<8xvector<4xi32>> %37 = vector.insert %i1, %vi0[0] : i32 into vector<4xi32> %38 = vector.insert %i4, %37[1] : i32 into vector<4xi32> %39 = vector.insert %i5, %38[2] : i32 into vector<4xi32> %40 = vector.insert %i6, %39[3] : i32 into vector<4xi32> - store %40, %AIDX[%c5] : memref<8xvector<4xi32>> + memref.store %40, %AIDX[%c5] : memref<8xvector<4xi32>> %41 = vector.insert %i2, %vi0[1] : i32 into vector<4xi32> %42 = vector.insert %i4, %41[2] : i32 into vector<4xi32> %43 = vector.insert %i6, %42[3] : i32 into vector<4xi32> - store %43, %AIDX[%c6] : memref<8xvector<4xi32>> + memref.store %43, %AIDX[%c6] : memref<8xvector<4xi32>> %44 = vector.insert %i1, %vi0[0] : i32 into vector<4xi32> %45 = vector.insert %i3, %44[1] : i32 into vector<4xi32> %46 = vector.insert %i6, %45[2] : i32 into vector<4xi32> %47 = vector.insert %i7, %46[3] : i32 into vector<4xi32> - store %47, %AIDX[%c7] : memref<8xvector<4xi32>> + memref.store %47, %AIDX[%c7] : memref<8xvector<4xi32>> scf.for %i = %c0 to %c8 step %c1 { %ix = addi %i, %c1 : index %kx = index_cast %ix : index to i32 %fx = sitofp %kx : i32 to f32 - store %fx, %X[%i] : memref - store %f0, %B[%i] : memref + memref.store %fx, %X[%i] : memref + memref.store %f0, %B[%i] : memref } // @@ -215,17 +215,17 @@ // scf.for %i = %c0 to %c8 step %c1 { - %aval = load %AVAL[%i] : memref<8xvector<4xf32>> + %aval = memref.load %AVAL[%i] : memref<8xvector<4xf32>> vector.print %aval : vector<4xf32> } scf.for %i = %c0 to %c8 step %c1 { - %aidx = load %AIDX[%i] : memref<8xvector<4xi32>> + %aidx = memref.load %AIDX[%i] : memref<8xvector<4xi32>> vector.print %aidx : vector<4xi32> } scf.for %i = %c0 to %c8 step %c1 { - %ldb = load %B[%i] : memref + %ldb = memref.load %B[%i] : memref vector.print %ldb : f32 } @@ -262,10 +262,10 @@ // Free. 
// - dealloc %AVAL : memref<8xvector<4xf32>> - dealloc %AIDX : memref<8xvector<4xi32>> - dealloc %X : memref - dealloc %B : memref + memref.dealloc %AVAL : memref<8xvector<4xf32>> + memref.dealloc %AIDX : memref<8xvector<4xi32>> + memref.dealloc %X : memref + memref.dealloc %B : memref return } diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-saxpy-jagged-matvec.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-saxpy-jagged-matvec.mlir --- a/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-saxpy-jagged-matvec.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-saxpy-jagged-matvec.mlir @@ -53,16 +53,16 @@ %f0 = constant 0.0 : f32 %mask = vector.constant_mask [8] : vector<8xi1> %pass = vector.broadcast %f0 : f32 to vector<8xf32> - %b = load %B[%c0] : memref<1xvector<8xf32>> + %b = memref.load %B[%c0] : memref<1xvector<8xf32>> %b_out = scf.for %k = %c0 to %cn step %c1 iter_args(%b_iter = %b) -> (vector<8xf32>) { - %aval = load %AVAL[%k] : memref<4xvector<8xf32>> - %aidx = load %AIDX[%k] : memref<4xvector<8xi32>> + %aval = memref.load %AVAL[%k] : memref<4xvector<8xf32>> + %aidx = memref.load %AIDX[%k] : memref<4xvector<8xi32>> %0 = vector.gather %X[%c0][%aidx], %mask, %pass : memref, vector<8xi32>, vector<8xi1>, vector<8xf32> into vector<8xf32> %b_new = vector.fma %aval, %0, %b_iter : vector<8xf32> scf.yield %b_new : vector<8xf32> } - store %b_out, %B[%c0] : memref<1xvector<8xf32>> + memref.store %b_out, %B[%c0] : memref<1xvector<8xf32>> return } @@ -100,10 +100,10 @@ // Allocate. // - %AVAL = alloc() {alignment = 64} : memref<4xvector<8xf32>> - %AIDX = alloc() {alignment = 64} : memref<4xvector<8xi32>> - %X = alloc(%c8) {alignment = 64} : memref - %B = alloc() {alignment = 64} : memref<1xvector<8xf32>> + %AVAL = memref.alloc() {alignment = 64} : memref<4xvector<8xf32>> + %AIDX = memref.alloc() {alignment = 64} : memref<4xvector<8xi32>> + %X = memref.alloc(%c8) {alignment = 64} : memref + %B = memref.alloc() {alignment = 64} : memref<1xvector<8xf32>> // // Initialize. 
@@ -116,7 +116,7 @@ %2 = vector.insert %f3, %1[5] : f32 into vector<8xf32> %3 = vector.insert %f4, %2[6] : f32 into vector<8xf32> %4 = vector.insert %f3, %3[7] : f32 into vector<8xf32> - store %4, %AVAL[%c0] : memref<4xvector<8xf32>> + memref.store %4, %AVAL[%c0] : memref<4xvector<8xf32>> %5 = vector.insert %f2, %vf1[0] : f32 into vector<8xf32> %6 = vector.insert %f8, %5[1] : f32 into vector<8xf32> @@ -124,15 +124,15 @@ %8 = vector.insert %f2, %7[5] : f32 into vector<8xf32> %9 = vector.insert %f7, %8[6] : f32 into vector<8xf32> %10 = vector.insert %f2, %9[7] : f32 into vector<8xf32> - store %10, %AVAL[%c1] : memref<4xvector<8xf32>> + memref.store %10, %AVAL[%c1] : memref<4xvector<8xf32>> %11 = vector.insert %f3, %vf1[1] : f32 into vector<8xf32> %12 = vector.insert %f6, %11[2] : f32 into vector<8xf32> - store %12, %AVAL[%c2] : memref<4xvector<8xf32>> + memref.store %12, %AVAL[%c2] : memref<4xvector<8xf32>> %13 = vector.insert %f2, %vf1[2] : f32 into vector<8xf32> %14 = vector.insert %f2, %13[5] : f32 into vector<8xf32> - store %14, %AVAL[%c3] : memref<4xvector<8xf32>> + memref.store %14, %AVAL[%c3] : memref<4xvector<8xf32>> %vi0 = vector.broadcast %i0 : i32 to vector<8xi32> @@ -140,7 +140,7 @@ %21 = vector.insert %i1, %20[3] : i32 into vector<8xi32> %22 = vector.insert %i1, %21[5] : i32 into vector<8xi32> %23 = vector.insert %i1, %22[7] : i32 into vector<8xi32> - store %23, %AIDX[%c0] : memref<4xvector<8xi32>> + memref.store %23, %AIDX[%c0] : memref<4xvector<8xi32>> %24 = vector.insert %i2, %vi0[0] : i32 into vector<8xi32> %25 = vector.insert %i1, %24[1] : i32 into vector<8xi32> @@ -150,7 +150,7 @@ %29 = vector.insert %i4, %28[5] : i32 into vector<8xi32> %30 = vector.insert %i2, %29[6] : i32 into vector<8xi32> %31 = vector.insert %i3, %30[7] : i32 into vector<8xi32> - store %31, %AIDX[%c1] : memref<4xvector<8xi32>> + memref.store %31, %AIDX[%c1] : memref<4xvector<8xi32>> %32 = vector.insert %i5, %vi0[0] : i32 into vector<8xi32> %33 = vector.insert %i4, %32[1] : i32 into vector<8xi32> @@ -160,7 +160,7 @@ %37 = vector.insert %i5, %36[5] : i32 into vector<8xi32> %38 = vector.insert %i4, %37[6] : i32 into vector<8xi32> %39 = vector.insert %i6, %38[7] : i32 into vector<8xi32> - store %39, %AIDX[%c2] : memref<4xvector<8xi32>> + memref.store %39, %AIDX[%c2] : memref<4xvector<8xi32>> %40 = vector.insert %i7, %vi0[0] : i32 into vector<8xi32> %41 = vector.insert %i6, %40[1] : i32 into vector<8xi32> @@ -170,16 +170,16 @@ %45 = vector.insert %i6, %44[5] : i32 into vector<8xi32> %46 = vector.insert %i6, %45[6] : i32 into vector<8xi32> %47 = vector.insert %i7, %46[7] : i32 into vector<8xi32> - store %47, %AIDX[%c3] : memref<4xvector<8xi32>> + memref.store %47, %AIDX[%c3] : memref<4xvector<8xi32>> %vf0 = vector.broadcast %f0 : f32 to vector<8xf32> - store %vf0, %B[%c0] : memref<1xvector<8xf32>> + memref.store %vf0, %B[%c0] : memref<1xvector<8xf32>> scf.for %i = %c0 to %c8 step %c1 { %ix = addi %i, %c1 : index %kx = index_cast %ix : index to i32 %fx = sitofp %kx : i32 to f32 - store %fx, %X[%i] : memref + memref.store %fx, %X[%i] : memref } // @@ -196,16 +196,16 @@ // scf.for %i = %c0 to %c4 step %c1 { - %aval = load %AVAL[%i] : memref<4xvector<8xf32>> + %aval = memref.load %AVAL[%i] : memref<4xvector<8xf32>> vector.print %aval : vector<8xf32> } scf.for %i = %c0 to %c4 step %c1 { - %aidx = load %AIDX[%i] : memref<4xvector<8xi32>> + %aidx = memref.load %AIDX[%i] : memref<4xvector<8xi32>> vector.print %aidx : vector<8xi32> } - %ldb = load %B[%c0] : memref<1xvector<8xf32>> + %ldb = memref.load %B[%c0] : 
memref<1xvector<8xf32>> vector.print %ldb : vector<8xf32> // @@ -226,10 +226,10 @@ // Free. // - dealloc %AVAL : memref<4xvector<8xf32>> - dealloc %AIDX : memref<4xvector<8xi32>> - dealloc %X : memref - dealloc %B : memref<1xvector<8xf32>> + memref.dealloc %AVAL : memref<4xvector<8xf32>> + memref.dealloc %AIDX : memref<4xvector<8xi32>> + memref.dealloc %X : memref + memref.dealloc %B : memref<1xvector<8xf32>> return } diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir --- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir @@ -33,7 +33,7 @@ // work with dims of 4, not of 3 %first = constant 3: index %second = constant 4: index - %A = alloc(%first, %second) : memref + %A = memref.alloc(%first, %second) : memref scf.for %i = %c0 to %first step %c1 { %i32 = index_cast %i : index to i32 %fi = sitofp %i32 : i32 to f32 @@ -42,7 +42,7 @@ %j32 = index_cast %j : index to i32 %fj = sitofp %j32 : i32 to f32 %fres = addf %fi10, %fj : f32 - store %fres, %A[%i, %j] : memref + memref.store %fres, %A[%i, %j] : memref } } // On input, memory contains [[ 0, 1, 2, ...], [10, 11, 12, ...], ...] diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir --- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir @@ -37,11 +37,11 @@ %c3 = constant 3: index %c4 = constant 4: index %c5 = constant 5: index - %A = alloc(%c5) : memref + %A = memref.alloc(%c5) : memref scf.for %i = %c0 to %c5 step %c1 { %i32 = index_cast %i : index to i32 %fi = sitofp %i32 : i32 to f32 - store %fi, %A[%i] : memref + memref.store %fi, %A[%i] : memref } // On input, memory contains [[ 0, 1, 2, 3, 4, xxx garbage xxx ]] // Read shifted by 2 and pad with -42: diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir --- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir @@ -13,14 +13,14 @@ %c1 = constant 1 : index %c10 = constant 10 : index %c100 = constant 100 : index - %0 = alloc(%arg0, %arg1) : memref + %0 = memref.alloc(%arg0, %arg1) : memref scf.for %arg5 = %c0 to %arg0 step %c1 { scf.for %arg6 = %c0 to %arg1 step %c1 { %arg66 = muli %arg6, %c100 : index %tmp1 = addi %arg5, %arg66 : index %tmp2 = index_cast %tmp1 : index to i32 %tmp3 = sitofp %tmp2 : i32 to f32 - store %tmp3, %0[%arg5, %arg6] : memref + memref.store %tmp3, %0[%arg5, %arg6] : memref } } return %0 : memref @@ -34,7 +34,7 @@ %c6 = constant 6 : index %cst = constant -4.2e+01 : f32 %0 = call @alloc_2d_filled_f32(%c6, %c6) : (index, index) -> memref - %converted = memref_cast %0 : memref to memref<*xf32> + %converted = memref.cast %0 : memref to memref<*xf32> call @print_memref_f32(%converted): (memref<*xf32>) -> () // CHECK: Unranked{{.*}}data = // CHECK: [ @@ -100,6 +100,6 @@ vector.print %5 : vector<5xf32> // CHECK-NEXT: ( 403, 503, 502, -42, -42 ) - dealloc %0 : memref + memref.dealloc %0 : memref return } diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-write.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-write.mlir --- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-write.mlir +++ 
b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-write.mlir @@ -43,10 +43,10 @@ %c0 = constant 0: index %c1 = constant 1: index %c32 = constant 32: index - %A = alloc(%c32) {alignment=64} : memref + %A = memref.alloc(%c32) {alignment=64} : memref scf.for %i = %c0 to %c32 step %c1 { %f = constant 0.0: f32 - store %f, %A[%i] : memref + memref.store %f, %A[%i] : memref } // On input, memory contains all zeros. diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir --- a/mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir @@ -18,12 +18,12 @@ func @alloc_1d_filled_inc_f32(%arg0: index, %arg1: f32) -> memref { %c0 = constant 0 : index %c1 = constant 1 : index - %0 = alloc(%arg0) : memref + %0 = memref.alloc(%arg0) : memref scf.for %arg2 = %c0 to %arg0 step %c1 { %tmp = index_cast %arg2 : index to i32 %tmp1 = sitofp %tmp : i32 to f32 %tmp2 = addf %tmp1, %arg1 : f32 - store %tmp2, %0[%arg2] : memref + memref.store %tmp2, %0[%arg2] : memref } return %0 : memref } @@ -37,7 +37,7 @@ %c1 = constant 1 : index %c32 = constant 32 : index %c64 = constant 64 : index - %out = alloc(%c64) : memref + %out = memref.alloc(%c64) : memref %in1 = call @alloc_1d_filled_inc_f32(%c64, %cf1) : (index, f32) -> memref %in2 = call @alloc_1d_filled_inc_f32(%c64, %cf2) : (index, f32) -> memref // Check that the tansformatio correctly happened. @@ -51,7 +51,7 @@ %b = vector.transfer_read %in2[%c0], %cf0: memref, vector<64xf32> %acc = addf %a, %b: vector<64xf32> vector.transfer_write %acc, %out[%c0]: vector<64xf32>, memref - %converted = memref_cast %out : memref to memref<*xf32> + %converted = memref.cast %out : memref to memref<*xf32> call @print_memref_f32(%converted): (memref<*xf32>) -> () // CHECK: Unranked{{.*}}data = // CHECK: [ @@ -61,8 +61,8 @@ // CHECK-SAME: 77, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99, // CHECK-SAME: 101, 103, 105, 107, 109, 111, 113, 115, 117, 119, // CHECK-SAME: 121, 123, 125, 127, 129] - dealloc %out : memref - dealloc %in1 : memref - dealloc %in2 : memref + memref.dealloc %out : memref + memref.dealloc %in1 : memref + memref.dealloc %in2 : memref return } diff --git a/mlir/test/Integration/Sparse/CPU/frostt-example.mlir b/mlir/test/Integration/Sparse/CPU/frostt-example.mlir --- a/mlir/test/Integration/Sparse/CPU/frostt-example.mlir +++ b/mlir/test/Integration/Sparse/CPU/frostt-example.mlir @@ -35,8 +35,8 @@ // Setup memrefs to get meta data, indices and values. // The index array should provide sufficient space. // - %idata = alloc(%c10) : memref - %ddata = alloc(%c1) : memref + %idata = memref.alloc(%c10) : memref + %ddata = memref.alloc(%c1) : memref // // Obtain the sparse tensor filename through this test helper. @@ -55,12 +55,12 @@ // // Print some meta data. 
// - %rank = load %idata[%c0] : memref - %nnz = load %idata[%c1] : memref + %rank = memref.load %idata[%c0] : memref + %nnz = memref.load %idata[%c1] : memref vector.print %rank : index vector.print %nnz : index scf.for %r = %c2 to %c10 step %c1 { - %d = load %idata[%r] : memref + %d = memref.load %idata[%r] : memref vector.print %d : index } @@ -77,13 +77,13 @@ // %0 = vector.broadcast %i0 : i64 to vector<8xi64> %1 = scf.for %r = %c0 to %rank step %c1 iter_args(%in = %0) -> vector<8xi64> { - %i = load %idata[%r] : memref + %i = memref.load %idata[%r] : memref %ii = index_cast %i : index to i64 %ri = index_cast %r : index to i32 %out = vector.insertelement %ii, %in[%ri : i32] : vector<8xi64> scf.yield %out : vector<8xi64> } - %2 = load %ddata[%c0] : memref + %2 = memref.load %ddata[%c0] : memref vector.print %1 : vector<8xi64> vector.print %2 : f64 } @@ -145,8 +145,8 @@ // // Free. // - dealloc %idata : memref - dealloc %ddata : memref + memref.dealloc %idata : memref + memref.dealloc %ddata : memref return } diff --git a/mlir/test/Integration/Sparse/CPU/matrix-market-example.mlir b/mlir/test/Integration/Sparse/CPU/matrix-market-example.mlir --- a/mlir/test/Integration/Sparse/CPU/matrix-market-example.mlir +++ b/mlir/test/Integration/Sparse/CPU/matrix-market-example.mlir @@ -35,8 +35,8 @@ // // Setup memrefs to get meta data, indices, and values. // - %idata = alloc(%c4) : memref - %ddata = alloc(%c1) : memref + %idata = memref.alloc(%c4) : memref + %ddata = memref.alloc(%c1) : memref // // Obtain the sparse matrix filename through this test helper. @@ -51,20 +51,20 @@ // nonzero elements (nnz), and the size (m x n) through a memref array. // %tensor = call @openTensor(%fileName, %idata) : (!Filename, memref) -> (!Tensor) - %rank = load %idata[%c0] : memref - %nnz = load %idata[%c1] : memref - %m = load %idata[%c2] : memref - %n = load %idata[%c3] : memref + %rank = memref.load %idata[%c0] : memref + %nnz = memref.load %idata[%c1] : memref + %m = memref.load %idata[%c2] : memref + %n = memref.load %idata[%c3] : memref // // At this point, code should prepare a proper sparse storage scheme for // an m x n matrix with nnz nonzero elements. For simplicity, here we // simply intialize a dense m x n matrix to all zeroes. // - %a = alloc(%m, %n) : memref + %a = memref.alloc(%m, %n) : memref scf.for %ii = %c0 to %m step %c1 { scf.for %jj = %c0 to %n step %c1 { - store %d0, %a[%ii, %jj] : memref + memref.store %d0, %a[%ii, %jj] : memref } } @@ -75,10 +75,10 @@ // scf.for %k = %c0 to %nnz step %c1 { call @readTensorItem(%tensor, %idata, %ddata) : (!Tensor, memref, memref) -> () - %i = load %idata[%c0] : memref - %j = load %idata[%c1] : memref - %d = load %ddata[%c0] : memref - store %d, %a[%i, %j] : memref + %i = memref.load %idata[%c0] : memref + %j = memref.load %idata[%c1] : memref + %d = memref.load %ddata[%c0] : memref + memref.store %d, %a[%i, %j] : memref } // @@ -112,9 +112,9 @@ // // Free. 
// - dealloc %idata : memref - dealloc %ddata : memref - dealloc %a : memref + memref.dealloc %idata : memref + memref.dealloc %ddata : memref + memref.dealloc %a : memref return } diff --git a/mlir/test/Integration/Sparse/CPU/sparse_sampled_matmul.mlir b/mlir/test/Integration/Sparse/CPU/sparse_sampled_matmul.mlir --- a/mlir/test/Integration/Sparse/CPU/sparse_sampled_matmul.mlir +++ b/mlir/test/Integration/Sparse/CPU/sparse_sampled_matmul.mlir @@ -82,32 +82,32 @@ // Mark both dimensions of the matrix as sparse and encode the // storage scheme types (this must match the metadata in the // trait and compiler switches). - %annotations = alloc(%c2) : memref + %annotations = memref.alloc(%c2) : memref %sparse = constant true - store %sparse, %annotations[%c0] : memref - store %sparse, %annotations[%c1] : memref + memref.store %sparse, %annotations[%c0] : memref + memref.store %sparse, %annotations[%c1] : memref %i32 = constant 3 : index %f32 = constant 1 : index // Setup memory for the dense matrices and initialize. - %adata = alloc(%c5, %c10) : memref - %bdata = alloc(%c10, %c5) : memref - %xdata = alloc(%c5, %c5) : memref + %adata = memref.alloc(%c5, %c10) : memref + %bdata = memref.alloc(%c10, %c5) : memref + %xdata = memref.alloc(%c5, %c5) : memref scf.for %i = %c0 to %c5 step %c1 { scf.for %j = %c0 to %c5 step %c1 { - store %d0, %xdata[%i, %j] : memref + memref.store %d0, %xdata[%i, %j] : memref } %p = addi %i, %c1 : index %q = index_cast %p : index to i32 %d = sitofp %q : i32 to f32 scf.for %j = %c0 to %c10 step %c1 { - store %d, %adata[%i, %j] : memref - store %d, %bdata[%j, %i] : memref + memref.store %d, %adata[%i, %j] : memref + memref.store %d, %bdata[%j, %i] : memref } } - %a = tensor_load %adata : memref - %b = tensor_load %bdata : memref - %x = tensor_load %xdata : memref + %a = memref.tensor_load %adata : memref + %b = memref.tensor_load %bdata : memref + %x = memref.tensor_load %xdata : memref // Read the sparse matrix from file, construct sparse storage // according to in memory, and call the kernel. @@ -125,7 +125,7 @@ // CHECK: ( 164, 0, 0, 640, 0 ) // CHECK: ( 0, 520, 0, 0, 1250 ) // - %r = tensor_to_memref %0 : memref + %r = memref.buffer_cast %0 : memref scf.for %i = %c0 to %c5 step %c1 { %v = vector.transfer_read %r[%i, %c0], %d0: memref, vector<5xf32> vector.print %v : vector<5xf32> @@ -133,9 +133,9 @@ // Release the resources. call @delSparseTensor(%s) : (!SparseTensor) -> () - dealloc %adata : memref - dealloc %bdata : memref - dealloc %xdata : memref + memref.dealloc %adata : memref + memref.dealloc %bdata : memref + memref.dealloc %xdata : memref return } diff --git a/mlir/test/Integration/Sparse/CPU/sparse_sum.mlir b/mlir/test/Integration/Sparse/CPU/sparse_sum.mlir --- a/mlir/test/Integration/Sparse/CPU/sparse_sum.mlir +++ b/mlir/test/Integration/Sparse/CPU/sparse_sum.mlir @@ -72,18 +72,18 @@ // Mark both dimensions of the matrix as sparse and encode the // storage scheme types (this must match the metadata in the // trait and compiler switches). - %annotations = alloc(%c2) : memref + %annotations = memref.alloc(%c2) : memref %sparse = constant true - store %sparse, %annotations[%c0] : memref - store %sparse, %annotations[%c1] : memref + memref.store %sparse, %annotations[%c0] : memref + memref.store %sparse, %annotations[%c1] : memref %i64 = constant 2 : index %f64 = constant 0 : index // Setup memory for a single reduction scalar, // initialized to zero. 
- %xdata = alloc() : memref - store %d0, %xdata[] : memref - %x = tensor_load %xdata : memref + %xdata = memref.alloc() : memref + memref.store %d0, %xdata[] : memref + %x = memref.tensor_load %xdata : memref // Read the sparse matrix from file, construct sparse storage // according to in memory, and call the kernel. @@ -97,13 +97,13 @@ // // CHECK: 28.2 // - %m = tensor_to_memref %0 : memref - %v = load %m[] : memref + %m = memref.buffer_cast %0 : memref + %v = memref.load %m[] : memref vector.print %v : f64 // Release the resources. call @delSparseTensor(%a) : (!SparseTensor) -> () - dealloc %xdata : memref + memref.dealloc %xdata : memref return } diff --git a/mlir/test/Transforms/buffer-deallocation.mlir b/mlir/test/Transforms/buffer-deallocation.mlir --- a/mlir/test/Transforms/buffer-deallocation.mlir +++ b/mlir/test/Transforms/buffer-deallocation.mlir @@ -21,7 +21,7 @@ ^bb1: br ^bb3(%arg1 : memref<2xf32>) ^bb2: - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) br ^bb3(%0 : memref<2xf32>) ^bb3(%1: memref<2xf32>): @@ -30,17 +30,17 @@ } // CHECK-NEXT: cond_br -// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK: %[[ALLOC0:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy // CHECK-NEXT: br ^bb3(%[[ALLOC0]] -// CHECK: %[[ALLOC1:.*]] = alloc() +// CHECK: %[[ALLOC1:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based -// CHECK: %[[ALLOC2:.*]] = alloc() +// CHECK: %[[ALLOC2:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy -// CHECK-NEXT: dealloc %[[ALLOC1]] +// CHECK-NEXT: memref.dealloc %[[ALLOC1]] // CHECK-NEXT: br ^bb3(%[[ALLOC2]] // CHECK: test.copy -// CHECK-NEXT: dealloc +// CHECK-NEXT: memref.dealloc // CHECK-NEXT: return // ----- @@ -68,7 +68,7 @@ ^bb1: br ^bb3(%arg1 : memref) ^bb2(%0: index): - %1 = alloc(%0) : memref + %1 = memref.alloc(%0) : memref test.buffer_based in(%arg1: memref) out(%1: memref) br ^bb3(%1 : memref) ^bb3(%2: memref): @@ -77,21 +77,21 @@ } // CHECK-NEXT: cond_br -// CHECK: %[[DIM0:.*]] = dim -// CHECK-NEXT: %[[ALLOC0:.*]] = alloc(%[[DIM0]]) +// CHECK: %[[DIM0:.*]] = memref.dim +// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc(%[[DIM0]]) // CHECK-NEXT: linalg.copy(%{{.*}}, %[[ALLOC0]]) // CHECK-NEXT: br ^bb3(%[[ALLOC0]] // CHECK: ^bb2(%[[IDX:.*]]:{{.*}}) -// CHECK-NEXT: %[[ALLOC1:.*]] = alloc(%[[IDX]]) +// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%[[IDX]]) // CHECK-NEXT: test.buffer_based -// CHECK: %[[DIM1:.*]] = dim %[[ALLOC1]] -// CHECK-NEXT: %[[ALLOC2:.*]] = alloc(%[[DIM1]]) +// CHECK: %[[DIM1:.*]] = memref.dim %[[ALLOC1]] +// CHECK-NEXT: %[[ALLOC2:.*]] = memref.alloc(%[[DIM1]]) // CHECK-NEXT: linalg.copy(%[[ALLOC1]], %[[ALLOC2]]) -// CHECK-NEXT: dealloc %[[ALLOC1]] +// CHECK-NEXT: memref.dealloc %[[ALLOC1]] // CHECK-NEXT: br ^bb3 // CHECK-NEXT: ^bb3(%[[ALLOC3:.*]]:{{.*}}) // CHECK: test.copy(%[[ALLOC3]], -// CHECK-NEXT: dealloc %[[ALLOC3]] +// CHECK-NEXT: memref.dealloc %[[ALLOC3]] // CHECK-NEXT: return // ----- @@ -126,7 +126,7 @@ ^bb1: br ^bb6(%arg1 : memref) ^bb2(%0: index): - %1 = alloc(%0) : memref + %1 = memref.alloc(%0) : memref test.buffer_based in(%arg1: memref) out(%1: memref) cond_br %arg0, ^bb3, ^bb4 ^bb3: @@ -144,12 +144,12 @@ // CHECK-NEXT: cond_br // CHECK: ^bb1 -// CHECK: %[[DIM0:.*]] = dim -// CHECK-NEXT: %[[ALLOC0:.*]] = alloc(%[[DIM0]]) +// CHECK: %[[DIM0:.*]] = memref.dim +// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc(%[[DIM0]]) // CHECK-NEXT: linalg.copy(%{{.*}}, %[[ALLOC0]]) // CHECK-NEXT: br ^bb6 // CHECK: ^bb2(%[[IDX:.*]]:{{.*}}) -// CHECK-NEXT: %[[ALLOC1:.*]] 
= alloc(%[[IDX]]) +// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%[[IDX]]) // CHECK-NEXT: test.buffer_based // CHECK: cond_br // CHECK: ^bb3: @@ -157,16 +157,16 @@ // CHECK: ^bb4: // CHECK-NEXT: br ^bb5(%[[ALLOC1]]{{.*}}) // CHECK-NEXT: ^bb5(%[[ALLOC2:.*]]:{{.*}}) -// CHECK: %[[DIM2:.*]] = dim %[[ALLOC2]] -// CHECK-NEXT: %[[ALLOC3:.*]] = alloc(%[[DIM2]]) +// CHECK: %[[DIM2:.*]] = memref.dim %[[ALLOC2]] +// CHECK-NEXT: %[[ALLOC3:.*]] = memref.alloc(%[[DIM2]]) // CHECK-NEXT: linalg.copy(%[[ALLOC2]], %[[ALLOC3]]) -// CHECK-NEXT: dealloc %[[ALLOC1]] +// CHECK-NEXT: memref.dealloc %[[ALLOC1]] // CHECK-NEXT: br ^bb6(%[[ALLOC3]]{{.*}}) // CHECK-NEXT: ^bb6(%[[ALLOC4:.*]]:{{.*}}) // CHECK-NEXT: br ^bb7(%[[ALLOC4]]{{.*}}) // CHECK-NEXT: ^bb7(%[[ALLOC5:.*]]:{{.*}}) // CHECK: test.copy(%[[ALLOC5]], -// CHECK-NEXT: dealloc %[[ALLOC4]] +// CHECK-NEXT: memref.dealloc %[[ALLOC4]] // CHECK-NEXT: return // ----- @@ -177,11 +177,11 @@ // CHECK-LABEL: func @emptyUsesValue func @emptyUsesValue(%arg0: memref<4xf32>) { - %0 = alloc() : memref<4xf32> + %0 = memref.alloc() : memref<4xf32> return } -// CHECK-NEXT: %[[ALLOC:.*]] = alloc() -// CHECK-NEXT: dealloc %[[ALLOC]] +// CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc() +// CHECK-NEXT: memref.dealloc %[[ALLOC]] // CHECK-NEXT: return // ----- @@ -200,7 +200,7 @@ func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>) ^bb1: - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) br ^bb2(%0 : memref<2xf32>) ^bb2(%1: memref<2xf32>): @@ -208,16 +208,16 @@ return } -// CHECK-NEXT: %[[ALLOC0:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy // CHECK-NEXT: cond_br -// CHECK: %[[ALLOC1:.*]] = alloc() +// CHECK: %[[ALLOC1:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based -// CHECK: %[[ALLOC2:.*]] = alloc() +// CHECK: %[[ALLOC2:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy -// CHECK-NEXT: dealloc %[[ALLOC1]] +// CHECK-NEXT: memref.dealloc %[[ALLOC1]] // CHECK: test.copy -// CHECK-NEXT: dealloc +// CHECK-NEXT: memref.dealloc // CHECK-NEXT: return // ----- @@ -233,7 +233,7 @@ // CHECK-LABEL: func @invCriticalEdge func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>) ^bb1: @@ -261,7 +261,7 @@ // CHECK-LABEL: func @ifElse func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -271,19 +271,19 @@ ^bb2(%3: memref<2xf32>, %4: memref<2xf32>): br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>) ^bb3(%5: memref<2xf32>, %6: memref<2xf32>): - %7 = alloc() : memref<2xf32> + %7 = memref.alloc() : memref<2xf32> test.buffer_based in(%5: memref<2xf32>) out(%7: memref<2xf32>) test.copy(%7, %arg2) : (memref<2xf32>, memref<2xf32>) return } -// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() +// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based -// CHECK: %[[SECOND_ALLOC:.*]] = alloc() +// CHECK: %[[SECOND_ALLOC:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based -// CHECK: dealloc %[[FIRST_ALLOC]] +// CHECK: memref.dealloc %[[FIRST_ALLOC]] // CHECK: test.copy -// 
CHECK-NEXT: dealloc %[[SECOND_ALLOC]] +// CHECK-NEXT: memref.dealloc %[[SECOND_ALLOC]] // CHECK-NEXT: return // ----- @@ -299,7 +299,7 @@ // CHECK-LABEL: func @ifElseNoUsers func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -313,9 +313,9 @@ return } -// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() +// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = memref.alloc() // CHECK: test.copy -// CHECK-NEXT: dealloc %[[FIRST_ALLOC]] +// CHECK-NEXT: memref.dealloc %[[FIRST_ALLOC]] // CHECK-NEXT: return // ----- @@ -334,7 +334,7 @@ // CHECK-LABEL: func @ifElseNested func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -348,19 +348,19 @@ ^bb4(%6: memref<2xf32>): br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>) ^bb5(%7: memref<2xf32>, %8: memref<2xf32>): - %9 = alloc() : memref<2xf32> + %9 = memref.alloc() : memref<2xf32> test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>) test.copy(%9, %arg2) : (memref<2xf32>, memref<2xf32>) return } -// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() +// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based -// CHECK: %[[SECOND_ALLOC:.*]] = alloc() +// CHECK: %[[SECOND_ALLOC:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based -// CHECK: dealloc %[[FIRST_ALLOC]] +// CHECK: memref.dealloc %[[FIRST_ALLOC]] // CHECK: test.copy -// CHECK-NEXT: dealloc %[[SECOND_ALLOC]] +// CHECK-NEXT: memref.dealloc %[[SECOND_ALLOC]] // CHECK-NEXT: return // ----- @@ -371,17 +371,17 @@ // CHECK-LABEL: func @redundantOperations func @redundantOperations(%arg0: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>) - %1 = alloc() : memref<2xf32> + %1 = memref.alloc() : memref<2xf32> test.buffer_based in(%0: memref<2xf32>) out(%1: memref<2xf32>) return } // CHECK: (%[[ARG0:.*]]: {{.*}}) -// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = alloc() +// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}}out(%[[FIRST_ALLOC]] -// CHECK: %[[SECOND_ALLOC:.*]] = alloc() +// CHECK: %[[SECOND_ALLOC:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based in(%[[FIRST_ALLOC]]{{.*}}out(%[[SECOND_ALLOC]] // CHECK: dealloc // CHECK-NEXT: dealloc @@ -407,11 +407,11 @@ %arg1: memref<2xf32>) { cond_br %cond, ^bb1, ^bb2 ^bb1: - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>) br ^exit(%0 : memref<2xf32>) ^bb2: - %1 = alloc() : memref<2xf32> + %1 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg0: memref<2xf32>) out(%1: memref<2xf32>) br ^exit(%1 : memref<2xf32>) ^exit(%arg2: memref<2xf32>): @@ -422,22 +422,22 @@ // CHECK-NEXT: cond_br // CHECK: ^bb1 // CHECK: ^bb1 -// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK: %[[ALLOC0:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based -// CHECK: %[[ALLOC1:.*]] = alloc() +// CHECK: %[[ALLOC1:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy -// CHECK-NEXT: dealloc %[[ALLOC0]] +// CHECK-NEXT: memref.dealloc %[[ALLOC0]] // CHECK-NEXT: br ^bb3(%[[ALLOC1]] // CHECK-NEXT: ^bb2 -// CHECK-NEXT: 
%[[ALLOC2:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC2:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based -// CHECK: %[[ALLOC3:.*]] = alloc() +// CHECK: %[[ALLOC3:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy -// CHECK-NEXT: dealloc %[[ALLOC2]] +// CHECK-NEXT: memref.dealloc %[[ALLOC2]] // CHECK-NEXT: br ^bb3(%[[ALLOC3]] // CHECK-NEXT: ^bb3(%[[ALLOC4:.*]]:{{.*}}) // CHECK: test.copy -// CHECK-NEXT: dealloc %[[ALLOC4]] +// CHECK-NEXT: memref.dealloc %[[ALLOC4]] // CHECK-NEXT: return // ----- @@ -457,23 +457,23 @@ %cond: i1, %arg0: memref<2xf32>, %arg1: memref<2xf32>) { - %1 = alloc() : memref<2xf32> + %1 = memref.alloc() : memref<2xf32> cond_br %cond, ^bb1, ^bb2 ^bb1: br ^exit(%arg0 : memref<2xf32>) ^bb2: test.buffer_based in(%arg0: memref<2xf32>) out(%1: memref<2xf32>) - dealloc %1 : memref<2xf32> + memref.dealloc %1 : memref<2xf32> br ^exit(%1 : memref<2xf32>) ^exit(%arg2: memref<2xf32>): test.copy(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>) return } -// CHECK-NEXT: %[[ALLOC0:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc() // CHECK-NEXT: cond_br // CHECK: test.copy -// CHECK-NEXT: dealloc %[[ALLOC0]] +// CHECK-NEXT: memref.dealloc %[[ALLOC0]] // CHECK-NEXT: return // ----- @@ -484,15 +484,15 @@ func @inserting_missing_dealloc_simple( %arg0 : memref<2xf32>, %arg1: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>) test.copy(%0, %arg1) : (memref<2xf32>, memref<2xf32>) return } -// CHECK-NEXT: %[[ALLOC0:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc() // CHECK: test.copy -// CHECK-NEXT: dealloc %[[ALLOC0]] +// CHECK-NEXT: memref.dealloc %[[ALLOC0]] // ----- @@ -501,16 +501,16 @@ // CHECK-LABEL: func @moving_invalid_dealloc_op func @moving_invalid_dealloc_op(%arg0 : memref<2xf32>, %arg1: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>) - dealloc %0 : memref<2xf32> + memref.dealloc %0 : memref<2xf32> test.copy(%0, %arg1) : (memref<2xf32>, memref<2xf32>) return } -// CHECK-NEXT: %[[ALLOC0:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc() // CHECK: test.copy -// CHECK-NEXT: dealloc %[[ALLOC0]] +// CHECK-NEXT: memref.dealloc %[[ALLOC0]] // ----- @@ -530,10 +530,10 @@ ^bb1: br ^bb3(%arg1 : memref<2xf32>) ^bb2: - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): - %1 = alloc() : memref<2xf32> + %1 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>) %tmp1 = math.exp %gen1_arg0 : f32 test.region_yield %tmp1 : f32 @@ -545,18 +545,18 @@ } // CHECK: (%[[cond:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %{{.*}}: {{.*}}) // CHECK-NEXT: cond_br %[[cond]], ^[[BB1:.*]], ^[[BB2:.*]] -// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK: %[[ALLOC0:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy(%[[ARG1]], %[[ALLOC0]]) // CHECK: ^[[BB2]]: -// CHECK: %[[ALLOC1:.*]] = alloc() +// CHECK: %[[ALLOC1:.*]] = memref.alloc() // CHECK-NEXT: test.region_buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOC1]] -// CHECK: %[[ALLOC2:.*]] = alloc() +// CHECK: %[[ALLOC2:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOC2]] -// CHECK: dealloc %[[ALLOC2]] +// CHECK: memref.dealloc %[[ALLOC2]] // CHECK-NEXT: %{{.*}} = math.exp -// CHECK: %[[ALLOC3:.*]] = alloc() +// CHECK: %[[ALLOC3:.*]] = 
memref.alloc() // CHECK-NEXT: linalg.copy(%[[ALLOC1]], %[[ALLOC3]]) -// CHECK-NEXT: dealloc %[[ALLOC1]] +// CHECK-NEXT: memref.dealloc %[[ALLOC1]] // CHECK: ^[[BB3:.*]]({{.*}}): // CHECK: test.copy // CHECK-NEXT: dealloc @@ -573,18 +573,18 @@ %arg0: memref<5xf32>, %arg1: memref<10xf32>, %arg2: memref<5xf32>) -> (memref<10xf32>, memref<15xf32>) { - %x = alloc() : memref<15xf32> - %y = alloc() : memref<5xf32> + %x = memref.alloc() : memref<15xf32> + %y = memref.alloc() : memref<5xf32> test.buffer_based in(%arg0: memref<5xf32>) out(%y: memref<5xf32>) test.copy(%y, %arg2) : (memref<5xf32>, memref<5xf32>) return %arg1, %x : memref<10xf32>, memref<15xf32> } // CHECK: (%[[ARG0:.*]]: memref<5xf32>, %[[ARG1:.*]]: memref<10xf32>, // CHECK-SAME: %[[RESULT:.*]]: memref<5xf32>) -// CHECK: %[[X:.*]] = alloc() -// CHECK: %[[Y:.*]] = alloc() +// CHECK: %[[X:.*]] = memref.alloc() +// CHECK: %[[Y:.*]] = memref.alloc() // CHECK: test.copy -// CHECK: dealloc %[[Y]] +// CHECK: memref.dealloc %[[Y]] // CHECK: return %[[ARG1]], %[[X]] // ----- @@ -599,21 +599,21 @@ %arg0 : index, %arg1 : index) -> memref { %0 = cmpi eq, %arg0, %arg1 : index - %1 = alloc(%arg0, %arg0) : memref + %1 = memref.alloc(%arg0, %arg0) : memref %2 = scf.if %0 -> (memref) { scf.yield %1 : memref } else { - %3 = alloc(%arg0, %arg1) : memref + %3 = memref.alloc(%arg0, %arg1) : memref scf.yield %1 : memref } return %2 : memref } -// CHECK: %[[ALLOC0:.*]] = alloc(%arg0, %arg0) +// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0) // CHECK-NEXT: %[[ALLOC1:.*]] = scf.if // CHECK: scf.yield %[[ALLOC0]] -// CHECK: %[[ALLOC2:.*]] = alloc(%arg0, %arg1) -// CHECK-NEXT: dealloc %[[ALLOC2]] +// CHECK: %[[ALLOC2:.*]] = memref.alloc(%arg0, %arg1) +// CHECK-NEXT: memref.dealloc %[[ALLOC2]] // CHECK-NEXT: scf.yield %[[ALLOC0]] // CHECK: return %[[ALLOC1]] @@ -629,27 +629,27 @@ %arg0 : index, %arg1 : index) -> memref { %0 = cmpi eq, %arg0, %arg1 : index - %1 = alloc(%arg0, %arg0) : memref + %1 = memref.alloc(%arg0, %arg0) : memref %2 = scf.if %0 -> (memref) { scf.yield %1 : memref } else { - %3 = alloc(%arg0, %arg1) : memref + %3 = memref.alloc(%arg0, %arg1) : memref scf.yield %3 : memref } return %2 : memref } -// CHECK: %[[ALLOC0:.*]] = alloc(%arg0, %arg0) +// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0) // CHECK-NEXT: %[[ALLOC1:.*]] = scf.if -// CHECK: %[[ALLOC2:.*]] = alloc +// CHECK: %[[ALLOC2:.*]] = memref.alloc // CHECK-NEXT: linalg.copy(%[[ALLOC0]], %[[ALLOC2]]) // CHECK: scf.yield %[[ALLOC2]] -// CHECK: %[[ALLOC3:.*]] = alloc(%arg0, %arg1) -// CHECK: %[[ALLOC4:.*]] = alloc +// CHECK: %[[ALLOC3:.*]] = memref.alloc(%arg0, %arg1) +// CHECK: %[[ALLOC4:.*]] = memref.alloc // CHECK-NEXT: linalg.copy(%[[ALLOC3]], %[[ALLOC4]]) -// CHECK: dealloc %[[ALLOC3]] +// CHECK: memref.dealloc %[[ALLOC3]] // CHECK: scf.yield %[[ALLOC4]] -// CHECK: dealloc %[[ALLOC0]] +// CHECK: memref.dealloc %[[ALLOC0]] // CHECK-NEXT: return %[[ALLOC1]] // ----- @@ -660,7 +660,7 @@ // CHECK-LABEL: func @inner_region_control_flow func @inner_region_control_flow(%arg0 : index) -> memref { - %0 = alloc(%arg0, %arg0) : memref + %0 = memref.alloc(%arg0, %arg0) : memref %1 = test.region_if %0 : memref -> (memref) then { ^bb0(%arg1 : memref): test.region_if_yield %arg1 : memref @@ -674,7 +674,7 @@ return %1 : memref } -// CHECK: %[[ALLOC0:.*]] = alloc(%arg0, %arg0) +// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0) // CHECK-NEXT: %[[ALLOC1:.*]] = test.region_if // CHECK-NEXT: ^bb0(%[[ALLOC2:.*]]:{{.*}}): // CHECK-NEXT: test.region_if_yield %[[ALLOC2]] @@ -688,8 +688,8 @@ // 
CHECK-LABEL: func @subview func @subview(%arg0 : index, %arg1 : index, %arg2 : memref) { - %0 = alloc() : memref<64x4xf32, offset: 0, strides: [4, 1]> - %1 = subview %0[%arg0, %arg1][%arg0, %arg1][%arg0, %arg1] : + %0 = memref.alloc() : memref<64x4xf32, offset: 0, strides: [4, 1]> + %1 = memref.subview %0[%arg0, %arg1][%arg0, %arg1][%arg0, %arg1] : memref<64x4xf32, offset: 0, strides: [4, 1]> to memref test.copy(%1, %arg2) : @@ -697,10 +697,10 @@ return } -// CHECK-NEXT: %[[ALLOC:.*]] = alloc() -// CHECK-NEXT: subview +// CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc() +// CHECK-NEXT: memref.subview // CHECK-NEXT: test.copy -// CHECK-NEXT: dealloc %[[ALLOC]] +// CHECK-NEXT: memref.dealloc %[[ALLOC]] // CHECK-NEXT: return // ----- @@ -714,7 +714,7 @@ ^bb1: br ^bb3(%arg1 : memref<2xf32>) ^bb2: - %0 = alloca() : memref<2xf32> + %0 = memref.alloca() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) br ^bb3(%0 : memref<2xf32>) ^bb3(%1: memref<2xf32>): @@ -723,7 +723,7 @@ } // CHECK-NEXT: cond_br -// CHECK: %[[ALLOCA:.*]] = alloca() +// CHECK: %[[ALLOCA:.*]] = memref.alloca() // CHECK: br ^bb3(%[[ALLOCA:.*]]) // CHECK-NEXT: ^bb3 // CHECK-NEXT: test.copy @@ -737,7 +737,7 @@ // CHECK-LABEL: func @ifElseAlloca func @ifElseAlloca(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -747,17 +747,17 @@ ^bb2(%3: memref<2xf32>, %4: memref<2xf32>): br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>) ^bb3(%5: memref<2xf32>, %6: memref<2xf32>): - %7 = alloca() : memref<2xf32> + %7 = memref.alloca() : memref<2xf32> test.buffer_based in(%5: memref<2xf32>) out(%7: memref<2xf32>) test.copy(%7, %arg2) : (memref<2xf32>, memref<2xf32>) return } -// CHECK-NEXT: %[[ALLOC:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based -// CHECK: %[[ALLOCA:.*]] = alloca() +// CHECK: %[[ALLOCA:.*]] = memref.alloca() // CHECK-NEXT: test.buffer_based -// CHECK: dealloc %[[ALLOC]] +// CHECK: memref.dealloc %[[ALLOC]] // CHECK: test.copy // CHECK-NEXT: return @@ -768,7 +768,7 @@ %arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { - %0 = alloca() : memref<2xf32> + %0 = memref.alloca() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -782,18 +782,18 @@ ^bb4(%6: memref<2xf32>): br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>) ^bb5(%7: memref<2xf32>, %8: memref<2xf32>): - %9 = alloc() : memref<2xf32> + %9 = memref.alloc() : memref<2xf32> test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>) test.copy(%9, %arg2) : (memref<2xf32>, memref<2xf32>) return } -// CHECK-NEXT: %[[ALLOCA:.*]] = alloca() +// CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca() // CHECK-NEXT: test.buffer_based -// CHECK: %[[ALLOC:.*]] = alloc() +// CHECK: %[[ALLOC:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based // CHECK: test.copy -// CHECK-NEXT: dealloc %[[ALLOC]] +// CHECK-NEXT: memref.dealloc %[[ALLOC]] // CHECK-NEXT: return // ----- @@ -807,10 +807,10 @@ ^bb1: br ^bb3(%arg1 : memref<2xf32>) ^bb2: - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): - %1 = alloca() : memref<2xf32> + %1 = memref.alloca() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) 
out(%1: memref<2xf32>) %tmp1 = math.exp %gen1_arg0 : f32 test.region_yield %tmp1 : f32 @@ -823,17 +823,17 @@ // CHECK: (%[[cond:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %{{.*}}: {{.*}}) // CHECK-NEXT: cond_br %[[cond]], ^[[BB1:.*]], ^[[BB2:.*]] // CHECK: ^[[BB1]]: -// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK: %[[ALLOC0:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy // CHECK: ^[[BB2]]: -// CHECK: %[[ALLOC1:.*]] = alloc() +// CHECK: %[[ALLOC1:.*]] = memref.alloc() // CHECK-NEXT: test.region_buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOC1]] -// CHECK: %[[ALLOCA:.*]] = alloca() +// CHECK: %[[ALLOCA:.*]] = memref.alloca() // CHECK-NEXT: test.buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOCA]] // CHECK: %{{.*}} = math.exp -// CHECK: %[[ALLOC2:.*]] = alloc() +// CHECK: %[[ALLOC2:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy -// CHECK-NEXT: dealloc %[[ALLOC1]] +// CHECK-NEXT: memref.dealloc %[[ALLOC1]] // CHECK: ^[[BB3:.*]]({{.*}}): // CHECK: test.copy // CHECK-NEXT: dealloc @@ -845,20 +845,20 @@ %arg0 : index, %arg1 : index) -> memref { %0 = cmpi eq, %arg0, %arg1 : index - %1 = alloc(%arg0, %arg0) : memref + %1 = memref.alloc(%arg0, %arg0) : memref %2 = scf.if %0 -> (memref) { scf.yield %1 : memref } else { - %3 = alloca(%arg0, %arg1) : memref + %3 = memref.alloca(%arg0, %arg1) : memref scf.yield %1 : memref } return %2 : memref } -// CHECK: %[[ALLOC0:.*]] = alloc(%arg0, %arg0) +// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0) // CHECK-NEXT: %[[ALLOC1:.*]] = scf.if // CHECK: scf.yield %[[ALLOC0]] -// CHECK: %[[ALLOCA:.*]] = alloca(%arg0, %arg1) +// CHECK: %[[ALLOCA:.*]] = memref.alloca(%arg0, %arg1) // CHECK-NEXT: scf.yield %[[ALLOC0]] // CHECK: return %[[ALLOC1]] @@ -875,33 +875,33 @@ %step: index, %buf: memref<2xf32>, %res: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> %1 = scf.for %i = %lb to %ub step %step iter_args(%iterBuf = %buf) -> memref<2xf32> { %2 = cmpi eq, %i, %ub : index - %3 = alloc() : memref<2xf32> + %3 = memref.alloc() : memref<2xf32> scf.yield %3 : memref<2xf32> } test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>) return } -// CHECK: %[[ALLOC0:.*]] = alloc() -// CHECK-NEXT: dealloc %[[ALLOC0]] -// CHECK-NEXT: %[[ALLOC1:.*]] = alloc() +// CHECK: %[[ALLOC0:.*]] = memref.alloc() +// CHECK-NEXT: memref.dealloc %[[ALLOC0]] +// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc() // CHECK: linalg.copy(%arg3, %[[ALLOC1]]) // CHECK: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args // CHECK-SAME: (%[[IALLOC:.*]] = %[[ALLOC1]] // CHECK: cmpi -// CHECK: dealloc %[[IALLOC]] -// CHECK: %[[ALLOC3:.*]] = alloc() -// CHECK: %[[ALLOC4:.*]] = alloc() +// CHECK: memref.dealloc %[[IALLOC]] +// CHECK: %[[ALLOC3:.*]] = memref.alloc() +// CHECK: %[[ALLOC4:.*]] = memref.alloc() // CHECK: linalg.copy(%[[ALLOC3]], %[[ALLOC4]]) -// CHECK: dealloc %[[ALLOC3]] +// CHECK: memref.dealloc %[[ALLOC3]] // CHECK: scf.yield %[[ALLOC4]] // CHECK: } // CHECK: test.copy(%[[ALLOC2]], %arg4) -// CHECK-NEXT: dealloc %[[ALLOC2]] +// CHECK-NEXT: memref.dealloc %[[ALLOC2]] // ----- @@ -918,7 +918,7 @@ %step: index, %buf: memref<2xf32>, %res: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> %1 = scf.for %i = %lb to %ub step %step iter_args(%iterBuf = %buf) -> memref<2xf32> { %2 = cmpi eq, %i, %ub : index @@ -933,14 +933,14 @@ return } -// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK: %[[ALLOC0:.*]] = memref.alloc() // CHECK-NEXT: %[[ALLOC1:.*]] = scf.for {{.*}} iter_args(%[[IALLOC:.*]] = // CHECK: %[[ALLOC2:.*]] = scf.if // CHECK: scf.yield %[[ALLOC0]] // CHECK: 
scf.yield %[[IALLOC]] // CHECK: scf.yield %[[ALLOC2]] // CHECK: test.copy(%[[ALLOC1]], %arg4) -// CHECK: dealloc %[[ALLOC0]] +// CHECK: memref.dealloc %[[ALLOC0]] // ----- @@ -958,12 +958,12 @@ %ub: index, %step: index, %buf: memref<2xf32>) -> memref<2xf32> { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> %1 = scf.for %i = %lb to %ub step %step iter_args(%iterBuf = %buf) -> memref<2xf32> { %2 = cmpi eq, %i, %ub : index %3 = scf.if %2 -> (memref<2xf32>) { - %4 = alloc() : memref<2xf32> + %4 = memref.alloc() : memref<2xf32> scf.yield %4 : memref<2xf32> } else { scf.yield %0 : memref<2xf32> @@ -973,30 +973,30 @@ return %1 : memref<2xf32> } -// CHECK: %[[ALLOC0:.*]] = alloc() -// CHECK: %[[ALLOC1:.*]] = alloc() +// CHECK: %[[ALLOC0:.*]] = memref.alloc() +// CHECK: %[[ALLOC1:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy(%arg3, %[[ALLOC1]]) // CHECK-NEXT: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args // CHECK-SAME: (%[[IALLOC:.*]] = %[[ALLOC1]] -// CHECK: dealloc %[[IALLOC]] +// CHECK: memref.dealloc %[[IALLOC]] // CHECK: %[[ALLOC3:.*]] = scf.if -// CHECK: %[[ALLOC4:.*]] = alloc() -// CHECK-NEXT: %[[ALLOC5:.*]] = alloc() +// CHECK: %[[ALLOC4:.*]] = memref.alloc() +// CHECK-NEXT: %[[ALLOC5:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy(%[[ALLOC4]], %[[ALLOC5]]) -// CHECK-NEXT: dealloc %[[ALLOC4]] +// CHECK-NEXT: memref.dealloc %[[ALLOC4]] // CHECK-NEXT: scf.yield %[[ALLOC5]] -// CHECK: %[[ALLOC6:.*]] = alloc() +// CHECK: %[[ALLOC6:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy(%[[ALLOC0]], %[[ALLOC6]]) // CHECK-NEXT: scf.yield %[[ALLOC6]] -// CHECK: %[[ALLOC7:.*]] = alloc() +// CHECK: %[[ALLOC7:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy(%[[ALLOC3:.*]], %[[ALLOC7]]) -// CHECK-NEXT: dealloc %[[ALLOC3]] +// CHECK-NEXT: memref.dealloc %[[ALLOC3]] // CHECK-NEXT: scf.yield %[[ALLOC7]] -// CHECK: dealloc %[[ALLOC0]] +// CHECK: memref.dealloc %[[ALLOC0]] // CHECK-NEXT: return %[[ALLOC2]] // ----- @@ -1013,17 +1013,17 @@ %step: index, %buf: memref<2xf32>, %res: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> %1 = scf.for %i = %lb to %ub step %step iter_args(%iterBuf = %buf) -> memref<2xf32> { %2 = scf.for %i2 = %lb to %ub step %step iter_args(%iterBuf2 = %iterBuf) -> memref<2xf32> { %3 = scf.for %i3 = %lb to %ub step %step iter_args(%iterBuf3 = %iterBuf2) -> memref<2xf32> { - %4 = alloc() : memref<2xf32> + %4 = memref.alloc() : memref<2xf32> %5 = cmpi eq, %i, %ub : index %6 = scf.if %5 -> (memref<2xf32>) { - %7 = alloc() : memref<2xf32> + %7 = memref.alloc() : memref<2xf32> scf.yield %7 : memref<2xf32> } else { scf.yield %iterBuf3 : memref<2xf32> @@ -1038,55 +1038,55 @@ return } -// CHECK: %[[ALLOC0:.*]] = alloc() -// CHECK-NEXT: dealloc %[[ALLOC0]] -// CHECK-NEXT: %[[ALLOC1:.*]] = alloc() +// CHECK: %[[ALLOC0:.*]] = memref.alloc() +// CHECK-NEXT: memref.dealloc %[[ALLOC0]] +// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy(%arg3, %[[ALLOC1]]) // CHECK-NEXT: %[[VAL_7:.*]] = scf.for {{.*}} iter_args // CHECK-SAME: (%[[IALLOC0:.*]] = %[[ALLOC1]]) -// CHECK: %[[ALLOC2:.*]] = alloc() +// CHECK: %[[ALLOC2:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy(%[[IALLOC0]], %[[ALLOC2]]) -// CHECK-NEXT: dealloc %[[IALLOC0]] +// CHECK-NEXT: memref.dealloc %[[IALLOC0]] // CHECK-NEXT: %[[ALLOC3:.*]] = scf.for {{.*}} iter_args // CHECK-SAME: (%[[IALLOC1:.*]] = %[[ALLOC2]]) -// CHECK: %[[ALLOC5:.*]] = alloc() +// CHECK: %[[ALLOC5:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy(%[[IALLOC1]], %[[ALLOC5]]) -// CHECK-NEXT: dealloc 
%[[IALLOC1]] +// CHECK-NEXT: memref.dealloc %[[IALLOC1]] // CHECK: %[[ALLOC6:.*]] = scf.for {{.*}} iter_args // CHECK-SAME: (%[[IALLOC2:.*]] = %[[ALLOC5]]) -// CHECK: %[[ALLOC8:.*]] = alloc() -// CHECK-NEXT: dealloc %[[ALLOC8]] +// CHECK: %[[ALLOC8:.*]] = memref.alloc() +// CHECK-NEXT: memref.dealloc %[[ALLOC8]] // CHECK: %[[ALLOC9:.*]] = scf.if -// CHECK: %[[ALLOC11:.*]] = alloc() -// CHECK-NEXT: %[[ALLOC12:.*]] = alloc() +// CHECK: %[[ALLOC11:.*]] = memref.alloc() +// CHECK-NEXT: %[[ALLOC12:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy(%[[ALLOC11]], %[[ALLOC12]]) -// CHECK-NEXT: dealloc %[[ALLOC11]] +// CHECK-NEXT: memref.dealloc %[[ALLOC11]] // CHECK-NEXT: scf.yield %[[ALLOC12]] -// CHECK: %[[ALLOC13:.*]] = alloc() +// CHECK: %[[ALLOC13:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy(%[[IALLOC2]], %[[ALLOC13]]) // CHECK-NEXT: scf.yield %[[ALLOC13]] -// CHECK: dealloc %[[IALLOC2]] -// CHECK-NEXT: %[[ALLOC10:.*]] = alloc() +// CHECK: memref.dealloc %[[IALLOC2]] +// CHECK-NEXT: %[[ALLOC10:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy(%[[ALLOC9]], %[[ALLOC10]]) -// CHECK-NEXT: dealloc %[[ALLOC9]] +// CHECK-NEXT: memref.dealloc %[[ALLOC9]] // CHECK-NEXT: scf.yield %[[ALLOC10]] -// CHECK: %[[ALLOC7:.*]] = alloc() +// CHECK: %[[ALLOC7:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy(%[[ALLOC6]], %[[ALLOC7]]) -// CHECK-NEXT: dealloc %[[ALLOC6]] +// CHECK-NEXT: memref.dealloc %[[ALLOC6]] // CHECK-NEXT: scf.yield %[[ALLOC7]] -// CHECK: %[[ALLOC4:.*]] = alloc() +// CHECK: %[[ALLOC4:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy(%[[ALLOC3]], %[[ALLOC4]]) -// CHECK-NEXT: dealloc %[[ALLOC3]] +// CHECK-NEXT: memref.dealloc %[[ALLOC3]] // CHECK-NEXT: scf.yield %[[ALLOC4]] // CHECK: test.copy(%[[VAL_7]], %arg4) -// CHECK-NEXT: dealloc %[[VAL_7]] +// CHECK-NEXT: memref.dealloc %[[VAL_7]] // ----- @@ -1113,7 +1113,7 @@ %const1 = constant 1 : i32 %inc = addi %val, %const1 : i32 %size = std.index_cast %inc : i32 to index - %alloc1 = alloc(%size) : memref + %alloc1 = memref.alloc(%size) : memref br ^loopHeader(%inc, %alloc1 : i32, memref) ^exit(%buff3 : memref): @@ -1139,7 +1139,7 @@ ^loopBody(%val : i32, %buff2: memref<2xf32>): %const1 = constant 1 : i32 %inc = addi %val, %const1 : i32 - %alloc1 = alloc() : memref<2xf32> + %alloc1 = memref.alloc() : memref<2xf32> br ^loopHeader(%inc, %alloc1 : i32, memref<2xf32>) ^loopHeader(%i : i32, %buff : memref<2xf32>): @@ -1162,12 +1162,12 @@ %arg3: memref<2xf32>) { // Confirm the alloc will be dealloc'ed in the block. %1 = shape.assuming %arg0 -> memref<2xf32> { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> shape.assuming_yield %arg2 : memref<2xf32> } // Confirm the alloc will be returned and dealloc'ed after its use. 
%3 = shape.assuming %arg0 -> memref<2xf32> { - %2 = alloc() : memref<2xf32> + %2 = memref.alloc() : memref<2xf32> shape.assuming_yield %2 : memref<2xf32> } test.copy(%3, %arg3) : (memref<2xf32>, memref<2xf32>) @@ -1178,17 +1178,17 @@ // CHECK-SAME: %[[ARG1:.*]]: {{.*}}, // CHECK-SAME: %[[ARG2:.*]]: {{.*}} // CHECK: %[[UNUSED_RESULT:.*]] = shape.assuming %[[ARG0]] -// CHECK-NEXT: %[[ALLOC0:.*]] = alloc() -// CHECK-NEXT: dealloc %[[ALLOC0]] +// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc() +// CHECK-NEXT: memref.dealloc %[[ALLOC0]] // CHECK-NEXT: shape.assuming_yield %[[ARG1]] // CHECK: %[[ASSUMING_RESULT:.*]] = shape.assuming %[[ARG0]] -// CHECK-NEXT: %[[TMP_ALLOC:.*]] = alloc() -// CHECK-NEXT: %[[RETURNING_ALLOC:.*]] = alloc() +// CHECK-NEXT: %[[TMP_ALLOC:.*]] = memref.alloc() +// CHECK-NEXT: %[[RETURNING_ALLOC:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy(%[[TMP_ALLOC]], %[[RETURNING_ALLOC]]) -// CHECK-NEXT: dealloc %[[TMP_ALLOC]] +// CHECK-NEXT: memref.dealloc %[[TMP_ALLOC]] // CHECK-NEXT: shape.assuming_yield %[[RETURNING_ALLOC]] // CHECK: test.copy(%[[ASSUMING_RESULT:.*]], %[[ARG2]]) -// CHECK-NEXT: dealloc %[[ASSUMING_RESULT]] +// CHECK-NEXT: memref.dealloc %[[ASSUMING_RESULT]] // ----- diff --git a/mlir/test/Transforms/buffer-hoisting.mlir b/mlir/test/Transforms/buffer-hoisting.mlir --- a/mlir/test/Transforms/buffer-hoisting.mlir +++ b/mlir/test/Transforms/buffer-hoisting.mlir @@ -18,7 +18,7 @@ ^bb1: br ^bb3(%arg1 : memref<2xf32>) ^bb2: - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) br ^bb3(%0 : memref<2xf32>) ^bb3(%1: memref<2xf32>): @@ -26,7 +26,7 @@ return } -// CHECK-NEXT: %[[ALLOC:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc() // CHECK-NEXT: cond_br // ----- @@ -51,7 +51,7 @@ ^bb1: br ^bb3(%arg1 : memref) ^bb2(%0: index): - %1 = alloc(%0) : memref + %1 = memref.alloc(%0) : memref test.buffer_based in(%arg1: memref) out(%1: memref) br ^bb3(%1 : memref) ^bb3(%2: memref): @@ -62,7 +62,7 @@ // CHECK-NEXT: cond_br // CHECK: ^bb2 // CHECK: ^bb2(%[[IDX:.*]]:{{.*}}) -// CHECK-NEXT: %[[ALLOC0:.*]] = alloc(%[[IDX]]) +// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc(%[[IDX]]) // CHECK-NEXT: test.buffer_based // ----- @@ -93,7 +93,7 @@ ^bb1: br ^bb6(%arg1 : memref) ^bb2(%0: index): - %1 = alloc(%0) : memref + %1 = memref.alloc(%0) : memref test.buffer_based in(%arg1: memref) out(%1: memref) cond_br %arg0, ^bb3, ^bb4 ^bb3: @@ -112,7 +112,7 @@ // CHECK-NEXT: cond_br // CHECK: ^bb2 // CHECK: ^bb2(%[[IDX:.*]]:{{.*}}) -// CHECK-NEXT: %[[ALLOC0:.*]] = alloc(%[[IDX]]) +// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc(%[[IDX]]) // CHECK-NEXT: test.buffer_based // ----- @@ -130,7 +130,7 @@ func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>) ^bb1: - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) br ^bb2(%0 : memref<2xf32>) ^bb2(%1: memref<2xf32>): @@ -138,7 +138,7 @@ return } -// CHECK-NEXT: %[[ALLOC:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc() // CHECK-NEXT: cond_br // ----- @@ -153,7 +153,7 @@ // CHECK-LABEL: func @ifElse func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -163,18 +163,18 @@ ^bb2(%3: memref<2xf32>, 
%4: memref<2xf32>): br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>) ^bb3(%5: memref<2xf32>, %6: memref<2xf32>): - %7 = alloc() : memref<2xf32> + %7 = memref.alloc() : memref<2xf32> test.buffer_based in(%7: memref<2xf32>) out(%7: memref<2xf32>) test.copy(%7, %arg2) : (memref<2xf32>, memref<2xf32>) return } -// CHECK-NEXT: %[[ALLOC0:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based // CHECK: br ^bb3 // CHECK: br ^bb3 // CHECK-NEXT: ^bb3 -// CHECK: %[[ALLOC1:.*]] = alloc() +// CHECK: %[[ALLOC1:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based // CHECK: test.copy(%[[ALLOC1]] // CHECK-NEXT: return @@ -191,7 +191,7 @@ // CHECK-LABEL: func @ifElseNoUsers func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -205,7 +205,7 @@ return } -// CHECK-NEXT: %[[ALLOC0:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based // ----- @@ -223,7 +223,7 @@ // CHECK-LABEL: func @ifElseNested func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -237,19 +237,19 @@ ^bb4(%6: memref<2xf32>): br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>) ^bb5(%7: memref<2xf32>, %8: memref<2xf32>): - %9 = alloc() : memref<2xf32> + %9 = memref.alloc() : memref<2xf32> test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>) test.copy(%9, %arg2) : (memref<2xf32>, memref<2xf32>) return } -// CHECK-NEXT: %[[ALLOC0:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based // CHECK: br ^bb5 // CHECK: br ^bb5 // CHECK: br ^bb5 // CHECK-NEXT: ^bb5 -// CHECK: %[[ALLOC1:.*]] = alloc() +// CHECK: %[[ALLOC1:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based // ----- @@ -259,16 +259,16 @@ // CHECK-LABEL: func @redundantOperations func @redundantOperations(%arg0: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>) - %1 = alloc() : memref<2xf32> + %1 = memref.alloc() : memref<2xf32> test.buffer_based in(%0: memref<2xf32>) out(%1: memref<2xf32>) return } -// CHECK-NEXT: %[[ALLOC0:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based -// CHECK: %[[ALLOC1:.*]] = alloc() +// CHECK: %[[ALLOC1:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based // ----- @@ -289,11 +289,11 @@ %arg1: memref<2xf32>) { cond_br %cond, ^bb1, ^bb2 ^bb1: - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>) br ^exit(%0 : memref<2xf32>) ^bb2: - %1 = alloc() : memref<2xf32> + %1 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg0: memref<2xf32>) out(%1: memref<2xf32>) br ^exit(%1 : memref<2xf32>) ^exit(%arg2: memref<2xf32>): @@ -301,8 +301,8 @@ return } -// CHECK-NEXT: %{{.*}} = alloc() -// CHECK-NEXT: %{{.*}} = alloc() +// CHECK-NEXT: %{{.*}} = memref.alloc() +// CHECK-NEXT: %{{.*}} = memref.alloc() // CHECK-NEXT: cond_br // ----- @@ -326,16 +326,16 @@ ^bb1: br ^exit(%arg0 : memref<2xf32>) ^bb2: - %1 = alloc() : memref<2xf32> + %1 = memref.alloc() : memref<2xf32> 
test.buffer_based in(%arg0: memref<2xf32>) out(%1: memref<2xf32>) - dealloc %1 : memref<2xf32> + memref.dealloc %1 : memref<2xf32> br ^exit(%1 : memref<2xf32>) ^exit(%arg2: memref<2xf32>): test.copy(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>) return } -// CHECK-NEXT: %{{.*}} = alloc() +// CHECK-NEXT: %{{.*}} = memref.alloc() // CHECK-NEXT: cond_br // ----- @@ -355,10 +355,10 @@ ^bb1: br ^bb3(%arg1 : memref<2xf32>) ^bb2: - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): - %1 = alloc() : memref<2xf32> + %1 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>) %tmp1 = math.exp %gen1_arg0 : f32 test.region_yield %tmp1 : f32 @@ -368,10 +368,10 @@ test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) return } -// CHECK-NEXT: %[[ALLOC0:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc() // CHECK-NEXT: cond_br // CHECK: test.region_buffer_based -// CHECK: %[[ALLOC1:.*]] = alloc() +// CHECK: %[[ALLOC1:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based // ----- @@ -385,20 +385,20 @@ %arg0 : index, %arg1 : index) -> memref { %0 = cmpi eq, %arg0, %arg1 : index - %1 = alloc(%arg0, %arg0) : memref + %1 = memref.alloc(%arg0, %arg0) : memref %2 = scf.if %0 -> (memref) { scf.yield %1 : memref } else { - %3 = alloc(%arg0, %arg1) : memref + %3 = memref.alloc(%arg0, %arg1) : memref scf.yield %1 : memref } return %2 : memref } -// CHECK: %[[ALLOC0:.*]] = alloc(%arg0, %arg0) +// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0) // CHECK-NEXT: %{{.*}} = scf.if // CHECK: else -// CHECK-NEXT: %[[ALLOC1:.*]] = alloc(%arg0, %arg1) +// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%arg0, %arg1) // ----- @@ -411,18 +411,18 @@ %arg0 : index, %arg1 : index) -> memref { %0 = cmpi eq, %arg0, %arg1 : index - %1 = alloc(%arg0, %arg0) : memref + %1 = memref.alloc(%arg0, %arg0) : memref %2 = scf.if %0 -> (memref) { scf.yield %1 : memref } else { - %3 = alloc(%arg0, %arg1) : memref + %3 = memref.alloc(%arg0, %arg1) : memref scf.yield %3 : memref } return %2 : memref } -// CHECK: %[[ALLOC0:.*]] = alloc(%arg0, %arg0) -// CHECK-NEXT: %[[ALLOC1:.*]] = alloc(%arg0, %arg1) +// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0) +// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%arg0, %arg1) // CHECK-NEXT: %{{.*}} = scf.if // ----- @@ -437,24 +437,24 @@ %arg0 : index, %arg1 : index) -> memref { %0 = cmpi eq, %arg0, %arg1 : index - %1 = alloc(%arg0, %arg0) : memref + %1 = memref.alloc(%arg0, %arg0) : memref %2 = scf.if %0 -> (memref) { %3 = scf.if %0 -> (memref) { scf.yield %1 : memref } else { - %4 = alloc(%arg0, %arg1) : memref + %4 = memref.alloc(%arg0, %arg1) : memref scf.yield %4 : memref } scf.yield %3 : memref } else { - %5 = alloc(%arg1, %arg1) : memref + %5 = memref.alloc(%arg1, %arg1) : memref scf.yield %5 : memref } return %2 : memref } -// CHECK: %[[ALLOC0:.*]] = alloc(%arg0, %arg0) -// CHECK-NEXT: %[[ALLOC1:.*]] = alloc(%arg0, %arg1) -// CHECK-NEXT: %[[ALLOC2:.*]] = alloc(%arg1, %arg1) +// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0) +// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%arg0, %arg1) +// CHECK-NEXT: %[[ALLOC2:.*]] = memref.alloc(%arg1, %arg1) // CHECK-NEXT: %{{.*}} = scf.if // ----- @@ -472,13 +472,13 @@ %1 = constant 1 : i32 %2 = addi %arg0, %1 : i32 %3 = index_cast %2 : i32 to index - %4 = alloc(%arg2, %3) : memref + %4 = memref.alloc(%arg2, %3) : memref scf.yield %4 : memref } else { %1 = constant 2 : i32 %2 = addi 
%arg0, %1 : i32 %3 = index_cast %2 : i32 to index - %4 = alloc(%arg2, %3) : memref + %4 = memref.alloc(%arg2, %3) : memref scf.yield %4 : memref } return %0 : memref @@ -504,7 +504,7 @@ // CHECK-LABEL: func @inner_region_control_flow func @inner_region_control_flow(%arg0 : index) -> memref { - %0 = alloc(%arg0, %arg0) : memref + %0 = memref.alloc(%arg0, %arg0) : memref %1 = test.region_if %0 : memref -> (memref) then { ^bb0(%arg1 : memref): test.region_if_yield %arg1 : memref @@ -518,7 +518,7 @@ return %1 : memref } -// CHECK: %[[ALLOC0:.*]] = alloc(%arg0, %arg0) +// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0) // CHECK-NEXT: {{.*}} test.region_if // ----- @@ -531,13 +531,13 @@ func @inner_region_control_flow_div( %arg0 : index, %arg1 : index) -> memref { - %0 = alloc(%arg0, %arg0) : memref + %0 = memref.alloc(%arg0, %arg0) : memref %1 = test.region_if %0 : memref -> (memref) then { ^bb0(%arg2 : memref): test.region_if_yield %arg2 : memref } else { ^bb0(%arg2 : memref): - %2 = alloc(%arg0, %arg1) : memref + %2 = memref.alloc(%arg0, %arg1) : memref test.region_if_yield %2 : memref } join { ^bb0(%arg2 : memref): @@ -546,8 +546,8 @@ return %1 : memref } -// CHECK: %[[ALLOC0:.*]] = alloc(%arg0, %arg0) -// CHECK-NEXT: %[[ALLOC1:.*]] = alloc(%arg0, %arg1) +// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0) +// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%arg0, %arg1) // CHECK-NEXT: {{.*}} test.region_if // ----- @@ -560,7 +560,7 @@ ^bb1: br ^bb3(%arg1 : memref<2xf32>) ^bb2: - %0 = alloca() : memref<2xf32> + %0 = memref.alloca() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) br ^bb3(%0 : memref<2xf32>) ^bb3(%1: memref<2xf32>): @@ -571,7 +571,7 @@ // CHECK-NEXT: cond_br // CHECK: ^bb2 // CHECK: ^bb2 -// CHECK-NEXT: %[[ALLOCA:.*]] = alloca() +// CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca() // CHECK-NEXT: test.buffer_based // ----- @@ -584,7 +584,7 @@ %arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { - %0 = alloca() : memref<2xf32> + %0 = memref.alloca() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -598,19 +598,19 @@ ^bb4(%6: memref<2xf32>): br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>) ^bb5(%7: memref<2xf32>, %8: memref<2xf32>): - %9 = alloc() : memref<2xf32> + %9 = memref.alloc() : memref<2xf32> test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>) test.copy(%9, %arg2) : (memref<2xf32>, memref<2xf32>) return } -// CHECK-NEXT: %[[ALLOCA:.*]] = alloca() +// CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca() // CHECK-NEXT: test.buffer_based // CHECK: ^bb5 // CHECK: ^bb5 // CHECK: ^bb5 // CHECK-NEXT: ^bb5 -// CHECK-NEXT: %[[ALLOC:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based // ----- @@ -627,10 +627,10 @@ ^bb1: br ^bb3(%arg1 : memref<2xf32>) ^bb2: - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): - %1 = alloca() : memref<2xf32> + %1 = memref.alloca() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>) %tmp1 = math.exp %gen1_arg0 : f32 test.region_yield %tmp1 : f32 @@ -640,10 +640,10 @@ test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) return } -// CHECK-NEXT: %[[ALLOC:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc() // CHECK-NEXT: cond_br // CHECK: test.region_buffer_based -// CHECK: %[[ALLOCA:.*]] = alloca() +// CHECK: 
%[[ALLOCA:.*]] = memref.alloca() // CHECK-NEXT: test.buffer_based // ----- @@ -658,20 +658,20 @@ %step: index, %buf: memref<2xf32>, %res: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> %1 = scf.for %i = %lb to %ub step %step iter_args(%iterBuf = %buf) -> memref<2xf32> { %2 = cmpi eq, %i, %ub : index - %3 = alloc() : memref<2xf32> + %3 = memref.alloc() : memref<2xf32> scf.yield %3 : memref<2xf32> } test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>) return } -// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK: %[[ALLOC0:.*]] = memref.alloc() // CHECK-NEXT: {{.*}} scf.for -// CHECK: %[[ALLOC1:.*]] = alloc() +// CHECK: %[[ALLOC1:.*]] = memref.alloc() // ----- @@ -685,12 +685,12 @@ %ub: index, %step: index, %buf: memref<2xf32>) -> memref<2xf32> { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> %1 = scf.for %i = %lb to %ub step %step iter_args(%iterBuf = %buf) -> memref<2xf32> { %2 = cmpi eq, %i, %ub : index %3 = scf.if %2 -> (memref<2xf32>) { - %4 = alloc() : memref<2xf32> + %4 = memref.alloc() : memref<2xf32> scf.yield %4 : memref<2xf32> } else { scf.yield %0 : memref<2xf32> @@ -700,9 +700,9 @@ return %1 : memref<2xf32> } -// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK: %[[ALLOC0:.*]] = memref.alloc() // CHECK-NEXT: {{.*}} scf.for -// CHECK: %[[ALLOC1:.*]] = alloc() +// CHECK: %[[ALLOC1:.*]] = memref.alloc() // ----- @@ -717,17 +717,17 @@ %step: index, %buf: memref<2xf32>, %res: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> %1 = scf.for %i = %lb to %ub step %step iter_args(%iterBuf = %buf) -> memref<2xf32> { %2 = scf.for %i2 = %lb to %ub step %step iter_args(%iterBuf2 = %iterBuf) -> memref<2xf32> { %3 = scf.for %i3 = %lb to %ub step %step iter_args(%iterBuf3 = %iterBuf2) -> memref<2xf32> { - %4 = alloc() : memref<2xf32> + %4 = memref.alloc() : memref<2xf32> %5 = cmpi eq, %i, %ub : index %6 = scf.if %5 -> (memref<2xf32>) { - %7 = alloc() : memref<2xf32> + %7 = memref.alloc() : memref<2xf32> scf.yield %7 : memref<2xf32> } else { scf.yield %iterBuf3 : memref<2xf32> @@ -742,12 +742,12 @@ return } -// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK: %[[ALLOC0:.*]] = memref.alloc() // CHECK-NEXT: {{.*}} = scf.for // CHECK-NEXT: {{.*}} = scf.for // CHECK-NEXT: {{.*}} = scf.for -// CHECK-NEXT: %[[ALLOC1:.*]] = alloc() -// CHECK: %[[ALLOC2:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc() +// CHECK: %[[ALLOC2:.*]] = memref.alloc() // ----- @@ -759,7 +759,7 @@ %arg0: index, %buf: memref, %res: memref) { - %0 = alloc(%arg0) : memref + %0 = memref.alloc(%arg0) : memref %1 = scf.for %i = %lb to %ub step %step iter_args(%iterBuf = %buf) -> memref { %2 = scf.for %i2 = %lb to %ub step %step @@ -768,7 +768,7 @@ iter_args(%iterBuf3 = %iterBuf2) -> memref { %5 = cmpi eq, %i, %ub : index %6 = scf.if %5 -> (memref) { - %7 = alloc(%i3) : memref + %7 = memref.alloc(%i3) : memref scf.yield %7 : memref } else { scf.yield %iterBuf3 : memref @@ -784,8 +784,8 @@ } -// CHECK: %[[ALLOC0:.*]] = alloc({{.*}}) +// CHECK: %[[ALLOC0:.*]] = memref.alloc({{.*}}) // CHECK-NEXT: {{.*}} = scf.for // CHECK-NEXT: {{.*}} = scf.for // CHECK-NEXT: {{.*}} = scf.for -// CHECK: %[[ALLOC1:.*]] = alloc({{.*}}) +// CHECK: %[[ALLOC1:.*]] = memref.alloc({{.*}}) diff --git a/mlir/test/Transforms/buffer-loop-hoisting.mlir b/mlir/test/Transforms/buffer-loop-hoisting.mlir --- a/mlir/test/Transforms/buffer-loop-hoisting.mlir +++ b/mlir/test/Transforms/buffer-loop-hoisting.mlir @@ -17,7 +17,7 @@ ^bb1: br ^bb3(%arg1 : memref<2xf32>) ^bb2: - %0 = alloc() 
: memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) br ^bb3(%0 : memref<2xf32>) ^bb3(%1: memref<2xf32>): @@ -26,7 +26,7 @@ } // CHECK-NEXT: cond_br -// CHECK: %[[ALLOC:.*]] = alloc() +// CHECK: %[[ALLOC:.*]] = memref.alloc() // ----- @@ -50,7 +50,7 @@ ^bb1: br ^bb3(%arg1 : memref) ^bb2(%0: index): - %1 = alloc(%0) : memref + %1 = memref.alloc(%0) : memref test.buffer_based in(%arg1: memref) out(%1: memref) br ^bb3(%1 : memref) ^bb3(%2: memref): @@ -61,7 +61,7 @@ // CHECK-NEXT: cond_br // CHECK: ^bb2 // CHECK: ^bb2(%[[IDX:.*]]:{{.*}}) -// CHECK-NEXT: %[[ALLOC0:.*]] = alloc(%[[IDX]]) +// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc(%[[IDX]]) // CHECK-NEXT: test.buffer_based // ----- @@ -81,10 +81,10 @@ ^bb1: br ^bb3(%arg1 : memref<2xf32>) ^bb2: - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): - %1 = alloc() : memref<2xf32> + %1 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>) %tmp1 = math.exp %gen1_arg0 : f32 test.region_yield %tmp1 : f32 @@ -95,9 +95,9 @@ return } // CHECK-NEXT: cond_br -// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK: %[[ALLOC0:.*]] = memref.alloc() // CHECK: test.region_buffer_based -// CHECK: %[[ALLOC1:.*]] = alloc() +// CHECK: %[[ALLOC1:.*]] = memref.alloc() // CHECK-NEXT: test.buffer_based // ----- @@ -111,20 +111,20 @@ %arg0 : index, %arg1 : index) -> memref { %0 = cmpi eq, %arg0, %arg1 : index - %1 = alloc(%arg0, %arg0) : memref + %1 = memref.alloc(%arg0, %arg0) : memref %2 = scf.if %0 -> (memref) { scf.yield %1 : memref } else { - %3 = alloc(%arg0, %arg1) : memref + %3 = memref.alloc(%arg0, %arg1) : memref scf.yield %1 : memref } return %2 : memref } -// CHECK: %[[ALLOC0:.*]] = alloc(%arg0, %arg0) +// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0) // CHECK-NEXT: %{{.*}} = scf.if // CHECK: else -// CHECK-NEXT: %[[ALLOC1:.*]] = alloc(%arg0, %arg1) +// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%arg0, %arg1) // ----- @@ -138,20 +138,20 @@ %step: index, %buf: memref<2xf32>, %res: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> %1 = scf.for %i = %lb to %ub step %step iter_args(%iterBuf = %buf) -> memref<2xf32> { %2 = cmpi eq, %i, %ub : index - %3 = alloc() : memref<2xf32> + %3 = memref.alloc() : memref<2xf32> scf.yield %3 : memref<2xf32> } test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>) return } -// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK: %[[ALLOC0:.*]] = memref.alloc() // CHECK-NEXT: {{.*}} scf.for -// CHECK: %[[ALLOC1:.*]] = alloc() +// CHECK: %[[ALLOC1:.*]] = memref.alloc() // ----- @@ -165,12 +165,12 @@ %ub: index, %step: index, %buf: memref<2xf32>) -> memref<2xf32> { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> %1 = scf.for %i = %lb to %ub step %step iter_args(%iterBuf = %buf) -> memref<2xf32> { %2 = cmpi eq, %i, %ub : index %3 = scf.if %2 -> (memref<2xf32>) { - %4 = alloc() : memref<2xf32> + %4 = memref.alloc() : memref<2xf32> scf.yield %4 : memref<2xf32> } else { scf.yield %0 : memref<2xf32> @@ -180,9 +180,9 @@ return %1 : memref<2xf32> } -// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK: %[[ALLOC0:.*]] = memref.alloc() // CHECK-NEXT: {{.*}} scf.for -// CHECK: %[[ALLOC1:.*]] = alloc() +// CHECK: %[[ALLOC1:.*]] = memref.alloc() // ----- @@ -198,23 +198,23 @@ %step: index, %buf: memref<2xf32>, %res: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = 
memref.alloc() : memref<2xf32> %1 = scf.for %i = %lb to %ub step %step iter_args(%iterBuf = %buf) -> memref<2xf32> { %2 = scf.for %i2 = %lb to %ub step %step iter_args(%iterBuf2 = %iterBuf) -> memref<2xf32> { %3 = scf.for %i3 = %lb to %ub step %step iter_args(%iterBuf3 = %iterBuf2) -> memref<2xf32> { - %4 = alloc() : memref<2xf32> + %4 = memref.alloc() : memref<2xf32> %5 = cmpi eq, %i, %ub : index %6 = scf.if %5 -> (memref<2xf32>) { - %7 = alloc() : memref<2xf32> - %8 = alloc() : memref<2xf32> + %7 = memref.alloc() : memref<2xf32> + %8 = memref.alloc() : memref<2xf32> scf.yield %8 : memref<2xf32> } else { scf.yield %iterBuf3 : memref<2xf32> } - %9 = alloc() : memref<2xf32> + %9 = memref.alloc() : memref<2xf32> scf.yield %6 : memref<2xf32> } scf.yield %3 : memref<2xf32> @@ -225,15 +225,15 @@ return } -// CHECK: %[[ALLOC0:.*]] = alloc() -// CHECK-NEXT: %[[ALLOC1:.*]] = alloc() -// CHECK-NEXT: %[[ALLOC2:.*]] = alloc() +// CHECK: %[[ALLOC0:.*]] = memref.alloc() +// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc() +// CHECK-NEXT: %[[ALLOC2:.*]] = memref.alloc() // CHECK-NEXT: {{.*}} = scf.for // CHECK-NEXT: {{.*}} = scf.for // CHECK-NEXT: {{.*}} = scf.for // CHECK: {{.*}} = scf.if -// CHECK: %[[ALLOC3:.*]] = alloc() -// CHECK: %[[ALLOC4:.*]] = alloc() +// CHECK: %[[ALLOC3:.*]] = memref.alloc() +// CHECK: %[[ALLOC4:.*]] = memref.alloc() // ----- @@ -245,22 +245,22 @@ %arg0: index, %buf: memref, %res: memref) { - %0 = alloc(%arg0) : memref + %0 = memref.alloc(%arg0) : memref %1 = scf.for %i = %lb to %ub step %step iter_args(%iterBuf = %buf) -> memref { %2 = scf.for %i2 = %lb to %ub step %step iter_args(%iterBuf2 = %iterBuf) -> memref { %3 = scf.for %i3 = %lb to %ub step %step iter_args(%iterBuf3 = %iterBuf2) -> memref { - %4 = alloc(%i3) : memref + %4 = memref.alloc(%i3) : memref %5 = cmpi eq, %i, %ub : index %6 = scf.if %5 -> (memref) { - %7 = alloc(%i3) : memref + %7 = memref.alloc(%i3) : memref scf.yield %7 : memref } else { scf.yield %iterBuf3 : memref } - %8 = alloc(%i3) : memref + %8 = memref.alloc(%i3) : memref scf.yield %6 : memref } scf.yield %3 : memref @@ -271,12 +271,12 @@ return } -// CHECK: %[[ALLOC0:.*]] = alloc({{.*}}) +// CHECK: %[[ALLOC0:.*]] = memref.alloc({{.*}}) // CHECK-NEXT: {{.*}} = scf.for // CHECK-NEXT: {{.*}} = scf.for // CHECK-NEXT: {{.*}} = scf.for -// CHECK: %[[ALLOC1:.*]] = alloc({{.*}}) -// CHECK: %[[ALLOC2:.*]] = alloc({{.*}}) +// CHECK: %[[ALLOC1:.*]] = memref.alloc({{.*}}) +// CHECK: %[[ALLOC2:.*]] = memref.alloc({{.*}}) // ----- @@ -287,18 +287,18 @@ %step: index, %buf: memref<2xf32>, %res: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> %1 = scf.for %i = %lb to %ub step %step iter_args(%iterBuf = %buf) -> memref<2xf32> { - %2 = alloc() : memref<2xf32> + %2 = memref.alloc() : memref<2xf32> scf.yield %0 : memref<2xf32> } test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>) return } -// CHECK: %[[ALLOC0:.*]] = alloc({{.*}}) -// CHECK-NEXT: %[[ALLOC1:.*]] = alloc({{.*}}) +// CHECK: %[[ALLOC0:.*]] = memref.alloc({{.*}}) +// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc({{.*}}) // CHECK-NEXT: {{.*}} = scf.for // ----- @@ -312,7 +312,7 @@ %res: memref<2xf32>) { %0 = scf.for %i = %lb to %ub step %step iter_args(%iterBuf = %buf) -> memref<2xf32> { - %1 = alloc() : memref<2xf32> + %1 = memref.alloc() : memref<2xf32> scf.yield %1 : memref<2xf32> } test.copy(%0, %res) : (memref<2xf32>, memref<2xf32>) @@ -320,7 +320,7 @@ } // CHECK: {{.*}} = scf.for -// CHECK-NEXT: %[[ALLOC0:.*]] = alloc({{.*}}) +// CHECK-NEXT: %[[ALLOC0:.*]] = 
memref.alloc({{.*}}) // ----- @@ -331,12 +331,12 @@ %step: index, %buf: memref<2xf32>, %res: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> %1 = scf.for %i = %lb to %ub step %step iter_args(%iterBuf = %buf) -> memref<2xf32> { %2 = scf.for %i2 = %lb to %ub step %step iter_args(%iterBuf2 = %iterBuf) -> memref<2xf32> { - %3 = alloc() : memref<2xf32> + %3 = memref.alloc() : memref<2xf32> scf.yield %0 : memref<2xf32> } scf.yield %0 : memref<2xf32> @@ -345,8 +345,8 @@ return } -// CHECK: %[[ALLOC0:.*]] = alloc({{.*}}) -// CHECK-NEXT: %[[ALLOC1:.*]] = alloc({{.*}}) +// CHECK: %[[ALLOC0:.*]] = memref.alloc({{.*}}) +// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc({{.*}}) // CHECK-NEXT: {{.*}} = scf.for // ----- @@ -362,7 +362,7 @@ iter_args(%iterBuf = %buf) -> memref<2xf32> { %1 = cmpi eq, %i, %ub : index %2 = scf.if %1 -> (memref<2xf32>) { - %3 = alloc() : memref<2xf32> + %3 = memref.alloc() : memref<2xf32> scf.yield %3 : memref<2xf32> } else { scf.yield %iterBuf : memref<2xf32> @@ -375,7 +375,7 @@ // CHECK: {{.*}} = scf.for // CHECK: {{.*}} = scf.if -// CHECK-NEXT: %[[ALLOC0:.*]] = alloc({{.*}}) +// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc({{.*}}) // ----- @@ -386,12 +386,12 @@ %step: index, %buf: memref<2xf32>, %res: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> %1 = cmpi eq, %lb, %ub : index %2 = scf.if %1 -> (memref<2xf32>) { %3 = scf.for %i = %lb to %ub step %step iter_args(%iterBuf = %buf) -> memref<2xf32> { - %4 = alloc() : memref<2xf32> + %4 = memref.alloc() : memref<2xf32> scf.yield %0 : memref<2xf32> } scf.yield %0 : memref<2xf32> @@ -405,7 +405,7 @@ } // CHECK: {{.*}} = scf.if -// CHECK-NEXT: %[[ALLOC0:.*]] = alloc({{.*}}) +// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc({{.*}}) // CHECK: {{.*}} = scf.for // ----- @@ -417,19 +417,19 @@ %step: index, %buf: memref<2xf32>, %res: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> %1 = scf.for %i = %lb to %ub step %step iter_args(%iterBuf = %buf) -> memref<2xf32> { - %2 = alloc(%i) : memref + %2 = memref.alloc(%i) : memref scf.yield %0 : memref<2xf32> } test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>) return } -// CHECK: %[[ALLOC0:.*]] = alloc({{.*}}) +// CHECK: %[[ALLOC0:.*]] = memref.alloc({{.*}}) // CHECK-NEXT: {{.*}} = scf.for -// CHECK-NEXT: %[[ALLOC1:.*]] = alloc({{.*}}) +// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc({{.*}}) // ----- @@ -440,12 +440,12 @@ %step: index, %buf: memref<2xf32>, %res: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> %1 = scf.for %i = %lb to %ub step %step iter_args(%iterBuf = %buf) -> memref<2xf32> { %2 = scf.for %i2 = %lb to %ub step %step iter_args(%iterBuf2 = %iterBuf) -> memref<2xf32> { - %3 = alloc(%i) : memref + %3 = memref.alloc(%i) : memref scf.yield %0 : memref<2xf32> } scf.yield %0 : memref<2xf32> @@ -454,7 +454,7 @@ return } -// CHECK: %[[ALLOC0:.*]] = alloc({{.*}}) +// CHECK: %[[ALLOC0:.*]] = memref.alloc({{.*}}) // CHECK-NEXT: {{.*}} = scf.for -// CHECK-NEXT: %[[ALLOC1:.*]] = alloc({{.*}}) +// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc({{.*}}) // CHECK-NEXT: {{.*}} = scf.for diff --git a/mlir/test/Transforms/buffer-results-to-out-params.mlir b/mlir/test/Transforms/buffer-results-to-out-params.mlir --- a/mlir/test/Transforms/buffer-results-to-out-params.mlir +++ b/mlir/test/Transforms/buffer-results-to-out-params.mlir @@ -60,7 +60,7 @@ func private @callee() -> memref<1xf32> // CHECK-LABEL: func @call_basic() { -// CHECK: %[[OUTPARAM:.*]] = alloc() : 
memref<1xf32> +// CHECK: %[[OUTPARAM:.*]] = memref.alloc() : memref<1xf32> // CHECK: call @callee(%[[OUTPARAM]]) : (memref<1xf32>) -> () // CHECK: "test.sink"(%[[OUTPARAM]]) : (memref<1xf32>) -> () // CHECK: return @@ -77,8 +77,8 @@ func private @callee() -> (memref<1xf32>, memref<2xf32>) // CHECK-LABEL: func @call_multiple_result() { -// CHECK: %[[RESULT0:.*]] = alloc() : memref<1xf32> -// CHECK: %[[RESULT1:.*]] = alloc() : memref<2xf32> +// CHECK: %[[RESULT0:.*]] = memref.alloc() : memref<1xf32> +// CHECK: %[[RESULT1:.*]] = memref.alloc() : memref<2xf32> // CHECK: call @callee(%[[RESULT0]], %[[RESULT1]]) : (memref<1xf32>, memref<2xf32>) -> () // CHECK: "test.sink"(%[[RESULT0]], %[[RESULT1]]) : (memref<1xf32>, memref<2xf32>) -> () // CHECK: } @@ -93,7 +93,7 @@ func private @callee() -> (i1, memref<1xf32>, i32) // CHECK-LABEL: func @call_non_memref_result() { -// CHECK: %[[RESULT0:.*]] = alloc() : memref<1xf32> +// CHECK: %[[RESULT0:.*]] = memref.alloc() : memref<1xf32> // CHECK: %[[NON_MEMREF_RESULTS:.*]]:2 = call @callee(%[[RESULT0]]) : (memref<1xf32>) -> (i1, i32) // CHECK: "test.sink"(%[[NON_MEMREF_RESULTS]]#0, %[[RESULT0]], %[[NON_MEMREF_RESULTS]]#1) : (i1, memref<1xf32>, i32) -> () // CHECK: } diff --git a/mlir/test/Transforms/canonicalize-block-merge.mlir b/mlir/test/Transforms/canonicalize-block-merge.mlir --- a/mlir/test/Transforms/canonicalize-block-merge.mlir +++ b/mlir/test/Transforms/canonicalize-block-merge.mlir @@ -213,13 +213,13 @@ cond_br %arg0, ^bb2, ^bb3 ^bb2: - // CHECK: store %{{.*}}, %{{.*}} : memref - store %c0_i32, %arg1[] : memref + // CHECK: memref.store %{{.*}}, %{{.*}} : memref + memref.store %c0_i32, %arg1[] : memref br ^bb1 ^bb3: - // CHECK: store %{{.*}}, %{{.*}} : memref - store %true, %arg2[] : memref + // CHECK: memref.store %{{.*}}, %{{.*}} : memref + memref.store %true, %arg2[] : memref br ^bb1 } diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir --- a/mlir/test/Transforms/canonicalize.mlir +++ b/mlir/test/Transforms/canonicalize.mlir @@ -29,7 +29,7 @@ // CHECK: %c4 = constant 4 : index %c1 = constant 1 : index - %0 = dim %arg0, %c1 : tensor<8x4xf32> + %0 = memref.dim %arg0, %c1 : tensor<8x4xf32> // CHECK-NEXT: return %c4 return %0 : index @@ -53,7 +53,7 @@ // CHECK-LABEL: func @trivial_dce func @trivial_dce(%arg0: tensor<8x4xf32>) { %c1 = constant 1 : index - %0 = dim %arg0, %c1 : tensor<8x4xf32> + %0 = memref.dim %arg0, %c1 : tensor<8x4xf32> // CHECK-NEXT: return return } @@ -61,9 +61,9 @@ // CHECK-LABEL: func @load_dce func @load_dce(%arg0: index) { %c4 = constant 4 : index - %a = alloc(%c4) : memref - %2 = load %a[%arg0] : memref - dealloc %a: memref + %a = memref.alloc(%c4) : memref + %2 = memref.load %a[%arg0] : memref + memref.dealloc %a: memref // CHECK-NEXT: return return } @@ -313,22 +313,22 @@ // CHECK-LABEL: func @memref_cast_folding func @memref_cast_folding(%arg0: memref<4 x f32>, %arg1: f32) -> (f32, f32) { - %0 = memref_cast %arg0 : memref<4xf32> to memref + %0 = memref.cast %arg0 : memref<4xf32> to memref // CHECK-NEXT: %c0 = constant 0 : index %c0 = constant 0 : index - %dim = dim %0, %c0 : memref + %dim = memref.dim %0, %c0 : memref // CHECK-NEXT: affine.load %arg0[3] %1 = affine.load %0[%dim - 1] : memref - // CHECK-NEXT: store %arg1, %arg0[%c0] : memref<4xf32> - store %arg1, %0[%c0] : memref + // CHECK-NEXT: memref.store %arg1, %arg0[%c0] : memref<4xf32> + memref.store %arg1, %0[%c0] : memref - // CHECK-NEXT: %{{.*}} = load %arg0[%c0] : memref<4xf32> - %2 = load %0[%c0] : memref + // 
CHECK-NEXT: %{{.*}} = memref.load %arg0[%c0] : memref<4xf32> + %2 = memref.load %0[%c0] : memref - // CHECK-NEXT: dealloc %arg0 : memref<4xf32> - dealloc %0: memref + // CHECK-NEXT: memref.dealloc %arg0 : memref<4xf32> + memref.dealloc %0: memref // CHECK-NEXT: return %{{.*}} return %1, %2 : f32, f32 @@ -337,10 +337,10 @@ // CHECK-LABEL: @fold_memref_cast_in_memref_cast // CHECK-SAME: (%[[ARG0:.*]]: memref<42x42xf64>) func @fold_memref_cast_in_memref_cast(%0: memref<42x42xf64>) { - // CHECK: %[[folded:.*]] = memref_cast %[[ARG0]] : memref<42x42xf64> to memref - %4 = memref_cast %0 : memref<42x42xf64> to memref - // CHECK-NOT: memref_cast - %5 = memref_cast %4 : memref to memref + // CHECK: %[[folded:.*]] = memref.cast %[[ARG0]] : memref<42x42xf64> to memref + %4 = memref.cast %0 : memref<42x42xf64> to memref + // CHECK-NOT: memref.cast + %5 = memref.cast %4 : memref to memref // CHECK: "test.user"(%[[folded]]) "test.user"(%5) : (memref) -> () return @@ -349,9 +349,9 @@ // CHECK-LABEL: @fold_memref_cast_chain // CHECK-SAME: (%[[ARG0:.*]]: memref<42x42xf64>) func @fold_memref_cast_chain(%0: memref<42x42xf64>) { - // CHECK-NOT: memref_cast - %4 = memref_cast %0 : memref<42x42xf64> to memref - %5 = memref_cast %4 : memref to memref<42x42xf64> + // CHECK-NOT: memref.cast + %4 = memref.cast %0 : memref<42x42xf64> to memref + %5 = memref.cast %4 : memref to memref<42x42xf64> // CHECK: "test.user"(%[[ARG0]]) "test.user"(%5) : (memref<42x42xf64>) -> () return @@ -359,11 +359,11 @@ // CHECK-LABEL: func @alloc_const_fold func @alloc_const_fold() -> memref { - // CHECK-NEXT: %0 = alloc() : memref<4xf32> + // CHECK-NEXT: %0 = memref.alloc() : memref<4xf32> %c4 = constant 4 : index - %a = alloc(%c4) : memref + %a = memref.alloc(%c4) : memref - // CHECK-NEXT: %1 = memref_cast %0 : memref<4xf32> to memref + // CHECK-NEXT: %1 = memref.cast %0 : memref<4xf32> to memref // CHECK-NEXT: return %1 : memref return %a : memref } @@ -372,30 +372,30 @@ func @dead_alloc_fold() { // CHECK-NEXT: return %c4 = constant 4 : index - %a = alloc(%c4) : memref + %a = memref.alloc(%c4) : memref return } // CHECK-LABEL: func @dead_dealloc_fold func @dead_dealloc_fold() { // CHECK-NEXT: return - %a = alloc() : memref<4xf32> - dealloc %a: memref<4xf32> + %a = memref.alloc() : memref<4xf32> + memref.dealloc %a: memref<4xf32> return } // CHECK-LABEL: func @dead_dealloc_fold_multi_use func @dead_dealloc_fold_multi_use(%cond : i1) { // CHECK-NEXT: return - %a = alloc() : memref<4xf32> + %a = memref.alloc() : memref<4xf32> cond_br %cond, ^bb1, ^bb2 ^bb1: - dealloc %a: memref<4xf32> + memref.dealloc %a: memref<4xf32> return ^bb2: - dealloc %a: memref<4xf32> + memref.dealloc %a: memref<4xf32> return } @@ -423,29 +423,29 @@ %N = constant 1024 : index %K = constant 512 : index - // CHECK-NEXT: alloc(%arg0) : memref - %a = alloc(%L, %N) : memref + // CHECK-NEXT: memref.alloc(%arg0) : memref + %a = memref.alloc(%L, %N) : memref - // CHECK-NEXT: alloc(%arg1) : memref<4x1024x8x512x?xf32> - %b = alloc(%N, %K, %M) : memref<4 x ? x 8 x ? x ? x f32> + // CHECK-NEXT: memref.alloc(%arg1) : memref<4x1024x8x512x?xf32> + %b = memref.alloc(%N, %K, %M) : memref<4 x ? x 8 x ? x ? 
x f32> - // CHECK-NEXT: alloc() : memref<512x1024xi32> - %c = alloc(%K, %N) : memref + // CHECK-NEXT: memref.alloc() : memref<512x1024xi32> + %c = memref.alloc(%K, %N) : memref - // CHECK: alloc() : memref<9x9xf32> - %d = alloc(%nine, %nine) : memref + // CHECK: memref.alloc() : memref<9x9xf32> + %d = memref.alloc(%nine, %nine) : memref - // CHECK: alloca(%arg1) : memref<4x1024x8x512x?xf32> - %e = alloca(%N, %K, %M) : memref<4 x ? x 8 x ? x ? x f32> + // CHECK: memref.alloca(%arg1) : memref<4x1024x8x512x?xf32> + %e = memref.alloca(%N, %K, %M) : memref<4 x ? x 8 x ? x ? x f32> // CHECK: affine.for affine.for %i = 0 to %L { // CHECK-NEXT: affine.for affine.for %j = 0 to 10 { - // CHECK-NEXT: load %0[%arg2, %arg3] : memref - // CHECK-NEXT: store %{{.*}}, %1[%c0, %c0, %arg2, %arg3, %c0] : memref<4x1024x8x512x?xf32> - %v = load %a[%i, %j] : memref - store %v, %b[%zero, %zero, %i, %j, %zero] : memref<4x?x8x?x?xf32> + // CHECK-NEXT: memref.load %0[%arg2, %arg3] : memref + // CHECK-NEXT: memref.store %{{.*}}, %1[%c0, %c0, %arg2, %arg3, %c0] : memref<4x1024x8x512x?xf32> + %v = memref.load %a[%i, %j] : memref + memref.store %v, %b[%zero, %zero, %i, %j, %zero] : memref<4x?x8x?x?xf32> } } @@ -468,24 +468,24 @@ %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index - %0 = alloc(%arg0, %arg1) : memref - %1 = alloc(%arg1, %arg2) : memref - %2 = dim %1, %c2 : memref + %0 = memref.alloc(%arg0, %arg1) : memref + %1 = memref.alloc(%arg1, %arg2) : memref + %2 = memref.dim %1, %c2 : memref affine.for %arg3 = 0 to %2 { - %3 = alloc(%arg0) : memref - %ub = dim %3, %c0 : memref + %3 = memref.alloc(%arg0) : memref + %ub = memref.dim %3, %c0 : memref affine.for %arg4 = 0 to %ub { - %s = dim %0, %c0 : memref - %v = std.view %3[%c0][%arg4, %s] : memref to memref - %sv = subview %0[%c0, %c0][%s,%arg4][%c1,%c1] : memref to memref - %l = dim %v, %c1 : memref - %u = dim %sv, %c0 : memref + %s = memref.dim %0, %c0 : memref + %v = memref.view %3[%c0][%arg4, %s] : memref to memref + %sv = memref.subview %0[%c0, %c0][%s,%arg4][%c1,%c1] : memref to memref + %l = memref.dim %v, %c1 : memref + %u = memref.dim %sv, %c0 : memref affine.for %arg5 = %l to %u { "foo"() : () -> () } - %sv2 = subview %0[0, 0][17, %arg4][1, 1] : memref to memref<17x?xf32, #map3> - %l2 = dim %v, %c1 : memref - %u2 = dim %sv2, %c1 : memref<17x?xf32, #map3> + %sv2 = memref.subview %0[0, 0][17, %arg4][1, 1] : memref to memref<17x?xf32, #map3> + %l2 = memref.dim %v, %c1 : memref + %u2 = memref.dim %sv2, %c1 : memref<17x?xf32, #map3> scf.for %arg5 = %l2 to %u2 step %c1 { "foo"() : () -> () } @@ -502,13 +502,13 @@ // CHECK-NEXT: } // CHECK-NEXT: } - %A = view %BUF[%c0][%M, %K] : memref to memref - %B = view %BUF[%c0][%K, %N] : memref to memref - %C = view %BUF[%c0][%M, %N] : memref to memref + %A = memref.view %BUF[%c0][%M, %K] : memref to memref + %B = memref.view %BUF[%c0][%K, %N] : memref to memref + %C = memref.view %BUF[%c0][%M, %N] : memref to memref - %M_ = dim %A, %c0 : memref - %K_ = dim %A, %c1 : memref - %N_ = dim %C, %c1 : memref + %M_ = memref.dim %A, %c0 : memref + %K_ = memref.dim %A, %c1 : memref + %N_ = memref.dim %C, %c1 : memref scf.for %i = %c0 to %M_ step %c1 { scf.for %j = %c0 to %N_ step %c1 { scf.for %k = %c0 to %K_ step %c1 { @@ -533,9 +533,9 @@ // CHECK-NEXT: %c42_i32 = constant 42 : i32 // CHECK-NEXT: affine.for %arg1 = 0 to 8 { affine.for %arg1 = 0 to 8 { - // CHECK-NEXT: store %c42_i32, %arg0[%arg1] + // CHECK-NEXT: memref.store %c42_i32, %arg0[%arg1] %c42_i32 = constant 42 : i32 - store %c42_i32, %arg0[%arg1] 
: memref<8xi32> + memref.store %c42_i32, %arg0[%arg1] : memref<8xi32> } return } @@ -547,8 +547,8 @@ %VT_i_s = affine.apply affine_map<(d0) -> (d0 floordiv 8)> (%VT_i) %VT_k_l = affine.apply affine_map<(d0) -> (d0 floordiv 16)> (%VT_i) - // CHECK: = alloc() : memref<64x32xf32> - %Av = alloc(%VT_i_s, %VT_k_l) : memref + // CHECK: = memref.alloc() : memref<64x32xf32> + %Av = memref.alloc(%VT_i_s, %VT_k_l) : memref return %Av : memref } @@ -663,11 +663,11 @@ // CHECK-LABEL: cast_values func @cast_values(%arg0: memref) -> memref<2xi32> { // NOP cast - %1 = memref_cast %arg0 : memref to memref - // CHECK-NEXT: %[[RET:.*]] = memref_cast %arg0 : memref to memref<2xi32> - %3 = memref_cast %1 : memref to memref<2xi32> + %1 = memref.cast %arg0 : memref to memref + // CHECK-NEXT: %[[RET:.*]] = memref.cast %arg0 : memref to memref<2xi32> + %3 = memref.cast %1 : memref to memref<2xi32> // NOP cast - %5 = memref_cast %3 : memref<2xi32> to memref<2xi32> + %5 = memref.cast %3 : memref<2xi32> to memref<2xi32> // CHECK-NEXT: return %[[RET]] : memref<2xi32> return %5 : memref<2xi32> } @@ -677,33 +677,33 @@ // CHECK-LABEL: func @view func @view(%arg0 : index) -> (f32, f32, f32, f32) { // CHECK: %[[C15:.*]] = constant 15 : index - // CHECK: %[[ALLOC_MEM:.*]] = alloc() : memref<2048xi8> - %0 = alloc() : memref<2048xi8> + // CHECK: %[[ALLOC_MEM:.*]] = memref.alloc() : memref<2048xi8> + %0 = memref.alloc() : memref<2048xi8> %c0 = constant 0 : index %c7 = constant 7 : index %c11 = constant 11 : index %c15 = constant 15 : index // Test: fold constant sizes. - // CHECK: std.view %[[ALLOC_MEM]][%[[C15]]][] : memref<2048xi8> to memref<7x11xf32> - %1 = view %0[%c15][%c7, %c11] : memref<2048xi8> to memref - %r0 = load %1[%c0, %c0] : memref + // CHECK: memref.view %[[ALLOC_MEM]][%[[C15]]][] : memref<2048xi8> to memref<7x11xf32> + %1 = memref.view %0[%c15][%c7, %c11] : memref<2048xi8> to memref + %r0 = memref.load %1[%c0, %c0] : memref // Test: fold one constant size. - // CHECK: std.view %[[ALLOC_MEM]][%[[C15]]][%arg0, %arg0] : memref<2048xi8> to memref - %2 = view %0[%c15][%arg0, %arg0, %c7] : memref<2048xi8> to memref - %r1 = load %2[%c0, %c0, %c0] : memref + // CHECK: memref.view %[[ALLOC_MEM]][%[[C15]]][%arg0, %arg0] : memref<2048xi8> to memref + %2 = memref.view %0[%c15][%arg0, %arg0, %c7] : memref<2048xi8> to memref + %r1 = memref.load %2[%c0, %c0, %c0] : memref // Test: preserve an existing static size. - // CHECK: std.view %[[ALLOC_MEM]][%[[C15]]][] : memref<2048xi8> to memref<7x4xf32> - %3 = view %0[%c15][%c7] : memref<2048xi8> to memref - %r2 = load %3[%c0, %c0] : memref - - // Test: folding static alloc and memref_cast into a view. - // CHECK: std.view %[[ALLOC_MEM]][%[[C15]]][] : memref<2048xi8> to memref<15x7xf32> - %4 = memref_cast %0 : memref<2048xi8> to memref - %5 = view %4[%c15][%c15, %c7] : memref to memref - %r3 = load %5[%c0, %c0] : memref + // CHECK: memref.view %[[ALLOC_MEM]][%[[C15]]][] : memref<2048xi8> to memref<7x4xf32> + %3 = memref.view %0[%c15][%c7] : memref<2048xi8> to memref + %r2 = memref.load %3[%c0, %c0] : memref + + // Test: folding static alloc and memref.cast into a view. 
+  // CHECK: memref.view %[[ALLOC_MEM]][%[[C15]]][] : memref<2048xi8> to memref<15x7xf32>
+  %4 = memref.cast %0 : memref<2048xi8> to memref
+  %5 = memref.view %4[%c15][%c15, %c7] : memref to memref
+  %r3 = memref.load %5[%c0, %c0] : memref
   return %r0, %r1, %r2, %r3 : f32, f32, f32, f32
 }

@@ -739,142 +739,142 @@
   // CHECK-NOT: constant 15 : index
   %c15 = constant 15 : index

-  // CHECK: %[[ALLOC0:.*]] = alloc()
-  %0 = alloc() : memref<8x16x4xf32, offset : 0, strides : [64, 4, 1]>
+  // CHECK: %[[ALLOC0:.*]] = memref.alloc()
+  %0 = memref.alloc() : memref<8x16x4xf32, offset : 0, strides : [64, 4, 1]>

   // Test: subview with constant base memref and constant operands is folded.
   // Note that the subview uses the base memrefs layout map because it used
   // zero offset and unit stride arguments.
-  // CHECK: subview %[[ALLOC0]][0, 0, 0] [7, 11, 2] [1, 1, 1] :
+  // CHECK: memref.subview %[[ALLOC0]][0, 0, 0] [7, 11, 2] [1, 1, 1] :
   // CHECK-SAME: memref<8x16x4xf32, #[[$BASE_MAP0]]>
   // CHECK-SAME: to memref<7x11x2xf32, #[[$BASE_MAP0]]>
-  %1 = subview %0[%c0, %c0, %c0] [%c7, %c11, %c2] [%c1, %c1, %c1]
+  %1 = memref.subview %0[%c0, %c0, %c0] [%c7, %c11, %c2] [%c1, %c1, %c1]
     : memref<8x16x4xf32, offset : 0, strides : [64, 4, 1]> to
     memref
-  %v0 = load %1[%c0, %c0, %c0] : memref
+  %v0 = memref.load %1[%c0, %c0, %c0] : memref

   // Test: subview with one dynamic operand can also be folded.
-  // CHECK: subview %[[ALLOC0]][0, %[[ARG0]], 0] [7, 11, 15] [1, 1, 1] :
+  // CHECK: memref.subview %[[ALLOC0]][0, %[[ARG0]], 0] [7, 11, 15] [1, 1, 1] :
   // CHECK-SAME: memref<8x16x4xf32, #[[$BASE_MAP0]]>
   // CHECK-SAME: to memref<7x11x15xf32, #[[$SUBVIEW_MAP0]]>
-  %2 = subview %0[%c0, %arg0, %c0] [%c7, %c11, %c15] [%c1, %c1, %c1]
+  %2 = memref.subview %0[%c0, %arg0, %c0] [%c7, %c11, %c15] [%c1, %c1, %c1]
     : memref<8x16x4xf32, offset : 0, strides : [64, 4, 1]> to
     memref
-  store %v0, %2[%c0, %c0, %c0] : memref
+  memref.store %v0, %2[%c0, %c0, %c0] : memref

-  // CHECK: %[[ALLOC1:.*]] = alloc(%[[ARG0]])
-  %3 = alloc(%arg0) : memref
+  // CHECK: %[[ALLOC1:.*]] = memref.alloc(%[[ARG0]])
+  %3 = memref.alloc(%arg0) : memref

   // Test: subview with constant operands but dynamic base memref is folded as long as the strides and offset of the base memref are static.
-  // CHECK: subview %[[ALLOC1]][0, 0, 0] [7, 11, 15] [1, 1, 1] :
+  // CHECK: memref.subview %[[ALLOC1]][0, 0, 0] [7, 11, 15] [1, 1, 1] :
   // CHECK-SAME: memref
   // CHECK-SAME: to memref<7x11x15xf32, #[[$BASE_MAP0]]>
-  %4 = subview %3[%c0, %c0, %c0] [%c7, %c11, %c15] [%c1, %c1, %c1]
+  %4 = memref.subview %3[%c0, %c0, %c0] [%c7, %c11, %c15] [%c1, %c1, %c1]
     : memref to
     memref
-  store %v0, %4[%c0, %c0, %c0] : memref
+  memref.store %v0, %4[%c0, %c0, %c0] : memref

   // Test: subview offset operands are folded correctly w.r.t. base strides.
-  // CHECK: subview %[[ALLOC0]][1, 2, 7] [7, 11, 2] [1, 1, 1] :
+  // CHECK: memref.subview %[[ALLOC0]][1, 2, 7] [7, 11, 2] [1, 1, 1] :
   // CHECK-SAME: memref<8x16x4xf32, #[[$BASE_MAP0]]> to
   // CHECK-SAME: memref<7x11x2xf32, #[[$SUBVIEW_MAP1]]>
-  %5 = subview %0[%c1, %c2, %c7] [%c7, %c11, %c2] [%c1, %c1, %c1]
+  %5 = memref.subview %0[%c1, %c2, %c7] [%c7, %c11, %c2] [%c1, %c1, %c1]
     : memref<8x16x4xf32, offset : 0, strides : [64, 4, 1]> to
    memref
-  store %v0, %5[%c0, %c0, %c0] : memref
+  memref.store %v0, %5[%c0, %c0, %c0] : memref

   // Test: subview stride operands are folded correctly w.r.t. base strides.
- // CHECK: subview %[[ALLOC0]][0, 0, 0] [7, 11, 2] [2, 7, 11] : + // CHECK: memref.subview %[[ALLOC0]][0, 0, 0] [7, 11, 2] [2, 7, 11] : // CHECK-SAME: memref<8x16x4xf32, #[[$BASE_MAP0]]> // CHECK-SAME: to memref<7x11x2xf32, #[[$SUBVIEW_MAP2]]> - %6 = subview %0[%c0, %c0, %c0] [%c7, %c11, %c2] [%c2, %c7, %c11] + %6 = memref.subview %0[%c0, %c0, %c0] [%c7, %c11, %c2] [%c2, %c7, %c11] : memref<8x16x4xf32, offset : 0, strides : [64, 4, 1]> to memref - store %v0, %6[%c0, %c0, %c0] : memref + memref.store %v0, %6[%c0, %c0, %c0] : memref // Test: subview shape are folded, but offsets and strides are not even if base memref is static - // CHECK: subview %[[ALLOC0]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [7, 11, 2] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] : + // CHECK: memref.subview %[[ALLOC0]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [7, 11, 2] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] : // CHECK-SAME: memref<8x16x4xf32, #[[$BASE_MAP0]]> to // CHECK-SAME: memref<7x11x2xf32, #[[$SUBVIEW_MAP3]]> - %10 = subview %0[%arg0, %arg0, %arg0] [%c7, %c11, %c2] [%arg1, %arg1, %arg1] : + %10 = memref.subview %0[%arg0, %arg0, %arg0] [%c7, %c11, %c2] [%arg1, %arg1, %arg1] : memref<8x16x4xf32, offset:0, strides:[64, 4, 1]> to memref - store %v0, %10[%arg1, %arg1, %arg1] : + memref.store %v0, %10[%arg1, %arg1, %arg1] : memref // Test: subview strides are folded, but offsets and shape are not even if base memref is static - // CHECK: subview %[[ALLOC0]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] [2, 7, 11] : + // CHECK: memref.subview %[[ALLOC0]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] [2, 7, 11] : // CHECK-SAME: memref<8x16x4xf32, #[[$BASE_MAP0]]> to // CHECK-SAME: memref to memref - store %v0, %11[%arg0, %arg0, %arg0] : + memref.store %v0, %11[%arg0, %arg0, %arg0] : memref // Test: subview offsets are folded, but strides and shape are not even if base memref is static - // CHECK: subview %[[ALLOC0]][1, 2, 7] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] [%[[ARG0]], %[[ARG0]], %[[ARG0]]] : + // CHECK: memref.subview %[[ALLOC0]][1, 2, 7] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] [%[[ARG0]], %[[ARG0]], %[[ARG0]]] : // CHECK-SAME: memref<8x16x4xf32, #[[$BASE_MAP0]]> to // CHECK-SAME: memref to memref - store %v0, %13[%arg1, %arg1, %arg1] : + memref.store %v0, %13[%arg1, %arg1, %arg1] : memref - // CHECK: %[[ALLOC2:.*]] = alloc(%[[ARG0]], %[[ARG0]], %[[ARG1]]) - %14 = alloc(%arg0, %arg0, %arg1) : memref + // CHECK: %[[ALLOC2:.*]] = memref.alloc(%[[ARG0]], %[[ARG0]], %[[ARG1]]) + %14 = memref.alloc(%arg0, %arg0, %arg1) : memref // Test: subview shape are folded, even if base memref is not static - // CHECK: subview %[[ALLOC2]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [7, 11, 2] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] : + // CHECK: memref.subview %[[ALLOC2]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [7, 11, 2] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] : // CHECK-SAME: memref to // CHECK-SAME: memref<7x11x2xf32, #[[$SUBVIEW_MAP3]]> - %15 = subview %14[%arg0, %arg0, %arg0] [%c7, %c11, %c2] [%arg1, %arg1, %arg1] : + %15 = memref.subview %14[%arg0, %arg0, %arg0] [%c7, %c11, %c2] [%arg1, %arg1, %arg1] : memref to memref - store %v0, %15[%arg1, %arg1, %arg1] : memref + memref.store %v0, %15[%arg1, %arg1, %arg1] : memref // TEST: subview strides are folded, in the type only the most minor stride is folded. 
- // CHECK: subview %[[ALLOC2]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] [2, 2, 2] : + // CHECK: memref.subview %[[ALLOC2]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] [2, 2, 2] : // CHECK-SAME: memref to // CHECK-SAME: memref to memref - store %v0, %16[%arg0, %arg0, %arg0] : memref + memref.store %v0, %16[%arg0, %arg0, %arg0] : memref // TEST: subview offsets are folded but the type offset remains dynamic, when the base memref is not static - // CHECK: subview %[[ALLOC2]][1, 1, 1] [%[[ARG0]], %[[ARG0]], %[[ARG0]]] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] : + // CHECK: memref.subview %[[ALLOC2]][1, 1, 1] [%[[ARG0]], %[[ARG0]], %[[ARG0]]] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] : // CHECK-SAME: memref to // CHECK-SAME: memref to memref - store %v0, %17[%arg0, %arg0, %arg0] : memref + memref.store %v0, %17[%arg0, %arg0, %arg0] : memref - // CHECK: %[[ALLOC3:.*]] = alloc() : memref<12x4xf32> - %18 = alloc() : memref<12x4xf32> + // CHECK: %[[ALLOC3:.*]] = memref.alloc() : memref<12x4xf32> + %18 = memref.alloc() : memref<12x4xf32> %c4 = constant 4 : index // TEST: subview strides are maintained when sizes are folded - // CHECK: subview %[[ALLOC3]][%arg1, %arg1] [2, 4] [1, 1] : + // CHECK: memref.subview %[[ALLOC3]][%arg1, %arg1] [2, 4] [1, 1] : // CHECK-SAME: memref<12x4xf32> to // CHECK-SAME: memref<2x4xf32, #[[$SUBVIEW_MAP7]]> - %19 = subview %18[%arg1, %arg1] [%c2, %c4] [1, 1] : + %19 = memref.subview %18[%arg1, %arg1] [%c2, %c4] [1, 1] : memref<12x4xf32> to memref - store %v0, %19[%arg1, %arg1] : memref + memref.store %v0, %19[%arg1, %arg1] : memref // TEST: subview strides and sizes are maintained when offsets are folded - // CHECK: subview %[[ALLOC3]][2, 4] [12, 4] [1, 1] : + // CHECK: memref.subview %[[ALLOC3]][2, 4] [12, 4] [1, 1] : // CHECK-SAME: memref<12x4xf32> to // CHECK-SAME: memref<12x4xf32, #[[$SUBVIEW_MAP8]]> - %20 = subview %18[%c2, %c4] [12, 4] [1, 1] : + %20 = memref.subview %18[%c2, %c4] [12, 4] [1, 1] : memref<12x4xf32> to memref<12x4xf32, offset: ?, strides:[4, 1]> - store %v0, %20[%arg1, %arg1] : memref<12x4xf32, offset: ?, strides:[4, 1]> + memref.store %v0, %20[%arg1, %arg1] : memref<12x4xf32, offset: ?, strides:[4, 1]> // Test: dim on subview is rewritten to size operand. 
- %7 = dim %4, %c0 : memref - %8 = dim %4, %c1 : memref + %7 = memref.dim %4, %c0 : memref + %8 = memref.dim %4, %c1 : memref // CHECK: return %[[C7]], %[[C11]] return %7, %8 : index, index @@ -1006,9 +1006,9 @@ // CHECK-LABEL: func @memref_cast_folding_subview func @memref_cast_folding_subview(%arg0: memref<4x5xf32>, %i: index) -> (memref) { - %0 = memref_cast %arg0 : memref<4x5xf32> to memref - // CHECK-NEXT: subview %{{.*}}: memref<4x5xf32> - %1 = subview %0[%i, %i][%i, %i][%i, %i]: memref to memref + %0 = memref.cast %arg0 : memref<4x5xf32> to memref + // CHECK-NEXT: memref.subview %{{.*}}: memref<4x5xf32> + %1 = memref.subview %0[%i, %i][%i, %i][%i, %i]: memref to memref // CHECK-NEXT: return %{{.*}} return %1: memref } @@ -1022,11 +1022,10 @@ func @memref_cast_folding_subview_static(%V: memref<16x16xf32>, %a: index, %b: index) -> memref<3x4xf32, offset:?, strides:[?, 1]> { - %0 = memref_cast %V : memref<16x16xf32> to memref - %1 = subview %0[0, 0][3, 4][1, 1] : memref to memref<3x4xf32, offset:?, strides:[?, 1]> + %0 = memref.cast %V : memref<16x16xf32> to memref + %1 = memref.subview %0[0, 0][3, 4][1, 1] : memref to memref<3x4xf32, offset:?, strides:[?, 1]> - // CHECK: subview{{.*}}: memref<16x16xf32> to memref<3x4xf32, #[[$map0]]> - // CHECK: memref_cast{{.*}}: memref<3x4xf32, #[[$map0]]> to memref<3x4xf32, #[[$map1]]> + // CHECK: memref.subview{{.*}}: memref<16x16xf32> to memref<3x4xf32, #[[$map0]]> return %1: memref<3x4xf32, offset:?, strides:[?, 1]> } diff --git a/mlir/test/Transforms/constant-fold.mlir b/mlir/test/Transforms/constant-fold.mlir --- a/mlir/test/Transforms/constant-fold.mlir +++ b/mlir/test/Transforms/constant-fold.mlir @@ -13,8 +13,8 @@ %2 = addf %0, %1 : f32 - // CHECK-NEXT: store [[C]], [[ARG]][] - store %2, %p[] : memref + // CHECK-NEXT: memref.store [[C]], [[ARG]][] + memref.store %2, %p[] : memref } } return @@ -548,7 +548,7 @@ // CHECK:[[C4:%.+]] = constant 4 : index %c1 = constant 1 : index - %0 = dim %x, %c1 : tensor<8x4xf32> + %0 = memref.dim %x, %c1 : tensor<8x4xf32> // CHECK-NEXT: return [[C4]] return %0 : index @@ -793,7 +793,7 @@ // CHECK-LABEL: func @subview_scalar_fold func @subview_scalar_fold(%arg0: memref) -> memref { - // CHECK-NOT: subview - %c = subview %arg0[] [] [] : memref to memref + // CHECK-NOT: memref.subview + %c = memref.subview %arg0[] [] [] : memref to memref return %c : memref } diff --git a/mlir/test/Transforms/copy-removal.mlir b/mlir/test/Transforms/copy-removal.mlir --- a/mlir/test/Transforms/copy-removal.mlir +++ b/mlir/test/Transforms/copy-removal.mlir @@ -6,65 +6,65 @@ // CHECK-LABEL: func @nested_region_control_flow_div_nested func @nested_region_control_flow_div_nested(%arg0: index, %arg1: index) -> memref { %0 = cmpi eq, %arg0, %arg1 : index - %1 = alloc(%arg0, %arg0) : memref + %1 = memref.alloc(%arg0, %arg0) : memref // CHECK: %{{.*}} = scf.if %2 = scf.if %0 -> (memref) { // CHECK: %[[PERCENT3:.*]] = scf.if %3 = scf.if %0 -> (memref) { %c0_0 = constant 0 : index - %7 = dim %1, %c0_0 : memref + %7 = memref.dim %1, %c0_0 : memref %c1_1 = constant 1 : index - %8 = dim %1, %c1_1 : memref - %9 = alloc(%7, %8) : memref + %8 = memref.dim %1, %c1_1 : memref + %9 = memref.alloc(%7, %8) : memref // CHECK: linalg.copy({{.*}}, %[[PERCENT9:.*]]) linalg.copy(%1, %9) : memref, memref // CHECK: scf.yield %[[PERCENT9]] scf.yield %9 : memref } else { - // CHECK: %[[PERCENT7:.*]] = alloc - %7 = alloc(%arg0, %arg1) : memref + // CHECK: %[[PERCENT7:.*]] = memref.alloc + %7 = memref.alloc(%arg0, %arg1) : memref %c0_0 = constant 0 : index - 
%8 = dim %7, %c0_0 : memref + %8 = memref.dim %7, %c0_0 : memref %c1_1 = constant 1 : index - %9 = dim %7, %c1_1 : memref - // CHECK-NOT: %{{.*}} = alloc + %9 = memref.dim %7, %c1_1 : memref + // CHECK-NOT: %{{.*}} = memref.alloc // CHECK-NOT: linalg.copy(%[[PERCENT7]], %{{.*}}) - // CHECK-NOT: dealloc %[[PERCENT7]] - %10 = alloc(%8, %9) : memref + // CHECK-NOT: memref.dealloc %[[PERCENT7]] + %10 = memref.alloc(%8, %9) : memref linalg.copy(%7, %10) : memref, memref - dealloc %7 : memref + memref.dealloc %7 : memref // CHECK: scf.yield %[[PERCENT7]] scf.yield %10 : memref } %c0 = constant 0 : index - %4 = dim %3, %c0 : memref + %4 = memref.dim %3, %c0 : memref %c1 = constant 1 : index - %5 = dim %3, %c1 : memref - // CHECK-NOT: %{{.*}} = alloc + %5 = memref.dim %3, %c1 : memref + // CHECK-NOT: %{{.*}} = memref.alloc // CHECK-NOT: linalg.copy(%[[PERCENT3]], %{{.*}}) - // CHECK-NOT: dealloc %[[PERCENT3]] - %6 = alloc(%4, %5) : memref + // CHECK-NOT: memref.dealloc %[[PERCENT3]] + %6 = memref.alloc(%4, %5) : memref linalg.copy(%3, %6) : memref, memref - dealloc %3 : memref + memref.dealloc %3 : memref // CHECK: scf.yield %[[PERCENT3]] scf.yield %6 : memref } else { - // CHECK: %[[PERCENT3:.*]] = alloc - %3 = alloc(%arg1, %arg1) : memref + // CHECK: %[[PERCENT3:.*]] = memref.alloc + %3 = memref.alloc(%arg1, %arg1) : memref %c0 = constant 0 : index - %4 = dim %3, %c0 : memref + %4 = memref.dim %3, %c0 : memref %c1 = constant 1 : index - %5 = dim %3, %c1 : memref - // CHECK-NOT: %{{.*}} = alloc + %5 = memref.dim %3, %c1 : memref + // CHECK-NOT: %{{.*}} = memref.alloc // CHECK-NOT: linalg.copy(%[[PERCENT3]], %{{.*}}) - // CHECK-NOT: dealloc %[[PERCENT3]] - %6 = alloc(%4, %5) : memref + // CHECK-NOT: memref.dealloc %[[PERCENT3]] + %6 = memref.alloc(%4, %5) : memref linalg.copy(%3, %6) : memref, memref - dealloc %3 : memref + memref.dealloc %3 : memref // CHECK: scf.yield %[[PERCENT3]] scf.yield %6 : memref } - dealloc %1 : memref + memref.dealloc %1 : memref return %2 : memref } @@ -72,16 +72,16 @@ // CHECK-LABEL: func @simple_test func @simple_test() -> memref<5xf32> { - %temp = alloc() : memref<5xf32> - %ret = alloc() : memref<5xf32> + %temp = memref.alloc() : memref<5xf32> + %ret = memref.alloc() : memref<5xf32> linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32> - dealloc %ret : memref<5xf32> + memref.dealloc %ret : memref<5xf32> return %temp : memref<5xf32> } // CHECK-SAME: () -> memref<5xf32> -// CHECK-NEXT: %[[ret:.*]] = alloc() +// CHECK-NEXT: %[[ret:.*]] = memref.alloc() // CHECK-NOT: linalg.copy(%[[ret]], %{{.*}}) -// CHECK-NOT: dealloc %[[ret]] +// CHECK-NOT: memref.dealloc %[[ret]] // CHECK: return %[[ret]] // ----- @@ -92,20 +92,20 @@ // CHECK-LABEL: func @test_with_ret_usage_before_copy func @test_with_ret_usage_before_copy() -> memref<5xf32> { - %ret = alloc() : memref<5xf32> - %temp = alloc() : memref<5xf32> + %ret = memref.alloc() : memref<5xf32> + %temp = memref.alloc() : memref<5xf32> %c0 = constant 0 : index - %dimension = dim %ret, %c0 : memref<5xf32> + %dimension = memref.dim %ret, %c0 : memref<5xf32> linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32> - dealloc %ret : memref<5xf32> + memref.dealloc %ret : memref<5xf32> return %temp : memref<5xf32> } -// CHECK-NEXT: %[[ret:.*]] = alloc() -// CHECK-NOT: %{{.*}} = alloc +// CHECK-NEXT: %[[ret:.*]] = memref.alloc() +// CHECK-NOT: %{{.*}} = memref.alloc // CHECK-NEXT: %{{.*}} = constant -// CHECK-NEXT: %[[DIM:.*]] = dim %[[ret]] +// CHECK-NEXT: %[[DIM:.*]] = memref.dim %[[ret]] // CHECK-NOT: linalg.copy(%[[ret]], %{{.*}}) 
-// CHECK-NOT: dealloc %[[ret]] +// CHECK-NOT: memref.dealloc %[[ret]] // CHECK: return %[[ret]] // ----- @@ -115,13 +115,13 @@ // CHECK-LABEL: func @test_with_ret_usage_after_copy func @test_with_ret_usage_after_copy() -> memref<5xf32> { - %ret = alloc() : memref<5xf32> - %temp = alloc() : memref<5xf32> + %ret = memref.alloc() : memref<5xf32> + %temp = memref.alloc() : memref<5xf32> // CHECK: linalg.copy linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32> %c0 = constant 0 : index - %dimension = dim %ret, %c0 : memref<5xf32> - dealloc %ret : memref<5xf32> + %dimension = memref.dim %ret, %c0 : memref<5xf32> + memref.dealloc %ret : memref<5xf32> return %temp : memref<5xf32> } @@ -132,13 +132,13 @@ // CHECK-LABEL: func @test_with_temp_usage_before_copy func @test_with_temp_usage_before_copy() -> memref<5xf32> { - %ret = alloc() : memref<5xf32> - %temp = alloc() : memref<5xf32> + %ret = memref.alloc() : memref<5xf32> + %temp = memref.alloc() : memref<5xf32> %c0 = constant 0 : index - %dimension = dim %temp, %c0 : memref<5xf32> + %dimension = memref.dim %temp, %c0 : memref<5xf32> // CHECK: linalg.copy linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32> - dealloc %ret : memref<5xf32> + memref.dealloc %ret : memref<5xf32> return %temp : memref<5xf32> } @@ -149,11 +149,11 @@ // removed. // However the following pattern is not handled by copy removal. -// %from = alloc() -// %to = alloc() +// %from = memref.alloc() +// %to = memref.alloc() // copy(%from, %to) // read_from(%from) + write_to(%something_else) -// dealloc(%from) +// memref.dealloc(%from) // return %to // In particular, linalg.generic is a memoryEffectOp between copy and dealloc. // Since no alias analysis is performed and no distinction is made between reads @@ -163,9 +163,9 @@ // CHECK-LABEL: func @test_with_temp_usage_after_copy func @test_with_temp_usage_after_copy() -> memref<5xf32> { - %ret = alloc() : memref<5xf32> - %res = alloc() : memref<5xf32> - %temp = alloc() : memref<5xf32> + %ret = memref.alloc() : memref<5xf32> + %res = memref.alloc() : memref<5xf32> + %temp = memref.alloc() : memref<5xf32> linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32> linalg.generic { indexing_maps = [#map0, #map0], @@ -176,22 +176,22 @@ %tmp1 = math.exp %gen1_arg0 : f32 linalg.yield %tmp1 : f32 } - dealloc %ret : memref<5xf32> + memref.dealloc %ret : memref<5xf32> return %temp : memref<5xf32> } -// CHECK-NEXT: %[[ret:.*]] = alloc() -// CHECK-NEXT: %[[res:.*]] = alloc() -// CHECK-NEXT: %[[temp:.*]] = alloc() +// CHECK-NEXT: %[[ret:.*]] = memref.alloc() +// CHECK-NEXT: %[[res:.*]] = memref.alloc() +// CHECK-NEXT: %[[temp:.*]] = memref.alloc() // CHECK-NEXT: linalg.copy(%[[ret]], %[[temp]]) // CHECK-NEXT: linalg.generic -// CHECK: dealloc %[[ret]] +// CHECK: memref.dealloc %[[ret]] // CHECK: return %[[temp]] // ----- // CHECK-LABEL: func @make_allocation func @make_allocation() -> memref<5xf32> { - %mem = alloc() : memref<5xf32> + %mem = memref.alloc() : memref<5xf32> return %mem : memref<5xf32> } @@ -199,12 +199,12 @@ func @test_with_function_call() -> memref<5xf32> { // CHECK-NEXT: %[[ret:.*]] = call @make_allocation() : () -> memref<5xf32> %ret = call @make_allocation() : () -> (memref<5xf32>) - // CHECK-NOT: %{{.*}} = alloc + // CHECK-NOT: %{{.*}} = memref.alloc // CHECK-NOT: linalg.copy(%[[ret]], %{{.*}}) - // CHECK-NOT: dealloc %[[ret]] - %temp = alloc() : memref<5xf32> + // CHECK-NOT: memref.dealloc %[[ret]] + %temp = memref.alloc() : memref<5xf32> linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32> - dealloc %ret : 
memref<5xf32> + memref.dealloc %ret : memref<5xf32> // CHECK: return %[[ret]] return %temp : memref<5xf32> } @@ -213,20 +213,20 @@ // CHECK-LABEL: func @multiple_deallocs_in_different_blocks func @multiple_deallocs_in_different_blocks(%cond : i1) -> memref<5xf32> { - // CHECK-NEXT: %[[PERCENT0:.*]] = alloc() - %0 = alloc() : memref<5xf32> + // CHECK-NEXT: %[[PERCENT0:.*]] = memref.alloc() + %0 = memref.alloc() : memref<5xf32> cond_br %cond, ^bb1, ^bb2 ^bb1: - dealloc %0 : memref<5xf32> + memref.dealloc %0 : memref<5xf32> // CHECK: br ^[[BB3:.*]](%[[PERCENT0]] br ^bb3(%0 : memref<5xf32>) ^bb2: - // CHECK-NOT: %{{.*}} = alloc + // CHECK-NOT: %{{.*}} = memref.alloc // CHECK-NOT: linalg.copy(%[[PERCENT0]], %{{.*}}) - // CHECK-NOT: dealloc %[[PERCENT0]] - %temp = alloc() : memref<5xf32> + // CHECK-NOT: memref.dealloc %[[PERCENT0]] + %temp = memref.alloc() : memref<5xf32> linalg.copy(%0, %temp) : memref<5xf32>, memref<5xf32> - dealloc %0 : memref<5xf32> + memref.dealloc %0 : memref<5xf32> // CHECK: br ^[[BB3]](%[[PERCENT0]] br ^bb3(%temp : memref<5xf32>) ^bb3(%res : memref<5xf32>): @@ -240,12 +240,12 @@ // CHECK-LABEL: func @test_ReuseCopyTargetAsSource func @test_ReuseCopyTargetAsSource(%arg0: memref<2xf32>, %result: memref<2xf32>){ // CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[RES:.*]]: memref<2xf32>) - // CHECK-NOT: %{{.*}} = alloc - %temp = alloc() : memref<2xf32> + // CHECK-NOT: %{{.*}} = memref.alloc + %temp = memref.alloc() : memref<2xf32> // CHECK-NEXT: linalg.generic // CHECK-SAME: ins(%[[ARG0]]{{.*}}outs(%[[RES]] // CHECK-NOT: linalg.copy(%{{.*}}, %[[RES]]) - // CHECK-NOT: dealloc %{{.*}} + // CHECK-NOT: memref.dealloc %{{.*}} linalg.generic { indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} @@ -256,7 +256,7 @@ linalg.yield %tmp2 : f32 } linalg.copy(%temp, %result) : memref<2xf32>, memref<2xf32> - dealloc %temp : memref<2xf32> + memref.dealloc %temp : memref<2xf32> // CHECK: return return } @@ -270,8 +270,8 @@ // CHECK-LABEL: func @test_ReuseCopyTargetAsSource func @test_ReuseCopyTargetAsSource(%arg0: memref<2xf32>){ - %to = alloc() : memref<2xf32> - %temp = alloc() : memref<2xf32> + %to = memref.alloc() : memref<2xf32> + %temp = memref.alloc() : memref<2xf32> linalg.generic { indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} @@ -292,7 +292,7 @@ } // CHECK: linalg.copy linalg.copy(%temp, %to) : memref<2xf32>, memref<2xf32> - dealloc %temp : memref<2xf32> + memref.dealloc %temp : memref<2xf32> return } @@ -302,34 +302,34 @@ // CHECK-LABEL: func @loop_alloc func @loop_alloc(%arg0: index, %arg1: index, %arg2: index, %arg3: memref<2xf32>, %arg4: memref<2xf32>) { - // CHECK: %{{.*}} = alloc() - %0 = alloc() : memref<2xf32> - dealloc %0 : memref<2xf32> - // CHECK: %{{.*}} = alloc() - %1 = alloc() : memref<2xf32> + // CHECK: %{{.*}} = memref.alloc() + %0 = memref.alloc() : memref<2xf32> + memref.dealloc %0 : memref<2xf32> + // CHECK: %{{.*}} = memref.alloc() + %1 = memref.alloc() : memref<2xf32> // CHECK: linalg.copy linalg.copy(%arg3, %1) : memref<2xf32>, memref<2xf32> %2 = scf.for %arg5 = %arg0 to %arg1 step %arg2 iter_args(%arg6 = %1) -> (memref<2xf32>) { %3 = cmpi eq, %arg5, %arg1 : index - // CHECK: dealloc - dealloc %arg6 : memref<2xf32> - // CHECK: %[[PERCENT4:.*]] = alloc() - %4 = alloc() : memref<2xf32> - // CHECK-NOT: alloc + // CHECK: memref.dealloc + memref.dealloc %arg6 : memref<2xf32> + // CHECK: %[[PERCENT4:.*]] = memref.alloc() + %4 = memref.alloc() : memref<2xf32> + // CHECK-NOT: memref.alloc // CHECK-NOT: linalg.copy - // CHECK-NOT: dealloc - %5 
= alloc() : memref<2xf32> + // CHECK-NOT: memref.dealloc + %5 = memref.alloc() : memref<2xf32> linalg.copy(%4, %5) : memref<2xf32>, memref<2xf32> - dealloc %4 : memref<2xf32> - // CHECK: %[[PERCENT6:.*]] = alloc() - %6 = alloc() : memref<2xf32> + memref.dealloc %4 : memref<2xf32> + // CHECK: %[[PERCENT6:.*]] = memref.alloc() + %6 = memref.alloc() : memref<2xf32> // CHECK: linalg.copy(%[[PERCENT4]], %[[PERCENT6]]) linalg.copy(%5, %6) : memref<2xf32>, memref<2xf32> scf.yield %6 : memref<2xf32> } // CHECK: linalg.copy linalg.copy(%2, %arg4) : memref<2xf32>, memref<2xf32> - dealloc %2 : memref<2xf32> + memref.dealloc %2 : memref<2xf32> return } @@ -341,8 +341,8 @@ // CHECK-LABEL: func @check_with_affine_dialect func @check_with_affine_dialect(%arg0: memref<4xf32>, %arg1: memref<4xf32>, %arg2: memref<4xf32>) { // CHECK-SAME: (%[[ARG0:.*]]: memref<4xf32>, %[[ARG1:.*]]: memref<4xf32>, %[[RES:.*]]: memref<4xf32>) - // CHECK-NOT: alloc - %0 = alloc() : memref<4xf32> + // CHECK-NOT: memref.alloc + %0 = memref.alloc() : memref<4xf32> affine.for %arg3 = 0 to 4 { %5 = affine.load %arg0[%arg3] : memref<4xf32> %6 = affine.load %arg1[%arg3] : memref<4xf32> @@ -355,7 +355,7 @@ // CHECK-NOT: linalg.copy // CHECK-NOT: dealloc linalg.copy(%0, %arg2) : memref<4xf32>, memref<4xf32> - dealloc %0 : memref<4xf32> + memref.dealloc %0 : memref<4xf32> //CHECK: return return } diff --git a/mlir/test/Transforms/cse.mlir b/mlir/test/Transforms/cse.mlir --- a/mlir/test/Transforms/cse.mlir +++ b/mlir/test/Transforms/cse.mlir @@ -96,11 +96,11 @@ /// Check that operations with side effects are not eliminated. // CHECK-LABEL: @side_effect func @side_effect() -> (memref<2x1xf32>, memref<2x1xf32>) { - // CHECK: %0 = alloc() : memref<2x1xf32> - %0 = alloc() : memref<2x1xf32> + // CHECK: %0 = memref.alloc() : memref<2x1xf32> + %0 = memref.alloc() : memref<2x1xf32> - // CHECK-NEXT: %1 = alloc() : memref<2x1xf32> - %1 = alloc() : memref<2x1xf32> + // CHECK-NEXT: %1 = memref.alloc() : memref<2x1xf32> + %1 = memref.alloc() : memref<2x1xf32> // CHECK-NEXT: return %0, %1 : memref<2x1xf32>, memref<2x1xf32> return %0, %1 : memref<2x1xf32>, memref<2x1xf32> diff --git a/mlir/test/Transforms/finalizing-bufferize.mlir b/mlir/test/Transforms/finalizing-bufferize.mlir --- a/mlir/test/Transforms/finalizing-bufferize.mlir +++ b/mlir/test/Transforms/finalizing-bufferize.mlir @@ -4,24 +4,24 @@ // CHECK-SAME: %[[ARG:.*]]: memref) -> memref { // CHECK: return %[[ARG]] : memref func @eliminate_materializations(%arg0: memref) -> memref { - %0 = tensor_load %arg0 : memref - %1 = tensor_to_memref %0 : memref + %0 = memref.tensor_load %arg0 : memref + %1 = memref.buffer_cast %0 : memref return %1 : memref } // ----- -func @unable_to_convert_lone_tensor_to_memref() -> memref { +func @unable_to_convert_lone_buffer_cast() -> memref { // expected-error @+1 {{failed to legalize operation 'test.source'}} %0 = "test.source"() : () -> tensor - %1 = tensor_to_memref %0 : memref + %1 = memref.buffer_cast %0 : memref return %1 : memref } // ----- func @unable_to_convert_lone_tensor_load(%arg0: memref) { - %0 = tensor_load %arg0 : memref + %0 = memref.tensor_load %arg0 : memref // expected-error @+1 {{failed to legalize operation 'test.sink'}} "test.sink"(%0) : (tensor) -> () return diff --git a/mlir/test/Transforms/loop-fusion-dependence-check.mlir b/mlir/test/Transforms/loop-fusion-dependence-check.mlir --- a/mlir/test/Transforms/loop-fusion-dependence-check.mlir +++ b/mlir/test/Transforms/loop-fusion-dependence-check.mlir @@ -4,9 +4,9 @@ // CHECK-LABEL: func 
@cannot_fuse_would_create_cycle() { func @cannot_fuse_would_create_cycle() { - %a = alloc() : memref<10xf32> - %b = alloc() : memref<10xf32> - %c = alloc() : memref<10xf32> + %a = memref.alloc() : memref<10xf32> + %b = memref.alloc() : memref<10xf32> + %c = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 @@ -37,9 +37,9 @@ // CHECK-LABEL: func @can_fuse_rar_dependence() { func @can_fuse_rar_dependence() { - %a = alloc() : memref<10xf32> - %b = alloc() : memref<10xf32> - %c = alloc() : memref<10xf32> + %a = memref.alloc() : memref<10xf32> + %b = memref.alloc() : memref<10xf32> + %c = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 @@ -69,10 +69,10 @@ // CHECK-LABEL: func @can_fuse_different_memrefs() { func @can_fuse_different_memrefs() { - %a = alloc() : memref<10xf32> - %b = alloc() : memref<10xf32> - %c = alloc() : memref<10xf32> - %d = alloc() : memref<10xf32> + %a = memref.alloc() : memref<10xf32> + %b = memref.alloc() : memref<10xf32> + %c = memref.alloc() : memref<10xf32> + %d = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 @@ -102,7 +102,7 @@ // CHECK-LABEL: func @should_not_fuse_across_intermediate_store() { func @should_not_fuse_across_intermediate_store() { - %0 = alloc() : memref<10xf32> + %0 = memref.alloc() : memref<10xf32> %c0 = constant 0 : index %cf7 = constant 7.0 : f32 @@ -127,7 +127,7 @@ // CHECK-LABEL: func @should_not_fuse_across_intermediate_load() { func @should_not_fuse_across_intermediate_load() { - %0 = alloc() : memref<10xf32> + %0 = memref.alloc() : memref<10xf32> %c0 = constant 0 : index %cf7 = constant 7.0 : f32 @@ -152,8 +152,8 @@ // CHECK-LABEL: func @should_not_fuse_across_ssa_value_def() { func @should_not_fuse_across_ssa_value_def() { - %0 = alloc() : memref<10xf32> - %1 = alloc() : memref<10xf32> + %0 = memref.alloc() : memref<10xf32> + %1 = memref.alloc() : memref<10xf32> %c0 = constant 0 : index %cf7 = constant 7.0 : f32 @@ -182,7 +182,7 @@ // CHECK-LABEL: func @should_not_fuse_store_before_load() { func @should_not_fuse_store_before_load() { - %0 = alloc() : memref<10xf32> + %0 = memref.alloc() : memref<10xf32> %c0 = constant 0 : index %cf7 = constant 7.0 : f32 @@ -208,7 +208,7 @@ // CHECK-LABEL: func @should_not_fuse_across_load_at_depth1() { func @should_not_fuse_across_load_at_depth1() { - %0 = alloc() : memref<10x10xf32> + %0 = memref.alloc() : memref<10x10xf32> %c0 = constant 0 : index %cf7 = constant 7.0 : f32 @@ -232,7 +232,7 @@ // CHECK-LABEL: func @should_not_fuse_across_load_in_loop_at_depth1() { func @should_not_fuse_across_load_in_loop_at_depth1() { - %0 = alloc() : memref<10x10xf32> + %0 = memref.alloc() : memref<10x10xf32> %c0 = constant 0 : index %cf7 = constant 7.0 : f32 @@ -258,7 +258,7 @@ // CHECK-LABEL: func @should_not_fuse_across_store_at_depth1() { func @should_not_fuse_across_store_at_depth1() { - %0 = alloc() : memref<10x10xf32> + %0 = memref.alloc() : memref<10x10xf32> %c0 = constant 0 : index %cf7 = constant 7.0 : f32 @@ -282,7 +282,7 @@ // CHECK-LABEL: func @should_not_fuse_across_store_in_loop_at_depth1() { func @should_not_fuse_across_store_in_loop_at_depth1() { - %0 = alloc() : memref<10x10xf32> + %0 = memref.alloc() : memref<10x10xf32> %c0 = constant 0 : index %cf7 = constant 7.0 : f32 @@ -308,8 +308,8 @@ // CHECK-LABEL: func @should_not_fuse_across_ssa_value_def_at_depth1() { func @should_not_fuse_across_ssa_value_def_at_depth1() { - %0 = alloc() : memref<10x10xf32> - %1 = alloc() : memref<10x10xf32> + %0 = memref.alloc() : memref<10x10xf32> + %1 = memref.alloc() : memref<10x10xf32> %c0 = 
constant 0 : index %cf7 = constant 7.0 : f32 diff --git a/mlir/test/Transforms/loop-fusion-slice-computation.mlir b/mlir/test/Transforms/loop-fusion-slice-computation.mlir --- a/mlir/test/Transforms/loop-fusion-slice-computation.mlir +++ b/mlir/test/Transforms/loop-fusion-slice-computation.mlir @@ -4,7 +4,7 @@ // CHECK-LABEL: func @slice_depth1_loop_nest() { func @slice_depth1_loop_nest() { - %0 = alloc() : memref<100xf32> + %0 = memref.alloc() : memref<100xf32> %cst = constant 7.000000e+00 : f32 affine.for %i0 = 0 to 16 { // expected-remark@-1 {{slice ( src loop: 1, dst loop: 0, depth: 1 : insert point: (1, 1) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] )}} @@ -24,7 +24,7 @@ // same location. // CHECK-LABEL: func @slice_depth1_loop_nest_with_offsets() { func @slice_depth1_loop_nest_with_offsets() { - %0 = alloc() : memref<100xf32> + %0 = memref.alloc() : memref<100xf32> %cst = constant 7.000000e+00 : f32 affine.for %i0 = 0 to 16 { // expected-remark@-1 {{slice ( src loop: 1, dst loop: 0, depth: 1 : insert point: (1, 2) loop bounds: [(d0) -> (d0 + 3), (d0) -> (d0 + 4)] )}} @@ -45,7 +45,7 @@ // Slices at loop depth 2 should slice loop bounds of both loops. // CHECK-LABEL: func @slice_depth2_loop_nest() { func @slice_depth2_loop_nest() { - %0 = alloc() : memref<100x100xf32> + %0 = memref.alloc() : memref<100x100xf32> %cst = constant 7.000000e+00 : f32 affine.for %i0 = 0 to 16 { // expected-remark@-1 {{slice ( src loop: 1, dst loop: 0, depth: 1 : insert point: (1, 1) loop bounds: [(d0) -> (d0), (d0) -> (d0 + 1)] [(d0) -> (0), (d0) -> (8)] )}} @@ -71,7 +71,7 @@ // depths 1 and 2 because the dependent store in loop nest %i0 is at depth 2. // CHECK-LABEL: func @slice_depth2_loop_nest_two_loads() { func @slice_depth2_loop_nest_two_loads() { - %0 = alloc() : memref<100x100xf32> + %0 = memref.alloc() : memref<100x100xf32> %c0 = constant 0 : index %cst = constant 7.000000e+00 : f32 affine.for %i0 = 0 to 16 { @@ -99,7 +99,7 @@ // loop nest %i2 is at depth 2. // CHECK-LABEL: func @slice_depth2_loop_nest_two_stores() { func @slice_depth2_loop_nest_two_stores() { - %0 = alloc() : memref<100x100xf32> + %0 = memref.alloc() : memref<100x100xf32> %c0 = constant 0 : index %cst = constant 7.000000e+00 : f32 affine.for %i0 = 0 to 16 { @@ -124,7 +124,7 @@ // Test loop nest which has a smaller outer trip count than its inner scf. 
// CHECK-LABEL: func @slice_loop_nest_with_smaller_outer_trip_count() { func @slice_loop_nest_with_smaller_outer_trip_count() { - %0 = alloc() : memref<100x100xf32> + %0 = memref.alloc() : memref<100x100xf32> %c0 = constant 0 : index %cst = constant 7.000000e+00 : f32 affine.for %i0 = 0 to 16 { diff --git a/mlir/test/Transforms/loop-fusion-transformation.mlir b/mlir/test/Transforms/loop-fusion-transformation.mlir --- a/mlir/test/Transforms/loop-fusion-transformation.mlir +++ b/mlir/test/Transforms/loop-fusion-transformation.mlir @@ -2,7 +2,7 @@ // CHECK-LABEL: func @slice_depth1_loop_nest() { func @slice_depth1_loop_nest() { - %0 = alloc() : memref<100xf32> + %0 = memref.alloc() : memref<100xf32> %cst = constant 7.000000e+00 : f32 affine.for %i0 = 0 to 16 { affine.store %cst, %0[%i0] : memref<100xf32> @@ -24,9 +24,9 @@ // CHECK-LABEL: func @should_fuse_reduction_to_pointwise() { func @should_fuse_reduction_to_pointwise() { - %a = alloc() : memref<10x10xf32> - %b = alloc() : memref<10xf32> - %c = alloc() : memref<10xf32> + %a = memref.alloc() : memref<10x10xf32> + %b = memref.alloc() : memref<10xf32> + %c = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 @@ -64,9 +64,9 @@ // CHECK-LABEL: func @should_fuse_avoiding_dependence_cycle() { func @should_fuse_avoiding_dependence_cycle() { - %a = alloc() : memref<10xf32> - %b = alloc() : memref<10xf32> - %c = alloc() : memref<10xf32> + %a = memref.alloc() : memref<10xf32> + %b = memref.alloc() : memref<10xf32> + %c = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 diff --git a/mlir/test/Transforms/loop-fusion.mlir b/mlir/test/Transforms/loop-fusion.mlir --- a/mlir/test/Transforms/loop-fusion.mlir +++ b/mlir/test/Transforms/loop-fusion.mlir @@ -12,7 +12,7 @@ // CHECK-LABEL: func @should_fuse_raw_dep_for_locality() { func @should_fuse_raw_dep_for_locality() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 affine.for %i0 = 0 to 10 { @@ -33,9 +33,9 @@ // CHECK-LABEL: func @should_fuse_reduction_to_pointwise() { func @should_fuse_reduction_to_pointwise() { - %a = alloc() : memref<10x10xf32> - %b = alloc() : memref<10xf32> - %c = alloc() : memref<10xf32> + %a = memref.alloc() : memref<10x10xf32> + %b = memref.alloc() : memref<10xf32> + %c = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 @@ -76,7 +76,7 @@ // CHECK-LABEL: func @should_fuse_loop_nests_with_shifts() { func @should_fuse_loop_nests_with_shifts() { - %a = alloc() : memref<10x10xf32> + %a = memref.alloc() : memref<10x10xf32> %cf7 = constant 7.0 : f32 affine.for %i0 = 0 to 9 { @@ -116,8 +116,8 @@ // CHECK-LABEL: func @should_fuse_loop_nest() { func @should_fuse_loop_nest() { - %a = alloc() : memref<10x10xf32> - %b = alloc() : memref<10x10xf32> + %a = memref.alloc() : memref<10x10xf32> + %b = memref.alloc() : memref<10x10xf32> %cf7 = constant 7.0 : f32 affine.for %i0 = 0 to 10 { @@ -137,8 +137,8 @@ } } // Expecting private memref for '%a' first, then private memref for '%b'. 
- // CHECK-DAG: [[NEWA:%[0-9]+]] = alloc() : memref<1x1xf32> - // CHECK-DAG: [[NEWB:%[0-9]+]] = alloc() : memref<1x1xf32> + // CHECK-DAG: [[NEWA:%[0-9]+]] = memref.alloc() : memref<1x1xf32> + // CHECK-DAG: [[NEWB:%[0-9]+]] = memref.alloc() : memref<1x1xf32> // CHECK: affine.for %{{.*}} = 0 to 10 { // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 { // CHECK-NEXT: affine.store %{{.*}}, [[NEWA]][0, 0] : memref<1x1xf32> @@ -155,9 +155,9 @@ // CHECK-LABEL: func @should_fuse_across_intermediate_loop_with_no_deps() { func @should_fuse_across_intermediate_loop_with_no_deps() { - %a = alloc() : memref<10xf32> - %b = alloc() : memref<10xf32> - %c = alloc() : memref<10xf32> + %a = memref.alloc() : memref<10xf32> + %b = memref.alloc() : memref<10xf32> + %c = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 @@ -190,8 +190,8 @@ // CHECK-LABEL: func @should_fuse_all_loops() { func @should_fuse_all_loops() { - %a = alloc() : memref<10xf32> - %b = alloc() : memref<10xf32> + %a = memref.alloc() : memref<10xf32> + %b = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 // Set up flow dependences from first and second loops to third. @@ -208,8 +208,8 @@ // Should fuse first and second loops into third. // Expecting private memref for '%a' first, then private memref for '%b'. - // CHECK-DAG: [[NEWA:%[0-9]+]] = alloc() : memref<1xf32> - // CHECK-DAG: [[NEWB:%[0-9]+]] = alloc() : memref<1xf32> + // CHECK-DAG: [[NEWA:%[0-9]+]] = memref.alloc() : memref<1xf32> + // CHECK-DAG: [[NEWB:%[0-9]+]] = memref.alloc() : memref<1xf32> // CHECK: affine.for %{{.*}} = 0 to 10 { // CHECK-NEXT: affine.store %{{.*}}, [[NEWA]][0] : memref<1xf32> // CHECK-NEXT: affine.store %{{.*}}, [[NEWB]][0] : memref<1xf32> @@ -224,9 +224,9 @@ // CHECK-LABEL: func @should_fuse_first_and_second_loops() { func @should_fuse_first_and_second_loops() { - %a = alloc() : memref<10xf32> - %b = alloc() : memref<10xf32> - %c = alloc() : memref<10xf32> + %a = memref.alloc() : memref<10xf32> + %b = memref.alloc() : memref<10xf32> + %c = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 @@ -260,9 +260,9 @@ // CHECK-LABEL: func @should_not_fuse_would_create_cycle() { func @should_not_fuse_would_create_cycle() { - %a = alloc() : memref<10xf32> - %b = alloc() : memref<10xf32> - %c = alloc() : memref<10xf32> + %a = memref.alloc() : memref<10xf32> + %b = memref.alloc() : memref<10xf32> + %c = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 @@ -303,7 +303,7 @@ // CHECK-LABEL: func @should_fuse_producer_consumer() { func @should_fuse_producer_consumer() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 affine.for %i0 = 0 to 10 { @@ -319,7 +319,7 @@ // %i1, but OK to fuse %i1 into %i2. // TODO: When the fusion pass is run to a fixed-point, it should // fuse all three of these loop nests. 
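Editor's note (not part of the patch): most fusion tests in this file follow the producer/consumer shape sketched below; after `-affine-loop-fusion` the intermediate buffer is typically replaced by a privatized `memref<1xf32>`, which is what the `memref.alloc() : memref<1xf32>` CHECK lines assert. Names here are illustrative only.

```mlir
func @fusion_sketch(%out : memref<10xf32>) {
  %m   = memref.alloc() : memref<10xf32>  // intermediate buffer
  %cf7 = constant 7.0 : f32
  affine.for %i0 = 0 to 10 {
    affine.store %cf7, %m[%i0] : memref<10xf32>  // producer
  }
  affine.for %i1 = 0 to 10 {
    %v = affine.load %m[%i1] : memref<10xf32>    // consumer
    affine.store %v, %out[%i1] : memref<10xf32>
  }
  // Expected after fusion: a single loop whose body stores to and loads from a
  // private memref<1xf32> at index 0, with the original producer loop removed.
  return
}
```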
- // CHECK: alloc() : memref<1xf32> + // CHECK: memref.alloc() : memref<1xf32> // CHECK: affine.for %{{.*}} = 0 to 10 { // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[0] : memref<1xf32> // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[0] : memref<1xf32> @@ -333,8 +333,8 @@ // CHECK-LABEL: func @should_fuse_and_move_to_preserve_war_dep() { func @should_fuse_and_move_to_preserve_war_dep() { - %a = alloc() : memref<10xf32> - %b = alloc() : memref<10xf32> + %a = memref.alloc() : memref<10xf32> + %b = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 affine.for %i0 = 0 to 10 { @@ -366,7 +366,7 @@ // CHECK-LABEL: func @should_fuse_if_top_level_access() { func @should_fuse_if_top_level_access() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 affine.for %i0 = 0 to 10 { @@ -380,8 +380,8 @@ %v1 = affine.load %m[%c0] : memref<10xf32> // Top-level load to '%m' should prevent creating a private memref but // loop nests should be fused and '%i0' should be removed. - // CHECK: %[[m:.*]] = alloc() : memref<10xf32> - // CHECK-NOT: alloc + // CHECK: %[[m:.*]] = memref.alloc() : memref<10xf32> + // CHECK-NOT: memref.alloc // CHECK: affine.for %[[i1:.*]] = 0 to 10 { // CHECK-NEXT: affine.store %{{.*}}, %[[m]][%[[i1]]] : memref<10xf32> @@ -395,7 +395,7 @@ // CHECK-LABEL: func @should_fuse_but_not_remove_src() { func @should_fuse_but_not_remove_src() { - %m = alloc() : memref<100xf32> + %m = memref.alloc() : memref<100xf32> %cf7 = constant 7.0 : f32 affine.for %i0 = 0 to 100 { @@ -424,7 +424,7 @@ // CHECK-LABEL: func @should_fuse_no_top_level_access() { func @should_fuse_no_top_level_access() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 affine.for %i0 = 0 to 10 { @@ -447,7 +447,7 @@ // CHECK-LABEL: func @should_not_fuse_if_inst_at_top_level() { func @should_not_fuse_if_inst_at_top_level() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 affine.for %i0 = 0 to 10 { @@ -475,7 +475,7 @@ // CHECK-LABEL: func @should_not_fuse_if_inst_in_loop_nest() { func @should_not_fuse_if_inst_in_loop_nest() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 %c4 = constant 4 : index @@ -504,7 +504,7 @@ // CHECK-LABEL: func @permute_and_fuse() { func @permute_and_fuse() { - %m = alloc() : memref<10x20x30xf32> + %m = memref.alloc() : memref<10x20x30xf32> %cf7 = constant 7.0 : f32 affine.for %i0 = 0 to 10 { @@ -545,7 +545,7 @@ // Reshape from a 64 x f32 to 16 x 4 x f32. // CHECK-LABEL: func @fuse_reshape_64_16_4 func @fuse_reshape_64_16_4(%in : memref<64xf32>) { - %out = alloc() : memref<16x4xf32> + %out = memref.alloc() : memref<16x4xf32> affine.for %i0 = 0 to 64 { %v = affine.load %in[%i0] : memref<64xf32> @@ -575,8 +575,8 @@ // Reshape a 16x4xf32 to 64xf32. // CHECK-LABEL: func @fuse_reshape_16_4_64 func @fuse_reshape_16_4_64() { - %in = alloc() : memref<16x4xf32> - %out = alloc() : memref<64xf32> + %in = memref.alloc() : memref<16x4xf32> + %out = memref.alloc() : memref<64xf32> affine.for %i0 = 0 to 16 { affine.for %i1 = 0 to 4 { @@ -608,9 +608,9 @@ // All three loop nests below (6-d one, 2-d one, 2-d one is fused into a single // 2-d loop nest). 
func @R6_to_R2_reshape_square() -> memref<64x9xi32> { - %in = alloc() : memref<2x2x3x3x16x1xi32> - %out = alloc() : memref<64x9xi32> - %live_out = alloc() : memref<64x9xi32> + %in = memref.alloc() : memref<2x2x3x3x16x1xi32> + %out = memref.alloc() : memref<64x9xi32> + %live_out = memref.alloc() : memref<64x9xi32> // Initialize input. affine.for %i0 = 0 to 2 { @@ -670,9 +670,9 @@ // // CHECK-LABEL: func @R6_to_R2_reshape -// CHECK: alloc() : memref<1x2x3x3x16x1xi32> -// CHECK: alloc() : memref<1x1xi32> -// CHECK: alloc() : memref<64x9xi32> +// CHECK: memref.alloc() : memref<1x2x3x3x16x1xi32> +// CHECK: memref.alloc() : memref<1x1xi32> +// CHECK: memref.alloc() : memref<64x9xi32> // CHECK-NEXT: affine.for %{{.*}} = 0 to 64 { // CHECK-NEXT: affine.for %{{.*}} = 0 to 9 { // CHECK-NEXT: affine.apply [[$MAP0]](%{{.*}}, %{{.*}}) @@ -703,7 +703,7 @@ // CHECK-LABEL: func @fuse_symbolic_bounds func @fuse_symbolic_bounds(%M : index, %N : index) { %N_plus_5 = affine.apply affine_map<(d0) -> (d0 + 5)>(%N) - %m = alloc(%M, %N_plus_5) : memref + %m = memref.alloc(%M, %N_plus_5) : memref %c0 = constant 0.0 : f32 %s = constant 5 : index @@ -727,8 +727,8 @@ // CHECK-LABEL: func @should_fuse_reduction_at_depth_of_one func @should_fuse_reduction_at_depth_of_one() { - %a = alloc() : memref<10x100xf32> - %b = alloc() : memref<10xf32> + %a = memref.alloc() : memref<10x100xf32> + %b = memref.alloc() : memref<10xf32> affine.for %i0 = 0 to 10 { affine.for %i1 = 0 to 100 { @@ -772,8 +772,8 @@ // CHECK-LABEL: func @should_fuse_at_src_depth1_and_dst_depth1 func @should_fuse_at_src_depth1_and_dst_depth1() { - %a = alloc() : memref<100x16xf32> - %b = alloc() : memref<100x16xf32> + %a = memref.alloc() : memref<100x16xf32> + %b = memref.alloc() : memref<100x16xf32> affine.for %i0 = 0 to 100 { affine.for %i1 = 0 to 16 { @@ -820,7 +820,7 @@ // CHECK-LABEL: func @should_fuse_src_depth1_at_dst_depth2 func @should_fuse_src_depth1_at_dst_depth2() { - %a = alloc() : memref<100xf32> + %a = memref.alloc() : memref<100xf32> %c0 = constant 0.0 : f32 affine.for %i0 = 0 to 100 { @@ -851,7 +851,7 @@ // CHECK-LABEL: func @fusion_at_depth0_not_currently_supported func @fusion_at_depth0_not_currently_supported() { - %0 = alloc() : memref<10xf32> + %0 = memref.alloc() : memref<10xf32> %c0 = constant 0 : index %cst = constant 0.000000e+00 : f32 affine.for %i0 = 0 to 10 { @@ -862,7 +862,7 @@ } // NOTE: Should shrink memref size to 1 element access by load in dst loop // nest, and make the store in the slice store to the same element. - // CHECK-DAG: alloc() : memref<1xf32> + // CHECK-DAG: memref.alloc() : memref<1xf32> // CHECK: affine.for %{{.*}} = 0 to 10 { // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[0] : memref<1xf32> // CHECK-NEXT: affine.load %{{.*}}[0] : memref<1xf32> @@ -875,9 +875,9 @@ // CHECK-LABEL: func @should_fuse_deep_loop_nests func @should_fuse_deep_loop_nests() { - %0 = alloc() : memref<2x2x3x3x16x10xf32, 2> - %1 = alloc() : memref<2x2x3x3x16x10xf32, 2> - %2 = alloc() : memref<3x3x3x3x16x10xf32, 2> + %0 = memref.alloc() : memref<2x2x3x3x16x10xf32, 2> + %1 = memref.alloc() : memref<2x2x3x3x16x10xf32, 2> + %2 = memref.alloc() : memref<3x3x3x3x16x10xf32, 2> %c0 = constant 0 : index %c1 = constant 1 : index %c1_0 = constant 1 : index @@ -934,7 +934,7 @@ // bounds which are a function of the first four loops of destination loop nest, // where the destination loops nests have been interchanged. 
-// CHECK-DAG: alloc() : memref<1x1x1x1x16x10xf32, 2> +// CHECK-DAG: memref.alloc() : memref<1x1x1x1x16x10xf32, 2> // CHECK: affine.for %{{.*}} = 0 to 3 { // CHECK-NEXT: affine.for %{{.*}} = 0 to 3 { // CHECK-NEXT: affine.for %{{.*}} = 0 to 2 { @@ -979,8 +979,8 @@ // CHECK-LABEL: func @should_fuse_at_depth1_and_reduce_slice_trip_count func @should_fuse_at_depth1_and_reduce_slice_trip_count() { - %a = alloc() : memref<4x256xf32> - %b = alloc() : memref<4x256xf32> + %a = memref.alloc() : memref<4x256xf32> + %b = memref.alloc() : memref<4x256xf32> %c0 = constant 0 : index %cf0 = constant 0.0 : f32 @@ -1008,7 +1008,7 @@ // NOTE: the size of the private memref created for the fused loop nest // is reduced from the original shape from 4x256 to 4x16 because of the // data accessed by the load. - // CHECK-DAG: alloc() : memref<1x16xf32> + // CHECK-DAG: memref.alloc() : memref<1x16xf32> // CHECK: affine.for %{{.*}} = 0 to 4 { // CHECK-NEXT: affine.for %{{.*}} = 0 to 256 { // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<4x256xf32> @@ -1028,7 +1028,7 @@ // CHECK-LABEL: func @should_fuse_at_depth1_with_trip_count_20 func @should_fuse_at_depth1_with_trip_count_20() { - %a = alloc() : memref<100xf32> + %a = memref.alloc() : memref<100xf32> %c0 = constant 0 : index %cf0 = constant 0.0 : f32 @@ -1047,7 +1047,7 @@ } } // NOTE: The size of the private memref created for fusion is shrunk to 20xf32 - // CHECK-DAG: alloc() : memref<20xf32> + // CHECK-DAG: memref.alloc() : memref<20xf32> // CHECK: affine.for %{{.*}} = 0 to 5 { // CHECK-NEXT: affine.for %{{.*}} = 0 to 20 { // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<20xf32> @@ -1069,7 +1069,7 @@ // CHECK-LABEL: func @should_fuse_at_depth1_with_trip_count_19 func @should_fuse_at_depth1_with_trip_count_19() { - %a = alloc() : memref<100xf32> + %a = memref.alloc() : memref<100xf32> %c0 = constant 0 : index %cf0 = constant 0.0 : f32 @@ -1088,7 +1088,7 @@ } } // NOTE: The size of the private memref created for fusion is shrunk to 19xf32 - // CHECK-DAG: alloc() : memref<19xf32> + // CHECK-DAG: memref.alloc() : memref<19xf32> // CHECK: affine.for %{{.*}} = 0 to 5 { // CHECK-NEXT: affine.for %{{.*}} = 0 to 19 { // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<19xf32> @@ -1111,7 +1111,7 @@ // CHECK-LABEL: func @should_fuse_with_private_memrefs_with_diff_shapes() { func @should_fuse_with_private_memrefs_with_diff_shapes() { - %m = alloc() : memref<100xf32> + %m = memref.alloc() : memref<100xf32> %cf7 = constant 7.0 : f32 affine.for %i0 = 0 to 100 { @@ -1125,8 +1125,8 @@ } // Should create two new private memrefs customized to the shapes accessed // by loops %{{.*}} and %{{.*}}. - // CHECK-DAG: alloc() : memref<1xf32> - // CHECK-DAG: alloc() : memref<1xf32> + // CHECK-DAG: memref.alloc() : memref<1xf32> + // CHECK-DAG: memref.alloc() : memref<1xf32> // CHECK: affine.for %{{.*}} = 0 to 17 { // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[0] : memref<1xf32> // CHECK-NEXT: affine.load %{{.*}}[0] : memref<1xf32> @@ -1195,7 +1195,7 @@ // CHECK-LABEL: func @should_fuse_escaping_memref_but_preserve_src_loop() -> memref<10xf32> func @should_fuse_escaping_memref_but_preserve_src_loop() -> memref<10xf32> { %cf7 = constant 7.0 : f32 - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> affine.for %i0 = 0 to 10 { affine.store %cf7, %m[%i0] : memref<10xf32> } @@ -1206,7 +1206,7 @@ // because it writes to memref '%m', which is returned by the function, and // the '%i1' memory region does not cover '%i0' memory region. 
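Editor's note (not part of the patch): the escaping-memref cases above hinge on the buffer being visible outside the function. A compressed sketch of that constraint, with assumed names:

```mlir
func @escaping_sketch() -> memref<10xf32> {
  %cf = constant 7.0 : f32
  %m  = memref.alloc() : memref<10xf32>
  affine.for %i = 0 to 10 {
    // This loop defines the contents of %m. Because %m is returned below, the
    // loop cannot be deleted even if its store is also fused into a consumer.
    affine.store %cf, %m[%i] : memref<10xf32>
  }
  return %m : memref<10xf32>
}
```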
- // CHECK-DAG: alloc() : memref<10xf32> + // CHECK-DAG: memref.alloc() : memref<10xf32> // CHECK: affine.for %{{.*}} = 0 to 10 { // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32> // CHECK-NEXT: } @@ -1221,7 +1221,7 @@ // This should fuse with the %in becoming a 1x1x1. func @R3_to_R2_reshape() { - %in = alloc() : memref<2x3x16xi32> + %in = memref.alloc() : memref<2x3x16xi32> %c0 = constant 0 : index @@ -1249,7 +1249,7 @@ // CHECK-DAG: [[$MAP2:#map[0-9]+]] = affine_map<(d0) -> (d0 floordiv 48)> // CHECK-LABEL: func @R3_to_R2_reshape() -// CHECK-DAG: alloc() : memref<1x1x1xi32> +// CHECK-DAG: memref.alloc() : memref<1x1x1xi32> // CHECK: affine.for %{{.*}} = 0 to 32 { // CHECK-NEXT: affine.for %{{.*}} = 0 to 3 { // CHECK-NEXT: affine.apply [[$MAP0]](%{{.*}}, %{{.*}}) @@ -1265,8 +1265,8 @@ // ----- func @should_fuse_multi_output_producer() { - %a = alloc() : memref<10xf32> - %b = alloc() : memref<10xf32> + %a = memref.alloc() : memref<10xf32> + %b = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 @@ -1293,9 +1293,9 @@ // CHECK-LABEL: func @fusion_preventing_deps_on_middle_loop() { func @fusion_preventing_deps_on_middle_loop() { - %a = alloc() : memref<10xf32> - %b = alloc() : memref<10xf32> - %c = alloc() : memref<10xf32> + %a = memref.alloc() : memref<10xf32> + %b = memref.alloc() : memref<10xf32> + %c = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 @@ -1334,9 +1334,9 @@ // CHECK-LABEL: func @should_fuse_and_move_to_preserve_war_dep() { func @should_fuse_and_move_to_preserve_war_dep() { - %a = alloc() : memref<10xf32> - %b = alloc() : memref<10xf32> - %c = alloc() : memref<10xf32> + %a = memref.alloc() : memref<10xf32> + %b = memref.alloc() : memref<10xf32> + %c = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 @@ -1368,7 +1368,7 @@ // It is possible to fuse loop '%i0' into '%i3' and preserve dependences // if the fused loop nest is inserted between loops '%i1' and '%i2'. - // CHECK-DAG: alloc() : memref<1xf32> + // CHECK-DAG: memref.alloc() : memref<1xf32> // CHECK: affine.for %{{.*}} = 0 to 3 { // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32> // CHECK-NEXT: } @@ -1389,9 +1389,9 @@ // CHECK-LABEL: func @fusion_preventing_dep_on_constant() { func @fusion_preventing_dep_on_constant() { - %a = alloc() : memref<10xf32> - %b = alloc() : memref<10xf32> - %c = alloc() : memref<10xf32> + %a = memref.alloc() : memref<10xf32> + %b = memref.alloc() : memref<10xf32> + %c = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 @@ -1430,9 +1430,9 @@ // CHECK-LABEL: func @should_fuse_and_preserve_dep_on_constant() { func @should_fuse_and_preserve_dep_on_constant() { - %a = alloc() : memref<10xf32> - %b = alloc() : memref<10xf32> - %c = alloc() : memref<10xf32> + %a = memref.alloc() : memref<10xf32> + %b = memref.alloc() : memref<10xf32> + %c = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 %cf11 = constant 11.0 : f32 @@ -1470,7 +1470,7 @@ // CHECK-LABEL: func @should_fuse_at_depth_above_loop_carried_dependence(%{{.*}}: memref<64x4xf32>, %{{.*}}: memref<64x4xf32>) { func @should_fuse_at_depth_above_loop_carried_dependence(%arg0: memref<64x4xf32>, %arg1: memref<64x4xf32>) { - %out = alloc() : memref<64x4xf32> + %out = memref.alloc() : memref<64x4xf32> %0 = constant 0.0 : f32 affine.for %i0 = 0 to 64 { affine.for %i1 = 0 to 4 { @@ -1506,7 +1506,7 @@ // loop nest iteration bounds on its loop '%i1' are reduced to 1, so the // memref size can be reduced to 128x1xf32. 
- // CHECK: alloc() : memref<64x1xf32> + // CHECK: memref.alloc() : memref<64x1xf32> // CHECK: affine.for %{{.*}} = 0 to 4 { // CHECK-NEXT: affine.for %{{.*}} = 0 to 64 { // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}, 0] : memref<64x1xf32> @@ -1538,8 +1538,8 @@ // CHECK-LABEL: func @should_fuse_only_two_loops_and_remove_producer() { func @should_fuse_only_two_loops_and_remove_producer() { - %a = alloc() : memref<10xf32> - %b = alloc() : memref<10xf32> + %a = memref.alloc() : memref<10xf32> + %b = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 @@ -1582,7 +1582,7 @@ // CHECK-LABEL: func @should_fuse_after_one_loop_interchange() { func @should_fuse_after_one_loop_interchange() { - %a = alloc() : memref<10xf32> + %a = memref.alloc() : memref<10xf32> %cf0 = constant 0.0 : f32 affine.for %i0 = 0 to 10 { @@ -1617,7 +1617,7 @@ // CHECK-LABEL: func @should_fuse_after_two_loop_interchanges() { func @should_fuse_after_two_loop_interchanges() { - %a = alloc() : memref<6x8xf32> + %a = memref.alloc() : memref<6x8xf32> %cf0 = constant 0.0 : f32 affine.for %i0 = 0 to 6 { @@ -1739,8 +1739,8 @@ // Test case which illustrates fix for b/126454413 func @test_add_slice_bounds() { - %a = alloc() : memref<10xf32> - %b = alloc() : memref<10xf32> + %a = memref.alloc() : memref<10xf32> + %b = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 %c0 = constant 0 : index @@ -1785,7 +1785,7 @@ // ----- func @should_fuse_init_loops_siblings_then_shared_producer(%arg0: memref<10x10xf32>, %arg1: memref<10x10xf32>) { - %0 = alloc() : memref<10x10xf32> + %0 = memref.alloc() : memref<10x10xf32> %cst = constant 0.000000e+00 : f32 %cst_0 = constant 1.000000e+00 : f32 %cst_1 = constant 7.000000e+00 : f32 @@ -1852,11 +1852,11 @@ // ----- func @two_matrix_vector_products() { - %in_matrix = alloc() : memref<10x10xf32> - %in_vec0 = alloc() : memref<10xf32> - %in_vec1 = alloc() : memref<10xf32> - %out_vec0 = alloc() : memref<10xf32> - %out_vec1 = alloc() : memref<10xf32> + %in_matrix = memref.alloc() : memref<10x10xf32> + %in_vec0 = memref.alloc() : memref<10xf32> + %in_vec1 = memref.alloc() : memref<10xf32> + %out_vec0 = memref.alloc() : memref<10xf32> + %out_vec1 = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 // Populate input matrix. 
@@ -1916,7 +1916,7 @@ // ----- func @should_not_slice_past_slice_barrier() { - %0 = alloc() : memref<100x16xf32> + %0 = memref.alloc() : memref<100x16xf32> affine.for %i0 = 0 to 100 { affine.for %i1 = 0 to 16 { %1 = "op1"() : () -> f32 @@ -1948,7 +1948,7 @@ #map0 = affine_map<(d0, d1) -> (d0 * 16 + d1)> func @fuse_across_dim_mismatch(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9xf32>, %arg2: memref<9xf32>) { - %1 = alloc() : memref<144x4xf32> + %1 = memref.alloc() : memref<144x4xf32> %2 = constant 0.0 : f32 affine.for %i2 = 0 to 9 { affine.for %i3 = 0 to 4 { @@ -1972,7 +1972,7 @@ } // MAXIMAL: #map = affine_map<(d0, d1) -> (d0 * 16 + d1)> // MAXIMAL-LABEL: func @fuse_across_dim_mismatch -// MAXIMAL: alloc() : memref<1x1xf32> +// MAXIMAL: memref.alloc() : memref<1x1xf32> // MAXIMAL: affine.for %{{.*}} = 0 to 9 { // MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 9 { // MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 4 { @@ -1998,9 +1998,9 @@ #map12 = affine_map<(d0, d1) -> (d0 * 16 - d1 + 15)> func @fuse_across_varying_dims_complex(%arg0: f32) { %c0 = constant 0 : index - %0 = alloc() : memref<2x2x3x3x16x1xf32> - %1 = alloc() : memref<64x9xf32> - %2 = alloc() : memref<144x4xf32> + %0 = memref.alloc() : memref<2x2x3x3x16x1xf32> + %1 = memref.alloc() : memref<64x9xf32> + %2 = memref.alloc() : memref<144x4xf32> affine.for %i0 = 0 to 64 { affine.for %i1 = 0 to 9 { %4 = affine.apply #map3(%i0, %i1) @@ -2044,10 +2044,10 @@ // MAXIMAL-DAG: [[$MAP7:#map[0-9]+]] = affine_map<(d0, d1) -> (d0 * 16 + d1)> // MAXIMAL-DAG: [[$MAP8:#map[0-9]+]] = affine_map<(d0, d1) -> (d0 * 16 - d1 + 15)> // MAXIMAL-LABEL: func @fuse_across_varying_dims_complex -// MAXIMAL-NEXT: alloc() : memref<64x1xf32> +// MAXIMAL-NEXT: memref.alloc() : memref<64x1xf32> // MAXIMAL-NEXT: constant 0 : index -// MAXIMAL-NEXT: alloc() : memref<2x2x3x3x16x1xf32> -// MAXIMAL-NEXT: alloc() : memref<144x4xf32> +// MAXIMAL-NEXT: memref.alloc() : memref<2x2x3x3x16x1xf32> +// MAXIMAL-NEXT: memref.alloc() : memref<144x4xf32> // MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 9 { // MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 9 { // MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 4 { @@ -2081,7 +2081,7 @@ // ----- func @should_fuse_with_slice_union() { - %a = alloc() : memref<100xf32> + %a = memref.alloc() : memref<100xf32> %c0 = constant 0 : index %cf0 = constant 0.0 : f32 @@ -2284,8 +2284,8 @@ // CHECK-LABEL: func @should_fuse_self_dependence_multi_store_producer() { func @should_fuse_self_dependence_multi_store_producer() { - %m = alloc() : memref<10xf32> - %local_m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> + %local_m = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 affine.for %i0 = 0 to 10 { @@ -2310,8 +2310,8 @@ // CHECK-LABEL: func @should_fuse_dead_multi_store_producer() { func @should_fuse_dead_multi_store_producer() { - %m = alloc() : memref<10xf32> - %dead_m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> + %dead_m = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 affine.for %i0 = 0 to 10 { @@ -2334,7 +2334,7 @@ // CHECK-LABEL: func @should_fuse_function_live_out_multi_store_producer func @should_fuse_function_live_out_multi_store_producer(%live_in_out_m : memref<10xf32>) { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 affine.for %i0 = 0 to 10 { @@ -2359,7 +2359,7 @@ // CHECK-LABEL: func @mul_add_0 func @mul_add_0(%arg0: memref<3x4xf32>, %arg1: memref<4x3xf32>, %arg2: memref<3x3xf32>, %arg3: memref<3x3xf32>) { %cst = constant 0.000000e+00 : f32 - %0 = 
alloc() : memref<3x3xf32> + %0 = memref.alloc() : memref<3x3xf32> affine.for %arg4 = 0 to 3 { affine.for %arg5 = 0 to 3 { affine.store %cst, %0[%arg4, %arg5] : memref<3x3xf32> @@ -2468,7 +2468,7 @@ // MAXIMAL-LABEL: func @reshape_into_matmul func @reshape_into_matmul(%lhs : memref<1024x1024xf32>, %R: memref<16x64x1024xf32>, %out: memref<1024x1024xf32>) { - %rhs = alloc() : memref<1024x1024xf32> + %rhs = memref.alloc() : memref<1024x1024xf32> // Reshape from 3-d to 2-d. affine.for %i0 = 0 to 16 { @@ -2495,7 +2495,7 @@ } return } -// MAXIMAL-NEXT: alloc +// MAXIMAL-NEXT: memref.alloc // MAXIMAL-NEXT: affine.for // MAXIMAL-NEXT: affine.for // MAXIMAL-NEXT: affine.for @@ -2580,7 +2580,7 @@ // CHECK-LABEL: func @calc func @calc(%arg0: memref, %arg1: memref, %arg2: memref, %len: index) { %c1 = constant 1 : index - %1 = alloc(%len) : memref + %1 = memref.alloc(%len) : memref affine.for %arg4 = 1 to 10 { %7 = affine.load %arg0[%arg4] : memref %8 = affine.load %arg1[%arg4] : memref @@ -2595,7 +2595,7 @@ } return } -// CHECK: alloc() : memref<1xf32> +// CHECK: memref.alloc() : memref<1xf32> // CHECK: affine.for %arg{{.*}} = 1 to 10 { // CHECK-NEXT: affine.load %arg{{.*}} // CHECK-NEXT: affine.load %arg{{.*}} @@ -2620,10 +2620,10 @@ affine.store %add, %in0[%d] : memref<32xf32> } affine.for %d = 0 to 32 { - %lhs = load %in0[%d] : memref<32xf32> - %rhs = load %in1[%d] : memref<32xf32> + %lhs = memref.load %in0[%d] : memref<32xf32> + %rhs = memref.load %in1[%d] : memref<32xf32> %add = subf %lhs, %rhs : f32 - store %add, %in0[%d] : memref<32xf32> + memref.store %add, %in0[%d] : memref<32xf32> } affine.for %d = 0 to 32 { %lhs = affine.load %in0[%d] : memref<32xf32> @@ -2646,15 +2646,15 @@ // CHECK-LABEL: func @should_not_fuse_since_top_level_non_affine_users func @should_not_fuse_since_top_level_non_affine_users(%in0 : memref<32xf32>, %in1 : memref<32xf32>) { - %sum = alloc() : memref + %sum = memref.alloc() : memref affine.for %d = 0 to 32 { %lhs = affine.load %in0[%d] : memref<32xf32> %rhs = affine.load %in1[%d] : memref<32xf32> %add = addf %lhs, %rhs : f32 - store %add, %sum[] : memref + memref.store %add, %sum[] : memref affine.store %add, %in0[%d] : memref<32xf32> } - %load_sum = load %sum[] : memref + %load_sum = memref.load %sum[] : memref affine.for %d = 0 to 32 { %lhs = affine.load %in0[%d] : memref<32xf32> %rhs = affine.load %in1[%d] : memref<32xf32> @@ -2662,7 +2662,7 @@ %sub = subf %add, %load_sum: f32 affine.store %sub, %in0[%d] : memref<32xf32> } - dealloc %sum : memref + memref.dealloc %sum : memref return } @@ -2686,7 +2686,7 @@ %add = addf %lhs, %rhs : f32 affine.store %add, %in0[%d] : memref<32xf32> } - store %cst_0, %in0[%c0] : memref<32xf32> + memref.store %cst_0, %in0[%c0] : memref<32xf32> affine.for %d = 0 to 32 { %lhs = affine.load %in0[%d] : memref<32xf32> %rhs = affine.load %in1[%d] : memref<32xf32> @@ -2705,7 +2705,7 @@ // MAXIMAL-LABEL: func @fuse_minor_affine_map func @fuse_minor_affine_map(%in: memref<128xf32>, %out: memref<20x512xf32>) { - %tmp = alloc() : memref<128xf32> + %tmp = memref.alloc() : memref<128xf32> affine.for %arg4 = 0 to 128 { %ld = affine.load %in[%arg4] : memref<128xf32> @@ -2725,7 +2725,7 @@ // TODO: The size of the private memref is not properly computed in the presence // of the 'mod' operation. 
It should be memref<1xf32> instead of // memref<128xf32>: https://bugs.llvm.org/show_bug.cgi?id=46973 -// MAXIMAL: alloc() : memref<128xf32> +// MAXIMAL: memref.alloc() : memref<128xf32> // MAXIMAL: affine.for // MAXIMAL-NEXT: affine.for // MAXIMAL-NOT: affine.for @@ -2735,9 +2735,9 @@ // CHECK-LABEL: func @should_fuse_multi_store_producer_and_privatize_memfefs func @should_fuse_multi_store_producer_and_privatize_memfefs() { - %a = alloc() : memref<10xf32> - %b = alloc() : memref<10xf32> - %c = alloc() : memref<10xf32> + %a = memref.alloc() : memref<10xf32> + %b = memref.alloc() : memref<10xf32> + %c = memref.alloc() : memref<10xf32> %cst = constant 0.000000e+00 : f32 affine.for %arg0 = 0 to 10 { affine.store %cst, %a[%arg0] : memref<10xf32> @@ -2839,22 +2839,22 @@ // ----- func @should_not_fuse_due_to_dealloc(%arg0: memref<16xf32>){ - %A = alloc() : memref<16xf32> - %C = alloc() : memref<16xf32> + %A = memref.alloc() : memref<16xf32> + %C = memref.alloc() : memref<16xf32> %cst_1 = constant 1.000000e+00 : f32 affine.for %arg1 = 0 to 16 { %a = affine.load %arg0[%arg1] : memref<16xf32> affine.store %a, %A[%arg1] : memref<16xf32> affine.store %a, %C[%arg1] : memref<16xf32> } - dealloc %C : memref<16xf32> - %B = alloc() : memref<16xf32> + memref.dealloc %C : memref<16xf32> + %B = memref.alloc() : memref<16xf32> affine.for %arg1 = 0 to 16 { %a = affine.load %A[%arg1] : memref<16xf32> %b = addf %cst_1, %a : f32 affine.store %b, %B[%arg1] : memref<16xf32> } - dealloc %A : memref<16xf32> + memref.dealloc %A : memref<16xf32> return } // CHECK-LABEL: func @should_not_fuse_due_to_dealloc @@ -2862,7 +2862,7 @@ // CHECK-NEXT: affine.load // CHECK-NEXT: affine.store // CHECK-NEXT: affine.store -// CHECK: dealloc +// CHECK: memref.dealloc // CHECK: affine.for // CHECK-NEXT: affine.load // CHECK-NEXT: addf @@ -3021,14 +3021,14 @@ func private @some_function(memref<16xf32>) func @call_op_prevents_fusion(%arg0: memref<16xf32>){ - %A = alloc() : memref<16xf32> + %A = memref.alloc() : memref<16xf32> %cst_1 = constant 1.000000e+00 : f32 affine.for %arg1 = 0 to 16 { %a = affine.load %arg0[%arg1] : memref<16xf32> affine.store %a, %A[%arg1] : memref<16xf32> } call @some_function(%A) : (memref<16xf32>) -> () - %B = alloc() : memref<16xf32> + %B = memref.alloc() : memref<16xf32> affine.for %arg1 = 0 to 16 { %a = affine.load %A[%arg1] : memref<16xf32> %b = addf %cst_1, %a : f32 @@ -3050,14 +3050,14 @@ func private @some_function() func @call_op_does_not_prevent_fusion(%arg0: memref<16xf32>){ - %A = alloc() : memref<16xf32> + %A = memref.alloc() : memref<16xf32> %cst_1 = constant 1.000000e+00 : f32 affine.for %arg1 = 0 to 16 { %a = affine.load %arg0[%arg1] : memref<16xf32> affine.store %a, %A[%arg1] : memref<16xf32> } call @some_function() : () -> () - %B = alloc() : memref<16xf32> + %B = memref.alloc() : memref<16xf32> affine.for %arg1 = 0 to 16 { %a = affine.load %A[%arg1] : memref<16xf32> %b = addf %cst_1, %a : f32 diff --git a/mlir/test/Transforms/loop-invariant-code-motion.mlir b/mlir/test/Transforms/loop-invariant-code-motion.mlir --- a/mlir/test/Transforms/loop-invariant-code-motion.mlir +++ b/mlir/test/Transforms/loop-invariant-code-motion.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s -split-input-file -loop-invariant-code-motion | FileCheck %s func @nested_loops_both_having_invariant_code() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 %cf8 = constant 8.0 : f32 @@ -13,7 +13,7 @@ } } - // CHECK: %0 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : 
memref<10xf32> // CHECK-NEXT: %[[CST0:.*]] = constant 7.000000e+00 : f32 // CHECK-NEXT: %[[CST1:.*]] = constant 8.000000e+00 : f32 // CHECK-NEXT: %[[ADD0:.*]] = addf %[[CST0]], %[[CST1]] : f32 @@ -28,7 +28,7 @@ // ----- func @nested_loops_code_invariant_to_both() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 %cf8 = constant 8.0 : f32 @@ -38,7 +38,7 @@ } } - // CHECK: %0 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> // CHECK-NEXT: %cst = constant 7.000000e+00 : f32 // CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32 // CHECK-NEXT: %1 = addf %cst, %cst_0 : f32 @@ -49,8 +49,8 @@ // ----- func @single_loop_nothing_invariant() { - %m1 = alloc() : memref<10xf32> - %m2 = alloc() : memref<10xf32> + %m1 = memref.alloc() : memref<10xf32> + %m2 = memref.alloc() : memref<10xf32> affine.for %arg0 = 0 to 10 { %v0 = affine.load %m1[%arg0] : memref<10xf32> %v1 = affine.load %m2[%arg0] : memref<10xf32> @@ -58,8 +58,8 @@ affine.store %v2, %m1[%arg0] : memref<10xf32> } - // CHECK: %0 = alloc() : memref<10xf32> - // CHECK-NEXT: %1 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> + // CHECK-NEXT: %1 = memref.alloc() : memref<10xf32> // CHECK-NEXT: affine.for %arg0 = 0 to 10 { // CHECK-NEXT: %2 = affine.load %0[%arg0] : memref<10xf32> // CHECK-NEXT: %3 = affine.load %1[%arg0] : memref<10xf32> @@ -72,7 +72,7 @@ // ----- func @invariant_code_inside_affine_if() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 affine.for %arg0 = 0 to 10 { @@ -84,7 +84,7 @@ } } - // CHECK: %0 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> // CHECK-NEXT: %cst = constant 8.000000e+00 : f32 // CHECK-NEXT: affine.for %arg0 = 0 to 10 { // CHECK-NEXT: %1 = affine.apply #map(%arg0) @@ -100,7 +100,7 @@ // ----- func @invariant_affine_if() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 affine.for %arg0 = 0 to 10 { affine.for %arg1 = 0 to 10 { @@ -110,7 +110,7 @@ } } - // CHECK: %0 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> // CHECK-NEXT: %[[CST:.*]] = constant 8.000000e+00 : f32 // CHECK-NEXT: affine.for %[[ARG:.*]] = 0 to 10 { // CHECK-NEXT: } @@ -125,7 +125,7 @@ // ----- func @invariant_affine_if2() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 affine.for %arg0 = 0 to 10 { affine.for %arg1 = 0 to 10 { @@ -136,7 +136,7 @@ } } - // CHECK: alloc + // CHECK: memref.alloc // CHECK-NEXT: constant // CHECK-NEXT: affine.for // CHECK-NEXT: affine.for @@ -152,7 +152,7 @@ // ----- func @invariant_affine_nested_if() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 affine.for %arg0 = 0 to 10 { affine.for %arg1 = 0 to 10 { @@ -165,7 +165,7 @@ } } - // CHECK: alloc + // CHECK: memref.alloc // CHECK-NEXT: constant // CHECK-NEXT: affine.for // CHECK-NEXT: affine.for @@ -184,7 +184,7 @@ // ----- func @invariant_affine_nested_if_else() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf8 = constant 8.0 : f32 affine.for %arg0 = 0 to 10 { affine.for %arg1 = 0 to 10 { @@ -200,7 +200,7 @@ } } - // CHECK: alloc + // CHECK: memref.alloc // CHECK-NEXT: constant // CHECK-NEXT: affine.for // CHECK-NEXT: affine.for @@ -225,7 +225,7 @@ %ci0 = constant 0 : index %ci10 = constant 10 : index %ci1 = constant 1 : index - %m = alloc() : memref<10xf32> + %m = memref.alloc() : 
memref<10xf32> %cf7 = constant 7.0 : f32 %cf8 = constant 8.0 : f32 scf.for %arg0 = %ci0 to %ci10 step %ci1 { @@ -234,7 +234,7 @@ } } - // CHECK: %0 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> // CHECK-NEXT: %cst = constant 7.000000e+00 : f32 // CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32 // CHECK-NEXT: %1 = addf %cst, %cst_0 : f32 @@ -248,14 +248,14 @@ %ci0 = constant 0 : index %ci10 = constant 10 : index %ci1 = constant 1 : index - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> scf.for %arg0 = %ci0 to %ci10 step %ci1 { scf.for %arg1 = %ci0 to %ci10 step %ci1 { %v0 = addi %arg0, %arg1 : index } } - // CHECK: %0 = alloc() : memref<10xf32> + // CHECK: %0 = memref.alloc() : memref<10xf32> // CHECK-NEXT: scf.for // CHECK-NEXT: scf.for // CHECK-NEXT: addi diff --git a/mlir/test/Transforms/memref-bound-check.mlir b/mlir/test/Transforms/memref-bound-check.mlir --- a/mlir/test/Transforms/memref-bound-check.mlir +++ b/mlir/test/Transforms/memref-bound-check.mlir @@ -8,8 +8,8 @@ %minusone = constant -1 : index %sym = constant 111 : index - %A = alloc() : memref<9 x 9 x i32> - %B = alloc() : memref<111 x i32> + %A = memref.alloc() : memref<9 x 9 x i32> + %B = memref.alloc() : memref<111 x i32> affine.for %i = -1 to 10 { affine.for %j = -1 to 10 { @@ -41,7 +41,7 @@ // CHECK-LABEL: func @test_mod_floordiv_ceildiv func @test_mod_floordiv_ceildiv() { %zero = constant 0 : index - %A = alloc() : memref<128 x 64 x 64 x i32> + %A = memref.alloc() : memref<128 x 64 x 64 x i32> affine.for %i = 0 to 256 { affine.for %j = 0 to 256 { @@ -64,9 +64,9 @@ // CHECK-LABEL: func @test_no_out_of_bounds() func @test_no_out_of_bounds() { %zero = constant 0 : index - %A = alloc() : memref<257 x 256 x i32> - %C = alloc() : memref<257 x i32> - %B = alloc() : memref<1 x i32> + %A = memref.alloc() : memref<257 x 256 x i32> + %C = memref.alloc() : memref<257 x i32> + %B = memref.alloc() : memref<1 x i32> affine.for %i = 0 to 256 { affine.for %j = 0 to 256 { @@ -90,7 +90,7 @@ // CHECK-LABEL: func @mod_div func @mod_div() { %zero = constant 0 : index - %A = alloc() : memref<128 x 64 x 64 x i32> + %A = memref.alloc() : memref<128 x 64 x 64 x i32> affine.for %i = 0 to 256 { affine.for %j = 0 to 256 { @@ -113,7 +113,7 @@ // Tests with nested mod's and floordiv's. 
// CHECK-LABEL: func @mod_floordiv_nested() { func @mod_floordiv_nested() { - %A = alloc() : memref<256 x 256 x i32> + %A = memref.alloc() : memref<256 x 256 x i32> affine.for %i = 0 to 256 { affine.for %j = 0 to 256 { %idx0 = affine.apply affine_map<(d0, d1) -> ((d0 mod 1024) floordiv 4)>(%i, %j) @@ -126,7 +126,7 @@ // CHECK-LABEL: func @test_semi_affine_bailout func @test_semi_affine_bailout(%N : index) { - %B = alloc() : memref<10 x i32> + %B = memref.alloc() : memref<10 x i32> affine.for %i = 0 to 10 { %idx = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%i)[%N] %y = affine.load %B[%idx] : memref<10 x i32> @@ -137,7 +137,7 @@ // CHECK-LABEL: func @multi_mod_floordiv func @multi_mod_floordiv() { - %A = alloc() : memref<2x2xi32> + %A = memref.alloc() : memref<2x2xi32> affine.for %ii = 0 to 64 { %idx0 = affine.apply affine_map<(d0) -> ((d0 mod 147456) floordiv 1152)> (%ii) %idx1 = affine.apply affine_map<(d0) -> (((d0 mod 147456) mod 1152) floordiv 384)> (%ii) @@ -149,8 +149,8 @@ // CHECK-LABEL: func @delinearize_mod_floordiv func @delinearize_mod_floordiv() { %c0 = constant 0 : index - %in = alloc() : memref<2x2x3x3x16x1xi32> - %out = alloc() : memref<64x9xi32> + %in = memref.alloc() : memref<2x2x3x3x16x1xi32> + %out = memref.alloc() : memref<64x9xi32> // Reshape '%in' into '%out'. affine.for %ii = 0 to 64 { @@ -186,7 +186,7 @@ // CHECK-LABEL: func @out_of_bounds func @out_of_bounds() { - %in = alloc() : memref<1xi32> + %in = memref.alloc() : memref<1xi32> %c9 = constant 9 : i32 affine.for %i0 = 10 to 11 { @@ -208,7 +208,7 @@ // CHECK-LABEL: func @test_complex_mod_floordiv func @test_complex_mod_floordiv(%arg0: memref<4x4x16x1xf32>) { %c0 = constant 0 : index - %0 = alloc() : memref<1x2x3x3x16x1xf32> + %0 = memref.alloc() : memref<1x2x3x3x16x1xf32> affine.for %i0 = 0 to 64 { affine.for %i1 = 0 to 9 { %2 = affine.apply #map3(%i0, %i1) @@ -228,8 +228,8 @@ // CHECK-LABEL: func @test_mod_bound func @test_mod_bound() { - %0 = alloc() : memref<7 x f32> - %1 = alloc() : memref<6 x f32> + %0 = memref.alloc() : memref<7 x f32> + %1 = memref.alloc() : memref<6 x f32> affine.for %i0 = 0 to 4096 { affine.for %i1 = #map0(%i0) to #map1(%i0) { affine.load %0[%i1] : memref<7 x f32> @@ -248,9 +248,9 @@ // CHECK-LABEL: func @test_floordiv_bound func @test_floordiv_bound() { - %0 = alloc() : memref<1027 x f32> - %1 = alloc() : memref<1026 x f32> - %2 = alloc() : memref<4096 x f32> + %0 = memref.alloc() : memref<1027 x f32> + %1 = memref.alloc() : memref<1026 x f32> + %2 = memref.alloc() : memref<4096 x f32> %N = constant 2048 : index affine.for %i0 = 0 to 4096 { affine.for %i1 = #map0(%i0) to #map1(%i0) { @@ -287,7 +287,7 @@ // CHECK-LABEL: func @zero_d_memref func @zero_d_memref() { - %Z = alloc() : memref + %Z = memref.alloc() : memref affine.for %i = 0 to 100 { affine.load %Z[] : memref } diff --git a/mlir/test/Transforms/memref-dataflow-opt.mlir b/mlir/test/Transforms/memref-dataflow-opt.mlir --- a/mlir/test/Transforms/memref-dataflow-opt.mlir +++ b/mlir/test/Transforms/memref-dataflow-opt.mlir @@ -9,7 +9,7 @@ // CHECK-LABEL: func @simple_store_load() { func @simple_store_load() { %cf7 = constant 7.0 : f32 - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> affine.for %i0 = 0 to 10 { affine.store %cf7, %m[%i0] : memref<10xf32> %v0 = affine.load %m[%i0] : memref<10xf32> @@ -29,7 +29,7 @@ %cf7 = constant 7.0 : f32 %cf8 = constant 8.0 : f32 %cf9 = constant 9.0 : f32 - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> affine.for %i0 = 0 to 10 { affine.store %cf7, 
%m[%i0] : memref<10xf32> %v0 = affine.load %m[%i0] : memref<10xf32> @@ -58,7 +58,7 @@ // CHECK-LABEL: func @store_load_affine_apply func @store_load_affine_apply() -> memref<10x10xf32> { %cf7 = constant 7.0 : f32 - %m = alloc() : memref<10x10xf32> + %m = memref.alloc() : memref<10x10xf32> affine.for %i0 = 0 to 10 { affine.for %i1 = 0 to 10 { %t0 = affine.apply affine_map<(d0, d1) -> (d1 + 1)>(%i0, %i1) @@ -74,7 +74,7 @@ // The memref and its stores won't be erased due to this memref return. return %m : memref<10x10xf32> // CHECK: %{{.*}} = constant 7.000000e+00 : f32 -// CHECK-NEXT: %{{.*}} = alloc() : memref<10x10xf32> +// CHECK-NEXT: %{{.*}} = memref.alloc() : memref<10x10xf32> // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 { // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 { // CHECK-NEXT: %{{.*}} = affine.apply [[$MAP0]](%{{.*}}, %{{.*}}) @@ -91,7 +91,7 @@ // CHECK-LABEL: func @store_load_nested func @store_load_nested(%N : index) { %cf7 = constant 7.0 : f32 - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> affine.for %i0 = 0 to 10 { affine.store %cf7, %m[%i0] : memref<10xf32> affine.for %i1 = 0 to %N { @@ -116,7 +116,7 @@ func @multi_store_load_nested_no_fwd(%N : index) { %cf7 = constant 7.0 : f32 %cf8 = constant 8.0 : f32 - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> affine.for %i0 = 0 to 10 { affine.store %cf7, %m[%i0] : memref<10xf32> affine.for %i1 = 0 to %N { @@ -137,7 +137,7 @@ func @store_load_store_nested_no_fwd(%N : index) { %cf7 = constant 7.0 : f32 %cf9 = constant 9.0 : f32 - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> affine.for %i0 = 0 to 10 { affine.store %cf7, %m[%i0] : memref<10xf32> affine.for %i1 = 0 to %N { @@ -158,7 +158,7 @@ %cf8 = constant 8.0 : f32 %cf9 = constant 9.0 : f32 %cf10 = constant 10.0 : f32 - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> affine.for %i0 = 0 to 10 { affine.store %cf7, %m[%i0] : memref<10xf32> affine.for %i1 = 0 to %N { @@ -181,7 +181,7 @@ // CHECK-LABEL: func @store_load_no_fwd func @store_load_no_fwd() { %cf7 = constant 7.0 : f32 - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> affine.for %i0 = 0 to 10 { affine.store %cf7, %m[%i0] : memref<10xf32> affine.for %i1 = 0 to 10 { @@ -200,7 +200,7 @@ func @store_load_fwd() { %cf7 = constant 7.0 : f32 %c0 = constant 0 : index - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> affine.store %cf7, %m[%c0] : memref<10xf32> affine.for %i0 = 0 to 10 { affine.for %i1 = 0 to 10 { @@ -222,7 +222,7 @@ %cf9 = constant 9.0 : f32 %c0 = constant 0 : index %c1 = constant 1 : index - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> affine.for %i0 = 0 to 10 { affine.store %cf7, %m[%i0] : memref<10xf32> affine.for %i1 = 0 to %N { @@ -235,7 +235,7 @@ // Due to this load, the memref isn't optimized away. %v3 = affine.load %m[%c1] : memref<10xf32> return %v3 : f32 -// CHECK: %{{.*}} = alloc() : memref<10xf32> +// CHECK: %{{.*}} = memref.alloc() : memref<10xf32> // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 { // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32> // CHECK-NEXT: affine.for %{{.*}} = 0 to %{{.*}} { @@ -285,7 +285,7 @@ // The value loaded from %in can directly be stored to %out by eliminating // store and load from %tmp. 
func @vector_forwarding(%in : memref<512xf32>, %out : memref<512xf32>) { - %tmp = alloc() : memref<512xf32> + %tmp = memref.alloc() : memref<512xf32> affine.for %i = 0 to 16 { %ld0 = affine.vector_load %in[32*%i] : memref<512xf32>, vector<32xf32> affine.vector_store %ld0, %tmp[32*%i] : memref<512xf32>, vector<32xf32> diff --git a/mlir/test/Transforms/memref-dependence-check.mlir b/mlir/test/Transforms/memref-dependence-check.mlir --- a/mlir/test/Transforms/memref-dependence-check.mlir +++ b/mlir/test/Transforms/memref-dependence-check.mlir @@ -6,7 +6,7 @@ // CHECK-LABEL: func @store_may_execute_before_load() { func @store_may_execute_before_load() { - %m = alloc() : memref<10xf32> + %m = memref.alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 %c0 = constant 4 : index // There is no dependence from store 0 to load 1 at depth if we take into account @@ -33,7 +33,7 @@ // CHECK-LABEL: func @dependent_loops() { func @dependent_loops() { - %0 = alloc() : memref<10xf32> + %0 = memref.alloc() : memref<10xf32> %cst = constant 7.000000e+00 : f32 // There is a dependence from 0 to 1 at depth 1 (common surrounding loops 0) // because the first loop with the store dominates the second scf. @@ -55,8 +55,8 @@ // ----- // CHECK-LABEL: func @different_memrefs() { func @different_memrefs() { - %m.a = alloc() : memref<100xf32> - %m.b = alloc() : memref<100xf32> + %m.a = memref.alloc() : memref<100xf32> + %m.b = memref.alloc() : memref<100xf32> %c0 = constant 0 : index %c1 = constant 1.0 : f32 affine.store %c1, %m.a[%c0] : memref<100xf32> @@ -71,7 +71,7 @@ // ----- // CHECK-LABEL: func @store_load_different_elements() { func @store_load_different_elements() { - %m = alloc() : memref<100xf32> + %m = memref.alloc() : memref<100xf32> %c0 = constant 0 : index %c1 = constant 1 : index %c7 = constant 7.0 : f32 @@ -87,7 +87,7 @@ // ----- // CHECK-LABEL: func @load_store_different_elements() { func @load_store_different_elements() { - %m = alloc() : memref<100xf32> + %m = memref.alloc() : memref<100xf32> %c0 = constant 0 : index %c1 = constant 1 : index %c7 = constant 7.0 : f32 @@ -103,7 +103,7 @@ // ----- // CHECK-LABEL: func @store_load_same_element() { func @store_load_same_element() { - %m = alloc() : memref<100xf32> + %m = memref.alloc() : memref<100xf32> %c11 = constant 11 : index %c7 = constant 7.0 : f32 affine.store %c7, %m[%c11] : memref<100xf32> @@ -118,7 +118,7 @@ // ----- // CHECK-LABEL: func @load_load_same_element() { func @load_load_same_element() { - %m = alloc() : memref<100xf32> + %m = memref.alloc() : memref<100xf32> %c11 = constant 11 : index %c7 = constant 7.0 : f32 %v0 = affine.load %m[%c11] : memref<100xf32> @@ -133,7 +133,7 @@ // ----- // CHECK-LABEL: func @store_load_same_symbol(%arg0: index) { func @store_load_same_symbol(%arg0: index) { - %m = alloc() : memref<100xf32> + %m = memref.alloc() : memref<100xf32> %c7 = constant 7.0 : f32 affine.store %c7, %m[%arg0] : memref<100xf32> // expected-remark@above {{dependence from 0 to 0 at depth 1 = false}} @@ -147,7 +147,7 @@ // ----- // CHECK-LABEL: func @store_load_different_symbols(%arg0: index, %arg1: index) { func @store_load_different_symbols(%arg0: index, %arg1: index) { - %m = alloc() : memref<100xf32> + %m = memref.alloc() : memref<100xf32> %c7 = constant 7.0 : f32 affine.store %c7, %m[%arg0] : memref<100xf32> // expected-remark@above {{dependence from 0 to 0 at depth 1 = false}} @@ -161,7 +161,7 @@ // ----- // CHECK-LABEL: func @store_load_diff_element_affine_apply_const() { func @store_load_diff_element_affine_apply_const() { - %m = 
alloc() : memref<100xf32> + %m = memref.alloc() : memref<100xf32> %c1 = constant 1 : index %c8 = constant 8.0 : f32 %a0 = affine.apply affine_map<(d0) -> (d0)> (%c1) @@ -178,7 +178,7 @@ // ----- // CHECK-LABEL: func @store_load_same_element_affine_apply_const() { func @store_load_same_element_affine_apply_const() { - %m = alloc() : memref<100xf32> + %m = memref.alloc() : memref<100xf32> %c7 = constant 7.0 : f32 %c9 = constant 9 : index %c11 = constant 11 : index @@ -196,7 +196,7 @@ // ----- // CHECK-LABEL: func @store_load_affine_apply_symbol(%arg0: index) { func @store_load_affine_apply_symbol(%arg0: index) { - %m = alloc() : memref<100xf32> + %m = memref.alloc() : memref<100xf32> %c7 = constant 7.0 : f32 %a0 = affine.apply affine_map<(d0) -> (d0)> (%arg0) affine.store %c7, %m[%a0] : memref<100xf32> @@ -212,7 +212,7 @@ // ----- // CHECK-LABEL: func @store_load_affine_apply_symbol_offset(%arg0: index) { func @store_load_affine_apply_symbol_offset(%arg0: index) { - %m = alloc() : memref<100xf32> + %m = memref.alloc() : memref<100xf32> %c7 = constant 7.0 : f32 %a0 = affine.apply affine_map<(d0) -> (d0)> (%arg0) affine.store %c7, %m[%a0] : memref<100xf32> @@ -228,7 +228,7 @@ // ----- // CHECK-LABEL: func @store_range_load_after_range() { func @store_range_load_after_range() { - %m = alloc() : memref<100xf32> + %m = memref.alloc() : memref<100xf32> %c7 = constant 7.0 : f32 %c10 = constant 10 : index affine.for %i0 = 0 to 10 { @@ -251,7 +251,7 @@ // ----- // CHECK-LABEL: func @store_load_func_symbol(%arg0: index, %arg1: index) { func @store_load_func_symbol(%arg0: index, %arg1: index) { - %m = alloc() : memref<100xf32> + %m = memref.alloc() : memref<100xf32> %c7 = constant 7.0 : f32 %c10 = constant 10 : index affine.for %i0 = 0 to %arg1 { @@ -274,7 +274,7 @@ // ----- // CHECK-LABEL: func @store_range_load_last_in_range() { func @store_range_load_last_in_range() { - %m = alloc() : memref<100xf32> + %m = memref.alloc() : memref<100xf32> %c7 = constant 7.0 : f32 %c10 = constant 10 : index affine.for %i0 = 0 to 10 { @@ -302,7 +302,7 @@ // ----- // CHECK-LABEL: func @store_range_load_before_range() { func @store_range_load_before_range() { - %m = alloc() : memref<100xf32> + %m = memref.alloc() : memref<100xf32> %c7 = constant 7.0 : f32 %c0 = constant 0 : index affine.for %i0 = 1 to 11 { @@ -325,7 +325,7 @@ // ----- // CHECK-LABEL: func @store_range_load_first_in_range() { func @store_range_load_first_in_range() { - %m = alloc() : memref<100xf32> + %m = memref.alloc() : memref<100xf32> %c7 = constant 7.0 : f32 %c0 = constant 0 : index affine.for %i0 = 1 to 11 { @@ -351,7 +351,7 @@ // ----- // CHECK-LABEL: func @store_plus_3() { func @store_plus_3() { - %m = alloc() : memref<100xf32> + %m = memref.alloc() : memref<100xf32> %c7 = constant 7.0 : f32 affine.for %i0 = 1 to 11 { %a0 = affine.apply affine_map<(d0) -> (d0 + 3)> (%i0) @@ -373,7 +373,7 @@ // ----- // CHECK-LABEL: func @load_minus_2() { func @load_minus_2() { - %m = alloc() : memref<100xf32> + %m = memref.alloc() : memref<100xf32> %c7 = constant 7.0 : f32 affine.for %i0 = 2 to 11 { %a0 = affine.apply affine_map<(d0) -> (d0)> (%i0) @@ -395,7 +395,7 @@ // ----- // CHECK-LABEL: func @perfectly_nested_loops_loop_independent() { func @perfectly_nested_loops_loop_independent() { - %m = alloc() : memref<10x10xf32> + %m = memref.alloc() : memref<10x10xf32> %c7 = constant 7.0 : f32 affine.for %i0 = 0 to 11 { affine.for %i1 = 0 to 11 { @@ -426,7 +426,7 @@ // ----- // CHECK-LABEL: func @perfectly_nested_loops_loop_carried_at_depth1() { func 
@perfectly_nested_loops_loop_carried_at_depth1() { - %m = alloc() : memref<10x10xf32> + %m = memref.alloc() : memref<10x10xf32> %c7 = constant 7.0 : f32 affine.for %i0 = 0 to 9 { affine.for %i1 = 0 to 9 { @@ -457,7 +457,7 @@ // ----- // CHECK-LABEL: func @perfectly_nested_loops_loop_carried_at_depth2() { func @perfectly_nested_loops_loop_carried_at_depth2() { - %m = alloc() : memref<10x10xf32> + %m = memref.alloc() : memref<10x10xf32> %c7 = constant 7.0 : f32 affine.for %i0 = 0 to 10 { affine.for %i1 = 0 to 10 { @@ -488,7 +488,7 @@ // ----- // CHECK-LABEL: func @one_common_loop() { func @one_common_loop() { - %m = alloc() : memref<10x10xf32> + %m = memref.alloc() : memref<10x10xf32> %c7 = constant 7.0 : f32 // There is a loop-independent dependence from access 0 to 1 at depth 2. affine.for %i0 = 0 to 10 { @@ -519,8 +519,8 @@ // ----- // CHECK-LABEL: func @dependence_cycle() { func @dependence_cycle() { - %m.a = alloc() : memref<100xf32> - %m.b = alloc() : memref<100xf32> + %m.a = memref.alloc() : memref<100xf32> + %m.b = memref.alloc() : memref<100xf32> // Dependences: // *) loop-independent dependence from access 1 to 2 at depth 2. @@ -573,7 +573,7 @@ // ----- // CHECK-LABEL: func @negative_and_positive_direction_vectors(%arg0: index, %arg1: index) { func @negative_and_positive_direction_vectors(%arg0: index, %arg1: index) { - %m = alloc() : memref<10x10xf32> + %m = memref.alloc() : memref<10x10xf32> %c7 = constant 7.0 : f32 affine.for %i0 = 0 to %arg0 { affine.for %i1 = 0 to %arg1 { @@ -603,7 +603,7 @@ // ----- // CHECK-LABEL: func @war_raw_waw_deps() { func @war_raw_waw_deps() { - %m = alloc() : memref<100xf32> + %m = memref.alloc() : memref<100xf32> %c7 = constant 7.0 : f32 affine.for %i0 = 0 to 10 { affine.for %i1 = 0 to 10 { @@ -631,7 +631,7 @@ // ----- // CHECK-LABEL: func @mod_deps() { func @mod_deps() { - %m = alloc() : memref<100xf32> + %m = memref.alloc() : memref<100xf32> %c7 = constant 7.0 : f32 affine.for %i0 = 0 to 10 { %a0 = affine.apply affine_map<(d0) -> (d0 mod 2)> (%i0) @@ -655,7 +655,7 @@ // ----- // CHECK-LABEL: func @loop_nest_depth() { func @loop_nest_depth() { - %0 = alloc() : memref<100x100xf32> + %0 = memref.alloc() : memref<100x100xf32> %c7 = constant 7.0 : f32 affine.for %i0 = 0 to 128 { @@ -691,7 +691,7 @@ // mod/div's successively. // CHECK-LABEL: func @mod_div_3d() { func @mod_div_3d() { - %M = alloc() : memref<2x2x2xi32> + %M = memref.alloc() : memref<2x2x2xi32> %c0 = constant 0 : i32 affine.for %i0 = 0 to 8 { affine.for %i1 = 0 to 8 { @@ -716,8 +716,8 @@ func @delinearize_mod_floordiv() { %c0 = constant 0 : index %val = constant 0 : i32 - %in = alloc() : memref<2x2x3x3x16x1xi32> - %out = alloc() : memref<64x9xi32> + %in = memref.alloc() : memref<2x2x3x3x16x1xi32> + %out = memref.alloc() : memref<64x9xi32> affine.for %i0 = 0 to 2 { affine.for %i1 = 0 to 2 { @@ -788,7 +788,7 @@ // Load and store ops access the same elements in strided scf. // CHECK-LABEL: func @strided_loop_with_dependence_at_depth2 func @strided_loop_with_dependence_at_depth2() { - %0 = alloc() : memref<10xf32> + %0 = memref.alloc() : memref<10xf32> %cf0 = constant 0.0 : f32 affine.for %i0 = 0 to 8 step 2 { affine.store %cf0, %0[%i0] : memref<10xf32> @@ -810,7 +810,7 @@ // Load and store ops access alternating memref elements: no dependence. 
// CHECK-LABEL: func @strided_loop_with_no_dependence func @strided_loop_with_no_dependence() { - %0 = alloc() : memref<10xf32> + %0 = memref.alloc() : memref<10xf32> %cf0 = constant 0.0 : f32 affine.for %i0 = 0 to 8 step 2 { %a0 = affine.apply affine_map<(d0) -> (d0 + 1)>(%i0) @@ -833,7 +833,7 @@ // Affine.Store op accesses memref elements at offset causing loop-carried dependence. // CHECK-LABEL: func @strided_loop_with_loop_carried_dependence_at_depth1 func @strided_loop_with_loop_carried_dependence_at_depth1() { - %0 = alloc() : memref<10xf32> + %0 = memref.alloc() : memref<10xf32> %cf0 = constant 0.0 : f32 affine.for %i0 = 0 to 8 step 2 { %a0 = affine.apply affine_map<(d0) -> (d0 + 4)>(%i0) @@ -857,7 +857,7 @@ // properly computed when the load and store are at different loop depths. // CHECK-LABEL: func @test_dep_store_depth1_load_depth2 func @test_dep_store_depth1_load_depth2() { - %0 = alloc() : memref<100xf32> + %0 = memref.alloc() : memref<100xf32> %cst = constant 7.000000e+00 : f32 affine.for %i0 = 0 to 10 { %a0 = affine.apply affine_map<(d0) -> (d0 - 1)>(%i0) @@ -884,7 +884,7 @@ // properly computed when the load and store are at different loop depths. // CHECK-LABEL: func @test_dep_store_depth2_load_depth1 func @test_dep_store_depth2_load_depth1() { - %0 = alloc() : memref<100xf32> + %0 = memref.alloc() : memref<100xf32> %cst = constant 7.000000e+00 : f32 affine.for %i0 = 0 to 10 { affine.for %i1 = affine_map<(d0) -> (d0)>(%i0) to affine_map<(d0) -> (d0 + 1)>(%i0) { @@ -912,7 +912,7 @@ // CHECK-LABEL: func @test_affine_for_if_same_block() { func @test_affine_for_if_same_block() { - %0 = alloc() : memref<100xf32> + %0 = memref.alloc() : memref<100xf32> %cf7 = constant 7.0 : f32 affine.for %i0 = 0 to 100 { @@ -940,7 +940,7 @@ // CHECK-LABEL: func @test_affine_for_if_separated() { func @test_affine_for_if_separated() { - %0 = alloc() : memref<100xf32> + %0 = memref.alloc() : memref<100xf32> %cf7 = constant 7.0 : f32 affine.for %i0 = 0 to 10 { @@ -970,7 +970,7 @@ // CHECK-LABEL: func @test_affine_for_if_partially_joined() { func @test_affine_for_if_partially_joined() { - %0 = alloc() : memref<100xf32> + %0 = memref.alloc() : memref<100xf32> %cf7 = constant 7.0 : f32 affine.for %i0 = 0 to 100 { @@ -1001,7 +1001,7 @@ // CHECK-LABEL: func @test_interleaved_affine_for_if() { func @test_interleaved_affine_for_if() { - %0 = alloc() : memref<100x100xf32> + %0 = memref.alloc() : memref<100x100xf32> %cf7 = constant 7.0 : f32 affine.for %i0 = 0 to 100 { @@ -1039,9 +1039,9 @@ // CHECK-LABEL: func @test_interleaved_affine_for_if() { func @test_interleaved_affine_for_if() { - %0 = alloc() : memref<101xf32> + %0 = memref.alloc() : memref<101xf32> %c0 = constant 0 : index - %N = dim %0, %c0 : memref<101xf32> + %N = memref.dim %0, %c0 : memref<101xf32> %cf7 = constant 7.0 : f32 affine.for %i0 = 0 to 101 { diff --git a/mlir/test/Transforms/normalize-memrefs-ops.mlir b/mlir/test/Transforms/normalize-memrefs-ops.mlir --- a/mlir/test/Transforms/normalize-memrefs-ops.mlir +++ b/mlir/test/Transforms/normalize-memrefs-ops.mlir @@ -16,13 +16,13 @@ // CHECK-LABEL: test_norm // CHECK-SAME: (%[[ARG0:.*]]: memref<1x16x1x1x32x64xf32>) func @test_norm(%arg0 : memref<1x16x14x14xf32, #map0>) -> () { - %0 = alloc() : memref<1x16x14x14xf32, #map0> + %0 = memref.alloc() : memref<1x16x14x14xf32, #map0> "test.op_norm"(%arg0, %0) : (memref<1x16x14x14xf32, #map0>, memref<1x16x14x14xf32, #map0>) -> () - dealloc %0 : memref<1x16x14x14xf32, #map0> + memref.dealloc %0 : memref<1x16x14x14xf32, #map0> - // CHECK: %[[v0:.*]] 
= alloc() : memref<1x16x1x1x32x64xf32> + // CHECK: %[[v0:.*]] = memref.alloc() : memref<1x16x1x1x32x64xf32> // CHECK: "test.op_norm"(%[[ARG0]], %[[v0]]) : (memref<1x16x1x1x32x64xf32>, memref<1x16x1x1x32x64xf32>) -> () - // CHECK: dealloc %[[v0]] : memref<1x16x1x1x32x64xf32> + // CHECK: memref.dealloc %[[v0]] : memref<1x16x1x1x32x64xf32> return } @@ -31,13 +31,13 @@ // CHECK-LABEL: test_nonnorm // CHECK-SAME: (%[[ARG0:.*]]: memref<1x16x14x14xf32, #map>) func @test_nonnorm(%arg0 : memref<1x16x14x14xf32, #map0>) -> () { - %0 = alloc() : memref<1x16x14x14xf32, #map0> + %0 = memref.alloc() : memref<1x16x14x14xf32, #map0> "test.op_nonnorm"(%arg0, %0) : (memref<1x16x14x14xf32, #map0>, memref<1x16x14x14xf32, #map0>) -> () - dealloc %0 : memref<1x16x14x14xf32, #map0> + memref.dealloc %0 : memref<1x16x14x14xf32, #map0> - // CHECK: %[[v0:.*]] = alloc() : memref<1x16x14x14xf32, #map> + // CHECK: %[[v0:.*]] = memref.alloc() : memref<1x16x14x14xf32, #map> // CHECK: "test.op_nonnorm"(%[[ARG0]], %[[v0]]) : (memref<1x16x14x14xf32, #map>, memref<1x16x14x14xf32, #map>) -> () - // CHECK: dealloc %[[v0]] : memref<1x16x14x14xf32, #map> + // CHECK: memref.dealloc %[[v0]] : memref<1x16x14x14xf32, #map> return } @@ -46,13 +46,13 @@ // CHECK-LABEL: test_norm_mix // CHECK-SAME: (%[[ARG0:.*]]: memref<1x16x1x1x32x64xf32> func @test_norm_mix(%arg0 : memref<1x16x1x1x32x64xf32>) -> () { - %0 = alloc() : memref<1x16x14x14xf32, #map0> + %0 = memref.alloc() : memref<1x16x14x14xf32, #map0> "test.op_norm"(%arg0, %0) : (memref<1x16x1x1x32x64xf32>, memref<1x16x14x14xf32, #map0>) -> () - dealloc %0 : memref<1x16x14x14xf32, #map0> + memref.dealloc %0 : memref<1x16x14x14xf32, #map0> - // CHECK: %[[v0:.*]] = alloc() : memref<1x16x1x1x32x64xf32> + // CHECK: %[[v0:.*]] = memref.alloc() : memref<1x16x1x1x32x64xf32> // CHECK: "test.op_norm"(%[[ARG0]], %[[v0]]) : (memref<1x16x1x1x32x64xf32>, memref<1x16x1x1x32x64xf32>) -> () - // CHECK: dealloc %[[v0]] : memref<1x16x1x1x32x64xf32> + // CHECK: memref.dealloc %[[v0]] : memref<1x16x1x1x32x64xf32> return } @@ -63,10 +63,10 @@ // CHECK-LABEL: test_load_store // CHECK-SAME: (%[[ARG0:.*]]: memref<1x16x14x14xf32> func @test_load_store(%arg0 : memref<1x16x14x14xf32>) -> () { - %0 = alloc() : memref<1x16x14x14xf32, #map_tile> - // CHECK: %[[v0:.*]] = alloc() : memref<1x16x1x1x32x32xf32> - %1 = alloc() : memref<1x16x14x14xf32> - // CHECK: %[[v1:.*]] = alloc() : memref<1x16x14x14xf32> + %0 = memref.alloc() : memref<1x16x14x14xf32, #map_tile> + // CHECK: %[[v0:.*]] = memref.alloc() : memref<1x16x1x1x32x32xf32> + %1 = memref.alloc() : memref<1x16x14x14xf32> + // CHECK: %[[v1:.*]] = memref.alloc() : memref<1x16x14x14xf32> "test.op_norm"(%0, %1) : (memref<1x16x14x14xf32, #map_tile>, memref<1x16x14x14xf32>) -> () // CHECK: "test.op_norm"(%[[v0]], %[[v1]]) : (memref<1x16x1x1x32x32xf32>, memref<1x16x14x14xf32>) -> () %cst = constant 3.0 : f32 @@ -74,19 +74,19 @@ affine.for %j = 0 to 16 { affine.for %k = 0 to 14 { affine.for %l = 0 to 14 { - %2 = load %1[%i, %j, %k, %l] : memref<1x16x14x14xf32> + %2 = memref.load %1[%i, %j, %k, %l] : memref<1x16x14x14xf32> // CHECK: memref<1x16x14x14xf32> %3 = addf %2, %cst : f32 - store %3, %arg0[%i, %j, %k, %l] : memref<1x16x14x14xf32> + memref.store %3, %arg0[%i, %j, %k, %l] : memref<1x16x14x14xf32> // CHECK: memref<1x16x14x14xf32> } } } } - dealloc %0 : memref<1x16x14x14xf32, #map_tile> - // CHECK: dealloc %[[v0]] : memref<1x16x1x1x32x32xf32> - dealloc %1 : memref<1x16x14x14xf32> - // CHECK: dealloc %[[v1]] : memref<1x16x14x14xf32> + memref.dealloc %0 : 
memref<1x16x14x14xf32, #map_tile> + // CHECK: memref.dealloc %[[v0]] : memref<1x16x1x1x32x32xf32> + memref.dealloc %1 : memref<1x16x14x14xf32> + // CHECK: memref.dealloc %[[v1]] : memref<1x16x14x14xf32> return } @@ -95,16 +95,16 @@ // CHECK-LABEL: test_norm_ret // CHECK-SAME: (%[[ARG0:.*]]: memref<1x16x1x1x32x32xf32>) -> (memref<1x16x1x1x32x32xf32>, memref<1x16x14x14xf32>) { func @test_norm_ret(%arg0: memref<1x16x14x14xf32, #map_tile>) -> (memref<1x16x14x14xf32, #map_tile>, memref<1x16x14x14xf32>) { - %0 = alloc() : memref<1x16x14x14xf32, #map_tile> - // CHECK-NEXT: %[[v0:.*]] = alloc() : memref<1x16x1x1x32x32xf32> + %0 = memref.alloc() : memref<1x16x14x14xf32, #map_tile> + // CHECK-NEXT: %[[v0:.*]] = memref.alloc() : memref<1x16x1x1x32x32xf32> %1, %2 = "test.op_norm_ret"(%arg0) : (memref<1x16x14x14xf32, #map_tile>) -> (memref<1x16x14x14xf32, #map_tile>, memref<1x16x14x14xf32>) // CHECK-NEXT: %[[v1:.*]], %[[v2:.*]] = "test.op_norm_ret" // CHECK-SAME: (memref<1x16x1x1x32x32xf32>) -> (memref<1x16x1x1x32x32xf32>, memref<1x16x14x14xf32>) "test.op_norm"(%1, %0) : (memref<1x16x14x14xf32, #map_tile>, memref<1x16x14x14xf32, #map_tile>) -> () // CHECK-NEXT: "test.op_norm" // CHECK-SAME: : (memref<1x16x1x1x32x32xf32>, memref<1x16x1x1x32x32xf32>) -> () - dealloc %0 : memref<1x16x14x14xf32, #map_tile> - // CHECK-NEXT: dealloc %[[v0]] : memref<1x16x1x1x32x32xf32> + memref.dealloc %0 : memref<1x16x14x14xf32, #map_tile> + // CHECK-NEXT: memref.dealloc %[[v0]] : memref<1x16x1x1x32x32xf32> return %1, %2 : memref<1x16x14x14xf32, #map_tile>, memref<1x16x14x14xf32> // CHECK-NEXT: return %[[v1]], %[[v2]] : memref<1x16x1x1x32x32xf32>, memref<1x16x14x14xf32> } diff --git a/mlir/test/Transforms/normalize-memrefs.mlir b/mlir/test/Transforms/normalize-memrefs.mlir --- a/mlir/test/Transforms/normalize-memrefs.mlir +++ b/mlir/test/Transforms/normalize-memrefs.mlir @@ -5,32 +5,32 @@ // CHECK-LABEL: func @permute() func @permute() { - %A = alloc() : memref<64x256xf32, affine_map<(d0, d1) -> (d1, d0)>> + %A = memref.alloc() : memref<64x256xf32, affine_map<(d0, d1) -> (d1, d0)>> affine.for %i = 0 to 64 { affine.for %j = 0 to 256 { %1 = affine.load %A[%i, %j] : memref<64x256xf32, affine_map<(d0, d1) -> (d1, d0)>> "prevent.dce"(%1) : (f32) -> () } } - dealloc %A : memref<64x256xf32, affine_map<(d0, d1) -> (d1, d0)>> + memref.dealloc %A : memref<64x256xf32, affine_map<(d0, d1) -> (d1, d0)>> return } // The old memref alloc should disappear. 
// CHECK-NOT: memref<64x256xf32> -// CHECK: [[MEM:%[0-9]+]] = alloc() : memref<256x64xf32> +// CHECK: [[MEM:%[0-9]+]] = memref.alloc() : memref<256x64xf32> // CHECK-NEXT: affine.for %[[I:arg[0-9]+]] = 0 to 64 { // CHECK-NEXT: affine.for %[[J:arg[0-9]+]] = 0 to 256 { // CHECK-NEXT: affine.load [[MEM]][%[[J]], %[[I]]] : memref<256x64xf32> // CHECK-NEXT: "prevent.dce" // CHECK-NEXT: } // CHECK-NEXT: } -// CHECK-NEXT: dealloc [[MEM]] +// CHECK-NEXT: memref.dealloc [[MEM]] // CHECK-NEXT: return // CHECK-LABEL: func @shift func @shift(%idx : index) { - // CHECK-NEXT: alloc() : memref<65xf32> - %A = alloc() : memref<64xf32, affine_map<(d0) -> (d0 + 1)>> + // CHECK-NEXT: memref.alloc() : memref<65xf32> + %A = memref.alloc() : memref<64xf32, affine_map<(d0) -> (d0 + 1)>> // CHECK-NEXT: affine.load %{{.*}}[symbol(%arg0) + 1] : memref<65xf32> affine.load %A[%idx] : memref<64xf32, affine_map<(d0) -> (d0 + 1)>> affine.for %i = 0 to 64 { @@ -44,7 +44,7 @@ // CHECK-LABEL: func @high_dim_permute() func @high_dim_permute() { // CHECK-NOT: memref<64x128x256xf32, - %A = alloc() : memref<64x128x256xf32, affine_map<(d0, d1, d2) -> (d2, d0, d1)>> + %A = memref.alloc() : memref<64x128x256xf32, affine_map<(d0, d1, d2) -> (d2, d0, d1)>> // CHECK: %[[I:arg[0-9]+]] affine.for %i = 0 to 64 { // CHECK: %[[J:arg[0-9]+]] @@ -62,16 +62,16 @@ // CHECK-LABEL: func @invalid_map func @invalid_map() { - %A = alloc() : memref<64x128xf32, affine_map<(d0, d1) -> (d0, -d1 - 10)>> - // CHECK: %{{.*}} = alloc() : memref<64x128xf32, + %A = memref.alloc() : memref<64x128xf32, affine_map<(d0, d1) -> (d0, -d1 - 10)>> + // CHECK: %{{.*}} = memref.alloc() : memref<64x128xf32, return } // A tiled layout. // CHECK-LABEL: func @data_tiling func @data_tiling(%idx : index) { - // CHECK: alloc() : memref<8x32x8x16xf32> - %A = alloc() : memref<64x512xf32, affine_map<(d0, d1) -> (d0 floordiv 8, d1 floordiv 16, d0 mod 8, d1 mod 16)>> + // CHECK: memref.alloc() : memref<8x32x8x16xf32> + %A = memref.alloc() : memref<64x512xf32, affine_map<(d0, d1) -> (d0 floordiv 8, d1 floordiv 16, d0 mod 8, d1 mod 16)>> // CHECK: affine.load %{{.*}}[symbol(%arg0) floordiv 8, symbol(%arg0) floordiv 16, symbol(%arg0) mod 8, symbol(%arg0) mod 16] %1 = affine.load %A[%idx, %idx] : memref<64x512xf32, affine_map<(d0, d1) -> (d0 floordiv 8, d1 floordiv 16, d0 mod 8, d1 mod 16)>> "prevent.dce"(%1) : (f32) -> () @@ -81,7 +81,7 @@ // Strides 2 and 4 along respective dimensions. // CHECK-LABEL: func @strided func @strided() { - %A = alloc() : memref<64x128xf32, affine_map<(d0, d1) -> (2*d0, 4*d1)>> + %A = memref.alloc() : memref<64x128xf32, affine_map<(d0, d1) -> (2*d0, 4*d1)>> // CHECK: affine.for %[[IV0:.*]] = affine.for %i = 0 to 64 { // CHECK: affine.for %[[IV1:.*]] = @@ -97,7 +97,7 @@ // Strided, but the strides are in the linearized space. // CHECK-LABEL: func @strided_cumulative func @strided_cumulative() { - %A = alloc() : memref<2x5xf32, affine_map<(d0, d1) -> (3*d0 + 17*d1)>> + %A = memref.alloc() : memref<2x5xf32, affine_map<(d0, d1) -> (3*d0 + 17*d1)>> // CHECK: affine.for %[[IV0:.*]] = affine.for %i = 0 to 2 { // CHECK: affine.for %[[IV1:.*]] = @@ -114,8 +114,8 @@ // when the index remap has symbols. 
// CHECK-LABEL: func @symbolic_operands func @symbolic_operands(%s : index) { - // CHECK: alloc() : memref<100xf32> - %A = alloc()[%s] : memref<10x10xf32, affine_map<(d0,d1)[s0] -> (10*d0 + d1)>> + // CHECK: memref.alloc() : memref<100xf32> + %A = memref.alloc()[%s] : memref<10x10xf32, affine_map<(d0,d1)[s0] -> (10*d0 + d1)>> affine.for %i = 0 to 10 { affine.for %j = 0 to 10 { // CHECK: affine.load %{{.*}}[%{{.*}} * 10 + %{{.*}}] : memref<100xf32> @@ -129,7 +129,7 @@ // Semi-affine maps, normalization not implemented yet. // CHECK-LABEL: func @semi_affine_layout_map func @semi_affine_layout_map(%s0: index, %s1: index) { - %A = alloc()[%s0, %s1] : memref<256x1024xf32, affine_map<(d0, d1)[s0, s1] -> (d0*s0 + d1*s1)>> + %A = memref.alloc()[%s0, %s1] : memref<256x1024xf32, affine_map<(d0, d1)[s0, s1] -> (d0*s0 + d1*s1)>> affine.for %i = 0 to 256 { affine.for %j = 0 to 1024 { // CHECK: memref<256x1024xf32, #map{{[0-9]+}}> @@ -141,8 +141,8 @@ // CHECK-LABEL: func @alignment func @alignment() { - %A = alloc() {alignment = 32 : i64}: memref<64x128x256xf32, affine_map<(d0, d1, d2) -> (d2, d0, d1)>> - // CHECK-NEXT: alloc() {alignment = 32 : i64} : memref<256x64x128xf32> + %A = memref.alloc() {alignment = 32 : i64}: memref<64x128x256xf32, affine_map<(d0, d1, d2) -> (d2, d0, d1)>> + // CHECK-NEXT: memref.alloc() {alignment = 32 : i64} : memref<256x64x128xf32> return } @@ -171,20 +171,20 @@ // CHECK-LABEL: func @single_argument_type // CHECK-SAME: (%[[C:arg[0-9]+]]: memref<2x4xf64>) func @single_argument_type(%C : memref<8xf64, #tile>) { - %a = alloc(): memref<8xf64, #tile> - %b = alloc(): memref<16xf64, #tile> + %a = memref.alloc(): memref<8xf64, #tile> + %b = memref.alloc(): memref<16xf64, #tile> %d = constant 23.0 : f64 - %e = alloc(): memref<24xf64> + %e = memref.alloc(): memref<24xf64> call @single_argument_type(%a): (memref<8xf64, #tile>) -> () call @single_argument_type(%C): (memref<8xf64, #tile>) -> () call @multiple_argument_type(%b, %d, %a, %e): (memref<16xf64, #tile>, f64, memref<8xf64, #tile>, memref<24xf64>) -> f64 return } -// CHECK: %[[a:[0-9]+]] = alloc() : memref<2x4xf64> -// CHECK: %[[b:[0-9]+]] = alloc() : memref<4x4xf64> +// CHECK: %[[a:[0-9]+]] = memref.alloc() : memref<2x4xf64> +// CHECK: %[[b:[0-9]+]] = memref.alloc() : memref<4x4xf64> // CHECK: %cst = constant 2.300000e+01 : f64 -// CHECK: %[[e:[0-9]+]] = alloc() : memref<24xf64> +// CHECK: %[[e:[0-9]+]] = memref.alloc() : memref<24xf64> // CHECK: call @single_argument_type(%[[a]]) : (memref<2x4xf64>) -> () // CHECK: call @single_argument_type(%[[C]]) : (memref<2x4xf64>) -> () // CHECK: call @multiple_argument_type(%[[b]], %cst, %[[a]], %[[e]]) : (memref<4x4xf64>, f64, memref<2x4xf64>, memref<24xf64>) -> f64 @@ -227,8 +227,8 @@ // CHECK-LABEL: func @ret_single_argument_type // CHECK-SAME: (%[[C:arg[0-9]+]]: memref<2x4xf64>) -> (memref<4x4xf64>, memref<2x4xf64>) func @ret_single_argument_type(%C: memref<8xf64, #tile>) -> (memref<16xf64, #tile>, memref<8xf64, #tile>){ - %a = alloc() : memref<8xf64, #tile> - %b = alloc() : memref<16xf64, #tile> + %a = memref.alloc() : memref<8xf64, #tile> + %b = memref.alloc() : memref<16xf64, #tile> %d = constant 23.0 : f64 call @ret_single_argument_type(%a) : (memref<8xf64, #tile>) -> (memref<16xf64, #tile>, memref<8xf64, #tile>) call @ret_single_argument_type(%C) : (memref<8xf64, #tile>) -> (memref<16xf64, #tile>, memref<8xf64, #tile>) @@ -237,8 +237,8 @@ return %b, %a: memref<16xf64, #tile>, memref<8xf64, #tile> } -// CHECK: %[[a:[0-9]+]] = alloc() : memref<2x4xf64> -// CHECK: %[[b:[0-9]+]] = 
alloc() : memref<4x4xf64> +// CHECK: %[[a:[0-9]+]] = memref.alloc() : memref<2x4xf64> +// CHECK: %[[b:[0-9]+]] = memref.alloc() : memref<4x4xf64> // CHECK: %cst = constant 2.300000e+01 : f64 // CHECK: %[[resA:[0-9]+]]:2 = call @ret_single_argument_type(%[[a]]) : (memref<2x4xf64>) -> (memref<4x4xf64>, memref<2x4xf64>) // CHECK: %[[resB:[0-9]+]]:2 = call @ret_single_argument_type(%[[C]]) : (memref<2x4xf64>) -> (memref<4x4xf64>, memref<2x4xf64>) @@ -304,11 +304,11 @@ // CHECK-LABEL: func @simply_call_external() func @simply_call_external() { - %a = alloc() : memref<16xf64, #tile> + %a = memref.alloc() : memref<16xf64, #tile> call @external_func_A(%a) : (memref<16xf64, #tile>) -> () return } -// CHECK: %[[a:[0-9]+]] = alloc() : memref<4x4xf64> +// CHECK: %[[a:[0-9]+]] = memref.alloc() : memref<4x4xf64> // CHECK: call @external_func_A(%[[a]]) : (memref<4x4xf64>) -> () // CHECK-LABEL: func @use_value_of_external @@ -323,7 +323,7 @@ // CHECK-LABEL: func @affine_parallel_norm func @affine_parallel_norm() -> memref<8xf32, #tile> { %c = constant 23.0 : f32 - %a = alloc() : memref<8xf32, #tile> + %a = memref.alloc() : memref<8xf32, #tile> // CHECK: affine.parallel (%{{.*}}) = (0) to (8) reduce ("assign") -> (memref<2x4xf32>) %1 = affine.parallel (%i) = (0) to (8) reduce ("assign") -> memref<8xf32, #tile> { affine.store %c, %a[%i] : memref<8xf32, #tile> diff --git a/mlir/test/Transforms/parametric-tiling.mlir b/mlir/test/Transforms/parametric-tiling.mlir --- a/mlir/test/Transforms/parametric-tiling.mlir +++ b/mlir/test/Transforms/parametric-tiling.mlir @@ -57,9 +57,9 @@ // TILE_74:scf.for %[[jj:.*]] = %[[j]] to %[[ub2]] step %c2 scf.for %j = %c1 to %c44 step %c2 { // The right iterator are used. - // TILE_7: load %arg0[%[[ii]], %[[j]]] - // TILE_74: load %arg0[%[[ii]], %[[jj]]] - load %arg0[%i, %j]: memref + // TILE_7: memref.load %arg0[%[[ii]], %[[j]]] + // TILE_74: memref.load %arg0[%[[ii]], %[[jj]]] + memref.load %arg0[%i, %j]: memref } } return @@ -124,9 +124,9 @@ // TILE_74:scf.for %[[jj:.*]] = %[[j]] to %[[ub2]] step %c2 scf.for %j = %c1 to %i step %c2 { // The right iterator are used. 
- // TILE_7: load %arg0[%[[ii]], %[[j]]] - // TILE_74: load %arg0[%[[ii]], %[[jj]]] - load %arg0[%i, %j]: memref + // TILE_7: memref.load %arg0[%[[ii]], %[[j]]] + // TILE_74: memref.load %arg0[%[[ii]], %[[jj]]] + memref.load %arg0[%i, %j]: memref } } return diff --git a/mlir/test/Transforms/pipeline-data-transfer.mlir b/mlir/test/Transforms/pipeline-data-transfer.mlir --- a/mlir/test/Transforms/pipeline-data-transfer.mlir +++ b/mlir/test/Transforms/pipeline-data-transfer.mlir @@ -8,10 +8,10 @@ // CHECK-LABEL: func @loop_nest_dma() { func @loop_nest_dma() { - %A = alloc() : memref<256 x f32, affine_map<(d0) -> (d0)>, 0> - %Ah = alloc() : memref<32 x f32, affine_map<(d0) -> (d0)>, 1> + %A = memref.alloc() : memref<256 x f32, affine_map<(d0) -> (d0)>, 0> + %Ah = memref.alloc() : memref<32 x f32, affine_map<(d0) -> (d0)>, 1> - %tag = alloc() : memref<1 x f32> + %tag = memref.alloc() : memref<1 x f32> %zero = constant 0 : index %num_elts = constant 32 : index @@ -26,13 +26,13 @@ "do_more_compute"(%i, %j) : (index, index) -> () } } - dealloc %tag : memref<1 x f32> - dealloc %Ah : memref<32 x f32, affine_map<(d0) -> (d0)>, 1> + memref.dealloc %tag : memref<1 x f32> + memref.dealloc %Ah : memref<32 x f32, affine_map<(d0) -> (d0)>, 1> return } -// CHECK: %{{.*}} = alloc() : memref<256xf32> -// CHECK: %{{.*}} = alloc() : memref<2x32xf32, 1> -// CHECK-NEXT: %{{.*}} = alloc() : memref<2x1xf32> +// CHECK: %{{.*}} = memref.alloc() : memref<256xf32> +// CHECK: %{{.*}} = memref.alloc() : memref<2x32xf32, 1> +// CHECK-NEXT: %{{.*}} = memref.alloc() : memref<2x1xf32> // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}} mod 2, %{{.*}}], %{{.*}}[%{{.*}} mod 2, 0], %{{.*}} : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32> // CHECK-NEXT: affine.for %{{.*}} = 1 to 8 { // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}} mod 2, %{{.*}}], %{{.*}}[%{{.*}} mod 2, 0], %{{.*}} : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32> @@ -57,8 +57,8 @@ // CHECK-NEXT: affine.for %{{.*}} = 0 to 32 { // CHECK-NEXT: "do_more_compute"(%{{.*}}, %{{.*}}) : (index, index) -> () // CHECK-NEXT: } -// CHECK-NEXT: dealloc %{{.*}} : memref<2x1xf32> -// CHECK-NEXT: dealloc %{{.*}} : memref<2x32xf32, 1> +// CHECK-NEXT: memref.dealloc %{{.*}} : memref<2x1xf32> +// CHECK-NEXT: memref.dealloc %{{.*}} : memref<2x32xf32, 1> // CHECK-NEXT: return // CHECK-NEXT:} @@ -73,19 +73,19 @@ %c0 = constant 0 : index %c4 = constant 4 : index affine.for %i0 = 0 to 512 step 4 { - %1 = alloc() : memref<4xf32, 1> - %2 = alloc() : memref<1xi32> + %1 = memref.alloc() : memref<4xf32, 1> + %2 = memref.alloc() : memref<1xi32> affine.dma_start %arg0[%i0], %1[%c0], %2[%c0], %c4, : memref<512xf32>, memref<4xf32, 1>, memref<1xi32> affine.dma_wait %2[%c0], %c4 : memref<1xi32> "compute"(%i0) : (index) -> () - dealloc %2 : memref<1xi32> - dealloc %1 : memref<4xf32, 1> + memref.dealloc %2 : memref<1xi32> + memref.dealloc %1 : memref<4xf32, 1> } return } -// CHECK: [[BUF:%[0-9]+]] = alloc() : memref<2x4xf32, 1> -// CHECK: [[TAG:%[0-9]+]] = alloc() : memref<2x1xi32> +// CHECK: [[BUF:%[0-9]+]] = memref.alloc() : memref<2x4xf32, 1> +// CHECK: [[TAG:%[0-9]+]] = memref.alloc() : memref<2x1xi32> // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[(%{{.*}} floordiv 4) mod 2, 0], [[TAG]][(%{{.*}} floordiv 4) mod 2, 0], %{{.*}} : memref<512xf32>, memref<2x4xf32, 1>, memref<2x1xi32> // CHECK-NEXT: affine.for %{{.*}} = 4 to 512 step 4 { // CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[(%{{.*}} floordiv 4) mod 2, 0], [[TAG]][(%{{.*}} 
floordiv 4) mod 2, 0], %{{.*}} : memref<512xf32>, memref<2x4xf32, 1>, memref<2x1xi32> @@ -98,8 +98,8 @@ // CHECK-NEXT: %{{.*}} = affine.apply [[$FLOOR_MOD_2]]([[SHIFTED]]) // CHECK: affine.dma_wait [[TAG]][(%{{.*}} floordiv 4) mod 2, 0], %{{.*}} : memref<2x1xi32> // CHECK-NEXT: "compute"(%{{.*}}) : (index) -> () -// CHECK-NEXT: dealloc [[TAG]] : memref<2x1xi32> -// CHECK-NEXT: dealloc [[BUF]] : memref<2x4xf32, 1> +// CHECK-NEXT: memref.dealloc [[TAG]] : memref<2x1xi32> +// CHECK-NEXT: memref.dealloc [[BUF]] : memref<2x4xf32, 1> // CHECK-NEXT: return // CHECK-NEXT: } @@ -111,15 +111,15 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>>, %arg1: memref<512x32xvector<8xf32>>, %arg2: memref<512x32xvector<8xf32>>) { %num_elts = constant 256 : index %c0 = constant 0 : index - %0 = alloc() : memref<64x4xvector<8xf32>, 2> - %1 = alloc() : memref<64x4xvector<8xf32>, 2> - %2 = alloc() : memref<64x4xvector<8xf32>, 2> - %3 = alloc() : memref<2xi32> - %4 = alloc() : memref<2xi32> - %5 = alloc() : memref<2xi32> + %0 = memref.alloc() : memref<64x4xvector<8xf32>, 2> + %1 = memref.alloc() : memref<64x4xvector<8xf32>, 2> + %2 = memref.alloc() : memref<64x4xvector<8xf32>, 2> + %3 = memref.alloc() : memref<2xi32> + %4 = memref.alloc() : memref<2xi32> + %5 = memref.alloc() : memref<2xi32> // Prologue for DMA overlap on arg2. - // CHECK-DAG: [[BUF_ARG2:%[0-9]+]] = alloc() : memref<2x64x4xvector<8xf32>, 2> - // CHECK-DAG: [[TAG_ARG2:%[0-9]+]] = alloc() : memref<2x2xi32> + // CHECK-DAG: [[BUF_ARG2:%[0-9]+]] = memref.alloc() : memref<2x64x4xvector<8xf32>, 2> + // CHECK-DAG: [[TAG_ARG2:%[0-9]+]] = memref.alloc() : memref<2x2xi32> // CHECK: affine.dma_start %{{.*}}[ // CHECK: affine.for %{{.*}} = 1 to 8 { affine.for %i0 = 0 to 8 { @@ -130,10 +130,10 @@ // CHECK: affine.dma_start %{{.*}}[ // CHECK: affine.dma_wait [[TAG_ARG2]] // Prologue for DMA overlap on arg0, arg1 nested within i0 - // CHECK: [[BUF_ARG0:%[0-9]+]] = alloc() : memref<2x64x4xvector<8xf32>, 2> - // CHECK: [[BUF_ARG1:%[0-9]+]] = alloc() : memref<2x64x4xvector<8xf32>, 2> - // CHECK: [[TAG_ARG0:%[0-9]+]] = alloc() : memref<2x2xi32> - // CHECK: [[TAG_ARG1:%[0-9]+]] = alloc() : memref<2x2xi32> + // CHECK: [[BUF_ARG0:%[0-9]+]] = memref.alloc() : memref<2x64x4xvector<8xf32>, 2> + // CHECK: [[BUF_ARG1:%[0-9]+]] = memref.alloc() : memref<2x64x4xvector<8xf32>, 2> + // CHECK: [[TAG_ARG0:%[0-9]+]] = memref.alloc() : memref<2x2xi32> + // CHECK: [[TAG_ARG1:%[0-9]+]] = memref.alloc() : memref<2x2xi32> // CHECK: affine.dma_start %{{.*}}[ // CHECK: affine.dma_start %{{.*}}[ // CHECK-NEXT: affine.for %{{.*}} = 1 to 8 { @@ -157,17 +157,17 @@ // epilogue for arg0, arg1 // CHECK: affine.dma_wait [[TAG_ARG0]] // CHECK: affine.dma_wait [[TAG_ARG1]] - // CHECK-DAG: dealloc [[TAG_ARG1]] : memref<2x2xi32> - // CHECK-DAG: dealloc [[TAG_ARG0]] : memref<2x2xi32> - // CHECK-DAG: dealloc [[BUF_ARG1]] : memref<2x64x4xvector<8xf32>, 2> - // CHECK-DAG: dealloc [[BUF_ARG0]] : memref<2x64x4xvector<8xf32>, 2> + // CHECK-DAG: memref.dealloc [[TAG_ARG1]] : memref<2x2xi32> + // CHECK-DAG: memref.dealloc [[TAG_ARG0]] : memref<2x2xi32> + // CHECK-DAG: memref.dealloc [[BUF_ARG1]] : memref<2x64x4xvector<8xf32>, 2> + // CHECK-DAG: memref.dealloc [[BUF_ARG0]] : memref<2x64x4xvector<8xf32>, 2> // epilogue for DMA overlap on %arg2 // CHECK: affine.dma_wait [[TAG_ARG2]] // Within the epilogue for arg2's DMA, we have the DMAs on %arg1, %arg2 nested. 
- // CHECK: [[BUF_ARG0_NESTED:%[0-9]+]] = alloc() : memref<2x64x4xvector<8xf32>, 2> - // CHECK: [[BUF_ARG1_NESTED:%[0-9]+]] = alloc() : memref<2x64x4xvector<8xf32>, 2> - // CHECK: [[TAG_ARG0_NESTED:%[0-9]+]] = alloc() : memref<2x2xi32> - // CHECK: [[TAG_ARG1_NESTED:%[0-9]+]] = alloc() : memref<2x2xi32> + // CHECK: [[BUF_ARG0_NESTED:%[0-9]+]] = memref.alloc() : memref<2x64x4xvector<8xf32>, 2> + // CHECK: [[BUF_ARG1_NESTED:%[0-9]+]] = memref.alloc() : memref<2x64x4xvector<8xf32>, 2> + // CHECK: [[TAG_ARG0_NESTED:%[0-9]+]] = memref.alloc() : memref<2x2xi32> + // CHECK: [[TAG_ARG1_NESTED:%[0-9]+]] = memref.alloc() : memref<2x2xi32> // CHECK: affine.dma_start %{{.*}}[ // CHECK: affine.dma_start %{{.*}}[ // CHECK: affine.for %{{.*}} = 1 to 8 { @@ -181,20 +181,20 @@ // CHECK: affine.dma_wait [[TAG_ARG1_NESTED]] // CHECK: affine.for %{{.*}} = 0 to 4 { } - dealloc %5 : memref<2xi32> - dealloc %4 : memref<2xi32> - dealloc %3 : memref<2xi32> - dealloc %2 : memref<64x4xvector<8xf32>, 2> - dealloc %1 : memref<64x4xvector<8xf32>, 2> - dealloc %0 : memref<64x4xvector<8xf32>, 2> + memref.dealloc %5 : memref<2xi32> + memref.dealloc %4 : memref<2xi32> + memref.dealloc %3 : memref<2xi32> + memref.dealloc %2 : memref<64x4xvector<8xf32>, 2> + memref.dealloc %1 : memref<64x4xvector<8xf32>, 2> + memref.dealloc %0 : memref<64x4xvector<8xf32>, 2> return // CHECK: } -// CHECK-DAG: dealloc [[TAG_ARG1_NESTED]] : memref<2x2xi32> -// CHECK-DAG: dealloc [[TAG_ARG0_NESTED]] : memref<2x2xi32> -// CHECK-DAG: dealloc [[BUF_ARG1_NESTED]] : memref<2x64x4xvector<8xf32>, 2> -// CHECK-DAG: dealloc [[BUF_ARG0_NESTED]] : memref<2x64x4xvector<8xf32>, 2> -// CHECK-DAG: dealloc [[TAG_ARG2]] : memref<2x2xi32> -// CHECK-DAG: dealloc [[BUF_ARG2]] : memref<2x64x4xvector<8xf32>, 2> +// CHECK-DAG: memref.dealloc [[TAG_ARG1_NESTED]] : memref<2x2xi32> +// CHECK-DAG: memref.dealloc [[TAG_ARG0_NESTED]] : memref<2x2xi32> +// CHECK-DAG: memref.dealloc [[BUF_ARG1_NESTED]] : memref<2x64x4xvector<8xf32>, 2> +// CHECK-DAG: memref.dealloc [[BUF_ARG0_NESTED]] : memref<2x64x4xvector<8xf32>, 2> +// CHECK-DAG: memref.dealloc [[TAG_ARG2]] : memref<2x2xi32> +// CHECK-DAG: memref.dealloc [[BUF_ARG2]] : memref<2x64x4xvector<8xf32>, 2> // CHECK-NEXT: return } @@ -205,12 +205,12 @@ func @loop_dma_dependent(%arg2: memref<512x32xvector<8xf32>>) { %num_elts = constant 256 : index %c0 = constant 0 : index - %0 = alloc() : memref<64x4xvector<8xf32>, 2> - %1 = alloc() : memref<64x4xvector<8xf32>, 2> - %2 = alloc() : memref<64x4xvector<8xf32>, 2> - %3 = alloc() : memref<2xi32> - %4 = alloc() : memref<2xi32> - %5 = alloc() : memref<2xi32> + %0 = memref.alloc() : memref<64x4xvector<8xf32>, 2> + %1 = memref.alloc() : memref<64x4xvector<8xf32>, 2> + %2 = memref.alloc() : memref<64x4xvector<8xf32>, 2> + %3 = memref.alloc() : memref<2xi32> + %4 = memref.alloc() : memref<2xi32> + %5 = memref.alloc() : memref<2xi32> // The two DMAs below are dependent (incoming and outgoing on the same // memref) in the same iteration; so no pipelining here. 
@@ -224,12 +224,12 @@ affine.dma_start %2[%c0, %c0], %arg2[%6, %c0], %5[%c0], %num_elts : memref<64x4xvector<8xf32>, 2>, memref<512x32xvector<8xf32>>, memref<2xi32> affine.dma_wait %5[%c0], %num_elts : memref<2xi32> } - dealloc %5 : memref<2xi32> - dealloc %4 : memref<2xi32> - dealloc %3 : memref<2xi32> - dealloc %2 : memref<64x4xvector<8xf32>, 2> - dealloc %1 : memref<64x4xvector<8xf32>, 2> - dealloc %0 : memref<64x4xvector<8xf32>, 2> + memref.dealloc %5 : memref<2xi32> + memref.dealloc %4 : memref<2xi32> + memref.dealloc %3 : memref<2xi32> + memref.dealloc %2 : memref<64x4xvector<8xf32>, 2> + memref.dealloc %1 : memref<64x4xvector<8xf32>, 2> + memref.dealloc %0 : memref<64x4xvector<8xf32>, 2> return } @@ -240,8 +240,8 @@ %c32 = constant 32 : index %num_elt = constant 512 : index %zero = constant 0 : index - %Av = alloc() : memref<32 x 32 x f32, 2> - %tag = alloc() : memref<1 x i32> + %Av = memref.alloc() : memref<32 x 32 x f32, 2> + %tag = memref.alloc() : memref<1 x i32> // CHECK-NOT: affine.dma_start // CHECK: affine.for %{{.*}} = 0 to 16 { @@ -253,8 +253,8 @@ // escaping use; no DMA pipelining / double buffering will be done. "foo"(%Av) : (memref<32 x 32 x f32, 2>) -> () } - dealloc %tag : memref<1 x i32> - dealloc %Av : memref<32 x 32 x f32, 2> + memref.dealloc %tag : memref<1 x i32> + memref.dealloc %Av : memref<32 x 32 x f32, 2> return // CHECK: "foo"(%{{[0-9]+}}) : (memref<32x32xf32, 2>) -> () // CHECK: } @@ -268,8 +268,8 @@ %c32 = constant 32 : index %num_elt = constant 512 : index %zero = constant 0 : index - %Av = alloc() : memref<32 x 32 x f32, 2> - %tag = alloc() : memref<1 x i32> + %Av = memref.alloc() : memref<32 x 32 x f32, 2> + %tag = memref.alloc() : memref<1 x i32> // CHECK-NOT: affine.dma_start // CHECK: affine.for %{{.*}} = 0 to 16 { @@ -281,8 +281,8 @@ // escaping use; no DMA pipelining / double buffering will be done. "foo"(%tag) : (memref<1 x i32>) -> () } - dealloc %tag : memref<1 x i32> - dealloc %Av : memref<32 x 32 x f32, 2> + memref.dealloc %tag : memref<1 x i32> + memref.dealloc %Av : memref<32 x 32 x f32, 2> return // CHECK: "foo"(%{{[0-9]+}}) : (memref<1xi32>) -> () // CHECK: } @@ -297,8 +297,8 @@ %c32 = constant 32 : index %num_elt = constant 512 : index %zero = constant 0 : index - %Av = alloc() : memref<32 x 32 x f32, 2> - %tag = alloc() : memref<1 x i32> + %Av = memref.alloc() : memref<32 x 32 x f32, 2> + %tag = memref.alloc() : memref<1 x i32> // CHECK-NOT: affine.dma_start // CHECK: affine.for %{{.*}} = 0 to 16 { @@ -310,8 +310,8 @@ } // Use live out of 'affine.for' op; no DMA pipelining will be done. %v = affine.load %Av[%zero, %zero] : memref<32 x 32 x f32, 2> - dealloc %tag : memref<1 x i32> - dealloc %Av : memref<32 x 32 x f32, 2> + memref.dealloc %tag : memref<1 x i32> + memref.dealloc %Av : memref<32 x 32 x f32, 2> return %v : f32 // CHECK: affine.load %{{[0-9]+}}[%{{.*}}, %{{.*}}] : memref<32x32xf32, 2> // CHECK: return @@ -325,16 +325,16 @@ %num_elt = constant 512 : index %zero = constant 0 : index - %Av = alloc(%c32, %c32) : memref - %tag = alloc() : memref<1 x i32> + %Av = memref.alloc(%c32, %c32) : memref + %tag = memref.alloc() : memref<1 x i32> // Double buffering for dynamic shaped buffer. 
-// CHECK: alloc(%{{.*}}, %{{.*}}) : memref +// CHECK: memref.alloc(%{{.*}}, %{{.*}}) : memref // CHECK-NEXT: %[[C0:.*]] = constant 0 : index -// CHECK-NEXT: dim %{{.*}}, %[[C0]] : memref +// CHECK-NEXT: memref.dim %{{.*}}, %[[C0]] : memref // CHECK-NEXT: %[[C1:.*]] = constant 1 : index -// CHECK-NEXT: dim %{{.*}}, %[[C1]] : memref -// CHECK-NEXT: alloc(%{{.*}}, %{{.*}}) : memref<2x?x?xf32, 2> +// CHECK-NEXT: memref.dim %{{.*}}, %[[C1]] : memref +// CHECK-NEXT: memref.alloc(%{{.*}}, %{{.*}}) : memref<2x?x?xf32, 2> // CHECK: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}} mod 2, 0, 0], %{{.*}}[%{{.*}} mod 2, 0], %{{.*}} affine.for %kTT = 0 to 16 { affine.dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %tag[%zero], %num_elt : @@ -342,7 +342,7 @@ memref, memref<1 x i32> affine.dma_wait %tag[%zero], %num_elt : memref<1 x i32> } - dealloc %Av : memref + memref.dealloc %Av : memref return // CHECK-NEXT: affine.for %{{.*}} = 1 to 16 { // CHECK: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}} mod 2, 0, 0], %{{.*}}[%{{.*}} mod 2, 0], %{{.*}} @@ -358,9 +358,9 @@ // before performing any replacement. // CHECK-LABEL: func @escaping_and_indexed_use_mix func @escaping_and_indexed_use_mix() { - %A = alloc() : memref<256 x f32, affine_map<(d0) -> (d0)>, 0> - %Ah = alloc() : memref<32 x f32, affine_map<(d0) -> (d0)>, 1> - %tag = alloc() : memref<1 x f32> + %A = memref.alloc() : memref<256 x f32, affine_map<(d0) -> (d0)>, 0> + %Ah = memref.alloc() : memref<32 x f32, affine_map<(d0) -> (d0)>, 1> + %tag = memref.alloc() : memref<1 x f32> %zero = constant 0 : index %num_elts = constant 32 : index @@ -372,8 +372,8 @@ %v = affine.load %Ah[%i] : memref<32 x f32, affine_map<(d0) -> (d0)>, 1> "foo"(%v) : (f32) -> () } - dealloc %A : memref<256 x f32, affine_map<(d0) -> (d0)>, 0> - dealloc %Ah : memref<32 x f32, affine_map<(d0) -> (d0)>, 1> + memref.dealloc %A : memref<256 x f32, affine_map<(d0) -> (d0)>, 0> + memref.dealloc %Ah : memref<32 x f32, affine_map<(d0) -> (d0)>, 1> return } // No replacement. 
diff --git a/mlir/test/Transforms/promote-buffers-to-stack.mlir b/mlir/test/Transforms/promote-buffers-to-stack.mlir --- a/mlir/test/Transforms/promote-buffers-to-stack.mlir +++ b/mlir/test/Transforms/promote-buffers-to-stack.mlir @@ -21,7 +21,7 @@ ^bb1: br ^bb3(%arg1 : memref<2xf32>) ^bb2: - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) br ^bb3(%0 : memref<2xf32>) ^bb3(%1: memref<2xf32>): @@ -31,7 +31,7 @@ // CHECK-NEXT: cond_br {{.*}} // CHECK: ^bb2 -// CHECK-NEXT: %[[ALLOCA:.*]] = alloca() +// CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca() // CHECK: test.copy // CHECK-NEXT: return @@ -56,7 +56,7 @@ ^bb1: br ^bb3(%arg1 : memref) ^bb2(%0: index): - %1 = alloc(%0) : memref + %1 = memref.alloc(%0) : memref test.buffer_based in(%arg1: memref) out(%1: memref) br ^bb3(%1 : memref) ^bb3(%2: memref): @@ -67,7 +67,7 @@ // CHECK-NEXT: cond_br // CHECK: ^bb2 // CHECK: ^bb2(%[[IDX:.*]]:{{.*}}) -// CHECK-NEXT: %[[ALLOC0:.*]] = alloc(%[[IDX]]) +// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc(%[[IDX]]) // CHECK-NEXT: test.buffer_based // CHECK: br ^bb3 // CHECK-NEXT: ^bb3(%[[ALLOC0:.*]]:{{.*}}) @@ -79,35 +79,35 @@ // CHECK-LABEL: func @dynamicRanked func @dynamicRanked(%tensor: tensor<*xf32>) { %0 = rank %tensor : tensor<*xf32> - %1 = alloc(%0) : memref + %1 = memref.alloc(%0) : memref return } // CHECK-NEXT: %[[RANK:.*]] = rank -// CHECK-NEXT: %[[ALLOCA:.*]] = alloca(%[[RANK]]) +// CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca(%[[RANK]]) // ----- // CHECK-LABEL: func @dynamicRanked2D func @dynamicRanked2D(%tensor: tensor<*xf32>) { %0 = rank %tensor : tensor<*xf32> - %1 = alloc(%0, %0) : memref + %1 = memref.alloc(%0, %0) : memref return } // CHECK-NEXT: %[[RANK:.*]] = rank -// RANK-NEXT: %[[ALLOC:.*]] = alloca(%[[RANK]], %[[RANK]]) -// DEFINDEX-NEXT: %[[ALLOC:.*]] = alloc(%[[RANK]], %[[RANK]]) +// RANK-NEXT: %[[ALLOC:.*]] = memref.alloca(%[[RANK]], %[[RANK]]) +// DEFINDEX-NEXT: %[[ALLOC:.*]] = memref.alloc(%[[RANK]], %[[RANK]]) // ----- // CHECK-LABEL: func @dynamicNoRank func @dynamicNoRank(%arg0: index) { - %0 = alloc(%arg0) : memref + %0 = memref.alloc(%arg0) : memref return } -// CHECK-NEXT: %[[ALLOC:.*]] = alloc +// CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc // ----- @@ -117,10 +117,10 @@ // CHECK-LABEL: func @emptyUsesValue func @emptyUsesValue(%arg0: memref<4xf32>) { - %0 = alloc() : memref<4xf32> + %0 = memref.alloc() : memref<4xf32> return } -// CHECK-NEXT: %[[ALLOCA:.*]] = alloca() +// CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca() // CHECK-NEXT: return // ----- @@ -138,7 +138,7 @@ func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>) ^bb1: - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) br ^bb2(%0 : memref<2xf32>) ^bb2(%1: memref<2xf32>): @@ -148,7 +148,7 @@ // CHECK-NEXT: cond_br {{.*}} // CHECK: ^bb1 -// CHECK-NEXT: %[[ALLOCA:.*]] = alloca() +// CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca() // CHECK: test.copy // CHECK-NEXT: return @@ -164,7 +164,7 @@ // CHECK-LABEL: func @invCriticalEdge func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>) ^bb1: @@ -174,7 +174,7 @@ return } -// CHECK-NEXT: %[[ALLOCA:.*]] = alloca() +// CHECK-NEXT: %[[ALLOCA:.*]] = 
memref.alloca() // CHECK: cond_br // CHECK: test.copy // CHECK-NEXT: return @@ -191,7 +191,7 @@ // CHECK-LABEL: func @ifElse func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -201,15 +201,15 @@ ^bb2(%3: memref<2xf32>, %4: memref<2xf32>): br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>) ^bb3(%5: memref<2xf32>, %6: memref<2xf32>): - %7 = alloc() : memref<2xf32> + %7 = memref.alloc() : memref<2xf32> test.buffer_based in(%5: memref<2xf32>) out(%7: memref<2xf32>) test.copy(%7, %arg2) : (memref<2xf32>, memref<2xf32>) return } -// CHECK-NEXT: %[[ALLOCA0:.*]] = alloca() +// CHECK-NEXT: %[[ALLOCA0:.*]] = memref.alloca() // CHECK-NEXT: test.buffer_based -// CHECK: %[[ALLOCA1:.*]] = alloca() +// CHECK: %[[ALLOCA1:.*]] = memref.alloca() // CHECK: test.buffer_based // CHECK: test.copy(%[[ALLOCA1]] // CHECK-NEXT: return @@ -226,7 +226,7 @@ // CHECK-LABEL: func @ifElseNoUsers func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -240,7 +240,7 @@ return } -// CHECK-NEXT: %[[ALLOCA:.*]] = alloca() +// CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca() // CHECK: return // ----- @@ -259,7 +259,7 @@ // CHECK-LABEL: func @ifElseNested func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) cond_br %arg0, ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), @@ -273,15 +273,15 @@ ^bb4(%6: memref<2xf32>): br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>) ^bb5(%7: memref<2xf32>, %8: memref<2xf32>): - %9 = alloc() : memref<2xf32> + %9 = memref.alloc() : memref<2xf32> test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>) test.copy(%9, %arg2) : (memref<2xf32>, memref<2xf32>) return } -// CHECK-NEXT: %[[ALLOCA0:.*]] = alloca() +// CHECK-NEXT: %[[ALLOCA0:.*]] = memref.alloca() // CHECK-NEXT: test.buffer_based -// CHECK: %[[ALLOCA1:.*]] = alloca() +// CHECK: %[[ALLOCA1:.*]] = memref.alloca() // CHECK: test.buffer_based // CHECK: test.copy(%[[ALLOCA1]] // CHECK-NEXT: return @@ -294,17 +294,17 @@ // CHECK-LABEL: func @redundantOperations func @redundantOperations(%arg0: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>) - %1 = alloc() : memref<2xf32> + %1 = memref.alloc() : memref<2xf32> test.buffer_based in(%0: memref<2xf32>) out(%1: memref<2xf32>) return } // CHECK: (%[[ARG0:.*]]: {{.*}}) -// CHECK-NEXT: %[[ALLOCA0:.*]] = alloca() +// CHECK-NEXT: %[[ALLOCA0:.*]] = memref.alloca() // CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}} out(%[[ALLOCA0]] -// CHECK: %[[ALLOCA1:.*]] = alloca() +// CHECK: %[[ALLOCA1:.*]] = memref.alloca() // CHECK-NEXT: test.buffer_based in(%[[ALLOCA0]]{{.*}} out(%[[ALLOCA1]] // CHECK: return @@ -326,11 +326,11 @@ %arg1: memref<2xf32>) { cond_br %cond, ^bb1, ^bb2 ^bb1: - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>) br ^exit(%0 : memref<2xf32>) ^bb2: - %1 = alloc() : memref<2xf32> + %1 = memref.alloc() : memref<2xf32> 
test.buffer_based in(%arg0: memref<2xf32>) out(%1: memref<2xf32>) br ^exit(%1 : memref<2xf32>) ^exit(%arg2: memref<2xf32>): @@ -340,9 +340,9 @@ // CHECK-NEXT: cond_br {{.*}} // CHECK: ^bb1 -// CHECK-NEXT: %{{.*}} = alloca() +// CHECK-NEXT: %{{.*}} = memref.alloca() // CHECK: ^bb2 -// CHECK-NEXT: %{{.*}} = alloca() +// CHECK-NEXT: %{{.*}} = memref.alloca() // CHECK: test.copy // CHECK-NEXT: return @@ -362,10 +362,10 @@ ^bb1: br ^bb3(%arg1 : memref<2xf32>) ^bb2: - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) { ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): - %1 = alloc() : memref<2xf32> + %1 = memref.alloc() : memref<2xf32> test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>) %tmp1 = math.exp %gen1_arg0 : f32 test.region_yield %tmp1 : f32 @@ -378,9 +378,9 @@ // CHECK-NEXT: cond_br {{.*}} // CHECK: ^bb2 -// CHECK-NEXT: %[[ALLOCA0:.*]] = alloca() +// CHECK-NEXT: %[[ALLOCA0:.*]] = memref.alloca() // CHECK: ^bb0 -// CHECK-NEXT: %[[ALLOCA1:.*]] = alloc() +// CHECK-NEXT: %[[ALLOCA1:.*]] = memref.alloc() // ----- @@ -394,16 +394,16 @@ %arg0: memref<5xf32>, %arg1: memref<10xf32>, %arg2: memref<5xf32>) -> (memref<10xf32>, memref<15xf32>) { - %x = alloc() : memref<15xf32> - %y = alloc() : memref<5xf32> + %x = memref.alloc() : memref<15xf32> + %y = memref.alloc() : memref<5xf32> test.buffer_based in(%arg0: memref<5xf32>) out(%y: memref<5xf32>) test.copy(%y, %arg2) : (memref<5xf32>, memref<5xf32>) return %arg1, %x : memref<10xf32>, memref<15xf32> } // CHECK: (%[[ARG0:.*]]: memref<5xf32>, %[[ARG1:.*]]: memref<10xf32>, // CHECK-SAME: %[[RESULT:.*]]: memref<5xf32>) -// CHECK: %[[ALLOC:.*]] = alloc() -// CHECK: %[[ALLOCA:.*]] = alloca() +// CHECK: %[[ALLOC:.*]] = memref.alloc() +// CHECK: %[[ALLOCA:.*]] = memref.alloca() // CHECK: test.copy // CHECK: return %[[ARG1]], %[[ALLOC]] @@ -418,20 +418,20 @@ %arg0 : index, %arg1 : index) -> memref { %0 = cmpi eq, %arg0, %arg1 : index - %1 = alloc(%arg0, %arg0) : memref + %1 = memref.alloc(%arg0, %arg0) : memref %2 = scf.if %0 -> (memref) { scf.yield %1 : memref } else { - %3 = alloc(%arg0, %arg1) : memref + %3 = memref.alloc(%arg0, %arg1) : memref scf.yield %1 : memref } return %2 : memref } -// CHECK: %[[ALLOC0:.*]] = alloc(%arg0, %arg0) +// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0) // CHECK-NEXT: %[[ALLOC1:.*]] = scf.if // CHECK: scf.yield %[[ALLOC0]] -// CHECK: %[[ALLOC2:.*]] = alloc(%arg0, %arg1) +// CHECK: %[[ALLOC2:.*]] = memref.alloc(%arg0, %arg1) // CHECK-NEXT: scf.yield %[[ALLOC0]] // CHECK: return %[[ALLOC1]] @@ -443,7 +443,7 @@ // CHECK-LABEL: func @inner_region_control_flow func @inner_region_control_flow(%arg0 : index) -> memref<2x2xf32> { - %0 = alloc() : memref<2x2xf32> + %0 = memref.alloc() : memref<2x2xf32> %1 = test.region_if %0 : memref<2x2xf32> -> (memref<2x2xf32>) then { ^bb0(%arg1 : memref<2x2xf32>): test.region_if_yield %arg1 : memref<2x2xf32> @@ -457,7 +457,7 @@ return %1 : memref<2x2xf32> } -// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK: %[[ALLOC0:.*]] = memref.alloc() // CHECK-NEXT: %[[ALLOC1:.*]] = test.region_if // CHECK-NEXT: ^bb0(%[[ALLOC2:.*]]:{{.*}}): // CHECK-NEXT: test.region_if_yield %[[ALLOC2]] @@ -479,20 +479,20 @@ %step: index, %buf: memref<2xf32>, %res: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> %1 = scf.for %i = %lb to %ub step %step iter_args(%iterBuf = %buf) -> memref<2xf32> { %2 = cmpi eq, %i, %ub : index - %3 = alloc() : memref<2xf32> + %3 = memref.alloc() : memref<2xf32> 
scf.yield %3 : memref<2xf32> } test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>) return } -// CHECK-NEXT: %[[ALLOCA:.*]] = alloca() +// CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca() // CHECK-NEXT: scf.for -// CHECK: %[[ALLOC:.*]] = alloc() +// CHECK: %[[ALLOC:.*]] = memref.alloc() // ----- @@ -509,7 +509,7 @@ %step: index, %buf: memref<2xf32>, %res: memref<2xf32>) { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> %1 = scf.for %i = %lb to %ub step %step iter_args(%iterBuf = %buf) -> memref<2xf32> { %2 = cmpi eq, %i, %ub : index @@ -524,7 +524,7 @@ return } -// CHECK: %[[ALLOCA0:.*]] = alloca() +// CHECK: %[[ALLOCA0:.*]] = memref.alloca() // CHECK-NEXT: %[[ALLOCA1:.*]] = scf.for {{.*}} iter_args(%[[IALLOCA:.*]] = // CHECK: %[[ALLOCA2:.*]] = scf.if // CHECK: scf.yield %[[ALLOCA0]] @@ -544,12 +544,12 @@ %ub: index, %step: index, %buf: memref<2xf32>) -> memref<2xf32> { - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> %1 = scf.for %i = %lb to %ub step %step iter_args(%iterBuf = %buf) -> memref<2xf32> { %2 = cmpi eq, %i, %ub : index %3 = scf.if %2 -> (memref<2xf32>) { - %4 = alloc() : memref<2xf32> + %4 = memref.alloc() : memref<2xf32> scf.yield %4 : memref<2xf32> } else { scf.yield %0 : memref<2xf32> @@ -559,10 +559,10 @@ return %1 : memref<2xf32> } -// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK: %[[ALLOC0:.*]] = memref.alloc() // CHECK-NEXT: %[[ALLOC1:.*]] = scf.for {{.*}} // CHECK: %[[ALLOC2:.*]] = scf.if -// CHECK: %[[ALLOC3:.*]] = alloc() +// CHECK: %[[ALLOC3:.*]] = memref.alloc() // CHECK-NEXT: scf.yield %[[ALLOC3]] // CHECK: scf.yield %[[ALLOC0]] // CHECK: scf.yield %[[ALLOC2]] @@ -575,12 +575,12 @@ // CHECK-LABEL: func @large_buffer_allocation func @large_buffer_allocation(%arg0: memref<2048xf32>) { - %0 = alloc() : memref<2048xf32> + %0 = memref.alloc() : memref<2048xf32> test.copy(%0, %arg0) : (memref<2048xf32>, memref<2048xf32>) return } -// CHECK-NEXT: %[[ALLOC:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc() // CHECK-NEXT: test.copy // ----- @@ -591,11 +591,11 @@ // CHECK-LABEL: func @indexElementType func @indexElementType() { - %0 = alloc() : memref<4xindex> + %0 = memref.alloc() : memref<4xindex> return } -// DEFINDEX-NEXT: alloca() -// BIGINDEX-NEXT: alloca() -// LOWLIMIT-NEXT: alloc() -// RANK-NEXT: alloca() +// DEFINDEX-NEXT: memref.alloca() +// BIGINDEX-NEXT: memref.alloca() +// LOWLIMIT-NEXT: memref.alloc() +// RANK-NEXT: memref.alloca() // CHECK-NEXT: return diff --git a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp --- a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp +++ b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp @@ -13,6 +13,7 @@ #include "mlir/Analysis/Utils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/LoopUtils.h" @@ -31,6 +32,9 @@ TestAffineDataCopy() = default; TestAffineDataCopy(const TestAffineDataCopy &pass){}; + void getDependentDialects(DialectRegistry &registry) const override { + registry.insert<memref::MemRefDialect>(); + } void runOnFunction() override; private: diff --git a/mlir/test/lib/Dialect/Test/TestDialect.cpp b/mlir/test/lib/Dialect/Test/TestDialect.cpp --- a/mlir/test/lib/Dialect/Test/TestDialect.cpp +++ b/mlir/test/lib/Dialect/Test/TestDialect.cpp @@ -10,6 +10,7 @@ #include "TestAttributes.h" #include "TestTypes.h" #include "mlir/Dialect/DLTI/DLTI.h" +#include 
"mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/DialectImplementation.h" @@ -680,7 +681,7 @@ LogicalResult OpWithShapedTypeInferTypeInterfaceOp::reifyReturnTypeShapes( OpBuilder &builder, llvm::SmallVectorImpl &shapes) { shapes = SmallVector{ - builder.createOrFold<DimOp>(getLoc(), getOperand(0), 0)}; + builder.createOrFold<memref::DimOp>(getLoc(), getOperand(0), 0)}; return success(); } diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp --- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp +++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "TestDialect.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/StandardOps/Transforms/FuncConversions.h" #include "mlir/IR/Matchers.h" @@ -136,6 +137,10 @@ struct TestReturnTypeDriver : public PassWrapper { + void getDependentDialects(DialectRegistry &registry) const override { + registry.insert<memref::MemRefDialect>(); + } + void runOnFunction() override { if (getFunction().getName() == "testCreateFunctions") { std::vector ops; diff --git a/mlir/test/lib/Transforms/TestConvVectorization.cpp b/mlir/test/lib/Transforms/TestConvVectorization.cpp --- a/mlir/test/lib/Transforms/TestConvVectorization.cpp +++ b/mlir/test/lib/Transforms/TestConvVectorization.cpp @@ -37,6 +37,7 @@ void getDependentDialects(DialectRegistry &registry) const override { registry.insert(); registry.insert(); + registry.insert<memref::MemRefDialect>(); registry.insert(); registry.insert(); registry.insert(); diff --git a/mlir/test/lib/Transforms/TestGpuMemoryPromotion.cpp b/mlir/test/lib/Transforms/TestGpuMemoryPromotion.cpp --- a/mlir/test/lib/Transforms/TestGpuMemoryPromotion.cpp +++ b/mlir/test/lib/Transforms/TestGpuMemoryPromotion.cpp @@ -14,6 +14,7 @@ #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/GPU/GPUDialect.h" #include "mlir/Dialect/GPU/MemoryPromotion.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" @@ -31,7 +32,8 @@ : public PassWrapper> { void getDependentDialects(DialectRegistry &registry) const override { - registry.insert(); + registry.insert(); } void runOnOperation() override { diff --git a/mlir/test/lib/Transforms/TestGpuRewrite.cpp b/mlir/test/lib/Transforms/TestGpuRewrite.cpp --- a/mlir/test/lib/Transforms/TestGpuRewrite.cpp +++ b/mlir/test/lib/Transforms/TestGpuRewrite.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/GPU/Passes.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -21,7 +22,7 @@ struct TestGpuRewritePass : public PassWrapper> { void getDependentDialects(DialectRegistry &registry) const override { - registry.insert(); + registry.insert(); } void runOnOperation() override { OwningRewritePatternList patterns; diff --git a/mlir/test/lib/Transforms/TestLinalgCodegenStrategy.cpp b/mlir/test/lib/Transforms/TestLinalgCodegenStrategy.cpp --- a/mlir/test/lib/Transforms/TestLinalgCodegenStrategy.cpp +++ b/mlir/test/lib/Transforms/TestLinalgCodegenStrategy.cpp @@ -36,6 +36,7 @@ registry.insert(); diff --git a/mlir/test/lib/Transforms/TestLinalgFusionTransforms.cpp 
b/mlir/test/lib/Transforms/TestLinalgFusionTransforms.cpp --- a/mlir/test/lib/Transforms/TestLinalgFusionTransforms.cpp +++ b/mlir/test/lib/Transforms/TestLinalgFusionTransforms.cpp @@ -102,8 +102,8 @@ TestLinalgFusionTransforms(const TestLinalgFusionTransforms &pass) {} void getDependentDialects(DialectRegistry &registry) const override { - registry.insert(); + registry.insert(); } void runOnFunction() override { @@ -211,7 +211,8 @@ llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated}; void getDependentDialects(DialectRegistry &registry) const override { - registry.insert(); + registry.insert(); } void runOnFunction() override { diff --git a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp --- a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp @@ -35,6 +35,7 @@ void getDependentDialects(DialectRegistry &registry) const override { // clang-format off registry.insert allocCallBackFn(OpBuilder &b, SubViewOp subView, +static Optional allocCallBackFn(OpBuilder &b, memref::SubViewOp subView, ArrayRef boundingSubViewSize, OperationFolder *folder) { SmallVector shape(boundingSubViewSize.size(), -1); return b - .create<AllocOp>(subView.getLoc(), - MemRefType::get(shape, - subView.getType().getElementType(), - /*affineMapComposition =*/{}, 3), - boundingSubViewSize) + .create<memref::AllocOp>( + subView.getLoc(), + MemRefType::get(shape, subView.getType().getElementType(), + /*affineMapComposition =*/{}, 3), + boundingSubViewSize) .getResult(); } // Deallocation callback static LogicalResult deallocCallBackFn(OpBuilder &b, Value buffer) { - b.create<DeallocOp>(buffer.getLoc(), buffer); + b.create<memref::DeallocOp>(buffer.getLoc(), buffer); return success(); } diff --git a/mlir/test/lib/Transforms/TestMemRefStrideCalculation.cpp b/mlir/test/lib/Transforms/TestMemRefStrideCalculation.cpp --- a/mlir/test/lib/Transforms/TestMemRefStrideCalculation.cpp +++ b/mlir/test/lib/Transforms/TestMemRefStrideCalculation.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/Pass/Pass.h" @@ -23,7 +24,7 @@ /// Traverse AllocOp and compute strides of each MemRefType independently. void TestMemRefStrideCalculation::runOnFunction() { llvm::outs() << "Testing: " << getFunction().getName() << "\n"; - getFunction().walk([&](AllocOp allocOp) { + getFunction().walk([&](memref::AllocOp allocOp) { auto memrefType = allocOp.getResult().getType().cast<MemRefType>(); int64_t offset; SmallVector strides; diff --git a/mlir/test/lib/Transforms/TestSparsification.cpp b/mlir/test/lib/Transforms/TestSparsification.cpp --- a/mlir/test/lib/Transforms/TestSparsification.cpp +++ b/mlir/test/lib/Transforms/TestSparsification.cpp @@ -50,8 +50,8 @@ /// Registers all dialects required by testing. void getDependentDialects(DialectRegistry &registry) const override { - registry - .insert(); + registry.insert(); } /// Returns parallelization strategy given on command line. 
@@ -134,8 +134,7 @@ void registerTestSparsification() { PassRegistration<TestSparsification> sparsificationPass( - "test-sparsification", - "Test automatic generation of sparse tensor code"); + "test-sparsification", "Test automatic generation of sparse tensor code"); } } // namespace test diff --git a/mlir/test/lib/Transforms/TestVectorTransforms.cpp b/mlir/test/lib/Transforms/TestVectorTransforms.cpp --- a/mlir/test/lib/Transforms/TestVectorTransforms.cpp +++ b/mlir/test/lib/Transforms/TestVectorTransforms.cpp @@ -11,6 +11,7 @@ #include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Vector/VectorOps.h" @@ -268,7 +269,7 @@ type.getNumElements() % multiplicity != 0) return mlir::WalkResult::advance(); auto filterAlloc = [](Operation *op) { - if (isa(op)) + if (isa(op)) return false; return true; }; @@ -335,7 +336,8 @@ const TestVectorTransferFullPartialSplitPatterns &pass) {} void getDependentDialects(DialectRegistry &registry) const override { - registry.insert(); + registry.insert(); } Option useLinalgOps{ @@ -363,6 +365,9 @@ struct TestVectorTransferLoweringPatterns : public PassWrapper { + void getDependentDialects(DialectRegistry &registry) const override { + registry.insert<memref::MemRefDialect>(); + } void runOnFunction() override { OwningRewritePatternList patterns; populateVectorTransferLoweringPatterns(patterns, &getContext()); diff --git a/mlir/test/mlir-cpu-runner/async-value.mlir b/mlir/test/mlir-cpu-runner/async-value.mlir --- a/mlir/test/mlir-cpu-runner/async-value.mlir +++ b/mlir/test/mlir-cpu-runner/async-value.mlir @@ -44,13 +44,13 @@ // Memref allocated inside async.execute region. // ------------------------------------------------------------------------ // %token2, %result2 = async.execute[%token0] -> !async.value> { - %5 = alloc() : memref + %5 = memref.alloc() : memref %c0 = constant 0.25 : f32 - store %c0, %5[]: memref + memref.store %c0, %5[]: memref async.yield %5 : memref } %6 = async.await %result2 : !async.value> - %7 = memref_cast %6 : memref to memref<*xf32> + %7 = memref.cast %6 : memref to memref<*xf32> // CHECK: Unranked Memref // CHECK-SAME: rank = 0 offset = 0 sizes = [] strides = [] @@ -61,9 +61,9 @@ // Memref passed as async.execute operand. 
// ------------------------------------------------------------------------ // %token3 = async.execute(%result2 as %unwrapped : !async.value>) { - %8 = load %unwrapped[]: memref + %8 = memref.load %unwrapped[]: memref %9 = addf %8, %8 : f32 - store %9, %unwrapped[]: memref + memref.store %9, %unwrapped[]: memref async.yield } async.await %token3 : !async.token @@ -73,7 +73,7 @@ // CHECK-NEXT: [0.5] call @print_memref_f32(%7): (memref<*xf32>) -> () - dealloc %6 : memref + memref.dealloc %6 : memref return } diff --git a/mlir/test/mlir-cpu-runner/async.mlir b/mlir/test/mlir-cpu-runner/async.mlir --- a/mlir/test/mlir-cpu-runner/async.mlir +++ b/mlir/test/mlir-cpu-runner/async.mlir @@ -24,23 +24,23 @@ %c3 = constant 3.0 : f32 %c4 = constant 4.0 : f32 - %A = alloc() : memref<4xf32> + %A = memref.alloc() : memref<4xf32> linalg.fill(%A, %c0) : memref<4xf32>, f32 // CHECK: [0, 0, 0, 0] - %U = memref_cast %A : memref<4xf32> to memref<*xf32> + %U = memref.cast %A : memref<4xf32> to memref<*xf32> call @print_memref_f32(%U): (memref<*xf32>) -> () // CHECK: Current thread id: [[MAIN:.*]] // CHECK: [1, 0, 0, 0] - store %c1, %A[%i0]: memref<4xf32> + memref.store %c1, %A[%i0]: memref<4xf32> call @mlirAsyncRuntimePrintCurrentThreadId(): () -> () call @print_memref_f32(%U): (memref<*xf32>) -> () %outer = async.execute { // CHECK: Current thread id: [[THREAD0:.*]] // CHECK: [1, 2, 0, 0] - store %c2, %A[%i1]: memref<4xf32> + memref.store %c2, %A[%i1]: memref<4xf32> call @mlirAsyncRuntimePrintCurrentThreadId(): () -> () call @print_memref_f32(%U): (memref<*xf32>) -> () @@ -54,7 +54,7 @@ %inner = async.execute [%noop] { // CHECK: Current thread id: [[THREAD2:.*]] // CHECK: [1, 2, 3, 0] - store %c3, %A[%i2]: memref<4xf32> + memref.store %c3, %A[%i2]: memref<4xf32> call @mlirAsyncRuntimePrintCurrentThreadId(): () -> () call @print_memref_f32(%U): (memref<*xf32>) -> () @@ -64,7 +64,7 @@ // CHECK: Current thread id: [[THREAD3:.*]] // CHECK: [1, 2, 3, 4] - store %c4, %A[%i3]: memref<4xf32> + memref.store %c4, %A[%i3]: memref<4xf32> call @mlirAsyncRuntimePrintCurrentThreadId(): () -> () call @print_memref_f32(%U): (memref<*xf32>) -> () @@ -77,7 +77,7 @@ call @mlirAsyncRuntimePrintCurrentThreadId(): () -> () call @print_memref_f32(%U): (memref<*xf32>) -> () - dealloc %A : memref<4xf32> + memref.dealloc %A : memref<4xf32> return } diff --git a/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir b/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir --- a/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir +++ b/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir @@ -12,14 +12,14 @@ %cst = constant 1.000000e+00 : f32 %cst_0 = constant 2.000000e+00 : f32 scf.for %arg2 = %c0 to %c2 step %c1 { - %0 = load %arg0[%arg2] : memref<2xf32> + %0 = memref.load %arg0[%arg2] : memref<2xf32> %1 = addf %0, %cst : f32 - store %1, %arg0[%arg2] : memref<2xf32> + memref.store %1, %arg0[%arg2] : memref<2xf32> // CHECK: 2, 2 - %2 = load %arg1[%arg2] : memref<2xf32> + %2 = memref.load %arg1[%arg2] : memref<2xf32> %3 = addf %1, %cst_0 : f32 - store %3, %arg1[%arg2] : memref<2xf32> + memref.store %3, %arg1[%arg2] : memref<2xf32> // CHECK-NEXT: 4, 4 } return @@ -39,30 +39,30 @@ %c1 = constant 1 : index %cst = constant 1.000000e+00 : f32 %cst_0 = constant 2.000000e+00 : f32 - %a = alloc() : memref<2xf32> - %b = alloc() : memref<2xf32> + %a = memref.alloc() : memref<2xf32> + %b = memref.alloc() : memref<2xf32> scf.for %i = %c0 to %c2 step %c1 { - store %cst, %a[%i] : memref<2xf32> - store %cst, %b[%i] : memref<2xf32> + memref.store %cst, %a[%i] : 
memref<2xf32> + memref.store %cst, %b[%i] : memref<2xf32> } call @simple_add1_add2_test(%a, %b) : (memref<2xf32>, memref<2xf32>) -> () - %l0 = load %a[%c0] : memref<2xf32> + %l0 = memref.load %a[%c0] : memref<2xf32> call @printF32(%l0) : (f32) -> () call @printComma() : () -> () - %l1 = load %a[%c1] : memref<2xf32> + %l1 = memref.load %a[%c1] : memref<2xf32> call @printF32(%l1) : (f32) -> () call @printNewline() : () -> () - %l2 = load %b[%c0] : memref<2xf32> + %l2 = memref.load %b[%c0] : memref<2xf32> call @printF32(%l2) : (f32) -> () call @printComma() : () -> () - %l3 = load %b[%c1] : memref<2xf32> + %l3 = memref.load %b[%c1] : memref<2xf32> call @printF32(%l3) : (f32) -> () call @printNewline() : () -> () - dealloc %a : memref<2xf32> - dealloc %b : memref<2xf32> + memref.dealloc %a : memref<2xf32> + memref.dealloc %b : memref<2xf32> return } diff --git a/mlir/test/mlir-cpu-runner/global_memref.mlir b/mlir/test/mlir-cpu-runner/global_memref.mlir --- a/mlir/test/mlir-cpu-runner/global_memref.mlir +++ b/mlir/test/mlir-cpu-runner/global_memref.mlir @@ -4,10 +4,10 @@ func private @print_memref_i32(memref<*xi32>) attributes { llvm.emit_c_interface } func private @printNewline() -> () -global_memref "private" @gv0 : memref<4xf32> = dense<[0.0, 1.0, 2.0, 3.0]> +memref.global "private" @gv0 : memref<4xf32> = dense<[0.0, 1.0, 2.0, 3.0]> func @test1DMemref() { - %0 = get_global_memref @gv0 : memref<4xf32> - %U = memref_cast %0 : memref<4xf32> to memref<*xf32> + %0 = memref.get_global @gv0 : memref<4xf32> + %U = memref.cast %0 : memref<4xf32> to memref<*xf32> // CHECK: rank = 1 // CHECK: offset = 0 // CHECK: sizes = [4] @@ -21,8 +21,8 @@ %c2 = constant 2 : index %fp0 = constant 4.0 : f32 %fp1 = constant 5.0 : f32 - store %fp0, %0[%c0] : memref<4xf32> - store %fp1, %0[%c2] : memref<4xf32> + memref.store %fp0, %0[%c0] : memref<4xf32> + memref.store %fp1, %0[%c2] : memref<4xf32> // CHECK: rank = 1 // CHECK: offset = 0 // CHECK: sizes = [4] @@ -33,10 +33,10 @@ return } -global_memref constant @gv1 : memref<3x2xi32> = dense<[[0, 1],[2, 3],[4, 5]]> +memref.global constant @gv1 : memref<3x2xi32> = dense<[[0, 1],[2, 3],[4, 5]]> func @testConstantMemref() { - %0 = get_global_memref @gv1 : memref<3x2xi32> - %U = memref_cast %0 : memref<3x2xi32> to memref<*xi32> + %0 = memref.get_global @gv1 : memref<3x2xi32> + %U = memref.cast %0 : memref<3x2xi32> to memref<*xi32> // CHECK: rank = 2 // CHECK: offset = 0 // CHECK: sizes = [3, 2] @@ -49,10 +49,10 @@ return } -global_memref "private" @gv2 : memref<4x2xf32> = dense<[[0.0, 1.0], [2.0, 3.0], [4.0, 5.0], [6.0, 7.0]]> +memref.global "private" @gv2 : memref<4x2xf32> = dense<[[0.0, 1.0], [2.0, 3.0], [4.0, 5.0], [6.0, 7.0]]> func @test2DMemref() { - %0 = get_global_memref @gv2 : memref<4x2xf32> - %U = memref_cast %0 : memref<4x2xf32> to memref<*xf32> + %0 = memref.get_global @gv2 : memref<4x2xf32> + %U = memref.cast %0 : memref<4x2xf32> to memref<*xf32> // CHECK: rank = 2 // CHECK: offset = 0 // CHECK: sizes = [4, 2] @@ -68,7 +68,7 @@ %c0 = constant 0 : index %c1 = constant 1 : index %fp10 = constant 10.0 : f32 - store %fp10, %0[%c0, %c1] : memref<4x2xf32> + memref.store %fp10, %0[%c0, %c1] : memref<4x2xf32> // CHECK: rank = 2 // CHECK: offset = 0 // CHECK: sizes = [4, 2] @@ -82,10 +82,10 @@ return } -global_memref @gv3 : memref = dense<11> +memref.global @gv3 : memref = dense<11> func @testScalarMemref() { - %0 = get_global_memref @gv3 : memref - %U = memref_cast %0 : memref to memref<*xi32> + %0 = memref.get_global @gv3 : memref + %U = memref.cast %0 : memref to 
memref<*xi32> // CHECK: rank = 0 // CHECK: offset = 0 // CHECK: sizes = [] diff --git a/mlir/test/mlir-cpu-runner/memref_reinterpret_cast.mlir b/mlir/test/mlir-cpu-runner/memref_reinterpret_cast.mlir --- a/mlir/test/mlir-cpu-runner/memref_reinterpret_cast.mlir +++ b/mlir/test/mlir-cpu-runner/memref_reinterpret_cast.mlir @@ -10,17 +10,17 @@ %c1 = constant 1 : index // Initialize input. - %input = alloc() : memref<2x3xf32> - %dim_x = dim %input, %c0 : memref<2x3xf32> - %dim_y = dim %input, %c1 : memref<2x3xf32> + %input = memref.alloc() : memref<2x3xf32> + %dim_x = memref.dim %input, %c0 : memref<2x3xf32> + %dim_y = memref.dim %input, %c1 : memref<2x3xf32> scf.parallel (%i, %j) = (%c0, %c0) to (%dim_x, %dim_y) step (%c1, %c1) { %prod = muli %i, %dim_y : index %val = addi %prod, %j : index %val_i64 = index_cast %val : index to i64 %val_f32 = sitofp %val_i64 : i64 to f32 - store %val_f32, %input[%i, %j] : memref<2x3xf32> + memref.store %val_f32, %input[%i, %j] : memref<2x3xf32> } - %unranked_input = memref_cast %input : memref<2x3xf32> to memref<*xf32> + %unranked_input = memref.cast %input : memref<2x3xf32> to memref<*xf32> call @print_memref_f32(%unranked_input) : (memref<*xf32>) -> () // CHECK: rank = 2 offset = 0 sizes = [2, 3] strides = [3, 1] // CHECK-NEXT: [0, 1, 2] @@ -35,11 +35,11 @@ } func @cast_ranked_memref_to_static_shape(%input : memref<2x3xf32>) { - %output = memref_reinterpret_cast %input to + %output = memref.reinterpret_cast %input to offset: [0], sizes: [6, 1], strides: [1, 1] : memref<2x3xf32> to memref<6x1xf32> - %unranked_output = memref_cast %output + %unranked_output = memref.cast %output : memref<6x1xf32> to memref<*xf32> call @print_memref_f32(%unranked_output) : (memref<*xf32>) -> () // CHECK: rank = 2 offset = 0 sizes = [6, 1] strides = [1, 1] data = @@ -56,11 +56,11 @@ %c0 = constant 0 : index %c1 = constant 1 : index %c6 = constant 6 : index - %output = memref_reinterpret_cast %input to + %output = memref.reinterpret_cast %input to offset: [%c0], sizes: [%c1, %c6], strides: [%c6, %c1] : memref<2x3xf32> to memref - %unranked_output = memref_cast %output + %unranked_output = memref.cast %output : memref to memref<*xf32> call @print_memref_f32(%unranked_output) : (memref<*xf32>) -> () // CHECK: rank = 2 offset = 0 sizes = [1, 6] strides = [6, 1] data = @@ -69,12 +69,12 @@ } func @cast_unranked_memref_to_static_shape(%input : memref<2x3xf32>) { - %unranked_input = memref_cast %input : memref<2x3xf32> to memref<*xf32> - %output = memref_reinterpret_cast %unranked_input to + %unranked_input = memref.cast %input : memref<2x3xf32> to memref<*xf32> + %output = memref.reinterpret_cast %unranked_input to offset: [0], sizes: [6, 1], strides: [1, 1] : memref<*xf32> to memref<6x1xf32> - %unranked_output = memref_cast %output + %unranked_output = memref.cast %output : memref<6x1xf32> to memref<*xf32> call @print_memref_f32(%unranked_output) : (memref<*xf32>) -> () // CHECK: rank = 2 offset = 0 sizes = [6, 1] strides = [1, 1] data = @@ -88,15 +88,15 @@ } func @cast_unranked_memref_to_dynamic_shape(%input : memref<2x3xf32>) { - %unranked_input = memref_cast %input : memref<2x3xf32> to memref<*xf32> + %unranked_input = memref.cast %input : memref<2x3xf32> to memref<*xf32> %c0 = constant 0 : index %c1 = constant 1 : index %c6 = constant 6 : index - %output = memref_reinterpret_cast %unranked_input to + %output = memref.reinterpret_cast %unranked_input to offset: [%c0], sizes: [%c1, %c6], strides: [%c6, %c1] : memref<*xf32> to memref - %unranked_output = memref_cast %output + 
%unranked_output = memref.cast %output : memref to memref<*xf32> call @print_memref_f32(%unranked_output) : (memref<*xf32>) -> () // CHECK: rank = 2 offset = 0 sizes = [1, 6] strides = [6, 1] data = diff --git a/mlir/test/mlir-cpu-runner/memref_reshape.mlir b/mlir/test/mlir-cpu-runner/memref_reshape.mlir --- a/mlir/test/mlir-cpu-runner/memref_reshape.mlir +++ b/mlir/test/mlir-cpu-runner/memref_reshape.mlir @@ -11,28 +11,28 @@ %c1 = constant 1 : index // Initialize input. - %input = alloc() : memref<2x3xf32> - %dim_x = dim %input, %c0 : memref<2x3xf32> - %dim_y = dim %input, %c1 : memref<2x3xf32> + %input = memref.alloc() : memref<2x3xf32> + %dim_x = memref.dim %input, %c0 : memref<2x3xf32> + %dim_y = memref.dim %input, %c1 : memref<2x3xf32> scf.parallel (%i, %j) = (%c0, %c0) to (%dim_x, %dim_y) step (%c1, %c1) { %prod = muli %i, %dim_y : index %val = addi %prod, %j : index %val_i64 = index_cast %val : index to i64 %val_f32 = sitofp %val_i64 : i64 to f32 - store %val_f32, %input[%i, %j] : memref<2x3xf32> + memref.store %val_f32, %input[%i, %j] : memref<2x3xf32> } - %unranked_input = memref_cast %input : memref<2x3xf32> to memref<*xf32> + %unranked_input = memref.cast %input : memref<2x3xf32> to memref<*xf32> call @print_memref_f32(%unranked_input) : (memref<*xf32>) -> () // CHECK: rank = 2 offset = 0 sizes = [2, 3] strides = [3, 1] // CHECK-NEXT: [0, 1, 2] // CHECK-NEXT: [3, 4, 5] // Initialize shape. - %shape = alloc() : memref<2xindex> + %shape = memref.alloc() : memref<2xindex> %c2 = constant 2 : index %c3 = constant 3 : index - store %c3, %shape[%c0] : memref<2xindex> - store %c2, %shape[%c1] : memref<2xindex> + memref.store %c3, %shape[%c0] : memref<2xindex> + memref.store %c2, %shape[%c1] : memref<2xindex> // Test cases. call @reshape_ranked_memref_to_ranked(%input, %shape) @@ -48,10 +48,10 @@ func @reshape_ranked_memref_to_ranked(%input : memref<2x3xf32>, %shape : memref<2xindex>) { - %output = memref_reshape %input(%shape) + %output = memref.reshape %input(%shape) : (memref<2x3xf32>, memref<2xindex>) -> memref - %unranked_output = memref_cast %output : memref to memref<*xf32> + %unranked_output = memref.cast %output : memref to memref<*xf32> call @print_memref_f32(%unranked_output) : (memref<*xf32>) -> () // CHECK: rank = 2 offset = 0 sizes = [3, 2] strides = [2, 1] data = // CHECK: [0, 1], @@ -62,11 +62,11 @@ func @reshape_unranked_memref_to_ranked(%input : memref<2x3xf32>, %shape : memref<2xindex>) { - %unranked_input = memref_cast %input : memref<2x3xf32> to memref<*xf32> - %output = memref_reshape %input(%shape) + %unranked_input = memref.cast %input : memref<2x3xf32> to memref<*xf32> + %output = memref.reshape %input(%shape) : (memref<2x3xf32>, memref<2xindex>) -> memref - %unranked_output = memref_cast %output : memref to memref<*xf32> + %unranked_output = memref.cast %output : memref to memref<*xf32> call @print_memref_f32(%unranked_output) : (memref<*xf32>) -> () // CHECK: rank = 2 offset = 0 sizes = [3, 2] strides = [2, 1] data = // CHECK: [0, 1], @@ -77,8 +77,8 @@ func @reshape_ranked_memref_to_unranked(%input : memref<2x3xf32>, %shape : memref<2xindex>) { - %dyn_size_shape = memref_cast %shape : memref<2xindex> to memref - %output = memref_reshape %input(%dyn_size_shape) + %dyn_size_shape = memref.cast %shape : memref<2xindex> to memref + %output = memref.reshape %input(%dyn_size_shape) : (memref<2x3xf32>, memref) -> memref<*xf32> call @print_memref_f32(%output) : (memref<*xf32>) -> () @@ -91,9 +91,9 @@ func @reshape_unranked_memref_to_unranked(%input : memref<2x3xf32>, 
%shape : memref<2xindex>) { - %unranked_input = memref_cast %input : memref<2x3xf32> to memref<*xf32> - %dyn_size_shape = memref_cast %shape : memref<2xindex> to memref - %output = memref_reshape %input(%dyn_size_shape) + %unranked_input = memref.cast %input : memref<2x3xf32> to memref<*xf32> + %dyn_size_shape = memref.cast %shape : memref<2xindex> to memref + %output = memref.reshape %input(%dyn_size_shape) : (memref<2x3xf32>, memref) -> memref<*xf32> call @print_memref_f32(%output) : (memref<*xf32>) -> () diff --git a/mlir/test/mlir-cpu-runner/sgemm_naive_codegen.mlir b/mlir/test/mlir-cpu-runner/sgemm_naive_codegen.mlir --- a/mlir/test/mlir-cpu-runner/sgemm_naive_codegen.mlir +++ b/mlir/test/mlir-cpu-runner/sgemm_naive_codegen.mlir @@ -1,9 +1,9 @@ // RUN: mlir-opt -convert-linalg-to-loops -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm %s | mlir-cpu-runner -O3 -e main -entry-point-result=void -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s func @main() { - %A = alloc() : memref<16x16xf32> - %B = alloc() : memref<16x16xf32> - %C = alloc() : memref<16x16xf32> + %A = memref.alloc() : memref<16x16xf32> + %B = memref.alloc() : memref<16x16xf32> + %C = memref.alloc() : memref<16x16xf32> %cf1 = constant 1.00000e+00 : f32 @@ -27,9 +27,9 @@ %c1 = constant 1 : index %c2 = constant 2 : index - %M = dim %C, %c0 : memref<16x16xf32> - %N = dim %C, %c1 : memref<16x16xf32> - %K = dim %A, %c1 : memref<16x16xf32> + %M = memref.dim %C, %c0 : memref<16x16xf32> + %N = memref.dim %C, %c1 : memref<16x16xf32> + %K = memref.dim %A, %c1 : memref<16x16xf32> %f1 = muli %M, %N : index %f2 = muli %f1, %K : index @@ -50,7 +50,7 @@ %c0 = constant 0 : index affine.for %arg3 = 0 to 16 { affine.for %arg4 = 0 to 16 { - %m = alloc() : memref<1xf32> + %m = memref.alloc() : memref<1xf32> %v = affine.load %arg2[%arg3, %arg4] : memref<16x16xf32> affine.store %v, %m[%c0] : memref<1xf32> affine.for %arg5 = 0 to 16 { @@ -63,7 +63,7 @@ } %s = affine.load %m[%c0] : memref<1xf32> affine.store %s, %arg2[%arg3, %arg4] : memref<16x16xf32> - dealloc %m : memref<1xf32> + memref.dealloc %m : memref<1xf32> } } return diff --git a/mlir/test/mlir-cpu-runner/unranked_memref.mlir b/mlir/test/mlir-cpu-runner/unranked_memref.mlir --- a/mlir/test/mlir-cpu-runner/unranked_memref.mlir +++ b/mlir/test/mlir-cpu-runner/unranked_memref.mlir @@ -39,35 +39,35 @@ // CHECK-SAME: strides = [3, 1] // CHECK-COUNT-4: [1, 1, 1] func @main() -> () { - %A = alloc() : memref<10x3xf32, 0> + %A = memref.alloc() : memref<10x3xf32, 0> %f2 = constant 2.00000e+00 : f32 %f5 = constant 5.00000e+00 : f32 %f10 = constant 10.00000e+00 : f32 - %V = memref_cast %A : memref<10x3xf32, 0> to memref + %V = memref.cast %A : memref<10x3xf32, 0> to memref linalg.fill(%V, %f10) : memref, f32 - %U = memref_cast %A : memref<10x3xf32, 0> to memref<*xf32> + %U = memref.cast %A : memref<10x3xf32, 0> to memref<*xf32> call @print_memref_f32(%U) : (memref<*xf32>) -> () - %V2 = memref_cast %U : memref<*xf32> to memref + %V2 = memref.cast %U : memref<*xf32> to memref linalg.fill(%V2, %f5) : memref, f32 - %U2 = memref_cast %V2 : memref to memref<*xf32> + %U2 = memref.cast %V2 : memref to memref<*xf32> call @print_memref_f32(%U2) : (memref<*xf32>) -> () - %V3 = memref_cast %V2 : memref to memref<*xf32> - %V4 = memref_cast %V3 : memref<*xf32> to memref + %V3 = memref.cast %V2 : memref to memref<*xf32> + %V4 = memref.cast %V3 : memref<*xf32> to memref linalg.fill(%V4, %f2) : memref, f32 - %U3 = memref_cast %V2 : memref to 
memref<*xf32> + %U3 = memref.cast %V2 : memref to memref<*xf32> call @print_memref_f32(%U3) : (memref<*xf32>) -> () // 122 is ASCII for 'z'. %i8_z = constant 122 : i8 - %I8 = alloc() : memref - store %i8_z, %I8[]: memref - %U4 = memref_cast %I8 : memref to memref<*xi8> + %I8 = memref.alloc() : memref + memref.store %i8_z, %I8[]: memref + %U4 = memref.cast %I8 : memref to memref<*xi8> call @print_memref_i8(%U4) : (memref<*xi8>) -> () - dealloc %A : memref<10x3xf32, 0> + memref.dealloc %A : memref<10x3xf32, 0> call @return_var_memref_caller() : () -> () call @return_two_var_memref_caller() : () -> () @@ -79,7 +79,7 @@ func private @print_memref_f32(memref<*xf32>) attributes { llvm.emit_c_interface } func @return_two_var_memref_caller() { - %0 = alloca() : memref<4x3xf32> + %0 = memref.alloca() : memref<4x3xf32> %c0f32 = constant 1.0 : f32 linalg.fill(%0, %c0f32) : memref<4x3xf32>, f32 %1:2 = call @return_two_var_memref(%0) : (memref<4x3xf32>) -> (memref<*xf32>, memref<*xf32>) @@ -89,12 +89,12 @@ } func @return_two_var_memref(%arg0: memref<4x3xf32>) -> (memref<*xf32>, memref<*xf32>) { - %0 = memref_cast %arg0 : memref<4x3xf32> to memref<*xf32> + %0 = memref.cast %arg0 : memref<4x3xf32> to memref<*xf32> return %0, %0 : memref<*xf32>, memref<*xf32> } func @return_var_memref_caller() { - %0 = alloca() : memref<4x3xf32> + %0 = memref.alloca() : memref<4x3xf32> %c0f32 = constant 1.0 : f32 linalg.fill(%0, %c0f32) : memref<4x3xf32>, f32 %1 = call @return_var_memref(%0) : (memref<4x3xf32>) -> memref<*xf32> @@ -103,7 +103,7 @@ } func @return_var_memref(%arg0: memref<4x3xf32>) -> memref<*xf32> { - %0 = memref_cast %arg0: memref<4x3xf32> to memref<*xf32> + %0 = memref.cast %arg0: memref<4x3xf32> to memref<*xf32> return %0 : memref<*xf32> } @@ -111,17 +111,17 @@ func private @printNewline() -> () func @dim_op_of_unranked() { - %ranked = alloc() : memref<4x3xf32> - %unranked = memref_cast %ranked: memref<4x3xf32> to memref<*xf32> + %ranked = memref.alloc() : memref<4x3xf32> + %unranked = memref.cast %ranked: memref<4x3xf32> to memref<*xf32> %c0 = constant 0 : index - %dim_0 = dim %unranked, %c0 : memref<*xf32> + %dim_0 = memref.dim %unranked, %c0 : memref<*xf32> call @printU64(%dim_0) : (index) -> () call @printNewline() : () -> () // CHECK: 4 %c1 = constant 1 : index - %dim_1 = dim %unranked, %c1 : memref<*xf32> + %dim_1 = memref.dim %unranked, %c1 : memref<*xf32> call @printU64(%dim_1) : (index) -> () call @printNewline() : () -> () // CHECK: 3 diff --git a/mlir/test/mlir-cpu-runner/utils.mlir b/mlir/test/mlir-cpu-runner/utils.mlir --- a/mlir/test/mlir-cpu-runner/utils.mlir +++ b/mlir/test/mlir-cpu-runner/utils.mlir @@ -5,11 +5,11 @@ func @print_0d() { %f = constant 2.00000e+00 : f32 - %A = alloc() : memref - store %f, %A[]: memref - %U = memref_cast %A : memref to memref<*xf32> + %A = memref.alloc() : memref + memref.store %f, %A[]: memref + %U = memref.cast %A : memref to memref<*xf32> call @print_memref_f32(%U): (memref<*xf32>) -> () - dealloc %A : memref + memref.dealloc %A : memref return } // PRINT-0D: Unranked Memref base@ = {{.*}} rank = 0 offset = 0 sizes = [] strides = [] data = @@ -17,12 +17,12 @@ func @print_1d() { %f = constant 2.00000e+00 : f32 - %A = alloc() : memref<16xf32> - %B = memref_cast %A: memref<16xf32> to memref + %A = memref.alloc() : memref<16xf32> + %B = memref.cast %A: memref<16xf32> to memref linalg.fill(%B, %f) : memref, f32 - %U = memref_cast %B : memref to memref<*xf32> + %U = memref.cast %B : memref to memref<*xf32> call @print_memref_f32(%U): (memref<*xf32>) -> () - 
dealloc %A : memref<16xf32> + memref.dealloc %A : memref<16xf32> return } // PRINT-1D: Unranked Memref base@ = {{.*}} rank = 1 offset = 0 sizes = [16] strides = [1] data = @@ -31,15 +31,15 @@ func @print_3d() { %f = constant 2.00000e+00 : f32 %f4 = constant 4.00000e+00 : f32 - %A = alloc() : memref<3x4x5xf32> - %B = memref_cast %A: memref<3x4x5xf32> to memref + %A = memref.alloc() : memref<3x4x5xf32> + %B = memref.cast %A: memref<3x4x5xf32> to memref linalg.fill(%B, %f) : memref, f32 %c2 = constant 2 : index - store %f4, %B[%c2, %c2, %c2]: memref - %U = memref_cast %B : memref to memref<*xf32> + memref.store %f4, %B[%c2, %c2, %c2]: memref + %U = memref.cast %B : memref to memref<*xf32> call @print_memref_f32(%U): (memref<*xf32>) -> () - dealloc %A : memref<3x4x5xf32> + memref.dealloc %A : memref<3x4x5xf32> return } // PRINT-3D: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [3, 4, 5] strides = [20, 5, 1] data = @@ -57,13 +57,13 @@ %c0 = constant 0 : index %f10 = constant 10.0 : f32 %vf10 = splat %f10: !vector_type_C - %C = alloc() : !matrix_type_CC - store %vf10, %C[%c0, %c0]: !matrix_type_CC + %C = memref.alloc() : !matrix_type_CC + memref.store %vf10, %C[%c0, %c0]: !matrix_type_CC - %CC = memref_cast %C: !matrix_type_CC to memref + %CC = memref.cast %C: !matrix_type_CC to memref call @print_memref_vector_4x4xf32(%CC): (memref) -> () - dealloc %C : !matrix_type_CC + memref.dealloc %C : !matrix_type_CC return } diff --git a/mlir/test/mlir-opt/commandline.mlir b/mlir/test/mlir-opt/commandline.mlir --- a/mlir/test/mlir-opt/commandline.mlir +++ b/mlir/test/mlir-opt/commandline.mlir @@ -13,6 +13,7 @@ // CHECK-NEXT: llvm // CHECK-NEXT: llvm_arm_sve // CHECK-NEXT: math +// CHECK-NEXT: memref // CHECK-NEXT: nvvm // CHECK-NEXT: omp // CHECK-NEXT: pdl diff --git a/mlir/test/mlir-reduce/multiple-function.mlir b/mlir/test/mlir-reduce/multiple-function.mlir --- a/mlir/test/mlir-reduce/multiple-function.mlir +++ b/mlir/test/mlir-reduce/multiple-function.mlir @@ -1,7 +1,7 @@ // UNSUPPORTED: system-windows // RUN: mlir-reduce %s -test %S/failure-test.sh -pass-test function-reducer | FileCheck %s -// This input should be reduced by the pass pipeline so that only -// the @simple5 function remains as this is the shortest function +// This input should be reduced by the pass pipeline so that only +// the @simple5 function remains as this is the shortest function // containing the interesting behavior. 
// CHECK-NOT: func @simple1() { @@ -26,7 +26,7 @@ ^bb1: br ^bb3(%arg1 : memref<2xf32>) ^bb2: - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> br ^bb3(%0 : memref<2xf32>) ^bb3(%1: memref<2xf32>): "test.crashOp"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () diff --git a/mlir/test/mlir-reduce/simple-test.mlir b/mlir/test/mlir-reduce/simple-test.mlir --- a/mlir/test/mlir-reduce/simple-test.mlir +++ b/mlir/test/mlir-reduce/simple-test.mlir @@ -6,7 +6,7 @@ ^bb1: br ^bb3(%arg1 : memref<2xf32>) ^bb2: - %0 = alloc() : memref<2xf32> + %0 = memref.alloc() : memref<2xf32> br ^bb3(%0 : memref<2xf32>) ^bb3(%1: memref<2xf32>): return diff --git a/mlir/unittests/ExecutionEngine/Invoke.cpp b/mlir/unittests/ExecutionEngine/Invoke.cpp --- a/mlir/unittests/ExecutionEngine/Invoke.cpp +++ b/mlir/unittests/ExecutionEngine/Invoke.cpp @@ -104,7 +104,7 @@ std::string moduleStr = R"mlir( func @zero_ranked(%arg0 : memref) attributes { llvm.emit_c_interface } { %cst42 = constant 42.0 : f32 - store %cst42, %arg0[] : memref + memref.store %cst42, %arg0[] : memref return } )mlir"; @@ -139,7 +139,7 @@ func @one_ranked(%arg0 : memref) attributes { llvm.emit_c_interface } { %cst42 = constant 42.0 : f32 %cst5 = constant 5 : index - store %cst42, %arg0[%cst5] : memref + memref.store %cst42, %arg0[%cst5] : memref return } )mlir"; @@ -192,8 +192,8 @@ %x = constant 2 : index %y = constant 1 : index %cst42 = constant 42.0 : f32 - store %cst42, %arg0[%y, %x] : memref - store %cst42, %arg1[%x, %y] : memref + memref.store %cst42, %arg0[%y, %x] : memref + memref.store %cst42, %arg1[%x, %y] : memref return } )mlir"; @@ -234,7 +234,7 @@ std::string moduleStr = R"mlir( func private @callback(%arg0: memref, %coefficient: i32) attributes { llvm.emit_c_interface } func @caller_for_callback(%arg0: memref, %coefficient: i32) attributes { llvm.emit_c_interface } { - %unranked = memref_cast %arg0: memref to memref<*xf32> + %unranked = memref.cast %arg0: memref to memref<*xf32> call @callback(%arg0, %coefficient) : (memref, i32) -> () return }