diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -367,7 +367,8 @@
   let dependentDialects = [
     "memref::MemRefDialect",
     "StandardOpsDialect",
-    "scf::SCFDialect"
+    "scf::SCFDialect",
+    "tensor::TensorDialect"
   ];
 }
 
@@ -504,7 +505,7 @@
 
 def TosaToStandard : Pass<"tosa-to-standard"> {
   let summary = "Lower TOSA to the Standard dialect";
-  let dependentDialects = ["StandardOpsDialect"];
+  let dependentDialects = ["StandardOpsDialect", "tensor::TensorDialect"];
   let description = [{
     Pass that converts TOSA operations to the equivalent operations using the
     operations in the Standard dialect.
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -579,12 +579,11 @@
 
     Tensor-based version:
 
-    The body region of the loop contains `subtensor` operations applied to
+    The body region of the loop contains `extract_slice` operations applied to
     every tensor argument of TiledLoopOp.
 
     The body region must contain exactly one block that terminates with
-    `linalg.yield` with the operands resulting from `subtensor_insert`
-    operations.
+    `linalg.yield` with the operands resulting from `insert_slice` operations.
 
     Example:
 
@@ -594,16 +593,16 @@
         outs(%out : tensor<24x64xi8>)
         iterators("parallel")
         distribution("block_x") {
-      %lhs_sub = subtensor %lhs[%i, 0] [%c4, %c64] [1, 1]
+      %lhs_sub = tensor.extract_slice %lhs[%i, 0] [%c4, %c64] [1, 1]
           : tensor<24x64xi8> to tensor<?x?xi8>
-      %rhs_sub = subtensor %rhs[%i, 0] [%c4, %c64] [1, 1]
+      %rhs_sub = tensor.extract_slice %rhs[%i, 0] [%c4, %c64] [1, 1]
           : tensor<24x64xi8> to tensor<?x?xi8>
-      %out_sub = subtensor %out[%i, 0] [%c4, %c64] [1, 1]
+      %out_sub = tensor.extract_slice %out[%i, 0] [%c4, %c64] [1, 1]
           : tensor<24x64xi8> to tensor<?x?xi8>
 
       %result_sub = linalg.generic ...
 
-      %result = subtensor_insert %result_sub into %out[%i, 0][%c4, %c64][1, 1]
+      %result = tensor.insert_slice %result_sub into %out[%i, 0][%c4, %c64][1, 1]
         : tensor<?x?xi8> into tensor<24x64xi8>
       linalg.yield %result : tensor<24x64xi8>
     }
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h
@@ -47,7 +47,7 @@
 /// If hoistPaddingOnTensors is called with `nLoops` = 2 on the following IR.
 /// ```
 ///    scf.for (%i, %j, %k)
-///      %st0 = subtensor f(%i, %k) : ... to tensor<?x?xf32>
+///      %st0 = tensor.extract_slice f(%i, %k) : ... to tensor<?x?xf32>
 ///      %0 = linalg.pad_tensor %st0 low[0, 0] high[...] {
 ///      ^bb0( ... ):
 ///        linalg.yield %pad
@@ -61,16 +61,17 @@
 ///    scf.for (%i) {
 ///      %packed_init = linalg.init_tensor range(%j) : tensor<?x4x8xf32>
 ///      %packed = scf.for (%k) iter_args(%p : %packed_init) {
-///        %st0 = subtensor f(%i, %k) : ... to tensor<?x?xf32>
+///        %st0 = tensor.extract_slice f(%i, %k) : ... to tensor<?x?xf32>
 ///        %0 = linalg.pad_tensor %st0 low[0, 0] high[...] {
 ///        ^bb0( ... ):
 ///          linalg.yield %pad
 ///        } : tensor<?x?xf32> to tensor<4x8xf32>
-///        %1 = subtensor_insert %0 ... : tensor<4x8xf32> to tensor<?x4x8xf32>
+///        %1 = tensor.insert_slice %0 ...
+///            : tensor<4x8xf32> to tensor<?x4x8xf32>
 ///        scf.yield %1: tensor<?x4x8xf32>
 ///      } -> tensor<?x4x8xf32>
 ///      scf.for (%j, %k) {
-///        %st0 = subtensor %packed [%k, 0, 0][1, 4, 8][1, 1, 1] :
+///        %st0 = tensor.extract_slice %packed [%k, 0, 0][1, 4, 8][1, 1, 1] :
 ///                 tensor<?x4x8xf32> to tensor<4x8xf32>
 ///        compute(%st0)
 ///      }
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -12,6 +12,7 @@
 #include "mlir/Dialect/Linalg/Utils/Utils.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/SCF/Utils.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Vector/VectorOps.h"
 #include "mlir/IR/Identifier.h"
 #include "mlir/IR/PatternMatch.h"
@@ -1077,12 +1078,12 @@
     const FrozenRewritePatternSet &stage2Patterns,
     function_ref<LogicalResult(Operation *)> stage3Lambda = nullptr);
 
-/// Rewrite subtensor(pad_tensor(x)) into pad_tensor(subtensor(x)).
-struct SubTensorOfPadTensorSwapPattern
-    : public OpRewritePattern<SubTensorOp> {
-  using OpRewritePattern<SubTensorOp>::OpRewritePattern;
+/// Rewrite extract_slice(pad_tensor(x)) into pad_tensor(extract_slice(x)).
+struct ExtractSliceOfPadTensorSwapPattern
+    : public OpRewritePattern<tensor::ExtractSliceOp> {
+  using OpRewritePattern<tensor::ExtractSliceOp>::OpRewritePattern;
 
-  LogicalResult matchAndRewrite(SubTensorOp subTensorOp,
+  LogicalResult matchAndRewrite(tensor::ExtractSliceOp sliceOp,
                                 PatternRewriter &rewriter) const override;
 };
 
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -78,7 +78,7 @@
 bool isFusableInto(const LinalgDependenceGraph &graph, LinalgOp consumer,
                    Value consumedView, LinalgOp producer);
 
-/// Creates subtensor/subview ops for all `tiledOperands` of the given
+/// Creates extract_slice/subview ops for all `tiledOperands` of the given
 /// `linalgOp` with `builder`, assuming `linalgOp` is being fused into a loop
 /// nest for tiling with the given induction variables `ivs` and tile sizes
 /// `tileSizes`. `sizeBounds` are the iteration space bounds for *all* the
@@ -118,15 +118,17 @@
                                           const LinalgDependenceGraph &graph);
 /// Tensor counterpart of `fuseProducerOfBuffer`.
 /// This implements the fusion part of the "tileAndFuse on tensors"
-/// transformation and thus requires the `consumerOpOperand` to be a `subtensor`
-/// op (generally obtained by applying the tiling transformation).
+/// transformation and thus requires the `consumerOpOperand` to be a
+/// `extract_slice` op (generally obtained by applying the tiling
+/// transformation).
 Optional<FusionInfo> fuseProducerOfTensor(OpBuilder &b,
                                           OpOperand &consumerOpOperand);
 /// Tensor counterpart of `fuseProducerOfBuffer`.
 /// This implements the fusion part of the "tileAndFuse on tensors"
-/// transformation and thus requires the `consumerOpOperand` to be a `subtensor`
-/// op (generally obtained by applying the tiling transformation).
-/// Assumes `producerOfTensor` is a Linalg op that produces `consumerOpOperand`.
+/// transformation and thus requires the `consumerOpOperand` to be a
+/// `extract_slice` op (generally obtained by applying the tiling
+/// transformation). Assumes `producerOfTensor` is a Linalg op that produces
+/// `consumerOpOperand`.
 Optional<FusionInfo> fuseProducerOfTensor(OpBuilder &b,
                                           OpResult producerOpResult,
                                           OpOperand &consumerOpOperand);
diff --git a/mlir/include/mlir/Dialect/Shape/IR/Shape.h b/mlir/include/mlir/Dialect/Shape/IR/Shape.h
--- a/mlir/include/mlir/Dialect/Shape/IR/Shape.h
+++ b/mlir/include/mlir/Dialect/Shape/IR/Shape.h
@@ -14,6 +14,7 @@
 #ifndef MLIR_SHAPE_IR_SHAPE_H
 #define MLIR_SHAPE_IR_SHAPE_H
 
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/OpDefinition.h"
diff --git a/mlir/include/mlir/Dialect/Shape/IR/ShapeBase.td b/mlir/include/mlir/Dialect/Shape/IR/ShapeBase.td
--- a/mlir/include/mlir/Dialect/Shape/IR/ShapeBase.td
+++ b/mlir/include/mlir/Dialect/Shape/IR/ShapeBase.td
@@ -35,6 +35,7 @@
   }];
 
   let cppNamespace = "::mlir::shape";
+  let dependentDialects = ["tensor::TensorDialect"];
 
   let hasConstantMaterializer = 1;
   let hasOperationAttrVerify = 1;
diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h
--- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h
+++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h
@@ -23,7 +23,6 @@
 #include "mlir/Interfaces/ControlFlowInterfaces.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
 #include "mlir/Interfaces/VectorInterfaces.h"
-#include "mlir/Interfaces/ViewLikeInterface.h"
 
 // Pull in all enum type definitions and utility function declarations.
 #include "mlir/Dialect/StandardOps/IR/OpsEnums.h.inc"
@@ -34,12 +33,6 @@
 class FuncOp;
 class OpBuilder;
 class PatternRewriter;
-
-/// Return the list of Range (i.e. offset, size, stride). Each Range
-/// entry contains either the dynamic value or a ConstantIndexOp constructed
-/// with `b` at location `loc`.
-SmallVector<Range, 8> getOrCreateRanges(OffsetSizeAndStrideOpInterface op,
-                                        OpBuilder &b, Location loc);
 } // namespace mlir
 
 #define GET_OP_CLASSES
diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
--- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
+++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
@@ -21,7 +21,6 @@
 include "mlir/Interfaces/ControlFlowInterfaces.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 include "mlir/Interfaces/VectorInterfaces.td"
-include "mlir/Interfaces/ViewLikeInterface.td"
 
 def StandardOps_Dialect : Dialect {
   let name = "std";
@@ -1754,245 +1753,6 @@
   let hasCanonicalizer = 1;
 }
 
-//===----------------------------------------------------------------------===//
-// SubTensorOp
-//===----------------------------------------------------------------------===//
-
-def SubTensorOp : BaseOpWithOffsetSizesAndStrides<
-    StandardOps_Dialect, "subtensor", [NoSideEffect, AttrSizedOperandSegments,
-                                       OffsetSizeAndStrideOpInterface]> {
-  let summary = "subtensor operation";
-  let description = [{
-    The "subtensor" operation extract a tensor from another tensor as
-    specified by the operation's offsets, sizes and strides arguments.
-
-    The subtensor operation supports the following arguments:
-
-    * source: the "base" tensor from which to extract a subtensor.
-    * offsets: tensor-rank number of offsets into the "base" tensor from which
-               to extract the subtensor.
-    * sizes: tensor-rank number of sizes which specify the sizes of the result
-             tensor type.
-    * strides: tensor-rank number of strides specifying subsampling in each
-               dimension.
-
-    The representation based on offsets, sizes and strides support a
-    partially-static specification via attributes specified through the
-    `static_offsets`, `static_sizes` and `static_strides` arguments. A special
-    sentinel value ShapedType::kDynamicSize and
-    ShapedType::kDynamicStrideOrOffset encodes that the corresponding entry has
-    a dynamic value.
-
-    After buffer-allocation, the "subtensor" op is expected to lower into a
-    "subview" op.
-
-    A subtensor operation may additionally reduce the rank of the resulting
-    tensor by removing dimensions that are statically known to be of size 1.
-
-    Example:
-
-    ```
-    // Rank-reducing subtensor.
-    %1 = subtensor %0[0, 0, 0][1, 16, 4][1, 1, 1] :
-      tensor<8x16x4xf32> to tensor<16x4xf32>
-    %3 = subtensor %2[3, 4, 2][1, 6, 3][1, 1, 1] :
-      tensor<8x16x4xf32> to tensor<6x3xf32>
-    ```
-  }];
-
-  let arguments = (ins
-    AnyRankedTensor:$source,
-    Variadic<Index>:$offsets,
-    Variadic<Index>:$sizes,
-    Variadic<Index>:$strides,
-    I64ArrayAttr:$static_offsets,
-    I64ArrayAttr:$static_sizes,
-    I64ArrayAttr:$static_strides
-  );
-  let results = (outs AnyRankedTensor:$result);
-
-  let assemblyFormat = [{
-    $source ``
-    custom<OperandsOrIntegersOffsetsOrStridesList>($offsets, $static_offsets)
-    custom<OperandsOrIntegersSizesList>($sizes, $static_sizes)
-    custom<OperandsOrIntegersOffsetsOrStridesList>($strides, $static_strides)
-    attr-dict `:` type($source) `to` type($result)
-  }];
-
-  let builders = [
-    // Build a SubTensorOp with mixed static and dynamic entries and inferred
-    // result type.
-    OpBuilder<(ins "Value":$source, "ArrayRef<OpFoldResult>":$offsets,
-      "ArrayRef<OpFoldResult>":$sizes, "ArrayRef<OpFoldResult>":$strides,
-      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
-    // Build a SubTensorOp with mixed static and dynamic entries and custom
-    // result type. If the type passed is nullptr, it is inferred.
-    OpBuilder<(ins "RankedTensorType":$resultType, "Value":$source,
-      "ArrayRef<OpFoldResult>":$offsets, "ArrayRef<OpFoldResult>":$sizes,
-      "ArrayRef<OpFoldResult>":$strides,
-      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
-    // Build a SubTensorOp with dynamic entries and custom result type. If the
-    // type passed is nullptr, it is inferred.
-    OpBuilder<(ins "Value":$source, "ValueRange":$offsets,
-      "ValueRange":$sizes, "ValueRange":$strides,
-      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
-    // Build a SubTensorOp with dynamic entries and inferred result type.
-    OpBuilder<(ins "RankedTensorType":$resultType, "Value":$source,
-      "ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides,
-      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>
-  ];
-
-  let extraClassDeclaration = extraBaseClassDeclaration # [{
-    /// Returns the type of the base tensor operand.
-    RankedTensorType getSourceType() {
-      return source().getType().cast<RankedTensorType>();
-    }
-
-    /// The result of a subtensor is always a tensor.
-    RankedTensorType getType() {
-      return getResult().getType().cast<RankedTensorType>();
-    }
-
-    /// A subtensor result type can be fully inferred from the source type and
-    /// the static representation of offsets, sizes and strides. Special
-    /// sentinels encode the dynamic case.
-    static Type inferResultType(RankedTensorType sourceRankedTensorType,
-                                ArrayRef<int64_t> staticOffsets,
-                                ArrayRef<int64_t> staticSizes,
-                                ArrayRef<int64_t> staticStrides);
-    static Type inferResultType(RankedTensorType sourceRankedTensorType,
-                                ArrayRef<OpFoldResult> staticOffsets,
-                                ArrayRef<OpFoldResult> staticSizes,
-                                ArrayRef<OpFoldResult> staticStrides);
-    static Type inferRankReducedResultType(unsigned resultRank,
-                                           RankedTensorType sourceRankedTensorType,
-                                           ArrayRef<int64_t> staticOffsets,
-                                           ArrayRef<int64_t> staticSizes,
-                                           ArrayRef<int64_t> staticStrides);
-    static Type inferRankReducedResultType(unsigned resultRank,
-                                           RankedTensorType sourceRankedTensorType,
-                                           ArrayRef<OpFoldResult> staticOffsets,
-                                           ArrayRef<OpFoldResult> staticSizes,
-                                           ArrayRef<OpFoldResult> staticStrides);
-
-    /// Return the expected rank of each of the`static_offsets`, `static_sizes`
-    /// and `static_strides` attributes.
-    std::array<unsigned, 3> getArrayAttrMaxRanks() {
-      unsigned rank = getSourceType().getRank();
-      return {rank, rank, rank};
-    }
-
-    /// Return the number of leading operands before the `offsets`, `sizes` and
-    /// and `strides` operands.
-    static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; }
-  }];
-
-  let hasCanonicalizer = 1;
-  let hasFolder = 1;
-}
-
-//===----------------------------------------------------------------------===//
-// SubTensorInsertOp
-//===----------------------------------------------------------------------===//
-
-def SubTensorInsertOp : BaseOpWithOffsetSizesAndStrides<
-    StandardOps_Dialect, "subtensor_insert",
-    [NoSideEffect, AttrSizedOperandSegments, OffsetSizeAndStrideOpInterface,
-     TypesMatchWith<"expected result type to match dest type",
-                    "dest", "result", "$_self">]> {
-  let summary = "subtensor_insert operation";
-  let description = [{
-    The "subtensor_insert" operation insert a tensor `source` into another
-    tensor `dest` as specified by the operation's offsets, sizes and strides
-    arguments.
-
-    It returns a copy of `dest` with the proper subtensor updated with the value
-    of `source`.
-
-    The subtensor_insert operation has the encodes the following information:
-
-    * source: the tensor that is inserted.
-    * dest: the tensor into which the source tensor is inserted.
-    * offsets: tensor-rank number of offsets into the "base" tensor from which
-               to extract the subtensor.
-    * sizes: tensor-rank number of sizes which specify the sizes of the result
-             tensor type.
-    * strides: tensor-rank number of strides that specify subsampling in each
-               dimension.
-
-    The representation based on offsets, sizes and strides support a
-    partially-static specification via attributes specified through the
-    `static_offsets`, `static_sizes` and `static_strides` arguments. A special
-    sentinel value ShapedType::kDynamicSize and
-    ShapedType::kDynamicStrideOrOffset encodes that the corresponding entry has
-    a dynamic value.
-
-    After buffer-allocation, the "subtensor_insert" op is expected to become
-    an in-place buffer update.
-  }];
-
-  let arguments = (ins
-    AnyRankedTensor:$source,
-    AnyRankedTensor:$dest,
-    Variadic<Index>:$offsets,
-    Variadic<Index>:$sizes,
-    Variadic<Index>:$strides,
-    I64ArrayAttr:$static_offsets,
-    I64ArrayAttr:$static_sizes,
-    I64ArrayAttr:$static_strides
-  );
-  let results = (outs AnyRankedTensor:$result);
-
-  let assemblyFormat = [{
-    $source `into` $dest ``
-    custom<OperandsOrIntegersOffsetsOrStridesList>($offsets, $static_offsets)
-    custom<OperandsOrIntegersSizesList>($sizes, $static_sizes)
-    custom<OperandsOrIntegersOffsetsOrStridesList>($strides, $static_strides)
-    attr-dict `:` type($source) `into` type($dest)
-  }];
-
-  let verifier = ?;
-
-  let builders = [
-    // Build a SubTensorInsertOp with mixed static and dynamic entries.
-    OpBuilder<(ins "Value":$source, "Value":$dest,
-      "ArrayRef<OpFoldResult>":$offsets, "ArrayRef<OpFoldResult>":$sizes,
-      "ArrayRef<OpFoldResult>":$strides,
-      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
-    // Build a SubTensorInsertOp with dynamic entries.
-    OpBuilder<(ins "Value":$source, "Value":$dest,
-      "ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides,
-      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>
-  ];
-
-  let extraClassDeclaration = extraBaseClassDeclaration # [{
-    /// Returns the type of the base tensor operand.
-    RankedTensorType getSourceType() {
-      return source().getType().cast<RankedTensorType>();
-    }
-
-    /// The result of a subtensor_insert is always a tensor.
-    RankedTensorType getType() {
-      return getResult().getType().cast<RankedTensorType>();
-    }
-
-    /// Return the expected rank of each of the`static_offsets`, `static_sizes`
-    /// and `static_strides` attributes.
-    std::array<unsigned, 3> getArrayAttrMaxRanks() {
-      unsigned rank = getType().getRank();
-      return {rank, rank, rank};
-    }
-
-    /// Return the number of leading operands before the `offsets`, `sizes` and
-    /// and `strides` operands.
-    static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 2; }
-  }];
-
-  let hasCanonicalizer = 1;
-  let hasFolder = 1;
-}
-
-
 //===----------------------------------------------------------------------===//
 // SwitchOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h b/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h
--- a/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h
+++ b/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h
@@ -16,6 +16,21 @@
 #include "mlir/Interfaces/CastInterfaces.h"
 #include "mlir/Interfaces/ControlFlowInterfaces.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
+#include "mlir/Interfaces/ViewLikeInterface.h"
+
+//===----------------------------------------------------------------------===//
+// Tensor Dialect Helpers
+//===----------------------------------------------------------------------===//
+
+namespace mlir {
+
+/// Return the list of Range (i.e. offset, size, stride). Each Range
+/// entry contains either the dynamic value or a ConstantIndexOp constructed
+/// with `b` at location `loc`.
+SmallVector<Range, 8> getOrCreateRanges(OffsetSizeAndStrideOpInterface op,
+                                        OpBuilder &b, Location loc);
+
+} // namespace mlir
 
 //===----------------------------------------------------------------------===//
 // Tensor Dialect
@@ -41,8 +56,8 @@
 /// source tensor. This is useful to fold a tensor.cast into a consuming op and
 /// implement canonicalization patterns for ops in different dialects that may
 /// consume the results of tensor.cast operations. Such foldable tensor.cast
-/// operations are typically inserted as `subtensor` ops and are canonicalized,
-/// to preserve the type compatibility of their uses.
+/// operations are typically inserted as `extract_slice` ops and are
+/// canonicalized, to preserve the type compatibility of their uses.
 ///
 /// Returns true when all conditions are met:
 /// 1. source and result are ranked tensors with same element type and rank.
@@ -64,7 +79,6 @@
 /// Performs folding of any operand of `op` if it comes from a tensor::CastOp
 /// that can be folded.
 LogicalResult foldTensorCast(Operation *op);
-
 } // namespace tensor
 } // namespace mlir
 
diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
--- a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
+++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
@@ -13,6 +13,7 @@
 include "mlir/Interfaces/CastInterfaces.td"
 include "mlir/Interfaces/ControlFlowInterfaces.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/ViewLikeInterface.td"
 
 class Tensor_Op<string mnemonic, list<OpTrait> traits = []>
     : Op<Tensor_Dialect, mnemonic, traits> {
@@ -99,6 +100,144 @@
   let hasFolder = 1;
 }
 
+
+//===----------------------------------------------------------------------===//
+// ExtractSliceOp
+//===----------------------------------------------------------------------===//
+
+def Tensor_ExtractSliceOp : BaseOpWithOffsetSizesAndStrides<
+    Tensor_Dialect, "extract_slice", [NoSideEffect, AttrSizedOperandSegments,
+                                      OffsetSizeAndStrideOpInterface]> {
+  let summary = "extract slice operation";
+  let description = [{
+    The "extract_slice" operation extract a tensor from another tensor as
+    specified by the operation's offsets, sizes and strides arguments.
+
+    The extract_slice operation supports the following arguments:
+
+    * source: the "base" tensor from which to extract a slice.
+    * offsets: tensor-rank number of offsets into the "base" tensor from which
+               to extract the slice.
+    * sizes: tensor-rank number of sizes which specify the sizes of the result
+             tensor type.
+    * strides: tensor-rank number of strides specifying subsampling in each
+               dimension.
+
+    The representation based on offsets, sizes and strides support a
+    partially-static specification via attributes specified through the
+    `static_offsets`, `static_sizes` and `static_strides` arguments. A special
+    sentinel value ShapedType::kDynamicSize and
+    ShapedType::kDynamicStrideOrOffset encodes that the corresponding entry has
+    a dynamic value.
+
+    After buffer-allocation, the "extract_slice" op is expected to lower into a
+    "subview" op.
+
+    An extract_slice operation may additionally reduce the rank of the resulting
+    tensor by removing dimensions that are statically known to be of size 1.
+
+    Example:
+
+    ```
+    // Rank-reducing extract_slice.
+    %1 = tensor.extract_slice %0[0, 0, 0][1, 16, 4][1, 1, 1] :
+      tensor<8x16x4xf32> to tensor<16x4xf32>
+    %3 = tensor.extract_slice %2[3, 4, 2][1, 6, 3][1, 1, 1] :
+      tensor<8x16x4xf32> to tensor<6x3xf32>
+    ```
+  }];
+
+  let arguments = (ins
+    AnyRankedTensor:$source,
+    Variadic<Index>:$offsets,
+    Variadic<Index>:$sizes,
+    Variadic<Index>:$strides,
+    I64ArrayAttr:$static_offsets,
+    I64ArrayAttr:$static_sizes,
+    I64ArrayAttr:$static_strides
+  );
+  let results = (outs AnyRankedTensor:$result);
+
+  let assemblyFormat = [{
+    $source ``
+    custom<OperandsOrIntegersOffsetsOrStridesList>($offsets, $static_offsets)
+    custom<OperandsOrIntegersSizesList>($sizes, $static_sizes)
+    custom<OperandsOrIntegersOffsetsOrStridesList>($strides, $static_strides)
+    attr-dict `:` type($source) `to` type($result)
+  }];
+
+  let builders = [
+    // Build an ExtractSliceOp with mixed static and dynamic entries and
+    // inferred result type.
+    OpBuilder<(ins "Value":$source, "ArrayRef<OpFoldResult>":$offsets,
+      "ArrayRef<OpFoldResult>":$sizes, "ArrayRef<OpFoldResult>":$strides,
+      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
+    // Build an ExtractSliceOp with mixed static and dynamic entries and custom
+    // result type. If the type passed is nullptr, it is inferred.
+    OpBuilder<(ins "RankedTensorType":$resultType, "Value":$source,
+      "ArrayRef<OpFoldResult>":$offsets, "ArrayRef<OpFoldResult>":$sizes,
+      "ArrayRef<OpFoldResult>":$strides,
+      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
+    // Build an ExtractSliceOp with dynamic entries and custom result type. If
+    // the type passed is nullptr, it is inferred.
+    OpBuilder<(ins "Value":$source, "ValueRange":$offsets,
+      "ValueRange":$sizes, "ValueRange":$strides,
+      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
+    // Build an ExtractSliceOp with dynamic entries and inferred result type.
+    OpBuilder<(ins "RankedTensorType":$resultType, "Value":$source,
+      "ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides,
+      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>
+  ];
+
+  let extraClassDeclaration = extraBaseClassDeclaration # [{
+    /// Returns the type of the base tensor operand.
+    RankedTensorType getSourceType() {
+      return source().getType().cast<RankedTensorType>();
+    }
+
+    /// The result of an extract_slice is always a tensor.
+    RankedTensorType getType() {
+      return getResult().getType().cast<RankedTensorType>();
+    }
+
+    /// An extract_slice result type can be fully inferred from the source type
+    /// and the static representation of offsets, sizes and strides. Special
+    /// sentinels encode the dynamic case.
+    static Type inferResultType(RankedTensorType sourceRankedTensorType,
+                                ArrayRef<int64_t> staticOffsets,
+                                ArrayRef<int64_t> staticSizes,
+                                ArrayRef<int64_t> staticStrides);
+    static Type inferResultType(RankedTensorType sourceRankedTensorType,
+                                ArrayRef<OpFoldResult> staticOffsets,
+                                ArrayRef<OpFoldResult> staticSizes,
+                                ArrayRef<OpFoldResult> staticStrides);
+    static Type inferRankReducedResultType(unsigned resultRank,
+                                           RankedTensorType sourceRankedTensorType,
+                                           ArrayRef<int64_t> staticOffsets,
+                                           ArrayRef<int64_t> staticSizes,
+                                           ArrayRef<int64_t> staticStrides);
+    static Type inferRankReducedResultType(unsigned resultRank,
+                                           RankedTensorType sourceRankedTensorType,
+                                           ArrayRef<OpFoldResult> staticOffsets,
+                                           ArrayRef<OpFoldResult> staticSizes,
+                                           ArrayRef<OpFoldResult> staticStrides);
+
+    /// Return the expected rank of each of the`static_offsets`, `static_sizes`
+    /// and `static_strides` attributes.
+    std::array<unsigned, 3> getArrayAttrMaxRanks() {
+      unsigned rank = getSourceType().getRank();
+      return {rank, rank, rank};
+    }
+
+    /// Return the number of leading operands before the `offsets`, `sizes` and
+    /// and `strides` operands.
+    static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; }
+  }];
+
+  let hasCanonicalizer = 1;
+  let hasFolder = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // FromElementsOp
 //===----------------------------------------------------------------------===//
@@ -200,7 +339,7 @@
     The `tensor.insert` op writes a tensor into a tensor `dest`as specified by
     the operation's indices.
 
-    It returns a copy of `dest` with the proper subtensor updated with the value
+    It returns a copy of `dest` with the proper slice updated with the value
     of `scalar`.
 
     The arity of indices must match the rank of the tensor `dest` (i.e., if a
@@ -234,6 +373,107 @@
   let hasFolder = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// InsertSliceOp
+//===----------------------------------------------------------------------===//
+
+def Tensor_InsertSliceOp : BaseOpWithOffsetSizesAndStrides<
+    Tensor_Dialect, "insert_slice",
+    [NoSideEffect, AttrSizedOperandSegments, OffsetSizeAndStrideOpInterface,
+     TypesMatchWith<"expected result type to match dest type",
+                    "dest", "result", "$_self">]> {
+  let summary = "insert_slice operation";
+  let description = [{
+    The "insert_slice" operation insert a tensor `source` into another
+    tensor `dest` as specified by the operation's offsets, sizes and strides
+    arguments.
+
+    It returns a copy of `dest` with the proper slice updated with the value
+    of `source`.
+
+    The insert_slice operation supports the following arguments:
+
+    * source: the tensor that is inserted.
+    * dest: the tensor into which the source tensor is inserted.
+    * offsets: tensor-rank number of offsets into the `dest` tensor into which
+               the slice is inserted.
+    * sizes: tensor-rank number of sizes which specify the sizes of the result
+             tensor type.
+    * strides: tensor-rank number of strides that specify subsampling in each
+               dimension.
+
+    The representation based on offsets, sizes and strides support a
+    partially-static specification via attributes specified through the
+    `static_offsets`, `static_sizes` and `static_strides` arguments. A special
+    sentinel value ShapedType::kDynamicSize and
+    ShapedType::kDynamicStrideOrOffset encodes that the corresponding entry has
+    a dynamic value.
+
+    After buffer-allocation, the "insert_slice" op is expected to become an
+    in-place buffer update.
+  }];
+
+  let arguments = (ins
+    AnyRankedTensor:$source,
+    AnyRankedTensor:$dest,
+    Variadic<Index>:$offsets,
+    Variadic<Index>:$sizes,
+    Variadic<Index>:$strides,
+    I64ArrayAttr:$static_offsets,
+    I64ArrayAttr:$static_sizes,
+    I64ArrayAttr:$static_strides
+  );
+  let results = (outs AnyRankedTensor:$result);
+
+  let assemblyFormat = [{
+    $source `into` $dest ``
+    custom<OperandsOrIntegersOffsetsOrStridesList>($offsets, $static_offsets)
+    custom<OperandsOrIntegersSizesList>($sizes, $static_sizes)
+    custom<OperandsOrIntegersOffsetsOrStridesList>($strides, $static_strides)
+    attr-dict `:` type($source) `into` type($dest)
+  }];
+
+  let verifier = ?;
+
+  let builders = [
+    // Build a InsertSliceOp with mixed static and dynamic entries.
+    OpBuilder<(ins "Value":$source, "Value":$dest,
+      "ArrayRef<OpFoldResult>":$offsets, "ArrayRef<OpFoldResult>":$sizes,
+      "ArrayRef<OpFoldResult>":$strides,
+      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
+    // Build a InsertSliceOp with dynamic entries.
+    OpBuilder<(ins "Value":$source, "Value":$dest,
+      "ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides,
+      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>
+  ];
+
+  let extraClassDeclaration = extraBaseClassDeclaration # [{
+    /// Returns the type of the base tensor operand.
+    RankedTensorType getSourceType() {
+      return source().getType().cast<RankedTensorType>();
+    }
+
+    /// The result of a insert_slice is always a tensor.
+    RankedTensorType getType() {
+      return getResult().getType().cast<RankedTensorType>();
+    }
+
+    /// Return the expected rank of each of the`static_offsets`, `static_sizes`
+    /// and `static_strides` attributes.
+    std::array<unsigned, 3> getArrayAttrMaxRanks() {
+      unsigned rank = getType().getRank();
+      return {rank, rank, rank};
+    }
+
+    /// Return the number of leading operands before the `offsets`, `sizes` and
+    /// and `strides` operands.
+    static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 2; }
+  }];
+
+  let hasCanonicalizer = 1;
+  let hasFolder = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // ReshapeOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
--- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
+++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
@@ -1384,7 +1384,7 @@
   let summary = "Gather operation,";
 
   let description = [{
-    Generate a tensor for which each element in the output is a subtensor of the
+    Generate a tensor for which each element in the output is a slice of the
     values tensor based on the value of indices.
   }];
 
diff --git a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp
--- a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp
+++ b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp
@@ -627,10 +627,11 @@
   Value index = b.create<SelectOp>(indexIsNegative, add, originalIndex);
 
   Value one = b.create<ConstantIndexOp>(1);
-  Value head = b.create<SubTensorOp>(transformed.operand(), zero, index, one);
+  Value head =
+      b.create<tensor::ExtractSliceOp>(transformed.operand(), zero, index, one);
   Value tailSize = b.create<SubIOp>(rank, index);
-  Value tail =
-      b.create<SubTensorOp>(transformed.operand(), index, tailSize, one);
+  Value tail = b.create<tensor::ExtractSliceOp>(transformed.operand(), index,
+                                                tailSize, one);
   rewriter.replaceOp(op, {head, tail});
   return success();
 }
diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
--- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
+++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
@@ -1741,8 +1741,8 @@
 
     for (auto arg : args) {
       sizes[axis] = rewriter.create<memref::DimOp>(loc, arg, axisValue);
-      result = rewriter.create<SubTensorInsertOp>(loc, arg, result, offsets,
-                                                  sizes, strides);
+      result = rewriter.create<tensor::InsertSliceOp>(loc, arg, result, offsets,
+                                                      sizes, strides);
       offsets[axis] = rewriter.create<AddIOp>(loc, offsets[axis], sizes[axis]);
     }
     rewriter.replaceOp(op, result);
diff --git a/mlir/lib/Conversion/TosaToStandard/TosaToStandard.cpp b/mlir/lib/Conversion/TosaToStandard/TosaToStandard.cpp
--- a/mlir/lib/Conversion/TosaToStandard/TosaToStandard.cpp
+++ b/mlir/lib/Conversion/TosaToStandard/TosaToStandard.cpp
@@ -12,6 +12,7 @@
 
 #include "mlir/Conversion/TosaToStandard/TosaToStandard.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Tosa/IR/TosaOps.h"
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
@@ -42,7 +43,7 @@
     SmallVector<int64_t> strides;
     strides.resize(sliceOp.getType().template cast<ShapedType>().getRank(), 1);
 
-    rewriter.replaceOpWithNewOp<SubTensorOp>(
+    rewriter.replaceOpWithNewOp<tensor::ExtractSliceOp>(
         sliceOp, sliceOp.getType(), input, ValueRange({}), ValueRange({}),
         ValueRange({}), sliceOp.start(), sliceOp.size(),
         rewriter.getI64ArrayAttr(strides));
diff --git a/mlir/lib/Conversion/TosaToStandard/TosaToStandardPass.cpp b/mlir/lib/Conversion/TosaToStandard/TosaToStandardPass.cpp
--- a/mlir/lib/Conversion/TosaToStandard/TosaToStandardPass.cpp
+++ b/mlir/lib/Conversion/TosaToStandard/TosaToStandardPass.cpp
@@ -35,6 +35,7 @@
     target.addIllegalOp<tosa::SliceOp>();
     target.addIllegalOp<tosa::ApplyScaleOp>();
     target.addLegalDialect<StandardOpsDialect>();
+    target.addLegalDialect<tensor::TensorDialect>();
 
     mlir::tosa::populateTosaToStandardConversionPatterns(&patterns);
     if (failed(applyPartialConversion(getOperation(), target,
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -16,6 +16,7 @@
 #include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/IR/AffineExprVisitor.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/IR/OpImplementation.h"
@@ -746,22 +747,23 @@
 
 namespace {
 /// Since `init_tensor` operation creates a tensor needed only for its shape, a
-/// subtensor of this is also needed only for its shape. The result can be
-/// replaced by a new init_tensor operation of the same size as the subtensor
-/// op.
-struct FoldInitTensorWithSubTensorOp : public OpRewritePattern<SubTensorOp> {
-  using OpRewritePattern<SubTensorOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(SubTensorOp subtensorOp,
+/// slice of this is also needed only for its shape. The result can be
+/// replaced by a new init_tensor operation of the same size as the extract
+/// slice op.
+struct FoldInitTensorWithExtractSliceOp
+    : public OpRewritePattern<tensor::ExtractSliceOp> {
+  using OpRewritePattern<tensor::ExtractSliceOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(tensor::ExtractSliceOp sliceOp,
                                 PatternRewriter &rewriter) const override {
-    if (!subtensorOp.source().getDefiningOp<linalg::InitTensorOp>())
+    if (!sliceOp.source().getDefiningOp<linalg::InitTensorOp>())
       return failure();
     rewriter.replaceOpWithNewOp<linalg::InitTensorOp>(
-        subtensorOp, subtensorOp.sizes(),
+        sliceOp, sliceOp.sizes(),
         llvm::to_vector<4>(llvm::map_range(
-            subtensorOp.static_sizes(),
+            sliceOp.static_sizes(),
             [](Attribute attr) { return attr.cast<IntegerAttr>().getInt(); })),
-        subtensorOp.getSourceType().getElementType());
+        sliceOp.getSourceType().getElementType());
     return success();
   }
 };
@@ -797,7 +799,7 @@
 
 void InitTensorOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                MLIRContext *context) {
-  results.add<FoldInitTensorWithSubTensorOp,
+  results.add<FoldInitTensorWithExtractSliceOp,
               FoldInitTensorWithTensorReshapeOp<TensorExpandShapeOp>,
               FoldInitTensorWithTensorReshapeOp<TensorCollapseShapeOp>,
               ReplaceStaticShapeDims>(context);
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp
@@ -15,6 +15,7 @@
 #include "mlir/Dialect/Math/IR/Math.h"
 #include "mlir/Dialect/StandardOps/Transforms/Passes.h"
 #include "mlir/Dialect/StandardOps/Utils/Utils.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Vector/VectorOps.h"
 #include "mlir/IR/BuiltinDialect.h"
 #include "mlir/IR/Operation.h"
@@ -232,8 +233,8 @@
   }
 };
 
-/// Convert `subtensor %t [offsets][sizes][strides] -> %st` to an alloc + copy
-/// pattern.
+/// Convert `extract_slice %t [offsets][sizes][strides] -> %st` to an
+/// alloc + copy pattern.
 /// ```
 ///   %a = alloc(sizes)
 ///   %sv = subview %source [offsets][sizes][strides]
@@ -242,21 +243,22 @@
 ///
 /// This pattern is arguable a std pattern once linalg::CopyOp becomes
 /// std::CopyOp.
-class SubTensorOpConverter : public OpConversionPattern<SubTensorOp> {
+class ExtractSliceOpConverter
+    : public OpConversionPattern<tensor::ExtractSliceOp> {
 public:
-  using OpConversionPattern<SubTensorOp>::OpConversionPattern;
+  using OpConversionPattern<tensor::ExtractSliceOp>::OpConversionPattern;
 
   LogicalResult
-  matchAndRewrite(SubTensorOp op, ArrayRef<Value> operands,
+  matchAndRewrite(tensor::ExtractSliceOp op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
-    SubTensorOpAdaptor adaptor(operands, op->getAttrDictionary());
+    tensor::ExtractSliceOpAdaptor adaptor(operands, op->getAttrDictionary());
     Value sourceMemref = adaptor.source();
     assert(sourceMemref.getType().isa<MemRefType>());
 
     MemRefType subviewMemRefType =
         getTypeConverter()->convertType(op.getType()).cast<MemRefType>();
     // op.sizes() capture exactly the dynamic alloc operands matching the
-    // subviewMemRefType thanks to subview/subtensor canonicalization and
+    // subviewMemRefType thanks to subview/slice canonicalization and
     // verification.
     Value alloc = rewriter.create<memref::AllocOp>(
         op.getLoc(), subviewMemRefType, op.sizes());
@@ -269,7 +271,7 @@
   }
 };
 
-/// Convert `subtensor_insert %source into %dest [offsets][sizes][strides] ->
+/// Convert `insert_slice %source into %dest [offsets][sizes][strides] ->
 /// %t` to an buffer_cast + subview + copy + tensor_load pattern.
 /// buffer_cast and tensor_load are inserted automatically by the
 /// conversion infra:
@@ -281,15 +283,15 @@
 ///
 /// This pattern is arguable a std pattern once linalg::CopyOp becomes
 /// std::CopyOp.
-class SubTensorInsertOpConverter
-    : public OpConversionPattern<SubTensorInsertOp> {
+class InsertSliceOpConverter
+    : public OpConversionPattern<tensor::InsertSliceOp> {
 public:
-  using OpConversionPattern<SubTensorInsertOp>::OpConversionPattern;
+  using OpConversionPattern<tensor::InsertSliceOp>::OpConversionPattern;
 
   LogicalResult
-  matchAndRewrite(SubTensorInsertOp op, ArrayRef<Value> operands,
+  matchAndRewrite(tensor::InsertSliceOp op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const final {
-    SubTensorInsertOpAdaptor adaptor(operands, op->getAttrDictionary());
+    tensor::InsertSliceOpAdaptor adaptor(operands, op->getAttrDictionary());
     Value sourceMemRef = adaptor.source();
     assert(sourceMemRef.getType().isa<MemRefType>());
 
@@ -323,7 +325,8 @@
     // Mark all Standard operations legal.
     target.addLegalDialect<AffineDialect, math::MathDialect,
                            memref::MemRefDialect, StandardOpsDialect>();
-    target.addIllegalOp<InitTensorOp, SubTensorOp, SubTensorInsertOp>();
+    target.addIllegalOp<InitTensorOp, tensor::ExtractSliceOp,
+                        tensor::InsertSliceOp>();
 
     // Mark all Linalg operations illegal as long as they work on tensors.
     auto isLegalOperation = [&](Operation *op) {
@@ -355,8 +358,8 @@
       BufferizeInitTensorOp,
       BufferizeTensorReshapeOp<TensorExpandShapeOp>,
       BufferizeTensorReshapeOp<TensorCollapseShapeOp>,
-      SubTensorOpConverter,
-      SubTensorInsertOpConverter
+      ExtractSliceOpConverter,
+      InsertSliceOpConverter
     >(typeConverter, patterns.getContext());
   // clang-format on
 }
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp
@@ -77,7 +77,7 @@
 //        out of the function at each call site.
 //
 //   iii. as an optimization over ii., it may be possible to reuse an argument
-//        and only want to return a subtensor.
+//        and only want to return a slice.
 //        This may forego allocation by letting *all* callers decide whether to
 //        pass a new *aliasing* memref function argument (i.e. a subview).
 //        Without loss of generality, callers may agree to allocate a new buffer
@@ -284,7 +284,7 @@
 //   5. Wheher an op bufferizes to a memory read.
 //   6. Wheher an op bufferizes to a memory write.
 // These interfaces are necessary to distinguish between various cases and allow
-// special inplace behavior for (SubTensorOp, SubTensorInsertOp) pairs.
+// special inplace behavior for (ExtractSliceOp, InsertSliceOp) pairs.
 //===----------------------------------------------------------------------===//
 
 /// Return `true` if the op is explicitly supported by bufferization or if it
@@ -295,8 +295,8 @@
       // clang-format off
       isa<LinalgOp,
           ReturnOp,
-          SubTensorOp,
-          SubTensorInsertOp,
+          ExtractSliceOp,
+          InsertSliceOp,
           VectorTransferOpInterface>(op)
       // clang-format on
       || (none_of(op->getResultTypes(),
@@ -339,8 +339,7 @@
 /// Return the OpResult that may bufferize into the same buffer as `opOperand`
 /// when the op is bufferized inplace.
 /// Return null if no such result exists.
-static OpResult getInplaceableOpResult(SubTensorInsertOp op,
-                                       OpOperand &opOperand) {
+static OpResult getInplaceableOpResult(InsertSliceOp op, OpOperand &opOperand) {
   if (opOperand.get() != op.dest())
     return OpResult();
   return op->getResult(0);
@@ -357,12 +356,12 @@
         // Ops that perform destructive updates on operand(s) to produce
         // result(s).
         .Case<LinalgOp,
-              SubTensorInsertOp,
+              InsertSliceOp,
               VectorTransferOpInterface>(
             [&](auto op) { return getInplaceableOpResult(op, opOperand); })
-        // SubTensorOp is special, when bufferized inplace it just returns an
+        // ExtractSliceOp is special, when bufferized inplace it just returns an
         // alias to its operand. Its result is never inplaceable on its operand.
-        .Case([&](SubTensorOp op) { return OpResult(); })
+        .Case([&](ExtractSliceOp op) { return OpResult(); })
         // Other ops.
         .Default([&](Operation *op) { return OpResult(); });
   // clang-format on
@@ -380,10 +379,10 @@
   return TypeSwitch<Operation *, OpResult>(opOperand.getOwner())
       // ReturnOp has no result.
       .Case([&](ReturnOp op) { return OpResult(); })
-      // SubTensorOp is different: its result is not inplaceable on op.source
+      // ExtractSliceOp is different: its result is not inplaceable on op.source
       // but when bufferized inplace, the result is an aliasing subregion of
       // op.source.
-      .Case([&](SubTensorOp op) { return op->getResult(0); })
+      .Case([&](ExtractSliceOp op) { return op->getResult(0); })
       .Default(
           [&](Operation *op) { return getInplaceableOpResult(opOperand); });
 }
@@ -395,8 +394,9 @@
   // it. Conservatively return true.
   if (!maybeOpResult)
     return true;
-  // SubTensorOp alone doesn't bufferize to a memory read, one of its uses may.
-  if (isa<SubTensorOp>(opOperand.getOwner()))
+  // ExtractSliceOp alone doesn't bufferize to a memory read, one of its uses
+  // may.
+  if (isa<ExtractSliceOp>(opOperand.getOwner()))
     return false;
   if (auto linalgOp = dyn_cast<LinalgOp>(opOperand.getOwner()))
     return linalgOp.isInputTensor(&opOperand) ||
@@ -425,8 +425,9 @@
   // A ReturnOp is not a write.
   if (isa<ReturnOp>(opOperand.getOwner()))
     return false;
-  // SubTensorOp alone doesn't bufferize to a memory write, one of its uses may.
-  if (maybeOpResult->getDefiningOp<SubTensorOp>())
+  // ExtractSliceOp alone doesn't bufferize to a memory write, one of its uses
+  // may.
+  if (maybeOpResult->getDefiningOp<ExtractSliceOp>())
     return false;
   // If we have a matching OpResult, this is a write.
   // Additionally allow to restrict to only inPlace write, if so specified.
@@ -442,10 +443,10 @@
 
 /// The BufferizationAliasInfo class maintains a list of buffer aliases and
 /// equivalence classes to support bufferization.
-/// SubTensorOps have special behavior, they act as a level of indirection for
-/// bufferization. They don't create reads or writes themselves and analysis
+/// ExtractSliceOps have special behavior, they act as a level of indirection
+/// for bufferization. They don't create reads or writes themselves and analysis
 /// needs to look through their uses.
-/// SubTensorOp + SubTensorInsertOp have special joint behavior: they may
+/// ExtractSliceOp + InsertSliceOp have special joint behavior: they may
 /// bufferize to the same buffer (i.e. subview), which is what introduces the
 /// need for bufferization classes.
 /// Some of these functionalities could be refactored in a Bufferizer class that
@@ -469,7 +470,7 @@
 
   /// Return true if the buffer to which `operand` would bufferize is equivalent
   /// to some use that would bufferize to a write to a buffer.
-  bool aliasesInPlaceWrite(SubTensorOp subTensorOp) const;
+  bool aliasesInPlaceWrite(ExtractSliceOp extractSliceOp) const;
 
   /// Merge result's and operand's aliasing sets and iterate to a fixed point.
   void bufferizeInPlace(OpResult result, OpOperand &operand,
@@ -495,10 +496,10 @@
   bool existsNonDominatingRead(OpOperand &opOperand,
                                const DominanceInfo &domInfo) const;
 
-  /// Return true if the source of a `subTensorInsertOp` bufferizes to an
-  /// equivalent SubTensorOp.
-  bool isSourceEquivalentToAMatchingSubTensorOp(
-      SubTensorInsertOp subTensorInsertOp) const;
+  /// Return true if the source of a `insertSliceOp` bufferizes to an
+  /// equivalent ExtractSliceOp.
+  bool isSourceEquivalentToAMatchingExtractSliceOp(
+      InsertSliceOp insertSliceOp) const;
 
   /// Print to `os`.
   void print(raw_ostream &os) const;
@@ -519,13 +520,13 @@
   /// Iteratively merge alias sets until a fixed-point.
   void mergeAliasesToFixedPoint();
 
-  /// Return true if the (SubTensorOp, SubTensorInsertOp) pair match (i.e.
+  /// Return true if the (ExtractSliceOp, InsertSliceOp) pair match (i.e.
   /// equivalent operand / result and same offset/sizes/strides specification).
   ///
   /// This is one particular type of relationship between ops on tensors that
   /// reduce to an equivalence on buffers. This should be generalized and
   /// exposed as interfaces on the proper types.
-  bool areEquivalentSubTensorOps(SubTensorOp st, SubTensorInsertOp sti) const;
+  bool areEquivalentExtractSliceOps(ExtractSliceOp st, InsertSliceOp sti) const;
 
   /// Return true if there is a `candidateOp` that would write to memory after
   /// bufferization and such that:
@@ -658,10 +659,10 @@
 /// Return true if the buffer to which `operand` would bufferize is equivalent
 /// to some use that would bufferize to a write to a buffer.
 bool BufferizationAliasInfo::aliasesInPlaceWrite(
-    SubTensorOp subTensorOp) const {
+    ExtractSliceOp extractSliceOp) const {
   LDBG("----Start aliasesInPlaceWrite\n");
-  LDBG("-------for op: " << *subTensorOp.getOperation() << '\n');
-  for (Value v : getAliasInfoRef(subTensorOp.result())) {
+  LDBG("-------for op: " << *extractSliceOp.getOperation() << '\n');
+  for (Value v : getAliasInfoRef(extractSliceOp.result())) {
     for (auto &use : v.getUses()) {
       if (bufferizesToMemoryWrite(use, InPlaceSpec::True)) {
         LDBG("-----------wants to bufferize to inPlace write: "
@@ -670,7 +671,7 @@
       }
     }
   }
-  LDBG("----------->subtensor does not alias an inplace write");
+  LDBG("----------->extract_slice does not alias an inplace write");
   return false;
 }
 
@@ -796,16 +797,16 @@
   return false;
 }
 
-/// Return true if the source of a `subTensorInsertOp` bufferizes to an
-/// equivalent SubTensorOp.
-bool BufferizationAliasInfo::isSourceEquivalentToAMatchingSubTensorOp(
-    SubTensorInsertOp subTensorInsertOp) const {
-  auto leaderIt = equivalentInfo.findLeader(subTensorInsertOp.source());
+/// Return true if the source of a `insertSliceOp` bufferizes to an
+/// equivalent ExtractSliceOp.
+bool BufferizationAliasInfo::isSourceEquivalentToAMatchingExtractSliceOp(
+    InsertSliceOp insertSliceOp) const {
+  auto leaderIt = equivalentInfo.findLeader(insertSliceOp.source());
   for (auto mit = leaderIt, meit = equivalentInfo.member_end(); mit != meit;
        ++mit) {
-    if (areEquivalentSubTensorOps(
-            dyn_cast_or_null<SubTensorOp>(mit->v.getDefiningOp()),
-            subTensorInsertOp))
+    if (areEquivalentExtractSliceOps(
+            dyn_cast_or_null<ExtractSliceOp>(mit->v.getDefiningOp()),
+            insertSliceOp))
       return true;
   }
   return false;
@@ -874,8 +875,8 @@
 /// This is one particular type of relationship between ops on tensors that
 /// reduce to an equivalence on buffers. This should be generalized and exposed
 /// as interfaces on the proper types.
-bool BufferizationAliasInfo::areEquivalentSubTensorOps(
-    SubTensorOp st, SubTensorInsertOp sti) const {
+bool BufferizationAliasInfo::areEquivalentExtractSliceOps(
+    ExtractSliceOp st, InsertSliceOp sti) const {
   if (!st || !sti)
     return false;
   if (!equivalentInfo.isEquivalent(st.source(), sti.dest()))
@@ -950,47 +951,47 @@
     return false;
   }
 
-  // The case `opToBufferize` isa SubTensorOp is important enough that we look
-  // for it specifically. The key information to discover is whether the
-  // aliasing read or write come from a matching SubTensorInsertOp.
+  // The case `opToBufferize` isa ExtractSliceOp is important enough that we
+  // look for it specifically. The key information to discover is whether the
+  // aliasing read or write come from a matching InsertSliceOp.
   // Such a pattern is introduced by tiling and is the key inplace condition
   // not to miss.
-  if (auto subTensorOp = dyn_cast<SubTensorOp>(opToBufferize)) {
-    if (auto subTensorInsertOp = dyn_cast<SubTensorInsertOp>(aliasingReadOp)) {
-      // %1 = subtensor %0[%offset_sizes_and_strides_1]
+  if (auto extractSliceOp = dyn_cast<ExtractSliceOp>(opToBufferize)) {
+    if (auto insertSliceOp = dyn_cast<InsertSliceOp>(aliasingReadOp)) {
+      // %1 = extract_slice %0[%offset_sizes_and_strides_1]
       //
       // ... // 0 or more of inplace compute that reduces to: %X is an
       //     // aliasingWrite equivalent to %1.
       // %W = inplace_write(%1)
       //
-      // // aliasingRead %Y in subtensor_insert
-      // ... = subtensor_insert %W into %R[%offset_sizes_and_strides_1]
-      if (aliasingRead.get() == subTensorInsertOp.dest() &&
+      // // aliasingRead %Y in insert_slice
+      // ... = insert_slice %W into %R[%offset_sizes_and_strides_1]
+      if (aliasingRead.get() == insertSliceOp.dest() &&
           // TODO: This is currently too restrictive and misses clobberings.
           // When available, use container-containee analysis: the condition
           // should be that the `aliasingWrite` is contained within
-          // `subTensorInsertOp.source()`.
+          // `insertSliceOp.source()`.
           equivalentInfo.isEquivalent(aliasingWrite.get(),
-                                      subTensorInsertOp.source()) &&
-          areEquivalentSubTensorOps(subTensorOp, subTensorInsertOp)) {
-        LDBG("---->clobbering matching subtensor/subtensor_insert\n");
+                                      insertSliceOp.source()) &&
+          areEquivalentExtractSliceOps(extractSliceOp, insertSliceOp)) {
+        LDBG("---->clobbering matching extract_slice/insert_slice\n");
         return true;
       }
-      // %1 = subtensor %0[%offset_sizes_and_strides_1]
+      // %1 = extract_slice %0[%offset_sizes_and_strides_1]
       //
       // ... // bunch of inplace ops that reduce to %X, equivalent to %1.
       // %X = inplace_write(%1)
       //
-      // // aliasingRead %X in subtensor_insert
-      // // aliasingWrite %Y in subtensor_insert
-      // ... = subtensor_insert %X into %Y[%offset_sizes_and_strides_1]
+      // // aliasingRead %X in insert_slice
+      // // aliasingWrite %Y in insert_slice
+      // ... = insert_slice %X into %Y[%offset_sizes_and_strides_1]
       if (aliasingReadOp == aliasingWriteOp) {
-        assert(aliasingRead.get() == subTensorInsertOp.source() &&
-               "expected read to source of subtensor_insert");
-        assert(aliasingWrite.get() == subTensorInsertOp.dest() &&
-               "expected write to dest of subtensor_insert");
-        if (areEquivalentSubTensorOps(subTensorOp, subTensorInsertOp)) {
-          LDBG("---->clobbering matching subtensor/subtensor_insert\n");
+        assert(aliasingRead.get() == insertSliceOp.source() &&
+               "expected read to source of insert_slice");
+        assert(aliasingWrite.get() == insertSliceOp.dest() &&
+               "expected write to dest of insert_slice");
+        if (areEquivalentExtractSliceOps(extractSliceOp, insertSliceOp)) {
+          LDBG("---->clobbering matching extract_slice/insert_slice\n");
           return true;
         }
       }
@@ -1262,114 +1263,114 @@
   return success();
 }
 
-/// Bufferize SubTensorOp to subview with optional alloc + copy depending on
+/// Bufferize ExtractSliceOp to subview with optional alloc + copy depending on
 /// whether or not it is marked inplaceable.
-/// Note that `getInplaceableOpResult` on a SubTensorOp always returns null.
-/// As consequence a SubTensorOp always alloc + copy when taken in
+/// Note that `getInplaceableOpResult` on a ExtractSliceOp always returns null.
+/// As consequence a ExtractSliceOp always alloc + copy when taken in
 /// isolation.
-static LogicalResult bufferize(OpBuilder &b, SubTensorOp subTensorOp,
+static LogicalResult bufferize(OpBuilder &b, ExtractSliceOp extractSliceOp,
                                BlockAndValueMapping &bvm,
                                const BufferizationAliasInfo &aliasInfo) {
-  LDBG("bufferize: " << *subTensorOp << '\n');
+  LDBG("bufferize: " << *extractSliceOp << '\n');
 
   // Take a guard before anything else.
   OpBuilder::InsertionGuard g(b);
-  b.setInsertionPoint(subTensorOp);
+  b.setInsertionPoint(extractSliceOp);
 
-  Location loc = subTensorOp.getLoc();
+  Location loc = extractSliceOp.getLoc();
   // Bail if source was not bufferized.
-  Value srcMemref = lookup(bvm, subTensorOp.source());
+  Value srcMemref = lookup(bvm, extractSliceOp.source());
   if (!srcMemref)
     return failure();
   auto srcMemrefType = srcMemref.getType().cast<MemRefType>();
-  auto dstTensorType = subTensorOp.result().getType().cast<RankedTensorType>();
+  auto dstTensorType =
+      extractSliceOp.result().getType().cast<RankedTensorType>();
 
   // If not inplaceable, alloc.
   Value alloc;
-  auto inPlace = getInPlace(subTensorOp->getResult(0));
+  auto inPlace = getInPlace(extractSliceOp->getResult(0));
   if (inPlace != InPlaceSpec::True) {
-    alloc =
-        createNewAllocDeallocPairForShapedValue(b, loc, subTensorOp.result());
+    alloc = createNewAllocDeallocPairForShapedValue(b, loc,
+                                                    extractSliceOp.result());
     b.setInsertionPointAfter(alloc.getDefiningOp());
   }
 
   // Bufferize to subview.
   auto subviewMemRefType =
       memref::SubViewOp::inferRankReducedResultType(
-          dstTensorType.getRank(), srcMemrefType, subTensorOp.getMixedOffsets(),
-          subTensorOp.getMixedSizes(), subTensorOp.getMixedStrides())
+          dstTensorType.getRank(), srcMemrefType,
+          extractSliceOp.getMixedOffsets(), extractSliceOp.getMixedSizes(),
+          extractSliceOp.getMixedStrides())
           .cast<MemRefType>();
   Value subView = b.create<memref::SubViewOp>(
-      loc, subviewMemRefType, srcMemref, subTensorOp.getMixedOffsets(),
-      subTensorOp.getMixedSizes(), subTensorOp.getMixedStrides());
+      loc, subviewMemRefType, srcMemref, extractSliceOp.getMixedOffsets(),
+      extractSliceOp.getMixedSizes(), extractSliceOp.getMixedStrides());
 
   /// If not inplaceable, copy.
   if (alloc) {
-    b.create<CopyOp>(subTensorOp.getLoc(), subView, alloc);
+    b.create<CopyOp>(extractSliceOp.getLoc(), subView, alloc);
     subView = alloc;
   }
 
-  map(bvm, subTensorOp.result(), subView);
+  map(bvm, extractSliceOp.result(), subView);
   return success();
 }
 
-static LogicalResult bufferize(OpBuilder &b,
-                               SubTensorInsertOp subTensorInsertOp,
+static LogicalResult bufferize(OpBuilder &b, InsertSliceOp insertSliceOp,
                                BlockAndValueMapping &bvm,
                                const BufferizationAliasInfo &aliasInfo) {
-  LDBG("bufferize: " << *subTensorInsertOp << '\n');
+  LDBG("bufferize: " << *insertSliceOp << '\n');
 
   // Take a guard before anything else.
   OpBuilder::InsertionGuard g(b);
-  b.setInsertionPoint(subTensorInsertOp);
-  Location loc = subTensorInsertOp.getLoc();
+  b.setInsertionPoint(insertSliceOp);
+  Location loc = insertSliceOp.getLoc();
 
-  Value dstMemref = lookup(bvm, subTensorInsertOp.dest());
+  Value dstMemref = lookup(bvm, insertSliceOp.dest());
   if (!dstMemref)
     return failure();
-  auto inPlace = getInPlace(subTensorInsertOp->getResult(0));
+  auto inPlace = getInPlace(insertSliceOp->getResult(0));
   if (inPlace != InPlaceSpec::True) {
-    // Since subtensor_insert arise from tiling and introducing loops, this
+    // Since insert_slice arise from tiling and introducing loops, this
     // case is generally a deal breaker. When used with loops, this ends up
     // cloning the whole tensor on every single iteration and is a symptom
     // of a catastrophically bad scheduling decision.
     // TODO: be very loud about it or even consider failing the pass.
-    Value newDstMemref = createNewAllocDeallocPairForShapedValue(
-        b, loc, subTensorInsertOp.result());
+    Value newDstMemref =
+        createNewAllocDeallocPairForShapedValue(b, loc, insertSliceOp.result());
     b.setInsertionPointAfter(newDstMemref.getDefiningOp());
-    b.create<CopyOp>(subTensorInsertOp.getLoc(), dstMemref, newDstMemref);
+    b.create<CopyOp>(insertSliceOp.getLoc(), dstMemref, newDstMemref);
     dstMemref = newDstMemref;
   }
   auto dstMemrefType = dstMemref.getType().cast<MemRefType>();
 
-  Value srcMemref = lookup(bvm, subTensorInsertOp.source());
+  Value srcMemref = lookup(bvm, insertSliceOp.source());
   if (!srcMemref)
     return failure();
   auto subviewMemRefType =
       memref::SubViewOp::inferRankReducedResultType(
-          subTensorInsertOp.getSourceType().getRank(), dstMemrefType,
-          subTensorInsertOp.getMixedOffsets(),
-          subTensorInsertOp.getMixedSizes(),
-          subTensorInsertOp.getMixedStrides())
+          insertSliceOp.getSourceType().getRank(), dstMemrefType,
+          insertSliceOp.getMixedOffsets(), insertSliceOp.getMixedSizes(),
+          insertSliceOp.getMixedStrides())
           .cast<MemRefType>();
 
   // A copy of the source buffer is needed if either:
   //   - The producer of `source` is not inplace. This is the case where a
-  //     subtensor is computed out of place into the inplace full tensor.
+  //     slice is computed out of place into the inplace full tensor.
   //   - The result is not inplace. This is the case where the whole tensor is
   //     cloned and the clone needs to be updated.
-  if (!aliasInfo.isSourceEquivalentToAMatchingSubTensorOp(subTensorInsertOp) ||
+  if (!aliasInfo.isSourceEquivalentToAMatchingExtractSliceOp(insertSliceOp) ||
       inPlace != InPlaceSpec::True) {
-    LDBG("subtensor_insert needs extra source copy: "
-         << subTensorInsertOp.source() << " -> copy\n");
+    LDBG("insert_slice needs extra source copy: " << insertSliceOp.source()
+                                                  << " -> copy\n");
     // Take a subview of the dst.
     Value subView = b.create<memref::SubViewOp>(
-        loc, subviewMemRefType, dstMemref, subTensorInsertOp.getMixedOffsets(),
-        subTensorInsertOp.getMixedSizes(), subTensorInsertOp.getMixedStrides());
-    b.create<CopyOp>(subTensorInsertOp.getLoc(), srcMemref, subView);
+        loc, subviewMemRefType, dstMemref, insertSliceOp.getMixedOffsets(),
+        insertSliceOp.getMixedSizes(), insertSliceOp.getMixedStrides());
+    b.create<CopyOp>(insertSliceOp.getLoc(), srcMemref, subView);
   }
 
-  map(bvm, subTensorInsertOp.result(), dstMemref);
+  map(bvm, insertSliceOp.result(), dstMemref);
 
   return success();
 }
@@ -1433,54 +1434,54 @@
 //===----------------------------------------------------------------------===//
 
 ///
-/// Rationale for bufferizing `%1 = subtensor %0[...]` inplace.
+/// Rationale for bufferizing `%1 = tensor.extract_slice %0[...]` inplace.
 /// ===========================================================
 ///
-/// When bufferized out of place, a SubTensorOp lowers to alloc + copy. This
+/// When bufferized out of place, a ExtractSlice lowers to alloc + copy. This
 /// cannot change the flow of information for either the source or the
 /// result buffers.
 ///
-/// When bufferized inplace, a SubTensorOp does not by itself create any read or
-/// write from memory. Instead, it has the effect of merging the alias sets of
-/// the source and the result buffers.
+/// When bufferized inplace, a ExtractSliceOp does not by itself create any read
+/// or write from memory. Instead, it has the effect of merging the alias sets
+/// of the source and the result buffers.
 ///
 /// An analysis is required to ensure inplace bufferization would not result in
 /// RaW dependence violations.
-static void bufferizableInPlaceAnalysis(SubTensorOp subTensorOp,
+static void bufferizableInPlaceAnalysis(ExtractSliceOp extractSliceOp,
                                         BufferizationAliasInfo &aliasInfo,
                                         const DominanceInfo &domInfo) {
   LDBG('\n');
-  LDBG("Try to bufferize subtensor inplace: " << *subTensorOp << '\n');
+  LDBG("Try to bufferize extract_slice inplace: " << *extractSliceOp << '\n');
 
-  // If `subTensorOp` were to be bufferized inplace, it cannot end up
+  // If `extractSliceOp` were to be bufferized inplace, it cannot end up
   // aliasing a write into a non-writeable buffer.
   bool wouldCreateAliasingWriteToNonWriteableBuffer =
-      aliasInfo.aliasesInPlaceWrite(subTensorOp) &&
-      aliasInfo.aliasesNonWriteableBuffer(subTensorOp->getOpOperand(0));
+      aliasInfo.aliasesInPlaceWrite(extractSliceOp) &&
+      aliasInfo.aliasesNonWriteableBuffer(extractSliceOp->getOpOperand(0));
 
   if (wouldCreateAliasingWriteToNonWriteableBuffer)
     LDBG("->the corresponding buffer is not writeable\n");
   LDBG("->bufferizes to writeable inplace buffer\n");
 
-  // In any of subTensorOp.result's aliases, can we find 2 such that we hit
+  // In any of extractSliceOp.result's aliases, can we find 2 such that we hit
   // an interfering write?
-  Value s = subTensorOp.source(), r = subTensorOp.result();
+  Value s = extractSliceOp.source(), r = extractSliceOp.result();
   bool foundInterference = wouldCreateAliasingWriteToNonWriteableBuffer ||
                            // Do not consider (s, s) and (r, r) as all the
                            // aliasings already exist by construction; we are
                            // interested in new interfering aliases only.
                            aliasInfo.wouldCreateReadAfterWriteInterference(
-                               s, r, subTensorOp, domInfo) ||
+                               s, r, extractSliceOp, domInfo) ||
                            aliasInfo.wouldCreateReadAfterWriteInterference(
-                               r, s, subTensorOp, domInfo);
+                               r, s, extractSliceOp, domInfo);
   if (foundInterference) {
-    setInPlaceOpResult(subTensorOp->getResult(0), InPlaceSpec::False);
+    setInPlaceOpResult(extractSliceOp->getResult(0), InPlaceSpec::False);
   } else {
-    setInPlaceOpResult(subTensorOp->getResult(0), InPlaceSpec::True);
-    aliasInfo.bufferizeInPlace(subTensorOp->getResult(0),
-                               subTensorOp->getOpOperand(0));
+    setInPlaceOpResult(extractSliceOp->getResult(0), InPlaceSpec::True);
+    aliasInfo.bufferizeInPlace(extractSliceOp->getResult(0),
+                               extractSliceOp->getOpOperand(0));
   }
-  LDBG("Done bufferizing subtensor\n");
+  LDBG("Done bufferizing extract_slice\n");
 }
 
 /// Analyze the (opOperand, result) pair to determine whether the result can
@@ -1490,8 +1491,8 @@
                                         BufferizationAliasInfo &aliasInfo,
                                         const DominanceInfo &domInfo) {
   Operation *op = result.getDefiningOp();
-  assert(result && !isa<SubTensorOp>(op) &&
-         "expected OpResult not coming from a SubTensorOp");
+  assert(result && !isa<ExtractSliceOp>(op) &&
+         "expected OpResult not coming from a ExtractSliceOp");
 
   int64_t resultNumber = result.getResultNumber();
   (void)resultNumber;
@@ -1541,48 +1542,47 @@
          "expected a funcOp definition with a body");
 
   // Collect ops so we can build our own traversal.
-  SmallVector<SubTensorOp> subTensorOps;
-  SmallVector<SubTensorInsertOp> subTensorInsertOps;
-  SmallVector<Operation *> nonSubTensorOps;
+  SmallVector<ExtractSliceOp> extractSliceOps;
+  SmallVector<InsertSliceOp> insertSliceOps;
+  SmallVector<Operation *> nonSliceOps;
   funcOp.walk([&](Operation *op) {
-    if (auto subTensorOp = dyn_cast<SubTensorOp>(op))
-      return subTensorOps.push_back(subTensorOp);
-    if (auto subTensorInsertOp = dyn_cast<SubTensorInsertOp>(op))
-      return subTensorInsertOps.push_back(subTensorInsertOp);
+    if (auto extractSliceOp = dyn_cast<ExtractSliceOp>(op))
+      return extractSliceOps.push_back(extractSliceOp);
+    if (auto insertSliceOp = dyn_cast<InsertSliceOp>(op))
+      return insertSliceOps.push_back(insertSliceOp);
     auto isaTensor = [](Type t) { return t.isa<TensorType>(); };
     // No tensors => no buffers.
     if (none_of(op->getOperandTypes(), isaTensor) &&
         none_of(op->getResultTypes(), isaTensor))
       return;
-    nonSubTensorOps.push_back(op);
+    nonSliceOps.push_back(op);
   });
 
-  // Bufferize SubTensorInsertOp greedily: we almost never want to bufferize
+  // Bufferize InsertSliceOp greedily: we almost never want to bufferize
   // the tensor "inserted into" to become out-of-place. This implementation
-  // does not distinguish between different SubTensorInsertOps. If we want
-  // finer-grained behavior, we could order the SubTensorInsertOps with some
-  // metric.
-  // Walk SubTensorInsertOps in reverse for better interference behavior.
-  for (SubTensorInsertOp subTensorInsertOp : reverse(subTensorInsertOps)) {
-    OpOperand &destOpOperand = subTensorInsertOp->getOpOperand(1);
+  // does not distinguish between different InsertSliceOp. If we want
+  // finer-grained behavior, we could order the InsertSliceOp with some metric.
+  // Walk InsertSliceOp in reverse for better interference behavior.
+  for (InsertSliceOp insertSliceOp : reverse(insertSliceOps)) {
+    OpOperand &destOpOperand = insertSliceOp->getOpOperand(1);
     bufferizableInPlaceAnalysis(destOpOperand,
                                 getInplaceableOpResult(destOpOperand),
                                 aliasInfo, domInfo);
   }
 
-  // Bufferize all ops except SubTensorOp and SubTensorInsertOp which are
-  // handled separately.
+  // Bufferize all ops except ExtractSliceOp and InsertSliceOp which are handled
+  // separately.
   // Walk other ops in reverse for better interference behavior.
-  for (Operation *op : reverse(nonSubTensorOps))
+  for (Operation *op : reverse(nonSliceOps))
     for (OpOperand &opOperand : op->getOpOperands())
       if (OpResult result = getInplaceableOpResult(opOperand))
         bufferizableInPlaceAnalysis(opOperand, result, aliasInfo, domInfo);
 
-  // Finally, bufferize SubTensorOp.
-  // Walk SubTensorOps in reverse for better clobbering behavior: it is easier
-  // to detect clobbers of smaller subtensors before larger ones.
-  for (SubTensorOp subTensorOp : reverse(subTensorOps))
-    bufferizableInPlaceAnalysis(subTensorOp, aliasInfo, domInfo);
+  // Finally, bufferize ExtractSliceOp.
+  // Walk ExtractSliceOps in reverse for better clobbering behavior: it is
+  // easier to detect clobbers of smaller slices before larger ones.
+  for (ExtractSliceOp extractSliceOp : reverse(extractSliceOps))
+    bufferizableInPlaceAnalysis(extractSliceOp, aliasInfo, domInfo);
 
   LDBG("End InPlaceAnalysisFuncOpInternals:\n" << funcOp << '\n');
 }
@@ -1611,8 +1611,8 @@
             .Case<memref::DimOp,
                   LinalgOp,
                   ReturnOp,
-                  SubTensorOp,
-                  SubTensorInsertOp,
+                  ExtractSliceOp,
+                  InsertSliceOp,
                   VectorTransferOpInterface>(
                 [&](auto op) { return bufferize(b, op, bvm, aliasInfo); })
             // clang-format on
diff --git a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp
@@ -18,6 +18,7 @@
 #include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Dialect/Linalg/Utils/Utils.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/BuiltinTypes.h"
@@ -457,8 +458,8 @@
 };
 } // namespace
 
-/// Get the reassociation maps to fold the result of a subtensor (or source of a
-/// subtensor_insert) operation with given offsets, and sizes to its
+/// Get the reassociation maps to fold the result of a extract_slice (or source
+/// of a insert_slice) operation with given offsets, and sizes to its
 /// rank-reduced version. This is only done for the cases where the size is 1
 /// and offset is 0. Strictly speaking the offset 0 is not required in general,
 /// but non-zero offsets are not handled by SPIR-V backend at this point (and
@@ -486,41 +487,41 @@
 }
 
 namespace {
-/// Convert `subtensor` operations to rank-reduced versions.
-struct UseRankReducedSubTensorOp : public OpRewritePattern<SubTensorOp> {
-  using OpRewritePattern<SubTensorOp>::OpRewritePattern;
+/// Convert `extract_slice` operations to rank-reduced versions.
+struct UseRankReducedExtractSliceOp
+    : public OpRewritePattern<tensor::ExtractSliceOp> {
+  using OpRewritePattern<tensor::ExtractSliceOp>::OpRewritePattern;
 
-  LogicalResult matchAndRewrite(SubTensorOp subTensorOp,
+  LogicalResult matchAndRewrite(tensor::ExtractSliceOp sliceOp,
                                 PatternRewriter &rewriter) const override {
-    RankedTensorType resultType = subTensorOp.getType();
-    SmallVector<OpFoldResult> offsets = subTensorOp.getMixedOffsets();
-    SmallVector<OpFoldResult> sizes = subTensorOp.getMixedSizes();
-    SmallVector<OpFoldResult> strides = subTensorOp.getMixedStrides();
+    RankedTensorType resultType = sliceOp.getType();
+    SmallVector<OpFoldResult> offsets = sliceOp.getMixedOffsets();
+    SmallVector<OpFoldResult> sizes = sliceOp.getMixedSizes();
+    SmallVector<OpFoldResult> strides = sliceOp.getMixedStrides();
     auto reassociation = getReassociationMapForFoldingUnitDims(sizes);
     if (!reassociation ||
         reassociation->size() == static_cast<size_t>(resultType.getRank()))
       return failure();
-    auto rankReducedType =
-        SubTensorOp::inferRankReducedResultType(reassociation->size(),
-                                                subTensorOp.getSourceType(),
-                                                offsets, sizes, strides)
-            .cast<RankedTensorType>();
-
-    Location loc = subTensorOp.getLoc();
-    Value newSubTensor = rewriter.create<SubTensorOp>(
-        loc, rankReducedType, subTensorOp.source(), offsets, sizes, strides);
-    rewriter.replaceOpWithNewOp<TensorExpandShapeOp>(
-        subTensorOp, resultType, newSubTensor, *reassociation);
+    auto rankReducedType = tensor::ExtractSliceOp::inferRankReducedResultType(
+                               reassociation->size(), sliceOp.getSourceType(),
+                               offsets, sizes, strides)
+                               .cast<RankedTensorType>();
+
+    Location loc = sliceOp.getLoc();
+    Value newSlice = rewriter.create<tensor::ExtractSliceOp>(
+        loc, rankReducedType, sliceOp.source(), offsets, sizes, strides);
+    rewriter.replaceOpWithNewOp<TensorExpandShapeOp>(sliceOp, resultType,
+                                                     newSlice, *reassociation);
     return success();
   }
 };
 
-/// Convert `subtensor_insert` operations to rank-reduced versions.
-struct UseRankReducedSubTensorInsertOp
-    : public OpRewritePattern<SubTensorInsertOp> {
-  using OpRewritePattern<SubTensorInsertOp>::OpRewritePattern;
+/// Convert `insert_slice` operations to rank-reduced versions.
+struct UseRankReducedInsertSliceOp
+    : public OpRewritePattern<tensor::InsertSliceOp> {
+  using OpRewritePattern<tensor::InsertSliceOp>::OpRewritePattern;
 
-  LogicalResult matchAndRewrite(SubTensorInsertOp insertOp,
+  LogicalResult matchAndRewrite(tensor::InsertSliceOp insertOp,
                                 PatternRewriter &rewriter) const override {
     RankedTensorType sourceType = insertOp.getSourceType();
     SmallVector<OpFoldResult> offsets = insertOp.getMixedOffsets();
@@ -533,7 +534,7 @@
     Location loc = insertOp.getLoc();
     auto reshapedSource = rewriter.create<TensorCollapseShapeOp>(
         loc, insertOp.source(), *reassociation);
-    rewriter.replaceOpWithNewOp<SubTensorInsertOp>(
+    rewriter.replaceOpWithNewOp<tensor::InsertSliceOp>(
         insertOp, reshapedSource, insertOp.dest(), insertOp.getMixedOffsets(),
         insertOp.getMixedSizes(), insertOp.getMixedStrides());
     return success();
@@ -546,8 +547,9 @@
 void mlir::linalg::populateFoldUnitExtentDimsPatterns(
     RewritePatternSet &patterns) {
   auto *context = patterns.getContext();
-  patterns.add<FoldUnitDimLoops, ReplaceUnitExtents, UseRankReducedSubTensorOp,
-               UseRankReducedSubTensorInsertOp>(context);
+  patterns.add<FoldUnitDimLoops, ReplaceUnitExtents,
+               UseRankReducedExtractSliceOp, UseRankReducedInsertSliceOp>(
+      context);
   TensorCollapseShapeOp::getCanonicalizationPatterns(patterns, context);
   TensorExpandShapeOp::getCanonicalizationPatterns(patterns, context);
 }
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
@@ -48,8 +48,8 @@
 ///      are 2 cases:
 ///      a) buffer case: use the SSA value of the views and a simple alias
 ///         analysis on subview ops to determine producer-consumer dependences;
-///      b) tensor case: use SSA use-def chains on subtensor ops;
-///   2. greedily fuse the linalg ops that produce the subview/subtensor.
+///      b) tensor case: use SSA use-def chains on extract_slice ops;
+///   2. greedily fuse the linalg ops that produce the subview/extract_slice.
 ///   3. inspect the fused ops and determine whether they have other remaining
 ///      LinalgOp uses. If not, then erase the original producing linalg op.
 ///
@@ -73,13 +73,14 @@
   // Extract the subranges from the linearized ranges.
   for (OpOperand *opOperand : op.getInputAndOutputOperands()) {
     // The method `getRangeFromOperandShape` requires using SubViewOp or
-    // SubTensorOps. If the value isnt defined from there continue.
+    // ExtractSliceOps. If the value isn't defined from there continue.
     // todo: The method should be adapted to get the values from
     // `ViewInterface`. The interface needs a `getOrCreateRanges` method which
     // currently returns a `linalg.range`. The fix here is to move this op to
     // `std` dialect and add the method to `ViewInterface`.
-    if (fromSubViewOpOnly && !isa_and_nonnull<memref::SubViewOp, SubTensorOp>(
-                                 opOperand->get().getDefiningOp()))
+    if (fromSubViewOpOnly &&
+        !isa_and_nonnull<memref::SubViewOp, tensor::ExtractSliceOp>(
+            opOperand->get().getDefiningOp()))
       continue;
 
     AffineMap map = op.getTiedIndexingMap(opOperand);
@@ -221,7 +222,7 @@
     SmallVector<int64_t, 4> staticSizesVector(rank, ShapedType::kDynamicSize);
     SmallVector<int64_t, 4> staticStridesVector(
         rank, ShapedType::kDynamicStrideOrOffset);
-    resultTypes.push_back(SubTensorOp::inferResultType(
+    resultTypes.push_back(tensor::ExtractSliceOp::inferResultType(
         t.cast<RankedTensorType>(), staticOffsetsVector, staticSizesVector,
         staticStridesVector));
   }
@@ -252,15 +253,15 @@
 }
 
 /// Get the loop range for a dimension `dim` based on the `shapedOperand`. It is
-/// expected to be defined by a subview op or a subtensor op.
+/// expected to be defined by a subview op or an extract_slice op.
 static Range getRangeFromOperandShape(OpBuilder &b, Location loc,
                                       Value shapedOperand, unsigned dim) {
   Operation *shapeProducingOp = shapedOperand.getDefiningOp();
   if (auto subViewOp = dyn_cast<memref::SubViewOp>(shapeProducingOp))
     return subViewOp.getOrCreateRanges(b, loc)[dim];
-  if (auto subTensorOp = dyn_cast<SubTensorOp>(shapeProducingOp))
-    return subTensorOp.getOrCreateRanges(b, loc)[dim];
-  llvm_unreachable("SubviewOp or SubTensorOp expected");
+  if (auto sliceOp = dyn_cast<tensor::ExtractSliceOp>(shapeProducingOp))
+    return sliceOp.getOrCreateRanges(b, loc)[dim];
+  llvm_unreachable("SubviewOp or ExtractSliceOp expected");
 }
 
 /// Fuses the producer into the loop immediately enclosing the consumer.
@@ -439,8 +440,8 @@
   if (!producerMap)
     return llvm::None;
 
-  // Must be a subview or a slice to guarantee there are loops we can fuse
-  // into.
+  // Must be a subview or an extract_slice to guarantee there are loops we can
+  // fuse into.
   auto subView = consumerOpOperand.get().getDefiningOp<memref::SubViewOp>();
   if (!subView) {
     LLVM_DEBUG(llvm::dbgs() << "\nNot fusable (not a subview)");
@@ -473,8 +474,8 @@
       opResult = tensor.cast<OpResult>();
       return;
     }
-    if (auto subTensorOp = tensor.getDefiningOp<SubTensorOp>()) {
-      tensor = subTensorOp.source();
+    if (auto sliceOp = tensor.getDefiningOp<tensor::ExtractSliceOp>()) {
+      tensor = sliceOp.source();
       continue;
     }
     if (auto blockArg = tensor.dyn_cast<BlockArgument>()) {
@@ -512,11 +513,11 @@
 
   Value inputTensor = consumerOpOperand.get();
 
-  // Must be a subtensor to guarantee there are loops we can fuse into.
-  auto subTensor = inputTensor.getDefiningOp<SubTensorOp>();
-  if (!subTensor) {
+  // Must be an extract_slice op to guarantee there are loops we can fuse into.
+  auto sliceOp = inputTensor.getDefiningOp<tensor::ExtractSliceOp>();
+  if (!sliceOp) {
     LLVM_DEBUG(llvm::dbgs()
-               << "\nNot fusable, not a subtensor: " << inputTensor);
+               << "\nNot fusable, not an extract_slice op: " << inputTensor);
     return {};
   }
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
@@ -19,6 +19,7 @@
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/SCF/Utils.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Vector/VectorOps.h"
 #include "mlir/Dialect/Vector/VectorUtils.h"
 #include "mlir/IR/BuiltinOps.h"
@@ -42,13 +43,13 @@
 /// instructions that need to be hoisted too.
 struct HoistableWrite {
   vector::TransferWriteOp transferWriteOp;
-  SubTensorInsertOp subTensorInsertOp;
+  tensor::InsertSliceOp insertSliceOp;
 };
 /// Represents a unit of hoistable TransferReadOp. This may comprise other
 /// instructions that need to be hoisted too.
 struct HoistableRead {
   vector::TransferReadOp transferReadOp;
-  SubTensorOp subTensorOp;
+  tensor::ExtractSliceOp extractSliceOp;
 };
 } // namespace
 
@@ -71,7 +72,8 @@
 }
 
 /// Return true is all offsets, sizes and strides are equal.
-static bool sameOffsetsSizesAndStrides(SubTensorOp s, SubTensorInsertOp si) {
+static bool sameOffsetsSizesAndStrides(tensor::ExtractSliceOp s,
+                                       tensor::InsertSliceOp si) {
   if (s.static_offsets().size() != si.static_offsets().size())
     return false;
   if (s.static_sizes().size() != si.static_sizes().size())
@@ -99,38 +101,37 @@
 
   LLVM_DEBUG(DBGS() << "findMatchingTransferRead for: "
                     << *write.transferWriteOp.getOperation() << "\n");
-  if (write.subTensorInsertOp)
-    LLVM_DEBUG(DBGS() << "findMatchingTransferRead subTensorInsertOp: "
-                      << *write.subTensorInsertOp.getOperation() << "\n");
+  if (write.insertSliceOp)
+    LLVM_DEBUG(DBGS() << "findMatchingTransferRead inserSliceOp: "
+                      << *write.insertSliceOp.getOperation() << "\n");
 
   for (Operation *user : srcTensor.getUsers()) {
     LLVM_DEBUG(DBGS() << "findMatchingTransferRead inspect user: " << *user
                       << "\n");
 
-    // If HoistableWrite involves a SubTensorInsertOp, we need to find a
-    // matching SubTensorOp.
-    SubTensorOp subTensorOp;
+    // If HoistableWrite involves a InsertSliceOp, we need to find a
+    // matching ExtractSliceOp.
+    tensor::ExtractSliceOp sliceOp;
     Operation *maybeTransferReadUser = user;
-    if (write.subTensorInsertOp) {
-      subTensorOp = dyn_cast<SubTensorOp>(user);
-      if (!subTensorOp || subTensorOp.getResult().getType() !=
-                              write.subTensorInsertOp.source().getType())
+    if (write.insertSliceOp) {
+      sliceOp = dyn_cast<tensor::ExtractSliceOp>(user);
+      if (!sliceOp || sliceOp.getResult().getType() !=
+                          write.insertSliceOp.source().getType())
         continue;
 
       LLVM_DEBUG(DBGS() << "check whether sameOffsetsSizesAndStrides: "
-                        << *subTensorOp << " vs " << *write.subTensorInsertOp
-                        << "\n");
-      if (!sameOffsetsSizesAndStrides(subTensorOp, write.subTensorInsertOp))
+                        << *sliceOp << " vs " << *write.insertSliceOp << "\n");
+      if (!sameOffsetsSizesAndStrides(sliceOp, write.insertSliceOp))
         continue;
 
       LLVM_DEBUG(DBGS() << "sameOffsetsSizesAndStrides: SUCCESS\n");
-      // If we got here, subTensorOp is hoistable iff it has exactly 2 uses:
+      // If we got here, sliceOp is hoistable iff it has exactly 2 uses:
       //   1. the transfer_write we want to hoist.
       //   2. a matching transfer_read.
       // Anything else, we skip.
       bool skip = false;
       Operation *otherUser = nullptr;
-      for (Operation *u : subTensorOp->getUsers()) {
+      for (Operation *u : sliceOp->getUsers()) {
         if (u == write.transferWriteOp)
           continue;
         if (otherUser) {
@@ -149,7 +150,7 @@
     auto read = dyn_cast<vector::TransferReadOp>(maybeTransferReadUser);
     if (read && read.indices() == write.transferWriteOp.indices() &&
         read.getVectorType() == write.transferWriteOp.getVectorType())
-      return HoistableRead{read, subTensorOp};
+      return HoistableRead{read, sliceOp};
   }
   return HoistableRead();
 }
@@ -168,13 +169,13 @@
       Operation *user = use.getOwner();
       // Skip the candidate use, only inspect the "other" uses.
       if (user == candidateRead.transferReadOp ||
-          user == candidateRead.subTensorOp || user == write.transferWriteOp ||
-          user == write.subTensorInsertOp)
+          user == candidateRead.extractSliceOp ||
+          user == write.transferWriteOp || user == write.insertSliceOp)
         continue;
-      // Consider all transitive uses through a subtensor / subtensor_insert.
+      // Consider all transitive uses through a extract_slice / insert_slice.
       // TODO: atm we just bail because a stronger analysis is needed for these
       // cases.
-      if (isa<SubTensorOp, SubTensorInsertOp>(user))
+      if (isa<tensor::ExtractSliceOp, tensor::InsertSliceOp>(user))
         return true;
       // Consider all transitive uses through a vector.transfer_write.
       if (auto writeUser = dyn_cast<vector::TransferWriteOp>(user)) {
@@ -214,7 +215,7 @@
 
 /// Return the `forOp`-invariant HoistableWrite that produces `yieldOperand`.
 /// Return the null HoistableWrite() if it is not comprised of a
-/// vector.transfer_write + optional subtensor_insert or if any of the indexings
+/// vector.transfer_write + optional insert_slice or if any of the indexings
 /// is `forOp`-dependent.
 static HoistableWrite
 getLoopInvariantTransferWriteOpDefining(scf::ForOp forOp,
@@ -229,26 +230,26 @@
     return HoistableWrite{write, nullptr};
   }
 
-  if (auto subTensorInsertOp = v.getDefiningOp<SubTensorInsertOp>()) {
-    // Inserted subTensor must come from vector.transfer_write.
+  if (auto insertSliceOp = v.getDefiningOp<tensor::InsertSliceOp>()) {
+    // Inserted slice must come from vector.transfer_write.
     auto write =
-        subTensorInsertOp.source().getDefiningOp<vector::TransferWriteOp>();
+        insertSliceOp.source().getDefiningOp<vector::TransferWriteOp>();
     if (!write)
       return HoistableWrite();
 
     // Tensor inserted into must be a BBArg at position matching yieldOperand's.
-    auto bbArg = subTensorInsertOp.dest().dyn_cast<BlockArgument>();
+    auto bbArg = insertSliceOp.dest().dyn_cast<BlockArgument>();
     if (!bbArg || bbArg.getOwner()->getParentOp() != forOp ||
         bbArg.getArgNumber() != /*num iv=*/1 + yieldOperand.getOperandNumber())
       return HoistableWrite();
 
     // Indexing inserted into must not depend on `forOp`.
-    for (Value operand : subTensorInsertOp->getOperands().drop_front(
-             SubTensorInsertOp::getOffsetSizeAndStrideStartOperandIndex()))
+    for (Value operand : insertSliceOp->getOperands().drop_front(
+             tensor::InsertSliceOp::getOffsetSizeAndStrideStartOperandIndex()))
       if (!forOp.isDefinedOutsideOfLoop(operand))
         return HoistableWrite();
 
-    return HoistableWrite{write, subTensorInsertOp};
+    return HoistableWrite{write, insertSliceOp};
   }
 
   return HoistableWrite();
@@ -260,18 +261,18 @@
   scf::ForOp forOp = cast<scf::ForOp>(tensorBBArg.getOwner()->getParentOp());
   assert(read.transferReadOp && write.transferWriteOp &&
          "expected transfer_read and transfer_write ops to be set");
-  assert(((read.subTensorOp && write.subTensorInsertOp) ||
-          (!read.subTensorOp && !write.subTensorInsertOp)) &&
-         "expected matching subtensor / subtensor_insert");
+  assert(((read.extractSliceOp && write.insertSliceOp) ||
+          (!read.extractSliceOp && !write.insertSliceOp)) &&
+         "expected matching extract_slice / insert_slice");
   LLVM_DEBUG(DBGS() << "In forOp:\n"
                     << *forOp.getOperation()
                     << "\nHoist: " << *read.transferReadOp.getOperation()
                     << "\nHoist: " << *write.transferWriteOp.getOperation()
                     << "\nInvolving: " << tensorBBArg << "\n");
 
-  // If a read subtensor is present, hoist it.
-  if (read.subTensorOp && failed(forOp.moveOutOfLoop({read.subTensorOp})))
-    llvm_unreachable("Unexpected failure moving subtensor out of loop");
+  // If a read slice is present, hoist it.
+  if (read.extractSliceOp && failed(forOp.moveOutOfLoop({read.extractSliceOp})))
+    llvm_unreachable("Unexpected failure moving extract_slice out of loop");
 
   // Hoist the transfer_read op.
   if (failed(forOp.moveOutOfLoop({read.transferReadOp})))
@@ -282,20 +283,20 @@
   unsigned initArgNumber = tensorBBArg.getArgNumber() - /*numIvs=*/1;
 
   // Update the source tensor.
-  if (read.subTensorOp)
-    read.subTensorOp.sourceMutable().assign(forOp.initArgs()[initArgNumber]);
+  if (read.extractSliceOp)
+    read.extractSliceOp.sourceMutable().assign(forOp.initArgs()[initArgNumber]);
   else
     read.transferReadOp.sourceMutable().assign(forOp.initArgs()[initArgNumber]);
 
   // Hoist write after.
-  if (write.subTensorInsertOp)
-    write.subTensorInsertOp->moveAfter(forOp);
+  if (write.insertSliceOp)
+    write.insertSliceOp->moveAfter(forOp);
   write.transferWriteOp->moveAfter(forOp);
 
   // Update the yield.
   auto yieldOp = cast<scf::YieldOp>(forOp.region().front().getTerminator());
-  if (write.subTensorInsertOp)
-    yieldOp->setOperand(initArgNumber, write.subTensorInsertOp.dest());
+  if (write.insertSliceOp)
+    yieldOp->setOperand(initArgNumber, write.insertSliceOp.dest());
   else
     yieldOp->setOperand(initArgNumber, write.transferWriteOp.source());
 
@@ -306,13 +307,13 @@
   // Transfer write has been hoisted, need to update the vector and tensor
   // source. Replace the result of the loop to use the new tensor created
   // outside the loop.
-  // Depending on whether a subtensor_insert is present or not, it carries the
+  // Depending on whether a insert_slice is present or not, it carries the
   // update on the tensor operands.
-  if (write.subTensorInsertOp) {
+  if (write.insertSliceOp) {
     newForOp.getResult(initArgNumber)
-        .replaceAllUsesWith(write.subTensorInsertOp.getResult());
-    write.transferWriteOp.sourceMutable().assign(read.subTensorOp.result());
-    write.subTensorInsertOp.destMutable().assign(read.subTensorOp.source());
+        .replaceAllUsesWith(write.insertSliceOp.getResult());
+    write.transferWriteOp.sourceMutable().assign(read.extractSliceOp.result());
+    write.insertSliceOp.destMutable().assign(read.extractSliceOp.source());
   } else {
     newForOp.getResult(initArgNumber)
         .replaceAllUsesWith(write.transferWriteOp.getResult(0));
@@ -350,9 +351,9 @@
         LLVM_DEBUG(dbgs() << "\n";
                    DBGS() << "Candidate write for hoisting: "
                           << *write.transferWriteOp.getOperation() << "\n");
-        if (write.subTensorInsertOp)
-          LLVM_DEBUG(DBGS() << "Candidate subtensor_insert for hoisting: "
-                            << *write.subTensorInsertOp.getOperation() << "\n");
+        if (write.insertSliceOp)
+          LLVM_DEBUG(DBGS() << "Candidate insert_slice for hoisting: "
+                            << *write.insertSliceOp.getOperation() << "\n");
         if (llvm::any_of(write.transferWriteOp.indices(),
                          [&forOp](Value index) {
                            return !forOp.isDefinedOutsideOfLoop(index);
@@ -788,8 +789,8 @@
   // The implementation proceeds in a stack-like fashion:
   //   1. Iteratively clone and step into the loops, pushing the `packedTensor`
   //      deeper in the stack.
-  //   2. Create a SubTensorInsert at the top of the stack.
-  //   3. Iteratively pop and yield the result of the SubTensorInsertOp across
+  //   2. Create a InsertSliceOp at the top of the stack.
+  //   3. Iteratively pop and yield the result of the InsertSliceOp across
   //     the cloned loops.
   SmallVector<Value> clonedLoopIvs, leadingPackedTensorIndexings;
   clonedLoopIvs.reserve(nLoops);
@@ -799,10 +800,10 @@
   backwardSlice.insert(padTensorOp);
   // Stack step 1. iteratively clone loops and push `packedTensor`.
   for (Operation *op : backwardSlice) {
-    // Specifically sit out in the subtenso(packedTensor) case: this is the
+    // Specifically sit out in the extract_slice(packedTensor) case: this is the
     // piece we seek to replace.
-    if (auto subTensor = dyn_cast<SubTensorOp>(op))
-      if (bvm.lookupOrDefault(subTensor.source()) == packedTensor)
+    if (auto sliceOp = dyn_cast<tensor::ExtractSliceOp>(op))
+      if (bvm.lookupOrDefault(sliceOp.source()) == packedTensor)
         continue;
     auto effects = dyn_cast<MemoryEffectOpInterface>(op);
     bool hasNoEffects = !effects || effects.hasNoEffect();
@@ -839,7 +840,7 @@
     packedTensor = clonedForOp.getRegionIterArgs().front();
   }
 
-  // Stack step 2. create SubTensorInsertOp at the top of the stack.
+  // Stack step 2. create InsertSliceOp at the top of the stack.
   // offsets = [clonedLoopIvs, 0 .. 0].
   SmallVector<OpFoldResult> offsets(leadingPackedTensorIndexings.begin(),
                                     leadingPackedTensorIndexings.end());
@@ -856,8 +857,8 @@
   SmallVector<OpFoldResult> strides(nLoops + paddedRank, b.getIndexAttr(1));
 
   Value inserted =
-      b.create<SubTensorInsertOp>(loc, bvm.lookup(padTensorOp.result()),
-                                  packedTensor, offsets, sizes, strides);
+      b.create<tensor::InsertSliceOp>(loc, bvm.lookup(padTensorOp.result()),
+                                      packedTensor, offsets, sizes, strides);
 
   // Stack step 3. iteratively pop the stack and propagate the yield.
   Value valueToYield = inserted;
@@ -869,7 +870,7 @@
   }
 
   // Now the packed tensor is ready, replace the original padding op by a
-  // 1x..x1 SubTensor [originalLoopIvs, 0 .. 0][1 .. 1, paddedShape][1 .. 1].
+  // 1x..x1 slice [originalLoopIvs, 0 .. 0][1 .. 1, paddedShape][1 .. 1].
   b.setInsertionPoint(padTensorOp);
   SmallVector<Value> loopIterationCounts =
       llvm::to_vector<4>(llvm::map_range(packingLoops, [&](Operation *loop) {
@@ -888,8 +889,8 @@
   packedTensor =
       scf::getForInductionVarOwner(clonedLoopIvs.front())->getResult(0);
   padTensorOp.replaceAllUsesWith(
-      b.create<SubTensorOp>(loc, padTensorOp.getResultType(), packedTensor,
-                            offsets, sizes, strides)
+      b.create<tensor::ExtractSliceOp>(loc, padTensorOp.getResultType(),
+                                       packedTensor, offsets, sizes, strides)
           ->getResult(0));
 
   Operation *toErase = padTensorOp;
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -254,18 +254,18 @@
 
     res = op.clone(b, loc, resultTensorTypes, tiledOperands);
 
-    // Insert a subtensor_insert for each output tensor.
+    // Insert a insert_slice for each output tensor.
     unsigned resultIdx = 0;
     for (OpOperand *opOperand : op.getOutputTensorOperands()) {
       // TODO: use an interface/adaptor to avoid leaking position in
       // `tiledOperands`.
       Value outputTensor = tiledOperands[opOperand->getOperandNumber()];
-      if (auto subtensor = outputTensor.getDefiningOp<SubTensorOp>()) {
-        tensorResults.push_back(b.create<SubTensorInsertOp>(
-            loc, subtensor.source().getType(), res->getResult(resultIdx),
-            subtensor.source(), subtensor.offsets(), subtensor.sizes(),
-            subtensor.strides(), subtensor.static_offsets(),
-            subtensor.static_sizes(), subtensor.static_strides()));
+      if (auto sliceOp = outputTensor.getDefiningOp<tensor::ExtractSliceOp>()) {
+        tensorResults.push_back(b.create<tensor::InsertSliceOp>(
+            loc, sliceOp.source().getType(), res->getResult(resultIdx),
+            sliceOp.source(), sliceOp.offsets(), sliceOp.sizes(),
+            sliceOp.strides(), sliceOp.static_offsets(), sliceOp.static_sizes(),
+            sliceOp.static_strides()));
       } else {
         tensorResults.push_back(res->getResult(resultIdx));
       }
@@ -406,7 +406,7 @@
   scf::ForOp::getCanonicalizationPatterns(patterns, ctx);
   scf::ParallelOp::getCanonicalizationPatterns(patterns, ctx);
   ConstantIndexOp::getCanonicalizationPatterns(patterns, ctx);
-  SubTensorOp::getCanonicalizationPatterns(patterns, ctx);
+  tensor::ExtractSliceOp::getCanonicalizationPatterns(patterns, ctx);
   memref::SubViewOp::getCanonicalizationPatterns(patterns, ctx);
   tensor::CastOp::getCanonicalizationPatterns(patterns, ctx);
   memref::ViewOp::getCanonicalizationPatterns(patterns, ctx);
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -16,6 +16,7 @@
 #include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h"
 #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
 #include "mlir/Dialect/Linalg/Utils/Utils.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Utils/StructuredOpsUtils.h"
 #include "mlir/Dialect/Vector/VectorOps.h"
 #include "mlir/IR/AffineExpr.h"
@@ -128,14 +129,13 @@
   // Already static shape, no need to pad.
   if (llvm::none_of(opToPad.getShape(opOperand), ShapedType::isDynamic))
     return success();
-  auto subtensor = opOperand->get().getDefiningOp<SubTensorOp>();
-  // Not a subtensor, cannot construct a static bounding box.
-  if (!subtensor)
+  auto sliceOp = opOperand->get().getDefiningOp<tensor::ExtractSliceOp>();
+  // Not a slice op, cannot construct a static bounding box.
+  if (!sliceOp)
     return failure();
   SmallVector<int64_t> staticSizes;
   staticSizes.reserve(opToPad.getRank(opOperand));
-  auto shapedOp =
-      cast<OffsetSizeAndStrideOpInterface>(subtensor.getOperation());
+  auto shapedOp = cast<OffsetSizeAndStrideOpInterface>(sliceOp.getOperation());
   for (auto size : shapedOp.getMixedSizes()) {
     auto indexAttr = size.is<Attribute>()
                          ? size.get<Attribute>().dyn_cast<IntegerAttr>()
@@ -195,8 +195,8 @@
   linalg::LinalgOp paddedOp =
       opToPad.clone(rewriter, loc, resultTensorTypes, newOperands);
 
-  // Recover the subtensor out of the new static results. This keeps the
-  // original linalg op around because it uses the dims of the original results.
+  // Recover the slice out of the new static results. This keeps the original
+  // linalg op around because it uses the dims of the original results.
   // This later folds away.
   SmallVector<Value> paddedSubviewResults;
   paddedSubviewResults.reserve(opToPad->getNumResults());
@@ -211,7 +211,7 @@
           return dimOp.getResult();
         }));
     SmallVector<OpFoldResult> strides(rank, rewriter.getIndexAttr(1));
-    paddedSubviewResults.push_back(rewriter.create<SubTensorOp>(
+    paddedSubviewResults.push_back(rewriter.create<tensor::ExtractSliceOp>(
         loc, std::get<1>(it), offsets, sizes, strides));
   }
   // Replace the transient `opToPad` locally, except for uses that we just
@@ -679,7 +679,7 @@
       rewriter.create<linalg::FillOp>(loc, initTensor, padValue).result();
 
   // Copy original contents into new tensor
-  // Uses linalg.generic, but could be done with std.subtensor_insert
+  // Uses linalg.generic, but could be done with tensor.insert_slice
   SmallVector<AffineExpr, 4> outputExprs;
   for (unsigned i = 0; i < resultShapedType.getRank(); ++i) {
     outputExprs.push_back(getAffineDimExpr(i, rewriter.getContext()) +
@@ -719,13 +719,13 @@
   return val;
 }
 
-LogicalResult SubTensorOfPadTensorSwapPattern::matchAndRewrite(
-    SubTensorOp subTensorOp, PatternRewriter &rewriter) const {
-  auto padOp = subTensorOp.source().getDefiningOp<PadTensorOp>();
+LogicalResult ExtractSliceOfPadTensorSwapPattern::matchAndRewrite(
+    tensor::ExtractSliceOp sliceOp, PatternRewriter &rewriter) const {
+  auto padOp = sliceOp.source().getDefiningOp<PadTensorOp>();
   if (!padOp)
     return failure();
   // Only unit stride supported.
-  if (!subTensorOp.hasUnitStride())
+  if (!sliceOp.hasUnitStride())
     return failure();
   // Only constant padding value supported.
   Value padValue = padOp.getConstantPaddingValue();
@@ -734,7 +734,7 @@
 
   // Helper variables and functions for various arithmetic operations. These are
   // used extensively for computing new offset/length and padding values.
-  Location loc = subTensorOp.getLoc();
+  Location loc = sliceOp.getLoc();
   AffineExpr dim0, dim1;
   bindDims(rewriter.getContext(), dim0, dim1);
   // Add two integers.
@@ -786,8 +786,8 @@
   int64_t rank = padOp.getSourceType().getRank();
   for (unsigned dim = 0; dim < rank; ++dim) {
     auto low = asValue(rewriter, loc, padOp.getMixedLowPad()[dim]);
-    auto offset = asValue(rewriter, loc, subTensorOp.getMixedOffsets()[dim]);
-    auto length = asValue(rewriter, loc, subTensorOp.getMixedSizes()[dim]);
+    auto offset = asValue(rewriter, loc, sliceOp.getMixedOffsets()[dim]);
+    auto length = asValue(rewriter, loc, sliceOp.getMixedSizes()[dim]);
     auto srcSize = rewriter.createOrFold<memref::DimOp>(
         loc, padOp.source(), dim);
 
@@ -805,19 +805,19 @@
     //
     // The original read could also have started in the high padding zone.
     // In that case, set the offset to the end of source tensor. The new
-    // SubTensorOp length will be zero in that case. (Effectively reading no
+    // ExtractSliceOp length will be zero in that case. (Effectively reading no
     // data from the source.)
     Value newOffset = min(max(sub(offset, low), zero), srcSize);
     newOffsets.push_back(asOpFoldResult(rewriter, newOffset));
 
-    // The original SubTensorOp was reading until position `offset + length`.
+    // The original ExtractSliceOp was reading until position `offset + length`.
     // Therefore, the corresponding position within the source tensor is:
     //
     // offset + length - low
     //
-    // In case the original SubTensorOp stopped reading within the low padding
-    // zone, this value can be negative. In that case, the end position of the
-    // read should be zero. (Similar to newOffset.)
+    // In case the original ExtractSliceOp stopped reading within the low
+    // padding zone, this value can be negative. In that case, the end position
+    // of the read should be zero. (Similar to newOffset.)
     //
     // The original read could also have stopped in the high padding zone.
     // In that case, set the end positition of the read should be the end of the
@@ -825,7 +825,7 @@
     //
     // endLoc = min(max(offset - low + length, 0), srcSize)
     //
-    // The new SubTensorOp length is `endLoc - newOffset`.
+    // The new ExtractSliceOp length is `endLoc - newOffset`.
     Value endLoc = min(max(add(sub(offset, low), length), zero), srcSize);
     Value newLength = sub(endLoc, newOffset);
     newLengths.push_back(asOpFoldResult(rewriter, newLength));
@@ -842,7 +842,7 @@
     }
 
     // The amount of high padding is simply the number of elements remaining,
-    // so that the result has the same length as the original SubTensorOp.
+    // so that the result has the same length as the original ExtractSliceOp.
     Value newHigh = sub(sub(length, newLength), newLow);
     appendIndex(newHigh, newHighs, staticNewHighs);
 
@@ -852,22 +852,20 @@
 
   // Insert cast to ensure that types match. (May be folded away.)
   auto castResult = [&](Value val) -> Value {
-    auto castOp = rewriter.create<tensor::CastOp>(
-        loc, subTensorOp.getType(), val);
+    auto castOp = rewriter.create<tensor::CastOp>(loc, sliceOp.getType(), val);
     return castOp;
   };
 
   // In cases where the original data source is unused: Emit a GenerateOp and
-  // do not generate a SubTensorOp. (The result shape of the SubTensorOp would
+  // do not generate a SliceOp. (The result shape of the SliceOp would
   // have a dimension of size 0, the semantics of which is unclear.)
   auto createGenerateOp = [&]() {
-    // The shape of the GenerateOp is the same as the existing SubTensorOp.
-    RankedTensorType type = subTensorOp.getType();
+    // The shape of the GenerateOp is the same as the existing SliceOp.
+    RankedTensorType type = sliceOp.getType();
     SmallVector<Value> dynDims;
     for (unsigned i = 0; i < type.getRank(); ++i) {
       if (type.isDynamicDim(i))
-        dynDims.push_back(
-            asValue(rewriter, loc, subTensorOp.getMixedOffsets()[i]));
+        dynDims.push_back(asValue(rewriter, loc, sliceOp.getMixedOffsets()[i]));
     }
 
     // Create GenerateOp.
@@ -891,14 +889,14 @@
     return castResult(generateOp);
   };
 
-  // Emit a SubTensorOp and a PadTensorOp. Should not be used in cases where
-  // the result shape of the new SubTensorOp has a zero dimension.
+  // Emit a SliceOp and a PadTensorOp. Should not be used in cases where
+  // the result shape of the new SliceOp has a zero dimension.
   auto createPadTensorOfSubTensor = [&]() {
     // Create pad_tensor(subtensor(x)).
-    auto newSubTensorOp = rewriter.create<SubTensorOp>(
+    auto newSliceOp = rewriter.create<tensor::ExtractSliceOp>(
         loc, padOp.source(), newOffsets, newLengths, newStrides);
     auto newPadTensorOp = rewriter.create<PadTensorOp>(
-        loc, newSubTensorOp, staticNewLows, staticNewHighs, newLows, newHighs);
+        loc, newSliceOp, staticNewLows, staticNewHighs, newLows, newHighs);
 
     // Copy region to new PadTensorOp.
     BlockAndValueMapping bvm;
@@ -911,27 +909,29 @@
   // Rewrite subtensor(pad_tensor(x)) into a GenerateOp it is statically known
   // that the original data source x is not used.
   if (hasZeroLen) {
-    rewriter.replaceOp(subTensorOp, createGenerateOp());
+    rewriter.replaceOp(sliceOp, createGenerateOp());
     return success();
   }
 
   // If there are dynamic dimensions: Generate an scf.if check to avoid creating
-  // SubTensorOps with result dimensions of size 0 at runtime.
+  // SliceOps with result dimensions of size 0 at runtime.
   if (dynHasZeroLenCond) {
     auto result = rewriter.create<scf::IfOp>(
-        loc, subTensorOp.getType(), dynHasZeroLenCond,
-        /*thenBuilder=*/[&](OpBuilder &b, Location loc) {
+        loc, sliceOp.getType(), dynHasZeroLenCond,
+        /*thenBuilder=*/
+        [&](OpBuilder &b, Location loc) {
           b.create<scf::YieldOp>(loc, createGenerateOp());
         },
-        /*elseBuilder=*/[&](OpBuilder &b, Location loc) {
+        /*elseBuilder=*/
+        [&](OpBuilder &b, Location loc) {
           b.create<scf::YieldOp>(loc, createPadTensorOfSubTensor());
         });
-    rewriter.replaceOp(subTensorOp, result.getResult(0));
+    rewriter.replaceOp(sliceOp, result.getResult(0));
     return success();
   }
 
   // All shapes are static and the data source is actually used. Rewrite into
   // pad_tensor(subtensor(x)).
-  rewriter.replaceOp(subTensorOp, createPadTensorOfSubTensor());
+  rewriter.replaceOp(sliceOp, createPadTensorOfSubTensor());
   return success();
 }
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -15,6 +15,7 @@
 #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Dialect/Linalg/Utils/Utils.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Utils/StructuredOpsUtils.h"
 #include "mlir/Dialect/Vector/VectorOps.h"
 #include "mlir/IR/AffineExpr.h"
@@ -677,9 +678,9 @@
 }
 
 /// Rewrite a PadTensorOp into a sequence of InitTensorOp, FillOp and
-/// SubTensorInsertOp. For now, only constant padding values are supported.
+/// InsertSliceOp. For now, only constant padding values are supported.
 /// If there is enough static type information, TransferReadOps and
-/// TransferWriteOps may be generated instead of SubTensorInsertOps.
+/// TransferWriteOps may be generated instead of InsertSliceOps.
 struct GenericPadTensorOpVectorizationPattern
     : public OpRewritePattern<PadTensorOp> {
   using OpRewritePattern<PadTensorOp>::OpRewritePattern;
@@ -723,7 +724,7 @@
       return success();
 
     // Neither source type nor PadTensorOp result type have static shape. Such
-    // PadTensorOps cannot be vectorized. Generate a SubTensorInsertOp instead
+    // PadTensorOps cannot be vectorized. Generate a InsertSliceOp instead
     // for copying the PadOp source.
 
     auto sourceType = padOp.getSourceType();
@@ -737,10 +738,10 @@
         srcSizes.push_back(rewriter.getIndexAttr(sourceType.getDimSize(dim)));
       }
     }
-    // Strides of SubTensorInsertOp are all 1.
+    // Strides of InsertSliceOp are all 1.
     SmallVector<OpFoldResult> strides(sourceType.getRank(),
                                       rewriter.getIndexAttr(1));
-    rewriter.replaceOpWithNewOp<SubTensorInsertOp>(
+    rewriter.replaceOpWithNewOp<tensor::InsertSliceOp>(
         padOp, padOp.source(), fill, padOp.getMixedLowPad(), srcSizes, strides);
 
     return success();
@@ -913,27 +914,29 @@
 /// write. In such cases, the TransferWriteOp can write to the non-padded tensor
 /// value and apply out-of-bounds masking. E.g.:
 /// ```
-/// %0 = subtensor ...[...] [%s0, %s1] [1, 1] : tensor<...> to tensor<?x?xf32>
+/// %0 = tensor.extract_slice ...[...] [%s0, %s1] [1, 1]
+///     : tensor<...> to tensor<?x?xf32>
 /// %1 = linalg.pad_tensor %0 ... : tensor<?x?xf32> to tensor<17x5xf32>
 /// %2 = vector.transfer_write %vec, %1[...]
 ///     : vector<17x5xf32>, tensor<17x5xf32>
-/// %r = subtensor %2[0, 0] [%s0, %s1] [1, 1]
+/// %r = tensor.extract_slice %2[0, 0] [%s0, %s1] [1, 1]
 ///     : tensor<17x5xf32> to tensor<?x?xf32>
 /// ```
 /// is rewritten to:
 /// ```
-/// %0 = subtensor ...[...] [%s0, %s1] [1, 1] : tensor<...> to tensor<?x?xf32>
+/// %0 = tensor.extract_slice ...[...] [%s0, %s1] [1, 1]
+///     : tensor<...> to tensor<?x?xf32>
 /// %r = vector.transfer_write %vec, %0[...] : vector<17x5xf32>, tensor<?x?xf32>
 /// ```
-/// Note: It is important that the SubTensorOp %r resizes the result of the
+/// Note: It is important that the ExtractSliceOp %r resizes the result of the
 /// TransferWriteOp to the same size as the input of the TensorPadOp (or an even
 /// smaller size). Otherwise, %r's new (dynamic) dimensions would differ from
 /// %r's old dimensions.
 ///
 /// This rewrite is possible if:
 /// - Low padding is static 0.
-/// - `xferOp` has exactly one use, which is a SubTensorOp. This SubTensorOp
-///   trims the same amount of padding that was added beforehand.
+/// - `xferOp` has exactly one use, which is an ExtractSliceOp. This
+///   ExtractSliceOp trims the same amount of padding that was added beforehand.
 /// - Single, scalar padding value.
 struct PadTensorOpVectorizationWithTransferWritePattern
     : public VectorizePadTensorOpUserPattern<vector::TransferWriteOp> {
@@ -947,9 +950,9 @@
     // Pad value must be a constant.
     auto padValue = padOp.getConstantPaddingValue();
     if (!padValue) return failure();
-    // TransferWriteOp result must be directly consumed by a SubTensorOp.
+    // TransferWriteOp result must be directly consumed by an ExtractSliceOp.
     if (!xferOp->hasOneUse()) return failure();
-    auto trimPadding = dyn_cast<SubTensorOp>(*xferOp->user_begin());
+    auto trimPadding = dyn_cast<tensor::ExtractSliceOp>(*xferOp->user_begin());
     if (!trimPadding) return failure();
     // Only static zero offsets supported when trimming padding.
     if (!trimPadding.hasZeroOffset()) return failure();
@@ -976,7 +979,8 @@
   /// This is a conservative analysis. In case equal tensor sizes cannot be
   /// proven statically, this analysis returns `false` even though the tensor
   /// sizes may turn out to be equal at runtime.
-  bool hasSameTensorSize(Value beforePadding, SubTensorOp afterTrimming) const {
+  bool hasSameTensorSize(Value beforePadding,
+                         tensor::ExtractSliceOp afterTrimming) const {
     // If the input to PadTensorOp is a CastOp, try with with both CastOp result
     // and CastOp operand.
     if (auto castOp = beforePadding.getDefiningOp<tensor::CastOp>())
@@ -1002,21 +1006,22 @@
     if (t1.getNumDynamicDims() == 0) return true;
 
     // All dynamic sizes must be the same. The only supported case at the moment
-    // is when `beforePadding` is a SubTensorOp (or a cast thereof).
+    // is when `beforePadding` is an ExtractSliceOp (or a cast thereof).
 
-    // Apart from CastOp, only SubTensorOp is supported.
-    auto beforeSubtensor = beforePadding.getDefiningOp<SubTensorOp>();
-    if (!beforeSubtensor) return false;
+    // Apart from CastOp, only ExtractSliceOp is supported.
+    auto beforeSlice = beforePadding.getDefiningOp<tensor::ExtractSliceOp>();
+    if (!beforeSlice)
+      return false;
 
-    assert(static_cast<size_t>(t1.getRank())
-           == beforeSubtensor.getMixedSizes().size());
+    assert(static_cast<size_t>(t1.getRank()) ==
+           beforeSlice.getMixedSizes().size());
     assert(static_cast<size_t>(t2.getRank())
            == afterTrimming.getMixedSizes().size());
 
     for (unsigned i = 0; i < t1.getRank(); ++i) {
       // Skip static dimensions.
       if (!t1.isDynamicDim(i)) continue;
-      auto size1 = beforeSubtensor.getMixedSizes()[i];
+      auto size1 = beforeSlice.getMixedSizes()[i];
       auto size2 = afterTrimming.getMixedSizes()[i];
 
       // Case 1: Same value or same constant int.
@@ -1042,10 +1047,11 @@
   }
 };
 
-/// Rewrite use of PadTensorOp result in SubtensorInsertOp. E.g.:
+/// Rewrite use of PadTensorOp result in InsertSliceOp. E.g.:
 /// ```
 /// %0 = linalg.pad_tensor %src ... : tensor<?x?xf32> to tensor<17x5xf32>
-/// %r = subtensor_insert %0 into %dest[%a, %b, 0, 0] [1, 1, 17, 5] [1, 1, 1, 1]
+/// %r = tensor.insert_slice %0
+///     into %dest[%a, %b, 0, 0] [1, 1, 17, 5] [1, 1, 1, 1]
 ///     : tensor<17x5xf32> into tensor<?x?x17x5xf32>
 /// ```
 /// is rewritten to:
@@ -1063,13 +1069,13 @@
 ///   (Implies that sizes of `insertOp` are all static.)
 /// - Only unit strides in `insertOp`.
 /// - Single, scalar padding value.
-struct PadTensorOpVectorizationWithSubTensorInsertPattern
-    : public VectorizePadTensorOpUserPattern<SubTensorInsertOp> {
-  using VectorizePadTensorOpUserPattern<SubTensorInsertOp>
-      ::VectorizePadTensorOpUserPattern;
+struct PadTensorOpVectorizationWithInsertSlicePattern
+    : public VectorizePadTensorOpUserPattern<tensor::InsertSliceOp> {
+  using VectorizePadTensorOpUserPattern<
+      tensor::InsertSliceOp>::VectorizePadTensorOpUserPattern;
 
   LogicalResult rewriteUser(PatternRewriter &rewriter, PadTensorOp padOp,
-                            SubTensorInsertOp insertOp) const override {
+                            tensor::InsertSliceOp insertOp) const override {
     // Low padding must be static 0.
     if (!padOp.hasZeroLowPad()) return failure();
     // Only unit stride supported.
@@ -1103,8 +1109,8 @@
     auto read = rewriter.create<vector::TransferReadOp>(
         padOp.getLoc(), vecType, padOp.source(), readIndices, padValue);
 
-    // Generate TransferWriteOp: Write to SubTensorInsertOp's dest tensor at
-    // specified offsets. Write is fully in-bounds because a SubTensorInsertOp's
+    // Generate TransferWriteOp: Write to InsertSliceOp's dest tensor at
+    // specified offsets. Write is fully in-bounds because a InsertSliceOp's
     // source must fit into the destination at the specified offsets.
     auto writeIndices =
         ofrToIndexValues(rewriter, padOp.getLoc(), insertOp.getMixedOffsets());
@@ -1123,7 +1129,7 @@
   // Try these specialized patterns first before resorting to the generic one.
   patterns.add<PadTensorOpVectorizationWithTransferReadPattern,
                PadTensorOpVectorizationWithTransferWritePattern,
-               PadTensorOpVectorizationWithSubTensorInsertPattern>(
+               PadTensorOpVectorizationWithInsertSlicePattern>(
       patterns.getContext(), baseBenefit.getBenefit() + 1);
 }
 
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -19,6 +19,7 @@
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Dialect/StandardOps/Utils/Utils.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineExprVisitor.h"
 #include "mlir/IR/AffineMap.h"
@@ -556,7 +557,7 @@
     }
     LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");
 
-    // Construct a new subview / subtensor for the tile.
+    // Construct a new subview / extract_slice for the tile.
     SmallVector<OpFoldResult, 4> offsets, sizes, strides;
     offsets.reserve(rank);
     sizes.reserve(rank);
@@ -585,7 +586,7 @@
       Value size = makeComposedAffineApply(b, loc, s0 + 1, closedIntSize);
       LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: raw size: " << size << "\n");
 
-      // The size of the subview / subtensor should be trimmed to avoid
+      // The size of the subview / extract_slice should be trimmed to avoid
       // out-of-bounds accesses, unless we statically know the subshape size
       // divides the shape size evenly.
       int64_t shapeSize = shape[r];
@@ -619,8 +620,8 @@
       tiledShapes.push_back(
           b.create<memref::SubViewOp>(loc, shapedOp, offsets, sizes, strides));
     else
-      tiledShapes.push_back(
-          b.create<SubTensorOp>(loc, shapedOp, offsets, sizes, strides));
+      tiledShapes.push_back(b.create<tensor::ExtractSliceOp>(
+          loc, shapedOp, offsets, sizes, strides));
   }
 
   return tiledShapes;
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -717,10 +717,10 @@
   // The size at the given index is now known to be a dynamic size.
   unsigned unsignedIndex = index.getValue().getZExtValue();
 
-  if (auto subtensor = dyn_cast_or_null<mlir::SubTensorOp>(definingOp)) {
-    assert(subtensor.isDynamicSize(unsignedIndex) &&
-           "Expected dynamic subtensor size");
-    return subtensor.getDynamicSize(unsignedIndex);
+  if (auto sliceOp = dyn_cast_or_null<tensor::ExtractSliceOp>(definingOp)) {
+    assert(sliceOp.isDynamicSize(unsignedIndex) &&
+           "Expected dynamic slice size");
+    return sliceOp.getDynamicSize(unsignedIndex);
   }
 
   // Fold dim to the size argument for an `AllocOp`, `ViewOp`, or `SubViewOp`.
@@ -1314,7 +1314,7 @@
 }
 
 // TODO: ponder whether we want to allow missing trailing sizes/strides that are
-// completed automatically, like we have for subview and subtensor.
+// completed automatically, like we have for subview and extract_slice.
 static LogicalResult verify(ReinterpretCastOp op) {
   // The source and result memrefs should be in the same memory space.
   auto srcType = op.source().getType().cast<BaseMemRefType>();
diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
--- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
+++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
@@ -10,7 +10,6 @@
 
 #include "mlir/Dialect/CommonFolders.h"
 #include "mlir/Dialect/StandardOps/Utils/Utils.h"
-#include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/BlockAndValueMapping.h"
@@ -34,32 +33,6 @@
 
 using namespace mlir;
 
-/// Helper function to dispatch an OpFoldResult into either the `dynamicVec` if
-/// it is a Value or into `staticVec` if it is an IntegerAttr.
-/// In the case of a Value, a copy of the `sentinel` value is also pushed to
-/// `staticVec`. This is useful to extract mixed static and dynamic entries that
-/// come from an AttrSizedOperandSegments trait.
-static void dispatchIndexOpFoldResult(OpFoldResult ofr,
-                                      SmallVectorImpl<Value> &dynamicVec,
-                                      SmallVectorImpl<int64_t> &staticVec,
-                                      int64_t sentinel) {
-  if (auto v = ofr.dyn_cast<Value>()) {
-    dynamicVec.push_back(v);
-    staticVec.push_back(sentinel);
-    return;
-  }
-  APInt apInt = ofr.dyn_cast<Attribute>().cast<IntegerAttr>().getValue();
-  staticVec.push_back(apInt.getSExtValue());
-}
-
-static void dispatchIndexOpFoldResults(ArrayRef<OpFoldResult> ofrs,
-                                       SmallVectorImpl<Value> &dynamicVec,
-                                       SmallVectorImpl<int64_t> &staticVec,
-                                       int64_t sentinel) {
-  for (auto ofr : ofrs)
-    dispatchIndexOpFoldResult(ofr, dynamicVec, staticVec, sentinel);
-}
-
 /// If ofr is a constant integer, i.e., an IntegerAttr or a ConstantOp with an
 /// IntegerAttr, return the integer.
 llvm::Optional<int64_t> mlir::getConstantIntValue(OpFoldResult ofr) {
@@ -227,7 +200,6 @@
 }
 
 void StandardOpsDialect::initialize() {
-  getContext()->loadDialect<tensor::TensorDialect>();
   addOperations<
 #define GET_OP_LIST
 #include "mlir/Dialect/StandardOps/IR/Ops.cpp.inc"
@@ -286,14 +258,6 @@
                                         [](APInt a, APInt b) { return a + b; });
 }
 
-/// Extract int64_t values from the assumed ArrayAttr of IntegerAttr.
-static SmallVector<int64_t, 4> extractFromI64ArrayAttr(Attribute attr) {
-  return llvm::to_vector<4>(
-      llvm::map_range(attr.cast<ArrayAttr>(), [](Attribute a) -> int64_t {
-        return a.cast<IntegerAttr>().getInt();
-      }));
-}
-
 /// Canonicalize a sum of a constant and (constant - something) to simply be
 /// a sum of constants minus something. This transformation does similar
 /// transformations for additions of a constant with a subtract/add of
@@ -2082,499 +2046,6 @@
   return areVectorCastSimpleCompatible(a, b, areCastCompatible);
 }
 
-//===----------------------------------------------------------------------===//
-// SubTensorOp
-//===----------------------------------------------------------------------===//
-
-/// A subtensor result type can be fully inferred from the source type and the
-/// static representation of offsets, sizes and strides. Special sentinels
-/// encode the dynamic case.
-Type SubTensorOp::inferResultType(RankedTensorType sourceRankedTensorType,
-                                  ArrayRef<int64_t> leadingStaticOffsets,
-                                  ArrayRef<int64_t> leadingStaticSizes,
-                                  ArrayRef<int64_t> leadingStaticStrides) {
-  // A subtensor may specify only a leading subset of offset/sizes/strides in
-  // which case we complete with offset=0, sizes from memref type and strides=1.
-  unsigned rank = sourceRankedTensorType.getRank();
-  assert(leadingStaticSizes.size() <= rank &&
-         "unexpected leadingStaticSizes overflow");
-  auto staticSizes = llvm::to_vector<4>(leadingStaticSizes);
-  unsigned numTrailingSizes = rank - staticSizes.size();
-  llvm::append_range(staticSizes, sourceRankedTensorType.getShape().take_back(
-                                      numTrailingSizes));
-  return RankedTensorType::get(staticSizes,
-                               sourceRankedTensorType.getElementType());
-}
-
-Type SubTensorOp::inferResultType(RankedTensorType sourceRankedTensorType,
-                                  ArrayRef<OpFoldResult> leadingStaticOffsets,
-                                  ArrayRef<OpFoldResult> leadingStaticSizes,
-                                  ArrayRef<OpFoldResult> leadingStaticStrides) {
-  SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
-  SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
-  dispatchIndexOpFoldResults(leadingStaticOffsets, dynamicOffsets,
-                             staticOffsets, ShapedType::kDynamicStrideOrOffset);
-  dispatchIndexOpFoldResults(leadingStaticSizes, dynamicSizes, staticSizes,
-                             ShapedType::kDynamicSize);
-  dispatchIndexOpFoldResults(leadingStaticStrides, dynamicStrides,
-                             staticStrides, ShapedType::kDynamicStrideOrOffset);
-  return SubTensorOp::inferResultType(sourceRankedTensorType, staticOffsets,
-                                      staticSizes, staticStrides);
-}
-
-/// A subtensor result type can be fully inferred from the source type and the
-/// static representation of offsets, sizes and strides. Special sentinels
-/// encode the dynamic case.
-Type SubTensorOp::inferRankReducedResultType(
-    unsigned resultRank, RankedTensorType sourceRankedTensorType,
-    ArrayRef<int64_t> leadingStaticOffsets,
-    ArrayRef<int64_t> leadingStaticSizes,
-    ArrayRef<int64_t> leadingStaticStrides) {
-  auto inferredType =
-      inferResultType(sourceRankedTensorType, leadingStaticOffsets,
-                      leadingStaticSizes, leadingStaticStrides)
-          .cast<RankedTensorType>();
-  int rankDiff = inferredType.getRank() - resultRank;
-  if (rankDiff > 0) {
-    auto shape = inferredType.getShape();
-    llvm::SmallDenseSet<unsigned> dimsToProject;
-    mlir::getPositionsOfShapeOne(rankDiff, shape, dimsToProject);
-    SmallVector<int64_t> projectedShape;
-    for (unsigned pos = 0, e = shape.size(); pos < e; ++pos)
-      if (!dimsToProject.contains(pos))
-        projectedShape.push_back(shape[pos]);
-    inferredType =
-        RankedTensorType::get(projectedShape, inferredType.getElementType());
-  }
-  return inferredType;
-}
-
-Type SubTensorOp::inferRankReducedResultType(
-    unsigned resultRank, RankedTensorType sourceRankedTensorType,
-    ArrayRef<OpFoldResult> leadingStaticOffsets,
-    ArrayRef<OpFoldResult> leadingStaticSizes,
-    ArrayRef<OpFoldResult> leadingStaticStrides) {
-  SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
-  SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
-  dispatchIndexOpFoldResults(leadingStaticOffsets, dynamicOffsets,
-                             staticOffsets, ShapedType::kDynamicStrideOrOffset);
-  dispatchIndexOpFoldResults(leadingStaticSizes, dynamicSizes, staticSizes,
-                             ShapedType::kDynamicSize);
-  dispatchIndexOpFoldResults(leadingStaticStrides, dynamicStrides,
-                             staticStrides, ShapedType::kDynamicStrideOrOffset);
-  return SubTensorOp::inferRankReducedResultType(
-      resultRank, sourceRankedTensorType, staticOffsets, staticSizes,
-      staticStrides);
-}
-
-// Build a SubTensorOp with mixed static and dynamic entries and custom result
-// type. If the type passed is nullptr, it is inferred.
-void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result,
-                              RankedTensorType resultType, Value source,
-                              ArrayRef<OpFoldResult> offsets,
-                              ArrayRef<OpFoldResult> sizes,
-                              ArrayRef<OpFoldResult> strides,
-                              ArrayRef<NamedAttribute> attrs) {
-  SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
-  SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
-  dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets,
-                             ShapedType::kDynamicStrideOrOffset);
-  dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes,
-                             ShapedType::kDynamicSize);
-  dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides,
-                             ShapedType::kDynamicStrideOrOffset);
-  auto sourceRankedTensorType = source.getType().cast<RankedTensorType>();
-  // Structuring implementation this way avoids duplication between builders.
-  if (!resultType) {
-    resultType =
-        SubTensorOp::inferResultType(sourceRankedTensorType, staticOffsets,
-                                     staticSizes, staticStrides)
-            .cast<RankedTensorType>();
-  }
-  build(b, result, resultType, source, dynamicOffsets, dynamicSizes,
-        dynamicStrides, b.getI64ArrayAttr(staticOffsets),
-        b.getI64ArrayAttr(staticSizes), b.getI64ArrayAttr(staticStrides));
-  result.addAttributes(attrs);
-}
-
-// Build a SubTensorOp with mixed static and dynamic entries and inferred result
-// type.
-void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result,
-                              Value source, ArrayRef<OpFoldResult> offsets,
-                              ArrayRef<OpFoldResult> sizes,
-                              ArrayRef<OpFoldResult> strides,
-                              ArrayRef<NamedAttribute> attrs) {
-  build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs);
-}
-
-// Build a SubTensorOp with dynamic entries and custom result type. If the type
-// passed is nullptr, it is inferred.
-void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result,
-                              RankedTensorType resultType, Value source,
-                              ValueRange offsets, ValueRange sizes,
-                              ValueRange strides,
-                              ArrayRef<NamedAttribute> attrs) {
-  SmallVector<OpFoldResult> offsetValues = llvm::to_vector<4>(
-      llvm::map_range(offsets, [](Value v) -> OpFoldResult { return v; }));
-  SmallVector<OpFoldResult> sizeValues = llvm::to_vector<4>(
-      llvm::map_range(sizes, [](Value v) -> OpFoldResult { return v; }));
-  SmallVector<OpFoldResult> strideValues = llvm::to_vector<4>(
-      llvm::map_range(strides, [](Value v) -> OpFoldResult { return v; }));
-  build(b, result, resultType, source, offsetValues, sizeValues, strideValues);
-}
-
-// Build a SubTensorOp with dynamic entries and inferred result type.
-void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result,
-                              Value source, ValueRange offsets,
-                              ValueRange sizes, ValueRange strides,
-                              ArrayRef<NamedAttribute> attrs) {
-  build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs);
-}
-
-enum SubTensorVerificationResult {
-  Success,
-  RankTooLarge,
-  SizeMismatch,
-  ElemTypeMismatch,
-};
-
-/// Checks if `original` Type type can be rank reduced to `reduced` type.
-/// This function is slight variant of `is subsequence` algorithm where
-/// not matching dimension must be 1.
-static SubTensorVerificationResult
-isRankReducedType(Type originalType, Type candidateReducedType,
-                  std::string *errMsg = nullptr) {
-  if (originalType == candidateReducedType)
-    return SubTensorVerificationResult::Success;
-  if (!originalType.isa<RankedTensorType>())
-    return SubTensorVerificationResult::Success;
-  if (originalType.isa<RankedTensorType>() &&
-      !candidateReducedType.isa<RankedTensorType>())
-    return SubTensorVerificationResult::Success;
-
-  ShapedType originalShapedType = originalType.cast<ShapedType>();
-  ShapedType candidateReducedShapedType =
-      candidateReducedType.cast<ShapedType>();
-
-  // Rank and size logic is valid for all ShapedTypes.
-  ArrayRef<int64_t> originalShape = originalShapedType.getShape();
-  ArrayRef<int64_t> candidateReducedShape =
-      candidateReducedShapedType.getShape();
-  unsigned originalRank = originalShape.size(),
-           candidateReducedRank = candidateReducedShape.size();
-  if (candidateReducedRank > originalRank)
-    return SubTensorVerificationResult::RankTooLarge;
-
-  auto optionalUnusedDimsMask =
-      computeRankReductionMask(originalShape, candidateReducedShape);
-
-  // Sizes cannot be matched in case empty vector is returned.
-  if (!optionalUnusedDimsMask.hasValue())
-    return SubTensorVerificationResult::SizeMismatch;
-
-  if (originalShapedType.getElementType() !=
-      candidateReducedShapedType.getElementType())
-    return SubTensorVerificationResult::ElemTypeMismatch;
-
-  // We are done for the tensor case.
-  if (originalType.isa<RankedTensorType>())
-    return SubTensorVerificationResult::Success;
-
-  return SubTensorVerificationResult::Success;
-}
-
-template <typename OpTy>
-static LogicalResult
-produceSubTensorErrorMsg(SubTensorVerificationResult result, OpTy op,
-                         Type expectedType, StringRef errMsg = "") {
-  auto memrefType = expectedType.cast<ShapedType>();
-  switch (result) {
-  case SubTensorVerificationResult::Success:
-    return success();
-  case SubTensorVerificationResult::RankTooLarge:
-    return op.emitError("expected result rank to be smaller or equal to ")
-           << "the source rank. " << errMsg;
-  case SubTensorVerificationResult::SizeMismatch:
-    return op.emitError("expected result type to be ")
-           << expectedType
-           << " or a rank-reduced version. (mismatch of result sizes) "
-           << errMsg;
-  case SubTensorVerificationResult::ElemTypeMismatch:
-    return op.emitError("expected result element type to be ")
-           << memrefType.getElementType() << errMsg;
-  }
-  llvm_unreachable("unexpected subtensor verification result");
-}
-/// Verifier for SubTensorOp.
-static LogicalResult verify(SubTensorOp op) {
-  // Verify result type against inferred type.
-  auto expectedType = SubTensorOp::inferResultType(
-      op.getSourceType(), extractFromI64ArrayAttr(op.static_offsets()),
-      extractFromI64ArrayAttr(op.static_sizes()),
-      extractFromI64ArrayAttr(op.static_strides()));
-  auto result = isRankReducedType(expectedType, op.getType());
-  return produceSubTensorErrorMsg(result, op, expectedType);
-}
-
-/// Infer the canonical type of the result of a subtensor operation. Returns a
-/// type with rank `resultRank` that is either the rank of the rank-reduced
-/// type, or the non-rank-reduced type.
-static RankedTensorType getCanonicalSubTensorResultType(
-    unsigned resultRank, RankedTensorType sourceType,
-    ArrayRef<OpFoldResult> mixedOffsets, ArrayRef<OpFoldResult> mixedSizes,
-    ArrayRef<OpFoldResult> mixedStrides) {
-  auto resultType =
-      SubTensorOp::inferRankReducedResultType(
-          resultRank, sourceType, mixedOffsets, mixedSizes, mixedStrides)
-          .cast<RankedTensorType>();
-  if (resultType.getRank() != resultRank) {
-    resultType = SubTensorOp::inferResultType(sourceType, mixedOffsets,
-                                              mixedSizes, mixedStrides)
-                     .cast<RankedTensorType>();
-  }
-  return resultType;
-}
-
-namespace {
-/// Pattern to rewrite a subtensor op with tensor::Cast arguments.
-/// This essentially pushes memref_cast past its consuming subtensor when
-/// `canFoldIntoConsumerOp` is true.
-///
-/// Example:
-/// ```
-///   %0 = tensorcast %V : tensor<16x16xf32> to tensor<?x?xf32>
-///   %1 = subtensor %0[0, 0][3, 4][1, 1] : tensor<?x?xf32> to tensor<3x4xf32>
-/// ```
-/// is rewritten into:
-/// ```
-///   %0 = subtensor %V[0, 0][3, 4][1, 1] : tensor<16x16xf32> to tensor<3x4xf32>
-///   %1 = tensor.cast %0: tensor<3x4xf32> to tensor<3x4xf32>
-/// ```
-class SubTensorOpCastFolder final : public OpRewritePattern<SubTensorOp> {
-public:
-  using OpRewritePattern<SubTensorOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(SubTensorOp subTensorOp,
-                                PatternRewriter &rewriter) const override {
-    // Any constant operand, just return to let SubViewOpConstantFolder kick in.
-    if (llvm::any_of(subTensorOp.getOperands(), [](Value operand) {
-          return matchPattern(operand, matchConstantIndex());
-        }))
-      return failure();
-
-    auto castOp = subTensorOp.source().getDefiningOp<tensor::CastOp>();
-    if (!castOp)
-      return failure();
-
-    if (!canFoldIntoConsumerOp(castOp))
-      return failure();
-
-    /// Deduce the type of the result to use for the canonicalized operation.
-    RankedTensorType resultType = getCanonicalSubTensorResultType(
-        subTensorOp.getType().getRank(), subTensorOp.getSourceType(),
-        subTensorOp.getMixedOffsets(), subTensorOp.getMixedSizes(),
-        subTensorOp.getMixedStrides());
-    Value newSubTensor = rewriter.create<SubTensorOp>(
-        subTensorOp.getLoc(), resultType, castOp.source(),
-        subTensorOp.offsets(), subTensorOp.sizes(), subTensorOp.strides(),
-        subTensorOp.static_offsets(), subTensorOp.static_sizes(),
-        subTensorOp.static_strides());
-    rewriter.replaceOpWithNewOp<tensor::CastOp>(
-        subTensorOp, subTensorOp.getType(), newSubTensor);
-    return success();
-  }
-};
-} // namespace
-
-/// Return the canonical type of the result of a subtensor.
-struct SubTensorReturnTypeCanonicalizer {
-  RankedTensorType operator()(SubTensorOp op,
-                              ArrayRef<OpFoldResult> mixedOffsets,
-                              ArrayRef<OpFoldResult> mixedSizes,
-                              ArrayRef<OpFoldResult> mixedStrides) {
-    return getCanonicalSubTensorResultType(op.getType().getRank(),
-                                           op.getSourceType(), mixedOffsets,
-                                           mixedSizes, mixedStrides);
-  }
-};
-
-/// A canonicalizer wrapper to replace SubTensorOps.
-struct SubTensorCanonicalizer {
-  void operator()(PatternRewriter &rewriter, SubTensorOp op,
-                  SubTensorOp newOp) {
-    Value replacement = newOp.getResult();
-    if (replacement.getType() != op.getType())
-      replacement = rewriter.create<tensor::CastOp>(op.getLoc(), op.getType(),
-                                                    replacement);
-    rewriter.replaceOp(op, replacement);
-  }
-};
-
-void SubTensorOp::getCanonicalizationPatterns(RewritePatternSet &results,
-                                              MLIRContext *context) {
-  results.add<OpWithOffsetSizesAndStridesConstantArgumentFolder<
-                  SubTensorOp, SubTensorReturnTypeCanonicalizer,
-                  SubTensorCanonicalizer>,
-              SubTensorOpCastFolder>(context);
-}
-
-//
-static LogicalResult
-foldIdentityOffsetSizeAndStrideOpInterface(OffsetSizeAndStrideOpInterface op,
-                                           ShapedType shapedType) {
-  OpBuilder b(op.getContext());
-  for (OpFoldResult ofr : op.getMixedOffsets())
-    if (!isEqualConstantIntOrValue(ofr, b.getIndexAttr(0)))
-      return failure();
-  // Rank-reducing noops only need to inspect the leading dimensions: llvm::zip
-  // is appropriate.
-  auto shape = shapedType.getShape();
-  for (auto it : llvm::zip(op.getMixedSizes(), shape))
-    if (!isEqualConstantIntOrValue(std::get<0>(it),
-                                   b.getIndexAttr(std::get<1>(it))))
-      return failure();
-  for (OpFoldResult ofr : op.getMixedStrides())
-    if (!isEqualConstantIntOrValue(ofr, b.getIndexAttr(1)))
-      return failure();
-  return success();
-}
-
-OpFoldResult SubTensorOp::fold(ArrayRef<Attribute>) {
-  if (getSourceType() == getType() &&
-      succeeded(foldIdentityOffsetSizeAndStrideOpInterface(*this, getType())))
-    return this->source();
-  return OpFoldResult();
-}
-
-//===----------------------------------------------------------------------===//
-// SubTensorInsertOp
-//===----------------------------------------------------------------------===//
-
-// Build a SubTensorInsertOp with mixed static and dynamic entries.
-void mlir::SubTensorInsertOp::build(OpBuilder &b, OperationState &result,
-                                    Value source, Value dest,
-                                    ArrayRef<OpFoldResult> offsets,
-                                    ArrayRef<OpFoldResult> sizes,
-                                    ArrayRef<OpFoldResult> strides,
-                                    ArrayRef<NamedAttribute> attrs) {
-  SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
-  SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
-  dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets,
-                             ShapedType::kDynamicStrideOrOffset);
-  dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes,
-                             ShapedType::kDynamicSize);
-  dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides,
-                             ShapedType::kDynamicStrideOrOffset);
-  build(b, result, dest.getType(), source, dest, dynamicOffsets, dynamicSizes,
-        dynamicStrides, b.getI64ArrayAttr(staticOffsets),
-        b.getI64ArrayAttr(staticSizes), b.getI64ArrayAttr(staticStrides));
-  result.addAttributes(attrs);
-}
-
-// Build a SubTensorInsertOp with dynamic entries.
-void mlir::SubTensorInsertOp::build(OpBuilder &b, OperationState &result,
-                                    Value source, Value dest,
-                                    ValueRange offsets, ValueRange sizes,
-                                    ValueRange strides,
-                                    ArrayRef<NamedAttribute> attrs) {
-  SmallVector<OpFoldResult> offsetValues = llvm::to_vector<4>(
-      llvm::map_range(offsets, [](Value v) -> OpFoldResult { return v; }));
-  SmallVector<OpFoldResult> sizeValues = llvm::to_vector<4>(
-      llvm::map_range(sizes, [](Value v) -> OpFoldResult { return v; }));
-  SmallVector<OpFoldResult> strideValues = llvm::to_vector<4>(
-      llvm::map_range(strides, [](Value v) -> OpFoldResult { return v; }));
-  build(b, result, source, dest, offsetValues, sizeValues, strideValues);
-}
-
-OpFoldResult SubTensorInsertOp::fold(ArrayRef<Attribute>) {
-  if (getSourceType().hasStaticShape() && getType().hasStaticShape() &&
-      getSourceType() == getType() &&
-      succeeded(foldIdentityOffsetSizeAndStrideOpInterface(*this, getType())))
-    return this->source();
-  return OpFoldResult();
-}
-
-namespace {
-/// Pattern to rewrite a subtensor_insert op with constant arguments.
-class SubTensorInsertOpConstantArgumentFolder final
-    : public OpRewritePattern<SubTensorInsertOp> {
-public:
-  using OpRewritePattern<SubTensorInsertOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(SubTensorInsertOp subTensorInsertOp,
-                                PatternRewriter &rewriter) const override {
-    // No constant operand, just return.
-    if (llvm::none_of(subTensorInsertOp.getOperands(), [](Value operand) {
-          return matchPattern(operand, matchConstantIndex());
-        }))
-      return failure();
-
-    // At least one of offsets/sizes/strides is a new constant.
-    // Form the new list of operands and constant attributes from the
-    // existing.
-    SmallVector<OpFoldResult> mixedOffsets(subTensorInsertOp.getMixedOffsets());
-    SmallVector<OpFoldResult> mixedSizes(subTensorInsertOp.getMixedSizes());
-    SmallVector<OpFoldResult> mixedStrides(subTensorInsertOp.getMixedStrides());
-    canonicalizeSubViewPart(mixedOffsets, ShapedType::isDynamicStrideOrOffset);
-    canonicalizeSubViewPart(mixedSizes, ShapedType::isDynamic);
-    canonicalizeSubViewPart(mixedStrides, ShapedType::isDynamicStrideOrOffset);
-
-    // Create the new op in canonical form.
-    rewriter.replaceOpWithNewOp<SubTensorInsertOp>(
-        subTensorInsertOp, subTensorInsertOp.source(), subTensorInsertOp.dest(),
-        mixedOffsets, mixedSizes, mixedStrides);
-    return success();
-  }
-};
-
-/// Fold tensor_casts with subtensor_insert operations.
-struct SubTensorInsertOpCastFolder final
-    : public OpRewritePattern<SubTensorInsertOp> {
-  using OpRewritePattern<SubTensorInsertOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(SubTensorInsertOp subTensorInsertOp,
-                                PatternRewriter &rewriter) const override {
-    if (llvm::any_of(subTensorInsertOp.getOperands(), [](Value operand) {
-          return matchPattern(operand, matchConstantIndex());
-        }))
-      return failure();
-
-    auto getSourceOfCastOp = [](Value v) -> Optional<Value> {
-      auto castOp = v.getDefiningOp<tensor::CastOp>();
-      if (!castOp || !canFoldIntoConsumerOp(castOp))
-        return llvm::None;
-      return castOp.source();
-    };
-    Optional<Value> sourceCastSource =
-        getSourceOfCastOp(subTensorInsertOp.source());
-    Optional<Value> destCastSource =
-        getSourceOfCastOp(subTensorInsertOp.dest());
-    if (!sourceCastSource && !destCastSource)
-      return failure();
-
-    Value replacement = rewriter.create<SubTensorInsertOp>(
-        subTensorInsertOp.getLoc(),
-        (sourceCastSource ? *sourceCastSource : subTensorInsertOp.source()),
-        (destCastSource ? *destCastSource : subTensorInsertOp.dest()),
-        subTensorInsertOp.getMixedOffsets(), subTensorInsertOp.getMixedSizes(),
-        subTensorInsertOp.getMixedStrides());
-
-    if (replacement.getType() != subTensorInsertOp.getType()) {
-      replacement = rewriter.create<tensor::CastOp>(
-          subTensorInsertOp.getLoc(), subTensorInsertOp.getType(), replacement);
-    }
-    rewriter.replaceOp(subTensorInsertOp, replacement);
-    return success();
-  }
-};
-} // namespace
-
-void SubTensorInsertOp::getCanonicalizationPatterns(RewritePatternSet &results,
-                                                    MLIRContext *context) {
-  results.add<SubTensorInsertOpConstantArgumentFolder,
-              SubTensorInsertOpCastFolder>(context);
-}
-
 //===----------------------------------------------------------------------===//
 // SwitchOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
--- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "mlir/Dialect/StandardOps/Utils/Utils.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Builders.h"
@@ -25,7 +26,7 @@
 /// source tensor. This is useful to fold a tensor.cast into a consuming op and
 /// implement canonicalization patterns for ops in different dialects that may
 /// consume the results of tensor.cast operations. Such foldable tensor.cast
-/// operations are typically inserted as `subtensor` ops and are canonicalized,
+/// operations are typically inserted as `slice` ops and are canonicalized,
 /// to preserve the type compatibility of their uses.
 ///
 /// Returns true when all conditions are met:
@@ -511,6 +512,530 @@
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// ExtractSliceOp
+//===----------------------------------------------------------------------===//
+
+/// Helper function to dispatch an OpFoldResult into either the `dynamicVec` if
+/// it is a Value or into `staticVec` if it is an IntegerAttr.
+/// In the case of a Value, a copy of the `sentinel` value is also pushed to
+/// `staticVec`. This is useful to extract mixed static and dynamic entries that
+/// come from an AttrSizedOperandSegments trait.
+static void dispatchIndexOpFoldResult(OpFoldResult ofr,
+                                      SmallVectorImpl<Value> &dynamicVec,
+                                      SmallVectorImpl<int64_t> &staticVec,
+                                      int64_t sentinel) {
+  if (auto v = ofr.dyn_cast<Value>()) {
+    dynamicVec.push_back(v);
+    staticVec.push_back(sentinel);
+    return;
+  }
+  APInt apInt = ofr.dyn_cast<Attribute>().cast<IntegerAttr>().getValue();
+  staticVec.push_back(apInt.getSExtValue());
+}
+
+static void dispatchIndexOpFoldResults(ArrayRef<OpFoldResult> ofrs,
+                                       SmallVectorImpl<Value> &dynamicVec,
+                                       SmallVectorImpl<int64_t> &staticVec,
+                                       int64_t sentinel) {
+  for (auto ofr : ofrs)
+    dispatchIndexOpFoldResult(ofr, dynamicVec, staticVec, sentinel);
+}
+
+/// An extract_slice op result type can be fully inferred from the source type
+/// and the static representation of offsets, sizes and strides. Special
+/// sentinels encode the dynamic case.
+Type ExtractSliceOp::inferResultType(RankedTensorType sourceRankedTensorType,
+                                     ArrayRef<int64_t> leadingStaticOffsets,
+                                     ArrayRef<int64_t> leadingStaticSizes,
+                                     ArrayRef<int64_t> leadingStaticStrides) {
+  // An extract_slice op may specify only a leading subset of offset/sizes/
+  // strides in which case we complete with offset=0, sizes from memref type and
+  // strides=1.
+  unsigned rank = sourceRankedTensorType.getRank();
+  assert(leadingStaticSizes.size() <= rank &&
+         "unexpected leadingStaticSizes overflow");
+  auto staticSizes = llvm::to_vector<4>(leadingStaticSizes);
+  unsigned numTrailingSizes = rank - staticSizes.size();
+  llvm::append_range(staticSizes, sourceRankedTensorType.getShape().take_back(
+                                      numTrailingSizes));
+  return RankedTensorType::get(staticSizes,
+                               sourceRankedTensorType.getElementType());
+}
+
+/// Extract int64_t values from the assumed ArrayAttr of IntegerAttr.
+static SmallVector<int64_t, 4> extractFromI64ArrayAttr(Attribute attr) {
+  return llvm::to_vector<4>(
+      llvm::map_range(attr.cast<ArrayAttr>(), [](Attribute a) -> int64_t {
+        return a.cast<IntegerAttr>().getInt();
+      }));
+}
+
+Type ExtractSliceOp::inferResultType(
+    RankedTensorType sourceRankedTensorType,
+    ArrayRef<OpFoldResult> leadingStaticOffsets,
+    ArrayRef<OpFoldResult> leadingStaticSizes,
+    ArrayRef<OpFoldResult> leadingStaticStrides) {
+  SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
+  SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
+  dispatchIndexOpFoldResults(leadingStaticOffsets, dynamicOffsets,
+                             staticOffsets, ShapedType::kDynamicStrideOrOffset);
+  dispatchIndexOpFoldResults(leadingStaticSizes, dynamicSizes, staticSizes,
+                             ShapedType::kDynamicSize);
+  dispatchIndexOpFoldResults(leadingStaticStrides, dynamicStrides,
+                             staticStrides, ShapedType::kDynamicStrideOrOffset);
+  return ExtractSliceOp::inferResultType(sourceRankedTensorType, staticOffsets,
+                                         staticSizes, staticStrides);
+}
+
+/// An extract_slice op result type can be fully inferred from the source type
+/// and the static representation of offsets, sizes and strides. Special
+/// sentinels encode the dynamic case.
+Type ExtractSliceOp::inferRankReducedResultType(
+    unsigned resultRank, RankedTensorType sourceRankedTensorType,
+    ArrayRef<int64_t> leadingStaticOffsets,
+    ArrayRef<int64_t> leadingStaticSizes,
+    ArrayRef<int64_t> leadingStaticStrides) {
+  auto inferredType =
+      inferResultType(sourceRankedTensorType, leadingStaticOffsets,
+                      leadingStaticSizes, leadingStaticStrides)
+          .cast<RankedTensorType>();
+  int rankDiff = inferredType.getRank() - resultRank;
+  if (rankDiff > 0) {
+    auto shape = inferredType.getShape();
+    llvm::SmallDenseSet<unsigned> dimsToProject;
+    mlir::getPositionsOfShapeOne(rankDiff, shape, dimsToProject);
+    SmallVector<int64_t> projectedShape;
+    for (unsigned pos = 0, e = shape.size(); pos < e; ++pos)
+      if (!dimsToProject.contains(pos))
+        projectedShape.push_back(shape[pos]);
+    inferredType =
+        RankedTensorType::get(projectedShape, inferredType.getElementType());
+  }
+  return inferredType;
+}
+
+Type ExtractSliceOp::inferRankReducedResultType(
+    unsigned resultRank, RankedTensorType sourceRankedTensorType,
+    ArrayRef<OpFoldResult> leadingStaticOffsets,
+    ArrayRef<OpFoldResult> leadingStaticSizes,
+    ArrayRef<OpFoldResult> leadingStaticStrides) {
+  SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
+  SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
+  dispatchIndexOpFoldResults(leadingStaticOffsets, dynamicOffsets,
+                             staticOffsets, ShapedType::kDynamicStrideOrOffset);
+  dispatchIndexOpFoldResults(leadingStaticSizes, dynamicSizes, staticSizes,
+                             ShapedType::kDynamicSize);
+  dispatchIndexOpFoldResults(leadingStaticStrides, dynamicStrides,
+                             staticStrides, ShapedType::kDynamicStrideOrOffset);
+  return ExtractSliceOp::inferRankReducedResultType(
+      resultRank, sourceRankedTensorType, staticOffsets, staticSizes,
+      staticStrides);
+}
+
+/// Build an ExtractSliceOp with mixed static and dynamic entries and custom
+/// result type. If the type passed is nullptr, it is inferred.
+void ExtractSliceOp::build(OpBuilder &b, OperationState &result,
+                           RankedTensorType resultType, Value source,
+                           ArrayRef<OpFoldResult> offsets,
+                           ArrayRef<OpFoldResult> sizes,
+                           ArrayRef<OpFoldResult> strides,
+                           ArrayRef<NamedAttribute> attrs) {
+  SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
+  SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
+  dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets,
+                             ShapedType::kDynamicStrideOrOffset);
+  dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes,
+                             ShapedType::kDynamicSize);
+  dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides,
+                             ShapedType::kDynamicStrideOrOffset);
+  auto sourceRankedTensorType = source.getType().cast<RankedTensorType>();
+  // Structuring implementation this way avoids duplication between builders.
+  if (!resultType) {
+    resultType =
+        ExtractSliceOp::inferResultType(sourceRankedTensorType, staticOffsets,
+                                        staticSizes, staticStrides)
+            .cast<RankedTensorType>();
+  }
+  build(b, result, resultType, source, dynamicOffsets, dynamicSizes,
+        dynamicStrides, b.getI64ArrayAttr(staticOffsets),
+        b.getI64ArrayAttr(staticSizes), b.getI64ArrayAttr(staticStrides));
+  result.addAttributes(attrs);
+}
+
+/// Build an ExtractSliceOp with mixed static and dynamic entries and inferred
+/// result type.
+void ExtractSliceOp::build(OpBuilder &b, OperationState &result, Value source,
+                           ArrayRef<OpFoldResult> offsets,
+                           ArrayRef<OpFoldResult> sizes,
+                           ArrayRef<OpFoldResult> strides,
+                           ArrayRef<NamedAttribute> attrs) {
+  build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs);
+}
+
+/// Build an ExtractSliceOp with dynamic entries and custom result type. If the
+/// type passed is nullptr, it is inferred.
+void ExtractSliceOp::build(OpBuilder &b, OperationState &result,
+                           RankedTensorType resultType, Value source,
+                           ValueRange offsets, ValueRange sizes,
+                           ValueRange strides, ArrayRef<NamedAttribute> attrs) {
+  SmallVector<OpFoldResult> offsetValues = llvm::to_vector<4>(
+      llvm::map_range(offsets, [](Value v) -> OpFoldResult { return v; }));
+  SmallVector<OpFoldResult> sizeValues = llvm::to_vector<4>(
+      llvm::map_range(sizes, [](Value v) -> OpFoldResult { return v; }));
+  SmallVector<OpFoldResult> strideValues = llvm::to_vector<4>(
+      llvm::map_range(strides, [](Value v) -> OpFoldResult { return v; }));
+  build(b, result, resultType, source, offsetValues, sizeValues, strideValues);
+}
+
+/// Build an ExtractSliceOp with dynamic entries and inferred result type.
+void ExtractSliceOp::build(OpBuilder &b, OperationState &result, Value source,
+                           ValueRange offsets, ValueRange sizes,
+                           ValueRange strides, ArrayRef<NamedAttribute> attrs) {
+  build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs);
+}
+
+enum SliceVerificationResult {
+  Success,
+  RankTooLarge,
+  SizeMismatch,
+  ElemTypeMismatch,
+};
+
+/// Checks if `original` Type type can be rank reduced to `reduced` type.
+/// This function is slight variant of `is subsequence` algorithm where
+/// not matching dimension must be 1.
+static SliceVerificationResult
+isRankReducedType(Type originalType, Type candidateReducedType,
+                  std::string *errMsg = nullptr) {
+  if (originalType == candidateReducedType)
+    return SliceVerificationResult::Success;
+  if (!originalType.isa<RankedTensorType>())
+    return SliceVerificationResult::Success;
+  if (originalType.isa<RankedTensorType>() &&
+      !candidateReducedType.isa<RankedTensorType>())
+    return SliceVerificationResult::Success;
+
+  ShapedType originalShapedType = originalType.cast<ShapedType>();
+  ShapedType candidateReducedShapedType =
+      candidateReducedType.cast<ShapedType>();
+
+  // Rank and size logic is valid for all ShapedTypes.
+  ArrayRef<int64_t> originalShape = originalShapedType.getShape();
+  ArrayRef<int64_t> candidateReducedShape =
+      candidateReducedShapedType.getShape();
+  unsigned originalRank = originalShape.size(),
+           candidateReducedRank = candidateReducedShape.size();
+  if (candidateReducedRank > originalRank)
+    return SliceVerificationResult::RankTooLarge;
+
+  auto optionalUnusedDimsMask =
+      computeRankReductionMask(originalShape, candidateReducedShape);
+
+  // Sizes cannot be matched in case empty vector is returned.
+  if (!optionalUnusedDimsMask.hasValue())
+    return SliceVerificationResult::SizeMismatch;
+
+  if (originalShapedType.getElementType() !=
+      candidateReducedShapedType.getElementType())
+    return SliceVerificationResult::ElemTypeMismatch;
+
+  // We are done for the tensor case.
+  if (originalType.isa<RankedTensorType>())
+    return SliceVerificationResult::Success;
+
+  return SliceVerificationResult::Success;
+}
+
+template <typename OpTy>
+static LogicalResult produceSliceErrorMsg(SliceVerificationResult result,
+                                          OpTy op, Type expectedType,
+                                          StringRef errMsg = "") {
+  auto memrefType = expectedType.cast<ShapedType>();
+  switch (result) {
+  case SliceVerificationResult::Success:
+    return success();
+  case SliceVerificationResult::RankTooLarge:
+    return op.emitError("expected result rank to be smaller or equal to ")
+           << "the source rank. " << errMsg;
+  case SliceVerificationResult::SizeMismatch:
+    return op.emitError("expected result type to be ")
+           << expectedType
+           << " or a rank-reduced version. (mismatch of result sizes) "
+           << errMsg;
+  case SliceVerificationResult::ElemTypeMismatch:
+    return op.emitError("expected result element type to be ")
+           << memrefType.getElementType() << errMsg;
+  }
+  llvm_unreachable("unexpected extract_slice op verification result");
+}
+
+/// Verifier for ExtractSliceOp.
+static LogicalResult verify(ExtractSliceOp op) {
+  // Verify result type against inferred type.
+  auto expectedType = ExtractSliceOp::inferResultType(
+      op.getSourceType(), extractFromI64ArrayAttr(op.static_offsets()),
+      extractFromI64ArrayAttr(op.static_sizes()),
+      extractFromI64ArrayAttr(op.static_strides()));
+  auto result = isRankReducedType(expectedType, op.getType());
+  return produceSliceErrorMsg(result, op, expectedType);
+}
+
+/// Infer the canonical type of the result of an extract_slice op. Returns a
+/// type with rank `resultRank` that is either the rank of the rank-reduced
+/// type, or the non-rank-reduced type.
+static RankedTensorType
+getCanonicalSliceResultType(unsigned resultRank, RankedTensorType sourceType,
+                            ArrayRef<OpFoldResult> mixedOffsets,
+                            ArrayRef<OpFoldResult> mixedSizes,
+                            ArrayRef<OpFoldResult> mixedStrides) {
+  auto resultType =
+      ExtractSliceOp::inferRankReducedResultType(
+          resultRank, sourceType, mixedOffsets, mixedSizes, mixedStrides)
+          .cast<RankedTensorType>();
+  if (resultType.getRank() != resultRank) {
+    resultType = ExtractSliceOp::inferResultType(sourceType, mixedOffsets,
+                                                 mixedSizes, mixedStrides)
+                     .cast<RankedTensorType>();
+  }
+  return resultType;
+}
+
+namespace {
+/// Pattern to rewrite an extract_slice op with tensor::Cast arguments.
+/// This essentially pushes memref_cast past its consuming slice when
+/// `canFoldIntoConsumerOp` is true.
+///
+/// Example:
+/// ```
+///   %0 = tensor.cast %V : tensor<16x16xf32> to tensor<?x?xf32>
+///   %1 = tensor.extract_slice %0[0, 0][3, 4][1, 1] : tensor<?x?xf32> to
+///   tensor<3x4xf32>
+/// ```
+/// is rewritten into:
+/// ```
+///   %0 = tensor.extract_slice %V[0, 0][3, 4][1, 1] : tensor<16x16xf32> to
+///   tensor<3x4xf32> %1 = tensor.cast %0: tensor<3x4xf32> to tensor<3x4xf32>
+/// ```
+class ExtractSliceOpCastFolder final : public OpRewritePattern<ExtractSliceOp> {
+public:
+  using OpRewritePattern<ExtractSliceOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(ExtractSliceOp sliceOp,
+                                PatternRewriter &rewriter) const override {
+    // Any constant operand, just return to let SubViewOpConstantFolder kick in.
+    if (llvm::any_of(sliceOp.getOperands(), [](Value operand) {
+          return matchPattern(operand, matchConstantIndex());
+        }))
+      return failure();
+
+    auto castOp = sliceOp.source().getDefiningOp<tensor::CastOp>();
+    if (!castOp)
+      return failure();
+
+    if (!canFoldIntoConsumerOp(castOp))
+      return failure();
+
+    /// Deduce the type of the result to use for the canonicalized operation.
+    RankedTensorType resultType = getCanonicalSliceResultType(
+        sliceOp.getType().getRank(), sliceOp.getSourceType(),
+        sliceOp.getMixedOffsets(), sliceOp.getMixedSizes(),
+        sliceOp.getMixedStrides());
+    Value newSlice = rewriter.create<ExtractSliceOp>(
+        sliceOp.getLoc(), resultType, castOp.source(), sliceOp.offsets(),
+        sliceOp.sizes(), sliceOp.strides(), sliceOp.static_offsets(),
+        sliceOp.static_sizes(), sliceOp.static_strides());
+    rewriter.replaceOpWithNewOp<tensor::CastOp>(sliceOp, sliceOp.getType(),
+                                                newSlice);
+    return success();
+  }
+};
+} // namespace
+
+/// Return the canonical type of the result of an extract_slice op.
+struct SliceReturnTypeCanonicalizer {
+  RankedTensorType operator()(ExtractSliceOp op,
+                              ArrayRef<OpFoldResult> mixedOffsets,
+                              ArrayRef<OpFoldResult> mixedSizes,
+                              ArrayRef<OpFoldResult> mixedStrides) {
+    return getCanonicalSliceResultType(op.getType().getRank(),
+                                       op.getSourceType(), mixedOffsets,
+                                       mixedSizes, mixedStrides);
+  }
+};
+
+/// A canonicalizer wrapper to replace ExtractSliceOps.
+struct SliceCanonicalizer {
+  void operator()(PatternRewriter &rewriter, ExtractSliceOp op,
+                  ExtractSliceOp newOp) {
+    Value replacement = newOp.getResult();
+    if (replacement.getType() != op.getType())
+      replacement = rewriter.create<tensor::CastOp>(op.getLoc(), op.getType(),
+                                                    replacement);
+    rewriter.replaceOp(op, replacement);
+  }
+};
+
+void ExtractSliceOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                                 MLIRContext *context) {
+  results.add<
+      OpWithOffsetSizesAndStridesConstantArgumentFolder<
+          ExtractSliceOp, SliceReturnTypeCanonicalizer, SliceCanonicalizer>,
+      ExtractSliceOpCastFolder>(context);
+}
+
+//
+static LogicalResult
+foldIdentityOffsetSizeAndStrideOpInterface(OffsetSizeAndStrideOpInterface op,
+                                           ShapedType shapedType) {
+  OpBuilder b(op.getContext());
+  for (OpFoldResult ofr : op.getMixedOffsets())
+    if (!isEqualConstantIntOrValue(ofr, b.getIndexAttr(0)))
+      return failure();
+  // Rank-reducing noops only need to inspect the leading dimensions: llvm::zip
+  // is appropriate.
+  auto shape = shapedType.getShape();
+  for (auto it : llvm::zip(op.getMixedSizes(), shape))
+    if (!isEqualConstantIntOrValue(std::get<0>(it),
+                                   b.getIndexAttr(std::get<1>(it))))
+      return failure();
+  for (OpFoldResult ofr : op.getMixedStrides())
+    if (!isEqualConstantIntOrValue(ofr, b.getIndexAttr(1)))
+      return failure();
+  return success();
+}
+
+OpFoldResult ExtractSliceOp::fold(ArrayRef<Attribute>) {
+  if (getSourceType() == getType() &&
+      succeeded(foldIdentityOffsetSizeAndStrideOpInterface(*this, getType())))
+    return this->source();
+  return OpFoldResult();
+}
+
+//===----------------------------------------------------------------------===//
+// InsertSliceOp
+//===----------------------------------------------------------------------===//
+
+// Build a InsertSliceOp with mixed static and dynamic entries.
+void InsertSliceOp::build(OpBuilder &b, OperationState &result, Value source,
+                          Value dest, ArrayRef<OpFoldResult> offsets,
+                          ArrayRef<OpFoldResult> sizes,
+                          ArrayRef<OpFoldResult> strides,
+                          ArrayRef<NamedAttribute> attrs) {
+  SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
+  SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
+  dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets,
+                             ShapedType::kDynamicStrideOrOffset);
+  dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes,
+                             ShapedType::kDynamicSize);
+  dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides,
+                             ShapedType::kDynamicStrideOrOffset);
+  build(b, result, dest.getType(), source, dest, dynamicOffsets, dynamicSizes,
+        dynamicStrides, b.getI64ArrayAttr(staticOffsets),
+        b.getI64ArrayAttr(staticSizes), b.getI64ArrayAttr(staticStrides));
+  result.addAttributes(attrs);
+}
+
+// Build a InsertSliceOp with dynamic entries.
+void InsertSliceOp::build(OpBuilder &b, OperationState &result, Value source,
+                          Value dest, ValueRange offsets, ValueRange sizes,
+                          ValueRange strides, ArrayRef<NamedAttribute> attrs) {
+  SmallVector<OpFoldResult> offsetValues = llvm::to_vector<4>(
+      llvm::map_range(offsets, [](Value v) -> OpFoldResult { return v; }));
+  SmallVector<OpFoldResult> sizeValues = llvm::to_vector<4>(
+      llvm::map_range(sizes, [](Value v) -> OpFoldResult { return v; }));
+  SmallVector<OpFoldResult> strideValues = llvm::to_vector<4>(
+      llvm::map_range(strides, [](Value v) -> OpFoldResult { return v; }));
+  build(b, result, source, dest, offsetValues, sizeValues, strideValues);
+}
+
+OpFoldResult InsertSliceOp::fold(ArrayRef<Attribute>) {
+  if (getSourceType().hasStaticShape() && getType().hasStaticShape() &&
+      getSourceType() == getType() &&
+      succeeded(foldIdentityOffsetSizeAndStrideOpInterface(*this, getType())))
+    return this->source();
+  return OpFoldResult();
+}
+
+namespace {
+/// Pattern to rewrite a insert_slice op with constant arguments.
+class InsertSliceOpConstantArgumentFolder final
+    : public OpRewritePattern<InsertSliceOp> {
+public:
+  using OpRewritePattern<InsertSliceOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(InsertSliceOp insertSliceOp,
+                                PatternRewriter &rewriter) const override {
+    // No constant operand, just return.
+    if (llvm::none_of(insertSliceOp.getOperands(), [](Value operand) {
+          return matchPattern(operand, matchConstantIndex());
+        }))
+      return failure();
+
+    // At least one of offsets/sizes/strides is a new constant.
+    // Form the new list of operands and constant attributes from the
+    // existing.
+    SmallVector<OpFoldResult> mixedOffsets(insertSliceOp.getMixedOffsets());
+    SmallVector<OpFoldResult> mixedSizes(insertSliceOp.getMixedSizes());
+    SmallVector<OpFoldResult> mixedStrides(insertSliceOp.getMixedStrides());
+    canonicalizeSubViewPart(mixedOffsets, ShapedType::isDynamicStrideOrOffset);
+    canonicalizeSubViewPart(mixedSizes, ShapedType::isDynamic);
+    canonicalizeSubViewPart(mixedStrides, ShapedType::isDynamicStrideOrOffset);
+
+    // Create the new op in canonical form.
+    rewriter.replaceOpWithNewOp<InsertSliceOp>(
+        insertSliceOp, insertSliceOp.source(), insertSliceOp.dest(),
+        mixedOffsets, mixedSizes, mixedStrides);
+    return success();
+  }
+};
+
+/// Fold tensor_casts with insert_slice operations.
+struct InsertSliceOpCastFolder final : public OpRewritePattern<InsertSliceOp> {
+  using OpRewritePattern<InsertSliceOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(InsertSliceOp insertSliceOp,
+                                PatternRewriter &rewriter) const override {
+    if (llvm::any_of(insertSliceOp.getOperands(), [](Value operand) {
+          return matchPattern(operand, matchConstantIndex());
+        }))
+      return failure();
+
+    auto getSourceOfCastOp = [](Value v) -> Optional<Value> {
+      auto castOp = v.getDefiningOp<tensor::CastOp>();
+      if (!castOp || !canFoldIntoConsumerOp(castOp))
+        return llvm::None;
+      return castOp.source();
+    };
+    Optional<Value> sourceCastSource =
+        getSourceOfCastOp(insertSliceOp.source());
+    Optional<Value> destCastSource = getSourceOfCastOp(insertSliceOp.dest());
+    if (!sourceCastSource && !destCastSource)
+      return failure();
+
+    Value replacement = rewriter.create<InsertSliceOp>(
+        insertSliceOp.getLoc(),
+        (sourceCastSource ? *sourceCastSource : insertSliceOp.source()),
+        (destCastSource ? *destCastSource : insertSliceOp.dest()),
+        insertSliceOp.getMixedOffsets(), insertSliceOp.getMixedSizes(),
+        insertSliceOp.getMixedStrides());
+
+    if (replacement.getType() != insertSliceOp.getType()) {
+      replacement = rewriter.create<tensor::CastOp>(
+          insertSliceOp.getLoc(), insertSliceOp.getType(), replacement);
+    }
+    rewriter.replaceOp(insertSliceOp, replacement);
+    return success();
+  }
+};
+} // namespace
+
+void InsertSliceOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                                MLIRContext *context) {
+  results.add<InsertSliceOpConstantArgumentFolder, InsertSliceOpCastFolder>(
+      context);
+}
+
 //===----------------------------------------------------------------------===//
 // TableGen'd op method definitions
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir
--- a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir
+++ b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir
@@ -616,9 +616,9 @@
   // CHECK-NEXT: %[[ISNEG:.*]] = cmpi slt, %[[INDEX]], %[[C0]] : index
   // CHECK-NEXT: %[[SELECT:.*]] = select %[[ISNEG]], %[[POSINDEX]], %[[INDEX]] : index
   // CHECK-NEXT: %[[C1:.*]] = constant 1 : index
-  // CHECK-NEXT: %[[HEAD:.*]] = subtensor %[[SHAPE]][%[[C0]]] [%[[SELECT]]] [%[[C1]]] : tensor<?xindex> to tensor<?xindex>
+  // CHECK-NEXT: %[[HEAD:.*]] = tensor.extract_slice %[[SHAPE]][%[[C0]]] [%[[SELECT]]] [%[[C1]]] : tensor<?xindex> to tensor<?xindex>
   // CHECK-NEXT: %[[TAIL_SIZE:.*]] = subi %[[RANK]], %[[SELECT]] : index
-  // CHECK-NEXT: %[[TAIL:.*]] = subtensor %[[SHAPE]][%[[SELECT]]] [%[[TAIL_SIZE]]] [%[[C1]]] : tensor<?xindex> to tensor<?xindex>
+  // CHECK-NEXT: %[[TAIL:.*]] = tensor.extract_slice %[[SHAPE]][%[[SELECT]]] [%[[TAIL_SIZE]]] [%[[C1]]] : tensor<?xindex> to tensor<?xindex>
   // CHECK-NEXT: return %[[HEAD]], %[[TAIL]] : tensor<?xindex>, tensor<?xindex>
   %head, %tail = "shape.split_at"(%shape, %index) : (tensor<?xindex>, index) -> (tensor<?xindex>, tensor<?xindex>)
   return %head, %tail : tensor<?xindex>, tensor<?xindex>
diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
--- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
+++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
@@ -679,10 +679,10 @@
   // CHECK: [[CST:%.+]] = constant 0.0
   // CHECK: [[FILL:%.+]] = linalg.fill([[INIT]], [[CST]])
   // CHECK: [[ARG0_DIM0:%.+]] = memref.dim %arg0, [[AXIS]]
-  // CHECK: [[INSERT0:%.+]] = subtensor_insert %arg0 into [[FILL]]{{\[}}[[OFFSET]], [[OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
+  // CHECK: [[INSERT0:%.+]] = tensor.insert_slice %arg0 into [[FILL]]{{\[}}[[OFFSET]], [[OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
   // CHECK: [[NEW_OFFSET:%.+]] = addi [[OFFSET]], [[ARG0_DIM0]]
   // CHECK: [[ARG1_DIM0:%.+]] = memref.dim %arg1, [[AXIS]]
-  // CHECK: [[INSERT1:%.+]] = subtensor_insert %arg1 into [[INSERT0]]{{\[}}[[NEW_OFFSET]], [[OFFSET]]] {{\[}}[[ARG1_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
+  // CHECK: [[INSERT1:%.+]] = tensor.insert_slice %arg1 into [[INSERT0]]{{\[}}[[NEW_OFFSET]], [[OFFSET]]] {{\[}}[[ARG1_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
   %0 = "tosa.concat"(%arg0, %arg1) { axis = 0 : i64} : (tensor<5x1xf32>, tensor<6x1xf32>)  -> (tensor<11x1xf32>)
 
   // CHECK: [[AXIS:%.+]] = constant 1
@@ -698,10 +698,10 @@
   // CHECK: [[CST:%.+]] = constant 0.0
   // CHECK: [[FILL:%.+]] = linalg.fill([[INIT]], [[CST]])
   // CHECK: [[ARG0_DIM1:%.+]] = memref.dim %arg0, [[AXIS]]
-  // CHECK: [[INSERT0:%.+]] = subtensor_insert %arg0 into [[FILL]]{{\[}}[[OFFSET]], [[OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
+  // CHECK: [[INSERT0:%.+]] = tensor.insert_slice %arg0 into [[FILL]]{{\[}}[[OFFSET]], [[OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
   // CHECK: [[NEW_OFFSET:%.+]] = addi [[OFFSET]], [[ARG0_DIM1]]
   // CHECK: [[ARG1_DIM1:%.+]] = memref.dim %arg0, [[AXIS]]
-  // CHECK: [[INSERT1:%.+]] = subtensor_insert %arg0 into [[INSERT0]]{{\[}}[[OFFSET]], [[NEW_OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG1_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
+  // CHECK: [[INSERT1:%.+]] = tensor.insert_slice %arg0 into [[INSERT0]]{{\[}}[[OFFSET]], [[NEW_OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG1_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
   %1 = "tosa.concat"(%arg0, %arg0) { axis = 1 : i64} : (tensor<5x1xf32>, tensor<5x1xf32>)  -> (tensor<5x2xf32>)
   return
 }
diff --git a/mlir/test/Conversion/TosaToStandard/tosa-to-standard.mlir b/mlir/test/Conversion/TosaToStandard/tosa-to-standard.mlir
--- a/mlir/test/Conversion/TosaToStandard/tosa-to-standard.mlir
+++ b/mlir/test/Conversion/TosaToStandard/tosa-to-standard.mlir
@@ -12,7 +12,7 @@
 // -----
 
 func @slice(%arg0: tensor<6xf32>) ->() {
-  // CHECK: [[SLICE:%.+]] = subtensor %arg0[2] [1] [1]
+  // CHECK: [[SLICE:%.+]] = tensor.extract_slice %arg0[2] [1] [1]
   %0 = "tosa.slice"(%arg0) {start = [2], size = [1]} : (tensor<6xf32>)  -> (tensor<1xf32>)
   return
 }
diff --git a/mlir/test/Dialect/Linalg/bufferize.mlir b/mlir/test/Dialect/Linalg/bufferize.mlir
--- a/mlir/test/Dialect/Linalg/bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/bufferize.mlir
@@ -166,9 +166,9 @@
 
 func private @make_index() -> index
 
-// CHECK-LABEL: func @bufferize_subtensor(
+// CHECK-LABEL: func @bufferize_slice(
 //  CHECK-SAME:   %[[T:[0-9a-z]*]]: tensor<?x?xf32>
-func @bufferize_subtensor(%t : tensor<?x?xf32>) -> (tensor<2x3xf32>, tensor<2x?xf32>) {
+func @bufferize_slice(%t : tensor<?x?xf32>) -> (tensor<2x3xf32>, tensor<2x?xf32>) {
   //      CHECK: %[[IDX:.*]] = call @make_index() : () -> index
   %i0 = call @make_index() : () -> index
 
@@ -178,14 +178,14 @@
   // CHECK-SAME:   memref<?x?xf32> to memref<2x3xf32, #[[$MAP0]]>
   // CHECK-NEXT: linalg.copy(%[[SM0]], %[[A0]]) : memref<2x3xf32, #[[$MAP0]]>, memref<2x3xf32>
   // CHECK-NEXT: %[[RT0:.*]] = memref.tensor_load %[[A0]] : memref<2x3xf32>
-  %st0 = subtensor %t[0, 0][2, 3][1, 1] : tensor<?x?xf32> to tensor<2x3xf32>
+  %st0 = tensor.extract_slice %t[0, 0][2, 3][1, 1] : tensor<?x?xf32> to tensor<2x3xf32>
 
   // CHECK-NEXT: %[[A1:.*]] = memref.alloc(%[[IDX]]) : memref<2x?xf32>
   // CHECK-NEXT: %[[SM1:.*]] = memref.subview %[[M]][0, %[[IDX]]] [2, %[[IDX]]] [1, 2]
   // CHECK-SAME:   memref<?x?xf32> to memref<2x?xf32, #[[$MAP1]]>
   // CHECK-NEXT: linalg.copy(%[[SM1]], %[[A1]]) : memref<2x?xf32, #[[$MAP1]]>, memref<2x?xf32>
   // CHECK-NEXT: %[[RT1:.*]] = memref.tensor_load %[[A1]] : memref<2x?xf32>
-  %st1 = subtensor %t[0, %i0][2, %i0][1, 2] : tensor<?x?xf32> to tensor<2x?xf32>
+  %st1 = tensor.extract_slice %t[0, %i0][2, %i0][1, 2] : tensor<?x?xf32> to tensor<2x?xf32>
 
   // CHECK-NEXT: return %[[RT0]], %[[RT1]]
   return %st0, %st1 : tensor<2x3xf32>, tensor<2x?xf32>
@@ -198,11 +198,11 @@
 
 func private @make_index() -> index
 
-// CHECK-LABEL: func @bufferize_subtensor_insert(
+// CHECK-LABEL: func @bufferize_insert_slice(
 //  CHECK-SAME:   %[[T:[0-9a-z]*]]: tensor<?x?xf32>
 //  CHECK-SAME:   %[[ST0:[0-9a-z]*]]: tensor<2x3xf32>
 //  CHECK-SAME:   %[[ST1:[0-9a-z]*]]: tensor<2x?xf32>
-func @bufferize_subtensor_insert(%t : tensor<?x?xf32>, %st0 : tensor<2x3xf32>, %st1 : tensor<2x?xf32>) ->
+func @bufferize_insert_slice(%t : tensor<?x?xf32>, %st0 : tensor<2x3xf32>, %st1 : tensor<2x?xf32>) ->
     (tensor<?x?xf32>, tensor<?x?xf32>) {
   %c0 = constant 0 : index
   %c1 = constant 1 : index
@@ -222,7 +222,7 @@
   // CHECK-SAME:   memref<?x?xf32> to memref<2x3xf32, #[[$MAP0]]>
   // CHECK-NEXT: linalg.copy(%[[SM0]], %[[SUBVIEW0]]) : memref<2x3xf32>, memref<2x3xf32, #[[$MAP0]]>
   // CHECK-NEXT: %[[RT0:.*]] = memref.tensor_load %[[M_COPY0]] : memref<?x?xf32>
-  %t0 = subtensor_insert %st0 into %t[0, 0][2, 3][1, 1] : tensor<2x3xf32> into tensor<?x?xf32>
+  %t0 = tensor.insert_slice %st0 into %t[0, 0][2, 3][1, 1] : tensor<2x3xf32> into tensor<?x?xf32>
 
   //  CHECK-DAG: %[[SM1:.*]] = memref.buffer_cast %[[ST1]] : memref<2x?xf32>
   // CHECK-NEXT: %[[M_COPY1:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32>
@@ -231,7 +231,7 @@
   // CHECK-SAME:   memref<?x?xf32> to memref<2x?xf32, #[[$MAP1]]>
   // CHECK-NEXT: linalg.copy(%[[SM1]], %[[SUBVIEW1]]) : memref<2x?xf32>, memref<2x?xf32, #[[$MAP1]]>
   // CHECK-NEXT: %[[RT1:.*]] = memref.tensor_load %[[M_COPY1]] : memref<?x?xf32>
-  %t1 = subtensor_insert %st1 into %t[0, %i0][2, %i0][1, 2] : tensor<2x?xf32> into tensor<?x?xf32>
+  %t1 = tensor.insert_slice %st1 into %t[0, %i0][2, %i0][1, 2] : tensor<2x?xf32> into tensor<?x?xf32>
 
   //     CHECK: return %[[RT0]], %[[RT1]]
   return %t0, %t1: tensor<?x?xf32>, tensor<?x?xf32>
diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir
--- a/mlir/test/Dialect/Linalg/canonicalize.mlir
+++ b/mlir/test/Dialect/Linalg/canonicalize.mlir
@@ -648,15 +648,15 @@
 
 // -----
 
-func @fold_init_tensor_with_subtensor
+func @fold_init_tensor_with_slice
   (%arg0 : index, %arg1 : index) -> tensor<5x?x20xf32>
 {
   %0 = linalg.init_tensor[%arg0, 10, 40] : tensor<?x10x40xf32>
-  %1 = subtensor %0[0, 0, 0] [5, %arg1, 20] [1, 1, 1]
+  %1 = tensor.extract_slice %0[0, 0, 0] [5, %arg1, 20] [1, 1, 1]
     : tensor<?x10x40xf32> to tensor<5x?x20xf32>
   return %1 : tensor<5x?x20xf32>
 }
-//      CHECK: func @fold_init_tensor_with_subtensor
+//      CHECK: func @fold_init_tensor_with_slice
 // CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: index
 // CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: index
 //      CHECK:   %[[T0:.+]] = linalg.init_tensor [5, %[[ARG1]], 20]
@@ -723,13 +723,13 @@
   %1 = linalg.fill(%0, %arg1) : tensor<?x?xf32>, f32 -> tensor<?x?xf32>
   %2 = memref.dim %arg0, %c0 : tensor<?x?xf32>
   %3 = memref.dim %arg0, %c1 : tensor<?x?xf32>
-  %4 = subtensor_insert %arg0 into %1[%arg2, %arg3] [%2, %3] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
+  %4 = tensor.insert_slice %arg0 into %1[%arg2, %arg3] [%2, %3] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
   return %4 : tensor<?x?xf32>
 }
 // CHECK-LABEL: func @propogate_casts
 //       CHECK:   %[[INIT:.+]] = linalg.init_tensor [21, 42]
 //       CHECK:   %[[FILL:.+]] = linalg.fill(%[[INIT]], %{{.+}})
-//       CHECK:   %[[INSERTED:.+]] = subtensor_insert %{{.+}} into %[[FILL]]
+//       CHECK:   %[[INSERTED:.+]] = tensor.insert_slice %{{.+}} into %[[FILL]]
 //       CHECK:   %[[RESULT:.+]] = tensor.cast %[[INSERTED]]
 //       CHECK:   return %[[RESULT]]
 
diff --git a/mlir/test/Dialect/Linalg/comprehensive-func-bufferize-analysis.mlir b/mlir/test/Dialect/Linalg/comprehensive-func-bufferize-analysis.mlir
--- a/mlir/test/Dialect/Linalg/comprehensive-func-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-func-bufferize-analysis.mlir
@@ -6,43 +6,43 @@
 
 // -----
 
-// CHECK-LABEL: func @subtensor_fun
-func @subtensor_fun(%A : tensor<?xf32>, %B : tensor<?xf32> {linalg.inplaceable = true})
+// CHECK-LABEL: func @extract_slice_fun
+func @extract_slice_fun(%A : tensor<?xf32>, %B : tensor<?xf32> {linalg.inplaceable = true})
   -> (tensor<4xf32>, tensor<8xf32>)
 {
-  // subtensor is not used in a write, it is not compelled to bufferize out of
-  // place. Let callers decide whether they want to create aliasing subviews at
-  // all call sites or whether they allocate.
+  // tensor.extract_slice is not used in a write, it is not compelled to
+  // bufferize out of place. Let callers decide whether they want to create
+  // aliasing subviews at all call sites or whether they allocate.
   // This is true irrespective of whether the function argument is inplaceable.
-  //     CHECK: subtensor
+  //     CHECK: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  %r0 = subtensor %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
+  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
 
-  //     CHECK: subtensor
+  //     CHECK: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  %r1 = subtensor %B[0][8][1] : tensor<?xf32> to tensor<8xf32>
+  %r1 = tensor.extract_slice %B[0][8][1] : tensor<?xf32> to tensor<8xf32>
 
   return %r0, %r1: tensor<4xf32>, tensor<8xf32>
 }
 
 // -----
 
-// CHECK-LABEL: func @subtensor_insert_fun
-func @subtensor_insert_fun(
+// CHECK-LABEL: func @insert_slice_fun
+func @insert_slice_fun(
     %A : tensor<?xf32>,
     %B : tensor<?xf32> {linalg.inplaceable = true},
     %C : tensor<4xf32>)
   -> (tensor<?xf32>, tensor<?xf32>)
 {
   // must bufferize out of place.
-  //     CHECK: subtensor_insert
+  //     CHECK: tensor.insert_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
-  %r0 = subtensor_insert %C into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
+  %r0 = tensor.insert_slice %C into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
 
   // bufferizes inplace.
-  //     CHECK: subtensor_insert
+  //     CHECK: tensor.insert_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  %r1 = subtensor_insert %C into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>
+  %r1 = tensor.insert_slice %C into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>
 
   return %r0, %r1: tensor<?xf32>, tensor<?xf32>
 }
@@ -85,34 +85,34 @@
 
 // -----
 
-// CHECK-LABEL: func @subtensor_subtensor
-func @subtensor_subtensor(
+// CHECK-LABEL: func @extract_slice_extract_slice
+func @extract_slice_extract_slice(
     %A : tensor<?xf32> {linalg.inplaceable = true}, %B : tensor<?xf32>)
   -> (tensor<2xf32>, tensor<2xf32>)
 {
-  // subtensor is not used in a write, it is not compelled to bufferize out of
-  // place. Let callers decide whether they want to create aliasing subviews at
-  // all call sites or whether they allocate.
+  // tensor.extract_slice is not used in a write, it is not compelled to
+  // bufferize out of place. Let callers decide whether they want to create
+  // aliasing subviews at all call sites or whether they allocate.
   // This is true irrespective of whether the function argument is inplaceable.
   // CHECK: {__inplace_results_attr__ = ["true"]}
-  %r0 = subtensor %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
+  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
 
   // CHECK: {__inplace_results_attr__ = ["true"]}
-  %r1 = subtensor %r0[0][2][1] : tensor<4xf32> to tensor<2xf32>
+  %r1 = tensor.extract_slice %r0[0][2][1] : tensor<4xf32> to tensor<2xf32>
 
   // CHECK: {__inplace_results_attr__ = ["true"]}
-  %r2 = subtensor %B[0][4][1] : tensor<?xf32> to tensor<4xf32>
+  %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>
 
   // CHECK: {__inplace_results_attr__ = ["true"]}
-  %r3 = subtensor %r2[0][2][1] : tensor<4xf32> to tensor<2xf32>
+  %r3 = tensor.extract_slice %r2[0][2][1] : tensor<4xf32> to tensor<2xf32>
 
   return %r1, %r3: tensor<2xf32>, tensor<2xf32>
 }
 
 // -----
 
-// CHECK-LABEL: func @subtensor_insert_subtensor_insert
-func @subtensor_insert_subtensor_insert(
+// CHECK-LABEL: func @insert_slice_insert_slice
+func @insert_slice_insert_slice(
     %A : tensor<?xf32> {linalg.inplaceable = true},
     %A2 : tensor<4xf32> {linalg.inplaceable = true},
     %A3 : tensor<2xf32> {linalg.inplaceable = true},
@@ -120,102 +120,106 @@
   -> (tensor<?xf32>, tensor<?xf32>)
 {
   // CHECK: {__inplace_results_attr__ = ["true"]}
-  %r0 = subtensor_insert %A3 into %A2[0][2][1] : tensor<2xf32> into tensor<4xf32>
+  %r0 = tensor.insert_slice %A3 into %A2[0][2][1] : tensor<2xf32> into tensor<4xf32>
 
   // CHECK: {__inplace_results_attr__ = ["true"]}
-  %r1 = subtensor_insert %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
+  %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
 
   // CHECK: {__inplace_results_attr__ = ["false"]}
-  %r2 = subtensor_insert %B3 into %B2[0][2][1] : tensor<2xf32> into tensor<4xf32>
+  %r2 = tensor.insert_slice %B3 into %B2[0][2][1] : tensor<2xf32> into tensor<4xf32>
 
   // CHECK: {__inplace_results_attr__ = ["false"]}
-  %r3 = subtensor_insert %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>
+  %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>
 
   return %r1, %r3: tensor<?xf32>, tensor<?xf32>
 }
 
 // -----
 
-// CHECK-LABEL: func @subtensor_nonmatching_subtensor_insert
-func @subtensor_nonmatching_subtensor_insert(
+// CHECK-LABEL: func @extract_slice_nonmatching_insert_slice
+func @extract_slice_nonmatching_insert_slice(
     %A : tensor<?xf32> {linalg.inplaceable = true},
     %B : tensor<?xf32>, %idx: index)
   -> (tensor<?xf32>, tensor<?xf32>)
 {
   // %r1 bufferizes inplace because %A is inplaceable.
-  // %r0 is an overlapping subtensor that does not match, it must be out of place.
-  //      CHECK: subtensor
+  // %r0 is an overlapping tensor.extract_slice that does not match, it must be
+  // out of place.
+  //      CHECK: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
-  %r0 = subtensor %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
+  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
 
   // %r1 can bufferize inplace fine.
-  //      CHECK: subtensor_insert
+  //      CHECK: tensor.insert_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  %r1 = subtensor_insert %r0 into %A[%idx][4][1] : tensor<4xf32> into tensor<?xf32>
+  %r1 = tensor.insert_slice %r0 into %A[%idx][4][1] : tensor<4xf32> into tensor<?xf32>
 
   // %r3 does bufferizes inplace because %B is not inplaceable.
-  // %r0 is an overlapping subtensor that does not match, but does not alias with
-  // the buffer coming from %r3 so it can actually bufferize inplace.
-  //      CHECK: subtensor
+  // %r0 is an overlapping tensor.extract_slice that does not match, but does
+  // not alias with the buffer coming from %r3 so it can actually bufferize
+  // inplace.
+  //      CHECK: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  %r2 = subtensor %B[0][4][1] : tensor<?xf32> to tensor<4xf32>
+  %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>
 
   // %r3 cannot bufferize inplace since %B is not inplaceable.
-  //      CHECK: subtensor_insert
+  //      CHECK: tensor.insert_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
-  %r3 = subtensor_insert %r2 into %B[%idx][4][1] : tensor<4xf32> into tensor<?xf32>
+  %r3 = tensor.insert_slice %r2 into %B[%idx][4][1] : tensor<4xf32> into tensor<?xf32>
 
   return %r1, %r3: tensor<?xf32>, tensor<?xf32>
 }
 
 // -----
 
-// CHECK-LABEL: func @subtensor_matching_subtensor_insert
-func @subtensor_matching_subtensor_insert(
+// CHECK-LABEL: func @extract_slice_matching_insert_slice
+func @extract_slice_matching_insert_slice(
     %A : tensor<?xf32> {linalg.inplaceable = true},
     %B : tensor<?xf32>)
   -> (tensor<?xf32>, tensor<?xf32>)
 {
   // %r1 bufferizes inplace because %A is inplaceable.
-  // %r0 is a subtensor that matches, it can also be bufferized inplace.
-  //      CHECK: subtensor
+  // %r0 is a tensor.extract_slice that matches, it can also be bufferized
+  // inplace.
+  //      CHECK: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  %r0 = subtensor %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
+  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
 
-  //      CHECK: subtensor_insert
+  //      CHECK: tensor.insert_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  %r1 = subtensor_insert %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
+  %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
 
-  // %r2 is a subtensor that matches %r3, it can be bufferized inplace.
-  //      CHECK: subtensor
+  // %r2 is a tensor.extract_slice that matches %r3, it can be bufferized
+  // inplace.
+  //      CHECK: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  %r2 = subtensor %B[0][4][1] : tensor<?xf32> to tensor<4xf32>
+  %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>
 
-  // subtensor_insert cannot bufferize inplace.
+  // tensor.insert_slice cannot bufferize inplace.
   // This should have been captured by a canonicalization pattern and it would
   // be unproductive to have special logic in bufferization to encode matching
-  // subtensor_insert(subtensor(A), A).
-  //      CHECK: subtensor_insert
+  // insert_slice(extract_slice(A), A).
+  //      CHECK: tensor.insert_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
-  %r3 = subtensor_insert %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>
+  %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>
 
   return %r1, %r3: tensor<?xf32>, tensor<?xf32>
 }
 
 // -----
 
-// CHECK-LABEL: func @subtensor_linalg_readonly_use
-func @subtensor_linalg_readonly_use(
+// CHECK-LABEL: func @extract_slice_linalg_readonly_use
+func @extract_slice_linalg_readonly_use(
     %A : tensor<?x?xf32>,
     %B : tensor<4x4xf32>,
     %C : tensor<4x4xf32> {linalg.inplaceable = true})
   ->  (tensor<4x4xf32>, tensor<4x4xf32>)
 {
-  // subtensor is only used as a read, no interference irrespective of user's
-  // inplace status.
-  //     CHECK: subtensor
+  // tensor.extract_slice is only used as a read, no interference irrespective
+  // of user's inplace status.
+  //     CHECK: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  %sA = subtensor %A[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
+  %sA = tensor.extract_slice %A[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
 
   // matmul output operand is not inplaceable at the function boundary.
   //     CHECK: linalg.matmul
@@ -236,8 +240,8 @@
 
 // -----
 
-// CHECK-LABEL: func @subtensor_to_linalg_write_use
-func @subtensor_to_linalg_write_use(
+// CHECK-LABEL: func @extract_slice_to_linalg_write_use
+func @extract_slice_to_linalg_write_use(
     %A : tensor<4x4xf32>,
     %B : tensor<?x?xf32>,
     %C : tensor<?x?xf32> {linalg.inplaceable = true})
@@ -245,9 +249,9 @@
 {
   // Step 3. %sB forward propagates to a write in %D but it is not inplace.
   // So this is only ever read and can bufferize inplace.
-  //     CHECK: subtensor
+  //     CHECK: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  %sB = subtensor %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
+  %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
 
   // Step 2. %sB has a read interference in %E, it does not bufferize inplace.
   //     CHECK: linalg.matmul
@@ -259,12 +263,12 @@
   // Step 4. %sC forward propagates to an inplace write in %E.
   // %sC backward propagates to %C which is inplaceable.
   // As a consequence this is bufferized inplace.
-  //     CHECK: subtensor
+  //     CHECK: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  %sC = subtensor %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
+  %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
 
-  // Step 1. %sC backprops to the subtensor producer which is not considered an
-  // interference. This bufferizes inplace.
+  // Step 1. %sC backprops to the tensor.extract_slice producer which is not
+  // considered an interference. This bufferizes inplace.
   //     CHECK: linalg.matmul
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
   %E = linalg.matmul  ins(%A, %sB: tensor<4x4xf32>, tensor<4x4xf32>)
@@ -280,8 +284,8 @@
 
 // -----
 
-// CHECK-LABEL: func @subtensor_to_linalg_write_use
-func @subtensor_to_linalg_write_use(
+// CHECK-LABEL: func @extract_slice_to_linalg_write_use
+func @extract_slice_to_linalg_write_use(
     %A : tensor<4x4xf32>,
     %B : tensor<?x?xf32>,
     %C : tensor<?x?xf32> {linalg.inplaceable = true})
@@ -290,12 +294,12 @@
   // Step 4. %sB forward propagates to an inplace write in %D.
   // %sB backward propagates to %B which is not inplaceable.
   // As a consequence this is bufferized out of place.
-  //     CHECK: subtensor
+  //     CHECK: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
-  %sB = subtensor %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
+  %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
 
-  // Step 1. %sB backprops to the subtensor producer which is not considered an
-  // interference. This bufferizes inplace.
+  // Step 1. %sB backprops to the tensor.extract_slice producer which is not
+  // considered an interference. This bufferizes inplace.
   //     CHECK: linalg.matmul
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
   %D = linalg.matmul  ins(%B, %C: tensor<?x?xf32>, tensor<?x?xf32>)
@@ -305,12 +309,12 @@
   // Step 3. %sC forward propagates to an inplace write in %E.
   // %sC backward propagates to %C which is inplaceable.
   // As a consequence this is bufferized inplace.
-  //     CHECK: subtensor
+  //     CHECK: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  %sC = subtensor %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
+  %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
 
-  // Step 1. %sC backprops to the subtensor producer which is not considered an
-  // interference. This bufferizes inplace.
+  // Step 1. %sC backprops to the tensor.extract_slice producer which is not
+  // considered an interference. This bufferizes inplace.
   //     CHECK: linalg.matmul
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
   %E = linalg.matmul  ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
@@ -322,8 +326,8 @@
 
 // -----
 
-// CHECK-LABEL: func @nested_subtensor_and_insert
-func @nested_subtensor_and_insert(
+// CHECK-LABEL: func @nested_extract_slice_and_insert
+func @nested_extract_slice_and_insert(
     %A : tensor<?x?xf32>,
     %B : tensor<?x?xf32> {linalg.inplaceable = true},
     %C : tensor<?x?xf32> {linalg.inplaceable = true},
@@ -332,75 +336,78 @@
 {
   %f0 = constant 0.0 : f32
 
-  // 2-level matching subtensor / subtensor_insert into non inplaceable %A.
+  // 2-level matching tensor.extract_slice / tensor.insert_slice into non
+  // inplaceable %A.
   //   - %rA is not inplaceable because %A is not inplaceable at function boundary.
   //   - once %rA is deemed not inplaceable, nothing prevent %rsA to be inplaceable
   //   - this propagates to %FA and %ssA being inplaceable.
   //   - %sA would then bufferize to an inplace write (i.e. %FA) but %A is not
   //     inplaceable and so %sA is not inplaceable.
-  //     CHECK: subtensor
+  //     CHECK: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
-  // CHECK-NEXT: subtensor
+  // CHECK-NEXT: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
   // CHECK-NEXT: fill
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  // CHECK-NEXT: subtensor_insert
+  // CHECK-NEXT: tensor.insert_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  // CHECK-NEXT: subtensor_insert
+  // CHECK-NEXT: tensor.insert_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
-  %sA = subtensor %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
-  %ssA = subtensor %sA[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
+  %sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+  %ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
   %FA = linalg.fill(%ssA, %f0) : tensor<4x4xf32>, f32 -> tensor<4x4xf32>
-  %rsA = subtensor_insert %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<?x?xf32>
-  %rA = subtensor_insert %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
+  %rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<?x?xf32>
+  %rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
 
-  // 3-level matching subtensor / subtensor_insert into inplaceable %B.
-  // CHECK-NEXT: subtensor
+  // 3-level matching tensor.extract_slice / tensor.insert_slice into
+  // inplaceable %B.
+  // CHECK-NEXT: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  // CHECK-NEXT: subtensor
-  // Atm, this 2nd subtensor fails to bufferize inplace because clobbering
-  // analysis conservatively test for equivalent buffers.
+  // CHECK-NEXT: tensor.extract_slice
+  // Atm, this 2nd tensor.extract_slice fails to bufferize inplace because
+  // clobbering analysis conservatively test for equivalent buffers.
   // TODO: This is currently too restrictive and misses clobberings.
   // When available, use container-containee analysis.
   // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
-  // CHECK-NEXT: subtensor
+  // CHECK-NEXT: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
   // CHECK-NEXT: fill
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  // CHECK-NEXT: subtensor_insert
+  // CHECK-NEXT: tensor.insert_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  // CHECK-NEXT: subtensor_insert
+  // CHECK-NEXT: tensor.insert_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  // CHECK-NEXT: subtensor_insert
+  // CHECK-NEXT: tensor.insert_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  %sB = subtensor %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
-  %ssB = subtensor %sB[0, 0][4, %idx][1, 1] : tensor<?x?xf32> to tensor<4x?xf32>
-  %sssB = subtensor %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32>
+  %sB = tensor.extract_slice %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+  %ssB = tensor.extract_slice %sB[0, 0][4, %idx][1, 1] : tensor<?x?xf32> to tensor<4x?xf32>
+  %sssB = tensor.extract_slice %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32>
   %FB = linalg.fill(%sssB, %f0) : tensor<4x4xf32>, f32 -> tensor<4x4xf32>
-  %rssB = subtensor_insert %FB into %ssB[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<4x?xf32>
-  %rsB = subtensor_insert %rssB into %sB[0, 0][4, %idx][1, 1] : tensor<4x?xf32> into tensor<?x?xf32>
-  %rB = subtensor_insert %rsB into %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
+  %rssB = tensor.insert_slice %FB into %ssB[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<4x?xf32>
+  %rsB = tensor.insert_slice %rssB into %sB[0, 0][4, %idx][1, 1] : tensor<4x?xf32> into tensor<?x?xf32>
+  %rB = tensor.insert_slice %rsB into %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
 
-  // 2-level matching subtensor / subtensor_insert into inplaceable %C with a twist.
+  // 2-level matching tensor.extract_slice / tensor.insert_slice into
+  // inplaceable %C with a twist.
   // Throw a wrench in the system: %rsC production sizes do not match %ssC.
-  // CHECK-NEXT: subtensor
+  // CHECK-NEXT: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  // The subtensor_insert that would be candidate for matching does not actually
-  // match. That subtensor_insert can still be bufferized inplace nonetheless
-  // but this subtensor, which bufferizes to an inplace write, cannot.
-  // CHECK-NEXT: subtensor
+  // The tensor.insert_slice that would be candidate for matching does not actually
+  // match. That tensor.insert_slice can still be bufferized inplace nonetheless
+  // but this tensor.extract_slice, which bufferizes to an inplace write, cannot.
+  // CHECK-NEXT: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
   // CHECK-NEXT: fill
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  // CHECK-NEXT: subtensor_insert
+  // CHECK-NEXT: tensor.insert_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  // CHECK-NEXT: subtensor_insert
+  // CHECK-NEXT: tensor.insert_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  %sC = subtensor %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
-  %ssC = subtensor %sC[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
+  %sC = tensor.extract_slice %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+  %ssC = tensor.extract_slice %sC[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
   %FC = linalg.fill(%ssC, %f0) : tensor<4x4xf32>, f32 -> tensor<4x4xf32>
-  %rsC = subtensor_insert %FC into %sC[0, 0][12345, 67890][1, 1] : tensor<4x4xf32> into tensor<?x?xf32>
-  %rC = subtensor_insert %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
+  %rsC = tensor.insert_slice %FC into %sC[0, 0][12345, 67890][1, 1] : tensor<4x4xf32> into tensor<?x?xf32>
+  %rC = tensor.insert_slice %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
 
   return %rA, %rB, %rC: tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/comprehensive-func-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-func-bufferize.mlir
--- a/mlir/test/Dialect/Linalg/comprehensive-func-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-func-bufferize.mlir
@@ -118,8 +118,8 @@
 
 // -----
 
-// CHECK-LABEL: func @subtensor_insert_fun
-func @subtensor_insert_fun(%A0 : tensor<?xf32>, %A1 : tensor<?xf32> {linalg.inplaceable = true},
+// CHECK-LABEL: func @insert_slice_fun
+func @insert_slice_fun(%A0 : tensor<?xf32>, %A1 : tensor<?xf32> {linalg.inplaceable = true},
                            %t0 : tensor<4xf32>, %t1 : tensor<4xf32> {linalg.inplaceable = true})
   ->  (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)
 {
@@ -128,40 +128,40 @@
   //      CHECK: %[[BUFFER_CAST_t0:.*]] = memref.buffer_cast {{.*}} : memref<4xf32
   //      CHECK: %[[BUFFER_CAST_t1:.*]] = memref.buffer_cast {{.*}} : memref<4xf32
 
-  // Alloc and copy the whole result tensor. Copy the subtensor.
+  // Alloc and copy the whole result tensor. Copy the tensor.extract_slice.
   //      CHECK: %[[REALLOC_A0:.*]] = memref.alloc
   //      CHECK: linalg.copy(%[[BUFFER_CAST_A0]]
   //      CHECK: %[[SV_A0:.*]] = memref.subview %[[REALLOC_A0]]
   //      CHECK: linalg.copy(%[[BUFFER_CAST_t0]], %[[SV_A0]])
-  %r0 = subtensor_insert %t0 into %A0[0][4][1] : tensor<4xf32> into tensor<?xf32>
+  %r0 = tensor.insert_slice %t0 into %A0[0][4][1] : tensor<4xf32> into tensor<?xf32>
 
-  // Alloc and copy the whole result tensor. Copy the subtensor.
+  // Alloc and copy the whole result tensor. Copy the tensor.extract_slice.
   //      CHECK: %[[REALLOC_A0_2:.*]] = memref.alloc
   //      CHECK: linalg.copy(%[[BUFFER_CAST_A0]]
   //      CHECK: %[[SV_A0_2:.*]] = memref.subview %[[REALLOC_A0_2]]
   //      CHECK: linalg.copy(%[[BUFFER_CAST_t1]], %[[SV_A0_2]])
-  %r1 = subtensor_insert %t1 into %A0[0][4][1] : tensor<4xf32> into tensor<?xf32>
+  %r1 = tensor.insert_slice %t1 into %A0[0][4][1] : tensor<4xf32> into tensor<?xf32>
 
-  //  Still alloc the large tensor because %A1 is read after. Copy the subtensor.
+  //  Still alloc the large tensor because %A1 is read after. Copy the tensor.extract_slice.
   //      CHECK: %[[REALLOC_A1:.*]] = memref.alloc
   //      CHECK: linalg.copy(%[[BUFFER_CAST_A1]]
   //      CHECK: %[[SV_A1:.*]] = memref.subview %[[REALLOC_A1]]
   //      CHECK: linalg.copy(%[[BUFFER_CAST_t0]], %[[SV_A1]])
-  %r2 = subtensor_insert %t0 into %A1[0][4][1] : tensor<4xf32> into tensor<?xf32>
+  %r2 = tensor.insert_slice %t0 into %A1[0][4][1] : tensor<4xf32> into tensor<?xf32>
 
-  //  Do not realloc the large tensor. Copy the subtensor.
+  //  Do not realloc the large tensor. Copy the tensor.extract_slice.
   //  CHECK-NOT: alloc
   //      CHECK: %[[SV_A1_2:.*]] = memref.subview %[[BUFFER_CAST_A1]]
   //      CHECK: linalg.copy(%[[BUFFER_CAST_t1]], %[[SV_A1_2]])
-  %r3 = subtensor_insert %t1 into %A1[0][4][1] : tensor<4xf32> into tensor<?xf32>
+  %r3 = tensor.insert_slice %t1 into %A1[0][4][1] : tensor<4xf32> into tensor<?xf32>
 
   return %r0, %r1, %r2, %r3: tensor<?xf32>, tensor<?xf32>, tensor<?xf32>, tensor<?xf32>
 }
 
 // -----
 
-// CHECK-LABEL: func @subtensor_insert_fun
-func @subtensor_insert_fun(%A : tensor<?xf32> {linalg.inplaceable = true}, %t : tensor<4xf32>)
+// CHECK-LABEL: func @insert_slice_fun
+func @insert_slice_fun(%A : tensor<?xf32> {linalg.inplaceable = true}, %t : tensor<4xf32>)
   -> tensor<?xf32>
 {
   %f0 = constant 0.0 : f32
@@ -172,7 +172,7 @@
   //  CHECK-NOT: alloc
   //      CHECK: %[[SV:.*]] = memref.subview %[[BUFFER_CAST_A]]
   //      CHECK: linalg.copy(%[[BUFFER_CAST_B]], %[[SV]])
-  %r0 = subtensor_insert %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
+  %r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
 
   /// Overwrite BUFFER_CAST_A inplace.
   //      CHECK: linalg.fill(%[[BUFFER_CAST_A]]
@@ -182,8 +182,8 @@
 
 // -----
 
-// CHECK-LABEL: func @subtensor_insert_fun
-func @subtensor_insert_fun(%A : tensor<?xf32> {linalg.inplaceable = true}, %t : tensor<4xf32>)
+// CHECK-LABEL: func @insert_slice_fun
+func @insert_slice_fun(%A : tensor<?xf32> {linalg.inplaceable = true}, %t : tensor<4xf32>)
   -> tensor<?xf32>
 {
   %f0 = constant 0.0 : f32
@@ -198,15 +198,15 @@
   //      CHECK: %[[SV:.*]] = memref.subview %[[BUFFER_CAST_A]]
   /// Overwrite BUFFER_CAST_A inplace by copying into the subview.
   //      CHECK: linalg.copy(%[[BUFFER_CAST_B]], %[[SV]])
-  %r1 = subtensor_insert %t into %r0[0][4][1] : tensor<4xf32> into tensor<?xf32>
+  %r1 = tensor.insert_slice %t into %r0[0][4][1] : tensor<4xf32> into tensor<?xf32>
 
   return %r1: tensor<?xf32>
 }
 
 // -----
 
-// CHECK-LABEL: func @subtensor_insert_fun_not_inplace
-func @subtensor_insert_fun_not_inplace(%A : tensor<?xf32>, %t : tensor<4xf32>)
+// CHECK-LABEL: func @insert_slice_fun_not_inplace
+func @insert_slice_fun_not_inplace(%A : tensor<?xf32>, %t : tensor<4xf32>)
   -> tensor<?xf32>
 {
   //      CHECK: %[[BUFFER_CAST_A:.*]] = memref.buffer_cast {{.*}} : memref<?xf32
@@ -217,14 +217,14 @@
   //      CHECK: %[[SV:.*]] = memref.subview %[[ALLOC]][0] [4] [1] : memref<?xf32> to memref<4xf32>
   //      CHECK: linalg.copy(%[[BUFFER_CAST_B]], %[[SV]]) : memref<4xf32, #map>, memref<4xf32>
   //      CHECK: memref.dealloc %[[ALLOC]] : memref<?xf32>
-  %r0 = subtensor_insert %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
+  %r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
   return %r0: tensor<?xf32>
 }
 
 // -----
 
-// CHECK-LABEL: func @subtensor_insert_fun_not_inplace
-func @subtensor_insert_fun_not_inplace(%A : tensor<?xf32> {linalg.inplaceable = true}, %t : tensor<4xf32>)
+// CHECK-LABEL: func @insert_slice_fun_not_inplace
+func @insert_slice_fun_not_inplace(%A : tensor<?xf32> {linalg.inplaceable = true}, %t : tensor<4xf32>)
   -> (tensor<?xf32>, tensor<?xf32>)
 {
   %f0 = constant 0.0 : f32
@@ -232,10 +232,10 @@
   //  CHECK-DAG: %[[BUFFER_CAST_A:.*]] = memref.buffer_cast {{.*}} : memref<?xf32{{.*}}
   //  CHECK-DAG: %[[BUFFER_CAST_B:.*]] = memref.buffer_cast {{.*}} : memref<4xf32{{.*}}
 
-  // subtensor_insert is bufferized first, %A is inplaceable so we can make this inplace
+  // tensor.insert_slice is bufferized first, %A is inplaceable so we can make this inplace
   //  CHECK-DAG: %[[SV:.*]] = memref.subview %[[BUFFER_CAST_A]][0] [4] [1] : memref<?xf32, {{.*}}> to memref<4xf32, {{.*}}>
   //  CHECK-DAG: linalg.copy(%[[BUFFER_CAST_B]], %[[SV]]) : memref<4xf32, {{.*}}>, memref<4xf32, {{.*}}>
-  %r0 = subtensor_insert %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
+  %r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
 
   // fill would interfere with %r0 that is also being returned.
   // So we need to bufferize it out of place and make a new alloc.
@@ -253,8 +253,8 @@
 
 // -----
 
-// CHECK-LABEL: func @subtensor_fun
-func @subtensor_fun(%A : tensor<?xf32> {linalg.inplaceable = true})
+// CHECK-LABEL: func @extract_slice_fun
+func @extract_slice_fun(%A : tensor<?xf32> {linalg.inplaceable = true})
   ->  tensor<4xf32>
 {
   // This bufferizes to a pattern that the cross-function boundary pass needs to
@@ -268,9 +268,8 @@
   //     CHECK: %[[BUFFER_CAST_A:.*]] = memref.buffer_cast {{.*}} : memref<?xf32
   //     CHECK: %[[SV:.*]] = memref.subview %[[BUFFER_CAST_A]][0] [4] [1]
   //     CHECK: %[[RES:.*]] = memref.tensor_load %[[SV]]
-  %r0 = subtensor %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
+  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
 
   //     CHECK: return %[[RES]]
   return %r0: tensor<4xf32>
 }
-
diff --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
--- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
+++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
@@ -299,28 +299,28 @@
 
 // -----
 
-func @fold_subtensor(
+func @fold_slice(
     %arg0 : tensor<1x?x?x1x?x1x1xf32>, %arg1 : tensor<1x?x?x?x?x1x1xf32>,
     %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index,
     %arg6 : index, %arg7 : index) -> (tensor<1x?x?x1x?x1x1xf32>, tensor<1x?x?x1x?x1x1xf32>) {
-  %0 = subtensor %arg0[0, %arg2, %arg3, 0, %arg4, 0, 0]
-                      [1, %arg5, %arg6, 1, %arg7, 1, 1] [1, 1, 1, 1, 1, 1, 1] :
+  %0 = tensor.extract_slice %arg0[0, %arg2, %arg3, 0, %arg4, 0, 0]
+                             [1, %arg5, %arg6, 1, %arg7, 1, 1] [1, 1, 1, 1, 1, 1, 1] :
       tensor<1x?x?x1x?x1x1xf32> to tensor<1x?x?x1x?x1x1xf32>
-  %1 = subtensor %arg1[%arg2, 0, %arg3, 0, 0, %arg4, 0]
-                      [1, %arg5, %arg6, 1, %arg7, 1, 1] [1, 1, 1, 1, 1, 1, 1] :
+  %1 = tensor.extract_slice %arg1[%arg2, 0, %arg3, 0, 0, %arg4, 0]
+                             [1, %arg5, %arg6, 1, %arg7, 1, 1] [1, 1, 1, 1, 1, 1, 1] :
       tensor<1x?x?x?x?x1x1xf32> to tensor<1x?x?x1x?x1x1xf32>
   return %0, %1 : tensor<1x?x?x1x?x1x1xf32>, tensor<1x?x?x1x?x1x1xf32>
 }
-//      CHECK: func @fold_subtensor
+//      CHECK: func @fold_slice
 // CHECK-SAME:   %[[ARG0:.+]]: tensor<1x?x?x1x?x1x1xf32>
 // CHECK-SAME:   %[[ARG1:.+]]: tensor<1x?x?x?x?x1x1xf32>
-//      CHECK:   %[[SUBTENSOR1:.+]] = subtensor %[[ARG0]]
+//      CHECK:   %[[SLICE1:.+]] = tensor.extract_slice %[[ARG0]]
 // CHECK-SAME:       to tensor<?x?x?xf32>
-//      CHECK:   %[[RESULT1:.+]] = linalg.tensor_expand_shape %[[SUBTENSOR1]]
+//      CHECK:   %[[RESULT1:.+]] = linalg.tensor_expand_shape %[[SLICE1]]
 // CHECK-SAME:       [0, 1], [2], [3, 4, 5, 6]
-//      CHECK:   %[[SUBTENSOR2:.+]] = subtensor %[[ARG1]]
+//      CHECK:   %[[SLICE2:.+]] = tensor.extract_slice %[[ARG1]]
 // CHECK-SAME:       to tensor<?x?x?xf32>
-//      CHECK:   %[[RESULT2:.+]] = linalg.tensor_expand_shape %[[SUBTENSOR2]]
+//      CHECK:   %[[RESULT2:.+]] = linalg.tensor_expand_shape %[[SLICE2]]
 // CHECK-SAME:       [0, 1], [2], [3, 4, 5, 6]
 //      CHECK:   return %[[RESULT1]], %[[RESULT2]]
 
@@ -430,25 +430,25 @@
 
 // -----
 
-func @subtensor_unit_dims(%arg0: tensor<1x3xf32>) -> tensor<1x1xf32> {
-  %0 = subtensor %arg0[0, 2] [1, 1] [1, 1] : tensor<1x3xf32> to tensor<1x1xf32>
+func @slice_unit_dims(%arg0: tensor<1x3xf32>) -> tensor<1x1xf32> {
+  %0 = tensor.extract_slice %arg0[0, 2] [1, 1] [1, 1] : tensor<1x3xf32> to tensor<1x1xf32>
   return %0 : tensor<1x1xf32>
 }
-// CHECK-LABEL: func @subtensor_unit_dims
-//       CHECK:   %[[SUBTENSOR:.+]] = subtensor
+// CHECK-LABEL: func @slice_unit_dims
+//       CHECK:   %[[SLICE:.+]] = tensor.extract_slice
 //  CHECK-SAME:     tensor<1x3xf32> to tensor<f32>
-//       CHECK:   %[[RESULT:.+]] = linalg.tensor_expand_shape %[[SUBTENSOR]] []
+//       CHECK:   %[[RESULT:.+]] = linalg.tensor_expand_shape %[[SLICE]] []
 //       CHECK:   return %[[RESULT]]
 
 // -----
 
-func @subtensor_insert_unit_dims(%arg0: tensor<1x3xf32>, %arg1: tensor<1x1xf32>) -> tensor<1x3xf32> {
-  %0 = subtensor_insert %arg1 into %arg0[0, 2] [1, 1] [1, 1] : tensor<1x1xf32> into tensor<1x3xf32>
+func @insert_slice_unit_dims(%arg0: tensor<1x3xf32>, %arg1: tensor<1x1xf32>) -> tensor<1x3xf32> {
+  %0 = tensor.insert_slice %arg1 into %arg0[0, 2] [1, 1] [1, 1] : tensor<1x1xf32> into tensor<1x3xf32>
   return %0 : tensor<1x3xf32>
 }
-// CHECK-LABEL: func @subtensor_insert_unit_dims
+// CHECK-LABEL: func @insert_slice_unit_dims
 //       CHECK:   %[[RESHAPE:.+]] = linalg.tensor_collapse_shape %{{.+}} []
-//       CHECK:   %[[RESULT:.+]] = subtensor_insert %[[RESHAPE]]
+//       CHECK:   %[[RESULT:.+]] = tensor.insert_slice %[[RESHAPE]]
 //  CHECK-SAME:     tensor<f32> into tensor<1x3xf32>
 //       CHECK:   return %[[RESULT]]
 
diff --git a/mlir/test/Dialect/Linalg/fusion-sequence.mlir b/mlir/test/Dialect/Linalg/fusion-sequence.mlir
--- a/mlir/test/Dialect/Linalg/fusion-sequence.mlir
+++ b/mlir/test/Dialect/Linalg/fusion-sequence.mlir
@@ -175,18 +175,18 @@
 //       CHECK:   %[[INIT:.+]] = linalg.init_tensor
 //       CHECK:   %[[R0:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG5:.+]] = %[[INIT]]) -> (tensor<?x?xf32>) {
 //       CHECK:     %[[R1:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG7:.+]] = %[[ARG5]]) -> (tensor<?x?xf32>) {
-//   CHECK-DAG:       %[[STARG3:.+]] = subtensor %[[ARG3]]
-//   CHECK-DAG:       %[[STARG7:.+]] = subtensor %[[ARG7]]
-//   CHECK-DAG:       %[[STARG0:.+]] = subtensor %[[ARG0]]
-//   CHECK-DAG:       %[[STARG1:.+]] = subtensor %[[ARG1]]
-//   CHECK-DAG:       %[[STARG2:.+]] = subtensor %[[ARG2]]
+//   CHECK-DAG:       %[[STARG3:.+]] = tensor.extract_slice %[[ARG3]]
+//   CHECK-DAG:       %[[STARG7:.+]] = tensor.extract_slice %[[ARG7]]
+//   CHECK-DAG:       %[[STARG0:.+]] = tensor.extract_slice %[[ARG0]]
+//   CHECK-DAG:       %[[STARG1:.+]] = tensor.extract_slice %[[ARG1]]
+//   CHECK-DAG:       %[[STARG2:.+]] = tensor.extract_slice %[[ARG2]]
 //       CHECK:       %[[T0:.+]] = linalg.matmul
 //  CHECK-SAME:         ins(%[[STARG0]], %[[STARG1]] : tensor<?x?xf32>, tensor<?x?xf32>)
 //  CHECK-SAME:         outs(%[[STARG2]] : tensor<?x?xf32>) -> tensor<?x?xf32>
 //       CHECK:       %[[T1:.+]] = linalg.generic
 //  CHECK-SAME:         ins(%[[T0:.+]], %[[STARG3]] : tensor<?x?xf32>, tensor<?xf32>)
 //  CHECK-SAME:         outs(%[[STARG7]] : tensor<?x?xf32>)
-//       CHECK:       %[[RESULT:.+]] = subtensor_insert %[[T1]] into %[[ARG7]]
+//       CHECK:       %[[RESULT:.+]] = tensor.insert_slice %[[T1]] into %[[ARG7]]
 //       CHECK:       scf.yield %[[RESULT]]
 //       CHECK:     }
 //       CHECK:     scf.yield %[[R1]]
@@ -229,21 +229,21 @@
 //       CHECK:     %[[M_1:.+]] = memref.dim %[[ARG8]], %[[C0]]
 //       CHECK:     %[[TILE_M_1:.+]] = affine.min #[[MAP0]](%[[M_1]], %[[IV0]])
 //       CHECK:     %[[N3:.+]] = memref.dim %[[ARG8]], %[[C1]]
-//       CHECK:     %[[STARG6:.+]] = subtensor %[[ARG8]][%[[IV0]], 0]
+//       CHECK:     %[[STARG6:.+]] = tensor.extract_slice %[[ARG8]][%[[IV0]], 0]
 //  CHECK-SAME:       [%[[TILE_M_1]], %[[N3]]]
 //       CHECK:     %[[M_2:.+]] = memref.dim %[[ARG4]], %[[C0]]
 //       CHECK:     %[[TILE_M_2:.+]] = affine.min #[[MAP1]](%[[IV0]])[%[[M_2]], %[[M]]]
 //       CHECK:     %[[N2:.+]] = memref.dim %[[ARG4]], %[[C1]]
-//       CHECK:     %[[STARG4:.+]] = subtensor %[[ARG4]][%[[IV0]], 0]
+//       CHECK:     %[[STARG4:.+]] = tensor.extract_slice %[[ARG4]][%[[IV0]], 0]
 //  CHECK-SAME:       [%[[TILE_M_2]], %[[N2]]]
 //       CHECK:     %[[TILE_M_3:.+]] = affine.min #[[MAP1]](%[[IV0]])[%[[M]], %[[M]]]
 //       CHECK:     %[[N0:.+]] = memref.dim %[[ARG0]], %[[C1]]
-//       CHECK:     %[[STARG0:.+]] = subtensor %[[ARG0]][%[[IV0]], 0]
+//       CHECK:     %[[STARG0:.+]] = tensor.extract_slice %[[ARG0]][%[[IV0]], 0]
 //  CHECK-SAME:       [%[[TILE_M_3]], %[[N0]]]
 //       CHECK:     %[[M_3:.+]] = memref.dim %[[ARG2]], %[[C0]]
 //       CHECK:     %[[TILE_M_4:.+]] = affine.min #[[MAP1]](%[[IV0]])[%[[M_3]], %[[M]]]
 //       CHECK:     %[[N1:.+]] = memref.dim %[[ARG2]], %[[C1]]
-//       CHECK:     %[[STARG2:.+]] = subtensor %[[ARG2]][%[[IV0]], 0]
+//       CHECK:     %[[STARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV0]], 0]
 //  CHECK-SAME:       [%[[TILE_M_4]], %[[N1]]]
 //       CHECK:     %[[T0:.+]] = linalg.matmul
 //  CHECK-SAME:       ins(%[[STARG0]], %[[ARG1]] : tensor<?x?xf32>, tensor<?x?xf32>
@@ -254,7 +254,7 @@
 //       CHECK:     %[[T2:.+]] = linalg.matmul
 //  CHECK-SAME:       ins(%[[T1]], %arg5 : tensor<?x?xf32>, tensor<?x?xf32>
 //  CHECK-SAME:       ) outs(%[[STARG6]] : tensor<?x?xf32>)
-//       CHECK:     %[[R1:.+]] = subtensor_insert %[[T2]]
+//       CHECK:     %[[R1:.+]] = tensor.insert_slice %[[T2]]
 //  CHECK-SAME:       into %[[ARG8]][%[[IV0]], 0] [%[[TILE_M_1]], %[[N3]]]
 //       CHECK:     scf.yield %[[R1]] : tensor<?x?xf32>
 //       CHECK:   }
diff --git a/mlir/test/Dialect/Linalg/fusion-tensor-pattern.mlir b/mlir/test/Dialect/Linalg/fusion-tensor-pattern.mlir
--- a/mlir/test/Dialect/Linalg/fusion-tensor-pattern.mlir
+++ b/mlir/test/Dialect/Linalg/fusion-tensor-pattern.mlir
@@ -38,16 +38,16 @@
 //      CHECK:     %[[M_2:.+]] = memref.dim %[[ARG6]], %[[C0]]
 //      CHECK:     %[[TILE_M_2:.+]] = affine.min #[[MAP1]](%[[M_2]], %[[IV0]])
 //      CHECK:     %[[N3:.+]] = memref.dim %[[ARG6]], %[[C1]]
-//      CHECK:     %[[ST_ARG6:.+]] = subtensor %[[ARG6]][%[[IV0]], 0]
+//      CHECK:     %[[ST_ARG6:.+]] = tensor.extract_slice %[[ARG6]][%[[IV0]], 0]
 // CHECK-SAME:       [%[[TILE_M_2]], %[[N3]]]
 //      CHECK:     %[[TILE_M_3:.+]] = affine.min #[[MAP5]](%[[IV0]])[%[[M]], %[[M]]]
 //      CHECK:     %[[N1:.+]] = memref.dim %[[ARG0]], %[[C1]]
-//      CHECK:     %[[ST_ARG0:.+]] = subtensor %[[ARG0]][%[[IV0]], 0]
+//      CHECK:     %[[ST_ARG0:.+]] = tensor.extract_slice %[[ARG0]][%[[IV0]], 0]
 // CHECK-SAME:       [%[[TILE_M_3]], %[[N1]]]
 //      CHECK:     %[[M_3:.+]] = memref.dim %[[ARG2]], %[[C0]]
 //      CHECK:     %[[TILE_M_4:.+]] = affine.min #[[MAP5]](%[[IV0]])[%[[M_3]], %[[M]]]
 //      CHECK:     %[[N2_2:.+]] = memref.dim %[[ARG2]], %[[C1]]
-//      CHECK:     %[[ST_ARG2:.+]] = subtensor %[[ARG2]][%[[IV0]], 0]
+//      CHECK:     %[[ST_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV0]], 0]
 // CHECK-SAME:       [%[[TILE_M_4]], %[[N2_2]]]
 //      CHECK:     %[[LHS:.+]] = linalg.matmul
 // CHECK-SAME:       __internal_linalg_transform__ = "after_lhs_fusion_producer"
@@ -62,30 +62,30 @@
 // CHECK-SAME:         %[[C0]] to %[[N2]] step %[[C16]]
 // CHECK-SAME:         iter_args(%[[ARG10:.+]] = %[[ARG8]]) -> (tensor<?x?xf32>) {
 //      CHECK:         %[[TILE_N2:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[N2]]]
-//      CHECK:         %[[ST_LHS:.+]] = subtensor %[[LHS]][0, %[[IV2]]]
+//      CHECK:         %[[ST_LHS:.+]] = tensor.extract_slice %[[LHS]][0, %[[IV2]]]
 // CHECK-SAME:           [%[[TILE_M_3]], %[[TILE_N2]]]
 //      CHECK:         %[[N2_3:.+]] = memref.dim %[[ARG3]], %[[C0]]
 //      CHECK:         %[[TILE_N2_2:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[N2_3]]]
 //      CHECK:         %[[TILE_N3:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[N3_2]]]
-//      CHECK:         %[[ST_ARG3:.+]] = subtensor %[[ARG3]][%[[IV2]], %[[IV1]]]
+//      CHECK:         %[[ST_ARG3:.+]] = tensor.extract_slice %[[ARG3]][%[[IV2]], %[[IV1]]]
 // CHECK-SAME:           [%[[TILE_N2_2]], %[[TILE_N3]]]
 //      CHECK:         %[[M_4:.+]] = memref.dim %[[ARG10]], %[[C0]]
 //      CHECK:         %[[N3_3:.+]] = memref.dim %[[ARG10]], %[[C1]]
 //      CHECK:         %[[TILE_N3_2:.+]] = affine.min #[[MAP4]](%[[N3_3]], %[[IV1]])
-//      CHECK:         %[[ST_ARG4:.+]] = subtensor %[[ARG10]][0, %[[IV1]]]
+//      CHECK:         %[[ST_ARG4:.+]] = tensor.extract_slice %[[ARG10]][0, %[[IV1]]]
 // CHECK-SAME:           [%[[M_4]], %[[TILE_N3_2]]]
 //      CHECK:         %[[ST_RESULT:.+]] = linalg.matmul
 // CHECK-SAME:           __internal_linalg_transform__ = "after_lhs_fusion"
 // CHECK-SAME:           ins(%[[ST_LHS]], %[[ST_ARG3]]
 // CHECK-SAME:             : tensor<?x?xf32>, tensor<?x?xf32>)
 // CHECK-SAME:           outs(%[[ST_ARG4]] : tensor<?x?xf32>)
-//      CHECK:         %[[UPDATE1:.+]] = subtensor_insert %[[ST_RESULT]]
+//      CHECK:         %[[UPDATE1:.+]] = tensor.insert_slice %[[ST_RESULT]]
 // CHECK-SAME:           into %[[ARG10]][0, %[[IV1]]] [%[[M_4]], %[[TILE_N3_2]]]
 //      CHECK:         scf.yield %[[UPDATE1]]
 //      CHECK:       }
 //      CHECK:       scf.yield %[[YIELD1]]
 //      CHECK:     }
-//      CHECK:     %[[UPDATE0:.+]] = subtensor_insert %[[YIELD0]] into
+//      CHECK:     %[[UPDATE0:.+]] = tensor.insert_slice %[[YIELD0]] into
 // CHECK-SAME:       %[[ARG6]][%[[IV0]], 0] [%[[TILE_M_2]], %[[N3]]]
 //      CHECK:     scf.yield %[[UPDATE0]]
 //      CHECK:   }
@@ -114,9 +114,9 @@
 // TLOOP-SAME:      %[[AB_INIT_:.*]] = %[[AB_INIT]]: tensor<?x?xf32>)
 // TLOOP-SAME: outs (%[[ABC_INIT_:.*]] = %[[ABC_INIT]]: tensor<?x?xf32>) {
 
-// TLOOP:    %[[ABC_INIT_SUB:.*]] = subtensor %[[ABC_INIT_]][%[[IV0]], 0]
-// TLOOP:    %[[A_SUB:.*]] = subtensor %[[A_]][%[[IV0]], 0]
-// TLOOP:    %[[AB_INIT_SUB:.*]] = subtensor %[[AB_INIT_]][%[[IV0]], 0]
+// TLOOP:    %[[ABC_INIT_SUB:.*]] = tensor.extract_slice %[[ABC_INIT_]][%[[IV0]], 0]
+// TLOOP:    %[[A_SUB:.*]] = tensor.extract_slice %[[A_]][%[[IV0]], 0]
+// TLOOP:    %[[AB_INIT_SUB:.*]] = tensor.extract_slice %[[AB_INIT_]][%[[IV0]], 0]
 
 // TLOOP:    %[[AB_SUB:.*]] = linalg.matmul
 // TLOOP-SAME:  ins(%[[A_SUB]], %[[B_]] : {{.*}}) outs(%[[AB_INIT_SUB]]
@@ -132,19 +132,19 @@
 // TLOOP-SAME: outs (%[[ABC_INIT_SUB_:.*]] = %[[ABC_INIT_SUB]]: [[TY]])
 // TLOOP-SAME: iterators["parallel", "reduction"] {
 
-// TLOOP:      %[[AB_SUB_SUB:.*]] = subtensor %[[AB_SUB_]][0, %[[IV2]]]
-// TLOOP:      %[[C__SUB:.*]] = subtensor %[[C__]][%[[IV2]], %[[IV1]]]
-// TLOOP:      %[[ABS_INIT_SUB_SUB:.*]] = subtensor %[[ABC_INIT_SUB_]][0, %[[IV1]]]
+// TLOOP:      %[[AB_SUB_SUB:.*]] = tensor.extract_slice %[[AB_SUB_]][0, %[[IV2]]]
+// TLOOP:      %[[C__SUB:.*]] = tensor.extract_slice %[[C__]][%[[IV2]], %[[IV1]]]
+// TLOOP:      %[[ABS_INIT_SUB_SUB:.*]] = tensor.extract_slice %[[ABC_INIT_SUB_]][0, %[[IV1]]]
 
 // TLOOP:      %[[ABC_SUB_SUB:.*]] = linalg.matmul
 // TLOOP-SAME:  ins(%[[AB_SUB_SUB]], %[[C__SUB]] : [[TY]], [[TY]])
 // TLOOP-SAME:  outs(%[[ABS_INIT_SUB_SUB]] : [[TY]]) -> [[TY]]
 
-// TLOOP:      %[[RES0:.*]] = subtensor_insert %[[ABC_SUB_SUB]]
+// TLOOP:      %[[RES0:.*]] = tensor.insert_slice %[[ABC_SUB_SUB]]
 // TLOOP-SAME:   into %[[ABC_INIT_SUB_]][0, %[[IV1]]]
 // TLOOP:      linalg.yield %[[RES0]] : [[TY]]
 // TLOOP:    }
-// TLOOP:    %[[RES1:.*]] = subtensor_insert %[[ABC_SUB_]] into %[[ABC_INIT_]][%[[IV0]], 0]
+// TLOOP:    %[[RES1:.*]] = tensor.insert_slice %[[ABC_SUB_]] into %[[ABC_INIT_]][%[[IV0]], 0]
 // TLOOP:    linalg.yield %[[RES1]] : [[TY]]
 // TLOOP:  }
 // TLOOP:  return %[[ABC]] : [[TY]]
@@ -186,10 +186,10 @@
 //  CHECK-SAME:     iter_args(%[[ARG4:.+]] = %{{[a-zA-Z0-9_]+}})
 //       CHECK:     %[[YIELD:.+]] = scf.for %[[IV1:[a-zA-Z0-9_]+]]
 //  CHECK-SAME:       iter_args(%[[ARG6:.+]] = %[[ARG4]])
-//       CHECK:       %[[ST_ARG6:.+]] = subtensor %[[ARG6]][%[[IV0]], %[[IV1]]]
-//       CHECK:       %[[ST_ARG0:.+]] = subtensor %[[ARG0]][%[[IV0]], 0]
-//       CHECK:       %[[ST_ARG1:.+]] = subtensor %[[ARG1]][0, %[[IV1]]]
-//       CHECK:       %[[ST_ARG2:.+]] = subtensor %[[ARG2]][%[[IV0]], %[[IV1]]]
+//       CHECK:       %[[ST_ARG6:.+]] = tensor.extract_slice %[[ARG6]][%[[IV0]], %[[IV1]]]
+//       CHECK:       %[[ST_ARG0:.+]] = tensor.extract_slice %[[ARG0]][%[[IV0]], 0]
+//       CHECK:       %[[ST_ARG1:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]
+//       CHECK:       %[[ST_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV0]], %[[IV1]]]
 //       CHECK:       %[[LHS:.+]] = linalg.matmul
 //  CHECK-SAME:         ins(%[[ST_ARG0]], %[[ST_ARG1]]
 //  CHECK-SAME:           : tensor<?x?xf32>, tensor<?x?xf32>)
@@ -197,7 +197,7 @@
 //       CHECK:       %[[ST_RESULT:.+]] = linalg.generic
 //  CHECK-SAME:         ins(%[[LHS]] : tensor<?x?xf32>)
 //  CHECK-SAME:         outs(%[[ST_ARG6]] : tensor<?x?xf32>)
-//       CHECK:       %[[UPDATE:.+]] = subtensor_insert %[[ST_RESULT]]
+//       CHECK:       %[[UPDATE:.+]] = tensor.insert_slice %[[ST_RESULT]]
 //  CHECK-SAME:         into %[[ARG6]][%[[IV0]], %[[IV1]]]
 //       CHECK:       scf.yield %[[UPDATE]]
 //       CHECK:     scf.yield %[[YIELD]]
@@ -226,10 +226,10 @@
 // TLOOP-SAME:      %[[AB_:.*]] = %[[AB]]: [[TY]])
 // TLOOP-SAME: outs (%[[INIT_:.*]] = %[[INIT]]: [[TY]]) {
 
-// TLOOP:    %[[INIT_SUB:.*]] = subtensor %[[INIT_]][%[[IV0]], %[[IV1]]]
-// TLOOP:    %[[A_SUB:.*]] = subtensor %[[A_]][%[[IV0]], 0]
-// TLOOP:    %[[B_SUB:.*]] = subtensor %[[B_]][0, %[[IV1]]]
-// TLOOP:    %[[AB_SUB_INIT:.*]] = subtensor %[[AB_]][%[[IV0]], %[[IV1]]]
+// TLOOP:    %[[INIT_SUB:.*]] = tensor.extract_slice %[[INIT_]][%[[IV0]], %[[IV1]]]
+// TLOOP:    %[[A_SUB:.*]] = tensor.extract_slice %[[A_]][%[[IV0]], 0]
+// TLOOP:    %[[B_SUB:.*]] = tensor.extract_slice %[[B_]][0, %[[IV1]]]
+// TLOOP:    %[[AB_SUB_INIT:.*]] = tensor.extract_slice %[[AB_]][%[[IV0]], %[[IV1]]]
 
 // TLOOP:    %[[AB_SUB:.*]] = linalg.matmul
 // TLOOP-SAME:  ins(%[[A_SUB]], %[[B_SUB]] : [[TY]], [[TY]])
@@ -238,7 +238,7 @@
 // TLOOP:    %[[DOUBLE_AB:.*]] = linalg.generic
 // TLOOP-SAME:  ins(%[[AB_SUB]] : [[TY]]) outs(%[[INIT_SUB]] : [[TY]])
 
-// TLOOP:    %[[RESULT_SUB:.*]] = subtensor_insert
+// TLOOP:    %[[RESULT_SUB:.*]] = tensor.insert_slice
 // TLOOP-SAME:  %[[DOUBLE_AB:.*]] into %[[INIT_]][%[[IV0]], %[[IV1]]]
 
 // TLOOP:    linalg.yield %[[RESULT_SUB]] : [[TY]]
@@ -267,13 +267,13 @@
 //   CHECK-NOT: fill
 //       CHECK: scf.for %[[I:.*]]{{.*}}iter_args(%{{.*}} = %[[ARG0]]) -> (tensor<?x?xf32>) {
 //       CHECK:   scf.for %[[J:.*]]
-//       CHECK:     %[[ST:.*]] = subtensor %[[ARG0]]
+//       CHECK:     %[[ST:.*]] = tensor.extract_slice %[[ARG0]]
 //       CHECK:     %[[ST_FILL:.*]] = linalg.fill(%[[ST]], %[[C0]]) {__internal_linalg_transform__ = "after_out_fusion_producer"} : tensor<?x?xf32>, f32 -> tensor<?x?xf32>
 //       CHECK:     %[[ST_MM_RES:.*]] = scf.for %[[K:.*]]{{.*}}iter_args(%[[BB:.*]] = %[[ST_FILL]]) -> (tensor<?x?xf32>) {
 //   CHECK-NOT:       fill
 //       CHECK:       %[[ST_MM:.*]] = linalg.matmul {__internal_linalg_transform__ = "after_out_fusion"} ins(%{{.*}}, %{{.*}} : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[BB]] : tensor<?x?xf32>) -> tensor<?x?xf32>
 //       CHECK:       scf.yield %[[ST_MM]] : tensor<?x?xf32>
-//       CHECK:     %[[MM:.*]] = subtensor_insert %[[ST_MM_RES]] into {{.*}}
+//       CHECK:     %[[MM:.*]] = tensor.insert_slice %[[ST_MM_RES]] into {{.*}}
 //       CHECK:     scf.yield %[[MM]] : tensor<?x?xf32>
 
 
@@ -300,9 +300,9 @@
 // TLOOP-SAME: outs (%[[OUT_:.*]] = %[[OUT]]: [[TY]]) {
 
 // TLOOP:    %[[DIM_A__1:.*]] = memref.dim %[[A_]], %[[C1]] : [[TY]]
-// TLOOP:    %[[A_SUB:.*]] = subtensor %[[A_]][%[[I]], 0]
-// TLOOP:    %[[B_SUB:.*]] = subtensor %[[B_]][0, %[[J]]]
-// TLOOP:    %[[OUT_SUB:.*]] = subtensor %[[OUT_]][%[[I]], %[[J]]]
+// TLOOP:    %[[A_SUB:.*]] = tensor.extract_slice %[[A_]][%[[I]], 0]
+// TLOOP:    %[[B_SUB:.*]] = tensor.extract_slice %[[B_]][0, %[[J]]]
+// TLOOP:    %[[OUT_SUB:.*]] = tensor.extract_slice %[[OUT_]][%[[I]], %[[J]]]
 // TLOOP:    %[[INIT_SUB:.*]] = linalg.fill(%[[OUT_SUB]], %[[C0_F32]])
 
 // TLOOP:    %[[AB_SUB:.*]] = linalg.tiled_loop (%[[K:.*]]) = (%[[C0]])
@@ -312,15 +312,15 @@
 // TLOOP-SAME: outs (%[[INIT_SUB_:.*]] = %[[INIT_SUB]]: [[TY]])
 // TLOOP-SAME: iterators["reduction"] {
 
-// TLOOP:      %[[A_SUB_SUB:.*]] = subtensor %[[A_SUB_]][0, %[[K]]]
-// TLOOP:      %[[B_SUB_SUB:.*]] = subtensor %[[B_SUB_]][%[[K]], 0]
+// TLOOP:      %[[A_SUB_SUB:.*]] = tensor.extract_slice %[[A_SUB_]][0, %[[K]]]
+// TLOOP:      %[[B_SUB_SUB:.*]] = tensor.extract_slice %[[B_SUB_]][%[[K]], 0]
 
 // TLOOP:      %[[AB_SUB_SUB:.*]] = linalg.matmul
 // TLOOP-SAME:   ins(%[[A_SUB_SUB]], %[[B_SUB_SUB]] : [[TY]], [[TY]])
 // TLOOP-SAME:   outs(%[[INIT_SUB_]] : [[TY]]) -> [[TY]]
 // TLOOP:      linalg.yield %[[AB_SUB_SUB]] : [[TY]]
 // TLOOP:    }
-// TLOOP:    %[[SUB_RESULT:.*]] = subtensor_insert %[[AB_SUB]]
+// TLOOP:    %[[SUB_RESULT:.*]] = tensor.insert_slice %[[AB_SUB]]
 // TLOOP-SAME:  into %[[OUT_]][%[[I]], %[[J]]]
 // TLOOP:    linalg.yield %[[SUB_RESULT]] : [[TY]]
 // TLOOP:  }
@@ -371,9 +371,9 @@
 // TLOOP-SAME: outs (%[[OUT_:.*]] = %[[OUT]]: [[TY]]) {
 
 // TLOOP:    %[[DIM_A__1:.*]] = memref.dim %[[A_]], %[[C1]] : [[TY]]
-// TLOOP:    %[[A_SUB:.*]] = subtensor %[[A_]][%[[I]], 0]
-// TLOOP:    %[[B_SUB:.*]] = subtensor %[[B_]][0, %[[J]]]
-// TLOOP:    %[[OUT_SUB:.*]] = subtensor %[[OUT_]][%[[I]], %[[J]]]
+// TLOOP:    %[[A_SUB:.*]] = tensor.extract_slice %[[A_]][%[[I]], 0]
+// TLOOP:    %[[B_SUB:.*]] = tensor.extract_slice %[[B_]][0, %[[J]]]
+// TLOOP:    %[[OUT_SUB:.*]] = tensor.extract_slice %[[OUT_]][%[[I]], %[[J]]]
 // TLOOP:    %[[INIT_SUB:.*]] = linalg.generic
 // TLOOP-SAME: ins(%[[C0_F32_]]
 // TLOOP-SAME: outs(%[[OUT_SUB]]
@@ -385,15 +385,15 @@
 // TLOOP-SAME: outs (%[[INIT_SUB_:.*]] = %[[INIT_SUB]]: [[TY]])
 // TLOOP-SAME: iterators["reduction"] {
 
-// TLOOP:      %[[A_SUB_SUB:.*]] = subtensor %[[A_SUB_]][0, %[[K]]]
-// TLOOP:      %[[B_SUB_SUB:.*]] = subtensor %[[B_SUB_]][%[[K]], 0]
+// TLOOP:      %[[A_SUB_SUB:.*]] = tensor.extract_slice %[[A_SUB_]][0, %[[K]]]
+// TLOOP:      %[[B_SUB_SUB:.*]] = tensor.extract_slice %[[B_SUB_]][%[[K]], 0]
 
 // TLOOP:      %[[AB_SUB_SUB:.*]] = linalg.matmul
 // TLOOP-SAME:   ins(%[[A_SUB_SUB]], %[[B_SUB_SUB]] : [[TY]], [[TY]])
 // TLOOP-SAME:   outs(%[[INIT_SUB_]] : [[TY]]) -> [[TY]]
 // TLOOP:      linalg.yield %[[AB_SUB_SUB]] : [[TY]]
 // TLOOP:    }
-// TLOOP:    %[[SUB_RESULT:.*]] = subtensor_insert %[[AB_SUB]]
+// TLOOP:    %[[SUB_RESULT:.*]] = tensor.insert_slice %[[AB_SUB]]
 // TLOOP-SAME:  into %[[OUT_]][%[[I]], %[[J]]]
 // TLOOP:    linalg.yield %[[SUB_RESULT]] : [[TY]]
 // TLOOP:  }
diff --git a/mlir/test/Dialect/Linalg/hoist-padding.mlir b/mlir/test/Dialect/Linalg/hoist-padding.mlir
--- a/mlir/test/Dialect/Linalg/hoist-padding.mlir
+++ b/mlir/test/Dialect/Linalg/hoist-padding.mlir
@@ -53,10 +53,10 @@
   //      CHECK:   %[[A:.*]] = scf.for %[[J1:[0-9a-z]+]] =
   // Iteration count along J1
   //      CHECK:     %[[IDXpad0_K:[0-9]+]] = affine.apply #[[$DIV4]](%[[J1]])
-  //      CHECK:     subtensor %{{.*}} [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+  //      CHECK:     tensor.extract_slice %{{.*}} [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
   //      CHECK:     linalg.pad_tensor %{{.*}}
   //      CHECK:       : tensor<?x?xf32> to tensor<2x4xf32>
-  //      CHECK:     subtensor_insert %{{.*}} into %{{.*}}[%[[IDXpad0_K]], 0, 0]
+  //      CHECK:     tensor.insert_slice %{{.*}} into %{{.*}}[%[[IDXpad0_K]], 0, 0]
   // CHECK-SAME:       [1, 2, 4] [1, 1, 1] : tensor<2x4xf32> into tensor<?x2x4xf32>
   // Second tensor is KxN but loop order is (M, N, K) so padded tensor is NxKx4x3
   //      CHECK:   %[[SZpad1_N:[0-9]+]] = affine.apply #[[$DIVS3]]()[%[[dN]]]
@@ -69,23 +69,23 @@
   //      CHECK:     scf.for %[[J2:[0-9a-z]+]] =
   // Iteration count along J2
   //      CHECK:       %[[IDXpad1_N:[0-9]+]] = affine.apply #[[$DIV4]](%[[J2]])
-  //      CHECK:       subtensor %{{.*}} [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+  //      CHECK:       tensor.extract_slice %{{.*}} [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
   //      CHECK:       linalg.pad_tensor %{{.*}}
   //      CHECK:         : tensor<?x?xf32> to tensor<4x3xf32>
-  //      CHECK:       subtensor_insert %{{.*}} into %{{.*}}[%[[IDXpad1_K]], %[[IDXpad1_N]], 0, 0]
+  //      CHECK:       tensor.insert_slice %{{.*}} into %{{.*}}[%[[IDXpad1_K]], %[[IDXpad1_N]], 0, 0]
   // CHECK-SAME:         [1, 1, 4, 3] [1, 1, 1, 1] : tensor<4x3xf32> into tensor<?x?x4x3xf32>
   // 2-D loop
   //      CHECK:   scf.for %[[J:[0-9a-zA-Z]+]]
   //      CHECK:     scf.for %[[K:[0-9a-zA-Z]+]]
   // Iteration count along K
   //      CHECK:       %[[IDXpad0_K:[0-9]+]] = affine.apply #[[$DIV4]](%[[K]])
-  //      CHECK:       %[[stA:.*]] = subtensor %[[A]][%[[IDXpad0_K]], 0, 0] [1, 2, 4] [1, 1, 1] :
+  //      CHECK:       %[[stA:.*]] = tensor.extract_slice %[[A]][%[[IDXpad0_K]], 0, 0] [1, 2, 4] [1, 1, 1] :
   // CHECK-SAME:         tensor<?x2x4xf32> to tensor<2x4xf32>
   // Iteration count along J
   //      CHECK:       %[[IDXpad1_N:[0-9]+]] = affine.apply #[[$DIV3]](%[[J]])
   // Iteration count along K
   //      CHECK:       %[[IDXpad1_K:[0-9]+]] = affine.apply #[[$DIV4]](%[[K]])
-  //      CHECK:       %[[stB:.*]] = subtensor %[[B]][%[[IDXpad1_N]], %[[IDXpad1_K]], 0, 0] [1, 1, 4, 3] [1, 1, 1, 1] :
+  //      CHECK:       %[[stB:.*]] = tensor.extract_slice %[[B]][%[[IDXpad1_N]], %[[IDXpad1_K]], 0, 0] [1, 1, 4, 3] [1, 1, 1, 1] :
   // CHECK-SAME:         tensor<?x?x4x3xf32> to tensor<4x3xf32>
   //      CHECK:       %[[stC:.*]] = linalg.pad_tensor %{{.*}}
   //      CHECK:         : tensor<?x?xf32> to tensor<2x3xf32>
@@ -98,17 +98,17 @@
         %7 = affine.min #map0(%arg3)[%6]
         %8 = memref.dim %arg0, %c1 : tensor<?x?xf32>
         %9 = affine.min #map1(%arg7)[%8]
-        %10 = subtensor %arg0[%arg3, %arg7] [%7, %9] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+        %10 = tensor.extract_slice %arg0[%arg3, %arg7] [%7, %9] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
         %11 = memref.dim %arg1, %c0 : tensor<?x?xf32>
         %12 = affine.min #map1(%arg7)[%11]
         %13 = memref.dim %arg1, %c1 : tensor<?x?xf32>
         %14 = affine.min #map2(%arg5)[%13]
-        %15 = subtensor %arg1[%arg7, %arg5] [%12, %14] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+        %15 = tensor.extract_slice %arg1[%arg7, %arg5] [%12, %14] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
         %16 = memref.dim %arg8, %c0 : tensor<?x?xf32>
         %17 = affine.min #map3(%16, %arg3)
         %18 = memref.dim %arg8, %c1 : tensor<?x?xf32>
         %19 = affine.min #map4(%18, %arg5)
-        %20 = subtensor %arg8[%arg3, %arg5] [%17, %19] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+        %20 = tensor.extract_slice %arg8[%arg3, %arg5] [%17, %19] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
         %21 = subi %c2, %7 : index
         %22 = subi %c4, %9 : index
         %23 = linalg.pad_tensor %10 low[%c0, %c0] high[%21, %22] {
@@ -128,8 +128,8 @@
           linalg.yield %cst : f32
         } : tensor<?x?xf32> to tensor<2x3xf32>
         %30 = linalg.matmul ins(%23, %26 : tensor<2x4xf32>, tensor<4x3xf32>) outs(%29 : tensor<2x3xf32>) -> tensor<2x3xf32>
-        %31 = subtensor %30[0, 0] [%7, %14] [1, 1] : tensor<2x3xf32> to tensor<?x?xf32>
-        %32 = subtensor_insert %31 into %arg8[%arg3, %arg5] [%17, %19] [%c1, %c1] : tensor<?x?xf32> into tensor<?x?xf32>
+        %31 = tensor.extract_slice %30[0, 0] [%7, %14] [1, 1] : tensor<2x3xf32> to tensor<?x?xf32>
+        %32 = tensor.insert_slice %31 into %arg8[%arg3, %arg5] [%17, %19] [%c1, %c1] : tensor<?x?xf32> into tensor<?x?xf32>
         scf.yield %32 : tensor<?x?xf32>
       }
       scf.yield %5 : tensor<?x?xf32>
@@ -173,7 +173,7 @@
   //      CHECK:   %[[INIT_PACKED_A:.*]] = linalg.init_tensor [%[[D0]], %[[D1]], 2] : tensor<?x?x2xf32>
   //      CHECK:   %[[PACKED_A:.*]] = scf.for %[[II:[0-9a-z]+]] = {{.*}} iter_args(%{{.*}} = %[[INIT_PACKED_A]]) -> (tensor<?x?x2xf32>) {
   //      CHECK:     scf.for %[[III:[0-9a-z]+]] =
-  //      CHECK:       subtensor_insert %{{.*}} into %{{.*}}[%{{.*}}, %{{.*}}, 0] [1, 1, 2] [1, 1, 1] : tensor<2xf32> into tensor<?x?x2xf32>
+  //      CHECK:       tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, %{{.*}}, 0] [1, 1, 2] [1, 1, 1] : tensor<2xf32> into tensor<?x?x2xf32>
   //
   //      CHECK:   %[[D0_2:.*]] = affine.apply #[[$DIV4]](%[[MR8]])
   //      CHECK:   %[[MM4_2:.*]] = affine.min #[[$MIN_MOD4]](%[[MR8]])
@@ -182,33 +182,33 @@
   //      CHECK:   %[[INIT_PACKED_B:.*]] = linalg.init_tensor [%[[D0_2]], %[[D1_2]], 2] : tensor<?x?x2xf32>
   //      CHECK:   %[[PACKED_B:.*]] = scf.for %[[II_2:[0-9a-z]+]] = {{.*}} iter_args(%{{.*}} = %[[INIT_PACKED_B]]) -> (tensor<?x?x2xf32>) {
   //      CHECK:     scf.for %[[III_2:[0-9a-z]+]] =
-  //      CHECK:       subtensor_insert %{{.*}} into %{{.*}}[%{{.*}}, %{{.*}}, 0] [1, 1, 2] [1, 1, 1] : tensor<2xf32> into tensor<?x?x2xf32>
+  //      CHECK:       tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, %{{.*}}, 0] [1, 1, 2] [1, 1, 1] : tensor<2xf32> into tensor<?x?x2xf32>
   // Compute.
   //      CHECK:   scf.for %[[II_3:[0-9a-z]+]] =
   //      CHECK:     scf.for %[[III_3:[0-9a-z]+]] = {{.*}} iter_args(%[[C:.*]] = %{{.*}}) -> (tensor<f32>) {
   //      CHECK:       %[[IDX0:.*]] = affine.apply #[[$DIV4]](%[[II_3]])
   //      CHECK:       %[[IDX1:.*]] = affine.apply #[[$DIV2]](%[[III_3]])
-  //      CHECK:       %[[A:.*]] = subtensor %[[PACKED_A]][%[[IDX0]], %[[IDX1]], 0] [1, 1, 2] [1, 1, 1] : tensor<?x?x2xf32> to tensor<2xf32>
+  //      CHECK:       %[[A:.*]] = tensor.extract_slice %[[PACKED_A]][%[[IDX0]], %[[IDX1]], 0] [1, 1, 2] [1, 1, 1] : tensor<?x?x2xf32> to tensor<2xf32>
   //      CHECK:       %[[IDX0_2:.*]] = affine.apply #[[$DIV4]](%[[II_3]])
   //      CHECK:       %[[IDX1_2:.*]] = affine.apply #[[$DIV2]](%[[III_3]])
-  //      CHECK:       %[[B:.*]] = subtensor %[[PACKED_B]][%[[IDX0_2]], %[[IDX1_2]], 0] [1, 1, 2] [1, 1, 1] : tensor<?x?x2xf32> to tensor<2xf32>
+  //      CHECK:       %[[B:.*]] = tensor.extract_slice %[[PACKED_B]][%[[IDX0_2]], %[[IDX1_2]], 0] [1, 1, 2] [1, 1, 1] : tensor<?x?x2xf32> to tensor<2xf32>
   //      CHECK:       linalg.dot ins(%[[A]], %[[B]] : tensor<2xf32>, tensor<2xf32>) outs(%[[C]] : tensor<f32>) -> tensor<f32>
 
   %4 = scf.for %arg3 = %c0 to %1 step %c8 iter_args(%arg4 = %arg2) -> (tensor<f32>) {
     %5 = affine.min #map0(%arg3)[%2]
-    %6 = subtensor %arg0[%arg3] [%5] [1] : tensor<?xf32> to tensor<?xf32>
+    %6 = tensor.extract_slice %arg0[%arg3] [%5] [1] : tensor<?xf32> to tensor<?xf32>
     %7 = affine.min #map0(%arg3)[%3]
-    %8 = subtensor %arg1[%arg3] [%7] [1] : tensor<?xf32> to tensor<?xf32>
+    %8 = tensor.extract_slice %arg1[%arg3] [%7] [1] : tensor<?xf32> to tensor<?xf32>
     %9 = scf.for %arg5 = %c0 to %5 step %c4 iter_args(%arg6 = %arg4) -> (tensor<f32>) {
       %10 = affine.min #map1(%5, %arg5)
-      %11 = subtensor %6[%arg5] [%10] [1] : tensor<?xf32> to tensor<?xf32>
+      %11 = tensor.extract_slice %6[%arg5] [%10] [1] : tensor<?xf32> to tensor<?xf32>
       %12 = affine.min #map1(%7, %arg5)
-      %13 = subtensor %8[%arg5] [%12] [1] : tensor<?xf32> to tensor<?xf32>
+      %13 = tensor.extract_slice %8[%arg5] [%12] [1] : tensor<?xf32> to tensor<?xf32>
       %14 = scf.for %arg7 = %c0 to %10 step %c2 iter_args(%arg8 = %arg6) -> (tensor<f32>) {
         %15 = affine.min #map2(%10, %arg7)
-        %16 = subtensor %11[%arg7] [%15] [1] : tensor<?xf32> to tensor<?xf32>
+        %16 = tensor.extract_slice %11[%arg7] [%15] [1] : tensor<?xf32> to tensor<?xf32>
         %17 = affine.min #map2(%12, %arg7)
-        %18 = subtensor %13[%arg7] [%17] [1] : tensor<?xf32> to tensor<?xf32>
+        %18 = tensor.extract_slice %13[%arg7] [%17] [1] : tensor<?xf32> to tensor<?xf32>
         %19 = subi %c2, %15 : index
         %20 = linalg.pad_tensor %16 low[%c0] high[%19]  {
         ^bb0(%arg9: index):  // no predecessors
@@ -245,17 +245,17 @@
   %1 = scf.for %arg3 = %c0 to %c32 step %c16 iter_args(%arg4 = %arg2) -> (tensor<32x64xf32>) {
     %2 = scf.for %arg5 = %c0 to %c64 step %c32 iter_args(%arg6 = %arg4) -> (tensor<32x64xf32>) {
       %3 = scf.for %arg7 = %c0 to %c128 step %c32 iter_args(%arg8 = %arg6) -> (tensor<32x64xf32>) {
-        %4 = subtensor %arg0[%arg3, %arg7] [16, 32] [1, 1] : tensor<32x128xf32> to tensor<16x32xf32>
-        %5 = subtensor %arg1[%arg7, %arg5] [32, 32] [1, 1] : tensor<128x64xf32> to tensor<32x32xf32>
-        %6 = subtensor %arg8[%arg3, %arg5] [16, 32] [1, 1] : tensor<32x64xf32> to tensor<16x32xf32>
+        %4 = tensor.extract_slice %arg0[%arg3, %arg7] [16, 32] [1, 1] : tensor<32x128xf32> to tensor<16x32xf32>
+        %5 = tensor.extract_slice %arg1[%arg7, %arg5] [32, 32] [1, 1] : tensor<128x64xf32> to tensor<32x32xf32>
+        %6 = tensor.extract_slice %arg8[%arg3, %arg5] [16, 32] [1, 1] : tensor<32x64xf32> to tensor<16x32xf32>
         %7 = scf.for %arg9 = %c0 to %c16 step %c2 iter_args(%arg10 = %6) -> (tensor<16x32xf32>) {
           %10 = scf.for %arg11 = %c0 to %c32 step %c4 iter_args(%arg12 = %arg10) -> (tensor<16x32xf32>) {
             %11 = scf.for %arg13 = %c0 to %c32 step %c16 iter_args(%arg14 = %arg12) -> (tensor<16x32xf32>) {
-              %12 = subtensor %4[%arg9, %arg13] [2, 16] [1, 1] : tensor<16x32xf32> to tensor<2x16xf32>
+              %12 = tensor.extract_slice %4[%arg9, %arg13] [2, 16] [1, 1] : tensor<16x32xf32> to tensor<2x16xf32>
               %13 = tensor.cast %12 : tensor<2x16xf32> to tensor<?x?xf32>
-              %14 = subtensor %5[%arg13, %arg11] [16, 4] [1, 1] : tensor<32x32xf32> to tensor<16x4xf32>
+              %14 = tensor.extract_slice %5[%arg13, %arg11] [16, 4] [1, 1] : tensor<32x32xf32> to tensor<16x4xf32>
               %15 = tensor.cast %14 : tensor<16x4xf32> to tensor<?x?xf32>
-              %16 = subtensor %arg14[%arg9, %arg11] [2, 4] [1, 1] : tensor<16x32xf32> to tensor<2x4xf32>
+              %16 = tensor.extract_slice %arg14[%arg9, %arg11] [2, 4] [1, 1] : tensor<16x32xf32> to tensor<2x4xf32>
               %17 = tensor.cast %16 : tensor<2x4xf32> to tensor<?x?xf32>
               %18 = linalg.pad_tensor %13 low[%c0, %c0] high[%c0, %c0]  {
               ^bb0(%arg15: index, %arg16: index):  // no predecessors
@@ -271,7 +271,7 @@
               } : tensor<?x?xf32> to tensor<2x4xf32>
               %21 = linalg.matmul ins(%18, %19 : tensor<2x16xf32>, tensor<16x4xf32>) outs(%20 : tensor<2x4xf32>) -> tensor<2x4xf32>
               %22 = tensor.cast %21 : tensor<2x4xf32> to tensor<?x?xf32>
-              %23 = subtensor_insert %22 into %arg14[%arg9, %arg11] [%c2, %c4] [1, 1] : tensor<?x?xf32> into tensor<16x32xf32>
+              %23 = tensor.insert_slice %22 into %arg14[%arg9, %arg11] [%c2, %c4] [1, 1] : tensor<?x?xf32> into tensor<16x32xf32>
               scf.yield %23 : tensor<16x32xf32>
             }
             scf.yield %11 : tensor<16x32xf32>
@@ -279,7 +279,7 @@
           scf.yield %10 : tensor<16x32xf32>
         }
         %8 = tensor.cast %7 : tensor<16x32xf32> to tensor<?x?xf32>
-        %9 = subtensor_insert %8 into %arg8[%arg3, %arg5] [%c16, %c32] [1, 1] : tensor<?x?xf32> into tensor<32x64xf32>
+        %9 = tensor.insert_slice %8 into %arg8[%arg3, %arg5] [%c16, %c32] [1, 1] : tensor<?x?xf32> into tensor<32x64xf32>
         scf.yield %9 : tensor<32x64xf32>
       }
       scf.yield %3 : tensor<32x64xf32>
diff --git a/mlir/test/Dialect/Linalg/hoisting.mlir b/mlir/test/Dialect/Linalg/hoisting.mlir
--- a/mlir/test/Dialect/Linalg/hoisting.mlir
+++ b/mlir/test/Dialect/Linalg/hoisting.mlir
@@ -321,14 +321,14 @@
 
 // -----
 
-// CHECK-LABEL: func @hoist_vector_transfer_pairs_tensor_and_subtensors
+// CHECK-LABEL: func @hoist_vector_transfer_pairs_tensor_and_slices
 //  CHECK-SAME:   %[[TENSOR0:[a-zA-Z0-9]*]]: tensor<?x?xf32>,
 //  CHECK-SAME:   %[[TENSOR1:[a-zA-Z0-9]*]]: tensor<?x?xf32>,
 //  CHECK-SAME:   %[[TENSOR2:[a-zA-Z0-9]*]]: tensor<?x?xf32>,
 //  CHECK-SAME:   %[[TENSOR3:[a-zA-Z0-9]*]]: tensor<?x?xf32>,
 //  CHECK-SAME:   %[[TENSOR4:[a-zA-Z0-9]*]]: tensor<?x?xf32>,
 //  CHECK-SAME:   %[[TENSOR5:[a-zA-Z0-9]*]]: tensor<?x?xf32>
-func @hoist_vector_transfer_pairs_tensor_and_subtensors(
+func @hoist_vector_transfer_pairs_tensor_and_slices(
     %tensor0: tensor<?x?xf32>, %tensor1: tensor<?x?xf32>, %tensor2: tensor<?x?xf32>,
     %tensor3: tensor<?x?xf32>, %tensor4: tensor<?x?xf32>, %tensor5: tensor<?x?xf32>,
     %val: index, %lb : index, %ub : index, %step: index) ->
@@ -349,7 +349,7 @@
     -> (tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>)  {
 
     // Hoisted
-    // CHECK:   %[[ST0:.*]] = subtensor %[[TENSOR0_ARG]][%[[I]], %[[I]]]{{.*}}: tensor<?x?xf32> to tensor<?x?xf32>
+    // CHECK:   %[[ST0:.*]] = tensor.extract_slice %[[TENSOR0_ARG]][%[[I]], %[[I]]]{{.*}}: tensor<?x?xf32> to tensor<?x?xf32>
     // CHECK:   %[[V0:.*]] = vector.transfer_read %[[ST0]]{{.*}} : tensor<?x?xf32>, vector<1xf32>
 
     //      CHECK:   %[[R:.*]]:3 = scf.for %[[J:.*]] = {{.*}} iter_args(
@@ -362,19 +362,19 @@
     iter_args(%arg6 = %arg0, %arg7 = %arg1, %arg8 = %arg2)
     -> (tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>)  {
       // Hoists.
-      %st0 = subtensor %arg6[%i, %i][%step, %step][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+      %st0 = tensor.extract_slice %arg6[%i, %i][%step, %step][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
       %r0 = vector.transfer_read %st0[%c0, %c0], %cst: tensor<?x?xf32>, vector<1xf32>
 
-      // CHECK:     %[[ST1:.*]] = subtensor %[[TENSOR1_ARG_L2]][%[[J]],{{.*}}: tensor<?x?xf32> to tensor<?x?xf32>
+      // CHECK:     %[[ST1:.*]] = tensor.extract_slice %[[TENSOR1_ARG_L2]][%[[J]],{{.*}}: tensor<?x?xf32> to tensor<?x?xf32>
       // CHECK:     %[[V1:.*]] = vector.transfer_read %[[ST1]]{{.*}} : tensor<?x?xf32>, vector<2xf32>
-      // Does not hoist (subtensor depends on %j)
-      %st1 = subtensor %arg7[%j, %c0][%step, %step][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+      // Does not hoist (slice depends on %j)
+      %st1 = tensor.extract_slice %arg7[%j, %c0][%step, %step][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
       %r1 = vector.transfer_read %st1[%c0, %c0], %cst: tensor<?x?xf32>, vector<2xf32>
 
-      // CHECK:     %[[ST2:.*]] = subtensor %[[TENSOR2_ARG_L2]][%[[I]],{{.*}}: tensor<?x?xf32> to tensor<?x?xf32>
+      // CHECK:     %[[ST2:.*]] = tensor.extract_slice %[[TENSOR2_ARG_L2]][%[[I]],{{.*}}: tensor<?x?xf32> to tensor<?x?xf32>
       // CHECK:     %[[V2:.*]] = vector.transfer_read %[[ST2]]{{.*}} : tensor<?x?xf32>, vector<3xf32>
-      // Does not hoist, 2 subtensor %arg8.
-      %st2 = subtensor %arg8[%i, %c0][%step, %step][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+      // Does not hoist, 2 slice %arg8.
+      %st2 = tensor.extract_slice %arg8[%i, %c0][%step, %step][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
       %r2 = vector.transfer_read %st2[%c0, %c0], %cst: tensor<?x?xf32>, vector<3xf32>
 
       // CHECK:     %[[U0:.*]] = "some_use"(%[[V0_ARG_L2]]) : (vector<1xf32>) -> vector<1xf32>
@@ -388,25 +388,25 @@
       %w0 = vector.transfer_write %u0, %st0[%c0, %c0] : vector<1xf32>, tensor<?x?xf32>
 
       // CHECK-DAG:     %[[STI1:.*]] = vector.transfer_write %[[U1]], %{{.*}} : vector<2xf32>, tensor<?x?xf32>
-      // Does not hoist (associated subtensor depends on %j).
+      // Does not hoist (associated slice depends on %j).
       %w1 = vector.transfer_write %u1, %st1[%i, %i] : vector<2xf32>, tensor<?x?xf32>
 
       // CHECK-DAG:     %[[STI2:.*]] = vector.transfer_write %[[U2]], %{{.*}} : vector<3xf32>, tensor<?x?xf32>
-      // Does not hoist, 2 subtensor / subtensor_insert for %arg8.
+      // Does not hoist, 2 slice / insert_slice for %arg8.
       %w2 = vector.transfer_write %u2, %st2[%c0, %c0] : vector<3xf32>, tensor<?x?xf32>
 
       // Hoists.
-      %sti0 = subtensor_insert %w0 into %arg6[%i, %i][%step, %step][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
+      %sti0 = tensor.insert_slice %w0 into %arg6[%i, %i][%step, %step][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
 
-      // CHECK-DAG:     subtensor_insert %[[STI1]] into %[[TENSOR1_ARG_L2]][%[[J]],{{.*}}: tensor<?x?xf32> into tensor<?x?xf32>
+      // CHECK-DAG:     tensor.insert_slice %[[STI1]] into %[[TENSOR1_ARG_L2]][%[[J]],{{.*}}: tensor<?x?xf32> into tensor<?x?xf32>
       // Does not hoist (depends on %j).
-      %sti1 = subtensor_insert %w1 into %arg7[%j, %c0][%step, %step][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
+      %sti1 = tensor.insert_slice %w1 into %arg7[%j, %c0][%step, %step][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
 
-      // CHECK-DAG:     subtensor_insert %[[STI2]] into %[[TENSOR2_ARG_L2]][%[[I]],{{.*}}: tensor<?x?xf32> into tensor<?x?xf32>
-      // Does not hoist, 2 subtensor / subtensor_insert for %arg8.
-      %sti2 = subtensor_insert %w2 into %arg8[%i, %c0][%step, %step][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
-      %st22 = subtensor %sti2[%i, %c0][%step, %step][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
-      %sti22 = subtensor_insert %st22 into %arg8[%i, %c0][%step, %step][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
+      // CHECK-DAG:     tensor.insert_slice %[[STI2]] into %[[TENSOR2_ARG_L2]][%[[I]],{{.*}}: tensor<?x?xf32> into tensor<?x?xf32>
+      // Does not hoist, 2 slice / insert_slice for %arg8.
+      %sti2 = tensor.insert_slice %w2 into %arg8[%i, %c0][%step, %step][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
+      %st22 = tensor.extract_slice %sti2[%i, %c0][%step, %step][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+      %sti22 = tensor.insert_slice %st22 into %arg8[%i, %c0][%step, %step][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
 
       // CHECK:     scf.yield {{.*}} : tensor<?x?xf32>, tensor<?x?xf32>, vector<1xf32>
       // CHECK:   }
@@ -416,7 +416,7 @@
 
     // Hoisted
     // CHECK:   %[[STI0:.*]] = vector.transfer_write %[[R]]#2, %[[ST0]]{{.*}} : vector<1xf32>, tensor<?x?xf32>
-    // CHECK:   subtensor_insert %[[STI0]] into %[[TENSOR0_ARG]][%[[I]], %[[I]]]{{.*}} : tensor<?x?xf32> into tensor<?x?xf32>
+    // CHECK:   tensor.insert_slice %[[STI0]] into %[[TENSOR0_ARG]][%[[I]], %[[I]]]{{.*}} : tensor<?x?xf32> into tensor<?x?xf32>
 
     // CHECK:   scf.yield {{.*}} : tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>
     scf.yield %1#0, %1#1, %1#2 :
diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir
--- a/mlir/test/Dialect/Linalg/roundtrip.mlir
+++ b/mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -732,11 +732,11 @@
  %prod = linalg.tiled_loop (%i) = (%c0) to (%c24) step (%c4)
       ins(%lhs_ = %lhs: tensor<24x64xi8>, %rhs_ = %rhs: tensor<24x64xi8>)
       outs(%out_ = %out: tensor<24x64xi8>) {
-    %lhs_sub = subtensor %lhs_[%i, 0] [%c4, %c64] [1, 1]
+    %lhs_sub = tensor.extract_slice %lhs_[%i, 0] [%c4, %c64] [1, 1]
         : tensor<24x64xi8> to tensor<?x?xi8>
-    %rhs_sub = subtensor %rhs_[%i, 0] [%c4, %c64] [1, 1]
+    %rhs_sub = tensor.extract_slice %rhs_[%i, 0] [%c4, %c64] [1, 1]
         : tensor<24x64xi8> to tensor<?x?xi8>
-    %out_sub = subtensor %out_[%i, 0] [%c4, %c64] [1, 1]
+    %out_sub = tensor.extract_slice %out_[%i, 0] [%c4, %c64] [1, 1]
         : tensor<24x64xi8> to tensor<?x?xi8>
 
     %sum = linalg.generic #trait_4
@@ -747,7 +747,7 @@
         linalg.yield %s : i8
       } -> tensor<?x?xi8>
 
-    %sum_sub = subtensor_insert %sum into %out_[%i, 0][%c4, %c64][1, 1]
+    %sum_sub = tensor.insert_slice %sum into %out_[%i, 0][%c4, %c64][1, 1]
       : tensor<?x?xi8> into tensor<24x64xi8>
     linalg.yield %sum_sub : tensor<24x64xi8>
   }
@@ -792,13 +792,13 @@
       outs(%o_ =  %output: tensor<24xf32>)
       iterators["reduction", "parallel", "reduction"]
       distribution["block_x", "block_y", "none"] {
-    %sub_3d = subtensor %i3d_[%i, %j, %k][2, 4, 8][1, 1, 1]
+    %sub_3d = tensor.extract_slice %i3d_[%i, %j, %k][2, 4, 8][1, 1, 1]
       : tensor<16x24x32xf32> to tensor<2x4x8xf32>
-    %sub_2d = subtensor %i2d_[%i, %k][2, 8][1, 1]
+    %sub_2d = tensor.extract_slice %i2d_[%i, %k][2, 8][1, 1]
       : tensor<16x32xf32> to tensor<2x8xf32>
-    %sub_1d = subtensor %i1d_[%j] [4] [1]
+    %sub_1d = tensor.extract_slice %i1d_[%j] [4] [1]
       : tensor<24xf32> to tensor<4xf32>
-    %sub_out = subtensor %o_[%j] [4] [1]
+    %sub_out = tensor.extract_slice %o_[%j] [4] [1]
       : tensor<24xf32> to tensor<4xf32>
     %acc = linalg.generic #trait_5
       ins(%sub_3d, %sub_2d, %sub_1d
@@ -810,7 +810,7 @@
       linalg.yield %1 : f32
     } -> tensor<4xf32>
 
-    %sum_sub = subtensor_insert %acc into %o_[%j][%c4][1]
+    %sum_sub = tensor.insert_slice %acc into %o_[%j][%c4][1]
       : tensor<4xf32> into tensor<24xf32>
     linalg.yield %sum_sub : tensor<24xf32>
   }
diff --git a/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir b/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
--- a/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
+++ b/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
@@ -2,7 +2,7 @@
 
 // CHECK-LABEL: @static_data_only(
 //  CHECK-SAME:     %[[ARG0:.*]]: tensor<4x5xf32>
-//       CHECK:   %[[RESULT:.*]] = subtensor %[[ARG0]][1, 2] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32>
+//       CHECK:   %[[RESULT:.*]] = tensor.extract_slice %[[ARG0]][1, 2] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32>
 //       CHECK:   return %[[RESULT]]
 func @static_data_only(%arg0 : tensor<4x5xf32>, %pad : f32)
     -> tensor<2x1xf32> {
@@ -10,7 +10,7 @@
     ^bb0(%arg1: index, %arg2: index):
       linalg.yield %pad : f32
     } : tensor<4x5xf32> to tensor<11x13xf32>
-  %1 = subtensor %0[1, 2] [2, 1] [1, 1] : tensor<11x13xf32> to tensor<2x1xf32>
+  %1 = tensor.extract_slice %0[1, 2] [2, 1] [1, 1] : tensor<11x13xf32> to tensor<2x1xf32>
   return %1 : tensor<2x1xf32>
 }
 
@@ -19,7 +19,7 @@
 // CHECK-LABEL: @static_high_pad_only
 //  CHECK-SAME:   %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
 //   CHECK-NOT:   linalg.pad_tensor
-//   CHECK-NOT:   subtensor
+//   CHECK-NOT:   tensor.extract_slice
 //       CHECK:   %[[RESULT:.*]] = tensor.generate
 //       CHECK:     tensor.yield %[[PAD]]
 //       CHECK:   return %[[RESULT]] : tensor<2x4xf32>
@@ -29,7 +29,7 @@
     ^bb0(%arg1: index, %arg2: index):
       linalg.yield %pad : f32
     } : tensor<4x5xf32> to tensor<11x13xf32>
-  %1 = subtensor %0[4, 5] [2, 4] [1, 1] : tensor<11x13xf32> to tensor<2x4xf32>
+  %1 = tensor.extract_slice %0[4, 5] [2, 4] [1, 1] : tensor<11x13xf32> to tensor<2x4xf32>
   return %1 : tensor<2x4xf32>
 }
 
@@ -38,7 +38,7 @@
 // CHECK-LABEL: @static_low_pad_only
 //  CHECK-SAME:   %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
 //   CHECK-NOT:   linalg.pad_tensor
-//   CHECK-NOT:   subtensor
+//   CHECK-NOT:   tensor.extract_slice
 //       CHECK:   %[[RESULT:.*]] = tensor.generate
 //       CHECK:     tensor.yield %[[PAD]]
 //       CHECK:   return %[[RESULT]] : tensor<2x3xf32>
@@ -48,7 +48,7 @@
     ^bb0(%arg1: index, %arg2: index):
       linalg.yield %pad : f32
     } : tensor<4x5xf32> to tensor<14x20xf32>
-  %1 = subtensor %0[1, 3] [2, 3] [1, 1] : tensor<14x20xf32> to tensor<2x3xf32>
+  %1 = tensor.extract_slice %0[1, 3] [2, 3] [1, 1] : tensor<14x20xf32> to tensor<2x3xf32>
   return %1 : tensor<2x3xf32>
 }
 
@@ -57,7 +57,7 @@
 // CHECK-LABEL: @static_low_pad_only_2
 //  CHECK-SAME:   %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
 //   CHECK-NOT:   linalg.pad_tensor
-//   CHECK-NOT:   subtensor
+//   CHECK-NOT:   tensor.extract_slice
 //       CHECK:   %[[RESULT:.*]] = tensor.generate
 //       CHECK:     tensor.yield %[[PAD]]
 //       CHECK:   return %[[RESULT]] : tensor<1x3xf32>
@@ -67,7 +67,7 @@
     ^bb0(%arg1: index, %arg2: index):
       linalg.yield %pad : f32
     } : tensor<4x5xf32> to tensor<14x20xf32>
-  %1 = subtensor %0[1, 3] [1, 3] [1, 1] : tensor<14x20xf32> to tensor<1x3xf32>
+  %1 = tensor.extract_slice %0[1, 3] [1, 3] [1, 1] : tensor<14x20xf32> to tensor<1x3xf32>
   return %1 : tensor<1x3xf32>
 }
 
@@ -76,7 +76,7 @@
 // CHECK-LABEL: @static_mixed_data_high_pad
 //  CHECK-SAME:   %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
 //   CHECK-NOT:   linalg.pad_tensor
-//       CHECK:   %[[SUBTENSOR:.*]] = subtensor %[[ARG0]][2, 4] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32>
+//       CHECK:   %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][2, 4] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32>
 //       CHECK:   %[[RESULT:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[0, 0] high[1, 3]
 //       CHECK:     linalg.yield %[[PAD]]
 //       CHECK:   return %[[RESULT]] : tensor<3x4xf32>
@@ -86,7 +86,7 @@
     ^bb0(%arg1: index, %arg2: index):
       linalg.yield %pad : f32
     } : tensor<4x5xf32> to tensor<11x13xf32>
-  %1 = subtensor %0[2, 4] [3, 4] [1, 1] : tensor<11x13xf32> to tensor<3x4xf32>
+  %1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1] : tensor<11x13xf32> to tensor<3x4xf32>
   return %1 : tensor<3x4xf32>
 }
 
@@ -95,7 +95,7 @@
 // CHECK-LABEL: @static_mixed_data_low_pad
 //  CHECK-SAME:   %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
 //   CHECK-NOT:   linalg.pad_tensor
-//       CHECK:   %[[SUBTENSOR:.*]] = subtensor %[[ARG0]][0, 0] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32>
+//       CHECK:   %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32>
 //       CHECK:   %[[RESULT:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[1, 3] high[0, 0]
 //       CHECK:     linalg.yield %[[PAD]]
 //       CHECK:   return %[[RESULT]] : tensor<3x4xf32>
@@ -105,7 +105,7 @@
     ^bb0(%arg1: index, %arg2: index):
       linalg.yield %pad : f32
     } : tensor<4x5xf32> to tensor<14x20xf32>
-  %1 = subtensor %0[2, 4] [3, 4] [1, 1] : tensor<14x20xf32> to tensor<3x4xf32>
+  %1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1] : tensor<14x20xf32> to tensor<3x4xf32>
   return %1 : tensor<3x4xf32>
 }
 
@@ -123,7 +123,7 @@
     ^bb0(%arg1: index, %arg2: index):
       linalg.yield %pad : f32
     } : tensor<4x5xf32> to tensor<13x16xf32>
-  %1 = subtensor %0[1, 2] [7, 9] [1, 1] : tensor<13x16xf32> to tensor<7x9xf32>
+  %1 = tensor.extract_slice %0[1, 2] [7, 9] [1, 1] : tensor<13x16xf32> to tensor<7x9xf32>
   return %1 : tensor<7x9xf32>
 }
 
@@ -138,7 +138,7 @@
 //       CHECK:     %[[GEN:.*]] = tensor.generate
 //       CHECK:     scf.yield %[[GEN]]
 //       CHECK:   } else {
-//       CHECK:     %[[SUBTENSOR:.*]] = subtensor %[[ARG0]][%{{.*}}, 4] [%{{.*}}, 1] [1, 1] : tensor<?x5xf32> to tensor<?x1xf32>
+//       CHECK:     %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][%{{.*}}, 4] [%{{.*}}, 1] [1, 1] : tensor<?x5xf32> to tensor<?x1xf32>
 //       CHECK:     %[[PADTENSOR:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3]
 //       CHECK:     %[[CAST:.*]] = tensor.cast %[[PADTENSOR]] : tensor<?x4xf32> to tensor<3x4xf32>
 //       CHECK:     scf.yield %[[CAST]]
@@ -149,7 +149,7 @@
     ^bb0(%arg1: index, %arg2: index):
       linalg.yield %pad : f32
     } : tensor<?x5xf32> to tensor<?x13xf32>
-  %1 = subtensor %0[2, 4] [3, 4] [1, 1] : tensor<?x13xf32> to tensor<3x4xf32>
+  %1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1] : tensor<?x13xf32> to tensor<3x4xf32>
   return %1 : tensor<3x4xf32>
 }
 
diff --git a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir b/mlir/test/Dialect/Linalg/tile-and-distribute.mlir
--- a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-distribute.mlir
@@ -199,12 +199,12 @@
 //      CHECK: %[[STEPX:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSX]], %[[C8]]]
 //      CHECK:   %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<?x?xf32>) {
 //      CHECK:     %[[TD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[TC1]]) -> (tensor<?x?xf32>) {
-//      CHECK:       %[[sTA:.*]] = subtensor %[[TA]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
-//      CHECK:       %[[sTB:.*]] = subtensor %[[TB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
-//      CHECK:       %[[sTC:.*]] = subtensor %[[TC2]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
+//      CHECK:       %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
+//      CHECK:       %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
+//      CHECK:       %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
 //      CHECK:       %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor<?x?xf32>, tensor<?x?xf32>)
 // CHECK-SAME:                                  outs(%[[sTC]] : tensor<?x?xf32>)  -> tensor<?x?xf32>
-//      CHECK:       %[[TD:.*]] = subtensor_insert %[[sTD]] into %[[TC2]][{{.*}}]  : tensor<?x?xf32> into tensor<?x?xf32>
+//      CHECK:       %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}]  : tensor<?x?xf32> into tensor<?x?xf32>
 //      CHECK:       scf.yield %[[TD]] : tensor<?x?xf32>
 //      CHECK:     scf.yield %[[TD2]] : tensor<?x?xf32>
 //      CHECK:   scf.yield %[[TD1]] : tensor<?x?xf32>
diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
@@ -16,11 +16,11 @@
   %3 = scf.for %arg3 = %c0 to %0 step %c2 iter_args(%arg4 = %arg2) -> (tensor<?x?xf32>) {
     %4 = scf.for %arg5 = %c0 to %2 step %c3 iter_args(%arg6 = %arg4) -> (tensor<?x?xf32>) {
       %5 = scf.for %arg7 = %c0 to %1 step %c4 iter_args(%arg8 = %arg6) -> (tensor<?x?xf32>) {
-        %6 = subtensor %t0[%arg3, %arg7][%c2, 4][1, 1] : tensor<?x?xf32> to tensor<?x4xf32>
-        %7 = subtensor %arg1[%arg7, %arg5][4, %c3][1, 1] : tensor<?x?xf32> to tensor<4x?xf32>
-        %8 = subtensor %arg8[%arg3, %arg5][%c2, %c3][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+        %6 = tensor.extract_slice %t0[%arg3, %arg7][%c2, 4][1, 1] : tensor<?x?xf32> to tensor<?x4xf32>
+        %7 = tensor.extract_slice %arg1[%arg7, %arg5][4, %c3][1, 1] : tensor<?x?xf32> to tensor<4x?xf32>
+        %8 = tensor.extract_slice %arg8[%arg3, %arg5][%c2, %c3][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
         %9 = linalg.matmul ins(%6, %7 : tensor<?x4xf32>, tensor<4x?xf32>) outs(%8 : tensor<?x?xf32>) -> tensor<?x?xf32>
-        %10 = subtensor_insert %9 into %arg8[%arg3, %arg5] [%c2, %c3] [1, 1]  : tensor<?x?xf32> into tensor<?x?xf32>
+        %10 = tensor.insert_slice %9 into %arg8[%arg3, %arg5] [%c2, %c3] [1, 1]  : tensor<?x?xf32> into tensor<?x?xf32>
         scf.yield %10 : tensor<?x?xf32>
       }
       scf.yield %5 : tensor<?x?xf32>
@@ -48,22 +48,22 @@
 //   CHECK-DAG: %[[dC1:.*]] = memref.dim %[[C]], %[[C1]] : tensor<?x?xf32>
 //       CHECK: scf.for %[[I:[0-9a-z]*]]
 //       CHECK:   %[[sizeA0:.*]] = affine.min #[[BOUND2_MAP]](%[[I]])[%[[dA0]]]
-//       CHECK:   %[[stA:.*]] = subtensor %[[A]][%[[I]], 0] [%[[sizeA0]], %[[dA1]]] [1, 1]  : tensor<?x?xf32> to tensor<?x?xf32>
+//       CHECK:   %[[stA:.*]] = tensor.extract_slice %[[A]][%[[I]], 0] [%[[sizeA0]], %[[dA1]]] [1, 1]  : tensor<?x?xf32> to tensor<?x?xf32>
 //       CHECK:   %[[sizeC0:.*]] = affine.min #[[BOUND2_MAP]](%[[I]])[%[[dC0]]]
 //  CHECK-NEXT:   scf.for %[[J:[0-9a-z]*]]
 //  CHECK-NEXT:     scf.for %[[K:[0-9a-z]*]] {{.*}} iter_args(%[[RES:[0-9a-z]*]]
-//   CHECK-DAG:       %[[stB1:.*]] = subtensor %[[B]][%[[K]], %[[J]]] [4, 3] [1, 1]  : tensor<?x?xf32> to tensor<4x3xf32>
-//   CHECK-DAG:       %[[stF:.*]] = subtensor %[[RES]][%[[I]], %[[J]]] [2, 3] [1, 1]  : tensor<?x?xf32> to tensor<2x3xf32>
+//   CHECK-DAG:       %[[stB1:.*]] = tensor.extract_slice %[[B]][%[[K]], %[[J]]] [4, 3] [1, 1]  : tensor<?x?xf32> to tensor<4x3xf32>
+//   CHECK-DAG:       %[[stF:.*]] = tensor.extract_slice %[[RES]][%[[I]], %[[J]]] [2, 3] [1, 1]  : tensor<?x?xf32> to tensor<2x3xf32>
 //
-// subtensors of the producing matmul.
+// slices of the producing matmul.
 //       CHECK:       %[[sizeB1:.*]] = affine.min #[[BOUND4_MAP]](%[[K]])[%[[dB1]]]
-//       CHECK:       %[[stB2:.*]] = subtensor %[[B]][0, %[[K]]] [%[[dB0]], %[[sizeB1]]] [1, 1]  : tensor<?x?xf32> to tensor<?x?xf32>
+//       CHECK:       %[[stB2:.*]] = tensor.extract_slice %[[B]][0, %[[K]]] [%[[dB0]], %[[sizeB1]]] [1, 1]  : tensor<?x?xf32> to tensor<?x?xf32>
 //       CHECK:       %[[sizeC1:.*]] = affine.min #[[BOUND4_MAP]](%[[K]])[%[[dC1]]]
-//       CHECK:       %[[stC:.*]] = subtensor %[[C]][%[[I]], %[[K]]] [%[[sizeC0]], %[[sizeC1]]] [1, 1]  : tensor<?x?xf32> to tensor<?x?xf32>
+//       CHECK:       %[[stC:.*]] = tensor.extract_slice %[[C]][%[[I]], %[[K]]] [%[[sizeC0]], %[[sizeC1]]] [1, 1]  : tensor<?x?xf32> to tensor<?x?xf32>
 //       CHECK:       %[[stD:.*]] = linalg.matmul ins(%[[stA]], %[[stB2]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[stC]] : tensor<?x?xf32>)  -> tensor<?x?xf32>
 //       CHECK:       %[[CAST:.*]] = tensor.cast %[[stD]] : tensor<?x?xf32> to tensor<?x4xf32>
 //  CHECK-NEXT:       %[[stG:.*]] = linalg.matmul ins(%[[CAST]], %[[stB1]] : tensor<?x4xf32>, tensor<4x3xf32>) outs(%[[stF]] : tensor<2x3xf32>)  -> tensor<2x3xf32>
-//  CHECK-NEXT:       subtensor_insert %[[stG]] into %[[RES]][%[[I]], %[[J]]]
+//  CHECK-NEXT:       tensor.insert_slice %[[stG]] into %[[RES]][%[[I]], %[[J]]]
 
 // -----
 
@@ -87,9 +87,9 @@
   %for0 = scf.for %iv0 = %c0 to %c112 step %c8 iter_args(%arg0 = %fill) -> tensor<1x112x112x32xf32> {
     %for1 = scf.for %iv1 = %c0 to %c112 step %c16 iter_args(%arg1 = %arg0) -> tensor<1x112x112x32xf32> {
       %for2 = scf.for %iv2 = %c0 to %c32 step %c4 iter_args(%arg2 = %arg1) -> tensor<1x112x112x32xf32> {
-        %0 = subtensor %conv[0, %iv0, %iv1, %iv2][1, 8, 16, 4][1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
-        %1 = subtensor %elementwise[0, %iv0, %iv1, %iv2][1, 8, 16, 4][1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
-        %2 = subtensor %arg2[0, %iv0, %iv1, %iv2][1, 8, 16, 4][1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
+        %0 = tensor.extract_slice %conv[0, %iv0, %iv1, %iv2][1, 8, 16, 4][1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
+        %1 = tensor.extract_slice %elementwise[0, %iv0, %iv1, %iv2][1, 8, 16, 4][1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
+        %2 = tensor.extract_slice %arg2[0, %iv0, %iv1, %iv2][1, 8, 16, 4][1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
         %add = linalg.generic
           {
             indexing_maps = [
@@ -104,7 +104,7 @@
           linalg.yield %result : f32
         } -> tensor<1x8x16x4xf32>
 
-        %insert = subtensor_insert %add into %arg2[0, %iv0, %iv1, %iv2] [1, 8, 16, 4] [1, 1, 1, 1]  : tensor<1x8x16x4xf32> into tensor<1x112x112x32xf32>
+        %insert = tensor.insert_slice %add into %arg2[0, %iv0, %iv1, %iv2] [1, 8, 16, 4] [1, 1, 1, 1]  : tensor<1x8x16x4xf32> into tensor<1x112x112x32xf32>
         scf.yield %insert : tensor<1x112x112x32xf32>
       }
       scf.yield %for2 : tensor<1x112x112x32xf32>
@@ -127,19 +127,19 @@
 // CHECK-NEXT:   %[[OFFSET_H:.+]] = affine.apply #[[MAP0]](%[[IV0]])
 // CHECK-NEXT:   scf.for %[[IV1:.+]] = %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG1:.+]] = %[[ARG0]])
 // CHECK-NEXT:     %[[OFFSET_W:.+]] = affine.apply #[[MAP0]](%[[IV1]])
-// CHECK-NEXT:     %[[ST_INPUT:.+]] = subtensor %arg0[0, %[[OFFSET_H]], %[[OFFSET_W]], 0] [1, 17, 33, 3] [1, 1, 1, 1] : tensor<1x225x225x3xf32> to tensor<1x17x33x3xf32>
+// CHECK-NEXT:     %[[ST_INPUT:.+]] = tensor.extract_slice %arg0[0, %[[OFFSET_H]], %[[OFFSET_W]], 0] [1, 17, 33, 3] [1, 1, 1, 1] : tensor<1x225x225x3xf32> to tensor<1x17x33x3xf32>
 // CHECK-NEXT:     scf.for %[[IV2:.+]] = %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG2:.+]] = %[[ARG1]])
-// CHECK-NEXT:       %[[ST_ELEM:.+]] = subtensor %[[ELEM]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
-// CHECK-NEXT:       %[[ST_ARG2:.+]] = subtensor %[[ARG2]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
-// CHECK-NEXT:       %[[ST_FILTER:.+]] = subtensor %[[FILTER]][0, 0, 0, %[[IV2]]] [3, 3, 3, 4] [1, 1, 1, 1] : tensor<3x3x3x32xf32> to tensor<3x3x3x4xf32>
-// CHECK-NEXT:       %[[ST_FILL:.+]] = subtensor %[[FILL]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
+// CHECK-NEXT:       %[[ST_ELEM:.+]] = tensor.extract_slice %[[ELEM]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
+// CHECK-NEXT:       %[[ST_ARG2:.+]] = tensor.extract_slice %[[ARG2]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
+// CHECK-NEXT:       %[[ST_FILTER:.+]] = tensor.extract_slice %[[FILTER]][0, 0, 0, %[[IV2]]] [3, 3, 3, 4] [1, 1, 1, 1] : tensor<3x3x3x32xf32> to tensor<3x3x3x4xf32>
+// CHECK-NEXT:       %[[ST_FILL:.+]] = tensor.extract_slice %[[FILL]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
 // CHECK-NEXT:       %[[ST_CONV:.+]] = linalg.conv_2d_input_nhwc_filter_hwcf
 // CHECK-SAME:         ins(%[[ST_INPUT]], %[[ST_FILTER]] : tensor<1x17x33x3xf32>, tensor<3x3x3x4xf32>)
 // CHECK-SAME:         outs(%[[ST_FILL]] : tensor<1x8x16x4xf32>)
 // CHECK-NEXT:       %[[ADD:.+]] = linalg.generic
 // CHECK-SAME:         ins(%[[ST_CONV]], %[[ST_ELEM]] : tensor<1x8x16x4xf32>, tensor<1x8x16x4xf32>)
 // CHECK-SAME:         outs(%[[ST_ARG2]] : tensor<1x8x16x4xf32>)
-//      CHECK:       subtensor_insert %[[ADD]] into %[[ARG2]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4]
+//      CHECK:       tensor.insert_slice %[[ADD]] into %[[ARG2]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4]
 
 // -----
 
@@ -174,9 +174,9 @@
           %oh_size = affine.min affine_map<(d0)[s0] -> (16, -d0 + s0)>(%iv1)[%oh]
           %ow_size = affine.min affine_map<(d0)[s0] -> (4, -d0 + s0)>(%iv2)[%ow]
           %oc_size = affine.min affine_map<(d0)[s0] -> (2, -d0 + s0)>(%iv2)[%oc]
-          %0 = subtensor %conv[%iv0, %iv1, %iv2, %iv3][%n_size, %oh_size, %ow_size, %oc_size][1, 1, 1, 1] : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
-          %1 = subtensor %elementwise[%iv0, %iv1, %iv2, %iv3][%n_size, %oh_size, %ow_size, %oc_size][1, 1, 1, 1] : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
-          %2 = subtensor %arg3[%iv0, %iv1, %iv2, %iv3][%n_size, %oh_size, %ow_size, %oc_size][1, 1, 1, 1] : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
+          %0 = tensor.extract_slice %conv[%iv0, %iv1, %iv2, %iv3][%n_size, %oh_size, %ow_size, %oc_size][1, 1, 1, 1] : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
+          %1 = tensor.extract_slice %elementwise[%iv0, %iv1, %iv2, %iv3][%n_size, %oh_size, %ow_size, %oc_size][1, 1, 1, 1] : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
+          %2 = tensor.extract_slice %arg3[%iv0, %iv1, %iv2, %iv3][%n_size, %oh_size, %ow_size, %oc_size][1, 1, 1, 1] : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
           %add = linalg.generic
             {
               indexing_maps = [
@@ -191,7 +191,7 @@
             linalg.yield %result : f32
           } -> tensor<?x?x?x?xf32>
 
-          %insert = subtensor_insert %add into %arg3[%iv0, %iv1, %iv2, %iv3] [%n_size, %oh_size, %ow_size, %oc_size] [1, 1, 1, 1]  : tensor<?x?x?x?xf32> into tensor<?x?x?x?xf32>
+          %insert = tensor.insert_slice %add into %arg3[%iv0, %iv1, %iv2, %iv3] [%n_size, %oh_size, %ow_size, %oc_size] [1, 1, 1, 1]  : tensor<?x?x?x?xf32> into tensor<?x?x?x?xf32>
           scf.yield %insert : tensor<?x?x?x?xf32>
         }
         scf.yield %for3 : tensor<?x?x?x?xf32>
@@ -257,19 +257,19 @@
 // CHECK-NEXT:         %[[SIZE_ELEM_OC:.+]] = affine.min #[[BOUND2_MAP]](%[[IV2]])[%[[ELEM_OC]]]
 // CHECK-NEXT:         %[[OFFSET_OW:.+]] = affine.apply #[[X2_MAP]](%[[IV2]])
 // CHECK-NEXT:         %[[SIZE_INPUT_W:.+]] = affine.min #[[INPUT_BOUND]](%[[SIZE_ELEM_OW]], %[[IV2]])[%[[FILTER_W]], %[[INPUT_W]]]
-// CHECK-NEXT:         %[[ST_INPUT:.+]] = subtensor %[[INPUT]][%[[IV0]], %[[OFFSET_OH]], %[[OFFSET_OW]], 0]
+// CHECK-NEXT:         %[[ST_INPUT:.+]] = tensor.extract_slice %[[INPUT]][%[[IV0]], %[[OFFSET_OH]], %[[OFFSET_OW]], 0]
 // CHECK-SAME:               [%[[SIZE_INPUT_N]], %[[SIZE_INPUT_H]], %[[SIZE_INPUT_W]], %[[INPUT_C]]]
 // CHECK-NEXT:         %[[SIZE_ELEM_OW_2:.+]] = affine.min #[[BOUND4_MAP_2]](%[[IV2]])[%[[FILL_W]], %[[ELEM_OW]]]
 // CHECK-NEXT:         scf.for %[[IV3:.+]] = %{{.+}} to %[[ELEM_OC]] step %{{.+}} iter_args(%[[ARG:[a-z0-9]+]]
-// CHECK-NEXT:           %[[ST_ELEM:.+]] = subtensor %[[ELEM]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
+// CHECK-NEXT:           %[[ST_ELEM:.+]] = tensor.extract_slice %[[ELEM]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
 // CHECK-SAME:                 [%[[SIZE_ELEM_N]], %[[SIZE_ELEM_OH]], %[[SIZE_ELEM_OW]], %[[SIZE_ELEM_OC]]]
-// CHECK-NEXT:           %[[ST_ARG:.+]] = subtensor %[[ARG]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
+// CHECK-NEXT:           %[[ST_ARG:.+]] = tensor.extract_slice %[[ARG]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
 // CHECK-SAME:                 [%[[SIZE_ELEM_N]], %[[SIZE_ELEM_OH]], %[[SIZE_ELEM_OW]], %[[SIZE_ELEM_OC]]]
 // CHECK-NEXT:           %[[SIZE_ELEM_OC_2:.+]] = affine.min #[[BOUND2_MAP_2]](%[[IV3]], %[[IV2]])[%[[FILTER_OC]], %[[ELEM_OC]]]
-// CHECK-NEXT:           %[[ST_FILTER:.+]] = subtensor %[[FILTER]][0, 0, 0, %[[IV3]]]
+// CHECK-NEXT:           %[[ST_FILTER:.+]] = tensor.extract_slice %[[FILTER]][0, 0, 0, %[[IV3]]]
 // CHECK-SAME:                 [%[[FILTER_H]], %[[FILTER_W]], %[[FILTER_IC]], %[[SIZE_ELEM_OC_2]]]
 // CHECK-NEXT:           %[[SIZE_ELEM_OC_3:.+]] = affine.min #[[BOUND2_MAP_2]](%[[IV3]], %[[IV2]])[%[[FILL_C]], %[[ELEM_OC]]]
-// CHECK-NEXT:           %[[ST_FILL:.+]] = subtensor %[[FILL]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
+// CHECK-NEXT:           %[[ST_FILL:.+]] = tensor.extract_slice %[[FILL]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
 // CHECK-SAME:                 [%[[SIZE_ELEM_N_2]], %[[SIZE_ELEM_OH_2]], %[[SIZE_ELEM_OW_2]], %[[SIZE_ELEM_OC_3]]]
 // CHECK-NEXT:           %[[ST_CONV:.+]] = linalg.conv_2d_input_nhwc_filter_hwcf
 // CHECK-SAME:                 ins(%[[ST_INPUT]], %[[ST_FILTER]] : tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>)
@@ -277,5 +277,5 @@
 // CHECK-NEXT:           %[[ST_ADD:.+]] = linalg.generic
 // CHECK-SAME:                 ins(%[[ST_CONV]], %[[ST_ELEM]] : tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>)
 // CHECK-SAME:                 outs(%[[ST_ARG]] : tensor<?x?x?x?xf32>)
-//      CHECK:           subtensor_insert %[[ST_ADD]] into %[[ARG]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
+//      CHECK:           tensor.insert_slice %[[ST_ADD]] into %[[ARG]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
 // CHECK-SAME:                 [%[[SIZE_ELEM_N]], %[[SIZE_ELEM_OH]], %[[SIZE_ELEM_OW]], %[[SIZE_ELEM_OC]]]
diff --git a/mlir/test/Dialect/Linalg/tile-and-pad-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-pad-tensors.mlir
--- a/mlir/test/Dialect/Linalg/tile-and-pad-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-pad-tensors.mlir
@@ -12,9 +12,9 @@
 //      CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor<?x?xi32>) {
 //      CHECK:   %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<?x?xi32>) {
 //      CHECK:     %[[TD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[TC1]]) -> (tensor<?x?xi32>) {
-//      CHECK:       %[[sTA:.*]] = subtensor %[[TA]][{{.*}}] : tensor<?x?xi8> to tensor<?x?xi8>
-//      CHECK:       %[[sTB:.*]] = subtensor %[[TB]][{{.*}}] : tensor<?x?xi8> to tensor<?x?xi8>
-//      CHECK:       %[[sTC:.*]] = subtensor %[[TC2]][{{.*}}] : tensor<?x?xi32> to tensor<?x?xi32>
+//      CHECK:       %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor<?x?xi8> to tensor<?x?xi8>
+//      CHECK:       %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<?x?xi8> to tensor<?x?xi8>
+//      CHECK:       %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<?x?xi32> to tensor<?x?xi32>
 
 // Dynamic op has been canonicalized away.
 //  CHECK-NOT:       linalg.matmul {{.*}} tensor<?x?xi8>
@@ -28,8 +28,8 @@
 //      CHECK:         : tensor<?x?xi32> to tensor<2x3xi32>
 //      CHECK:       %[[pD:.*]] = linalg.matmul_i8_i8_i32 ins(%[[pA]], %[[pB]] : tensor<2x4xi8>, tensor<4x3xi8>)
 // CHECK-SAME:                                           outs(%[[pC]] : tensor<2x3xi32>)  -> tensor<2x3xi32>
-//      CHECK:       %[[sTD:.*]] = subtensor %[[pD]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : tensor<2x3xi32> to tensor<?x?xi32>
-//      CHECK:       %[[TD:.*]] = subtensor_insert %[[sTD]] into %[[TC2]][{{.*}}]  : tensor<?x?xi32> into tensor<?x?xi32>
+//      CHECK:       %[[sTD:.*]] = tensor.extract_slice %[[pD]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : tensor<2x3xi32> to tensor<?x?xi32>
+//      CHECK:       %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}]  : tensor<?x?xi32> into tensor<?x?xi32>
 //      CHECK:       scf.yield %[[TD]] : tensor<?x?xi32>
 //      CHECK:     scf.yield %[[TD2]] : tensor<?x?xi32>
 //      CHECK:   scf.yield %[[TD1]] : tensor<?x?xi32>
@@ -52,15 +52,15 @@
 //      CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor<?x?x?xf32>) {
 //      CHECK:   %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<?x?x?xf32>) {
 //      CHECK:     %[[TD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[TC1]]) -> (tensor<?x?x?xf32>) {
-//      CHECK:       %[[sTC:.*]] = subtensor %[[TC2]][{{.*}}] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
+//      CHECK:       %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
 
 // Padding injects static information.
 //      CHECK:       %[[pC:.*]] = linalg.pad_tensor %[[sTC]] low[%[[C0]], %[[C0]], %[[C0]]] high[%{{.*}}, %{{.*}}, %{{.*}}]
 //      CHECK:        : tensor<?x?x?xf32> to tensor<2x3x4xf32>
 //      CHECK:       %[[pD:.*]] = linalg.generic
 // CHECK-SAME:         ins(%[[VAL]] : f32) outs(%[[pC]] : tensor<2x3x4xf32>)
-//      CHECK:       %[[sTD:.*]] = subtensor %[[pD]][0, 0, 0] [%{{.*}}, %{{.*}}, %{{.*}}] [1, 1, 1] : tensor<2x3x4xf32> to tensor<?x?x?xf32>
-//      CHECK:       %[[TD:.*]] = subtensor_insert %[[sTD]] into %[[TC2]][{{.*}}]  : tensor<?x?x?xf32> into tensor<?x?x?xf32>
+//      CHECK:       %[[sTD:.*]] = tensor.extract_slice %[[pD]][0, 0, 0] [%{{.*}}, %{{.*}}, %{{.*}}] [1, 1, 1] : tensor<2x3x4xf32> to tensor<?x?x?xf32>
+//      CHECK:       %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}]  : tensor<?x?x?xf32> into tensor<?x?x?xf32>
 //      CHECK:       scf.yield %[[TD]] : tensor<?x?x?xf32>
 //      CHECK:     scf.yield %[[TD2]] : tensor<?x?x?xf32>
 //      CHECK:   scf.yield %[[TD1]] : tensor<?x?x?xf32>
@@ -104,11 +104,11 @@
 //      CHECK-1DIM-TILE:        %[[C0:.*]] = constant 0 : index
 //      CHECK-1DIM-TILE:        %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor<?x?xi32>) {
 //      CHECK-1DIM-TILE:            %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<?x?xi32>) {
-//      CHECK-1DIM-TILE:                %[[sTA:.*]] = subtensor %[[TA]][{{.*}}] : tensor<?x8xi8> to tensor<?x8xi8>
+//      CHECK-1DIM-TILE:                %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor<?x8xi8> to tensor<?x8xi8>
 //      CHECK-1DIM-TILE:                %[[sTAc:.*]] = tensor.cast %[[sTA]] : tensor<?x8xi8> to tensor<?x?xi8>
-//      CHECK-1DIM-TILE:                %[[sTB:.*]] = subtensor %[[TB]][{{.*}}] : tensor<8x?xi8> to tensor<8x?xi8>
+//      CHECK-1DIM-TILE:                %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<8x?xi8> to tensor<8x?xi8>
 //      CHECK-1DIM-TILE:                %[[sTBc:.*]] = tensor.cast %[[sTB]] : tensor<8x?xi8> to tensor<?x?xi8>
-//      CHECK-1DIM-TILE:                %[[sTC:.*]] = subtensor %[[TC1]][{{.*}}] : tensor<?x?xi32> to tensor<?x?xi32>
+//      CHECK-1DIM-TILE:                %[[sTC:.*]] = tensor.extract_slice %[[TC1]][{{.*}}] : tensor<?x?xi32> to tensor<?x?xi32>
 //      CHECK-1DIM-TILE:                %[[pA:.*]] = linalg.pad_tensor %[[sTAc]] low[%[[C0]], %[[C0]]] high[%{{.*}}, %{{.*}}]
 //      CHECK-1DIM-TILE:                   : tensor<?x?xi8> to tensor<2x8xi8>
 //      CHECK-1DIM-TILE:                %[[pB:.*]] = linalg.pad_tensor %[[sTBc]] low[%[[C0]], %[[C0]]] high[%{{.*}}, %{{.*}}]
diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir
--- a/mlir/test/Dialect/Linalg/tile-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir
@@ -11,12 +11,12 @@
 //      CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor<?x?xf32>) {
 //      CHECK:   %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<?x?xf32>) {
 //      CHECK:     %[[TD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[TC1]]) -> (tensor<?x?xf32>) {
-//      CHECK:       %[[sTA:.*]] = subtensor %[[TA]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
-//      CHECK:       %[[sTB:.*]] = subtensor %[[TB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
-//      CHECK:       %[[sTC:.*]] = subtensor %[[TC2]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
+//      CHECK:       %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
+//      CHECK:       %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
+//      CHECK:       %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
 //      CHECK:       %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor<?x?xf32>, tensor<?x?xf32>)
 // CHECK-SAME:                                  outs(%[[sTC]] : tensor<?x?xf32>)  -> tensor<?x?xf32>
-//      CHECK:       %[[TD:.*]] = subtensor_insert %[[sTD]] into %[[TC2]][{{.*}}]  : tensor<?x?xf32> into tensor<?x?xf32>
+//      CHECK:       %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}]  : tensor<?x?xf32> into tensor<?x?xf32>
 //      CHECK:       scf.yield %[[TD]] : tensor<?x?xf32>
 //      CHECK:     scf.yield %[[TD2]] : tensor<?x?xf32>
 //      CHECK:   scf.yield %[[TD1]] : tensor<?x?xf32>
@@ -51,14 +51,14 @@
 // TLOOP-SAME: iterators["parallel", "parallel", "reduction"]
 // TLOOP-SAME: distribution["block_x", "block_y", "none"] {
 
-// TLOOP: %[[SUB_ARG_0:.*]] = subtensor %[[A0]][%[[I]], %[[K]]]
-// TLOOP: %[[SUB_ARG_1:.*]] = subtensor %[[A1]][%[[K]], %[[J]]]
-// TLOOP: %[[SUB_ARG_2:.*]] = subtensor %[[A2]][%[[I]], %[[J]]]
+// TLOOP: %[[SUB_ARG_0:.*]] = tensor.extract_slice %[[A0]][%[[I]], %[[K]]]
+// TLOOP: %[[SUB_ARG_1:.*]] = tensor.extract_slice %[[A1]][%[[K]], %[[J]]]
+// TLOOP: %[[SUB_ARG_2:.*]] = tensor.extract_slice %[[A2]][%[[I]], %[[J]]]
 
 // TLOOP: %[[PROD:.*]] = linalg.matmul ins(%[[SUB_ARG_0]], %[[SUB_ARG_1]]
 // TLOOP-SE: outs(%[[SUB_ARG_2]] : [[TY]]) -> [[TY]]
 
-// TLOOP: %[[O:.*]] = subtensor_insert %[[PROD]] into %[[A2]][%[[I]], %[[J]]]
+// TLOOP: %[[O:.*]] = tensor.insert_slice %[[PROD]] into %[[A2]][%[[I]], %[[J]]]
 // TLOOP: linalg.yield %[[O]] : [[TY]]
 
 // -----
@@ -93,13 +93,13 @@
 //       CHECK:   %[[TD0:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[TC0:.+]] = %[[INIT]]) -> (tensor<?x?x?xf32>) {
 //       CHECK:     %[[TD1:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[TC1:.+]] = %[[TC0]]) -> (tensor<?x?x?xf32>) {
 //       CHECK:       %[[TD2:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[TC2:.+]] = %[[TC1]]) -> (tensor<?x?x?xf32>) {
-//       CHECK:       %[[STARG0:.+]] = subtensor %[[ARG0]][{{.+}}] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
-//       CHECK:       %[[STARG1:.+]] = subtensor %[[ARG1]][{{.+}}] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
-//       CHECK:       %[[STARG2:.+]] = subtensor %[[TC2]][{{.+}}] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
+//       CHECK:       %[[STARG0:.+]] = tensor.extract_slice %[[ARG0]][{{.+}}] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
+//       CHECK:       %[[STARG1:.+]] = tensor.extract_slice %[[ARG1]][{{.+}}] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
+//       CHECK:       %[[STARG2:.+]] = tensor.extract_slice %[[TC2]][{{.+}}] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
 //       CHECK:       %[[STRETURN:.+]] = linalg.generic
 //  CHECK-SAME:         ins(%[[STARG0]], %[[STARG1]] : tensor<?x?x?xf32>, tensor<?x?x?xf32>)
 //  CHECK-SAME:         outs(%[[STARG2]] : tensor<?x?x?xf32>)
-//       CHECK:       %[[TD:.+]] = subtensor_insert %[[STRETURN]] into %[[TC2]]
+//       CHECK:       %[[TD:.+]] = tensor.insert_slice %[[STRETURN]] into %[[TC2]]
 //       CHECK:       scf.yield %[[TD]]
 //       CHECK:     }
 //       CHECK:     scf.yield %[[TD2]]
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -586,7 +586,7 @@
 //       CHECK:   %[[INIT:.*]] = linalg.init_tensor [6, %[[V1]], %[[V2]], %[[V5]]] : tensor<6x?x?x?xf32>
 //       CHECK:   %[[FILL:.*]] = linalg.fill(%[[INIT]], %{{.*}}) : tensor<6x?x?x?xf32>, f32 -> tensor<6x?x?x?xf32>
 //       CHECK:   %[[SRCDIM:.*]] = memref.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32>
-//       CHECK:   %[[RESULT:.*]] = subtensor_insert %[[SRC]] into %[[FILL]][2, %[[LOW]], 3, 3] [1, 2, 2, %[[SRCDIM]]] [1, 1, 1, 1] : tensor<1x2x2x?xf32> into tensor<6x?x?x?xf32>
+//       CHECK:   %[[RESULT:.*]] = tensor.insert_slice %[[SRC]] into %[[FILL]][2, %[[LOW]], 3, 3] [1, 2, 2, %[[SRCDIM]]] [1, 1, 1, 1] : tensor<1x2x2x?xf32> into tensor<6x?x?x?xf32>
 //       CHECK:   return %[[RESULT]]
 func @pad_static_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: index,
                   %pad_value: f32) -> tensor<6x?x?x?xf32> {
@@ -638,7 +638,7 @@
   } : tensor<5x6xf32> to tensor<10x13xf32>
   %1 = vector.transfer_write %arg1, %0[%c0, %c0]
       : vector<7x9xf32>, tensor<10x13xf32>
-  %2 = subtensor %1[0, 0] [5, 6] [1, 1] : tensor<10x13xf32> to tensor<5x6xf32>
+  %2 = tensor.extract_slice %1[0, 0] [5, 6] [1, 1] : tensor<10x13xf32> to tensor<5x6xf32>
   return %2 : tensor<5x6xf32>
 }
 
@@ -648,14 +648,14 @@
 //  CHECK-SAME:     %[[ARG0:.*]]: tensor<?x?xf32>, %[[ARG1:.*]]: vector<7x9xf32>, %[[SIZE:.*]]: index, %[[PADDING:.*]]: index
 //   CHECK-NOT:   linalg.pad_tensor
 //       CHECK:   %[[C0:.*]] = constant 0 : index
-//       CHECK:   %[[SUB:.*]] = subtensor %[[ARG0]][0, 0] [%[[SIZE]], 6] [1, 1] : tensor<?x?xf32> to tensor<?x6xf32>
+//       CHECK:   %[[SUB:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [%[[SIZE]], 6] [1, 1] : tensor<?x?xf32> to tensor<?x6xf32>
 //       CHECK:   %[[RESULT:.*]] = vector.transfer_write %[[ARG1]], %[[SUB]][%[[C0]], %[[C0]]] : vector<7x9xf32>, tensor<?x6xf32>
 //       CHECK:   return %[[RESULT]]
 func @pad_and_transfer_write_dynamic_static(
     %arg0: tensor<?x?xf32>, %arg1: vector<7x9xf32>, %size: index, %padding: index) -> tensor<?x6xf32> {
   %c0 = constant 0 : index
   %c5 = constant 5.0 : f32
-  %s = subtensor %arg0[0, 0] [%size, 6] [1, 1]
+  %s = tensor.extract_slice %arg0[0, 0] [%size, 6] [1, 1]
       : tensor<?x?xf32> to tensor<?x6xf32>
   %0 = linalg.pad_tensor %s low[0, 0] high[%padding, 7] {
     ^bb0(%arg2: index, %arg3: index):
@@ -663,13 +663,13 @@
   } : tensor<?x6xf32> to tensor<?x13xf32>
   %1 = vector.transfer_write %arg1, %0[%c0, %c0]
       : vector<7x9xf32>, tensor<?x13xf32>
-  %2 = subtensor %1[0, 0] [%size, 6] [1, 1] : tensor<?x13xf32> to tensor<?x6xf32>
+  %2 = tensor.extract_slice %1[0, 0] [%size, 6] [1, 1] : tensor<?x13xf32> to tensor<?x6xf32>
   return %2 : tensor<?x6xf32>
 }
 
 // -----
 
-// CHECK-LABEL: func @pad_and_subtensor_insert
+// CHECK-LABEL: func @pad_and_insert_slice
 //  CHECK-SAME:     %[[ARG0:.*]]: tensor<5x6xf32>, %[[ARG1:.*]]: tensor<12x13xf32>
 //   CHECK-NOT:   linalg.pad_tensor
 //   CHECK-DAG:   %[[C0:.*]] = constant 0 : index
@@ -677,7 +677,7 @@
 //       CHECK:   %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %[[C5]] : tensor<5x6xf32>, vector<7x9xf32>
 //       CHECK:   %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[ARG1]][%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<7x9xf32>, tensor<12x13xf32>
 //       CHECK:   return %[[WRITE]]
-func @pad_and_subtensor_insert(
+func @pad_and_insert_slice(
     %arg0: tensor<5x6xf32>, %arg1: tensor<12x13xf32>) -> tensor<12x13xf32> {
   %c0 = constant 0 : index
   %c5 = constant 5.0 : f32
@@ -685,7 +685,7 @@
     ^bb0(%arg2: index, %arg3: index):
       linalg.yield %c5 : f32
   } : tensor<5x6xf32> to tensor<7x9xf32>
-  %r = subtensor_insert %0 into %arg1[0, 0][7, 9][1, 1] : tensor<7x9xf32> into tensor<12x13xf32>
+  %r = tensor.insert_slice %0 into %arg1[0, 0][7, 9][1, 1] : tensor<7x9xf32> into tensor<12x13xf32>
   return %r : tensor<12x13xf32>
 }
 
diff --git a/mlir/test/Dialect/MemRef/canonicalize.mlir b/mlir/test/Dialect/MemRef/canonicalize.mlir
--- a/mlir/test/Dialect/MemRef/canonicalize.mlir
+++ b/mlir/test/Dialect/MemRef/canonicalize.mlir
@@ -367,27 +367,3 @@
   %1 = memref.buffer_cast %0 : memref<?x?x16x32xi8>
   return %1 : memref<?x?x16x32xi8>
 }
-
-// -----
-
-// TODO: Move this test to Tensor/canonicalize.mlir.
-func @subtensor_insert_propagate_dest_cast(%arg0 : tensor<2x?xi32>, %arg1 : tensor<i32>,
-    %arg2 : index, %arg3 : index) -> tensor<?x?xi32> {
-  %c0 = constant 0 : index
-  %c1 = constant 1 : index
-  %c2 = constant 2 : index
-  %c8 = constant 8 : index
-  %0 = memref.dim %arg0, %c1 : tensor<2x?xi32>
-  %1 = tensor.extract %arg1[] : tensor<i32>
-  %2 = tensor.generate %arg2, %c8 {
-  ^bb0(%arg4: index, %arg5: index):
-    tensor.yield %1 : i32
-  } : tensor<?x?xi32>
-  %3 = subtensor_insert %arg0 into %2[%c0, %arg3] [%c2, %0] [%c1, %c1] : tensor<2x?xi32> into tensor<?x?xi32>
-  return %3 : tensor<?x?xi32>
-}
-// CHECK-LABEL: func @subtensor_insert_propagate_dest_cast
-//       CHECK:   %[[UPDATED:.+]] = subtensor_insert %{{.+}} into %{{.+}}[0, %{{.+}}] [2, %{{.+}}] [1, 1]
-//  CHECK-SAME:     tensor<2x?xi32> into tensor<?x8xi32>
-//       CHECK:   %[[CAST:.+]] = tensor.cast %[[UPDATED]]
-//       CHECK:   return %[[CAST]]
diff --git a/mlir/test/Dialect/SCF/canonicalize.mlir b/mlir/test/Dialect/SCF/canonicalize.mlir
--- a/mlir/test/Dialect/SCF/canonicalize.mlir
+++ b/mlir/test/Dialect/SCF/canonicalize.mlir
@@ -659,10 +659,10 @@
     scf.yield %2 : tensor<?x?xf32>
   }
 //   CHECK-NOT: tensor.cast
-//       CHECK: %[[RES:.*]] = subtensor_insert %[[FOR_RES]] into %[[T1]][0, 0] [32, 1024] [1, 1] : tensor<32x1024xf32> into tensor<1024x1024xf32>
+//       CHECK: %[[RES:.*]] = tensor.insert_slice %[[FOR_RES]] into %[[T1]][0, 0] [32, 1024] [1, 1] : tensor<32x1024xf32> into tensor<1024x1024xf32>
 //       CHECK: return %[[RES]] : tensor<1024x1024xf32>
   %2 = tensor.cast %1 : tensor<?x?xf32> to tensor<32x1024xf32>
-  %res = subtensor_insert %2 into %t1[0, 0] [32, 1024] [1, 1] : tensor<32x1024xf32> into tensor<1024x1024xf32>
+  %res = tensor.insert_slice %2 into %t1[0, 0] [32, 1024] [1, 1] : tensor<32x1024xf32> into tensor<1024x1024xf32>
   return %res : tensor<1024x1024xf32>
 }
 
diff --git a/mlir/test/Dialect/Standard/canonicalize.mlir b/mlir/test/Dialect/Standard/canonicalize.mlir
--- a/mlir/test/Dialect/Standard/canonicalize.mlir
+++ b/mlir/test/Dialect/Standard/canonicalize.mlir
@@ -24,202 +24,6 @@
 
 // -----
 
-func @subtensor_canonicalize(%arg0 : tensor<?x?x?xf32>, %arg1 : index,
-    %arg2 : index) -> tensor<?x?x?xf32>
-{
-  %c0 = constant 0 : index
-  %c1 = constant 1 : index
-  %c4 = constant 4 : index
-  %0 = subtensor %arg0[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
-  return %0 : tensor<?x?x?xf32>
-}
-// CHECK-LABEL: func @subtensor_canonicalize
-//  CHECK-SAME:   %[[ARG0:.+]]: tensor<?x?x?xf32>
-//       CHECK:   %[[SUBTENSOR:.+]] = subtensor %[[ARG0]][0, %{{[a-zA-Z0-9_]+}}, 1]
-//  CHECK-SAME:      [4, 1, %{{[a-zA-Z0-9_]+}}] [1, 1, 1]
-//  CHECK-SAME:      : tensor<?x?x?xf32> to tensor<4x1x?xf32>
-//       CHECK:   %[[RESULT:.+]] = tensor.cast %[[SUBTENSOR]]
-//       CHEKC:   return %[[RESULT]]
-
-// -----
-
-func @rank_reducing_subtensor_canonicalize(%arg0 : tensor<?x?x?xf32>, %arg1 : index,
-    %arg2 : index) -> tensor<?x?xf32>
-{
-  %c0 = constant 0 : index
-  %c1 = constant 1 : index
-  %c4 = constant 4 : index
-  %0 = subtensor %arg0[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> to tensor<?x?xf32>
-  return %0 : tensor<?x?xf32>
-}
-// CHECK-LABEL: func @rank_reducing_subtensor_canonicalize
-//  CHECK-SAME:   %[[ARG0:.+]]: tensor<?x?x?xf32>
-//       CHECK:   %[[SUBTENSOR:.+]] = subtensor %[[ARG0]][0, %{{[a-zA-Z0-9_]+}}, 1]
-//  CHECK-SAME:      [4, 1, %{{[a-zA-Z0-9_]+}}] [1, 1, 1]
-//  CHECK-SAME:      : tensor<?x?x?xf32> to tensor<4x?xf32>
-//       CHECK:   %[[RESULT:.+]] = tensor.cast %[[SUBTENSOR]]
-//       CHEKC:   return %[[RESULT]]
-
-// -----
-
-// CHECK-LABEL: func @trivial_subtensor
-//  CHECK-SAME:   %[[ARG0:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8>
-//   CHECK-NOT:   subtensor
-//       CHECK:   return %[[ARG0]] :  tensor<4x6x16x32xi8>
-func @trivial_subtensor(%arg0 : tensor<4x6x16x32xi8>) -> tensor<4x6x16x32xi8> {
-  %0 = subtensor %arg0[0, 0, 0, 0] [4, 6, 16, 32] [1, 1, 1, 1] : tensor<4x6x16x32xi8> to tensor<4x6x16x32xi8>
-  return %0 : tensor<4x6x16x32xi8>
-}
-
-// -----
-
-// CHECK-LABEL: func @trivial_subtensor_insert
-//  CHECK-SAME:   %[[ARG0:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8>
-//   CHECK-NOT:   subtensor
-//       CHECK:   return %[[ARG0]] :  tensor<4x6x16x32xi8>
-func @trivial_subtensor_insert(%arg0 : tensor<4x6x16x32xi8>, %arg1 : tensor<4x6x16x32xi8>) -> tensor<4x6x16x32xi8> {
-  %0 = subtensor_insert %arg0 into %arg1[0, 0, 0, 0] [4, 6, 16, 32] [1, 1, 1, 1] : tensor<4x6x16x32xi8> into tensor<4x6x16x32xi8>
-  return %0 : tensor<4x6x16x32xi8>
-}
-
-// -----
-
-// CHECK-LABEL: func @rank_reducing_tensor_of_cast
-//  CHECK-SAME:   %[[ARG0:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8>
-//       CHECK:   %[[S:.+]] = subtensor %arg0[0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor<4x6x16x32xi8> to tensor<16x32xi8>
-// Tensor cast is moved after subtensor and then gets canonicalized away.
-//   CHECK-NOT:   tensor.cast
-//       CHECK:   return %[[S]] : tensor<16x32xi8>
-func @rank_reducing_tensor_of_cast(%arg : tensor<4x6x16x32xi8>) -> tensor<16x32xi8> {
-  %0 = tensor.cast %arg : tensor<4x6x16x32xi8> to tensor<?x?x16x32xi8>
-  %1 = subtensor %0[0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor<?x?x16x32xi8> to tensor<16x32xi8>
-  return %1 : tensor<16x32xi8>
-}
-
-// -----
-
-// CHECK-LABEL: func @rank_reducing_subtensor_insert_of_cast
-//  CHECK-SAME:   %[[A:.[a-z0-9A-Z_]+]]: tensor<16x32xi8>
-//  CHECK-SAME:   %[[B:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8>
-//       CHECK:   %[[S:.+]] = subtensor_insert %[[A]] into %[[B]][0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor<16x32xi8> into tensor<4x6x16x32xi8>
-// Tensor cast is folded away.
-//   CHECK-NOT:   tensor.cast
-//       CHECK:   return %[[S]] : tensor<4x6x16x32xi8>
-func @rank_reducing_subtensor_insert_of_cast(%a : tensor<16x32xi8>, %b : tensor<4x6x16x32xi8>) -> tensor<4x6x16x32xi8> {
-  %cast = tensor.cast %a : tensor<16x32xi8> to tensor<?x32xi8>
-  %res = subtensor_insert %cast into %b[0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor<?x32xi8> into tensor<4x6x16x32xi8>
-  return %res : tensor<4x6x16x32xi8>
-}
-
-// -----
-
-func @subtensor_insert_canonicalize(%arg0 : tensor<?x?x?xf32>, %arg1 : index,
-    %arg2 : index, %arg3 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
-{
-  %c0 = constant 0 : index
-  %c1 = constant 1 : index
-  %c4 = constant 4 : index
-  %0 = subtensor_insert %arg0 into %arg3[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> into tensor<?x?x?xf32>
-  return %0 : tensor<?x?x?xf32>
-}
-// CHECK-LABEL: func @subtensor_insert_canonicalize
-//  CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
-//       CHECK:   %[[RESULT:.+]] = subtensor_insert %[[ARG0]]
-//  CHECK-SAME:      [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1]
-//  CHECK-SAME:      : tensor<?x?x?xf32> into tensor<?x?x?xf32>
-//       CHEKC:   return %[[RESULT]]
-
-// -----
-
-func @subtensor_to_subtensor_insert_canonicalize(%arg0 : tensor<?x?x?xf32>, %arg1 : index,
-    %arg2 : index, %arg3 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
-{
-  %c0 = constant 0 : index
-  %c1 = constant 1 : index
-  %c4 = constant 4 : index
-  %0 = subtensor %arg0[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
-  %1 = subtensor_insert %0 into %arg3[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> into tensor<?x?x?xf32>
-  return %1 : tensor<?x?x?xf32>
-}
-// CHECK-LABEL: func @subtensor_to_subtensor_insert_canonicalize
-//  CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
-//  CHECK-SAME:   %[[ARG3:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
-//       CHECK:   %[[SUBTENSOR:.+]] = subtensor %[[ARG0]]
-//  CHECK-SAME:      [0, %{{.+}}, 1] [4, 1, %{{.+}} [1, 1, 1]
-//  CHECK-SAME:      : tensor<?x?x?xf32> to tensor<4x1x?xf32>
-//       CHECK:   %[[RESULT:.+]] = subtensor_insert %[[SUBTENSOR]]
-//  CHECK-SAME:      [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1]
-//  CHECK-SAME:      : tensor<4x1x?xf32> into tensor<?x?x?xf32>
-//       CHEKC:   return %[[RESULT]]
-
-// -----
-
-func @rank_reducing_subtensor_insert_canonicalize(%arg0 : tensor<?x?xf32>, %arg1 : index,
-    %arg2 : index, %arg3 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
-{
-  %c0 = constant 0 : index
-  %c1 = constant 1 : index
-  %c4 = constant 4 : index
-  %0 = subtensor_insert %arg0 into %arg3[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor<?x?xf32> into tensor<?x?x?xf32>
-  return %0 : tensor<?x?x?xf32>
-}
-// CHECK-LABEL: func @rank_reducing_subtensor_insert_canonicalize
-//  CHECK-SAME:   %[[ARG0:.+]]: tensor<?x?xf32>
-//       CHECK:   %[[RESULT:.+]] = subtensor_insert %[[ARG0]]
-//  CHECK-SAME:      [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1]
-//  CHECK-SAME:      : tensor<?x?xf32> into tensor<?x?x?xf32>
-//       CHEKC:   return %[[RESULT]]
-
-// -----
-
-func @rank_reducing_subtensor_to_subtensor_insert_canonicalize(%arg0 : tensor<?x?x?xf32>, %arg1 : index,
-    %arg2 : index, %arg3 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
-{
-  %c0 = constant 0 : index
-  %c1 = constant 1 : index
-  %c4 = constant 4 : index
-  %0 = subtensor %arg0[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> to tensor<?x?xf32>
-  %1 = subtensor_insert %0 into %arg3[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor<?x?xf32> into tensor<?x?x?xf32>
-  return %1 : tensor<?x?x?xf32>
-}
-// CHECK-LABEL: func @rank_reducing_subtensor_to_subtensor_insert_canonicalize
-//  CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
-//  CHECK-SAME:   %[[ARG3:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
-//       CHECK:   %[[SUBTENSOR:.+]] = subtensor %[[ARG0]]
-//  CHECK-SAME:     [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1]
-//  CHECK-SAME:     : tensor<?x?x?xf32> to tensor<4x?xf32>
-//       CHECK:   %[[RESULT:.+]] = subtensor_insert %[[SUBTENSOR]] into %[[ARG3]]
-//  CHECK-SAME:      [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1]
-//  CHECK-SAME:      : tensor<4x?xf32> into tensor<?x?x?xf32>
-//       CHEKC:   return %[[RESULT]]
-
-// -----
-
-func @subtensor_insert_output_dest_canonicalize(%arg0 : tensor<2x3xi32>, %arg1 : tensor<i32>) -> tensor<3x9xi32> {
-  %c0 = constant 0 : index
-  %c1 = constant 1 : index
-  %c2 = constant 2 : index
-  %c9 = constant 9 : index
-  %c3 = constant 3 : index
-  %2 = tensor.extract %arg1[] : tensor<i32>
-  %4 = tensor.generate %c3, %c9 {
-  ^bb0(%arg2: index, %arg3: index):
-    tensor.yield %2 : i32
-  } : tensor<?x?xi32>
-  %5 = subtensor_insert %arg0 into %4[%c0, %c1] [%c2, %c3] [1, 1] : tensor<2x3xi32> into tensor<?x?xi32>
-  %6 = tensor.cast %5 : tensor<?x?xi32> to tensor<3x9xi32>
-  return %6 : tensor<3x9xi32>
-}
-// CHECK-LABEL: func @subtensor_insert_output_dest_canonicalize
-//  CHECK-SAME:   %[[ARG0:[a-zA-z0-9_]+]]: tensor<2x3xi32>
-//  CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: tensor<i32>
-//       CHECK:   %[[PAD:.+]] = tensor.extract %[[ARG1]]
-//       CHECK:   %[[GENERATE:.+]] = tensor.generate
-//       CHECK:   %[[RESULT:.+]] = subtensor_insert %[[ARG0]] into %[[GENERATE]]
-//       CHECK:   return %[[RESULT]]
-
-// -----
-
 // CHECK-LABEL: @select_same_val
 //       CHECK:   return %arg1
 func @select_same_val(%arg0: i1, %arg1: i64) -> i64 {
diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir
--- a/mlir/test/Dialect/Tensor/canonicalize.mlir
+++ b/mlir/test/Dialect/Tensor/canonicalize.mlir
@@ -263,3 +263,222 @@
   %tensor = tensor.from_elements %c1, %c2, %c1 : tensor<3xindex>
   return %tensor : tensor<3xindex>
 }
+
+// -----
+
+func @slice_canonicalize(%arg0 : tensor<?x?x?xf32>, %arg1 : index,
+    %arg2 : index) -> tensor<?x?x?xf32>
+{
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %c4 = constant 4 : index
+  %0 = tensor.extract_slice %arg0[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
+  return %0 : tensor<?x?x?xf32>
+}
+// CHECK-LABEL: func @slice_canonicalize
+//  CHECK-SAME:   %[[ARG0:.+]]: tensor<?x?x?xf32>
+//       CHECK:   %[[SLICE:.+]] = tensor.extract_slice %[[ARG0]][0, %{{[a-zA-Z0-9_]+}}, 1]
+//  CHECK-SAME:      [4, 1, %{{[a-zA-Z0-9_]+}}] [1, 1, 1]
+//  CHECK-SAME:      : tensor<?x?x?xf32> to tensor<4x1x?xf32>
+//       CHECK:   %[[RESULT:.+]] = tensor.cast %[[SLICE]]
+//       CHEKC:   return %[[RESULT]]
+
+// -----
+
+func @rank_reducing_slice_canonicalize(%arg0 : tensor<?x?x?xf32>, %arg1 : index,
+    %arg2 : index) -> tensor<?x?xf32>
+{
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %c4 = constant 4 : index
+  %0 = tensor.extract_slice %arg0[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> to tensor<?x?xf32>
+  return %0 : tensor<?x?xf32>
+}
+// CHECK-LABEL: func @rank_reducing_slice_canonicalize
+//  CHECK-SAME:   %[[ARG0:.+]]: tensor<?x?x?xf32>
+//       CHECK:   %[[SLICE:.+]] = tensor.extract_slice %[[ARG0]][0, %{{[a-zA-Z0-9_]+}}, 1]
+//  CHECK-SAME:      [4, 1, %{{[a-zA-Z0-9_]+}}] [1, 1, 1]
+//  CHECK-SAME:      : tensor<?x?x?xf32> to tensor<4x?xf32>
+//       CHECK:   %[[RESULT:.+]] = tensor.cast %[[SLICE]]
+//       CHEKC:   return %[[RESULT]]
+
+// -----
+
+// CHECK-LABEL: func @trivial_slice
+//  CHECK-SAME:   %[[ARG0:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8>
+//   CHECK-NOT:   tensor.extract_slice
+//       CHECK:   return %[[ARG0]] :  tensor<4x6x16x32xi8>
+func @trivial_slice(%arg0 : tensor<4x6x16x32xi8>) -> tensor<4x6x16x32xi8> {
+  %0 = tensor.extract_slice %arg0[0, 0, 0, 0] [4, 6, 16, 32] [1, 1, 1, 1] : tensor<4x6x16x32xi8> to tensor<4x6x16x32xi8>
+  return %0 : tensor<4x6x16x32xi8>
+}
+
+// -----
+
+// CHECK-LABEL: func @trivial_insert_slice
+//  CHECK-SAME:   %[[ARG0:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8>
+//   CHECK-NOT:   tensor.extract_slice
+//       CHECK:   return %[[ARG0]] :  tensor<4x6x16x32xi8>
+func @trivial_insert_slice(%arg0 : tensor<4x6x16x32xi8>, %arg1 : tensor<4x6x16x32xi8>) -> tensor<4x6x16x32xi8> {
+  %0 = tensor.insert_slice %arg0 into %arg1[0, 0, 0, 0] [4, 6, 16, 32] [1, 1, 1, 1] : tensor<4x6x16x32xi8> into tensor<4x6x16x32xi8>
+  return %0 : tensor<4x6x16x32xi8>
+}
+
+// -----
+
+// CHECK-LABEL: func @rank_reducing_tensor_of_cast
+//  CHECK-SAME:   %[[ARG0:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8>
+//       CHECK:   %[[S:.+]] = tensor.extract_slice %arg0[0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor<4x6x16x32xi8> to tensor<16x32xi8>
+// Tensor cast is moved after slice and then gets canonicalized away.
+//   CHECK-NOT:   tensor.cast
+//       CHECK:   return %[[S]] : tensor<16x32xi8>
+func @rank_reducing_tensor_of_cast(%arg : tensor<4x6x16x32xi8>) -> tensor<16x32xi8> {
+  %0 = tensor.cast %arg : tensor<4x6x16x32xi8> to tensor<?x?x16x32xi8>
+  %1 = tensor.extract_slice %0[0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor<?x?x16x32xi8> to tensor<16x32xi8>
+  return %1 : tensor<16x32xi8>
+}
+
+// -----
+
+// CHECK-LABEL: func @rank_reducing_insert_slice_of_cast
+//  CHECK-SAME:   %[[A:.[a-z0-9A-Z_]+]]: tensor<16x32xi8>
+//  CHECK-SAME:   %[[B:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8>
+//       CHECK:   %[[S:.+]] = tensor.insert_slice %[[A]] into %[[B]][0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor<16x32xi8> into tensor<4x6x16x32xi8>
+// Tensor cast is folded away.
+//   CHECK-NOT:   tensor.cast
+//       CHECK:   return %[[S]] : tensor<4x6x16x32xi8>
+func @rank_reducing_insert_slice_of_cast(%a : tensor<16x32xi8>, %b : tensor<4x6x16x32xi8>) -> tensor<4x6x16x32xi8> {
+  %cast = tensor.cast %a : tensor<16x32xi8> to tensor<?x32xi8>
+  %res = tensor.insert_slice %cast into %b[0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor<?x32xi8> into tensor<4x6x16x32xi8>
+  return %res : tensor<4x6x16x32xi8>
+}
+
+// -----
+
+func @insert_slice_canonicalize(%arg0 : tensor<?x?x?xf32>, %arg1 : index,
+    %arg2 : index, %arg3 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
+{
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %c4 = constant 4 : index
+  %0 = tensor.insert_slice %arg0 into %arg3[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> into tensor<?x?x?xf32>
+  return %0 : tensor<?x?x?xf32>
+}
+// CHECK-LABEL: func @insert_slice_canonicalize
+//  CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
+//       CHECK:   %[[RESULT:.+]] = tensor.insert_slice %[[ARG0]]
+//  CHECK-SAME:      [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1]
+//  CHECK-SAME:      : tensor<?x?x?xf32> into tensor<?x?x?xf32>
+//       CHEKC:   return %[[RESULT]]
+
+// -----
+
+func @slice_to_insert_slice_canonicalize(%arg0 : tensor<?x?x?xf32>, %arg1 : index,
+    %arg2 : index, %arg3 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
+{
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %c4 = constant 4 : index
+  %0 = tensor.extract_slice %arg0[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
+  %1 = tensor.insert_slice %0 into %arg3[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> into tensor<?x?x?xf32>
+  return %1 : tensor<?x?x?xf32>
+}
+// CHECK-LABEL: func @slice_to_insert_slice_canonicalize
+//  CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
+//  CHECK-SAME:   %[[ARG3:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
+//       CHECK:   %[[SLICE:.+]] = tensor.extract_slice %[[ARG0]]
+//  CHECK-SAME:      [0, %{{.+}}, 1] [4, 1, %{{.+}} [1, 1, 1]
+//  CHECK-SAME:      : tensor<?x?x?xf32> to tensor<4x1x?xf32>
+//       CHECK:   %[[RESULT:.+]] = tensor.insert_slice %[[SLICE]]
+//  CHECK-SAME:      [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1]
+//  CHECK-SAME:      : tensor<4x1x?xf32> into tensor<?x?x?xf32>
+//       CHEKC:   return %[[RESULT]]
+
+// -----
+
+func @rank_reducing_insert_slice_canonicalize(%arg0 : tensor<?x?xf32>, %arg1 : index,
+    %arg2 : index, %arg3 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
+{
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %c4 = constant 4 : index
+  %0 = tensor.insert_slice %arg0 into %arg3[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor<?x?xf32> into tensor<?x?x?xf32>
+  return %0 : tensor<?x?x?xf32>
+}
+// CHECK-LABEL: func @rank_reducing_insert_slice_canonicalize
+//  CHECK-SAME:   %[[ARG0:.+]]: tensor<?x?xf32>
+//       CHECK:   %[[RESULT:.+]] = tensor.insert_slice %[[ARG0]]
+//  CHECK-SAME:      [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1]
+//  CHECK-SAME:      : tensor<?x?xf32> into tensor<?x?x?xf32>
+//       CHEKC:   return %[[RESULT]]
+
+// -----
+
+func @rank_reducing_slice_to_insert_slice_canonicalize(%arg0 : tensor<?x?x?xf32>, %arg1 : index,
+    %arg2 : index, %arg3 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
+{
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %c4 = constant 4 : index
+  %0 = tensor.extract_slice %arg0[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> to tensor<?x?xf32>
+  %1 = tensor.insert_slice %0 into %arg3[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor<?x?xf32> into tensor<?x?x?xf32>
+  return %1 : tensor<?x?x?xf32>
+}
+// CHECK-LABEL: func @rank_reducing_slice_to_insert_slice_canonicalize
+//  CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
+//  CHECK-SAME:   %[[ARG3:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
+//       CHECK:   %[[SLICE:.+]] = tensor.extract_slice %[[ARG0]]
+//  CHECK-SAME:     [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1]
+//  CHECK-SAME:     : tensor<?x?x?xf32> to tensor<4x?xf32>
+//       CHECK:   %[[RESULT:.+]] = tensor.insert_slice %[[SLICE]] into %[[ARG3]]
+//  CHECK-SAME:      [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1]
+//  CHECK-SAME:      : tensor<4x?xf32> into tensor<?x?x?xf32>
+//       CHEKC:   return %[[RESULT]]
+
+// -----
+
+func @insert_slice_propagate_dest_cast(%arg0 : tensor<2x?xi32>, %arg1 : tensor<i32>,
+    %arg2 : index, %arg3 : index) -> tensor<?x?xi32> {
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %c2 = constant 2 : index
+  %c8 = constant 8 : index
+  %0 = memref.dim %arg0, %c1 : tensor<2x?xi32>
+  %1 = tensor.extract %arg1[] : tensor<i32>
+  %2 = tensor.generate %arg2, %c8 {
+  ^bb0(%arg4: index, %arg5: index):
+    tensor.yield %1 : i32
+  } : tensor<?x?xi32>
+  %3 = tensor.insert_slice %arg0 into %2[%c0, %arg3] [%c2, %0] [%c1, %c1] : tensor<2x?xi32> into tensor<?x?xi32>
+  return %3 : tensor<?x?xi32>
+}
+// CHECK-LABEL: func @insert_slice_propagate_dest_cast
+//       CHECK:   %[[UPDATED:.+]] = tensor.insert_slice %{{.+}} into %{{.+}}[0, %{{.+}}] [2, %{{.+}}] [1, 1]
+//  CHECK-SAME:     tensor<2x?xi32> into tensor<?x8xi32>
+//       CHECK:   %[[CAST:.+]] = tensor.cast %[[UPDATED]]
+//       CHECK:   return %[[CAST]]
+
+// -----
+
+func @insert_slice_output_dest_canonicalize(%arg0 : tensor<2x3xi32>, %arg1 : tensor<i32>) -> tensor<3x9xi32> {
+  %c0 = constant 0 : index
+  %c1 = constant 1 : index
+  %c2 = constant 2 : index
+  %c9 = constant 9 : index
+  %c3 = constant 3 : index
+  %2 = tensor.extract %arg1[] : tensor<i32>
+  %4 = tensor.generate %c3, %c9 {
+  ^bb0(%arg2: index, %arg3: index):
+    tensor.yield %2 : i32
+  } : tensor<?x?xi32>
+  %5 = tensor.insert_slice %arg0 into %4[%c0, %c1] [%c2, %c3] [1, 1] : tensor<2x3xi32> into tensor<?x?xi32>
+  %6 = tensor.cast %5 : tensor<?x?xi32> to tensor<3x9xi32>
+  return %6 : tensor<3x9xi32>
+}
+// CHECK-LABEL: func @insert_slice_output_dest_canonicalize
+//  CHECK-SAME:   %[[ARG0:[a-zA-z0-9_]+]]: tensor<2x3xi32>
+//  CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: tensor<i32>
+//       CHECK:   %[[PAD:.+]] = tensor.extract %[[ARG1]]
+//       CHECK:   %[[GENERATE:.+]] = tensor.generate
+//       CHECK:   %[[RESULT:.+]] = tensor.insert_slice %[[ARG0]] into %[[GENERATE]]
+//       CHECK:   return %[[RESULT]]
diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir
--- a/mlir/test/IR/core-ops.mlir
+++ b/mlir/test/IR/core-ops.mlir
@@ -825,31 +825,31 @@
   return
 }
 
-// CHECK-LABEL: func @subtensor({{.*}}) {
-func @subtensor(%t: tensor<8x16x4xf32>, %idx : index) {
+// CHECK-LABEL: func @slice({{.*}}) {
+func @slice(%t: tensor<8x16x4xf32>, %idx : index) {
   %c0 = constant 0 : index
   %c1 = constant 1 : index
 
-  // CHECK: subtensor
+  // CHECK: tensor.extract_slice
   // CHECK-SAME: tensor<8x16x4xf32> to tensor<?x?x?xf32>
-  %1 = subtensor %t[%c0, %c0, %c0][%idx, %idx, %idx][%c1, %c1, %c1]
+  %1 = tensor.extract_slice %t[%c0, %c0, %c0][%idx, %idx, %idx][%c1, %c1, %c1]
     : tensor<8x16x4xf32> to tensor<?x?x?xf32>
 
-  // CHECK: subtensor
+  // CHECK: tensor.extract_slice
   // CHECK-SAME: tensor<8x16x4xf32> to tensor<4x4x4xf32>
-  %2 = subtensor %t[0, 2, 0][4, 4, 4][1, 1, 1]
+  %2 = tensor.extract_slice %t[0, 2, 0][4, 4, 4][1, 1, 1]
     : tensor<8x16x4xf32> to tensor<4x4x4xf32>
 
-  // CHECK: subtensor
+  // CHECK: tensor.extract_slice
   // CHECK-SAME: tensor<8x16x4xf32> to tensor<4x4xf32>
-  %3 = subtensor %t[0, 2, 0][4, 1, 4][1, 1, 1]
+  %3 = tensor.extract_slice %t[0, 2, 0][4, 1, 4][1, 1, 1]
     : tensor<8x16x4xf32> to tensor<4x4xf32>
 
   return
 }
 
-// CHECK-LABEL: func @subtensor_insert({{.*}}) {
-func @subtensor_insert(
+// CHECK-LABEL: func @insert_slice({{.*}}) {
+func @insert_slice(
     %t: tensor<8x16x4xf32>,
     %t2: tensor<16x32x8xf32>,
     %t3: tensor<4x4xf32>,
@@ -857,19 +857,19 @@
   %c0 = constant 0 : index
   %c1 = constant 1 : index
 
-  // CHECK: subtensor_insert
+  // CHECK: tensor.insert_slice
   // CHECK-SAME: tensor<8x16x4xf32> into tensor<16x32x8xf32>
-  %1 = subtensor_insert %t into %t2[%c0, %c0, %c0][%idx, %idx, %idx][%c1, %c1, %c1]
+  %1 = tensor.insert_slice %t into %t2[%c0, %c0, %c0][%idx, %idx, %idx][%c1, %c1, %c1]
     : tensor<8x16x4xf32> into tensor<16x32x8xf32>
 
-  // CHECK: subtensor_insert
+  // CHECK: tensor.insert_slice
   // CHECK-SAME: tensor<8x16x4xf32> into tensor<16x32x8xf32>
-  %2 = subtensor_insert %t into %t2[%c0, %idx, %c0][%idx, 4, %idx][%c1, 1, %c1]
+  %2 = tensor.insert_slice %t into %t2[%c0, %idx, %c0][%idx, 4, %idx][%c1, 1, %c1]
     : tensor<8x16x4xf32> into tensor<16x32x8xf32>
 
-  // CHECK: subtensor_insert
+  // CHECK: tensor.insert_slice
   // CHECK-SAME: tensor<4x4xf32> into tensor<8x16x4xf32>
-  %3 = subtensor_insert %t3 into %t[0, 2, 0][4, 1, 4][1, 1, 1]
+  %3 = tensor.insert_slice %t3 into %t[0, 2, 0][4, 1, 4][1, 1, 1]
     : tensor<4x4xf32> into tensor<8x16x4xf32>
 
   return
diff --git a/mlir/test/IR/invalid-ops.mlir b/mlir/test/IR/invalid-ops.mlir
--- a/mlir/test/IR/invalid-ops.mlir
+++ b/mlir/test/IR/invalid-ops.mlir
@@ -1214,9 +1214,9 @@
 
 // -----
 
-func @subtensor_wrong_dynamic_type(%t: tensor<8x16x4xf32>, %idx : index) {
+func @slice_wrong_dynamic_type(%t: tensor<8x16x4xf32>, %idx : index) {
       // expected-error @+1 {{expected result type to be 'tensor<4x4x4xf32>' or a rank-reduced version. (mismatch of result sizes)}}
-  %0 = subtensor %t[0, 2, 0][4, 4, 4][1, 1, 1]
+  %0 = tensor.extract_slice %t[0, 2, 0][4, 4, 4][1, 1, 1]
     : tensor<8x16x4xf32> to tensor<?x4x4xf32>
 
   return
@@ -1224,9 +1224,9 @@
 
 // -----
 
-func @subtensor_wrong_static_type(%t: tensor<8x16x4xf32>, %idx : index) {
+func @slice_wrong_static_type(%t: tensor<8x16x4xf32>, %idx : index) {
       // expected-error @+1 {{expected result type to be 'tensor<?x3x?xf32>' or a rank-reduced version. (mismatch of result sizes)}}
-  %0 = subtensor %t[0, 0, 0][%idx, 3, %idx][1, 1, 1]
+  %0 = tensor.extract_slice %t[0, 0, 0][%idx, 3, %idx][1, 1, 1]
     : tensor<8x16x4xf32> to tensor<4x4x4xf32>
 
   return
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir
@@ -10,12 +10,12 @@
   %const = constant dense<10.0> : tensor<2xf32>
   %insert_val = constant dense<20.0> : tensor<1xf32>
 
-  // Both of these subtensor_insert ops insert into the same original tensor
+  // Both of these insert_slice ops insert into the same original tensor
   // value `%const`. This can easily cause bugs if at the memref level
   // we attempt to write in-place into the memref that %const has been
   // converted into.
-  %inserted_at_position_0 = subtensor_insert %insert_val into %const[0][1][1] : tensor<1xf32> into tensor<2xf32>
-  %inserted_at_position_1 = subtensor_insert %insert_val into %const[1][1][1] : tensor<1xf32> into tensor<2xf32>
+  %inserted_at_position_0 = tensor.insert_slice %insert_val into %const[0][1][1] : tensor<1xf32> into tensor<2xf32>
+  %inserted_at_position_1 = tensor.insert_slice %insert_val into %const[1][1][1] : tensor<1xf32> into tensor<2xf32>
 
   %unranked_at_position_0 = tensor.cast %inserted_at_position_0 : tensor<2xf32> to tensor<*xf32>
   call @print_memref_f32(%unranked_at_position_0) : (tensor<*xf32>) -> ()
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir
@@ -9,7 +9,7 @@
 func @main() {
   %const = constant dense<10.0> : tensor<2xf32>
   %insert_val = constant dense<20.0> : tensor<1xf32>
-  %inserted = subtensor_insert %insert_val into %const[0][1][1] : tensor<1xf32> into tensor<2xf32>
+  %inserted = tensor.insert_slice %insert_val into %const[0][1][1] : tensor<1xf32> into tensor<2xf32>
 
   %unranked = tensor.cast %inserted : tensor<2xf32> to tensor<*xf32>
   call @print_memref_f32(%unranked) : (tensor<*xf32>) -> ()
diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir
--- a/mlir/test/Transforms/canonicalize.mlir
+++ b/mlir/test/Transforms/canonicalize.mlir
@@ -1065,9 +1065,9 @@
 
 // -----
 
-// CHECK-LABEL: func @subtensor
+// CHECK-LABEL: func @slice
 // CHECK-SAME: %[[ARG0:[0-9a-z]*]]: index, %[[ARG1:[0-9a-z]*]]: index
-func @subtensor(%t: tensor<8x16x4xf32>, %arg0 : index, %arg1 : index)
+func @slice(%t: tensor<8x16x4xf32>, %arg0 : index, %arg1 : index)
   -> tensor<?x?x?xf32>
 {
   %c0 = constant 0 : index
@@ -1076,18 +1076,18 @@
   %c7 = constant 7 : index
   %c11 = constant 11 : index
 
-  // CHECK: subtensor %{{.*}}[0, 0, 0] [7, 11, 2] [1, 1, 1] :
+  // CHECK: tensor.extract_slice %{{.*}}[0, 0, 0] [7, 11, 2] [1, 1, 1] :
   // CHECK-SAME: tensor<8x16x4xf32> to tensor<7x11x2xf32>
   // tensor.cast gets folded away in consumer.
   //  CHECK-NOT: tensor.cast
-  %1 = subtensor %t[%c0, %c0, %c0] [%c7, %c11, %c2] [%c1, %c1, %c1]
+  %1 = tensor.extract_slice %t[%c0, %c0, %c0] [%c7, %c11, %c2] [%c1, %c1, %c1]
     : tensor<8x16x4xf32> to tensor<?x?x?xf32>
 
-  // Test: subtensor with one dynamic operand can also be folded.
-  // CHECK: subtensor %{{.*}}[0, 0, 0] [2, %[[ARG0]], 2] [1, 1, 1] :
+  // Test: slice with one dynamic operand can also be folded.
+  // CHECK: tensor.extract_slice %{{.*}}[0, 0, 0] [2, %[[ARG0]], 2] [1, 1, 1] :
   // CHECK-SAME: tensor<7x11x2xf32> to tensor<2x?x2xf32>
   // CHECK: tensor.cast %{{.*}} : tensor<2x?x2xf32> to tensor<?x?x?xf32>
-  %2 = subtensor %1[%c0, %c0, %c0] [%c2, %arg0, %c2] [%c1, %c1, %c1]
+  %2 = tensor.extract_slice %1[%c0, %c0, %c0] [%c2, %arg0, %c2] [%c1, %c1, %c1]
     : tensor<?x?x?xf32> to tensor<?x?x?xf32>
 
   return %2 : tensor<?x?x?xf32>
diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
@@ -529,9 +529,9 @@
   (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
 }
 
-static void applySubTensorOfPadTensorSwapPattern(FuncOp funcOp) {
+static void applyExtractSliceOfPadTensorSwapPattern(FuncOp funcOp) {
   RewritePatternSet patterns(funcOp.getContext());
-  patterns.add<SubTensorOfPadTensorSwapPattern>(funcOp.getContext());
+  patterns.add<ExtractSliceOfPadTensorSwapPattern>(funcOp.getContext());
   (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
 }
 
@@ -614,7 +614,7 @@
   if (testTransformPadTensor)
     return applyPadTensorToGenericPatterns(getFunction());
   if (testSwapSubTensorPadTensor)
-    return applySubTensorOfPadTensorSwapPattern(getFunction());
+    return applyExtractSliceOfPadTensorSwapPattern(getFunction());
   if (testAffineMinSCFCanonicalizationPatterns)
     return applyAffineMinSCFCanonicalizationPatterns(getFunction());
   if (testTileAndPadPattern)