diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-tensor-matmul.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-tensor-matmul.mlir
--- a/mlir/integration_test/Dialect/Linalg/CPU/test-tensor-matmul.mlir
+++ b/mlir/integration_test/Dialect/Linalg/CPU/test-tensor-matmul.mlir
@@ -1,4 +1,12 @@
-// RUN: mlir-opt %s -std-bufferize -linalg-bufferize -func-bufferize -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -linalg-bufferize -std-bufferize -func-bufferize \
+// RUN: -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \
+// RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+// RUN: mlir-opt %s  -linalg-tile="linalg-tile-sizes=1,2,3" -linalg-bufferize \
+// RUN: -scf-bufferize -std-bufferize -func-bufferize -convert-linalg-to-loops \
+// RUN:  -convert-scf-to-std -convert-linalg-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp
@@ -228,9 +228,95 @@
     return success();
   }
 };
-} // namespace
 
-namespace {
+// Unpacks `attr`, assumed to be an ArrayAttr of IntegerAttr, into int64_t's.
+static SmallVector<int64_t, 4> extractFromI64ArrayAttr(Attribute attr) {
+  SmallVector<int64_t, 4> values;
+  for (Attribute element : attr.cast<ArrayAttr>())
+    values.push_back(element.cast<IntegerAttr>().getInt());
+  return values;
+}
+
+/// Convert `subtensor %t [offsets][sizes][strides] -> %st` to an alloc + copy
+/// pattern:
+/// ```
+///   %a = alloc(sizes)
+///   %sv = subview tensor_to_memref(%t) [offsets][sizes][strides]
+///   linalg_copy(%sv, %a)
+/// ```
+/// The op is replaced by %a, the newly allocated buffer.
+/// This pattern is arguably a std pattern once linalg::CopyOp becomes
+/// std::CopyOp.
+class SubTensorOpConverter : public OpConversionPattern<SubTensorOp> {
+public:
+  using OpConversionPattern<SubTensorOp>::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(SubTensorOp op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    SubTensorOpAdaptor adaptor(operands,
+                               op.getOperation()->getAttrDictionary());
+    Value sourceMemref = adaptor.source();
+    assert(sourceMemref.getType().isa<MemRefType>());
+
+    MemRefType subviewMemRefType =
+        getTypeConverter()->convertType(op.getType()).cast<MemRefType>();
+    // Use the remapped (adaptor) dynamic operands. The sizes are exactly the
+    // alloc operands subviewMemRefType needs, thanks to subview/subtensor
+    // canonicalization and verification.
+    Value alloc = rewriter.create<AllocOp>(op.getLoc(), subviewMemRefType,
+                                           adaptor.sizes());
+    Value subView = rewriter.create<SubViewOp>(
+        op.getLoc(), sourceMemref, extractFromI64ArrayAttr(op.static_offsets()),
+        extractFromI64ArrayAttr(op.static_sizes()),
+        extractFromI64ArrayAttr(op.static_strides()), adaptor.offsets(),
+        adaptor.sizes(), adaptor.strides());
+    rewriter.create<linalg::CopyOp>(op.getLoc(), subView, alloc);
+    rewriter.replaceOp(op, alloc);
+    return success();
+  }
+};
+
+/// Convert `subtensor_insert %source into %dest [offsets][sizes][strides] ->
+/// %t` to a tensor_to_memref + subview + copy + tensor_load pattern:
+/// ```
+///   %m = tensor_to_memref(%dest)
+///   %sv = subview %m [offsets][sizes][strides]
+///   linalg_copy(tensor_to_memref(%source), %sv)
+///   %res = tensor_load(%m)
+/// ```
+/// NOTE(review): the copy writes into %m itself (no alloc), so other users of
+/// %dest would observe the insertion -- confirm this aliasing is intended.
+/// This is arguably a std pattern once linalg::CopyOp becomes std::CopyOp.
+class SubTensorInsertOpConverter
+    : public OpConversionPattern<SubTensorInsertOp> {
+public:
+  using OpConversionPattern<SubTensorInsertOp>::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(SubTensorInsertOp op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    SubTensorInsertOpAdaptor adaptor(operands,
+                                     op.getOperation()->getAttrDictionary());
+    Value sourceMemRef = adaptor.source();
+    assert(sourceMemRef.getType().isa<MemRefType>());
+
+    Value destMemRef = adaptor.dest();
+    assert(destMemRef.getType().isa<MemRefType>());
+
+    // View of the destination region that receives the source.
+    Value subview = rewriter.create<SubViewOp>(
+        op.getLoc(), destMemRef, extractFromI64ArrayAttr(op.static_offsets()),
+        extractFromI64ArrayAttr(op.static_sizes()),
+        extractFromI64ArrayAttr(op.static_strides()), adaptor.offsets(),
+        adaptor.sizes(), adaptor.strides());
+    // Copy the source buffer into that region.
+    rewriter.create<linalg::CopyOp>(op.getLoc(), sourceMemRef, subview);
+    rewriter.replaceOp(op, destMemRef);
+    return success();
+  }
+};
+
 /// TensorConstantOp conversion inserts a linearized 1-D vector constant that is
 /// stored in memory. A linalg.reshape is introduced to convert to the desired
 /// n-D buffer form.
@@ -287,28 +373,27 @@
 } // namespace
 
 namespace {
-
 /// Converts Linalg operations that work on tensor-type operands or results to
 /// work on buffers.
 struct LinalgBufferizePass : public LinalgBufferizeBase<LinalgBufferizePass> {
   void runOnOperation() override {
     MLIRContext &context = getContext();
     ConversionTarget target(context);
-    BufferizeTypeConverter converter;
+    BufferizeTypeConverter typeConverter;
 
     // Mark all Standard operations legal.
-    // TODO: Remove after TensorConstantOpConverter moves to std-bufferize.
     target.addLegalDialect<StandardOpsDialect, vector::VectorDialect>();
+    target.addIllegalOp<SubTensorOp, SubTensorInsertOp>();
 
     // Mark all Linalg operations illegal as long as they work on tensors.
     auto isLegalOperation = [&](Operation *op) {
-      return converter.isLegal(op);
+      return typeConverter.isLegal(op);
     };
     target.addDynamicallyLegalDialect<linalg::LinalgDialect>(isLegalOperation);
     target.addDynamicallyLegalOp<ConstantOp>(isLegalOperation);
 
     OwningRewritePatternList patterns;
-    populateLinalgBufferizePatterns(&context, converter, patterns);
+    populateLinalgBufferizePatterns(&context, typeConverter, patterns);
     if (failed(applyPartialConversion(getOperation(), target,
                                       std::move(patterns))))
       signalPassFailure();
@@ -319,10 +404,17 @@
 std::unique_ptr<OperationPass<ModuleOp>> mlir::createLinalgBufferizePass() {
   return std::make_unique<LinalgBufferizePass>();
 }
+
 void mlir::linalg::populateLinalgBufferizePatterns(
-    MLIRContext *context, BufferizeTypeConverter &converter,
+    MLIRContext *context, BufferizeTypeConverter &typeConverter,
     OwningRewritePatternList &patterns) {
-
-  patterns.insert<BufferizeAnyLinalgOp>(converter);
-  patterns.insert<TensorConstantOpConverter>(converter, context);
+  patterns.insert<BufferizeAnyLinalgOp>(typeConverter);
+  // TODO: Drop TensorConstantOpConverter once tensor constants work in std.
+  patterns.insert<
+      // clang-format off
+      SubTensorOpConverter,
+      SubTensorInsertOpConverter,
+      TensorConstantOpConverter
+      // clang-format on
+      >(typeConverter, context);
 }
diff --git a/mlir/test/Dialect/Linalg/bufferize.mlir b/mlir/test/Dialect/Linalg/bufferize.mlir
--- a/mlir/test/Dialect/Linalg/bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/bufferize.mlir
@@ -147,3 +147,74 @@
 
   return %0 : tensor<3x2xf32>
 }
+
+// -----
+
+// CHECK-DAG: #[[$MAP0:[0-9a-z]*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
+// CHECK-DAG: #[[$MAP1:[0-9a-z]*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1 * 2)>
+// @make_index yields a runtime index so the second subtensor is dynamic.
+func @make_index() -> index
+
+// CHECK-LABEL: func @bufferize_subtensor(
+//  CHECK-SAME:   %[[T:[0-9a-z]*]]: tensor<?x?xf32>
+func @bufferize_subtensor(%t : tensor<?x?xf32>) -> (tensor<2x3xf32>, tensor<2x?xf32>) {
+  //      CHECK: %[[IDX:.*]] = call @make_index() : () -> index
+  %i0 = call @make_index() : () -> index
+
+  //      CHECK: %[[M0:.*]] = tensor_to_memref %[[T]] : memref<?x?xf32>
+  // CHECK-NEXT: %[[A0:.*]] = alloc() : memref<2x3xf32>
+  // CHECK-NEXT: %[[SM0:.*]] = subview %[[M0]][0, 0] [2, 3] [1, 1]
+  // CHECK-SAME:   memref<?x?xf32> to memref<2x3xf32, #[[$MAP0]]>
+  // CHECK-NEXT: linalg.copy(%[[SM0]], %[[A0]]) : memref<2x3xf32, #[[$MAP0]]>, memref<2x3xf32>
+  // CHECK-NEXT: %[[RT0:.*]] = tensor_load %[[A0]] : memref<2x3xf32>
+  %st0 = subtensor %t[0, 0][2, 3][1, 1] : tensor<?x?xf32> to tensor<2x3xf32>
+
+  //      CHECK: %[[M1:.*]] = tensor_to_memref %[[T]] : memref<?x?xf32>
+  // CHECK-NEXT: %[[A1:.*]] = alloc(%[[IDX]]) : memref<2x?xf32>
+  // CHECK-NEXT: %[[SM1:.*]] = subview %[[M1]][0, %[[IDX]]] [2, %[[IDX]]] [1, 2]
+  // CHECK-SAME:   memref<?x?xf32> to memref<2x?xf32, #[[$MAP1]]>
+  // CHECK-NEXT: linalg.copy(%[[SM1]], %[[A1]]) : memref<2x?xf32, #[[$MAP1]]>, memref<2x?xf32>
+  // CHECK-NEXT: %[[RT1:.*]] = tensor_load %[[A1]] : memref<2x?xf32>
+  %st1 = subtensor %t[0, %i0][2, %i0][1, 2] : tensor<?x?xf32> to tensor<2x?xf32>
+
+  // CHECK-NEXT: return %[[RT0]], %[[RT1]]
+  return %st0, %st1 : tensor<2x3xf32>, tensor<2x?xf32>
+}
+
+// -----
+
+// CHECK-DAG: #[[$MAP0:[0-9a-z]*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
+// CHECK-DAG: #[[$MAP1:[0-9a-z]*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1 * 2)>
+
+func @make_index() -> index
+
+// CHECK-LABEL: func @bufferize_subtensor_insert(
+//  CHECK-SAME:   %[[T:[0-9a-z]*]]: tensor<?x?xf32>
+//  CHECK-SAME:   %[[ST0:[0-9a-z]*]]: tensor<2x3xf32>
+//  CHECK-SAME:   %[[ST1:[0-9a-z]*]]: tensor<2x?xf32>
+func @bufferize_subtensor_insert(%t : tensor<?x?xf32>, %st0 : tensor<2x3xf32>, %st1 : tensor<2x?xf32>) ->
+    (tensor<?x?xf32>, tensor<?x?xf32>) {
+  // Both inserts target the same %t; each is expected to bufferize to a
+  // linalg.copy into a subview of tensor_to_memref(%t), with no new alloc.
+  //      CHECK: %[[IDX:.*]] = call @make_index() : () -> index
+  %i0 = call @make_index() : () -> index
+
+  //  CHECK-DAG: %[[M0:.*]] = tensor_to_memref %[[T]] : memref<?x?xf32>
+  //  CHECK-DAG: %[[SM0:.*]] = tensor_to_memref %[[ST0]] : memref<2x3xf32>
+  // CHECK-NEXT: %[[SUBVIEW0:.*]] = subview %[[M0]][0, 0] [2, 3] [1, 1]
+  // CHECK-SAME:   memref<?x?xf32> to memref<2x3xf32, #[[$MAP0]]>
+  // CHECK-NEXT: linalg.copy(%[[SM0]], %[[SUBVIEW0]]) : memref<2x3xf32>, memref<2x3xf32, #[[$MAP0]]>
+  // CHECK-NEXT: %[[RT0:.*]] = tensor_load %[[M0]] : memref<?x?xf32>
+  %t0 = subtensor_insert %st0 into %t[0, 0][2, 3][1, 1] : tensor<2x3xf32> into tensor<?x?xf32>
+
+  //  CHECK-DAG: %[[M1:.*]] = tensor_to_memref %[[T]] : memref<?x?xf32>
+  //  CHECK-DAG: %[[SM1:.*]] = tensor_to_memref %[[ST1]] : memref<2x?xf32>
+  // CHECK-NEXT: %[[SUBVIEW1:.*]] = subview %[[M1]][0, %[[IDX]]] [2, %[[IDX]]] [1, 2]
+  // CHECK-SAME:   memref<?x?xf32> to memref<2x?xf32, #[[$MAP1]]>
+  // CHECK-NEXT: linalg.copy(%[[SM1]], %[[SUBVIEW1]]) : memref<2x?xf32>, memref<2x?xf32, #[[$MAP1]]>
+  // CHECK-NEXT: %[[RT1:.*]] = tensor_load %[[M1]] : memref<?x?xf32>
+  %t1 = subtensor_insert %st1 into %t[0, %i0][2, %i0][1, 2] : tensor<2x?xf32> into tensor<?x?xf32>
+
+  //     CHECK: return %[[RT0]], %[[RT1]]
+  return %t0, %t1: tensor<?x?xf32>, tensor<?x?xf32>
+}