diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h
--- a/mlir/include/mlir/Dialect/Linalg/Passes.h
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.h
@@ -38,11 +38,6 @@
 std::unique_ptr<Pass> createLinalgNamedOpConversionPass();
 
-std::unique_ptr<OperationPass<func::FuncOp>>
-createLinalgTilingPass(ArrayRef<int64_t> tileSizes = {},
-                       linalg::LinalgTilingLoopType loopType =
-                           linalg::LinalgTilingLoopType::Loops);
-
 std::unique_ptr<OperationPass<func::FuncOp>>
 createLinalgInlineScalarOperandsPass();
 
diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td
--- a/mlir/include/mlir/Dialect/Linalg/Passes.td
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.td
@@ -102,22 +102,6 @@
   ];
 }
 
-def LinalgTilingPass : Pass<"linalg-tile", "func::FuncOp"> {
-  let summary = "Tile operations in the linalg dialect";
-  let constructor = "mlir::createLinalgTilingPass()";
-  let dependentDialects = [
-    "AffineDialect",
-    "linalg::LinalgDialect",
-    "memref::MemRefDialect",
-    "scf::SCFDialect"
-  ];
-  let options = [
-    ListOption<"tileSizes", "tile-sizes", "int64_t", "Tile sizes">,
-    Option<"loopType", "loop-type", "std::string", /*default=*/"\"for\"",
-           "Specify the type of loops to generate: for, parallel">
-  ];
-}
-
 def LinalgGeneralization : Pass<"linalg-generalize-named-ops", "func::FuncOp"> {
   let summary = "Convert named ops into generic ops";
   let constructor = "mlir::createLinalgGeneralizationPass()";
@@ -162,19 +146,6 @@
   ];
 }
 
-def LinalgStrategyTilePass
-    : Pass<"linalg-strategy-tile-pass", "func::FuncOp"> {
-  let summary = "Configurable pass to apply pattern-based linalg tiling.";
-  let constructor = "mlir::createLinalgStrategyTilePass()";
-  let dependentDialects = ["linalg::LinalgDialect"];
-  let options = [
-    Option<"anchorFuncName", "anchor-func", "std::string", /*default=*/"",
-           "Which func op is the anchor to latch on.">,
-    Option<"anchorOpName", "anchor-op", "std::string", /*default=*/"",
-           "Which linalg op within the func is the anchor to latch on.">,
-  ];
-}
-
 def LinalgStrategyRemoveMarkersPass
     : Pass<"linalg-strategy-remove-markers-pass", "func::FuncOp"> {
   let summary = "Cleanup pass that drops markers.";
 
diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -732,6 +732,63 @@
   }];
 }
 
+def TileToScfForOp : Op<Transform_Dialect, "structured.tile_to_scf_for",
+       [DeclareOpInterfaceMethods<TransformOpInterface>,
+        DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
+  let description = [{
+    Indicates that the given `target` op should be tiled with the given sizes.
+    This transform generates a loop nest with a smaller ("tiled") target
+    operation in its body. The target must implement TilingInterface.
+
+    Tile sizes may be known at transformation time, in which case they are
+    expected to be provided in the `static_sizes` attribute, or not, in which
+    case the tile value must be computed by the payload IR and the handle to
+    the operation computing it must be provided through `dynamic_sizes`. When
+    the sizes are not known statically, the corresponding entry in the
+    `static_sizes` attribute must be set to `ShapedType::kDynamicSize`. Only
+    the dynamic sizes must be provided in `dynamic_sizes`, i.e., there should
+    be as many handles as `ShapedType::kDynamicSize` values in the
+    `static_sizes` attribute. A static size of `0` indicates that the
+    dimension should not be tiled. No loop will be generated for such
+    dimensions.
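+
+    For example, assuming `%0` is a handle to a payload op implementing
+    TilingInterface and `%sz` is a handle to a payload op producing a single
+    `index` result (both names are illustrative; this is a sketch rather than
+    an excerpt from a test):
+
+    ```
+    %tiled, %loops:2 = transform.structured.tile_to_scf_for %0 [4, %sz, 0]
+    ```
+
+    tiles the first dimension by a static size of `4`, the second by the
+    dynamically computed size, and leaves the third untiled, so two loop
+    handles are produced.
+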
+    If all tile sizes are `0`, this transform is effectively a no-op.
+
+    This op returns handles to the tiled op (in the generated loop nest) and
+    the generated loops. The number of loops is the number of tile sizes that
+    are statically known to be non-zero.
+
+    #### Return modes
+
+    On success, the resulting handles are associated with co-indexed lists of
+    tiled operations and loops around them.
+
+    This operation only supports TilingInterface ops and produces a
+    silenceable failure if the input contains any non-TilingInterface ops.
+    The ops preceding it in the list associated with the `target` handle will
+    have been tiled.
+
+    This operation produces a silenceable failure if the `dynamic_sizes`
+    handles are associated with lists of payload operations of a size
+    different than that of the list associated with the `target` handle.
+
+    If the internal implementation of tiling for any of the operations fails,
+    this transform produces a definite failure.
+  }];
+
+  let arguments = (ins PDL_Operation:$target,
+                   Variadic<PDL_Operation>:$dynamic_sizes,
+                   DefaultValuedAttr<I64ArrayAttr, "{}">:$static_sizes,
+                   DefaultValuedAttr<I64ArrayAttr, "{}">:$interchange);
+  let results = (outs PDL_Operation:$tiled_linalg_op,
+                  Variadic<PDL_Operation>:$loops);
+
+  let hasCustomAssemblyFormat = 1;
+
+  let extraClassDeclaration = [{
+    /// Returns the list of tile sizes, which may be static (Attribute) or
+    /// dynamic (Value).
+    SmallVector<OpFoldResult> getMixedSizes();
+  }];
+}
+
 def VectorizeOp : Op<Transform_Dialect, "structured.vectorize",
     [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
      TransformEachOpTrait, TransformOpInterface]> {
 
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h b/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h
@@ -30,41 +30,8 @@
   LinalgTransformationFilter::FilterFunction filter = nullptr;
 };
 
-/// Represent one application of LinalgStrategyTilePass.
-struct Tile : public Transformation {
-  Tile(StringRef name, linalg::LinalgTilingOptions options,
-       LinalgTransformationFilter::FilterFunction f = nullptr)
-      : Transformation(std::move(f)), opName(name),
-        options(std::move(options)) {}
-
-  void addToPassPipeline(OpPassManager &pm,
-                         LinalgTransformationFilter m) const override {
-    pm.addPass(createLinalgStrategyTilePass(opName, options, m));
-  }
-
-private:
-  std::string opName;
-  linalg::LinalgTilingOptions options;
-};
-
 /// Codegen strategy controls how a Linalg op is progressively lowered.
 struct CodegenStrategy {
-  /// Append a pattern to add a level of tiling for Op `opName` with tiling
-  /// `options`.
-  CodegenStrategy &
-  tile(StringRef opName, const linalg::LinalgTilingOptions &options,
-       const LinalgTransformationFilter::FilterFunction &f = nullptr) {
-    transformationSequence.emplace_back(
-        std::make_unique<Tile>(opName, options, f));
-    return *this;
-  }
-  /// Conditionally append a pattern to add a level of tiling for
-  /// `LinalgOpType` with tiling `options`.
-  CodegenStrategy &
-  tileIf(bool b, StringRef opName, linalg::LinalgTilingOptions options,
-         LinalgTransformationFilter::FilterFunction f = nullptr) {
-    return b ? tile(opName, std::move(options), std::move(f)) : *this;
-  }
   /// Configure the post staged-patterns global enabling passes options.
CodegenStrategy & setVectorTransferToSCFOptions(LinalgEnablingOptions options) { diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -684,57 +684,25 @@ RewritePatternSet getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx); void populateLinalgTilingCanonicalizationPatterns(RewritePatternSet &patterns); -/// -/// Linalg tiling pattern. -/// -/// Apply the `tiling` transformation as a pattern. -/// `filter` controls LinalgTransformMarker matching and update when specified. -/// See `tiling` for more details. -// TODO: TiledOpInterface -struct LinalgTilingPattern : public OpInterfaceRewritePattern { - /// Construct a generic pattern applied to all LinalgOp that verify `filter`. - LinalgTilingPattern( - MLIRContext *context, LinalgTilingOptions options, - LinalgTransformationFilter f = LinalgTransformationFilter(), - PatternBenefit benefit = 1); - - /// Construct a pattern specifically applied to `opName`. - LinalgTilingPattern( - StringRef opName, MLIRContext *context, LinalgTilingOptions options, - LinalgTransformationFilter f = LinalgTransformationFilter(), - PatternBenefit benefit = 1); - - /// `matchAndRewrite` implementation that returns the significant transformed - /// pieces of IR. - FailureOr - returningMatchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const; - - LogicalResult matchAndRewrite(LinalgOp op, - PatternRewriter &rewriter) const override { - return returningMatchAndRewrite(op, rewriter); - } - -private: - /// LinalgTransformMarker handles special attribute manipulations. - LinalgTransformationFilter filter; - /// Options to control tiling; - LinalgTilingOptions options; -}; +/// Perform tile and X. +FailureOr tileAndX(RewriterBase &rewriter, LinalgOp op, + const LinalgTilingOptions &options); /// /// Linalg padding pattern. /// /// Apply the `padding` transformation as a pattern. -/// `filter` controls LinalgTransformMarker matching and update when specified. -/// See `padding` for more details. +/// `filter` controls LinalgTransformMarker matching and update when +/// specified. See `padding` for more details. struct LinalgPaddingPattern : public OpInterfaceRewritePattern { - /// Construct a generic pattern applied to all LinalgOp that verify `filter`. + /// Construct a generic pattern applied to all LinalgOp that verify + /// `filter`. LinalgPaddingPattern(MLIRContext *context, LinalgPaddingOptions options = LinalgPaddingOptions(), PatternBenefit benefit = 1); - /// `matchAndRewrite` implementation that returns the significant transformed - /// pieces of IR. + /// `matchAndRewrite` implementation that returns the significant + /// transformed pieces of IR. FailureOr returningMatchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const; @@ -791,11 +759,12 @@ /// Linalg generalization pattern. /// /// Apply the `generalization` transformation as a pattern. -/// `filter` controls LinalgTransformMarker matching and update when specified. -/// See `generalization` for more details. +/// `filter` controls LinalgTransformMarker matching and update when +/// specified. See `generalization` for more details. struct LinalgGeneralizationPattern : public OpInterfaceRewritePattern { - /// Construct a generic pattern applied to all LinalgOp that verify `filter`. + /// Construct a generic pattern applied to all LinalgOp that verify + /// `filter`. 
LinalgGeneralizationPattern( MLIRContext *context, LinalgTransformationFilter f = LinalgTransformationFilter(), @@ -807,8 +776,8 @@ LinalgTransformationFilter f = LinalgTransformationFilter(), PatternBenefit benefit = 1); - /// `matchAndRewrite` implementation that returns the significant transformed - /// pieces of IR. + /// `matchAndRewrite` implementation that returns the significant + /// transformed pieces of IR. FailureOr returningMatchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const; @@ -828,8 +797,8 @@ /// Empty for now, used for SFINAE purposes only. struct LinalgVectorizationOptions {}; -/// `filter` controls LinalgTransformMarker matching and update when specified. -/// See `vectorizeLinalgOp` for more details. +/// `filter` controls LinalgTransformMarker matching and update when +/// specified. See `vectorizeLinalgOp` for more details. struct CopyVectorizationPattern : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -875,8 +844,8 @@ /// Linalg lowering patterns. /// /// Apply the `linalgLowerOpToLoops` transformation as a pattern. -/// `filter` controls LinalgTransformMarker matching and update when specified. -/// See `linalgLowerOpToLoops` for more details. +/// `filter` controls LinalgTransformMarker matching and update when +/// specified. See `linalgLowerOpToLoops` for more details. enum class LinalgLoweringType { LibraryCall = 0, Loops = 1, @@ -927,8 +896,8 @@ private: /// LinalgTransformMarker handles special attribute manipulations. LinalgTransformationFilter filter; - /// Controls whether the pattern lowers to library calls, scf.for, affine.for - /// or scf.parallel. + /// Controls whether the pattern lowers to library calls, scf.for, + /// affine.for or scf.parallel. LinalgLoweringType loweringType; }; @@ -942,9 +911,9 @@ /// Linalg decompose convolutions patterns -/// Populates patterns to decompose high-D convolution ops into low-D ones. This -/// is a step in progressive lowering for convolution ops, afterwards we can -/// vectorize the low-D convolution ops. +/// Populates patterns to decompose high-D convolution ops into low-D ones. +/// This is a step in progressive lowering for convolution ops, afterwards we +/// can vectorize the low-D convolution ops. void populateDecomposeConvolutionPatterns(RewritePatternSet &patterns, PatternBenefit benefit = 1); @@ -952,8 +921,8 @@ // Op-specific patterns. //===----------------------------------------------------------------------===// -/// tensor::PadOp is not canonicalized away yet, so we provide a transformation -/// to `linalg.generic`. +/// tensor::PadOp is not canonicalized away yet, so we provide a +/// transformation to `linalg.generic`. struct PadOpTransformationPattern : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -961,12 +930,12 @@ PatternRewriter &rewriter) const override; }; -/// Pad the iterator dimensions `paddingDimensions` of all `opToPad` operands to -/// a static bounding box. Use `paddingValues` and `packPaddings` to set padding -/// value and nofold attribute of the created tensor::PadOps, respectively. -/// Update `paddedOp` to the cloned operation with statically shaped -/// `paddingDimensions` and return the extracted dynamically shaped results. If -/// padding fails, return failure. +/// Pad the iterator dimensions `paddingDimensions` of all `opToPad` operands +/// to a static bounding box. Use `paddingValues` and `packPaddings` to set +/// padding value and nofold attribute of the created tensor::PadOps, +/// respectively. 
Update `paddedOp` to the cloned operation with statically +/// shaped `paddingDimensions` and return the extracted dynamically shaped +/// results. If padding fails, return failure. FailureOr> rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad, ArrayRef paddingDimensions, @@ -1053,7 +1022,8 @@ /// vector.transfer_write %..., %out[...] /// ``` /// Where there is no interleaved use between transfer_write and memref.copy. -/// This is a custom rewrite to forward partial writes to vector.transfer_write. +/// This is a custom rewrite to forward partial writes to +/// vector.transfer_write. struct LinalgCopyVTWForwardingPattern : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -1120,35 +1090,13 @@ const LinalgTransformationFilter &f) {} }; -template -class TilingPatterns; - -template <> -class TilingPatterns<> { -public: - static void insert(RewritePatternSet &patterns, - const LinalgTilingOptions &options, - const LinalgTransformationFilter &f) {} -}; - -template -class TilingPatterns { -public: - static void insert(RewritePatternSet &patterns, - const LinalgTilingOptions &options, - const LinalgTransformationFilter &f) { - patterns.add(OpTy::getOperationName(), - patterns.getContext(), options, f); - TilingPatterns::insert(patterns, options, f); - } -}; - /// Split Reduction options. struct SplitReductionOptions { - // Ratio used to split the reduction dimension. If the ratio is <= 1, nothing - // will be done. + // Ratio used to split the reduction dimension. If the ratio is <= 1, + // nothing will be done. int64_t ratio = 0; - // Index where the extra dimension is added to the intermediate tensor shape. + // Index where the extra dimension is added to the intermediate tensor + // shape. unsigned index = 0; // If the inner dimension after splitting is parallel or reduction. bool innerParallel = false; @@ -1167,10 +1115,10 @@ const LinalgTransformationFilter &f = LinalgTransformationFilter(), bool useAlloc = false); -/// Apply transformation to split the single linalg op reduction into a parallel -/// and reduction dimension. Then create a new linalg.generic op doing the rest -/// of the reduction. Return the new linalg op with an extra parallel dimension -/// or failure if the transformation didn't happen. +/// Apply transformation to split the single linalg op reduction into a +/// parallel and reduction dimension. Then create a new linalg.generic op +/// doing the rest of the reduction. Return the new linalg op with an extra +/// parallel dimension or failure if the transformation didn't happen. /// Example: /// ``` /// %r = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, @@ -1186,10 +1134,10 @@ /// To: /// ``` /// %cst = arith.constant 0.000000e+00 : f32 -/// %0 = tensor.expand_shape %in [[0, 1]] : tensor<32xf32> into tensor<4x8xf32> -/// %1 = tensor.empty [4] : tensor<4xf32> -/// %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32> -/// %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, +/// %0 = tensor.expand_shape %in [[0, 1]] : tensor<32xf32> into +/// tensor<4x8xf32> %1 = tensor.empty [4] : tensor<4xf32> %2 = linalg.fill +/// ins(%cst : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32> %3 = +/// linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, /// affine_map<(d0, d1) -> (d0)>], /// iterator_types = ["parallel", "reduction"]} /// ins(%0 : tensor<4x8xf32>) outs(%2 : tensor<4xf32>) { @@ -1226,8 +1174,8 @@ bool useAlloc = false); /// Scaling-based implementation of the split reduction transformation. 
-/// Instead of introducing an ExpandShapeOp, this rewrites a reduction
-/// dimension `k` into `k * scale + kk`.
+/// Instead of introducing an ExpandShapeOp, this rewrites a reduction
+/// dimension `k` into `k * scale + kk`.
 ///
 /// Example:
 /// ```
@@ -1252,8 +1200,8 @@
 ///
 /// %3 = linalg.generic {indexing_maps = [#map0, #map1, #map2, #map3],
 ///   iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
-///   ins(%A, %B, %2 : tensor<16x256xf32>, tensor<256x32xf32>, tensor<64x4xi1>)
-///   outs(%1 : tensor<16x32x64xf32>) {
+///   ins(%A, %B, %2 : tensor<16x256xf32>, tensor<256x32xf32>,
+///   tensor<64x4xi1>) outs(%1 : tensor<16x32x64xf32>) {
 /// ^bb0(%arg3: f32, %arg4: f32, %arg5: i1, %arg6: f32):
 ///   %5 = arith.mulf %arg3, %arg4 : f32
 ///   %6 = arith.addf %arg6, %5 : f32
 
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -9,6 +9,7 @@
 #include "mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h"
 #include "mlir/AsmParser/AsmParser.h"
+#include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
@@ -22,6 +23,8 @@
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 #include "llvm/ADT/StringSet.h"
 
+#include "mlir/Dialect/SCF/Transforms/TileUsingInterface.h"
+
 using namespace mlir;
 using namespace mlir::linalg;
 using namespace mlir::transform;
@@ -36,6 +39,16 @@
   return result;
 }
 
+/// Extracts a vector of int64_t from an array attribute. Asserts if the
+/// attribute contains values other than integers.
+static SmallVector<int64_t> extractI64Array(ArrayAttr attr) {
+  SmallVector<int64_t> result;
+  result.reserve(attr.size());
+  for (APInt value : attr.getAsValueRange<IntegerAttr>())
+    result.push_back(value.getSExtValue());
+  return result;
+}
+
 namespace {
 /// A simple pattern rewriter that implements no special logic.
 class SimpleRewriter : public PatternRewriter {
@@ -721,11 +734,9 @@
   // Tiling with "scalarize_dyn_dims" actually sets the same lambda as the
   // tile sizes and asserts that it is not already set.
   SmallVector<Value> emptyTileSizes;
-  LinalgTilingPattern pattern(getContext(), tilingOptions);
   SimpleRewriter rewriter(getContext());
   rewriter.setInsertionPoint(target);
-  FailureOr<TiledLinalgOp> result =
-      pattern.returningMatchAndRewrite(target, rewriter);
+  FailureOr<TiledLinalgOp> result = tileAndX(rewriter, target, tilingOptions);
   if (failed(result))
     return DiagnosedSilenceableFailure(reportUnknownTransformError(target));
 
@@ -915,7 +926,6 @@
 DiagnosedSilenceableFailure
 transform::TileOp::apply(TransformResults &transformResults,
                          TransformState &state) {
-  LinalgTilingOptions tilingOptions;
   SmallVector<int64_t> tileSizes = extractFromI64ArrayAttr(getStaticSizes());
 
   ArrayRef<Operation *> targets = state.getPayloadOps(getTarget());
@@ -960,6 +970,7 @@
       return diag;
     }
 
+    LinalgTilingOptions tilingOptions;
     unsigned index = en.index();
     if (!tileSizes.empty()) {
       tilingOptions.setTileSizeComputationFunction(
@@ -981,10 +992,9 @@
     }
 
     tilingOptions.setInterchange(extractUIntArray(getInterchange()));
-    LinalgTilingPattern pattern(getContext(), tilingOptions);
     SimpleRewriter rewriter(linalgOp.getContext());
     FailureOr<TiledLinalgOp> tiledOp =
-        pattern.returningMatchAndRewrite(linalgOp, rewriter);
+        tileAndX(rewriter, linalgOp, tilingOptions);
     if (failed(tiledOp))
       return DiagnosedSilenceableFailure::definiteFailure();
 
@@ -1203,6 +1213,153 @@
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// TileToScfForOp
+//===----------------------------------------------------------------------===//
+
+DiagnosedSilenceableFailure
+transform::TileToScfForOp::apply(TransformResults &transformResults,
+                                 TransformState &state) {
+  SmallVector<int64_t> tileSizes = extractFromI64ArrayAttr(getStaticSizes());
+
+  ArrayRef<Operation *> targets = state.getPayloadOps(getTarget());
+  SmallVector<ArrayRef<Operation *>> dynamicSizeProducers;
+  dynamicSizeProducers.reserve(getDynamicSizes().size());
+  for (Value dynamicSizeProducerHandle : getDynamicSizes()) {
+    dynamicSizeProducers.push_back(
+        state.getPayloadOps(dynamicSizeProducerHandle));
+
+    if (dynamicSizeProducers.back().size() != targets.size()) {
+      DiagnosedSilenceableFailure diag =
+          emitSilenceableError()
+          << "expected as many dynamic size-producing operations ("
+          << dynamicSizeProducers.back().size() << ") as target ops ("
+          << targets.size() << ")";
+      diag.attachNote(dynamicSizeProducerHandle.getLoc()) << "for this handle";
+      return diag;
+    }
+
+    for (Operation *op : dynamicSizeProducers.back()) {
+      if (op->getNumResults() == 1 &&
+          op->getResult(0).getType().isa<IndexType>())
+        continue;
+      DiagnosedSilenceableFailure diag =
+          emitSilenceableError() << "expected sizes to be produced by ops "
+                                    "with a single index-type result";
+      diag.attachNote(op->getLoc()) << "size producer op";
+      diag.attachNote(dynamicSizeProducerHandle.getLoc()) << "for this handle";
+      return diag;
+    }
+  }
+
+  SmallVector<Operation *> tiled;
+  SmallVector<SmallVector<Operation *>, 4> loops;
+  loops.resize(getLoops().size());
+  for (auto &en : llvm::enumerate(targets)) {
+    auto tilingInterfaceOp = dyn_cast<TilingInterface>(en.value());
+    if (!tilingInterfaceOp) {
+      DiagnosedSilenceableFailure diag =
+          emitSilenceableError() << "only TilingInterface ops are supported";
+      diag.attachNote(en.value()->getLoc()) << "target op";
+      return diag;
+    }
+
+    scf::SCFTilingOptions tilingOptions;
+    unsigned index = en.index();
+    if (!tileSizes.empty()) {
+      tilingOptions.setTileSizeComputationFunction(
+          [&, index](OpBuilder &b, Operation *) {
+            SmallVector<Value> sizes;
+            sizes.reserve(tileSizes.size());
+            unsigned dynamicIdx = 0;
+            for (OpFoldResult ofr : getMixedSizes()) {
+              if (auto attr = ofr.dyn_cast<Attribute>()) {
+                sizes.push_back(b.create<arith::ConstantIndexOp>(
+                    getLoc(), attr.cast<IntegerAttr>().getInt()));
+              } else {
+                sizes.push_back(
+                    dynamicSizeProducers[dynamicIdx++][index]->getResult(0));
+              }
+            }
+            return sizes;
+          });
+    }
+
+    tilingOptions.setInterchange(extractI64Array(getInterchange()));
+    SimpleRewriter rewriter(tilingInterfaceOp.getContext());
+    FailureOr<scf::SCFTilingResult> tilingResult =
+        tileUsingSCFForOp(rewriter, tilingInterfaceOp, tilingOptions);
+    if (failed(tilingResult))
+      return DiagnosedSilenceableFailure::definiteFailure();
+
+    rewriter.replaceOp(tilingInterfaceOp, tilingResult->replacements);
+
+    tiled.push_back(tilingResult->tiledOp);
+    for (const auto &en2 : llvm::enumerate(tilingResult->loops))
+      loops[en2.index()].push_back(en2.value());
+  }
+
+  transformResults.set(getTiledLinalgOp().cast<OpResult>(), tiled);
+  for (const auto &en : llvm::enumerate(loops))
+    transformResults.set(getLoops()[en.index()].cast<OpResult>(), en.value());
+
+  return DiagnosedSilenceableFailure::success();
+}
+
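+/// For example, with `static_sizes = [4, kDynamicSize, 0]` and a single
+/// operand in `dynamic_sizes`, `getMixedSizes` returns the mixed list
+/// {IndexAttr(4), dynamic_sizes[0], IndexAttr(0)}.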
+SmallVector<OpFoldResult> transform::TileToScfForOp::getMixedSizes() {
+  ValueRange dynamic = getDynamicSizes();
+  SmallVector<int64_t> tileSizes = extractFromI64ArrayAttr(getStaticSizes());
+  SmallVector<OpFoldResult> results;
+  results.reserve(tileSizes.size());
+  unsigned dynamicPos = 0;
+  Builder builder(getContext());
+  for (int64_t size : tileSizes) {
+    if (size == ShapedType::kDynamicSize) {
+      results.push_back(dynamic[dynamicPos++]);
+    } else {
+      results.push_back(builder.getIndexAttr(size));
+    }
+  }
+  return results;
+}
+
+ParseResult transform::TileToScfForOp::parse(OpAsmParser &parser,
+                                             OperationState &result) {
+  OpAsmParser::UnresolvedOperand target;
+  SmallVector<OpAsmParser::UnresolvedOperand> dynamicSizes;
+  ArrayAttr staticSizes;
+  auto pdlOperationType = pdl::OperationType::get(parser.getContext());
+  if (parser.parseOperand(target) ||
+      parser.resolveOperand(target, pdlOperationType, result.operands) ||
+      parseDynamicIndexList(parser, dynamicSizes, staticSizes,
+                            ShapedType::kDynamicSize) ||
+      parser.resolveOperands(dynamicSizes, pdlOperationType,
+                             result.operands) ||
+      parser.parseOptionalAttrDict(result.attributes))
+    return ParseResult::failure();
+
+  result.addAttribute(getStaticSizesAttrName(result.name), staticSizes);
+  size_t numExpectedLoops =
+      staticSizes.size() - llvm::count(extractFromI64ArrayAttr(staticSizes), 0);
+  result.addTypes(SmallVector<Type>(numExpectedLoops + 1, pdlOperationType));
+  return success();
+}
+
+void TileToScfForOp::print(OpAsmPrinter &p) {
+  p << ' ' << getTarget();
+  printDynamicIndexList(p, getOperation(), getDynamicSizes(), getStaticSizes(),
+                        ShapedType::kDynamicSize);
+  p.printOptionalAttrDict((*this)->getAttrs(), {getStaticSizesAttrName()});
+}
+
+void transform::TileToScfForOp::getEffects(
+    SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
+  consumesHandle(getTarget(), effects);
+  onlyReadsHandle(getDynamicSizes(), effects);
+  producesHandle(getTiledLinalgOp(), effects);
+  producesHandle(getLoops(), effects);
+  modifiesPayload(effects);
+}
+
 //===----------------------------------------------------------------------===//
 // VectorizeOp
 //===----------------------------------------------------------------------===//
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
@@ -51,40 +51,6 @@
 
 namespace {
 
-/// Configurable pass to apply pattern-based linalg tiling.
-struct LinalgStrategyTilePass - : public impl::LinalgStrategyTilePassBase { - - LinalgStrategyTilePass() = default; - - LinalgStrategyTilePass(StringRef opName, - mlir::linalg::LinalgTilingOptions opt, - LinalgTransformationFilter filt) - : options(std::move(opt)), filter(std::move(filt)) { - this->anchorOpName.setValue(opName.str()); - } - - void runOnOperation() override { - auto funcOp = getOperation(); - if (!anchorFuncName.empty() && funcOp.getName() != anchorFuncName) - return; - - MLIRContext *ctx = funcOp.getContext(); - RewritePatternSet tilingPattern(ctx); - if (!anchorOpName.empty()) - tilingPattern.add(anchorOpName, ctx, options, - filter); - else - tilingPattern.add(ctx, options, filter); - if (anchorOpName == tensor::PadOp::getOperationName()) - populatePadTensorTilingPatterns(tilingPattern, options); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(tilingPattern)); - } - - mlir::linalg::LinalgTilingOptions options; - LinalgTransformationFilter filter; -}; - /// Configurable pass to lower vector operations. struct LinalgStrategyRemoveMarkersPass : public impl::LinalgStrategyRemoveMarkersPassBase< @@ -101,14 +67,6 @@ }; } // namespace -/// Create a LinalgStrategyTilePass. -std::unique_ptr> -mlir::createLinalgStrategyTilePass(StringRef opName, - const LinalgTilingOptions &opt, - const LinalgTransformationFilter &filter) { - return std::make_unique(opName, opt, filter); -} - /// Create a LinalgStrategyRemoveMarkersPass. std::unique_ptr> mlir::createLinalgStrategyRemoveMarkersPass() { diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -732,77 +732,8 @@ >::insert(patterns); } -/// Populate the given list with patterns that apply Linalg tiling. 
-static void insertTilingPatterns(RewritePatternSet &patterns, - const LinalgTilingOptions &options) { - auto *ctx = patterns.getContext(); - LinalgTransformationFilter f(ArrayRef{}, - StringAttr::get(ctx, "tiled")); - TilingPatterns::insert(patterns, options, f); - patterns.add(ctx, options); -} - void mlir::linalg::populatePadTensorTilingPatterns( RewritePatternSet &patterns, const LinalgTilingOptions &options) { auto *ctx = patterns.getContext(); patterns.add(ctx, options); } - -static void applyExtractSliceOfPadTensorSwapPattern(func::FuncOp funcOp) { - MLIRContext *ctx = funcOp.getContext(); - RewritePatternSet patterns(ctx); - patterns.add(patterns.getContext()); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); - (void)applyPatternsAndFoldGreedily( - funcOp, getLinalgTilingCanonicalizationPatterns(ctx)); -} - -namespace { -struct LinalgTilingPass : public impl::LinalgTilingPassBase { - LinalgTilingPass() = default; - LinalgTilingPass(ArrayRef tileSizes, LinalgTilingLoopType loopType) { - this->tileSizes = tileSizes; - this->loopType = ""; - this->loopTypeEnum = loopType; - } - - void runOnOperation() override { - func::FuncOp funcOp = getOperation(); - LinalgTilingLoopType type = - llvm::StringSwitch(loopType) - .Case("for", LinalgTilingLoopType::Loops) - .Case("affine", LinalgTilingLoopType::AffineLoops) - .Case("parallel", LinalgTilingLoopType::ParallelLoops) - .Default(loopTypeEnum); - auto options = - LinalgTilingOptions().setTileSizes(tileSizes).setLoopType(type); - MLIRContext *ctx = funcOp.getContext(); - RewritePatternSet patterns(ctx); - insertTilingPatterns(patterns, options); - scf::populateSCFForLoopCanonicalizationPatterns(patterns); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); - (void)applyPatternsAndFoldGreedily( - funcOp, getLinalgTilingCanonicalizationPatterns(ctx)); - // Drop the marker. - funcOp.walk([](LinalgOp op) { - op->removeAttr(LinalgTransforms::kLinalgTransformMarker); - }); - - // Apply swap pattern after generating loop nest and running - // canonicalizations. - applyExtractSliceOfPadTensorSwapPattern(funcOp); - } - - LinalgTilingLoopType loopTypeEnum; -}; - -} // namespace - -std::unique_ptr> -mlir::createLinalgTilingPass(ArrayRef tileSizes, - linalg::LinalgTilingLoopType loopType) { - return std::make_unique(tileSizes, loopType); -} diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -356,33 +356,13 @@ } } -/// Linalg tiling pattern. 
-mlir::linalg::LinalgTilingPattern::LinalgTilingPattern( - MLIRContext *context, LinalgTilingOptions options, - LinalgTransformationFilter f, PatternBenefit benefit) - : OpInterfaceRewritePattern(context, benefit), - filter(std::move(f)), options(std::move(options)) {} - -mlir::linalg::LinalgTilingPattern::LinalgTilingPattern( - StringRef opName, MLIRContext *context, LinalgTilingOptions options, - LinalgTransformationFilter f, PatternBenefit benefit) - : OpInterfaceRewritePattern(context, benefit), - filter(f.addOpNameFilter(opName)), options(std::move(options)) {} - FailureOr -mlir::linalg::LinalgTilingPattern::returningMatchAndRewrite( - LinalgOp op, PatternRewriter &rewriter) const { - if (failed(filter.checkAndNotify(rewriter, op))) - return failure(); - +mlir::linalg::tileAndX(RewriterBase &rewriter, LinalgOp op, + const LinalgTilingOptions &options) { FailureOr res = tileLinalgOp(rewriter, op, options); if (failed(res)) return failure(); - // Clear filter to stop recursive pattern application. - // This must be done here to properly propagate to peeling branches. - filter.replaceLinalgTransformationFilter(rewriter, res->op); - // Peel the loops of the TiledLinalgOp. peelTiledLinalgOp(rewriter, *res, options.peeledLoops, options.loopType); diff --git a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir b/mlir/test/Dialect/Linalg/tile-and-distribute.mlir deleted file mode 100644 --- a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir +++ /dev/null @@ -1,219 +0,0 @@ -// RUN: mlir-opt %s -test-linalg-transform-patterns=test-tile-and-distribute-options -split-input-file | FileCheck %s - -func.func @gemm1(%a : memref, %b : memref, %c : memref) -{ - linalg.matmul {__internal_linalg_transform__ = "distribute1"} - ins(%a, %b: memref, memref) - outs(%c: memref) - return -} -// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)> -// CHECK: func @gemm1( -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y -// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x -// CHECK: scf.for %[[ARG3:.*]] = -// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]] -// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]] -// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]] -// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// ----- - -func.func @gemm2(%a : memref, %b : memref, %c : memref) -{ - linalg.matmul {__internal_linalg_transform__ = "distribute2"} - ins(%a, %b: memref, memref) - outs(%c:memref) - return -} -// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)> -// CHECK: func @gemm2( -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y -// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x -// CHECK: %[[ITERY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[ITERX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[INBOUNDSY:.*]] = arith.cmpi slt, %[[ITERY]], %{{.*}} -// CHECK: %[[INBOUNDSX:.*]] = arith.cmpi slt, %[[ITERX]], %{{.*}} -// CHECK: %[[INBOUNDS:.*]] = arith.andi %[[INBOUNDSY]], 
%[[INBOUNDSX]] -// CHECK: scf.if %[[INBOUNDS]] -// CHECK: scf.for %[[ARG3:.*]] = -// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]] -// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]] -// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]] -// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// ----- - -func.func @gemm3(%a : memref, %b : memref, %c : memref) -{ - linalg.matmul {__internal_linalg_transform__ = "distribute3"} - ins(%a, %b: memref, memref) - outs(%c: memref) - return -} -// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)> -// CHECK: func @gemm3( -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y -// CHECK-DAG: %[[NBLOCKSY:.*]] = gpu.grid_dim y -// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x -// CHECK-DAG: %[[NBLOCKSX:.*]] = gpu.grid_dim x -// CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[STEPY:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSY]]] -// CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[STEPX:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSX]]] -// CHECK: scf.parallel (%[[ARG3:.*]], %[[ARG4:.*]]) = (%[[LBY]], %[[LBX]]) to (%{{.*}}, %{{.*}}) step (%[[STEPY]], %[[STEPX]]) -// CHECK: scf.for %[[ARG5:.*]] = -// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG5]]] -// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG5]], %[[ARG4]]] -// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[ARG3]], %[[ARG4]]] -// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// ----- - -func.func @gemm4(%a : memref, %b : memref, %c : memref) -{ - linalg.matmul {__internal_linalg_transform__ = "distribute4"} - ins(%a, %b: memref, memref) - outs(%c: memref) - return -} -// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)> -// CHECK: func @gemm4( -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y -// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x -// CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[INBOUNDS:.*]] = arith.cmpi slt, %[[LBX]], %{{.*}} -// CHECK: scf.if %[[INBOUNDS]] -// CHECK: scf.for %[[ARG3:.*]] = -// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]] -// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]] -// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]] -// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// ----- - -func.func @gemm5(%a : memref, %b : memref, %c : memref) -{ - linalg.matmul {__internal_linalg_transform__ = "distribute5"} - ins(%a, %b: memref, memref) - outs(%c: memref) - return -} -// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)> -// CHECK: func @gemm5( -// CHECK-SAME: 
%[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y -// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x -// CHECK-DAG: %[[NBLOCKSX:.*]] = gpu.grid_dim x -// CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[STEPX:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSX]]] -// CHECK: %[[INBOUNDS:.*]] = arith.cmpi slt, %[[LBY]], %{{.*}} -// CHECK: scf.if %[[INBOUNDS]] -// CHECK: scf.parallel (%[[ARG3:.*]]) = (%[[LBX]]) to (%{{.*}}) step (%[[STEPX]]) -// CHECK: scf.for %[[ARG4:.*]] = -// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG4]]] -// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG4]], %[[ARG3]]] -// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[ARG3]]] -// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// ----- - -func.func @gemm6(%a : memref, %b : memref, %c : memref) -{ - linalg.matmul {__internal_linalg_transform__ = "distribute6"} - ins(%a, %b: memref, memref) - outs(%c: memref) - return -} -// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)> -// CHECK: func @gemm6( -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y -// CHECK-DAG: %[[NBLOCKSY:.*]] = gpu.grid_dim y -// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x -// CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[STEPY:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSY]]] -// CHECK: scf.parallel (%[[ARG3:.*]]) = (%[[LBY]]) to (%{{.*}}) step (%[[STEPY]]) -// CHECK: scf.for %[[ARG4:.*]] = -// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG4]]] -// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG4]], %[[OFFSETX]]] -// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[ARG3]], %[[OFFSETX_2]]] -// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// ----- - -// CHECK: #[[MULMAP:.+]] = affine_map<()[s0, s1] -> (s0 * s1)> -// CHECK: #[[ADDMAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1)> -// CHECK: func @matmul_tensors( -// CHECK-SAME: %[[TA:[0-9a-z]+]]: tensor -// CHECK-SAME: %[[TB:[0-9a-z]+]]: tensor -// CHECK-SAME: %[[TC:[0-9a-z]+]]: tensor) -> tensor { -func.func @matmul_tensors( - %arg0: tensor, %arg1: tensor, %arg2: tensor) - -> tensor { -// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y -// CHECK-DAG: %[[NBLOCKSY:.*]] = gpu.grid_dim y -// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x -// CHECK-DAG: %[[NBLOCKSX:.*]] = gpu.grid_dim x -// CHECK: %[[MUL:.+]] = affine.apply #[[MULMAP]]()[%[[BIDY]], %[[C8]]] -// CHECK: %[[LBY:.+]] = affine.apply #[[ADDMAP]]()[%[[MUL]], %[[C0]]] -// CHECK: %[[STEPY:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSY]], %[[C8]]] -// CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor) { -// CHECK: %[[MUL:.+]] = affine.apply #[[MULMAP]]()[%[[BIDX]], %[[C8]]] -// CHECK: %[[LBX:.+]] = affine.apply #[[ADDMAP]]()[%[[MUL]], %[[C0]]] -// CHECK: %[[STEPX:.+]] = affine.apply 
#[[MULMAP]]()[%[[NBLOCKSX]], %[[C8]]] -// CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor) { -// CHECK: %[[TD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[TC1]]) -> (tensor) { -// CHECK: %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor to tensor -// CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor to tensor -// CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor to tensor -// CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor, tensor) -// CHECK-SAME: outs(%[[sTC]] : tensor) -> tensor -// CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor into tensor -// CHECK: scf.yield %[[TD]] : tensor -// CHECK: scf.yield %[[TD2]] : tensor -// CHECK: scf.yield %[[TD1]] : tensor - %0 = linalg.matmul {__internal_linalg_transform__ = "tensors_distribute1"} - ins(%arg0, %arg1: tensor, tensor) - outs(%arg2: tensor) - -> tensor - -// CHECK: return %[[TD0]] : tensor - return %0 : tensor -} - diff --git a/mlir/test/Dialect/Linalg/tile-and-peel-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-peel-tensors.mlir deleted file mode 100644 --- a/mlir/test/Dialect/Linalg/tile-and-peel-tensors.mlir +++ /dev/null @@ -1,110 +0,0 @@ -// RUN: mlir-opt %s -test-linalg-transform-patterns="test-tile-pattern tile-sizes=256,128,512 peeled-loops=0" -canonicalize | \ -// RUN: FileCheck %s -check-prefix=CHECK-PEEL-0 - -// RUN: mlir-opt %s -test-linalg-transform-patterns="test-tile-pattern tile-sizes=256,128,512 peeled-loops=1,2" -canonicalize | \ -// RUN: FileCheck %s -check-prefix=CHECK-PEEL-12 - -// CHECK-PEEL-0: func @matmul_static_tensor -// CHECK-PEEL-0-DAG: %[[c0:.*]] = arith.constant 0 : index -// CHECK-PEEL-0-DAG: %[[c128:.*]] = arith.constant 128 : index -// CHECK-PEEL-0-DAG: %[[c256:.*]] = arith.constant 256 : index -// CHECK-PEEL-0-DAG: %[[c512:.*]] = arith.constant 512 : index -// CHECK-PEEL-0-DAG: %[[c1280:.*]] = arith.constant 1280 : index -// CHECK-PEEL-0-DAG: %[[c1600:.*]] = arith.constant 1600 : index -// CHECK-PEEL-0-DAG: %[[c1700:.*]] = arith.constant 1700 : index -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %[[c1280]] step %[[c256]] {{.*}} { -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %[[c1700]] step %[[c128]] {{.*}} { -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %[[c1600]] step %[[c512]] {{.*}} { -// CHECK-PEEL-0: linalg.matmul ins({{.*}} : tensor<256x?xf32>, tensor) outs({{.*}} : tensor<256x?xf32>) -// CHECK-PEEL-0: } -// CHECK-PEEL-0: } -// CHECK-PEEL-0: } -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %[[c1700]] step %[[c128]] {{.*}} { -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %[[c1600]] step %[[c512]] {{.*}} { -// CHECK-PEEL-0: linalg.matmul ins({{.*}} : tensor<220x?xf32>, tensor) outs({{.*}} : tensor<220x?xf32>) -// CHECK-PEEL-0: } -// CHECK-PEEL-0: } - -// CHECK-PEEL-12: func @matmul_static_tensor -// CHECK-PEEL-12-DAG: %[[c0:.*]] = arith.constant 0 : index -// CHECK-PEEL-12-DAG: %[[c128:.*]] = arith.constant 128 : index -// CHECK-PEEL-12-DAG: %[[c256:.*]] = arith.constant 256 : index -// CHECK-PEEL-12-DAG: %[[c512:.*]] = arith.constant 512 : index -// CHECK-PEEL-12-DAG: %[[c1500:.*]] = arith.constant 1500 : index -// CHECK-PEEL-12-DAG: %[[c1536:.*]] = arith.constant 1536 : index -// CHECK-PEEL-12-DAG: %[[c1600:.*]] = arith.constant 1600 : index -// CHECK-PEEL-12-DAG: %[[c1664:.*]] = arith.constant 1664 : index -// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %[[c1500]] step %[[c256]] {{.*}} { -// CHECK-PEEL-12: scf.for 
%{{.*}} = %[[c0]] to %[[c1664]] step %[[c128]] {{.*}} { -// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %[[c1536]] step %[[c512]] {{.*}} { -// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor, tensor<512x128xf32>) outs({{.*}} : tensor) -// CHECK-PEEL-12: } -// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor, tensor<64x128xf32>) outs({{.*}} : tensor) -// CHECK-PEEL-12: } -// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %[[c1600]] step %[[c512]] {{.*}} { -// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor, tensor) outs({{.*}} : tensor) -// CHECK-PEEL-12: } -// CHECK-PEEL-12: } -func.func @matmul_static_tensor(%arg0: tensor<1500x1600xf32>, %arg1: tensor<1600x1700xf32>) - -> tensor<1500x1700xf32> { - %out = tensor.empty() : tensor<1500x1700xf32> - %r = linalg.matmul {__internal_linalg_transform__ = "tile"} - ins(%arg0, %arg1: tensor<1500x1600xf32>, tensor<1600x1700xf32>) - outs(%out: tensor<1500x1700xf32>) -> tensor<1500x1700xf32> - return %r : tensor<1500x1700xf32> -} - -// ----- - -// CHECK-PEEL-0: func @matmul_dynamic_tensor -// CHECK-PEEL-0-DAG: %[[c0:.*]] = arith.constant 0 : index -// CHECK-PEEL-0-DAG: %[[c128:.*]] = arith.constant 128 : index -// CHECK-PEEL-0-DAG: %[[c256:.*]] = arith.constant 256 : index -// CHECK-PEEL-0-DAG: %[[c512:.*]] = arith.constant 512 : index -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c256]] {{.*}} { -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c128]] {{.*}} { -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c512]] {{.*}} { -// CHECK-PEEL-0: linalg.matmul ins({{.*}} : tensor<256x?xf32>, tensor) outs({{.*}} : tensor<256x?xf32>) -// CHECK-PEEL-0: } -// CHECK-PEEL-0: } -// CHECK-PEEL-0: } -// CHECK-PEEL-0: scf.for %{{.*}} { -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c128]] {{.*}} { -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c512]] {{.*}} { -// CHECK-PEEL-0: linalg.matmul ins({{.*}} : tensor, tensor) outs({{.*}} : tensor) -// CHECK-PEEL-0: } -// CHECK-PEEL-0: } -// CHECK-PEEL-0: } - -// CHECK-PEEL-12: func @matmul_dynamic_tensor -// CHECK-PEEL-12-DAG: %[[c0:.*]] = arith.constant 0 : index -// CHECK-PEEL-12-DAG: %[[c128:.*]] = arith.constant 128 : index -// CHECK-PEEL-12-DAG: %[[c256:.*]] = arith.constant 256 : index -// CHECK-PEEL-12-DAG: %[[c512:.*]] = arith.constant 512 : index -// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c256]] {{.*}} { -// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c128]] {{.*}} { -// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c512]] {{.*}} { -// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor, tensor<512x128xf32>) outs({{.*}} : tensor) -// CHECK-PEEL-12: } -// CHECK-PEEL-12: scf.for %{{.*}} { -// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor, tensor) outs({{.*}} : tensor) -// CHECK-PEEL-12: } -// CHECK-PEEL-12: } -// CHECK-PEEL-12: scf.for %{{.*}} { -// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c512]] {{.*}} { -// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor, tensor) outs({{.*}} : tensor) -// CHECK-PEEL-12: } -// CHECK-PEEL-12: } -// CHECK-PEEL-12: } -func.func @matmul_dynamic_tensor(%arg0: tensor, %arg1: tensor) - -> tensor { - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %d0 = tensor.dim %arg0, %c0 : tensor - %d1 = tensor.dim %arg1, %c1 : tensor - %out = tensor.empty(%d0, %d1) : tensor - %r = linalg.matmul {__internal_linalg_transform__ = "tile"} - ins(%arg0, %arg1: tensor, tensor) - outs(%out: tensor) -> tensor - return %r : tensor -} diff --git 
a/mlir/test/Dialect/Linalg/tile-conv.mlir b/mlir/test/Dialect/Linalg/tile-conv.mlir --- a/mlir/test/Dialect/Linalg/tile-conv.mlir +++ b/mlir/test/Dialect/Linalg/tile-conv.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3" | FileCheck %s +// RUN: mlir-opt %s -test-transform-dialect-interpreter -canonicalize | FileCheck %s // CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 - 1, s1 + 1)> // CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 - 1, s1 + 2)> @@ -10,6 +10,12 @@ return } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.conv_2d"]} in %arg1 + %1, %loop:2 = transform.structured.tile %0 [2, 3] +} + // CHECK: func @conv // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref diff --git a/mlir/test/Dialect/Linalg/tile-indexed.mlir b/mlir/test/Dialect/Linalg/tile-indexed.mlir --- a/mlir/test/Dialect/Linalg/tile-indexed.mlir +++ b/mlir/test/Dialect/Linalg/tile-indexed.mlir @@ -1,6 +1,4 @@ -// RUN: mlir-opt %s -linalg-tile="tile-sizes=10,25" -split-input-file | FileCheck %s -check-prefix=TILE-10n25 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=25,0" -split-input-file | FileCheck %s -check-prefix=TILE-25n0 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,25" -split-input-file | FileCheck %s -check-prefix=TILE-0n25 +// RUN: mlir-opt %s -test-transform-dialect-interpreter -canonicalize -split-input-file | FileCheck %s -check-prefix=TILE-10n25 func.func @indexed_vector(%arg0: memref<50xindex>) { linalg.generic {indexing_maps = [affine_map<(i) -> (i)>], @@ -12,6 +10,13 @@ } return } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 + %1, %loop:2 = transform.structured.tile %0 [10, 25] +} + // TILE-10n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)> // TILE-10n25-LABEL: func @indexed_vector // TILE-10n25: %[[C10:.*]] = arith.constant 10 : index @@ -21,19 +26,6 @@ // TILE-10n25: %[[NEW_I:.*]] = affine.apply [[$MAP]](%[[I]], %[[J]]) // TILE-10n25: linalg.yield %[[NEW_I]] : index -// TILE-25n0-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)> -// TILE-25n0-LABEL: func @indexed_vector -// TILE-25n0: %[[C25:.*]] = arith.constant 25 : index -// TILE-25n0: scf.for %[[J:.*]] = {{.*}} step %[[C25]] -// TILE-25n0: linalg.generic -// TILE-25n0: %[[I:.*]] = linalg.index 0 : index -// TILE-25n0: %[[NEW_I:.*]] = affine.apply [[$MAP]](%[[I]], %[[J]]) -// TILE-25n0: linalg.yield %[[NEW_I]] : index - -// TILE-0n25-LABEL: func @indexed_vector -// TILE-0n25-NOT: scf.for %[[J:.*]] = {{.*}} step % -// TILE-0n25: linalg.generic - // ----- func.func @indexed_matrix(%arg0: memref<50x50xindex>) { @@ -48,6 +40,13 @@ } return } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 + %1, %loop:2 = transform.structured.tile %0 [10, 25] +} + // TILE-10n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)> // TILE-10n25-LABEL: func @indexed_matrix // TILE-10n25-DAG: %[[C25:.*]] = arith.constant 25 : index @@ -61,25 +60,3 @@ // TILE-10n25: %[[NEW_J:.*]] = affine.apply [[$MAP]](%[[J]], %[[L]]) // TILE-10n25: %[[SUM:.*]] = arith.addi %[[NEW_I]], %[[NEW_J]] : index // TILE-10n25: linalg.yield %[[SUM]] : index - -// TILE-25n0-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)> -// TILE-25n0-LABEL: func @indexed_matrix -// TILE-25n0: 
%[[C25:.*]] = arith.constant 25 : index -// TILE-25n0: scf.for %[[L:.*]] = {{.*}} step %[[C25]] -// TILE-25n0: linalg.generic -// TILE-25n0: %[[I:.*]] = linalg.index 0 : index -// TILE-25n0: %[[NEW_I:.*]] = affine.apply [[$MAP]](%[[I]], %[[L]]) -// TILE-25n0: %[[J:.*]] = linalg.index 1 : index -// TILE-25n0: %[[SUM:.*]] = arith.addi %[[NEW_I]], %[[J]] : index -// TILE-25n0: linalg.yield %[[SUM]] : index - -// TILE-0n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)> -// TILE-0n25-LABEL: func @indexed_matrix -// TILE-0n25: %[[C25:.*]] = arith.constant 25 : index -// TILE-0n25: scf.for %[[L:.*]] = {{.*}} step %[[C25]] -// TILE-0n25: linalg.generic -// TILE-0n25: %[[I:.*]] = linalg.index 0 : index -// TILE-0n25: %[[J:.*]] = linalg.index 1 : index -// TILE-0n25: %[[NEW_J:.*]] = affine.apply [[$MAP]](%[[J]], %[[L]]) -// TILE-0n25: %[[SUM:.*]] = arith.addi %[[I]], %[[NEW_J]] : index -// TILE-0n25: linalg.yield %[[SUM]] : index diff --git a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir --- a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir +++ b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir @@ -1,53 +1,65 @@ -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3" -cse -split-input-file | \ -// RUN: FileCheck %s -check-prefix=TILE2 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,3" -resolve-shaped-type-result-dims -cse -split-input-file | \ -// RUN: FileCheck %s -check-prefix=TILE1 -// This test only checks that tiling does not crash. -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2" -resolve-shaped-type-result-dims -cse -split-input-file - -// TILE2-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 8)> -// TILE2-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 7)> -// TILE2: func @dynamic_pad_tensor( -// TILE2-SAME: %[[IN:.*]]: tensor -// TILE2-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE2-DAG: %[[C1:.*]] = arith.constant 1 : index -// TILE2-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE2-DAG: %[[C3:.*]] = arith.constant 3 : index -// TILE2: %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]] -// TILE2: %[[DIM0:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN0]]] -// TILE2: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]] -// TILE2: %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN1]]] -// TILE2: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM0]] step %[[C2]] -// TILE2: scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] = -// TILE2: %[[SWAP_RESULT:.*]] = scf.if -// TILE2: tensor.generate -// TILE2: else -// TILE2: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] -// TILE2: %[[PAD:.*]] = tensor.pad %[[SLICE]] -// TILE2: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] -// TILE2: return %[[RESULT]] - -// TILE1-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 7)> -// TILE1-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 8)> -// TILE1: func @dynamic_pad_tensor( -// TILE1-SAME: %[[IN:.*]]: tensor -// TILE1-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE1-DAG: %[[C1:.*]] = arith.constant 1 : index -// TILE1-DAG: %[[C3:.*]] = arith.constant 3 : index -// TILE1: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]] -// TILE1: %[[DIM1:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN1]]] -// TILE1: %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]] -// TILE1: %[[DIM0:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN0]]] -// TILE1: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] = -// TILE1: %[[SWAP_RESULT:.*]] = 
scf.if -// TILE1: tensor.generate -// TILE1: else -// TILE1: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] -// TILE1: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}] -// TILE1: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [%[[DIM0]], {{.*}}] [1, 1] -// TILE1: return %[[RESULT]] - -func.func @dynamic_pad_tensor(%input_tensor: tensor, +// RUN: mlir-opt %s -test-transform-dialect-interpreter -canonicalize -cse -split-input-file + +// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 8)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 7)> +// CHECK: func @dynamic_pad_tensor_3_4( +// CHECK-SAME: %[[IN:.*]]: tensor +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]] +// CHECK-DAG: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]] +// CHECK-DAG: %[[DIM0:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN0]]] +// CHECK-DAG: %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN1]]] +// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM0]] step %[[C2]] +// CHECK: scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] = +// CHECK: %[[SWAP_RESULT:.*]] = scf.if +// CHECK: tensor.generate +// CHECK: else +// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] +// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] +// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] +// CHECK: return %[[RESULT]] + +func.func @dynamic_pad_tensor_3_4(%input_tensor: tensor, + %pad_value: f32) -> tensor { + %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %pad_value : f32 + } : tensor to tensor + return %0 : tensor +} + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 + %1, %loops:2 = transform.structured.tile_to_scf_for %0 [2, 3] +} + +// ----- + +// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 7)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 8)> +// CHECK: func @dynamic_pad_tensor_0_3( +// CHECK-SAME: %[[IN:.*]]: tensor +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]] +// CHECK-DAG: %[[DIM1:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN1]]] +// CHECK-DAG: %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]] +// CHECK-DAG: %[[DIM0:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN0]]] +// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] = +// CHECK: %[[SWAP_RESULT:.*]] = scf.if +// CHECK: tensor.generate +// CHECK: else +// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] +// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}] +// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [%[[DIM0]], {{.*}}] [1, 1] +// CHECK: return %[[RESULT]] + +func.func @dynamic_pad_tensor_0_3(%input_tensor: tensor, %pad_value: f32) -> tensor { %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] { ^bb0(%arg1: index, %arg2: index): @@ -56,41 +68,64 @@ return %0 : tensor } 
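+// A static tile size of 0 leaves the first dimension untiled, so the
+// sequence below generates a single loop and yields a single loop handle.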
+transform.sequence failures(propagate) {
+  ^bb0(%arg1: !pdl.operation):
+    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+    %1, %loop = transform.structured.tile_to_scf_for %0 [0, 3]
+}
+
+// -----
+
+// CHECK-LABEL: func @static_pad_tensor_3_4(
+// CHECK-SAME: %[[IN:.*]]: tensor<7x9xf32>
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
+// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
+// CHECK-DAG: %[[C15:.*]] = arith.constant 15 : index
+// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
+// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C15]] step %[[C2]]
+// CHECK: scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
+// CHECK: %[[SWAP_RESULT:.*]] = scf.if
+// CHECK: tensor.generate
+// CHECK: else
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
+// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: return %[[RESULT]]
+
+func.func @static_pad_tensor_3_4(%input_tensor: tensor<7x9xf32>,
+                                 %pad_value: f32) -> tensor<15x16xf32> {
+  %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] {
+    ^bb0(%arg1: index, %arg2: index):
+      tensor.yield %pad_value : f32
+    } : tensor<7x9xf32> to tensor<15x16xf32>
+  return %0 : tensor<15x16xf32>
+}
+
+transform.sequence failures(propagate) {
+  ^bb0(%arg1: !pdl.operation):
+    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+    %1, %loops:2 = transform.structured.tile_to_scf_for %0 [2, 3]
+}
+
 // -----

-// TILE2-LABEL: func @static_pad_tensor(
-// TILE2-SAME: %[[IN:.*]]: tensor<7x9xf32>
-// TILE2-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE2-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE2-DAG: %[[C3:.*]] = arith.constant 3 : index
-// TILE2-DAG: %[[C15:.*]] = arith.constant 15 : index
-// TILE2-DAG: %[[C16:.*]] = arith.constant 16 : index
-// TILE2: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C15]] step %[[C2]]
-// TILE2: scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// TILE2: %[[SWAP_RESULT:.*]] = scf.if
-// TILE2: tensor.generate
-// TILE2: else
-// TILE2: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// TILE2: %[[PAD:.*]] = tensor.pad %[[SLICE]]
-// TILE2: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// TILE2: return %[[RESULT]]
-
-
-// TILE1-LABEL: func @static_pad_tensor(
-// TILE1-SAME: %[[IN:.*]]: tensor<7x9xf32>
-// TILE1-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE1-DAG: %[[C3:.*]] = arith.constant 3 : index
-// TILE1-DAG: %[[C16:.*]] = arith.constant 16 : index
-// TILE1: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// TILE1: %[[SWAP_RESULT:.*]] = scf.if
-// TILE1: tensor.generate
-// TILE1: else
-// TILE1: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
-// TILE1: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
-// TILE1: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 1]
-// TILE1: return %[[RESULT]]
-
-func.func @static_pad_tensor(%input_tensor: tensor<7x9xf32>,
+// CHECK-LABEL: func @static_pad_tensor_0_3(
+// CHECK-SAME: %[[IN:.*]]: tensor<7x9xf32>
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
+// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
+// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
+// CHECK: %[[SWAP_RESULT:.*]] = scf.if
+// CHECK: tensor.generate
+// CHECK: else
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
+// CHECK: %[[CAST_SWAP_RESULT:.*]] = tensor.cast %[[SWAP_RESULT]] : tensor<?x?xf32> to tensor<15x?xf32>
+// CHECK: tensor.insert_slice %[[CAST_SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 1]
+// CHECK: return %[[RESULT]]
+
+func.func @static_pad_tensor_0_3(%input_tensor: tensor<7x9xf32>,
                                  %pad_value: f32) -> tensor<15x16xf32> {
   %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] {
     ^bb0(%arg1: index, %arg2: index):
@@ -99,25 +134,35 @@
   return %0 : tensor<15x16xf32>
 }

+transform.sequence failures(propagate) {
+  ^bb0(%arg1: !pdl.operation):
+    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+    %1, %loop = transform.structured.tile_to_scf_for %0 [0, 3]
+}
+
 // -----

-// TILE1-LABEL: func @static_pad_tile_evenly(
-// TILE1-SAME: %[[IN:.*]]: tensor<7x9xf32>, %[[OUT:.*]]: tensor<14x15xf32>
-// TILE1-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE1-DAG: %[[C3:.*]] = arith.constant 3 : index
-// TILE1-DAG: %[[C15:.*]] = arith.constant 15 : index
-// TILE1: %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C15]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// TILE1: %[[R2:.*]] = scf.if
-// TILE1: %[[GEN:.*]] = tensor.generate
-// TILE1: scf.yield %[[GEN]] : tensor<14x3xf32>
-// TILE1: else
-// TILE1: %[[SLICE:.*]] = tensor.extract_slice %arg0[0, %{{.*}}] [7, %{{.*}}] [1, 1] : tensor<7x9xf32> to tensor<7x?xf32>
-// TILE1: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[0, 0] high[7, %{{.*}}]
-// TILE1: scf.yield %[[PAD]] : tensor<14x3xf32>
-// TILE1: %[[R3:.*]] = tensor.insert_slice %[[R2]] into %[[INNER_OUT]][0, %[[IV]]] [14, 3] [1, 1] : tensor<14x3xf32> into tensor<14x15xf32>
-// TILE1: scf.yield %[[R3]] : tensor<14x15xf32>
-// TILE1: return %[[RESULT]] : tensor<14x15xf32>
-func.func @static_pad_tile_evenly(%input_tensor: tensor<7x9xf32>,
+// CHECK-LABEL: func @static_pad_tile_evenly_0_3(
+// CHECK-SAME: %[[IN:.*]]: tensor<7x9xf32>, %[[OUT:.*]]: tensor<14x15xf32>
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
+// CHECK-DAG: %[[C15:.*]] = arith.constant 15 : index
+// CHECK: %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C15]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
+// CHECK: %[[R2:.*]] = scf.if
+// CHECK: %[[GEN:.*]] = tensor.generate
+// CHECK: %[[cast_0:.*]] = tensor.cast %[[GEN]] : tensor<14x3xf32> to tensor<?x?xf32>
+// CHECK: scf.yield %[[cast_0]] : tensor<?x?xf32>
+// CHECK: else
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %arg0[0, %{{.*}}] [7, %{{.*}}] [1, 1] : tensor<7x9xf32> to tensor<7x?xf32>
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[0, 0] high[7, %{{.*}}]
+// CHECK: %[[cast_1:.*]] = tensor.cast %[[PAD]] : tensor<14x?xf32> to tensor<?x?xf32>
+// CHECK: scf.yield %[[cast_1]] : tensor<?x?xf32>
+// CHECK: %[[cast:.*]] = tensor.cast %[[R2]] : tensor<?x?xf32> to tensor<14x3xf32>
+// CHECK: %[[R3:.*]] = tensor.insert_slice %[[cast]] into %[[INNER_OUT]][0, %[[IV]]] [14, 3] [1, 1] : tensor<14x3xf32> into tensor<14x15xf32>
+// CHECK: scf.yield %[[R3]] : tensor<14x15xf32>
+// CHECK: return %[[RESULT]] : tensor<14x15xf32>
+
+func.func @static_pad_tile_evenly_0_3(%input_tensor: tensor<7x9xf32>,
                                       %output_tensor: tensor<14x15xf32>,
                                       %pad_value: f32) -> tensor<14x15xf32> {
   %0 =
tensor.pad %input_tensor low[0, 0] high[7, 6] { @@ -126,3 +171,9 @@ } : tensor<7x9xf32> to tensor<14x15xf32> return %0 : tensor<14x15xf32> } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 + %1, %loop = transform.structured.tile_to_scf_for %0 [0, 3] +} diff --git a/mlir/test/Dialect/Linalg/tile-parallel-reduce.mlir b/mlir/test/Dialect/Linalg/tile-parallel-reduce.mlir deleted file mode 100644 --- a/mlir/test/Dialect/Linalg/tile-parallel-reduce.mlir +++ /dev/null @@ -1,113 +0,0 @@ -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,4,8 loop-type=parallel" -split-input-file | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2 loop-type=parallel" -split-input-file | FileCheck %s -check-prefix=TILE1 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,4 loop-type=parallel" -split-input-file | FileCheck %s -check-prefix=TILE2 - -func.func @gemm(%arg0 : memref, - %arg1 : memref, - %arg2 : memref) -{ - linalg.matmul ins(%arg0, %arg1: memref, memref) - outs(%arg2: memref) - return -} -// CHECK-LABEL: func @gemm -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index -// CHECK: scf.parallel (%[[ARG3:.*]], %[[ARG4:.*]]) = -// CHECK-SAME: step (%[[C2]], %[[C4]]) -// CHECK: scf.for %[[ARG5:.*]] = -// CHECK-SAME: step %[[C8]] -// CHECK: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG5]]] -// CHECK: %[[SV2:.*]] = memref.subview %{{.*}}[%[[ARG5]], %[[ARG4]]] -// CHECK: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG4]]] -// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// TILE1-LABEL: func @gemm -// TILE1-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE1: scf.parallel (%[[ARG3:.*]]) = -// TILE1-SAME: step (%[[C2]]) -// TILE1: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0] -// TILE1: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0] -// TILE1-NOT: memref.subview -// TILE1: linalg.matmul ins(%[[SV1]], %{{.*}} outs(%[[SV3]] - -// TILE2-LABEL: func @gemm -// TILE2-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE2-DAG: %[[C4:.*]] = arith.constant 4 : index -// TILE2: scf.parallel (%[[ARG3:.*]], %[[ARG4:.*]]) = -// TILE2-SAME: step (%[[C2]], %[[C4]]) -// TILE2: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0] -// TILE2: %[[SV2:.*]] = memref.subview %{{.*}}[0, %[[ARG4]]] -// TILE2: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG4]]] -// TILE2: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// ----- - -#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> -#map1 = affine_map<(d0, d1, d2) -> (d0, d2)> -#map2 = affine_map<(d0, d1, d2) -> (d1)> -#accesses = [#map0, #map1, #map2] -#trait = { - args_in = 2 : i64, - args_out = 1 : i64, - iterator_types = ["reduction", "parallel", "reduction"], - indexing_maps = #accesses -} - -func.func @reduction(%arg0 : memref, - %arg1 : memref, - %arg2 : memref) -{ - linalg.generic #trait - ins(%arg0, %arg1 : memref, memref) - outs(%arg2 : memref) { - ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): - %0 = arith.addf %arg3, %arg4 : f32 - %1 = arith.addf %0, %arg5 : f32 - linalg.yield %1 : f32 - } - return -} - -// CHECK-LABEL: func @reduction -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index -// CHECK: scf.for %[[ARG3:.*]] = -// CHECK-SAME: step %[[C2]] -// CHECK: scf.parallel (%[[ARG4:.*]]) = -// CHECK-SAME: step (%[[C4]]) -// 
CHECK: scf.for %[[ARG5:.*]] = -// CHECK-SAME: step %[[C8]] -// CHECK: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG4]], %[[ARG5]]] -// CHECK: %[[SV2:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG5]]] -// CHECK: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG4]]] -// CHECK: linalg.generic -// CHECK-SAME: ins(%[[SV1]], %[[SV2]] -// CHECK-SAME: outs(%[[SV3]] - -// TILE1-LABEL: func @reduction -// TILE1-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE1: scf.for %[[ARG3:.*]] = -// TILE1-SAME: step %[[C2]] -// TILE1: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0, 0] -// TILE1: %[[SV2:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0] -// TILE1-NOT: memref.subview -// TILE1: linalg.generic -// TILE1-SAME: ins(%[[SV1]], %[[SV2]] -// TILE1-SAME: outs(%{{.*}} - -// TILE2-LABEL: func @reduction -// TILE2-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE2-DAG: %[[C4:.*]] = arith.constant 4 : index -// TILE2: scf.for %[[ARG3:.*]] = -// TILE2-SAME: step %[[C2]] -// TILE2: scf.parallel (%[[ARG4:.*]]) = -// TILE2-SAME: step (%[[C4]]) -// TILE2: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG4]], 0] -// TILE2: %[[SV2:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0] -// TILE2: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG4]]] -// TILE2: linalg.generic -// TILE2-SAME: ins(%[[SV1]], %[[SV2]] -// TILE2-SAME: outs(%[[SV3]] diff --git a/mlir/test/Dialect/Linalg/tile-parallel.mlir b/mlir/test/Dialect/Linalg/tile-parallel.mlir deleted file mode 100644 --- a/mlir/test/Dialect/Linalg/tile-parallel.mlir +++ /dev/null @@ -1,68 +0,0 @@ -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2 loop-type=parallel" | FileCheck %s -check-prefix=TILE-2 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,2 loop-type=parallel" | FileCheck %s -check-prefix=TILE-02 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,0,2 loop-type=parallel" | FileCheck %s -check-prefix=TILE-002 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,4 loop-type=parallel" | FileCheck %s -check-prefix=TILE-234 - -#id_2d = affine_map<(i, j) -> (i, j)> -#pointwise_2d_trait = { - args_in = 2, - args_out = 1, - indexing_maps = [#id_2d, #id_2d, #id_2d], - iterator_types = ["parallel", "parallel"] -} - -func.func @sum(%lhs: memref>, - %rhs: memref>, - %sum: memref>) { - linalg.generic #pointwise_2d_trait - ins(%lhs, %rhs: memref>, - memref>) - outs(%sum : memref>) { - ^bb0(%lhs_in: f32, %rhs_in: f32, %sum_out: f32): - %result = arith.addf %lhs_in, %rhs_in : f32 - linalg.yield %result : f32 - } - return -} -// TILE-2-LABEL: func @sum( -// TILE-2-SAME: [[LHS:%.*]]: memref{{.*}}, [[RHS:%.*]]: memref{{.*}}, [[SUM:%.*]]: memref{{.*}}) { -// TILE-2-DAG: [[C0:%.*]] = arith.constant 0 : index -// TILE-2-DAG: [[C2:%.*]] = arith.constant 2 : index -// TILE-2: [[LHS_ROWS:%.*]] = memref.dim [[LHS]], %c0 -// TILE-2: scf.parallel ([[I:%.*]]) = ([[C0]]) to ([[LHS_ROWS]]) step ([[C2]]) { -// TILE-2-NO: scf.parallel -// TILE-2: [[LHS_SUBVIEW:%.*]] = memref.subview [[LHS]] -// TILE-2: [[RHS_SUBVIEW:%.*]] = memref.subview [[RHS]] -// TILE-2: [[SUM_SUBVIEW:%.*]] = memref.subview [[SUM]] -// TILE-2: linalg.generic {{.*}} ins([[LHS_SUBVIEW]], [[RHS_SUBVIEW]]{{.*}} outs([[SUM_SUBVIEW]] - -// TILE-02-LABEL: func @sum( -// TILE-02-SAME: [[LHS:%.*]]: memref{{.*}}, [[RHS:%.*]]: memref{{.*}}, [[SUM:%.*]]: memref{{.*}}) { -// TILE-02-DAG: [[C0:%.*]] = arith.constant 0 : index -// TILE-02-DAG: [[C2:%.*]] = arith.constant 2 : index -// TILE-02: [[LHS_COLS:%.*]] = memref.dim [[LHS]], %c1 -// TILE-02: scf.parallel ([[I:%.*]]) = ([[C0]]) to ([[LHS_COLS]]) step ([[C2]]) { -// TILE-02-NO: scf.parallel 
-// TILE-02: [[LHS_SUBVIEW:%.*]] = memref.subview [[LHS]] -// TILE-02: [[RHS_SUBVIEW:%.*]] = memref.subview [[RHS]] -// TILE-02: [[SUM_SUBVIEW:%.*]] = memref.subview [[SUM]] -// TILE-02: linalg.generic {{.*}} ins([[LHS_SUBVIEW]], [[RHS_SUBVIEW]]{{.*}} outs([[SUM_SUBVIEW]] - -// TILE-002-LABEL: func @sum( -// TILE-002-SAME: [[LHS:%.*]]: memref{{.*}}, [[RHS:%.*]]: memref{{.*}}, [[SUM:%.*]]: memref{{.*}}) { -// TILE-002-NO: scf.parallel -// TILE-002: linalg.generic {{.*}} ins([[LHS]], [[RHS]]{{.*}} outs([[SUM]] - -// TILE-234-LABEL: func @sum( -// TILE-234-SAME: [[LHS:%.*]]: memref{{.*}}, [[RHS:%.*]]: memref{{.*}}, [[SUM:%.*]]: memref{{.*}}) { -// TILE-234-DAG: [[C0:%.*]] = arith.constant 0 : index -// TILE-234-DAG: [[C2:%.*]] = arith.constant 2 : index -// TILE-234-DAG: [[C3:%.*]] = arith.constant 3 : index -// TILE-234: [[LHS_ROWS:%.*]] = memref.dim [[LHS]], %c0 -// TILE-234: [[LHS_COLS:%.*]] = memref.dim [[LHS]], %c1 -// TILE-234: scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]]) to ([[LHS_ROWS]], [[LHS_COLS]]) step ([[C2]], [[C3]]) { -// TILE-234-NO: scf.parallel -// TILE-234: [[LHS_SUBVIEW:%.*]] = memref.subview [[LHS]] -// TILE-234: [[RHS_SUBVIEW:%.*]] = memref.subview [[RHS]] -// TILE-234: [[SUM_SUBVIEW:%.*]] = memref.subview [[SUM]] -// TILE-234: linalg.generic {{.*}} ins([[LHS_SUBVIEW]], [[RHS_SUBVIEW]]{{.*}} outs([[SUM_SUBVIEW]] diff --git a/mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir b/mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir deleted file mode 100644 --- a/mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir +++ /dev/null @@ -1,74 +0,0 @@ -// RUN: mlir-opt %s -test-linalg-transform-patterns="test-tile-scalarize-dynamic-dims" -scf-for-loop-canonicalization -canonicalize -split-input-file | \ -// RUN: FileCheck %s - -// CHECK-LABEL: func @matmul_partly_dynamic_tensor( -// CHECK-SAME: %[[ARG0:.*]]: tensor, %[[ARG1:.*]]: tensor -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK: tensor.dim %[[ARG0]], %[[C0]] : tensor -// CHECK: %[[UB1:.*]] = tensor.dim %[[ARG0]], %[[C0]] : tensor -// CHECK: %[[UB2:.*]] = tensor.dim %[[ARG0]], %[[C1]] : tensor -// CHECK: scf.for %[[IV0:.*]] = %[[C0]] to %[[UB1]] step %[[C1]] -// CHECK: scf.for %[[IV1:.*]] = %[[C0]] to %[[UB2]] step %[[C1]] -// CHECK: %[[S1:.*]] = tensor.extract_slice %[[ARG0]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1] : tensor to tensor<1x1xf32> -// CHECK: %[[S2:.*]] = tensor.extract_slice %[[ARG1]][%[[IV1]], 0] [1, 2000] [1, 1] : tensor to tensor<1x2000xf32> -// CHECK: %[[S3:.*]] = tensor.extract_slice %{{.*}}[%[[IV0]], 0] [1, 2000] [1, 1] : tensor to tensor<1x2000xf32> -// CHECK: linalg.matmul ins(%[[S1]], %[[S2]] : tensor<1x1xf32>, tensor<1x2000xf32>) outs(%[[S3]] : tensor<1x2000xf32>) -> tensor<1x2000xf32> -func.func @matmul_partly_dynamic_tensor(%arg0: tensor, %arg1: tensor) - -> tensor { - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %d0 = tensor.dim %arg0, %c0 : tensor - %out = tensor.empty(%d0) : tensor - %r = linalg.matmul {__internal_linalg_transform__ = "tile"} - ins(%arg0, %arg1: tensor, tensor) - outs(%out: tensor) -> tensor - return %r : tensor -} - -// ----- - -// The input IR of this test case is a tiled and peeled linalg.matmul op. 
- -// CHECK-LABEL: func @tiled_and_peeled_matmul( -// CHECK: linalg.matmul ins({{.*}} : tensor<32x259xf32>, tensor<259x258xf32>) outs({{.*}} : tensor<32x258xf32>) -> tensor<32x258xf32> -// CHECK: linalg.matmul ins({{.*}} : tensor<1x259xf32>, tensor<259x258xf32>) outs({{.*}} : tensor<1x258xf32>) -> tensor<1x258xf32> -#map0 = affine_map<(d0) -> (64, -d0 + 257)> -#map1 = affine_map<()[s0] -> ((s0 floordiv 32) * 32)> -#map2 = affine_map<(d0)[s0] -> (d0 - (s0 floordiv 32) * 32)> - -func.func @tiled_and_peeled_matmul(%arg0: tensor<257x259xf32>, %arg1: tensor<259x258xf32>, %arg2: tensor<257x258xf32>) -> tensor<257x258xf32> { - %c257 = arith.constant 257 : index - %c64 = arith.constant 64 : index - %cst = arith.constant 0.000000e+00 : f32 - %c0 = arith.constant 0 : index - %c32 = arith.constant 32 : index - %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<257x258xf32>) -> tensor<257x258xf32> - %1 = scf.for %arg3 = %c0 to %c257 step %c64 iter_args(%arg4 = %0) -> (tensor<257x258xf32>) { - %2 = affine.min #map0(%arg3) - %3 = tensor.extract_slice %arg0[%arg3, 0] [%2, 259] [1, 1] : tensor<257x259xf32> to tensor - %4 = tensor.extract_slice %arg4[%arg3, 0] [%2, 258] [1, 1] : tensor<257x258xf32> to tensor - %5 = affine.apply #map1()[%2] - %6 = scf.for %arg5 = %c0 to %5 step %c32 iter_args(%arg6 = %4) -> (tensor) { - %10 = tensor.extract_slice %3[%arg5, 0] [32, 259] [1, 1] : tensor to tensor<32x259xf32> - %11 = tensor.extract_slice %arg6[%arg5, 0] [32, 258] [1, 1] : tensor to tensor<32x258xf32> - %12 = linalg.matmul {__internal_linalg_transform__ = "tile"} ins(%10, %arg1 : tensor<32x259xf32>, tensor<259x258xf32>) outs(%11 : tensor<32x258xf32>) -> tensor<32x258xf32> - %13 = tensor.insert_slice %12 into %arg6[%arg5, 0] [32, 258] [1, 1] : tensor<32x258xf32> into tensor - scf.yield %13 : tensor - } - %7 = arith.cmpi slt, %5, %2 : index - %8 = scf.if %7 -> (tensor) { - %10 = affine.apply #map2(%2)[%2] - %11 = tensor.extract_slice %3[%5, 0] [%10, 259] [1, 1] : tensor to tensor - %12 = tensor.extract_slice %6[%5, 0] [%10, 258] [1, 1] : tensor to tensor - %13 = linalg.matmul {__internal_linalg_transform__ = "tile"} ins(%11, %arg1 : tensor, tensor<259x258xf32>) outs(%12 : tensor) -> tensor - %14 = tensor.insert_slice %13 into %6[%5, 0] [%10, 258] [1, 1] : tensor into tensor - scf.yield %14 : tensor - } else { - scf.yield %6 : tensor - } - %9 = tensor.insert_slice %8 into %arg4[%arg3, 0] [%2, 258] [1, 1] : tensor into tensor<257x258xf32> - scf.yield %9 : tensor<257x258xf32> - } - return %1 : tensor<257x258xf32> -} diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir --- a/mlir/test/Dialect/Linalg/tile-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,4" -split-input-file | FileCheck %s +// RUN: mlir-opt %s -test-transform-dialect-interpreter -split-input-file | FileCheck %s // CHECK-LABEL: func @matmul_tensors( // CHECK-SAME: %[[TA:[0-9a-z]+]]: tensor @@ -27,6 +27,12 @@ return %0 : tensor } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 + %1, %loops:3 = transform.structured.tile %0 [2, 3, 4] +} + // ----- func.func @generic_op_tensors( @@ -52,6 +58,12 @@ return %4 : tensor } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 + %1, %loops:3 = transform.structured.tile %0 [2, 3, 4] +} + // CHECK-LABEL: func 
@generic_op_tensors // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor @@ -117,3 +129,8 @@ return %2 : tensor } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 + %1, %loops:3 = transform.structured.tile %0 [2, 3, 4] +} diff --git a/mlir/test/Dialect/Linalg/tile-zero.mlir b/mlir/test/Dialect/Linalg/tile-zero.mlir deleted file mode 100644 --- a/mlir/test/Dialect/Linalg/tile-zero.mlir +++ /dev/null @@ -1,12 +0,0 @@ -// RUN: mlir-opt -test-linalg-transform-patterns=test-tile-pattern %s | FileCheck %s - -func.func @matmul_zero_tile( - %arg0: tensor, %arg1 : tensor, %arg2 : tensor) -> tensor { - %0 = linalg.matmul {__internal_linalg_transform__ = "tile"} - ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) -> tensor - return %0 : tensor -} -// CHECK-LABEL: matmul_zero_tile -// CHECK: linalg.matmul -// CHECK-NOT: __internal_linalg_transform__ diff --git a/mlir/test/Dialect/Linalg/tile.mlir b/mlir/test/Dialect/Linalg/tile.mlir deleted file mode 100644 --- a/mlir/test/Dialect/Linalg/tile.mlir +++ /dev/null @@ -1,331 +0,0 @@ -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2" -mlir-disable-threading=true | FileCheck %s -check-prefix=TILE-2 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,2" -mlir-disable-threading=true | FileCheck %s -check-prefix=TILE-02 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,0,2" -mlir-disable-threading=true | FileCheck %s -check-prefix=TILE-002 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,4" -mlir-disable-threading=true | FileCheck %s -check-prefix=TILE-234 - -// TILE-2-DAG: #[[$bound_map:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> -// TILE-02-DAG: #[[$bound_map:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> -// TILE-002-DAG: #[[$bound_map:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> -// TILE-234-DAG: #[[$bound_map_2:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> -// TILE-234-DAG: #[[$bound_map_3:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 3)> -// TILE-234-DAG: #[[$bound_map_4:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)> - -func.func @matmul(%arg0: memref>, - %arg1: memref>, - %arg2: memref>) { - linalg.matmul - ins(%arg0, %arg1: memref>, - memref>) - outs(%arg2: memref>) - return -} -// TILE-2-LABEL: func @matmul( -// TILE-2-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-2-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-2: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} { -// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]] -// TILE-2: %[[K:.*]] = memref.dim %{{.*}}, %c1 : memref> -// TILE-2: %[[szK:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]] -// TILE-2: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref> -// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[K]]] [1, 1] : memref> to memref> -// TILE-2: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szK]], %[[N]]] [1, 1] : memref> to memref> -// TILE-2: linalg.matmul ins(%[[sAi]]{{.*}} outs(%[[sCi]] - -// TILE-02-LABEL: func @matmul( -// TILE-02-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-02-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-02: %[[N:.*]] = memref.dim %arg1, %c1 : memref> -// TILE-02: scf.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} { -// TILE-02: %[[K:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[N]]] -// TILE-02: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-02: %[[szK:.*]] 
= affine.min #[[$bound_map]](%[[J]])[%[[N]]]
-// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[K]], %[[szN]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-02: %[[sCj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[M]], %[[szK]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-02: linalg.matmul ins(%{{.*}}, %[[sBj]]{{.*}} outs(%[[sCj]]
-
-// TILE-002-LABEL: func @matmul(
-// TILE-002-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE-002-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE-002: %[[ubK:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-002: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
-// TILE-002: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-002: %[[szK:.*]] = affine.min #[[$bound_map]](%[[K]])[%[[ubK]]]
-// TILE-002: %[[szK_1:.*]] = affine.min #[[$bound_map]](%[[K]])[%[[ubK]]]
-// TILE-002: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-002: %[[sAj:.*]] = memref.subview %{{.*}}[0, %[[K]]] [%[[M]], %[[szK]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-002: %[[sBj:.*]] = memref.subview %{{.*}}[%[[K]], 0] [%[[szK_1]], %[[N]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-002: linalg.matmul ins(%[[sAj]], %[[sBj]]{{.*}} outs(%{{.*}}
-
-// TILE-234-LABEL: func @matmul(
-// TILE-234-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE-234-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE-234-DAG: %[[C3:.*]] = arith.constant 3 : index
-// TILE-234-DAG: %[[C4:.*]] = arith.constant 4 : index
-// TILE-234: %[[ubM:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-234: %[[ubK:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-234: %[[ubN:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-234: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[ubM]] step %{{.*}} {
-// TILE-234: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[ubN]] step %{{.*}} {
-// TILE-234: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
-// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubM]]]
-// TILE-234: %[[szK:.*]] = affine.min #[[$bound_map_4]](%[[K]])[%[[ubK]]]
-// TILE-234: %[[szK_1:.*]] = affine.min #[[$bound_map_4]](%[[K]])[%[[ubK]]]
-// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[ubN]]]
-// TILE-234: %[[szM_1:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubM]]]
-// TILE-234: %[[szN_1:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[ubN]]]
-// TILE-234: %[[sAik:.*]] = memref.subview %{{.*}}[%[[I]], %[[K]]] [%[[szM]], %[[szK]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-234: %[[sBkj:.*]] = memref.subview %{{.*}}[%[[K]], %[[J]]] [%[[szK_1]], %[[szN]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-234: %[[sCij:.*]] = memref.subview %{{.*}}[%[[I]], %[[J]]] [%[[szM_1]], %[[szN_1]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-//
-// TILE-234: linalg.matmul ins(%[[sAik]], %[[sBkj]]{{.*}} outs(%[[sCij]]
-
-// When the buffer shapes are known at compile time, it is possible to avoid
-// the "min" in subview size computation. This test uses buffer sizes divisible
-// by respective tile sizes (M=10 divisible by 2, N=12 divisible by 2 and 3,
-// K=16 divisible by 2 and 4).
-func.func @matmul_static(%arg0: memref<10x16xf32, strided<[?, 1], offset: ?>>, - %arg1: memref<16x12xf32, strided<[?, 1], offset: ?>>, - %arg2: memref<10x12xf32, strided<[?, 1], offset: ?>>) { - linalg.matmul - ins(%arg0, %arg1: memref<10x16xf32, strided<[?, 1], offset: ?>>, - memref<16x12xf32, strided<[?, 1], offset: ?>>) - outs(%arg2: memref<10x12xf32, strided<[?, 1], offset: ?>>) - return -} -// TILE-2-LABEL: func @matmul_static( -// TILE-2-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref -// TILE-2-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref -// TILE-2-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref -// TILE-2-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-2-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-2-DAG: %[[M:.*]] = arith.constant 10 : index -// TILE-2: scf.for %[[I:.*]] = %{{.*}} to %[[M]] step %{{.*}} { -// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [2, 16] [1, 1] : memref<10x16xf32, strided<[?, 1], offset: ?>> to memref<2x16xf32, strided<[?, 1], offset: ?>> -// TILE-2: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [2, 12] [1, 1] : memref<10x12xf32, strided<[?, 1], offset: ?>> to memref<2x12xf32, strided<[?, 1], offset: ?>> -// TILE-2: linalg.matmul ins(%[[sAi]], %{{.*}}{{.*}} outs(%[[sCi]] - -// TILE-02-LABEL: func @matmul_static( -// TILE-02-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-02-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-02-DAG: %[[N:.*]] = arith.constant 12 : index -// TILE-02: scf.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} { -// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [16, 2] [1, 1] : memref<16x12xf32, strided<[?, 1], offset: ?>> to memref<16x2xf32, strided<[?, 1], offset: ?>> -// TILE-02: %[[sCj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [10, 2] [1, 1] : memref<10x12xf32, strided<[?, 1], offset: ?>> to memref<10x2xf32, strided<[?, 1], offset: ?>> -// TILE-02: linalg.matmul ins(%{{.*}}, %[[sBj]]{{.*}} outs(%[[sCj]] - -// TILE-002-LABEL: func @matmul_static( -// TILE-002-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-002-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-002-DAG: %[[C16:.*]] = arith.constant 16 : index -// TILE-002: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[C16]] step %{{.*}} { -// TILE-002: %[[sAj:.*]] = memref.subview %{{.*}}[0, %[[K]]] [10, 2] [1, 1] : memref<10x16xf32, strided<[?, 1], offset: ?>> to memref<10x2xf32, strided<[?, 1], offset: ?>> -// TILE-002: %[[sBj:.*]] = memref.subview %{{.*}}[%[[K]], 0] [2, 12] [1, 1] : memref<16x12xf32, strided<[?, 1], offset: ?>> to memref<2x12xf32, strided<[?, 1], offset: ?>> -// TILE-002: linalg.matmul ins(%[[sAj]], %[[sBj]]{{.*}} outs(%{{.*}} - -// TILE-234-LABEL: func @matmul_static( -// TILE-234-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-234-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-234-DAG: %[[C3:.*]] = arith.constant 3 : index -// TILE-234-DAG: %[[C4:.*]] = arith.constant 4 : index -// TILE-234-DAG: %[[C10:.*]] = arith.constant 10 : index -// TILE-234-DAG: %[[C16:.*]] = arith.constant 16 : index -// TILE-234-DAG: %[[C12:.*]] = arith.constant 12 : index -// TILE-234: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[C10]] step %{{.*}} { -// TILE-234: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[C12]] step %{{.*}} { -// TILE-234: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[C16]] step %{{.*}} { -// TILE-234: %[[sAik:.*]] = memref.subview %{{.*}}[%[[I]], %[[K]]] [2, 4] [1, 1] : memref<10x16xf32, strided<[?, 1], offset: ?>> to memref<2x4xf32, strided<[?, 1], offset: ?>> -// TILE-234: %[[sBkj:.*]] = memref.subview %{{.*}}[%[[K]], %[[J]]] [4, 3] [1, 1] : memref<16x12xf32, 
strided<[?, 1], offset: ?>> to memref<4x3xf32, strided<[?, 1], offset: ?>> -// TILE-234: %[[sCij:.*]] = memref.subview %{{.*}}[%[[I]], %[[J]]] [2, 3] [1, 1] : memref<10x12xf32, strided<[?, 1], offset: ?>> to memref<2x3xf32, strided<[?, 1], offset: ?>> -// -// TILE-234: linalg.matmul ins(%[[sAik]], %[[sBkj]]{{.*}} outs(%[[sCij]] - -func.func @matvec(%arg0: memref>, %arg1: memref>, %arg2: memref>) { - linalg.matvec - ins(%arg0, %arg1: memref>, - memref>) - outs(%arg2: memref>) - return -} -// TILE-2-LABEL: func @matvec( -// TILE-2-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref -// TILE-2-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref -// TILE-2-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref -// TILE-2-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-2-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-2: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} { -// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]] -// TILE-2: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref> -// TILE-2: %[[szN:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]] -// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[N]]] [1, 1] : memref> to memref> -// TILE-2: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szN]]] [1] : memref> to memref> -// TILE-2: linalg.matvec ins(%[[sAi]], %{{.*}} outs(%[[sCi]] - -// TILE-02-LABEL: func @matvec( -// TILE-02-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref -// TILE-02-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref -// TILE-02-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref -// TILE-02-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-02-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-02: %[[K:.*]] = memref.dim %{{.*}}, %c1 : memref> -// TILE-02: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} { -// TILE-02: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[K]]] -// TILE-02: %[[szN_1:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[K]]] -// TILE-02: %[[sAj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[M]], %[[szN]]] [1, 1] : memref> to memref> -// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[%[[J]]] [%[[szN_1]]] [1] : memref> to memref> -// TILE-02: linalg.matvec ins(%[[sAj]], %[[sBj]]{{.*}} outs(%{{.*}} - -// TILE-002-LABEL: func @matvec( -// TILE-002-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref -// TILE-002-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref -// TILE-002-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref -// TILE-002-NOT: scf.for - -// TILE-234-LABEL: func @matvec( -// TILE-234-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref -// TILE-234-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref -// TILE-234-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref -// TILE-234-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-234-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-234-DAG: %[[C3:.*]] = arith.constant 3 : index -// TILE-234: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-234: %[[K:.*]] = memref.dim %{{.*}}, %c1 : memref> -// TILE-234: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} { -// TILE-234: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} { -// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[M]]] -// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[K]]] -// TILE-234: %[[szN_1:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[K]]] -// TILE-234: %[[szM_1:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[M]]] -// TILE-234: %[[sAij:.*]] = memref.subview %{{.*}}[%[[I]], %[[J]]] [%[[szM]], %[[szN]]] [1, 1] : memref> to memref> -// TILE-234: %[[sBj:.*]] = memref.subview 
%{{.*}}[%[[J]]] [%[[szN_1]]] [1] : memref> to memref> -// TILE-234: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM_1]]] [1] : memref> to memref> -// -// TILE-234: linalg.matvec ins(%[[sAij]], %[[sBj]]{{.*}} outs(%[[sCi]] - -func.func @dot(%arg0: memref>, %arg1: memref>, %arg2: memref) { - linalg.dot - ins(%arg0, %arg1: memref>, memref>) - outs(%arg2: memref) - return -} -// TILE-2-LABEL: func @dot( -// TILE-2-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-2-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-2: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} { -// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]] -// TILE-2: %[[szM_1:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]] -// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref> to memref> -// TILE-2: %[[sBi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM_1]]] [1] : memref> to memref> -// TILE-2: linalg.dot ins(%[[sAi]], %[[sBi]]{{.*}} outs( - -// TILE-02-LABEL: func @dot( -// TILE-02-NOT: scf.for - -// TILE-002-LABEL: func @dot( -// TILE-002-NOT: scf.for - -// TILE-234-LABEL: func @dot( -// TILE-234-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-234-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-234: %[[ubK:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-234: scf.for %[[I:.*]] = %{{.*}} to %[[ubK]] step %{{.*}} { -// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubK]]] -// TILE-234: %[[szM_1:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubK]]] -// TILE-234: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref> to memref> -// TILE-234: %[[sBi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM_1]]] [1] : memref> to memref> -// TILE-234: linalg.dot ins(%[[sAi]], %[[sBi]]{{.*}} outs( - -func.func @fill_static(%arg0: memref<127x99xf32>, %arg1: f32) { - linalg.fill ins(%arg1 : f32) outs(%arg0 : memref<127x99xf32>) - return -} -// TILE-2-LABEL: func @fill_static -// TILE-2: for -// TILE-2-NOT: for -// TILE-2: memref.subview{{.*}} : memref<127x99xf32> -// TILE-2: linalg.fill{{.*}} : memref> - -// TILE-02-LABEL: func @fill_static -// TILE-02: for -// TILE-02-NOT: for -// TILE-02: memref.subview{{.*}} : memref<127x99xf32> -// TILE-02: linalg.fill{{.*}} : memref<127x?xf32, strided<[99, 1], offset: ?>> - -// TILE-002-LABEL: func @fill_static -// TILE-002-NOT: for -// TILE-002: linalg.fill{{.*}} : memref<127x99xf32> - -// TILE-234-LABEL: func @fill_static -// TILE-234: for -// TILE-234: for -// TILE-234-NOT: for -// TILE-234: memref.subview{{.*}} : memref<127x99xf32> -// TILE-234: linalg.fill{{.*}} : memref> - - -func.func @fill(%arg0: memref>, %arg1: f32) { - linalg.fill ins(%arg1 : f32) outs(%arg0 : memref>) - return -} -// TILE-2-LABEL: func @fill -// TILE-2: for -// TILE-2-NOT: for -// TILE-2: fill{{.*}} f32 - -// TILE-02-LABEL: func @fill -// TILE-02: for -// TILE-02-NOT: for -// TILE-02: fill{{.*}} f32 - -// TILE-002-LABEL: func @fill -// TILE-002-NOT: for -// TILE-002: fill{{.*}} f32 - -// TILE-234-LABEL: func @fill -// TILE-234: for -// TILE-234: for -// TILE-234-NOT: for -// TILE-234: fill{{.*}} f32 - -#id_2d = affine_map<(i, j) -> (i, j)> -#pointwise_2d_trait = { - args_in = 2, - args_out = 1, - indexing_maps = [#id_2d, #id_2d, #id_2d], - iterator_types = ["parallel", "parallel"] -} - -func.func @pointwise(%arg0: memref>, %arg1: memref>, - %arg2: memref>) { - linalg.generic #pointwise_2d_trait - ins(%arg0, %arg1 : memref>, memref>) - outs(%arg2 : memref>) { - ^bb0(%arg4: f32, 
%arg5: f32, %arg6: f32): - %4 = arith.addf %arg4, %arg5 : f32 - linalg.yield %4 : f32 - } - return -} -// TILE-2-LABEL: func @pointwise -// TILE-2: for -// TILE-2-NOT: for -// TILE-2: linalg.generic - -// TILE-02-LABEL: func @pointwise -// TILE-02: for -// TILE-02-NOT: for -// TILE-02: linalg.generic - -// TILE-002-LABEL: func @pointwise -// TILE-002-NOT: for -// TILE-002: linalg.generic - -// TILE-234-LABEL: func @pointwise -// TILE-234: for -// TILE-234: for -// TILE-234-NOT: for -// TILE-234: linalg.generic diff --git a/mlir/test/Dialect/Linalg/transform-patterns.mlir b/mlir/test/Dialect/Linalg/transform-patterns.mlir --- a/mlir/test/Dialect/Linalg/transform-patterns.mlir +++ b/mlir/test/Dialect/Linalg/transform-patterns.mlir @@ -1,20 +1,22 @@ -// RUN: mlir-opt %s -test-linalg-transform-patterns=test-patterns -split-input-file -test-transform-dialect-interpreter | FileCheck %s +// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-linalg-transform-patterns=test-patterns -split-input-file | FileCheck %s -// Map corresponding to a 2D memory access where the stride along the last dim is known to be 1. -// CHECK-DAG: #[[$kn:.*]] = affine_map<(d0, d1, d2) -> (d2, d1)> -// CHECK-DAG: #[[$nm:.*]] = affine_map<(d0, d1, d2) -> (d1, d0)> -// CHECK-DAG: #[[$km:.*]] = affine_map<(d0, d1, d2) -> (d2, d0)> +// ----- func.func @dot(%x: memref>, %y: memref>, %v: memref) { - linalg.dot { __internal_linalg_transform__ = "MEM" } - ins(%x, %y: memref>, - memref>) - outs(%v: memref) - + linalg.dot ins(%x, %y: memref>, + memref>) + outs(%v: memref) return } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.dot"]} in %arg1 + %1, %loop = transform.structured.tile %0 [8000] +} + // CHECK-LABEL: func @dot // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index @@ -28,6 +30,8 @@ // CHECK: arith.addf // CHECK: store +// ----- + func.func @matvec(%A: memref>, %x: memref>, %y: memref>) { @@ -37,25 +41,43 @@ outs(%y: memref>) return } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.matvec"]} in %arg1 + %1, %loops:2 = transform.structured.tile %0 [5, 6] +} + // CHECK-LABEL: func @matvec // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[c5:.*]] = arith.constant 5 : index // CHECK-DAG: %[[c6:.*]] = arith.constant 6 : index -// CHECK: scf.parallel {{.*}} step (%[[c5]]) +// CHECK: scf.for {{.*}} step %[[c5]] // CHECK: scf.for {{.*}} step %[[c6]] // CHECK: linalg.matvec // CHECK: ins({{.*}}: memref>, memref>) // CHECK: outs({{.*}}: memref>) +// ----- + func.func @matmul(%A: memref>, %B: memref>, %C: memref>) { - linalg.matmul { __internal_linalg_transform__ = "MEM" } - ins(%A, %B: memref>, - memref>) - outs(%C: memref>) + linalg.matmul ins(%A, %B: memref>, + memref>) + outs(%C: memref>) return } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 + %1, %loops:3 = transform.structured.tile %0 [2000, 3000, 4000] + %2, %loops_2:3 = transform.structured.tile %1 [200, 300, 400] + %3, %loops_3:3 = transform.structured.tile %2 [20, 30, 40] + %4, %loops_4:3 = transform.structured.tile %3 [2, 3, 4] +} + // CHECK-LABEL: func @matmul // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[c2:.*]] = arith.constant 2 : index @@ -86,6 +108,13 @@ // CHECK: ins({{.*}}: memref>, memref>) // CHECK: outs({{.*}}: memref>) +// 
----- + +// Map corresponding to a 2D memory access where the stride along the last dim is known to be 1. +// CHECK-DAG: #[[$kn:.*]] = affine_map<(d0, d1, d2) -> (d2, d1)> +// CHECK-DAG: #[[$nm:.*]] = affine_map<(d0, d1, d2) -> (d1, d0)> +// CHECK-DAG: #[[$km:.*]] = affine_map<(d0, d1, d2) -> (d2, d0)> + #matmul_accesses = [ affine_map<(m, n, k) -> (m, k)>, affine_map<(m, n, k) -> (k, n)>, @@ -112,6 +141,7 @@ } return } + transform.with_pdl_patterns { ^bb0(%arg0: !pdl.operation): transform.sequence %arg0 failures(propagate) { @@ -120,6 +150,7 @@ transform.structured.interchange %0 { iterator_interchange = [1, 2, 0]} } } + // CHECK-LABEL: func @permute_generic // CHECK: linalg.generic { // CHECK-SAME: indexing_maps = [#[[$kn]], #[[$nm]], #[[$km]]], @@ -129,15 +160,23 @@ // CHECK-SAME: memref> // CHECK-SAME: memref> +// ----- + func.func @matvec_perm(%A: memref>, %x: memref>, %y: memref>) { - linalg.matvec {__internal_linalg_transform__ = "__with_perm__"} - ins(%A, %x: memref>, - memref>) - outs(%y: memref>) + linalg.matvec ins(%A, %x: memref>, + memref>) + outs(%y: memref>) return } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.matvec"]} in %arg1 + %1, %loops:2 = transform.structured.tile %0 [5, 6] {interchange = [1, 0]} +} + // CHECK-LABEL: func @matvec_perm // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[c5:.*]] = arith.constant 5 : index @@ -148,15 +187,25 @@ // CHECK: ins({{.*}}: memref>, memref>) // CHECK: outs({{.*}}: memref>) +// ----- + func.func @matmul_perm(%A: memref>, %B: memref>, %C: memref>) { - linalg.matmul {__internal_linalg_transform__ = "__with_perm__"} - ins(%A, %B: memref>, - memref>) - outs(%C : memref>) + linalg.matmul ins(%A, %B: memref>, + memref>) + outs(%C : memref>) return } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 + %1, %loops:3 = transform.structured.tile %0 [2000, 3000, 4000] {interchange=[1, 2, 0]} + %2, %loops_2:3 = transform.structured.tile %1 [200, 300, 400] {interchange=[1, 0, 2]} + %3, %loops_3:3 = transform.structured.tile %2 [20, 30, 40] +} + // CHECK-LABEL: func @matmul_perm // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[c20:.*]] = arith.constant 20 : index @@ -180,26 +229,3 @@ // CHECK: linalg.matmul // CHECK: ins({{.*}}: memref>, memref>) // CHECK: outs({{.*}}: memref>) - -func.func @tile_permute_parallel_loop(%arg0: memref, - %arg1: memref, - %arg2: memref) { - linalg.matmul {__internal_linalg_transform__ = "par__with_perm__"} - ins(%arg0, %arg1: memref, memref) - outs(%arg2: memref) - return -} -// CHECK-LABEL: func @tile_permute_parallel_loop -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: memref -// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index -// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[D0:.*]] = memref.dim %[[ARG0]], %c0 -// CHECK-DAG: %[[D1:.*]] = memref.dim %[[ARG0]], %c1 -// CHECK-DAG: %[[D2:.*]] = memref.dim %[[ARG1]], %c1 -// CHECK: scf.parallel (%{{.*}}) = (%[[C0]]) to (%[[D2]]) step (%[[C8]]) -// CHECK: scf.for %{{.*}} = %[[C0]] to %[[D1]] step %[[C4]] -// CHECK: scf.parallel (%{{.*}}) = (%[[C0]]) to (%[[D0]]) step (%[[C16]]) diff --git 
a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ +// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="tile-sizes=4" -convert-linalg-to-loops -convert-scf-to-cf \ +// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf \ // RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ @@ -24,6 +24,12 @@ return } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.conv_1d"]} in %arg1 + %1, %loop = transform.structured.tile %0 [4] +} + func.func @main() { %c3 = arith.constant 3 : index %c6 = arith.constant 6 : index diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ +// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,4" -convert-linalg-to-loops -convert-scf-to-cf \ +// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf \ // RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ @@ -26,6 +26,12 @@ return } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.conv_1d_nwc_wcf"]} in %arg1 + %1, %loops:2 = transform.structured.tile %0 [2, 4] +} + func.func @main() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir 
b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ +// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,2" -convert-linalg-to-loops -convert-scf-to-cf \ +// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf \ // RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ @@ -24,6 +24,12 @@ return } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.conv_2d"]} in %arg1 + %1, %loops:2 = transform.structured.tile %0 [2, 2] +} + func.func @main() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ +// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,3,2" -convert-linalg-to-loops -convert-scf-to-cf \ +// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf \ // RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ @@ -26,6 +26,12 @@ return } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.conv_2d_nhwc_hwcf"]} in %arg1 + %1, %loops:4 = transform.structured.tile %0 [2, 3, 3, 2] +} + func.func @main() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir --- 
a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ +// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,2,2" -convert-linalg-to-loops -convert-scf-to-cf \ +// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf \ // RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ @@ -24,6 +24,12 @@ return } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.conv_3d"]} in %arg1 + %1, %loops:3 = transform.structured.tile %0 [2, 2, 2] +} + func.func @main() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ +// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,5,5,5" -convert-linalg-to-loops -convert-scf-to-cf \ +// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf \ // RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ @@ -26,6 +26,11 @@ return } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.conv_3d_ndhwc_dhwcf"]} in %arg1 + %1, %loops:3 = transform.structured.tile %0 [0, 5, 5, 5] +} func.func @main() { %c0 = arith.constant 0 : index diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir +++ 
b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir @@ -1,12 +1,12 @@ // UNSUPPORTED: asan -// RUN: mlir-opt %s -linalg-bufferize -arith-bufferize \ +// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -linalg-bufferize -arith-bufferize \ // RUN: -tensor-bufferize -func-bufferize -finalizing-bufferize -buffer-deallocation -convert-linalg-to-loops -convert-scf-to-cf \ // RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext,%mlir_lib_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="tile-sizes=1,2,3" -linalg-bufferize \ +// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -linalg-bufferize \ // RUN: -scf-bufferize -arith-bufferize -tensor-bufferize \ // RUN: -func-bufferize \ // RUN: -finalizing-bufferize -convert-linalg-to-loops -convert-scf-to-cf -convert-scf-to-cf \ @@ -36,4 +36,10 @@ return } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 + %1, %loops:3 = transform.structured.tile %0 [1, 2, 3] +} + func.func private @printMemrefF32(%ptr : tensor<*xf32>) diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp --- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp @@ -61,10 +61,6 @@ Option testPatterns{*this, "test-patterns", llvm::cl::desc("Test a mixed set of patterns"), llvm::cl::init(false)}; - Option testTileAndDistributionOptions{ - *this, "test-tile-and-distribute-options", - llvm::cl::desc("Test tile and distribute options"), - llvm::cl::init(false)}; Option testVectorTransferForwardingPatterns{ *this, "test-vector-transfer-forwarding-patterns", llvm::cl::desc( @@ -75,13 +71,6 @@ llvm::cl::desc("Test a set of patterns that rewrite a linalg contraction " "in vector.contract form"), llvm::cl::init(false)}; - Option testTilePattern{*this, "test-tile-pattern", - llvm::cl::desc("Test tile pattern"), - llvm::cl::init(false)}; - Option testTileScalarizeDynamicDims{ - *this, "test-tile-scalarize-dynamic-dims", - llvm::cl::desc("Test tiling of dynamic dims by 1"), - llvm::cl::init(false)}; Option testTransformPadTensor{ *this, "test-transform-pad-tensor", llvm::cl::desc("Test transform pad tensor by copying with generic ops"), @@ -135,91 +124,12 @@ MLIRContext *ctx = funcOp.getContext(); RewritePatternSet patterns(ctx); - //===--------------------------------------------------------------------===// - // Linalg tiling patterns. 
-  //===--------------------------------------------------------------------===//
-  patterns.add<LinalgTilingPattern>(
-      MatmulOp::getOperationName(), ctx,
-      LinalgTilingOptions().setTileSizes({2000, 3000, 4000}),
-      LinalgTransformationFilter(StringAttr::get(ctx, "MEM"),
-                                 StringAttr::get(ctx, "L3")));
-  patterns.add<LinalgTilingPattern>(
-      MatmulOp::getOperationName(), ctx,
-      LinalgTilingOptions().setTileSizes({200, 300, 400}),
-      LinalgTransformationFilter(StringAttr::get(ctx, "L3"),
-                                 StringAttr::get(ctx, "L2")));
-  patterns.add<LinalgTilingPattern>(
-      MatmulOp::getOperationName(), ctx,
-      LinalgTilingOptions().setTileSizes({20, 30, 40}),
-      LinalgTransformationFilter(StringAttr::get(ctx, "L2"),
-                                 StringAttr::get(ctx, "L1")));
-  patterns.add<LinalgTilingPattern>(
-      MatmulOp::getOperationName(), ctx,
-      LinalgTilingOptions().setTileSizes({2, 3, 4}),
-      LinalgTransformationFilter(StringAttr::get(ctx, "L1"),
-                                 StringAttr::get(ctx, "REG")));
-
-  patterns.add<LinalgTilingPattern>(
-      MatvecOp::getOperationName(), ctx,
-      LinalgTilingOptions().setTileSizes({5, 6}).setLoopType(
-          LinalgTilingLoopType::ParallelLoops),
-      LinalgTransformationFilter(ArrayRef<StringAttr>{},
-                                 StringAttr::get(ctx, "L1")));
-
-  patterns.add<LinalgTilingPattern>(
-      DotOp::getOperationName(), ctx, LinalgTilingOptions().setTileSizes(8000),
-      LinalgTransformationFilter(
-          ArrayRef<StringAttr>{StringAttr::get(ctx, "MEM"),
-                               StringAttr::get(ctx, "L3"),
-                               StringAttr::get(ctx, "L2")},
-          StringAttr::get(ctx, "REG")));
-
-  //===--------------------------------------------------------------------===//
-  // Linalg tiling and permutation patterns.
-  //===--------------------------------------------------------------------===//
-  patterns.add<LinalgTilingPattern>(
-      MatmulOp::getOperationName(), ctx,
-      LinalgTilingOptions()
-          .setTileSizes({2000, 3000, 4000})
-          .setInterchange({1, 2, 0}),
-      LinalgTransformationFilter(StringAttr::get(ctx, "__with_perm__"),
-                                 StringAttr::get(ctx, "L2__with_perm__")));
-  patterns.add<LinalgTilingPattern>(
-      MatmulOp::getOperationName(), ctx,
-      LinalgTilingOptions()
-          .setTileSizes({200, 300, 400})
-          .setInterchange({1, 0, 2}),
-      LinalgTransformationFilter(StringAttr::get(ctx, "L2__with_perm__"),
-                                 StringAttr::get(ctx, "L1__with_perm__")));
-  patterns.add<LinalgTilingPattern>(
-      MatmulOp::getOperationName(), ctx,
-      LinalgTilingOptions().setTileSizes({20, 30, 40}),
-      LinalgTransformationFilter(StringAttr::get(ctx, "L1__with_perm__"),
-                                 StringAttr::get(ctx, "REG__with_perm__")));
-
-  patterns.add<LinalgTilingPattern>(
-      MatvecOp::getOperationName(), ctx,
-      LinalgTilingOptions().setTileSizes({5, 6}).setInterchange({1, 0}),
-      LinalgTransformationFilter(StringAttr::get(ctx, "__with_perm__"),
-                                 StringAttr::get(ctx, "L1__with_perm__")));
-
-  patterns.add<LinalgTilingPattern>(
-      MatmulOp::getOperationName(), ctx,
-      LinalgTilingOptions()
-          .setTileSizes({16, 8, 4})
-          .setInterchange({1, 2, 0})
-          .setLoopType(LinalgTilingLoopType::ParallelLoops),
-      LinalgTransformationFilter(
-          StringAttr::get(ctx, "par__with_perm__"),
-          StringAttr::get(ctx, "after_par__with_perm__")));
-
   //===--------------------------------------------------------------------===//
   // Linalg to loops patterns.
   //===--------------------------------------------------------------------===//
   patterns.add<LinalgLoweringPattern<GenericOp>>(
       ctx,
-      /*loweringType=*/LinalgLoweringType::Loops,
-      LinalgTransformationFilter(StringAttr::get(ctx, "REG")));
+      /*loweringType=*/LinalgLoweringType::Loops);
 
   //===--------------------------------------------------------------------===//
   // Linalg distribution patterns.
@@ -239,178 +149,6 @@
   });
 }
 
-template <typename IdOp, typename NProcsOp>
-static SmallVector<ProcInfo>
-getGpuProcIds(OpBuilder &b, Location loc, ArrayRef<Range> parallelLoopRanges,
-              ArrayRef<DistributionMethod> distributionMethod) {
-  size_t count = std::min<size_t>(3, parallelLoopRanges.size());
-  SmallVector<ProcInfo> procInfo(count);
-  Type indexType = b.getIndexType();
-  for (unsigned i = 0; i < count; ++i) {
-    gpu::Dimension dim = *gpu::symbolizeDimension(i);
-    procInfo[count - 1 - i] = {b.create<IdOp>(loc, indexType, dim),
-                               b.create<NProcsOp>(loc, indexType, dim),
-                               distributionMethod[count - 1 - i]};
-  }
-  return procInfo;
-}
-
-static void fillTileAndDistributePatterns(MLIRContext *context,
-                                          RewritePatternSet &patterns) {
-  {
-    LinalgLoopDistributionOptions cyclicNprocsEqNiters;
-    SmallVector<DistributionMethod> distributionMethod = {
-        DistributionMethod::CyclicNumProcsEqNumIters,
-        DistributionMethod::CyclicNumProcsEqNumIters};
-    cyclicNprocsEqNiters.procInfo =
-        [distributionMethod](OpBuilder &b, Location loc,
-                             ArrayRef<Range> parallelLoopRanges) {
-          return getGpuProcIds<gpu::ThreadIdOp, gpu::BlockDimOp>(
-              b, loc, parallelLoopRanges, distributionMethod);
-        };
-    patterns.add<LinalgTilingPattern>(
-        MatmulOp::getOperationName(), context,
-        LinalgTilingOptions()
-            .setTileSizes({8, 8, 4})
-            .setLoopType(LinalgTilingLoopType::ParallelLoops)
-            .setDistributionOptions(cyclicNprocsEqNiters),
-        LinalgTransformationFilter(
-            StringAttr::get(context, "distribute1"),
-            StringAttr::get(context, "after_distribute1")));
-  }
-
-  {
-    LinalgLoopDistributionOptions cyclicNprocsGeNiters;
-    SmallVector<DistributionMethod> distributionMethod = {
-        DistributionMethod::CyclicNumProcsGeNumIters,
-        DistributionMethod::CyclicNumProcsGeNumIters};
-    cyclicNprocsGeNiters.procInfo =
-        [distributionMethod](OpBuilder &b, Location loc,
-                             ArrayRef<Range> parallelLoopRanges) {
-          return getGpuProcIds<gpu::ThreadIdOp, gpu::BlockDimOp>(
-              b, loc, parallelLoopRanges, distributionMethod);
-        };
-    patterns.add<LinalgTilingPattern>(
-        MatmulOp::getOperationName(), context,
-        LinalgTilingOptions()
-            .setTileSizes({8, 8, 4})
-            .setLoopType(LinalgTilingLoopType::ParallelLoops)
-            .setDistributionOptions(cyclicNprocsGeNiters),
-        LinalgTransformationFilter(
-            StringAttr::get(context, "distribute2"),
-            StringAttr::get(context, "after_distribute2")));
-  }
-
-  {
-    LinalgLoopDistributionOptions cyclicNprocsDefault;
-    SmallVector<DistributionMethod> distributionMethod = {
-        DistributionMethod::Cyclic, DistributionMethod::Cyclic};
-    cyclicNprocsDefault.procInfo =
-        [distributionMethod](OpBuilder &b, Location loc,
-                             ArrayRef<Range> parallelLoopRanges) {
-          return getGpuProcIds<gpu::ThreadIdOp, gpu::BlockDimOp>(
-              b, loc, parallelLoopRanges, distributionMethod);
-        };
-    patterns.add<LinalgTilingPattern>(
-        MatmulOp::getOperationName(), context,
-        LinalgTilingOptions()
-            .setTileSizes({8, 8, 4})
-            .setLoopType(LinalgTilingLoopType::ParallelLoops)
-            .setDistributionOptions(cyclicNprocsDefault),
-        LinalgTransformationFilter(
-            StringAttr::get(context, "distribute3"),
-            StringAttr::get(context, "after_distribute3")));
-  }
-
-  {
-    LinalgLoopDistributionOptions cyclicNprocsMixed1;
-    SmallVector<DistributionMethod> distributionMethod = {
-        DistributionMethod::CyclicNumProcsEqNumIters,
-        DistributionMethod::CyclicNumProcsGeNumIters};
-    cyclicNprocsMixed1.procInfo =
-        [distributionMethod](OpBuilder &b, Location loc,
-                             ArrayRef<Range> parallelLoopRanges) {
-          return getGpuProcIds<gpu::ThreadIdOp, gpu::BlockDimOp>(
-              b, loc, parallelLoopRanges, distributionMethod);
-        };
-    patterns.add<LinalgTilingPattern>(
-        MatmulOp::getOperationName(), context,
-        LinalgTilingOptions()
-            .setTileSizes({8, 8, 4})
-            .setLoopType(LinalgTilingLoopType::ParallelLoops)
-            .setDistributionOptions(cyclicNprocsMixed1),
-        LinalgTransformationFilter(
-            StringAttr::get(context, "distribute4"),
-            StringAttr::get(context, "after_distribute4")));
-  }
-
-  {
-    LinalgLoopDistributionOptions cyclicNprocsMixed2;
-    SmallVector<DistributionMethod> distributionMethod = {
-        DistributionMethod::CyclicNumProcsGeNumIters,
-        DistributionMethod::Cyclic};
-    cyclicNprocsMixed2.procInfo =
-        [distributionMethod](OpBuilder &b, Location loc,
-                             ArrayRef<Range> parallelLoopRanges) {
-          return getGpuProcIds<gpu::ThreadIdOp, gpu::BlockDimOp>(
-              b, loc, parallelLoopRanges, distributionMethod);
-        };
-    patterns.add<LinalgTilingPattern>(
-        MatmulOp::getOperationName(), context,
-        LinalgTilingOptions()
-            .setTileSizes({8, 8, 4})
-            .setLoopType(LinalgTilingLoopType::ParallelLoops)
-            .setDistributionOptions(cyclicNprocsMixed2),
-        LinalgTransformationFilter(
-            StringAttr::get(context, "distribute5"),
-            StringAttr::get(context, "after_distribute5")));
-  }
-
-  {
-    LinalgLoopDistributionOptions cyclicNprocsMixed3;
-    SmallVector<DistributionMethod> distributionMethod = {
-        DistributionMethod::Cyclic,
-        DistributionMethod::CyclicNumProcsEqNumIters};
-    cyclicNprocsMixed3.procInfo =
-        [distributionMethod](OpBuilder &b, Location loc,
-                             ArrayRef<Range> parallelLoopRanges) {
-          return getGpuProcIds<gpu::ThreadIdOp, gpu::BlockDimOp>(
-              b, loc, parallelLoopRanges, distributionMethod);
-        };
-
-    patterns.add<LinalgTilingPattern>(
-        MatmulOp::getOperationName(), context,
-        LinalgTilingOptions()
-            .setTileSizes({8, 8, 4})
-            .setLoopType(LinalgTilingLoopType::ParallelLoops)
-            .setDistributionOptions(cyclicNprocsMixed3),
-        LinalgTransformationFilter(
-            StringAttr::get(context, "distribute6"),
-            StringAttr::get(context, "after_distribute6")));
-  }
-
-  {
-    LinalgLoopDistributionOptions cyclicNprocsEqNiters;
-    SmallVector<DistributionMethod> distributionMethod = {
-        DistributionMethod::Cyclic, DistributionMethod::Cyclic};
-    cyclicNprocsEqNiters.procInfo =
-        [distributionMethod](OpBuilder &b, Location loc,
-                             ArrayRef<Range> parallelLoopRanges) {
-          return getGpuProcIds<gpu::ThreadIdOp, gpu::BlockDimOp>(
-              b, loc, parallelLoopRanges, distributionMethod);
-        };
-    patterns.add<LinalgTilingPattern>(
-        MatmulOp::getOperationName(), context,
-        LinalgTilingOptions()
-            .setTileSizes({8, 8, 4})
-            .setLoopType(LinalgTilingLoopType::Loops)
-            .setDistributionOptions(cyclicNprocsEqNiters),
-        LinalgTransformationFilter(
-            StringAttr::get(context, "tensors_distribute1"),
-            StringAttr::get(context, "tensors_after_distribute1")));
-  }
-}
-
 static void applyVectorTransferForwardingPatterns(func::FuncOp funcOp) {
   RewritePatternSet forwardPattern(funcOp.getContext());
   forwardPattern.add<LinalgCopyVTRForwardingPattern>(funcOp.getContext());
@@ -445,33 +183,6 @@
   (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
 }
 
-static void applyTilePattern(func::FuncOp funcOp, const std::string &loopType,
-                             ArrayRef<int64_t> tileSizes,
-                             ArrayRef<int64_t> peeledLoops,
-                             bool scalarizeDynamicDims) {
-  MLIRContext *context = funcOp.getContext();
-  RewritePatternSet tilingPattern(context);
-  LinalgTilingLoopType type =
-      llvm::StringSwitch<LinalgTilingLoopType>(loopType)
-          .Case("for", LinalgTilingLoopType::Loops)
-          .Case("affine", LinalgTilingLoopType::AffineLoops)
-          .Case("parallel", LinalgTilingLoopType::ParallelLoops);
-  auto linalgTilingOptions = linalg::LinalgTilingOptions()
-                                 .setPeeledLoops(peeledLoops)
-                                 .setLoopType(type);
-  if (scalarizeDynamicDims) {
-    linalgTilingOptions.scalarizeDynamicDims();
-    assert(tileSizes.empty() &&
-           "tileSizes and scalarizeDynamicDims is mutually exclusive");
-  } else {
-    linalgTilingOptions.setTileSizes(tileSizes);
-  }
-  linalg::LinalgTransformationFilter f(StringAttr::get(context, "tile"));
-  TilingPatterns::insert(
-      tilingPattern, linalgTilingOptions, f);
-  (void)applyPatternsAndFoldGreedily(funcOp, std::move(tilingPattern));
-}
-
 static void applySplitReduction(func::FuncOp funcOp) {
   RewritePatternSet patterns(funcOp.getContext());
   linalg::populateSplitReductionPattern(
@@ -521,12 +232,6 @@
   };
   std::unique_ptr<void, decltype(lambda)> cleanupGuard{(void *)1, lambda};
-  if (testTileAndDistributionOptions) {
-    RewritePatternSet patterns(&getContext());
-    fillTileAndDistributePatterns(&getContext(), patterns);
-    (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
-    return;
-  }
   if (testPatterns)
     return applyPatterns(getOperation());
   if (testVectorTransferForwardingPatterns)
@@ -539,12 +244,6 @@
     return applyGeneralizePadTensorPatterns(getOperation());
   if (testSwapSubTensorPadTensor)
     return applyExtractSliceOfPadTensorSwapPattern(getOperation());
-  if (testTilePattern)
-    return applyTilePattern(getOperation(), loopType, tileSizes, peeledLoops,
-                            /*scalarizeDynamicDims=*/false);
-  if (testTileScalarizeDynamicDims)
-    return applyTilePattern(getOperation(), loopType, tileSizes,
-                            /*peeledLoops=*/{}, /*scalarizeDynamicDims=*/true);
   if (testSplitReduction)
     return applySplitReduction(getOperation());
   if (testSplitReductionInnerParallel)
diff --git a/mlir/test/lib/Dialect/Transform/TestTransformDialectInterpreter.cpp b/mlir/test/lib/Dialect/Transform/TestTransformDialectInterpreter.cpp
--- a/mlir/test/lib/Dialect/Transform/TestTransformDialectInterpreter.cpp
+++ b/mlir/test/lib/Dialect/Transform/TestTransformDialectInterpreter.cpp
@@ -57,10 +57,39 @@
       llvm::cl::desc("perform expensive checks to better report errors in the "
                      "transform IR")};
 };
+
+struct TestTransformDialectEraseSchedulePass
+    : public PassWrapper<TestTransformDialectEraseSchedulePass,
+                         OperationPass<ModuleOp>> {
+  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(
+      TestTransformDialectEraseSchedulePass)
+
+  StringRef getArgument() const final {
+    return "test-transform-dialect-erase-schedule";
+  }
+
+  StringRef getDescription() const final {
+    return "erase transform dialect schedule from the IR";
+  }
+
+  void runOnOperation() override {
+    getOperation()->walk<WalkOrder::PreOrder>([&](Operation *nestedOp) {
+      if (isa<::mlir::transform::TransformOpInterface>(nestedOp)) {
+        nestedOp->erase();
+        return WalkResult::skip();
+      }
+      return WalkResult::advance();
+    });
+  }
+};
 } // namespace
 
 namespace mlir {
 namespace test {
+/// Registers the test pass for erasing transform dialect ops.
+void registerTestTransformDialectEraseSchedulePass() {
+  PassRegistration<TestTransformDialectEraseSchedulePass> reg;
+}
 /// Registers the test pass for applying transform dialect ops.
 void registerTestTransformDialectInterpreterPass() {
   PassRegistration<TestTransformDialectInterpreterPass> reg;
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -114,6 +114,7 @@
 void registerTestTensorTransforms();
 void registerTestTilingInterface();
 void registerTestTopologicalSortAnalysisPass();
+void registerTestTransformDialectEraseSchedulePass();
 void registerTestTransformDialectInterpreterPass();
 void registerTestVectorLowerings();
 void registerTestNvgpuLowerings();
@@ -214,6 +215,7 @@
   mlir::test::registerTestTensorTransforms();
   mlir::test::registerTestTilingInterface();
   mlir::test::registerTestTopologicalSortAnalysisPass();
+  mlir::test::registerTestTransformDialectEraseSchedulePass();
   mlir::test::registerTestTransformDialectInterpreterPass();
   mlir::test::registerTestVectorLowerings();
   mlir::test::registerTestNvgpuLowerings();
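
Note on the migration recipe used by the tests above: tile sizes that used to be passed to the removed -linalg-tile pass on the RUN line now live in a transform dialect script embedded in the test file; the script is executed by -test-transform-dialect-interpreter and then stripped by -test-transform-dialect-erase-schedule so that only payload IR reaches the rest of the lowering pipeline. A minimal self-contained sketch in MLIR (the linalg.matmul payload and the 4x4x4 tile sizes are illustrative, not taken from any test in this patch):

// RUN: mlir-opt %s -test-transform-dialect-interpreter \
// RUN:   -test-transform-dialect-erase-schedule -convert-linalg-to-loops

func.func @payload(%A: memref<64x64xf32>, %B: memref<64x64xf32>,
                   %C: memref<64x64xf32>) {
  linalg.matmul ins(%A, %B : memref<64x64xf32>, memref<64x64xf32>)
                outs(%C : memref<64x64xf32>)
  return
}

transform.sequence failures(propagate) {
^bb0(%arg1: !pdl.operation):
  // Match every linalg.matmul in the payload module.
  %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1
  // Tile by 4x4x4: three non-zero tile sizes produce three loops, hence
  // three loop handles. A zero entry (as in the [0, 5, 5, 5] test above)
  // leaves that dimension untiled and binds one fewer loop handle.
  %tiled, %loops:3 = transform.structured.tile %0 [4, 4, 4]
}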
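For reference, the new erase-schedule test pass walks the module in pre-order, erases every operation implementing TransformOpInterface, and skips the erased op's body, so the payload IR is left untouched for the downstream passes. Roughly, on an illustrative module:

// Input fed to mlir-opt -test-transform-dialect-erase-schedule:
module {
  func.func @payload() { ... }
  transform.sequence failures(propagate) { ... }
}

// Output: only the payload survives for the rest of the pipeline.
module {
  func.func @payload() { ... }
}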