diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h
--- a/mlir/include/mlir/Dialect/Linalg/Passes.h
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.h
@@ -26,6 +26,9 @@
 std::unique_ptr<OperationPass<FuncOp>>
 createLinalgTilingPass(ArrayRef<int64_t> tileSizes = {});
 
+std::unique_ptr<OperationPass<FuncOp>>
+createLinalgPadTensorTilingPass(ArrayRef<int64_t> tileSizes = {});
+
 std::unique_ptr<OperationPass<FuncOp>>
 createLinalgTilingToParallelLoopsPass(ArrayRef<int64_t> tileSizes = {});
 
diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td
--- a/mlir/include/mlir/Dialect/Linalg/Passes.td
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.td
@@ -171,6 +171,21 @@
   ];
 }
 
+def LinalgPadTensorOpTiling : FunctionPass<"linalg-tile-pad-tensor-ops"> {
+  let summary = "Tile linalg.pad_tensor operations";
+  let constructor = "mlir::createLinalgPadTensorTilingPass()";
+  let dependentDialects = [
+    "AffineDialect",
+    "linalg::LinalgDialect",
+    "memref::MemRefDialect",
+    "scf::SCFDialect"
+  ];
+  let options = [
+    ListOption<"tileSizes", "linalg-tile-sizes", "int64_t", "Tile sizes",
+               "llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated">
+  ];
+}
+
 def LinalgTilingToParallelLoops
     : FunctionPass<"linalg-tile-to-parallel-loops"> {
   let summary = "Tile operations in the linalg dialect to parallel loops";
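At a high level, the new -linalg-tile-pad-tensor-ops pass rewrites a linalg.pad_tensor that carries an output operand into an scf.for nest that materializes the padded result tile by tile. A rough before/after sketch (shapes, tile sizes, and SSA names are illustrative and abbreviated; the exact IR is pinned down by the new test file at the end of this patch):

  %0 = linalg.pad_tensor %in, %out low[3, 4] high[5, 3] {
  ^bb0(%i: index, %j: index):
    linalg.yield %pad : f32
  } : tensor<7x9xf32> to tensor<15x16xf32>

becomes, with tile sizes 2,3:

  %0 = scf.for %i = %c0 to %c15 step %c2 iter_args(%out0 = %out) -> (tensor<15x16xf32>) {
    %1 = scf.for %j = %c0 to %c16 step %c3 iter_args(%out1 = %out0) -> (tensor<15x16xf32>) {
      // Each tile is either pure padding (tensor.generate) or a padded
      // slice of the input (tensor.extract_slice + linalg.pad_tensor).
      %tile = scf.if ... else ...
      %2 = tensor.insert_slice %tile into %out1[%i, %j] [...] [1, 1]
      scf.yield %2 : tensor<15x16xf32>
    }
    scf.yield %1 : tensor<15x16xf32>
  }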
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -42,6 +42,10 @@
   inVec = auxVec;
 }
 
+/// Given a value, try to extract a constant integer as an Attribute.
+/// If this fails, return the original value.
+OpFoldResult asOpFoldResult(OpBuilder &builder, Value val);
+
 /// Helper function that creates a memref::DimOp or tensor::DimOp depending on
 /// the type of `source`.
 Value createOrFoldDimOp(OpBuilder &b, Location loc, Value source, int64_t dim);
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -152,6 +152,18 @@
   }
 }
 
+// Insert a tile `source` into the destination tensor `dest`. The position at
+// which the tile is inserted (as well as the size of the tile) is taken from
+// the given ExtractSliceOp `sliceOp`.
+static Value writeTileToTensor(OpBuilder &b, Location loc,
+                               tensor::ExtractSliceOp sliceOp, Value source,
+                               Value dest) {
+  return b.create<tensor::InsertSliceOp>(
+      loc, sliceOp.source().getType(), source, dest, sliceOp.offsets(),
+      sliceOp.sizes(), sliceOp.strides(), sliceOp.static_offsets(),
+      sliceOp.static_sizes(), sliceOp.static_strides());
+}
+
 template <typename LoopTy>
 static Optional<TiledLinalgOp>
 tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ValueRange tileSizes,
@@ -259,11 +271,8 @@
       // `tiledOperands`.
       Value outputTensor = tiledOperands[opOperand->getOperandNumber()];
       if (auto sliceOp = outputTensor.getDefiningOp<tensor::ExtractSliceOp>()) {
-        tensorResults.push_back(b.create<tensor::InsertSliceOp>(
-            loc, sliceOp.source().getType(), res->getResult(resultIdx),
-            sliceOp.source(), sliceOp.offsets(), sliceOp.sizes(),
-            sliceOp.strides(), sliceOp.static_offsets(), sliceOp.static_sizes(),
-            sliceOp.static_strides()));
+        tensorResults.push_back(writeTileToTensor(
+            b, loc, sliceOp, res->getResult(resultIdx), sliceOp.source()));
       } else {
         tensorResults.push_back(res->getResult(resultIdx));
       }
@@ -341,6 +350,74 @@
   return llvm::None;
 }
 
+namespace {
+struct PadTensorOpTilingPattern : public OpRewritePattern<PadTensorOp> {
+  PadTensorOpTilingPattern(MLIRContext *ctx, LinalgTilingOptions opt)
+      : OpRewritePattern<PadTensorOp>(ctx), options(opt),
+        filter(LinalgTransformationFilter(ArrayRef<Identifier>{},
+                                          Identifier::get("tiled", ctx))) {}
+
+  LogicalResult matchAndRewrite(PadTensorOp op,
+                                PatternRewriter &rewriter) const override {
+    // Can only tile PadTensorOps that have an output operand.
+    if (!op.output())
+      return failure();
+    if (failed(filter.checkAndNotify(rewriter, op)))
+      return failure();
+    Location loc = op.getLoc();
+    OpBuilder::InsertionGuard g(rewriter);
+    rewriter.setInsertionPoint(op);
+
+    // Clone the PadTensorOp so that the existing op can be replaced more
+    // easily.
+    Operation *newPadOp = rewriter.clone(*op.getOperation());
+    // Get rank and tile sizes.
+    int64_t rank = op.getResultType().getRank();
+    SmallVector<Value> tileSizes =
+        options.tileSizeComputationFunction(rewriter, op);
+    assert(static_cast<int64_t>(tileSizes.size()) == rank);
+    // Compute lower and upper bounds of the loop nest.
+    SmallVector<Value> lbs, dims, steps;
+    for (int64_t i = 0; i < rank; ++i) {
+      if (!isZero(tileSizes[i])) {
+        lbs.push_back(rewriter.create<ConstantIndexOp>(loc, 0));
+        dims.push_back(rewriter.create<tensor::DimOp>(loc, op.output(), i));
+        steps.push_back(tileSizes[i]);
+      }
+    }
+    // Generate loop nest: One loop per dimension.
+    LoopNest loopNest = mlir::scf::buildLoopNest(
+        rewriter, loc, lbs, /*ubs=*/dims, steps, ValueRange(op.output()),
+        [&](OpBuilder &b, Location loc, ValueRange localIvs,
+            ValueRange iterArgs) -> scf::ValueVector {
+          // Compute offsets and sizes of the ExtractSliceOp.
+          SmallVector<Value> offsets =
+              computeTileOffsets(b, loc, localIvs, tileSizes);
+          SmallVector<Value> sizes =
+              computeTileSizes(b, loc, localIvs, tileSizes, dims);
+          // Create ExtractSliceOp: Extract a tile from the PadTensorOp.
+          // Note: The PadTensorOp is located outside of the loop nest. It is
+          // later moved inside by ExtractSliceOfPadTensorSwapPattern.
+          auto map = AffineMap::getMultiDimIdentityMap(rank, b.getContext());
+          Value tiledOutput = makeTiledShape(b, loc, newPadOp->getResult(0),
+                                             tileSizes, map, offsets, sizes);
+          auto sliceOp = tiledOutput.getDefiningOp<tensor::ExtractSliceOp>();
+          assert(sliceOp && "expected ExtractSliceOp");
+          // Insert the tile into the output tensor.
+          Value yieldValue =
+              writeTileToTensor(b, loc, sliceOp, sliceOp, iterArgs[0]);
+          return scf::ValueVector({yieldValue});
+        });
+    // Replace all uses of the original PadTensorOp.
+    rewriter.replaceOp(op, loopNest.getResults()[0]);
+    filter.replaceLinalgTransformationFilter(rewriter, newPadOp);
+    return success();
+  }
+
+  LinalgTilingOptions options;
+  LinalgTransformationFilter filter;
+};
+} // namespace
+
 namespace {
 /// Helper classes for type list expansion.
 template <typename... OpTypes>
@@ -408,6 +485,7 @@
   memref::SubViewOp::getCanonicalizationPatterns(patterns, ctx);
   tensor::CastOp::getCanonicalizationPatterns(patterns, ctx);
   memref::ViewOp::getCanonicalizationPatterns(patterns, ctx);
+  PadTensorOp::getCanonicalizationPatterns(patterns, ctx);
   ctx->getLoadedDialect<LinalgDialect>()->getCanonicalizationPatterns(patterns);
   CanonicalizationPatternList<
 #define GET_OP_LIST
@@ -444,6 +522,26 @@
   });
 }
 
+static void applyPadTensorOpTilingToLoopPatterns(
+    LinalgTilingLoopType loopType, FuncOp funcOp, ArrayRef<int64_t> tileSizes,
+    ArrayRef<StringRef> distributionTypes = {}) {
+  auto options = LinalgTilingOptions()
+                     .setTileSizes(tileSizes)
+                     .setLoopType(loopType)
+                     .setDistributionTypes(distributionTypes);
+  MLIRContext *ctx = funcOp.getContext();
+  RewritePatternSet patterns(ctx);
+  patterns.add<PadTensorOpTilingPattern>(patterns.getContext(), options);
+  patterns.add<ExtractSliceOfPadTensorSwapPattern>(patterns.getContext());
+  (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
+  (void)applyPatternsAndFoldGreedily(
+      funcOp, getLinalgTilingCanonicalizationPatterns(ctx));
+  // Drop the marker.
+  funcOp.walk([](PadTensorOp op) {
+    op->removeAttr(LinalgTransforms::kLinalgTransformMarker);
+  });
+}
+
 namespace {
 struct LinalgTilingPass : public LinalgTilingBase<LinalgTilingPass> {
   LinalgTilingPass() = default;
@@ -455,6 +553,17 @@
   }
 };
 
+struct LinalgPadTensorOpTilingPass
+    : public LinalgPadTensorOpTilingBase<LinalgPadTensorOpTilingPass> {
+  LinalgPadTensorOpTilingPass() = default;
+  LinalgPadTensorOpTilingPass(ArrayRef<int64_t> sizes) { tileSizes = sizes; }
+
+  void runOnFunction() override {
+    applyPadTensorOpTilingToLoopPatterns(LinalgTilingLoopType::Loops,
+                                         getFunction(), tileSizes);
+  }
+};
+
 struct LinalgTilingToParallelLoopsPass
     : public LinalgTilingToParallelLoopsBase<LinalgTilingToParallelLoopsPass> {
   LinalgTilingToParallelLoopsPass() = default;
@@ -494,6 +603,11 @@
   return std::make_unique<LinalgTilingPass>(tileSizes);
 }
 
+std::unique_ptr<OperationPass<FuncOp>>
+mlir::createLinalgPadTensorTilingPass(ArrayRef<int64_t> tileSizes) {
+  return std::make_unique<LinalgPadTensorOpTilingPass>(tileSizes);
+}
+
 std::unique_ptr<OperationPass<FuncOp>>
 mlir::createLinalgTilingToParallelLoopsPass(ArrayRef<int64_t> tileSizes) {
   return std::make_unique<LinalgTilingToParallelLoopsPass>(tileSizes);
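One subtlety in the pattern above is worth illustrating: immediately after PadTensorOpTilingPattern fires, the cloned pad still pads the whole tensor outside of the loop nest, and each iteration merely extracts a tile of its result. A hypothetical sketch of that intermediate IR (names and shapes are illustrative, body abbreviated):

  %padded = linalg.pad_tensor %in, %out low[3, 4] high[5, 3] {...}
  %0 = scf.for ... iter_args(%acc = %out) ... {
    %tile = tensor.extract_slice %padded[%i, %j] [%sz0, %sz1] [1, 1]
    %ins = tensor.insert_slice %tile into %acc[%i, %j] [%sz0, %sz1] [1, 1]
    scf.yield %ins
  }

ExtractSliceOfPadTensorSwapPattern, applied together with this pattern by applyPadTensorOpTilingToLoopPatterns above, then rewrites each extract_slice of a pad_tensor into a pad_tensor of an extract_slice. That moves the padding work inside the loops and introduces the scf.if/tensor.generate structure that the new test file checks for.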
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -709,14 +709,6 @@
   return builder.create<ConstantIndexOp>(loc, *intVal);
 }
 
-/// Given a value, try to extract a constant index-type integer as an Attribute.
-/// If this fails, return the original value.
-static OpFoldResult asOpFoldResult(OpBuilder &builder, Value val) {
-  if (auto constInt = getConstantIntValue(val))
-    return builder.getIndexAttr(*constInt);
-  return val;
-}
-
 LogicalResult ExtractSliceOfPadTensorSwapPattern::matchAndRewrite(
     tensor::ExtractSliceOp sliceOp, PatternRewriter &rewriter) const {
   auto padOp = sliceOp.source().getDefiningOp<PadTensorOp>();
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -157,6 +157,14 @@
 namespace mlir {
 namespace linalg {
 
+/// Given a value, try to extract a constant integer as an Attribute.
+/// If this fails, return the original value.
+OpFoldResult asOpFoldResult(OpBuilder &builder, Value val) {
+  if (auto constInt = getConstantIntValue(val))
+    return builder.getIndexAttr(*constInt);
+  return val;
+}
+
 /// Helper function that creates a memref::DimOp or tensor::DimOp depending on
 /// the type of `source`.
 Value createOrFoldDimOp(OpBuilder &b, Location loc, Value source, int64_t dim) {
@@ -550,7 +558,7 @@
     if (!isTiled(map.getSubMap({r}), tileSizes)) {
       offsets.push_back(builder.getIndexAttr(0));
       Value dim = createOrFoldDimOp(builder, loc, valueToTile, r);
-      sizes.push_back(dim);
+      sizes.push_back(asOpFoldResult(builder, dim));
      strides.push_back(builder.getIndexAttr(1));
       LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
       continue;
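The practical effect of routing the size through asOpFoldResult shows up when the shape is static: the size of an untiled dimension used to be carried as an SSA value, which keeps the slice type dynamic even when the size is a known constant. A hypothetical one-dimensional illustration:

  // Without the fold: the size is passed as an SSA value, so the slice
  // type stays dynamic even though the size is a compile-time constant.
  %c7 = constant 7 : index
  %0 = tensor.extract_slice %t[0] [%c7] [1] : tensor<7xf32> to tensor<?xf32>

  // With asOpFoldResult: the constant becomes an attribute, and the slice
  // gets a static size and a static result type.
  %1 = tensor.extract_slice %t[0] [7] [1] : tensor<7xf32> to tensor<7xf32>

This is what allows the static test case below to check literal sizes such as [7, {{.*}}] and [15, {{.*}}] rather than SSA values.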
diff --git a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir
@@ -0,0 +1,94 @@
+// RUN: mlir-opt %s -linalg-tile-pad-tensor-ops="linalg-tile-sizes=2,3" -cse -split-input-file | \
+// RUN:   FileCheck %s -check-prefix=TILE2
+// RUN: mlir-opt %s -linalg-tile-pad-tensor-ops="linalg-tile-sizes=0,3" -cse -split-input-file | \
+// RUN:   FileCheck %s -check-prefix=TILE1
+
+// TILE2-LABEL: func @dynamic_pad_tensor(
+// TILE2-SAME:  %[[IN:.*]]: tensor<?x?xf32>, %[[OUT:.*]]: tensor<?x?xf32>
+// TILE2-DAG:   %[[C0:.*]] = constant 0 : index
+// TILE2-DAG:   %[[C1:.*]] = constant 1 : index
+// TILE2-DAG:   %[[C2:.*]] = constant 2 : index
+// TILE2-DAG:   %[[C3:.*]] = constant 3 : index
+// TILE2:       %[[DIM0:.*]] = tensor.dim %[[OUT]], %[[C0]]
+// TILE2:       %[[DIM1:.*]] = tensor.dim %[[OUT]], %[[C1]]
+// TILE2:       %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM0]] step %[[C2]]
+// TILE2:         scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
+// TILE2:           %[[SWAP_RESULT:.*]] = scf.if
+// TILE2:             tensor.generate
+// TILE2:           else
+// TILE2:             %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// TILE2:             %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]]
+// TILE2:           tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// TILE2:       return %[[RESULT]]
+
+// TILE1-LABEL: func @dynamic_pad_tensor(
+// TILE1-SAME:  %[[IN:.*]]: tensor<?x?xf32>, %[[OUT:.*]]: tensor<?x?xf32>
+// TILE1-DAG:   %[[C0:.*]] = constant 0 : index
+// TILE1-DAG:   %[[C1:.*]] = constant 1 : index
+// TILE1-DAG:   %[[C3:.*]] = constant 3 : index
+// TILE1:       %[[DIM1:.*]] = tensor.dim %[[OUT]], %[[C1]]
+// TILE1:       %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
+// TILE1:         %[[DIM0:.*]] = tensor.dim %[[OUT]], %[[C0]]
+// TILE1:         %[[SWAP_RESULT:.*]] = scf.if
+// TILE1:           tensor.generate
+// TILE1:         else
+// TILE1:           %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// TILE1:           %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}]
+// TILE1:         tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [%[[DIM0]], {{.*}}] [1, 1]
+// TILE1:       return %[[RESULT]]
+
+func @dynamic_pad_tensor(%input_tensor: tensor<?x?xf32>,
+                         %output_tensor: tensor<?x?xf32>,
+                         %pad_value: f32) -> tensor<?x?xf32> {
+  %0 = linalg.pad_tensor %input_tensor, %output_tensor
+      low[3, 4] high[5, 3] {
+    ^bb0(%arg1: index, %arg2: index):
+      linalg.yield %pad_value : f32
+    } : tensor<?x?xf32> to tensor<?x?xf32>
+  return %0 : tensor<?x?xf32>
+}
+
+// -----
+
+// TILE2-LABEL: func @static_pad_tensor(
+// TILE2-SAME:  %[[IN:.*]]: tensor<7x9xf32>, %[[OUT:.*]]: tensor<15x16xf32>
+// TILE2-DAG:   %[[C0:.*]] = constant 0 : index
+// TILE2-DAG:   %[[C2:.*]] = constant 2 : index
+// TILE2-DAG:   %[[C3:.*]] = constant 3 : index
+// TILE2-DAG:   %[[C15:.*]] = constant 15 : index
+// TILE2-DAG:   %[[C16:.*]] = constant 16 : index
+// TILE2:       %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C15]] step %[[C2]]
+// TILE2:         scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
+// TILE2:           %[[SWAP_RESULT:.*]] = scf.if
+// TILE2:             tensor.generate
+// TILE2:           else
+// TILE2:             %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// TILE2:             %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]]
+// TILE2:           tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// TILE2:       return %[[RESULT]]
+
+// TILE1-LABEL: func @static_pad_tensor(
+// TILE1-SAME:  %[[IN:.*]]: tensor<7x9xf32>, %[[OUT:.*]]: tensor<15x16xf32>
+// TILE1-DAG:   %[[C0:.*]] = constant 0 : index
+// TILE1-DAG:   %[[C3:.*]] = constant 3 : index
+// TILE1-DAG:   %[[C16:.*]] = constant 16 : index
+// TILE1:       %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
+// TILE1:         %[[SWAP_RESULT:.*]] = scf.if
+// TILE1:           tensor.generate
+// TILE1:         else
+// TILE1:           %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
+// TILE1:           %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
+// TILE1:         tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 1]
+// TILE1:       return %[[RESULT]]
+
+func @static_pad_tensor(%input_tensor: tensor<7x9xf32>,
+                        %output_tensor: tensor<15x16xf32>,
+                        %pad_value: f32) -> tensor<15x16xf32> {
+  %0 = linalg.pad_tensor %input_tensor, %output_tensor
+      low[3, 4] high[5, 3] {
+    ^bb0(%arg1: index, %arg2: index):
+      linalg.yield %pad_value : f32
+    } : tensor<7x9xf32> to tensor<15x16xf32>
+  return %0 : tensor<15x16xf32>
+}
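For contrast, here is a hypothetical input that the new pattern deliberately leaves alone (the function name is illustrative and not part of the test file): matchAndRewrite bails out on any linalg.pad_tensor without the optional output operand, so a plain pad passes through the pass unchanged.

  func @no_output_operand(%in: tensor<7x9xf32>, %pad_value: f32) -> tensor<15x16xf32> {
    // No output tensor is supplied, so PadTensorOpTilingPattern returns
    // failure() and no loop nest is generated.
    %0 = linalg.pad_tensor %in low[3, 4] high[5, 3] {
    ^bb0(%arg1: index, %arg2: index):
      linalg.yield %pad_value : f32
    } : tensor<7x9xf32> to tensor<15x16xf32>
    return %0 : tensor<15x16xf32>
  }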