diff --git a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
--- a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
@@ -488,11 +488,6 @@
   // Compute new offsets, lengths, low padding, high padding.
   SmallVector<OpFoldResult> newOffsets, newLengths, newStrides;
   SmallVector<OpFoldResult> newLows, newHighs;
-  // Set to true if the original data source is not read at all.
-  bool hasZeroLen = false;
-  // Same as hasZeroLen, but for dynamic dimension sizes. This condition
-  // is true if the original data source turns out to be unused at runtime.
-  Value dynHasZeroLenCond;
 
   int64_t rank = padOp.getSourceType().getRank();
   for (unsigned dim = 0; dim < rank; ++dim) {
@@ -506,10 +501,12 @@
 
     // The new amount of low padding is `low - offset`. Except for the case
     // where none of the low padding is read. In that case, the new amount of
-    // low padding is zero.
+    // low padding is zero. Also, do not add more low padding than the size of
+    // result dimension.
     //
     // Optimization: If low = 0, then newLow = 0.
-    OpFoldResult newLow = hasLowPad ? max(zero, sub(low, offset)) : zero;
+    OpFoldResult newLow =
+        hasLowPad ? min(max(zero, sub(low, offset)), length) : zero;
     newLows.push_back(newLow);
 
     // Start reading the data from position `offset - low`. Since the original
@@ -554,21 +551,6 @@
     OpFoldResult newLength = sub(endLoc, newOffset);
     newLengths.push_back(newLength);
 
-    // Check if newLength is zero. In that case, no SubTensorOp should be
-    // executed.
-    if (isConstantIntValue(newLength, 0)) {
-      hasZeroLen = true;
-    } else if (!hasZeroLen) {
-      Value check = b.create<arith::CmpIOp>(
-          loc, arith::CmpIPredicate::eq,
-          getValueOrCreateConstantIndexOp(b, loc, newLength),
-          getValueOrCreateConstantIndexOp(b, loc, zero));
-      dynHasZeroLenCond =
-          dynHasZeroLenCond
-              ? b.create<arith::OrIOp>(loc, check, dynHasZeroLenCond)
-              : check;
-    }
-
     // The amount of high padding is simply the number of elements remaining,
     // so that the result has the same length as the original ExtractSliceOp.
     // As an optimization, if the original high padding is zero, then the new
@@ -595,27 +577,14 @@
     return b.create<tensor::CastOp>(loc, resultType, val);
   };
 
-  // In cases where the original data source is unused: Emit a GenerateOp and
-  // do not generate a SliceOp. (The result shape of the SliceOp would
-  // have a dimension of size 0, the semantics of which is unclear.)
-  auto createGenerateOp = [&]() {
-    // Create GenerateOp.
-    auto generateOp = b.create<tensor::GenerateOp>(
-        loc, resultType, dynDims,
-        [&](OpBuilder &builder, Location gLoc, ValueRange indices) {
-          builder.create<tensor::YieldOp>(gLoc, padValue);
-        });
-    return generateOp;
-  };
-
-  // Emit a SliceOp and a PadOp. Should not be used in cases where
-  // the result shape of the new SliceOp has a zero dimension.
+  // Emit a SliceOp and a PadOp.
   auto createPadOfExtractSlice = [&]() {
-    // Create pad(extract_slice(x)).
-    Value newSliceOp = b.create<tensor::ExtractSliceOp>(
-        loc, padOp.getSource(), newOffsets, newLengths, newStrides);
+    Value newSlice =
+        b.create<tensor::ExtractSliceOp>(loc, padOp.getSource(), newOffsets,
+                                         newLengths, newStrides)
+            .getResult();
     auto newPadOp = b.create<PadOp>(
-        loc, Type(), newSliceOp, newLows, newHighs,
+        loc, Type(), newSlice, newLows, newHighs,
         /*nofold=*/padOp.getNofold(),
         getPrunedAttributeList(padOp, PadOp::getAttributeNames()));
 
@@ -627,33 +596,6 @@
     return newPadOp;
   };
 
-  // Rewrite extract_slice(pad(x)) into a GenerateOp it is statically known that
-  // the original data source x is not used.
-  if (hasZeroLen) {
-    Operation *generateOp = createGenerateOp();
-    return TilingResult{{generateOp}, {castResult(generateOp->getResult(0))}};
-  }
-
-  // If there are dynamic dimensions: Generate an scf.if check to avoid
-  // creating SliceOps with result dimensions of size 0 at runtime.
-  if (generateZeroSliceGuard && dynHasZeroLenCond) {
-    Operation *thenOp;
-    Operation *elseOp;
-    auto result = b.create<scf::IfOp>(
-        loc, dynHasZeroLenCond,
-        /*thenBuilder=*/
-        [&](OpBuilder &b, Location loc) {
-          thenOp = createGenerateOp();
-          b.create<scf::YieldOp>(loc, castResult(thenOp->getResult(0)));
-        },
-        /*elseBuilder=*/
-        [&](OpBuilder &b, Location loc) {
-          elseOp = createPadOfExtractSlice();
-          b.create<scf::YieldOp>(loc, castResult(elseOp->getResult(0)));
-        });
-    return TilingResult{{elseOp}, SmallVector<Value>(result->getResults())};
-  }
-
   Operation *newPadOp = createPadOfExtractSlice();
   return TilingResult{{newPadOp}, {castResult(newPadOp->getResult(0))}};
 }
diff --git a/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir b/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
--- a/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
+++ b/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
@@ -18,9 +18,9 @@
 // CHECK-LABEL: @static_high_pad_only
 // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
-// CHECK-NOT: tensor.pad
 // CHECK-NOT: tensor.extract_slice
-// CHECK: %[[RESULT:.*]] = tensor.generate
+// CHECK: %[[EMPTY:.*]] = tensor.extract_slice %[[ARG0]][4, 5] [0, 0] [1, 1] : tensor<4x5xf32> to tensor<0x0xf32>
+// CHECK: %[[RESULT:.*]] = tensor.pad %[[EMPTY]] low[0, 0] high[2, 4]
 // CHECK: tensor.yield %[[PAD]]
 // CHECK: return %[[RESULT]] : tensor<2x4xf32>
 
 func.func @static_high_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32)
@@ -37,9 +37,9 @@
 // CHECK-LABEL: @static_low_pad_only
 // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
-// CHECK-NOT: tensor.pad
 // CHECK-NOT: tensor.extract_slice
-// CHECK: %[[RESULT:.*]] = tensor.generate
+// CHECK: %[[EMPTY:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [0, 0] [1, 1] : tensor<4x5xf32> to tensor<0x0xf32>
+// CHECK: %[[RESULT:.*]] = tensor.pad %[[EMPTY]] low[2, 3] high[0, 0]
 // CHECK: tensor.yield %[[PAD]]
 // CHECK: return %[[RESULT]] : tensor<2x3xf32>
 
 func.func @static_low_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32)
@@ -56,9 +56,9 @@
 // CHECK-LABEL: @static_low_pad_only_2
 // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
-// CHECK-NOT: tensor.pad
 // CHECK-NOT: tensor.extract_slice
-// CHECK: %[[RESULT:.*]] = tensor.generate
+// CHECK: %[[EMPTY:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [0, 0] [1, 1] : tensor<4x5xf32> to tensor<0x0xf32>
+// CHECK: %[[RESULT:.*]] = tensor.pad %[[EMPTY]] low[1, 3] high[0, 0]
 // CHECK: tensor.yield %[[PAD]]
 // CHECK: return %[[RESULT]] : tensor<1x3xf32>
 
 func.func @static_low_pad_only_2(%arg0 : tensor<4x5xf32>, %pad : f32)
@@ -134,15 +134,9 @@
 // CHECK-NOT: tensor.pad
 // CHECK: %[[C0:.*]] = arith.constant 0 : index
 // CHECK: tensor.dim %[[ARG0]], %[[C0]]
-// CHECK: %[[RESULT:.*]] = scf.if %{{.*}} -> (tensor<3x4xf32>) {
-// CHECK: %[[GEN:.*]] = tensor.generate
-// CHECK: scf.yield %[[GEN]]
-// CHECK: } else {
-// CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][%{{.*}}, 4] [%{{.*}}, 1] [1, 1] : tensor to tensor
-// CHECK: %[[PADTENSOR:.*]] = tensor.pad %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3]
-// CHECK: scf.yield %[[PADTENSOR]]
-// CHECK: }
-// CHECK: return %[[RESULT]]
+// CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][%{{.*}}, 4] [%{{.*}}, 1] [1, 1] : tensor to tensor
+// CHECK: %[[PADTENSOR:.*]] = tensor.pad %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3]
+// CHECK: return %[[PADTENSOR]]
 func.func @dynamic_high_pad(%arg0 : tensor, %h1: index, %pad : f32) -> tensor<3x4xf32> {
   %0 = tensor.pad %arg0 low[0, 0] high[%h1, 8] {
     ^bb0(%arg1: index, %arg2: index):
@@ -159,15 +153,9 @@
 // CHECK-NOT: tensor.pad
 // CHECK: %[[C0:.*]] = arith.constant 0 : index
 // CHECK: tensor.dim %[[ARG0]], %[[C0]]
-// CHECK: %[[RESULT:.*]] = scf.if %{{.*}} -> (tensor) {
-// CHECK: %[[GEN:.*]] = tensor.generate %[[ARG1]]
-// CHECK: scf.yield %[[GEN]]
-// CHECK: } else {
-// CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][%{{.*}}, 4] [%{{.*}}, 1] [1, 1] : tensor to tensor
-// CHECK: %[[PADTENSOR:.*]] = tensor.pad %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3]
-// CHECK: scf.yield %[[PADTENSOR]]
-// CHECK: }
-// CHECK: return %[[RESULT]]
+// CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][%{{.*}}, 4] [%{{.*}}, 1] [1, 1] : tensor to tensor
+// CHECK: %[[PADTENSOR:.*]] = tensor.pad %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3]
+// CHECK: return %[[PADTENSOR]]
 func.func @dynamic_extract_size(%arg0 : tensor, %s1: index, %pad : f32) -> tensor {
   %0 = tensor.pad %arg0 low[0, 0] high[7, 8] {
     ^bb0(%arg1: index, %arg2: index):
@@ -180,11 +168,8 @@
 // -----
 
 // CHECK-LABEL: @dynamic_zero_low_padding
-// CHECK: scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice
-// CHECK: tensor.pad %[[SLICE]] low[0, 0]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice
+// CHECK: tensor.pad %[[SLICE]] low[0, 0]
 func.func @dynamic_zero_low_padding(%arg0 : tensor, %pad : f32,
                                     %o1 : index, %o2 : index,
                                     %s1 : index, %s2 : index)
@@ -200,11 +185,8 @@
 // -----
 
 // CHECK-LABEL: @dynamic_zero_high_padding
-// CHECK: scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice
-// CHECK: tensor.pad %[[SLICE]] low[%{{.*}}, %{{.*}}] high[0, 0]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice
+// CHECK: tensor.pad %[[SLICE]] low[%{{.*}}, %{{.*}}] high[0, 0]
 func.func @dynamic_zero_high_padding(%arg0 : tensor, %pad : f32,
                                      %o1 : index, %o2 : index,
                                      %s1 : index, %s2 : index)
diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
@@ -273,15 +273,9 @@
 // CHECK-DAG: %[[C64:.*]] = arith.constant 64 : index
 // CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index
 // CHECK: scf.for %{{.*}} = %[[C0]] to %[[C64]] step %[[C16]]
-// CHECK: %[[CMPI1:.*]] = arith.cmpi eq
 // CHECK: scf.for %{{.*}} = %[[C0]] to %[[C128]] step %[[C32]]
-// CHECK: %[[CMPI2:.*]] = arith.cmpi eq
-// CHECK: %[[HASZERO:.*]] = arith.ori %[[CMPI2]], %[[CMPI1]] : i1
-// CHECK: scf.if %[[HASZERO]]
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: tensor.extract_slice
-// CHECK: tensor.pad
+// CHECK: tensor.extract_slice
+// CHECK: tensor.pad
 // CHECK: tensor.extract_slice
 // CHECK: tensor.extract_slice
 // CHECK: linalg.generic
diff --git a/mlir/test/Dialect/Linalg/transform-op-tile.mlir b/mlir/test/Dialect/Linalg/transform-op-tile.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-tile.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-tile.mlir
@@ -132,10 +132,7 @@
     -> tensor<20x40xf32> {
 
   // CHECK: scf.forall
-  // CHECK: scf.if
-  // CHECK: tensor.generate
-  // CHECK: else
-  // CHECK: tensor.pad {{.*}} nofold
+  // CHECK: tensor.pad {{.*}} nofold
   %0 = tensor.pad %arg0 nofold low[%low, %low] high[%high, %high] {
   ^bb0(%arg9: index, %arg10: index):
     tensor.yield %cst : f32
diff --git a/mlir/test/Dialect/Tensor/tiling.mlir b/mlir/test/Dialect/Tensor/tiling.mlir
--- a/mlir/test/Dialect/Tensor/tiling.mlir
+++ b/mlir/test/Dialect/Tensor/tiling.mlir
@@ -14,12 +14,9 @@
 // CHECK-DAG: %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN1]]]
 // CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM0]] step %[[C2]]
 // CHECK: scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
-// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
+// CHECK: tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
 // CHECK: return %[[RESULT]]
 
 func.func @dynamic_pad_tensor_3_4(%input_tensor: tensor,
@@ -51,12 +48,9 @@
 // CHECK-DAG: %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]]
 // CHECK-DAG: %[[DIM0:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN0]]]
 // CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}]
-// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [%[[DIM0]], {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[{{.*}}, %{{.*}}] high[{{.*}}, {{.*}}]
+// CHECK: tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][0, {{.*}}] [%[[DIM0]], {{.*}}] [1, 1]
 // CHECK: return %[[RESULT]]
 
 func.func @dynamic_pad_tensor_0_3(%input_tensor: tensor,
@@ -85,12 +79,9 @@
 // CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
 // CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C15]] step %[[C2]]
 // CHECK: scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
-// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
+// CHECK: tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
 // CHECK: return %[[RESULT]]
 
 func.func @static_pad_tensor_3_4(%input_tensor: tensor<7x9xf32>,
@@ -116,13 +107,9 @@
 // CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
 // CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
 // CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
-// CHECK: %[[CAST_SWAP_RESULT:.*]] = tensor.cast %[[SWAP_RESULT]] : tensor to tensor<15x?xf32>
-// CHECK: tensor.insert_slice %[[CAST_SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
+// CHECK: tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 1]
 // CHECK: return %[[RESULT]]
 
 func.func @static_pad_tensor_0_3(%input_tensor: tensor<7x9xf32>,
@@ -148,17 +135,9 @@
 // CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
 // CHECK-DAG: %[[C15:.*]] = arith.constant 15 : index
 // CHECK: %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C15]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[R2:.*]] = scf.if
-// CHECK: %[[GEN:.*]] = tensor.generate
-// CHECK: %[[cast_0:.*]] = tensor.cast %[[GEN]] : tensor<14x3xf32> to tensor
-// CHECK: scf.yield %[[cast_0]] : tensor
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %arg0[0, %{{.*}}] [7, %{{.*}}] [1, 1] : tensor<7x9xf32> to tensor<7x?xf32>
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[0, 0] high[7, %{{.*}}]
-// CHECK: %[[cast_1:.*]] = tensor.cast %[[PAD]] : tensor<14x?xf32> to tensor
-// CHECK: scf.yield %[[cast_1]] : tensor
-// CHECK: %[[cast:.*]] = tensor.cast %[[R2]] : tensor to tensor<14x3xf32>
-// CHECK: %[[R3:.*]] = tensor.insert_slice %[[cast]] into %[[INNER_OUT]][0, %[[IV]]] [14, 3] [1, 1] : tensor<14x3xf32> into tensor<14x15xf32>
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %arg0[0, %{{.*}}] [7, %{{.*}}] [1, 1] : tensor<7x9xf32> to tensor<7x?xf32>
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[0, 0] high[7, %{{.*}}]
+// CHECK: %[[R3:.*]] = tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][0, %[[IV]]] [14, 3] [1, 1] : tensor<14x3xf32> into tensor<14x15xf32>
 // CHECK: scf.yield %[[R3]] : tensor<14x15xf32>
 // CHECK: return %[[RESULT]] : tensor<14x15xf32>
 
diff --git a/mlir/test/Interfaces/TilingInterface/tile-pad-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-pad-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/tile-pad-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-pad-using-interface.mlir
@@ -23,12 +23,9 @@
 // CHECK: %[[DIM1:.+]] = affine.apply #[[MAP1]]()[%[[DIM_IN1]]]
 // CHECK: %[[RESULT:[a-zA-Z0-9]+]] = scf.for %[[IV0:[a-zA-Z0-9]+]] = %[[C0]] to %[[DIM0]] step %[[C2]]
 // CHECK: scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
-// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
+// CHECK: tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
 // CHECK: return %[[RESULT]]
 
 // -----
@@ -53,12 +50,9 @@
 // CHECK: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]]
 // CHECK: %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN1]]]
 // CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}]
-// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][%[[C0]], {{.*}}] [%[[DIM0]], {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[{{.*}}, %{{.*}}] high[{{.*}}, {{.*}}]
+// CHECK: tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][%[[C0]], {{.*}}] [%[[DIM0]], {{.*}}] [1, 1]
 // CHECK: return %[[RESULT]]
 
 // -----
@@ -80,12 +74,9 @@
 // CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
 // CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C15]] step %[[C2]]
 // CHECK: scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
-// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
+// CHECK: tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
 // CHECK: return %[[RESULT]]
 
 // -----
@@ -105,12 +96,10 @@
 // CHECK-DAG: %[[C15:.*]] = arith.constant 15 : index
 // CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
 // CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
-// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][%[[C0]], {{.*}}] [%[[C15]], {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
+// CHECK: %[[CAST:.*]] = tensor.cast %[[PAD]]
+// CHECK: tensor.insert_slice %[[CAST]] into %[[INNER_OUT]][%[[C0]], {{.*}}] [%[[C15]], {{.*}}] [1, 1]
 // CHECK: return %[[RESULT]]
 
 /// Rest of the tests only check that they dont fail.
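
Note (illustration only, not part of the patch): with the zero-size guard removed, a tile that reads no source data is still lowered to pad(extract_slice(x)) with a zero-extent slice, rather than to a tensor.generate. A hand-written MLIR sketch mirroring the @static_high_pad_only test above; the function and value names are illustrative:

func.func @zero_len_tile(%src: tensor<4x5xf32>, %pad: f32) -> tensor<2x4xf32> {
  // Zero-sized slice of the original source; nothing is actually read.
  %empty = tensor.extract_slice %src[4, 5] [0, 0] [1, 1]
      : tensor<4x5xf32> to tensor<0x0xf32>
  // Every element of the 2x4 tile comes from the padding value.
  %tile = tensor.pad %empty low[0, 0] high[2, 4] {
  ^bb0(%i: index, %j: index):
    tensor.yield %pad : f32
  } : tensor<0x0xf32> to tensor<2x4xf32>
  return %tile : tensor<2x4xf32>
}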