diff --git a/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
--- a/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
@@ -26,6 +26,11 @@
 SmallVector<Value> createDynamicDimValues(OpBuilder &b, Location loc,
                                           Value rankedTensor);
 
+// Returns the tensor extent along dimension `dim` if `rankedTensor` is of
+// `RankedTensorType`. Returns `failure()` otherwise.
+FailureOr<OpFoldResult> createDimValue(OpBuilder &b, Location loc,
+                                       Value rankedTensor, int64_t dim);
+
 // Creates dim ops or constant ops for each dimension of the ranked tensor
 // argument and returns these as values.
 SmallVector<OpFoldResult> createDimValues(OpBuilder &b, Location loc,
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
--- a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
@@ -459,25 +459,25 @@
   bindDims(b.getContext(), dim0, dim1);
   // Add two integers.
   auto addMap = AffineMap::get(2, 0, {dim0 + dim1});
-  auto add = [&](Value v1, Value v2) {
-    return b.createOrFold<AffineApplyOp>(loc, addMap, ValueRange{v1, v2});
+  auto add = [&](OpFoldResult v1, OpFoldResult v2) {
+    return makeComposedFoldedAffineApply(b, loc, addMap, {v1, v2});
   };
   // Subtract two integers.
   auto subMap = AffineMap::get(2, 0, {dim0 - dim1});
-  auto sub = [&](Value v1, Value v2) {
-    return b.createOrFold<AffineApplyOp>(loc, subMap, ValueRange{v1, v2});
+  auto sub = [&](OpFoldResult v1, OpFoldResult v2) {
+    return makeComposedFoldedAffineApply(b, loc, subMap, {v1, v2});
   };
   // Take the minimum of two integers.
   auto idMap = AffineMap::getMultiDimIdentityMap(2, b.getContext());
-  auto min = [&](Value v1, Value v2) {
-    return b.createOrFold<AffineMinOp>(loc, idMap, ValueRange{v1, v2});
+  auto min = [&](OpFoldResult v1, OpFoldResult v2) {
+    return makeComposedFoldedAffineMin(b, loc, idMap, {v1, v2});
   };
   // Take the maximum of two integers.
-  auto max = [&](Value v1, Value v2) {
-    return b.createOrFold<AffineMaxOp>(loc, idMap, ValueRange{v1, v2});
+  auto max = [&](OpFoldResult v1, OpFoldResult v2) {
+    return makeComposedFoldedAffineMax(b, loc, idMap, {v1, v2});
   };
   // Zero index-typed integer.
-  auto zero = b.create<arith::ConstantIndexOp>(loc, 0);
+  OpFoldResult zero = b.getIndexAttr(0);
 
   // Helper function for filling static/dynamic low/high padding indices
   // vectors of PadOp.
@@ -493,8 +493,7 @@
   // Compute new offsets, lengths, low padding, high padding.
   SmallVector<OpFoldResult> newOffsets, newLengths, newStrides;
-  SmallVector<Value> newLows, newHighs;
-  SmallVector<int64_t> staticNewLows, staticNewHighs;
+  SmallVector<OpFoldResult> newLows, newHighs;
   // Set to true if the original data source is not read at all.
   bool hasZeroLen = false;
   // Same as hasZeroLen, but for dynamic dimension sizes. This condition
@@ -503,23 +502,22 @@
   int64_t rank = padOp.getSourceType().getRank();
   for (unsigned dim = 0; dim < rank; ++dim) {
-    auto low =
-        getValueOrCreateConstantIndexOp(b, loc, padOp.getMixedLowPad()[dim]);
-    bool hasLowPad = getConstantIntValue(low) != static_cast<int64_t>(0);
-    auto high =
-        getValueOrCreateConstantIndexOp(b, loc, padOp.getMixedHighPad()[dim]);
-    bool hasHighPad = getConstantIntValue(high) != static_cast<int64_t>(0);
-    auto offset = getValueOrCreateConstantIndexOp(b, loc, offsets[dim]);
-    auto length = getValueOrCreateConstantIndexOp(b, loc, sizes[dim]);
-    auto srcSize = b.createOrFold<tensor::DimOp>(loc, padOp.getSource(), dim);
+    auto low = padOp.getMixedLowPad()[dim];
+    bool hasLowPad = !isConstantIntValue(low, 0);
+    auto high = padOp.getMixedHighPad()[dim];
+    bool hasHighPad = !isConstantIntValue(high, 0);
+    auto offset = offsets[dim];
+    auto length = sizes[dim];
+    auto srcSize =
+        tensor::createDimValue(b, loc, padOp.getSource(), dim).value();
 
     // The new amount of low padding is `low - offset`. Except for the case
     // where none of the low padding is read. In that case, the new amount of
     // low padding is zero.
     //
     // Optimization: If low = 0, then newLow = 0.
-    Value newLow = hasLowPad ? max(zero, sub(low, offset)) : zero;
-    appendIndex(newLow, newLows, staticNewLows);
+    OpFoldResult newLow = hasLowPad ? max(zero, sub(low, offset)) : zero;
+    newLows.push_back(newLow);
 
     // Start reading the data from position `offset - low`. Since the original
     // read may have started in the low padding zone, this value could be
@@ -533,9 +531,10 @@
     // no data from the source.)
     //
     // Optimization: If low = 0, then the formula can be simplified.
-    Value newOffset = hasLowPad ? min(max(sub(offset, low), zero), srcSize)
-                                : min(offset, srcSize);
-    newOffsets.push_back(getAsOpFoldResult(newOffset));
+    OpFoldResult newOffset = hasLowPad
+                                 ? min(max(sub(offset, low), zero), srcSize)
+                                 : min(offset, srcSize);
+    newOffsets.push_back(newOffset);
 
     // The original ExtractSliceOp was reading until position `offset +
     // length`. Therefore, the corresponding position within the source tensor
@@ -556,19 +555,21 @@
     // The new ExtractSliceOp length is `endLoc - newOffset`.
     //
     // Optimization: If low = 0, then the formula can be simplified.
-    Value endLoc = hasLowPad
-                       ? min(max(add(sub(offset, low), length), zero), srcSize)
-                       : min(add(offset, length), srcSize);
-    Value newLength = sub(endLoc, newOffset);
-    newLengths.push_back(getAsOpFoldResult(newLength));
+    OpFoldResult endLoc =
+        hasLowPad ? min(max(add(sub(offset, low), length), zero), srcSize)
+                  : min(add(offset, length), srcSize);
+    OpFoldResult newLength = sub(endLoc, newOffset);
+    newLengths.push_back(newLength);
 
     // Check if newLength is zero. In that case, no SubTensorOp should be
     // executed.
-    if (auto newLengthInt = getConstantIntValue(newLength)) {
-      hasZeroLen |= *newLengthInt == 0;
-    } else {
-      Value check = b.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq,
-                                            newLength, zero);
+    if (isConstantIntValue(newLength, 0)) {
+      hasZeroLen = true;
+    } else if (!hasZeroLen) {
+      Value check = b.create<arith::CmpIOp>(
+          loc, arith::CmpIPredicate::eq,
+          getValueOrCreateConstantIndexOp(b, loc, newLength),
+          getValueOrCreateConstantIndexOp(b, loc, zero));
       dynHasZeroLenCond =
           dynHasZeroLenCond
               ? b.create<arith::OrIOp>(loc, check, dynHasZeroLenCond)
               : check;
@@ -579,8 +580,9 @@
     // so that the result has the same length as the original ExtractSliceOp.
     // As an optimization, if the original high padding is zero, then the new
     // high padding must also be zero.
-    Value newHigh = hasHighPad ? sub(sub(length, newLength), newLow) : zero;
-    appendIndex(newHigh, newHighs, staticNewHighs);
+    OpFoldResult newHigh =
+        hasHighPad ? sub(sub(length, newLength), newLow) : zero;
+    newHighs.push_back(newHigh);
 
     // Only unit stride supported.
     newStrides.push_back(b.getIndexAttr(1));
@@ -594,7 +596,10 @@
       RankedTensorType::get(shape, padOp.getResultType().getElementType());
 
   // Insert cast to ensure that types match. (May be folded away.)
-  auto castResult = [&](Value val) -> Operation * {
+  auto castResult = [&](Operation *op) -> Operation * {
+    Value val = op->getResult(0);
+    if (resultType == val.getType())
+      return op;
     return b.create<tensor::CastOp>(loc, resultType, val);
   };
@@ -615,10 +620,9 @@
   // the result shape of the new SliceOp has a zero dimension.
   auto createPadOfExtractSlice = [&]() {
     // Create pad(extract_slice(x)).
-    auto newSliceOp = b.create<ExtractSliceOp>(
+    Value newSliceOp = b.create<ExtractSliceOp>(
         loc, padOp.getSource(), newOffsets, newLengths, newStrides);
-    auto newPadOp = b.create<PadOp>(loc, newSliceOp, staticNewLows,
-                                    staticNewHighs, newLows, newHighs);
+    auto newPadOp = b.create<PadOp>(loc, Type(), newSliceOp, newLows, newHighs);
 
     // Copy region to new PadOp.
     IRMapping bvm;
diff --git a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
--- a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
@@ -52,6 +52,20 @@
   return dynamicDims;
 }
 
+FailureOr<OpFoldResult> mlir::tensor::createDimValue(OpBuilder &b, Location loc,
+                                                     Value rankedTensor,
+                                                     int64_t dim) {
+  auto tensorTy = rankedTensor.getType().dyn_cast<RankedTensorType>();
+  if (!tensorTy)
+    return failure();
+  auto shape = tensorTy.getShape();
+  if (dim >= static_cast<int64_t>(shape.size()))
+    return failure();
+  if (ShapedType::isDynamic(shape[dim]))
+    return OpFoldResult(b.createOrFold<tensor::DimOp>(loc, rankedTensor, dim));
+  return OpFoldResult(b.getIndexAttr(shape[dim]));
+}
+
 SmallVector<OpFoldResult> mlir::tensor::createDimValues(OpBuilder &b,
                                                         Location loc,
                                                         Value rankedTensor) {
   auto tensorTy = rankedTensor.getType().cast<RankedTensorType>();
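
Note (not part of the diff): the point of the refactoring is to keep all per-dimension index arithmetic in OpFoldResult form, so that fully static cases fold to IndexAttrs instead of materializing arith.constant / affine.apply ops. The following is a minimal standalone sketch of that pattern, assuming an MLIR checkout of the same vintage as this patch; clampedEndLoc is a hypothetical helper name that mirrors the endLoc computation inside bubbleUpPadSlice above, not an API added by this change.

// Illustrative sketch only; mirrors the patch's style, not part of it.
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tensor/Utils/Utils.h"

using namespace mlir;

// Clamp the end of a read, `offset + length`, to the source extent along
// `dim`, staying entirely in OpFoldResult form. If all operands are
// constants, the result folds to an IndexAttr and no ops are created.
static FailureOr<OpFoldResult> clampedEndLoc(OpBuilder &b, Location loc,
                                             Value source, int64_t dim,
                                             OpFoldResult offset,
                                             OpFoldResult length) {
  // Static extents come back as attributes, dynamic ones as tensor.dim.
  FailureOr<OpFoldResult> srcSize =
      tensor::createDimValue(b, loc, source, dim);
  if (failed(srcSize))
    return failure(); // `source` is not a ranked tensor.

  AffineExpr d0, d1;
  bindDims(b.getContext(), d0, d1);
  // offset + length; folds away when both operands are constant.
  OpFoldResult end = makeComposedFoldedAffineApply(
      b, loc, AffineMap::get(2, 0, {d0 + d1}), {offset, length});
  // min(end, srcSize) via the 2-D identity map, as in the patch's `min`.
  return makeComposedFoldedAffineMin(
      b, loc, AffineMap::getMultiDimIdentityMap(2, b.getContext()),
      {end, *srcSize});
}

A caller only needs getValueOrCreateConstantIndexOp at the point where an actual Value is required (e.g., the arith::CmpIOp operands above), which is exactly how the patch defers materialization.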