diff --git a/mlir/include/mlir/Dialect/SCF/Utils/AffineCanonicalizationUtils.h b/mlir/include/mlir/Dialect/SCF/Utils/AffineCanonicalizationUtils.h
--- a/mlir/include/mlir/Dialect/SCF/Utils/AffineCanonicalizationUtils.h
+++ b/mlir/include/mlir/Dialect/SCF/Utils/AffineCanonicalizationUtils.h
@@ -16,12 +16,13 @@
 #include "mlir/Support/LLVM.h"
 #include "mlir/Support/LogicalResult.h"
+#include "llvm/ADT/ArrayRef.h"
 
 namespace mlir {
 class AffineApplyOp;
-class AffineMap;
 class FlatAffineValueConstraints;
-struct LogicalResult;
+class Location;
+class OpBuilder;
 class Operation;
 class OpFoldResult;
 class RewriterBase;
@@ -44,11 +45,23 @@
                                 OpFoldResult &step);
 
 /// Populate the given constraint set with induction variable constraints of a
-/// "for" loop with the given range and step.
+/// "for" loop with the given range and step. The step is optional; if no step
+/// is provided, a step size of 1 is assumed.
 LogicalResult addLoopRangeConstraints(FlatAffineValueConstraints &cstr,
                                       Value iv, OpFoldResult lb,
                                       OpFoldResult ub, OpFoldResult step);
 
+/// Build a value that computes an upper bound of the result of the given
+/// AffineApplyOp without using any of the given loop induction variables. If
+/// no loop induction variables are specified, the op is made independent of
+/// all loop induction variables.
+///
+/// Return failure if no upper bound could be determined. `changed` is set to
+/// true if the returned bound differs from the given affine.apply op result.
+/// Also return failure if at least one of the given loop IVs is not actually
+/// a loop IV.
+FailureOr<Value> buildInductionVarIndependentUpperBound(
+    OpBuilder &b, Location loc, AffineApplyOp applyOp, bool *changed = nullptr,
+    std::optional<ArrayRef<Value>> ivs = std::nullopt);
+
 /// Try to canonicalize the given affine.min/max operation in the context of
 /// for `loops` with a known range.
 ///
diff --git a/mlir/include/mlir/Dialect/Tensor/CMakeLists.txt b/mlir/include/mlir/Dialect/Tensor/CMakeLists.txt
--- a/mlir/include/mlir/Dialect/Tensor/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/Tensor/CMakeLists.txt
@@ -1,2 +1,3 @@
 add_subdirectory(IR)
 add_subdirectory(Transforms)
+add_subdirectory(TransformOps)
diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/CMakeLists.txt b/mlir/include/mlir/Dialect/Tensor/TransformOps/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Tensor/TransformOps/CMakeLists.txt
@@ -0,0 +1,6 @@
+set(LLVM_TARGET_DEFINITIONS TensorTransformOps.td)
+mlir_tablegen(TensorTransformOps.h.inc -gen-op-decls)
+mlir_tablegen(TensorTransformOps.cpp.inc -gen-op-defs)
+add_public_tablegen_target(MLIRTensorTransformOpsIncGen)
+
+add_mlir_doc(TensorTransformOps TensorTransformOps Dialects/ -gen-op-doc)
diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h
@@ -0,0 +1,30 @@
+//===- TensorTransformOps.h - Tensor transformation ops --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_TENSOR_TRANSFORMOPS_TENSORTRANSFORMOPS_H
+#define MLIR_DIALECT_TENSOR_TRANSFORMOPS_TENSORTRANSFORMOPS_H
+
+#include "mlir/Dialect/PDL/IR/PDLTypes.h"
+#include "mlir/Dialect/Transform/IR/TransformInterfaces.h"
+#include "mlir/Dialect/Transform/IR/TransformTypes.h"
+#include "mlir/IR/OpImplementation.h"
+
+namespace mlir {
+class DialectRegistry;
+
+namespace tensor {
+class PadOp;
+
+void registerTransformDialectExtension(DialectRegistry &registry);
+} // namespace tensor
+} // namespace mlir
+
+#define GET_OP_CLASSES
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h.inc"
+
+#endif // MLIR_DIALECT_TENSOR_TRANSFORMOPS_TENSORTRANSFORMOPS_H
diff --git a/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td
@@ -0,0 +1,66 @@
+//===- TensorTransformOps.td - Tensor transformation ops ---*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TENSOR_TRANSFORM_OPS
+#define TENSOR_TRANSFORM_OPS
+
+include "mlir/Dialect/PDL/IR/PDLTypes.td"
+include "mlir/Dialect/Transform/IR/TransformDialect.td"
+include "mlir/Dialect/Transform/IR/TransformInterfaces.td"
+include "mlir/Dialect/Transform/IR/TransformTypes.td"
+include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/IR/OpBase.td"
+
+def Transform_TensorPadOp : Transform_ConcreteOpType<"tensor.pad">;
+
+def MakeLoopIndependentOp
+    : Op<Transform_Dialect, "tensor.make_loop_independent",
+         [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
+          TransformOpInterface, TransformEachOpTrait]> {
+  let description = [{
+    Rewrite the targeted ops such that their index-typed operands no longer
+    depend on any loop induction variable of the `num_loops` enclosing
+    `scf.for` loops. I.e., compute an upper bound that is independent of any
+    such loop IV for every tensor dimension. The transformed op could then be
+    hoisted from the `num_loops` enclosing loops. To preserve the original
+    semantics, a `tensor.extract_slice` op is inserted inside the loop.
+
+    Currently supported operations are:
+    - tensor.pad: Replaced by an upper bound padding, followed by a
+      tensor.extract_slice.
+
+    Note: Only index-typed operands that are affine.apply ops are taken into
+    account at the moment. Furthermore, only direct uses of `scf.for` induction
+    variables are eliminated.
+
+    #### Return modes
+
+    This operation fails if at least one induction variable could not be
+    eliminated. In case the targeted op is already independent of induction
+    variables, this transform succeeds and returns the unmodified target op.
+
+    Otherwise, the returned handle points to a subset of the produced ops:
+    - tensor.pad: The returned handle points to the tensor.extract_slice op.
+
+    This transform op consumes the target handle and produces a result handle.
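+
+    A rough usage sketch (mirroring the test case added in this patch; the
+    surrounding pipeline setup may differ):
+
+    ```mlir
+    transform.sequence failures(propagate) {
+    ^bb1(%arg1: !pdl.operation):
+      %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+          : (!pdl.operation) -> !pdl.operation
+      %1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
+    }
+    ```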
+  }];
+
+  let arguments = (ins PDL_Operation:$target, I64Attr:$num_loops);
+  let results = (outs PDL_Operation:$transformed);
+  let assemblyFormat = "$target attr-dict";
+
+  let extraClassDeclaration = [{
+    ::mlir::DiagnosedSilenceableFailure applyToOne(
+        ::mlir::tensor::PadOp target,
+        ::mlir::transform::ApplyToEachResultList &results,
+        ::mlir::transform::TransformState &state);
+  }];
+}
+
+#endif // TENSOR_TRANSFORM_OPS
+
diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
--- a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h
@@ -15,6 +15,40 @@
 namespace mlir {
 namespace tensor {
 
+/// Build a new tensor::PadOp with low/high padding that is independent of all
+/// given SCF loop induction variables. If the op is already independent of
+/// loop IVs, the same PadOp result is returned.
+///
+/// Failure indicates that no suitable upper bound for low/high padding could
+/// be found. Failure is also returned if at least one of the given IVs is not
+/// an SCF loop IV.
+///
+/// Note: This function takes into account only low/high padding values that
+/// are affine.apply ops that directly use a loop's IV.
+///
+/// Example:
+/// scf.for %iv = %lb to %ub step %step {
+///   %high = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%iv)[%ub]
+///   %p = tensor.pad %t low[5] high[%high] ...
+///   ...
+/// }
+///
+/// The function builds IR such as:
+/// %high_new = affine.apply affine_map<()[s0, s1] -> (-s0 + s1)> ()[%lb, %ub]
+/// %p_hoistable = tensor.pad %t low[5] high[%high_new]
+/// %dim = tensor.dim %t, %c0
+/// %size = affine.apply affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
+///     (%iv)[%ub, %dim]
+/// %slice = tensor.extract_slice %p_hoistable [0] [%size] [1]
+///
+/// The slice is returned.
+///
+/// Note: Due to limitations in the FlatAffineValueConstraints, we over-
+/// approximate by assuming a step size of 1 for every loop.
+FailureOr<Value>
+buildInductionVarIndependentOp(OpBuilder &b, tensor::PadOp padOp,
+                               std::optional<ArrayRef<Value>> ivs = {});
+
 /// Populates `patterns` with patterns to wrap a tensor.pad op with an scf.if op
 /// to separate the cases where we don't need padding (all pad sizes are
 /// actually zeros) and where we indeed need padding.
diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h
--- a/mlir/include/mlir/InitAllDialects.h
+++ b/mlir/include/mlir/InitAllDialects.h
@@ -64,6 +64,7 @@
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.h"
 #include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h"
 #include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/Tosa/IR/TosaOps.h"
 #include "mlir/Dialect/Transform/IR/TransformDialect.h"
@@ -124,6 +125,7 @@
   linalg::registerTransformDialectExtension(registry);
   memref::registerTransformDialectExtension(registry);
   scf::registerTransformDialectExtension(registry);
+  tensor::registerTransformDialectExtension(registry);
   vector::registerTransformDialectExtension(registry);
 
   // Register all external models.
diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
--- a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
@@ -1010,7 +1010,7 @@
     unsigned offset, unsigned num, MLIRContext *context,
     SmallVectorImpl<AffineMap> *lbMaps, SmallVectorImpl<AffineMap> *ubMaps,
     bool getClosedUB) {
-  assert(num < getNumDimVars() && "invalid range");
+  assert(offset + num <= getNumDimVars() && "invalid range");
 
   // Basic simplification.
   normalizeConstraintsByGCD();
diff --git a/mlir/lib/Dialect/SCF/Utils/AffineCanonicalizationUtils.cpp b/mlir/lib/Dialect/SCF/Utils/AffineCanonicalizationUtils.cpp
--- a/mlir/lib/Dialect/SCF/Utils/AffineCanonicalizationUtils.cpp
+++ b/mlir/lib/Dialect/SCF/Utils/AffineCanonicalizationUtils.cpp
@@ -80,11 +80,18 @@
                                            OpFoldResult ub, OpFoldResult step) {
   Builder b(iv.getContext());
 
-  // IntegerPolyhedron does not support semi-affine expressions.
-  // Therefore, only constant step values are supported.
-  auto stepInt = getConstantIntValue(step);
-  if (!stepInt)
-    return failure();
+  int64_t stepInt;
+  if (!step) {
+    // No step given: Assume step size 1.
+    stepInt = 1;
+  } else if (auto maybeConstStep = getConstantIntValue(step)) {
+    stepInt = *maybeConstStep;
+  } else {
+    // Note: IntegerPolyhedron does not support semi-affine expressions.
+    // Therefore, only constant step values are supported. In case of non-const
+    // step sizes, we conservatively assume a step size of 1.
+    stepInt = 1;
+  }
 
   unsigned dimIv = cstr.appendDimVar(iv);
   auto lbv = lb.dyn_cast<Value>();
@@ -110,7 +117,13 @@
 
   // Upper bound
   AffineExpr ivUb;
-  if (lbInt && ubInt && (*lbInt + *stepInt >= *ubInt)) {
+  AffineExpr exprLb = lbInt
+                          ? b.getAffineConstantExpr(*lbInt)
+                          : b.getAffineSymbolExpr(symLb - cstr.getNumDimVars());
+  AffineExpr exprUb = ubInt
+                          ? b.getAffineConstantExpr(*ubInt)
+                          : b.getAffineSymbolExpr(symUb - cstr.getNumDimVars());
+  if (lbInt && ubInt && (*lbInt + stepInt >= *ubInt)) {
     // The loop has at most one iteration.
     // iv < lb + 1
     // TODO: Try to derive this constraint by simplifying the expression in
@@ -119,13 +132,7 @@
   } else {
     // The loop may have more than one iteration.
     // iv < lb + step * ((ub - lb - 1) floorDiv step) + 1
-    AffineExpr exprLb =
-        lbInt ? b.getAffineConstantExpr(*lbInt)
-              : b.getAffineSymbolExpr(symLb - cstr.getNumDimVars());
-    AffineExpr exprUb =
-        ubInt ? b.getAffineConstantExpr(*ubInt)
-              : b.getAffineSymbolExpr(symUb - cstr.getNumDimVars());
-    ivUb = exprLb + 1 + (*stepInt * ((exprUb - exprLb - 1).floorDiv(*stepInt)));
+    ivUb = exprLb + 1 + (stepInt * ((exprUb - exprLb - 1).floorDiv(stepInt)));
   }
   auto map = AffineMap::get(
       /*dimCount=*/cstr.getNumDimVars(),
@@ -134,6 +141,106 @@
   return cstr.addBound(IntegerPolyhedron::UB, dimIv, map);
 }
 
+static void unpackOptionalValues(ArrayRef<std::optional<Value>> source,
+                                 SmallVector<Value> &target) {
+  target =
+      llvm::to_vector<4>(llvm::map_range(source, [](std::optional<Value> val) {
+        return val.has_value() ? *val : Value();
+      }));
+}
+
+/// Bound an identifier `pos` in a given FlatAffineValueConstraints with
+/// constraints drawn from an affine map. Before adding the constraint, the
+/// dimensions/symbols of the affine map are aligned with `constraints`.
+/// `operands` are the SSA Value operands used with the affine map.
+/// Note: This function adds a new symbol column to the `constraints` for each
+/// dimension/symbol that exists in the affine map but not in `constraints`.
+static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints,
+                                      IntegerPolyhedron::BoundType type,
+                                      unsigned pos, AffineMap map,
+                                      ValueRange operands) {
+  SmallVector<Value> dims, syms, newSyms;
+  unpackOptionalValues(constraints.getMaybeValues(VarKind::SetDim), dims);
+  unpackOptionalValues(constraints.getMaybeValues(VarKind::Symbol), syms);
+
+  AffineMap alignedMap =
+      alignAffineMapWithValues(map, operands, dims, syms, &newSyms);
+  for (unsigned i = syms.size(); i < newSyms.size(); ++i)
+    constraints.appendSymbolVar(newSyms[i]);
+  return constraints.addBound(type, pos, alignedMap);
+}
+
+FailureOr<Value> scf::buildInductionVarIndependentUpperBound(
+    OpBuilder &b, Location loc, AffineApplyOp applyOp, bool *changed,
+    std::optional<ArrayRef<Value>> ivs) {
+  if (changed)
+    *changed = false;
+
+  // Build a constraint set for the enclosing loops.
+  FlatAffineValueConstraints cstr;
+  unsigned applyOpDim = cstr.appendDimVar();
+
+  SmallVector<Value> allIvs;
+  // Find all iteration variables among the operands and constrain them.
+  for (Value operand : applyOp->getOperands()) {
+    // Skip duplicate ivs.
+    if (llvm::is_contained(allIvs, operand))
+      continue;
+
+    // If `operand` is an iteration variable: Find the corresponding loop
+    // bounds and step.
+    OpFoldResult lb, ub, step;
+    Value iv = operand;
+    if (ivs.has_value()) {
+      // Check if this iv should be eliminated.
+      if (!llvm::is_contained(*ivs, iv))
+        continue;
+      // Fail if one of the given ivs is not actually a loop IV.
+      if (failed(matchForLikeLoop(operand, lb, ub, step)))
+        return failure();
+    } else {
+      if (failed(matchForLikeLoop(operand, lb, ub, step)))
+        continue;
+    }
+    allIvs.push_back(iv);
+
+    if (failed(addLoopRangeConstraints(cstr, iv, lb, ub,
+                                       /*step=*/OpFoldResult())))
+      return failure();
+  }
+  // Nothing to do if the op does not depend on any loop IV: return the
+  // original result with `changed` remaining false.
+  if (allIvs.empty())
+    return Value(applyOp.getResult());
+
+  // Add the affine map of the affine.apply op.
+  if (failed(alignAndAddBound(
+          cstr, presburger::IntegerPolyhedron::BoundType::EQ, applyOpDim,
+          applyOp.getAffineMap(), applyOp->getOperands())))
+    return failure();
+
+  // Project out all iteration variables.
+  for (Value iv : allIvs)
+    cstr.projectOut(iv);
+
+  // Compute an upper bound for the affine.apply op.
+  SmallVector<AffineMap> opLb(1), opUb(1);
+  cstr.getSliceBounds(applyOpDim, 1, applyOp->getContext(), &opLb, &opUb);
+  if (opUb.empty() || !opUb[0])
+    return failure();
+  assert(opUb[0].getNumResults() == 1 && "expected single result");
+
+  // Create new AffineApplyOp.
+  if (changed)
+    *changed = true;
+  // Turn the open bound into a closed bound.
+  AffineMap newMap = AffineMap::get(
+      opUb[0].getNumDims(), opUb[0].getNumSymbols(), opUb[0].getResult(0) - 1);
+  SmallVector<Value> newOperands;
+  for (auto maybeValue : cstr.getMaybeValues().drop_front())
+    newOperands.push_back(*maybeValue);
+  mlir::canonicalizeMapAndOperands(&newMap, &newOperands);
+  return b.create<AffineApplyOp>(loc, newMap, newOperands).getResult();
+}
+
 /// Canonicalize min/max operations in the context of for loops with a known
 /// range. Call `canonicalizeMinMaxOp` and add the following constraints to
 /// the constraint system (along with the missing dimensions):
diff --git a/mlir/lib/Dialect/Tensor/CMakeLists.txt b/mlir/lib/Dialect/Tensor/CMakeLists.txt
--- a/mlir/lib/Dialect/Tensor/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tensor/CMakeLists.txt
@@ -1,3 +1,4 @@
 add_subdirectory(IR)
 add_subdirectory(Transforms)
+add_subdirectory(TransformOps)
 add_subdirectory(Utils)
diff --git a/mlir/lib/Dialect/Tensor/TransformOps/CMakeLists.txt b/mlir/lib/Dialect/Tensor/TransformOps/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Dialect/Tensor/TransformOps/CMakeLists.txt
@@ -0,0 +1,17 @@
+add_mlir_dialect_library(MLIRTensorTransformOps
+  TensorTransformOps.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Tensor/TransformOps
+
+  DEPENDS
+  MLIRTensorTransformOpsIncGen
+
+  LINK_LIBS PUBLIC
+  MLIRAffineDialect
+  MLIRIR
+  MLIRPDLDialect
+  MLIRSCFDialect
+  MLIRTensorTransforms
+  MLIRTransformDialect
+)
diff --git a/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Dialect/Tensor/TransformOps/TensorTransformOps.cpp
@@ -0,0 +1,86 @@
+//===- TensorTransformOps.cpp - Implementation of tensor transform ops ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h"
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
+#include "mlir/Dialect/Transform/IR/TransformDialect.h"
+#include "mlir/Dialect/Transform/IR/TransformInterfaces.h"
+#include "mlir/Dialect/Transform/IR/TransformUtils.h"
+
+using namespace mlir;
+
+//===----------------------------------------------------------------------===//
+// MakeLoopIndependentOp
+//===----------------------------------------------------------------------===//
+
+DiagnosedSilenceableFailure transform::MakeLoopIndependentOp::applyToOne(
+    tensor::PadOp target, transform::ApplyToEachResultList &results,
+    transform::TransformState &state) {
+  // Gather IVs.
+  SmallVector<Value> ivs;
+  Operation *nextOp = target;
+  for (uint64_t i = 0; i < getNumLoops(); ++i) {
+    nextOp = nextOp->getParentOfType<scf::ForOp>();
+    if (!nextOp) {
+      DiagnosedSilenceableFailure diag = emitSilenceableError()
+                                         << "could not find " << i
+                                         << "-th enclosing loop";
+      diag.attachNote(target->getLoc()) << "target op";
+      return diag;
+    }
+    ivs.push_back(cast<scf::ForOp>(nextOp).getInductionVar());
+  }
+
+  // Rewrite IR.
+  IRRewriter rewriter(target->getContext());
+  FailureOr<Value> replacement =
+      tensor::buildInductionVarIndependentOp(rewriter, target, ivs);
+  if (failed(replacement)) {
+    DiagnosedSilenceableFailure diag =
+        emitSilenceableError() << "could not make target op loop-independent";
+    diag.attachNote(target->getLoc()) << "target op";
+    return diag;
+  }
+  rewriter.replaceOp(target, *replacement);
+  results.push_back(replacement->getDefiningOp());
+  return DiagnosedSilenceableFailure::success();
+}
+
+//===----------------------------------------------------------------------===//
+// Transform op registration
+//===----------------------------------------------------------------------===//
+
+namespace {
+class TensorTransformDialectExtension
+    : public transform::TransformDialectExtension<
+          TensorTransformDialectExtension> {
+public:
+  using Base::Base;
+
+  void init() {
+    declareGeneratedDialect<AffineDialect>();
+    declareGeneratedDialect<tensor::TensorDialect>();
+
+    registerTransformOps<
+#define GET_OP_LIST
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc"
+        >();
+  }
+};
+} // namespace
+
+#define GET_OP_CLASSES
+#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc"
+
+void mlir::tensor::registerTransformDialectExtension(
+    DialectRegistry &registry) {
+  registry.addExtensions<TensorTransformDialectExtension>();
+}
diff --git a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
--- a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
@@ -4,6 +4,7 @@
   EmptyOpPatterns.cpp
   ExtractSliceFromReshapeUtils.cpp
   FoldIntoPackAndUnpackPatterns.cpp
+  LoopTransforms.cpp
   MergeConsecutiveInsertExtractSlicePatterns.cpp
   ReshapePatterns.cpp
   SplitPaddingPatterns.cpp
@@ -26,6 +27,7 @@
   MLIRMemRefDialect
   MLIRPass
   MLIRSCFDialect
+  MLIRSCFUtils
   MLIRTensorDialect
   MLIRTilingInterface
   MLIRTransforms
diff --git a/mlir/lib/Dialect/Tensor/Transforms/LoopTransforms.cpp b/mlir/lib/Dialect/Tensor/Transforms/LoopTransforms.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Dialect/Tensor/Transforms/LoopTransforms.cpp
@@ -0,0 +1,107 @@
+//===- LoopTransforms.cpp - Transforms wrt. loops -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/SCF/Utils/AffineCanonicalizationUtils.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Utils/StaticValueUtils.h"
+
+using namespace mlir;
+using namespace mlir::tensor;
+
+// Compute upper bounds for low/high padding such that they are independent of
+// any SCF loop induction variables.
+FailureOr<Value>
+tensor::buildInductionVarIndependentOp(OpBuilder &b, tensor::PadOp padOp,
+                                       std::optional<ArrayRef<Value>> ivs) {
+  OpBuilder::InsertionGuard g(b);
+  b.setInsertionPoint(padOp);
+  Location loc = padOp.getLoc();
+
+  // Non-constant padding not supported.
+  Value constantPadding = padOp.getConstantPaddingValue();
+  if (!constantPadding)
+    return failure();
+
+  // Try to compute upper bounds for the given values if they are affine.apply
+  // ops. If they are not affine.apply ops or if the affine.apply ops do not
+  // directly depend on loop IVs, simply store them in `result`.
+  bool foundUb = false;
+  auto computeUpperBounds = [&](ValueRange values,
+                                SmallVector<Value> &result) {
+    for (Value v : values) {
+      auto applyOp = v.getDefiningOp<AffineApplyOp>();
+      if (!applyOp) {
+        result.push_back(v);
+        continue;
+      }
+      bool changed;
+      auto ub = scf::buildInductionVarIndependentUpperBound(b, loc, applyOp,
+                                                            &changed, ivs);
+      if (failed(ub) || !changed) {
+        result.push_back(v);
+        continue;
+      }
+      result.push_back(*ub);
+      foundUb = true;
+    }
+  };
+
+  // Compute new low/high padding.
+  SmallVector<Value> newLow, newHigh;
+  computeUpperBounds(padOp.getLow(), newLow);
+  computeUpperBounds(padOp.getHigh(), newHigh);
+  // Return failure if no upper bound was computed. (This function would be a
+  // no-op.)
+  if (!foundUb)
+    return failure();
+  SmallVector<OpFoldResult> newMixedLow =
+      getMixedValues(padOp.getStaticLow(), newLow, b);
+  SmallVector<OpFoldResult> newMixedHigh =
+      getMixedValues(padOp.getStaticHigh(), newHigh, b);
+
+  // Create a new tensor::PadOp.
+  auto newPadOp = b.create<tensor::PadOp>(
+      loc, padOp.getResultType(), padOp.getSource(), newMixedLow, newMixedHigh,
+      constantPadding, padOp.getNofold(), /*attrs=*/ArrayRef<NamedAttribute>{});
+
+  // Create a tensor::ExtractSliceOp.
+  // Reify the result sizes of the old tensor::PadOp.
+  ReifiedRankedShapedTypeDims reifiedSizes;
+  ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface =
+      dyn_cast<ReifyRankedShapedTypeOpInterface>(padOp.getOperation());
+  if (failed(reifyShapedTypeInterface.reifyResultShapes(b, reifiedSizes)))
+    return failure();
+  SmallVector<OpFoldResult> offsets, sizes, strides;
+  for (int64_t i = 0; i < padOp.getResultType().getRank(); ++i) {
+    // offset = ub(low_padding) - low_padding
+    OpFoldResult prevLow = padOp.getMixedLowPad()[i];
+    if (prevLow.is<Attribute>()) {
+      offsets.push_back(b.getIndexAttr(0));
+    } else {
+      offsets.push_back(
+          b.create<AffineApplyOp>(
+               loc, b.getAffineDimExpr(0) - b.getAffineDimExpr(1),
+               std::initializer_list<Value>{newMixedLow[i].get<Value>(),
+                                            prevLow.get<Value>()})
+              .getResult());
+    }
+    // size = reified result size
+    if (!padOp.getResultType().isDynamicDim(i)) {
+      sizes.push_back(b.getIndexAttr(padOp.getResultType().getDimSize(i)));
+    } else {
+      sizes.push_back(reifiedSizes[0][i]);
+    }
+    // stride = 1
+    strides.push_back(b.getIndexAttr(1));
+  }
+
+  return b.create<tensor::ExtractSliceOp>(loc, newPadOp, offsets, sizes,
+                                          strides)
+      .getResult();
+}
diff --git a/mlir/test/Dialect/Tensor/transform-op-make-loop-independent.mlir b/mlir/test/Dialect/Tensor/transform-op-make-loop-independent.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Dialect/Tensor/transform-op-make-loop-independent.mlir
@@ -0,0 +1,125 @@
+// RUN: mlir-opt %s -allow-unregistered-dialect \
+// RUN:     -test-transform-dialect-interpreter -canonicalize \
+// RUN:     -split-input-file -verify-diagnostics | FileCheck %s
+
+// This is a test case where "high" padding depends on the IV.
+
+// CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (-s0 + s1)>
+// CHECK: #[[$map1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
+// CHECK-LABEL: func @make_pad_loop_independent_1(
+// CHECK-SAME:     %[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index,
+// CHECK-SAME:     %[[t:.*]]: tensor<?xf32>
+func.func @make_pad_loop_independent_1(%lb: index, %ub: index, %step: index,
+                                       %t: tensor<?xf32>, %f: f32) {
+  // CHECK: scf.for %[[iv:.*]] = %[[lb]] to %[[ub]]
+  scf.for %i = %lb to %ub step %step {
+    // CHECK: %[[high:.*]] = affine.apply #[[$map]]()[%[[lb]], %[[ub]]]
+    // CHECK: %[[padded:.*]] = tensor.pad %[[t]] low[5] high[%[[high]]]
+    // CHECK: %[[dim:.*]] = tensor.dim %[[t]]
+    // CHECK: %[[size:.*]] = affine.apply #[[$map1]](%[[iv]])[%[[ub]], %[[dim]]]
+    // CHECK: %[[replacement:.*]] = tensor.extract_slice %[[padded]][0] [%[[size]]] [1]
+    %high = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub]
+    %p = tensor.pad %t low[5] high[%high] {
+    ^bb0(%arg1: index):
+      tensor.yield %f : f32
+    } : tensor<?xf32> to tensor<?xf32>
+    // CHECK: "dummy.some_use"(%[[replacement]])
+    "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
+}
+
+// -----
+
+// This is a test case where "low" padding depends on the IV.
+
+// CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (-s0 + s1)>
+// CHECK: #[[$map1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
+// CHECK: #[[$map2:.*]] = affine_map<(d0)[s0] -> (d0 - s0)>
+// CHECK-LABEL: func @make_pad_loop_independent_2(
+// CHECK-SAME:     %[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index,
+// CHECK-SAME:     %[[t:.*]]: tensor<?xf32>
+func.func @make_pad_loop_independent_2(%lb: index, %ub: index, %step: index,
+                                       %t: tensor<?xf32>, %f: f32) {
+  // CHECK: scf.for %[[iv:.*]] = %[[lb]] to %[[ub]]
+  scf.for %i = %lb to %ub step %step {
+    // CHECK: %[[low:.*]] = affine.apply #[[$map]]()[%[[lb]], %[[ub]]]
+    // CHECK: %[[padded:.*]] = tensor.pad %[[t]] low[%[[low]]] high[5]
+    // CHECK: %[[dim:.*]] = tensor.dim %[[t]]
+    // CHECK: %[[size:.*]] = affine.apply #[[$map1]](%[[iv]])[%[[ub]], %[[dim]]]
+    // CHECK: %[[offset:.*]] = affine.apply #[[$map2]](%[[iv]])[%[[lb]]]
+    // CHECK: %[[replacement:.*]] = tensor.extract_slice %[[padded]][%[[offset]]] [%[[size]]] [1]
+    %low = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub]
+    %p = tensor.pad %t low[%low] high[5] {
+    ^bb0(%arg1: index):
+      tensor.yield %f : f32
+    } : tensor<?xf32> to tensor<?xf32>
+    // CHECK: "dummy.some_use"(%[[replacement]])
+    "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
+}
+
+// -----
+
+// CHECK: #[[$map:.*]] = affine_map<()[s0] -> (s0 * 2 - 2)>
+// CHECK-LABEL: func @two_loops(
+func.func @two_loops(%lb: index, %ub: index, %step: index,
+                     %t: tensor<?xf32>, %f: f32) {
+  scf.for %i = %lb to %ub step %step {
+    scf.for %j = %lb to %ub step %step {
+      // CHECK: affine.apply #[[$map]]()[%{{.*}}]
+      %low = affine.apply affine_map<(d0, d1)[] -> (d0 + d1)> (%i, %j)[]
+      %p = tensor.pad %t low[%low] high[5] {
+      ^bb0(%arg1: index):
+        tensor.yield %f : f32
+      } : tensor<?xf32> to tensor<?xf32>
+      "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+    }
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 2}
+}
+
+// -----
+
+func.func @not_enough_loops(%lb: index, %ub: index, %step: index,
+                            %t: tensor<?xf32>, %f: f32) {
+  scf.for %i = %lb to %ub step %step {
+    scf.for %j = %lb to %ub step %step {
+      %low = affine.apply affine_map<(d0, d1)[] -> (d0 + d1)> (%i, %j)[]
+      // expected-note@below {{target op}}
+      %p = tensor.pad %t low[%low] high[5] {
+      ^bb0(%arg1: index):
+        tensor.yield %f : f32
+      } : tensor<?xf32> to tensor<?xf32>
+      "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
+    }
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
+  // expected-error@below {{could not find 2-th enclosing loop}}
+  %1 = transform.tensor.make_loop_independent %0 {num_loops = 3}
+}
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -5543,6 +5543,7 @@
         ":MemRefDialect",
        ":Pass",
        ":SCFDialect",
+        ":SCFUtils",
        ":TensorDialect",
        ":TensorPassIncGen",
        ":TilingInterface",
@@ -5551,6 +5552,56 @@
    ],
)
 
+td_library(
+    name = "TensorTransformOpsTdFiles",
+    srcs = [
+        "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td",
+    ],
+    includes = ["include"],
+    deps = [
+        ":PDLDialect",
+        ":TransformDialectTdFiles",
+    ],
+)
+
+gentbl_cc_library(
+    name = "TensorTransformOpsIncGen",
+    strip_include_prefix = "include",
+    tbl_outs = [
+        (
+            ["-gen-op-decls"],
+            "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h.inc",
+        ),
+        (
+            ["-gen-op-defs"],
+            "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc",
+        ),
+    ],
+    tblgen = ":mlir-tblgen",
+    td_file = "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td",
+    deps = [
+        ":TensorTransformOpsTdFiles",
+    ],
+)
+
+cc_library(
+    name = "TensorTransformOps",
+    srcs = glob(["lib/Dialect/Tensor/TransformOps/*.cpp"]),
+    hdrs = glob(["include/mlir/Dialect/Tensor/TransformOps/*.h"]),
+    includes = ["include"],
+    deps = [
+        ":AffineDialect",
+        ":IR",
+        ":PDLDialect",
+        ":SCFDialect",
+        ":TensorDialect",
+        ":TensorTransformOpsIncGen",
+        ":TensorTransforms",
+        ":TransformDialect",
+        "//llvm:Support",
+    ],
+)
+
 cc_library(
     name = "Rewrite",
     srcs = glob([
@@ -6980,6 +7031,7 @@
        ":TensorDialect",
        ":TensorInferTypeOpInterfaceImpl",
        ":TensorTilingInterfaceImpl",
+        ":TensorTransformOps",
        ":TensorTransforms",
        ":TosaDialect",
        ":TosaToLinalg",