diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
--- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
+++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
@@ -1202,7 +1202,8 @@
     The slice is further defined by a full-rank index within the MemRef/Tensor,
     supplied as the operands `[1 .. 1 + rank(memref/tensor))` that defines the
-    starting point of the transfer (e.g. `%A[%i0, %i1, %i2]`).
+    starting point of the transfer (e.g. `%A[%i0, %i1, %i2]`). All indices must
+    be non-negative.
 
     The permutation_map [attribute](../LangRef.md#attributes) is an
     [affine-map](Affine.md#affine-maps) which specifies the transposition on the
@@ -1225,13 +1226,11 @@
     An optional boolean array attribute `in_bounds` specifies for every tensor/
     memref dimension if the transfer is guaranteed to be within the source
-    bounds. While the starting point of the transfer has to be in-bounds,
-    accesses may run out-of-bounds as indices increase. If specified, the
-    `in_bounds` array length has to be equal to the rank of the source. In
-    absence of the attribute, accesses along all dimensions may run
-    out-of-bounds. A `vector.transfer_read` can be lowered to a simple load if
-    all dimensions are specified to be within bounds and no `mask` was
-    specified.
+    bounds. If specified, the `in_bounds` array length has to be equal to the
+    rank of the source. In the absence of the attribute, accesses along all
+    dimensions may run out-of-bounds. A `vector.transfer_read` can be lowered
+    to a simple load if all dimensions are specified to be within bounds and
+    no `mask` was specified.
 
     This operation is called 'read' by opposition to 'load' because the
     super-vector granularity is generally not representable with a single
@@ -1443,7 +1442,8 @@
     The slice is further defined by a full-rank index within the MemRef/Tensor,
     supplied as the operands `[2 .. 2 + rank(memref/tensor))` that defines the
-    starting point of the transfer (e.g. `%A[%i0, %i1, %i2, %i3]`).
+    starting point of the transfer (e.g. `%A[%i0, %i1, %i2, %i3]`). All indices
+    must be non-negative.
 
     The permutation_map [attribute](../LangRef.md#attributes) is an
     [affine-map](Affine.md#affine-maps) which specifies the transposition on the
@@ -1466,13 +1466,11 @@
     An optional boolean array attribute `in_bounds` specifies for every tensor/
     memref dimension if the transfer is guaranteed to be within the source
-    bounds. While the starting point of the transfer has to be in-bounds,
-    accesses may run out-of-bounds as indices increase. If specified, the
-    `in_bounds` array length has to be equal to the rank of the source. In
-    absence of the attribute, accesses along all dimensions may run
-    out-of-bounds. A `vector.transfer_write` can be lowered to a simple store
-    if all dimensions are specified to be within bounds and no `mask` was
-    specified.
+    bounds. If specified, the `in_bounds` array length has to be equal to the
+    rank of the source. In the absence of the attribute, accesses along all
+    dimensions may run out-of-bounds. A `vector.transfer_write` can be lowered
+    to a simple store if all dimensions are specified to be within bounds and
+    no `mask` was specified.
 
     This operation is called 'write' by opposition to 'store' because the
     super-vector granularity is generally not representable with a single
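Illustration (not part of the patch; function and value names are hypothetical): under the relaxed semantics documented above, even the starting point of a transfer may be out-of-bounds along a dimension whose `in_bounds` entry is `false`. A minimal read that is now valid, assuming `%i` may exceed dim 0 of `%A`:

```mlir
func.func @oob_start(%A: memref<?x?xf32>, %i: index, %j: index,
                     %pad: f32) -> vector<4xf32> {
  // Valid even if %i >= dim(%A, 0): dim 0 is declared maybe-out-of-bounds,
  // so out-of-bounds lanes are filled with the padding value %pad.
  %v = vector.transfer_read %A[%i, %j], %pad {in_bounds = [false, true]}
      : memref<?x?xf32>, vector<4xf32>
  return %v : vector<4xf32>
}
```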
diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
--- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
+++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
@@ -47,8 +47,9 @@
 template <typename OpTy>
 struct VectorToSCFPattern : public OpRewritePattern<OpTy> {
   explicit VectorToSCFPattern(MLIRContext *context,
-                              VectorTransferToSCFOptions opt)
-      : OpRewritePattern<OpTy>(context), options(opt) {}
+                              VectorTransferToSCFOptions opt,
+                              PatternBenefit benefit = 1)
+      : OpRewritePattern<OpTy>(context, benefit), options(opt) {}
 
   VectorTransferToSCFOptions options;
 };
@@ -173,6 +174,9 @@
 /// This function variant returns the value returned by `inBoundsCase` or
 /// `outOfBoundsCase`. The MLIR type of the return value must be specified in
 /// `resultTypes`.
+///
+/// Note: `iv` is optional. If it is not provided, an "if (%a < %d)" check
+/// will be generated.
 template <typename OpTy>
 static Value generateInBoundsCheck(
     OpBuilder &b, OpTy xferOp, Value iv, std::optional<int64_t> dim,
@@ -189,21 +193,38 @@
   if (!isBroadcast && !xferOp.isDimInBounds(*dim)) {
     Value memrefDim =
         vector::createOrFoldDimOp(b, loc, xferOp.getSource(), *dim);
-    AffineExpr d0, d1;
-    bindDims(xferOp.getContext(), d0, d1);
-    Value base = xferOp.getIndices()[*dim];
-    Value memrefIdx =
-        affine::makeComposedAffineApply(b, loc, d0 + d1, {base, iv});
+    Value memrefIdx;
+    if (iv) {
+      AffineExpr d0, d1;
+      bindDims(xferOp.getContext(), d0, d1);
+      Value base = xferOp.getIndices()[*dim];
+      memrefIdx = affine::makeComposedAffineApply(b, loc, d0 + d1, {base, iv});
+    } else {
+      memrefIdx = xferOp.getIndices()[*dim];
+    }
     cond = lb.create<arith::CmpIOp>(arith::CmpIPredicate::sgt, memrefDim,
                                     memrefIdx);
   }
 
-  // Condition check 2: Masked in?
-  if (auto maskCond = generateMaskCheck(b, xferOp, iv)) {
-    if (cond)
-      cond = lb.create<arith::AndIOp>(cond, maskCond);
-    else
-      cond = maskCond;
+  // Condition check 2: Masked in? If no `iv` is provided, this is a check for
+  // a non-transfer dimension (which does not have a mask).
+  if (iv) {
+    if (auto maskCond = generateMaskCheck(b, xferOp, iv)) {
+      if (cond)
+        cond = lb.create<arith::AndIOp>(cond, maskCond);
+      else
+        cond = maskCond;
+    }
+  } else {
+#ifndef NDEBUG
+    if (dim.has_value()) {
+      for (AffineExpr expr : xferOp.getPermutationMap().getResults()) {
+        if (auto dimExpr = expr.dyn_cast<AffineDimExpr>()) {
+          assert(dimExpr.getPosition() != *dim && "expected non-transfer dim");
+        }
+      }
+    }
+#endif // NDEBUG
   }
 
   // If the condition is non-empty, generate an SCF::IfOp.
@@ -1243,9 +1264,17 @@
       return failure();
     if (xferOp.getVectorType().getRank() != 1)
       return failure();
+    assert(map.getNumResults() == 1 && "Expected 1 permutation map result");
     if (map.isMinorIdentity() && isLastMemrefDimUnitStride(memRefType))
       return failure(); // Handled by ConvertVectorToLLVM
 
+    // Only the transfer dimension may be out of bounds.
+    for (int64_t i = 0, e = xferOp.getShapedType().getRank(); i < e; ++i)
+      if (!xferOp.isDimInBounds(i))
+        if (auto dimExpr = map.getResult(0).template dyn_cast<AffineDimExpr>())
+          if (dimExpr.getPosition() != i)
+            return failure();
+
     // Loop bounds, step, state...
     Location loc = xferOp.getLoc();
     auto vecType = xferOp.getVectorType();
@@ -1272,6 +1301,100 @@
+
+/// This pattern turns an out-of-bounds non-transfer dimension into an
+/// in-bounds dimension by inserting the corresponding in-bounds check as an
+/// scf.if op.
+///
+/// Example:
+/// ```
+/// %vec = vector.transfer_read %A[%a, %b], %cst {in_bounds = [false, false]}
+///     : memref<?x?xf32>, vector<4xf32>
+/// ```
+/// Generated IR:
+/// ```
+/// %d = memref.dim %A, %c0 : memref<?x?xf32>
+/// if (%a < %d) {
+///   %vec = vector.transfer_read %A[%a, %b], %cst {in_bounds = [true, false]}
+///       : memref<?x?xf32>, vector<4xf32>
+///   scf.yield %vec
+/// } else {
+///   %vec = vector.splat %cst
+///   scf.yield %vec
+/// }
+/// ```
+///
+/// This pattern ignores all transfer dims, such as %b in the example above.
+/// Such dims are handled by other patterns (e.g., as part of unrolling or
+/// materialization of masks).
+template <typename OpTy>
+struct TransferOpOutOfBoundsNonTransferDims : public VectorToSCFPattern<OpTy> {
+  using VectorToSCFPattern<OpTy>::VectorToSCFPattern;
+
+  /// Out-of-bounds read: return padding value.
+  static Value handleOutOfBoundsDim(OpBuilder &b, TransferReadOp xferOp) {
+    return b.create<vector::SplatOp>(xferOp.getLoc(), xferOp.getVectorType(),
+                                     xferOp.getPadding());
+  }
+
+  /// Out-of-bounds write: nothing to do.
+  static Value handleOutOfBoundsDim(OpBuilder &b, TransferWriteOp xferOp) {
+    if (xferOp.getNumResults() == 0)
+      return Value();
+    assert(xferOp.getNumResults() == 1 && "expected 0 or 1 result");
+    return xferOp.getSource();
+  }
+
+  LogicalResult matchAndRewrite(OpTy xferOp,
+                                PatternRewriter &rewriter) const override {
+    // Find an out-of-bounds non-transfer dim.
+    int64_t dim = -1;
+    DenseSet<int64_t> xferDims;
+    for (AffineExpr expr : xferOp.getPermutationMap().getResults()) {
+      if (auto dimExpr = expr.template dyn_cast<AffineDimExpr>())
+        xferDims.insert(dimExpr.getPosition());
+    }
+    for (int64_t i = 0, e = xferOp.getShapedType().getRank(); i < e; ++i) {
+      if (!xferDims.contains(i) && !xferOp.isDimInBounds(i)) {
+        dim = i;
+        break;
+      }
+    }
+
+    // Nothing to do if all non-transfer dims are in-bounds.
+    if (dim == -1)
+      return failure();
+
+    Value result = generateInBoundsCheck(
+        rewriter, xferOp, /*iv=*/Value(), dim, xferOp->getResultTypes(),
+        /*inBoundsCase=*/
+        [&](OpBuilder &b, Location loc) -> Value {
+          // Clone the xfer op and set in_bounds[dim] to "true".
+          OpTy cloned = cast<OpTy>(b.clone(*xferOp.getOperation()));
+          SmallVector<bool> inBounds(xferOp.getShapedType().getRank(), false);
+          if (xferOp.getInBounds().has_value())
+            inBounds =
+                extractFromIntegerArrayAttr<bool>(xferOp.getInBoundsAttr());
+          inBounds[dim] = true;
+          cloned.setInBoundsAttr(b.getBoolArrayAttr(inBounds));
+          if (cloned->getNumResults() == 0)
+            return Value();
+          assert(cloned->getNumResults() == 1 && "expected 0 or 1 result");
+          return cloned->getResult(0);
+        },
+        /*outOfBoundsCase=*/
+        [&](OpBuilder &b, Location loc) -> Value {
+          return handleOutOfBoundsDim(b, xferOp);
+        });
+
+    if (xferOp->getNumResults() == 0) {
+      assert(!result && "expected that check has no result");
+      rewriter.eraseOp(xferOp);
+    } else {
+      rewriter.replaceOp(xferOp, result);
+    }
+    return success();
+  }
+};
 } // namespace
@@ -1293,6 +1416,10 @@ void mlir::populateVectorToSCFConversionPatterns(
         lowering_1_d::TransferOp1dConversion>(
         patterns.getContext(), options);
   }
+
+  patterns.add<TransferOpOutOfBoundsNonTransferDims<TransferReadOp>,
+               TransferOpOutOfBoundsNonTransferDims<TransferWriteOp>>(
+      patterns.getContext(), options);
 }
 
 namespace {
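To make the new iv-less code path concrete: for a `vector.transfer_read` whose non-transfer dim 0 may be out-of-bounds, `generateInBoundsCheck` (called with an empty `iv`) produces IR of roughly this shape. A sketch only; `%A`, `%a`, `%b`, `%pad`, and `%c0` are assumed to be defined in the enclosing function:

```mlir
%d = memref.dim %A, %c0 : memref<?x?xf32>
%in_bounds = arith.cmpi sgt, %d, %a : index
%vec = scf.if %in_bounds -> (vector<4xf32>) {
  // In-bounds case: rewritten transfer with in_bounds[0] = true.
  %r = vector.transfer_read %A[%a, %b], %pad {in_bounds = [true, false]}
      : memref<?x?xf32>, vector<4xf32>
  scf.yield %r : vector<4xf32>
} else {
  // Out-of-bounds case: the whole result is the padding value.
  %r = vector.splat %pad : vector<4xf32>
  scf.yield %r : vector<4xf32>
}
```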
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -3471,21 +3471,6 @@
              << " vs inBounds of size: " << inBounds.size();
   }
 
-  // Make sure that all non-transfer dimensions are in-bounds.
-  SmallVector<bool> inBoundsVals(op.getShapedType().getRank(), false);
-  if (inBounds)
-    inBoundsVals = extractFromIntegerArrayAttr<bool>(inBounds);
-  DenseSet<int64_t> xferDims;
-  for (AffineExpr expr : permutationMap.getResults()) {
-    if (auto dimExpr = expr.template dyn_cast<AffineDimExpr>())
-      xferDims.insert(dimExpr.getPosition());
-  }
-  for (int64_t i = 0, e = op.getShapedType().getRank(); i < e; ++i)
-    if (!xferDims.contains(i) && !op.isDimInBounds(i)) {
-      return op->emitOpError(
-          "expects that all non-transfer dims are in-bounds");
-    }
-
   return success();
 }
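With the verifier check above removed, IR like the following becomes valid; this is the exact case deleted from invalid.mlir further down, reproduced here without the expected-error annotation. The out-of-bounds non-transfer dim is instead legalized during lowering:

```mlir
func.func @out_of_bounds_non_transfer_dim(%arg0: tensor<?x?xf32>, %pos: index,
                                          %f: f32) -> vector<5xf32> {
  // No in_bounds attribute: dim 0 (the non-transfer dim) may be out-of-bounds.
  %0 = vector.transfer_read %arg0[%pos, %pos], %f
      : tensor<?x?xf32>, vector<5xf32>
  return %0 : vector<5xf32>
}
```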
diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
--- a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
+++ b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
@@ -548,3 +548,81 @@
 // CHECK: } else {
 // CHECK: }
 // CHECK: }
+
+// -----
+
+// CHECK-LABEL: func @read_non_transfer_dim_out_of_bounds(
+// CHECK: %[[r5:.*]] = scf.if {{.*}} {
+// CHECK: %[[r2:.*]] = scf.if {{.*}} {
+// CHECK-NOT: scf.if
+// CHECK: %[[r:.*]] = vector.transfer_read {{.*}} {in_bounds = [true, true, true, false]} : tensor<?x?x?x?xf32>, vector<5xf32>
+// CHECK: scf.yield %[[r]]
+// CHECK: } else {
+// CHECK: %[[r3:.*]] = vector.splat {{.*}} : vector<5xf32>
+// CHECK: scf.yield %[[r3]]
+// CHECK: }
+// CHECK: scf.yield %[[r2]]
+// CHECK: } else {
+// CHECK: %[[r4:.*]] = vector.splat {{.*}} : vector<5xf32>
+// CHECK: scf.yield %[[r4]]
+// CHECK: }
+// CHECK: return %[[r5]]
+func.func @read_non_transfer_dim_out_of_bounds(%t: tensor<?x?x?x?xf32>, %idx: index, %f: f32) -> vector<5xf32> {
+  %0 = vector.transfer_read %t[%idx, %idx, %idx, %idx], %f {in_bounds = [false, true, false, false]} : tensor<?x?x?x?xf32>, vector<5xf32>
+  return %0 : vector<5xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @write_non_transfer_dim_out_of_bounds(
+// CHECK-SAME: %[[t:.*]]: tensor<?x?x?x?xf32>
+// CHECK: scf.if {{.*}} {
+// CHECK: scf.if {{.*}} {
+// CHECK-NOT: scf.if
+// CHECK: %[[r:.*]] = vector.transfer_write {{.*}} {in_bounds = [true, true, true, false]} : vector<5xf32>, tensor<?x?x?x?xf32>
+// CHECK: scf.yield %[[r]]
+// CHECK: } else {
+// CHECK: scf.yield %[[t]]
+// CHECK: }
+// CHECK: } else {
+// CHECK: scf.yield %[[t]]
+// CHECK: }
+func.func @write_non_transfer_dim_out_of_bounds(%t: tensor<?x?x?x?xf32>, %idx: index, %f: f32, %v: vector<5xf32>) -> tensor<?x?x?x?xf32> {
+  %0 = vector.transfer_write %v, %t[%idx, %idx, %idx, %idx] {in_bounds = [false, true, false, false]} : vector<5xf32>, tensor<?x?x?x?xf32>
+  return %0 : tensor<?x?x?x?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @read_memref_non_transfer_dim_out_of_bounds(
+// CHECK: scf.if {{.*}} {
+// CHECK: scf.if {{.*}} {
+// CHECK-NOT: scf.if
+// CHECK: vector.transfer_read {{.*}} {in_bounds = [true, true, true, false]} : memref<?x?x?x?xf32>, vector<5xf32>
+// CHECK: } else {
+// CHECK: vector.splat {{.*}} : vector<5xf32>
+// CHECK: }
+// CHECK: } else {
+// CHECK: vector.splat {{.*}} : vector<5xf32>
+// CHECK: }
func.func @read_memref_non_transfer_dim_out_of_bounds(%t: memref<?x?x?x?xf32>, %idx: index, %f: f32) -> vector<5xf32> {
+  %0 = vector.transfer_read %t[%idx, %idx, %idx, %idx], %f {in_bounds = [false, true, false, false]} : memref<?x?x?x?xf32>, vector<5xf32>
+  return %0 : vector<5xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @memref_write_non_transfer_dim_out_of_bounds(
+// CHECK-SAME: %[[t:.*]]: memref<?x?x?x?xf32>
+// CHECK: scf.if {{.*}} {
+// CHECK: scf.if {{.*}} {
+// CHECK-NOT: scf.if
+// CHECK: vector.transfer_write {{.*}} {in_bounds = [true, true, true, false]} : vector<5xf32>, memref<?x?x?x?xf32>
+// CHECK: } else {
+// CHECK: }
+// CHECK: } else {
+// CHECK: }
+func.func @memref_write_non_transfer_dim_out_of_bounds(%t: memref<?x?x?x?xf32>, %idx: index,
+                                                       %f: f32, %v: vector<5xf32>) {
+  vector.transfer_write %v, %t[%idx, %idx, %idx, %idx] {in_bounds = [false, true, false, false]} : vector<5xf32>, memref<?x?x?x?xf32>
+  return
+}
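The CHECK lines above correspond to lowered IR of roughly this shape (a sketch; SSA names and the `%c0`/`%c2` constants are hypothetical): one `scf.if` per out-of-bounds non-transfer dim (dims 0 and 2), nested, with the innermost transfer marked in-bounds on those dims:

```mlir
%d0 = tensor.dim %t, %c0 : tensor<?x?x?x?xf32>
%ok0 = arith.cmpi sgt, %d0, %idx : index
%r = scf.if %ok0 -> (vector<5xf32>) {
  %d2 = tensor.dim %t, %c2 : tensor<?x?x?x?xf32>
  %ok2 = arith.cmpi sgt, %d2, %idx : index
  %inner = scf.if %ok2 -> (vector<5xf32>) {
    %v = vector.transfer_read %t[%idx, %idx, %idx, %idx], %f
        {in_bounds = [true, true, true, false]}
        : tensor<?x?x?x?xf32>, vector<5xf32>
    scf.yield %v : vector<5xf32>
  } else {
    %pad = vector.splat %f : vector<5xf32>
    scf.yield %pad : vector<5xf32>
  }
  scf.yield %inner : vector<5xf32>
} else {
  %pad = vector.splat %f : vector<5xf32>
  scf.yield %pad : vector<5xf32>
}
```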
diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir
--- a/mlir/test/Dialect/Vector/invalid.mlir
+++ b/mlir/test/Dialect/Vector/invalid.mlir
@@ -1675,11 +1675,3 @@
       } %arg0, %arg1, %arg2 : vector<16x32xsi8>, vector<32x16xsi8> into vector<16x16xsi32>
   return %0: vector<16x16xsi32>
 }
-
-// -----
-
-func.func @out_of_bounds_non_transfer_dim(%arg0: tensor<?x?xf32>, %pos: index, %f: f32) -> vector<5xf32> {
-  // expected-error @below{{expects that all non-transfer dims are in-bounds}}
-  %0 = vector.transfer_read %arg0[%pos, %pos], %f : tensor<?x?xf32>, vector<5xf32>
-  return %0 : vector<5xf32>
-}
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
@@ -99,6 +99,17 @@
   return
 }
 
+// Non-contiguous, strided load, where either dimension may start out-of-bounds.
+func.func @transfer_read_1d_out_of_bounds(
+    %A : memref<?x?xf32>, %base1 : index, %base2 : index) {
+  %fm42 = arith.constant -42.0: f32
+  %f = vector.transfer_read %A[%base1, %base2], %fm42
+      {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = [false, false]}
+      : memref<?x?xf32>, vector<3xf32>
+  vector.print %f: vector<3xf32>
+  return
+}
+
 // Non-contiguous, strided load.
 func.func @transfer_read_1d_mask(
     %A : memref<?x?xf32>, %base1 : index, %base2 : index) {
@@ -149,6 +160,7 @@
   %c1 = arith.constant 1: index
   %c2 = arith.constant 2: index
   %c3 = arith.constant 3: index
+  %c10 = arith.constant 10 : index
 
   %0 = memref.get_global @gv : memref<5x6xf32>
   %A = memref.cast %0 : memref<5x6xf32> to memref<?x?xf32>
@@ -169,6 +181,18 @@
   call @transfer_read_1d_non_static_unit_stride(%A) : (memref<?x?xf32>) -> ()
   // CHECK: ( 31, 32, 33, 34 )
 
+  // 2.c. Read 1D vector from 2D memref with out-of-bounds transfer dim
+  // starting point.
+  call @transfer_read_1d_out_of_bounds(%A, %c10, %c1)
+      : (memref<?x?xf32>, index, index) -> ()
+  // CHECK: ( -42, -42, -42 )
+
+  // 2.d. Read 1D vector from 2D memref with out-of-bounds non-transfer dim
+  // starting point.
+  call @transfer_read_1d_out_of_bounds(%A, %c1, %c10)
+      : (memref<?x?xf32>, index, index) -> ()
+  // CHECK: ( -42, -42, -42 )
+
   // 3. Read 1D vector from 2D memref with non-unit stride on second dim.
   call @transfer_read_1d_non_unit_stride(%A) : (memref<?x?xf32>) -> ()
   // CHECK: ( 22, 24, -42 )
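Both new calls print `( -42, -42, -42 )`, but for different reasons: the permutation map `(d0, d1) -> (d0)` makes dim 0 the transfer dim, so with `%base1 = 10` every element of the transfer is out-of-bounds and padded individually, while with `%base2 = 10` the whole transfer is rejected by the new non-transfer-dim guard. A sketch (assumed shape, per the lowering above) of that guard for dim 1:

```mlir
%dim1 = memref.dim %A, %c1 : memref<?x?xf32>
%ok = arith.cmpi sgt, %dim1, %base2 : index
%res = scf.if %ok -> (vector<3xf32>) {
  %r = vector.transfer_read %A[%base1, %base2], %fm42
      {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = [false, true]}
      : memref<?x?xf32>, vector<3xf32>
  scf.yield %r : vector<3xf32>
} else {
  %r = vector.splat %fm42 : vector<3xf32>
  scf.yield %r : vector<3xf32>
}
```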
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir
@@ -126,13 +126,24 @@
   %c1 = arith.constant 1: index
   %c2 = arith.constant 2: index
   %c3 = arith.constant 3: index
+  %c10 = arith.constant 10 : index
 
   %0 = memref.get_global @gv : memref<3x4xf32>
   %A = memref.cast %0 : memref<3x4xf32> to memref<?x?xf32>
 
-  // 1. Read 2D vector from 2D memref.
+  // 1.a. Read 2D vector from 2D memref.
   call @transfer_read_2d(%A, %c1, %c2) : (memref<?x?xf32>, index, index) -> ()
   // CHECK: ( ( 12, 13, -42, -42, -42, -42, -42, -42, -42 ), ( 22, 23, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
 
+  // 1.b. Read 2D vector from 2D memref. Starting position of first dim is
+  // out-of-bounds.
+  call @transfer_read_2d(%A, %c3, %c2) : (memref<?x?xf32>, index, index) -> ()
+  // CHECK: ( ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
+
+  // 1.c. Read 2D vector from 2D memref. Starting position of second dim is
+  // out-of-bounds.
+  call @transfer_read_2d(%A, %c1, %c10) : (memref<?x?xf32>, index, index) -> ()
+  // CHECK: ( ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
+
   // 2. Read 2D vector from 2D memref at specified location and transpose the
   //    result.
   call @transfer_read_2d_transposed(%A, %c1, %c2)
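For context, `@transfer_read_2d` is defined earlier in this test file and is unchanged by the patch; its assumed definition (a sketch reconstructed from the CHECK output, so treat the details as unverified) explains the 4x9 result shape and the `-42` padding:

```mlir
func.func @transfer_read_2d(%A : memref<?x?xf32>, %base1: index, %base2: index) {
  %fm42 = arith.constant -42.0: f32
  // Reads a 4x9 tile starting at (%base1, %base2); out-of-bounds elements
  // are replaced with the -42 padding value.
  %f = vector.transfer_read %A[%base1, %base2], %fm42
      : memref<?x?xf32>, vector<4x9xf32>
  vector.print %f : vector<4x9xf32>
  return
}
```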