diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
--- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
+++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
@@ -1202,7 +1202,8 @@
     The slice is further defined by a full-rank index within the MemRef/Tensor,
     supplied as the operands `[1 .. 1 + rank(memref/tensor))` that defines the
-    starting point of the transfer (e.g. `%A[%i0, %i1, %i2]`).
+    starting point of the transfer (e.g. `%A[%i0, %i1, %i2]`). All indices must
+    be non-negative.
 
     The permutation_map [attribute](../LangRef.md#attributes) is an
     [affine-map](Affine.md#affine-maps) which specifies the transposition on the
@@ -1225,13 +1226,11 @@
     An optional boolean array attribute `in_bounds` specifies for every tensor/
     memref dimension if the transfer is guaranteed to be within the source
-    bounds. While the starting point of the transfer has to be in-bounds,
-    accesses may run out-of-bounds as indices increase. If specified, the
-    `in_bounds` array length has to be equal to the rank of the source. In
-    absence of the attribute, accesses along all dimensions may run
-    out-of-bounds. A `vector.transfer_read` can be lowered to a simple load if
-    all dimensions are specified to be within bounds and no `mask` was
-    specified.
+    bounds. If specified, the `in_bounds` array length has to be equal to the
+    rank of the source. In the absence of the attribute, accesses along all
+    dimensions may run out-of-bounds. A `vector.transfer_read` can be lowered
+    to a simple load if all dimensions are specified to be within bounds and
+    no `mask` was specified.
 
     This operation is called 'read' by opposition to 'load' because the
     super-vector granularity is generally not representable with a single
@@ -1443,7 +1442,8 @@
     The slice is further defined by a full-rank index within the MemRef/Tensor,
     supplied as the operands `[2 .. 2 + rank(memref/tensor))` that defines the
-    starting point of the transfer (e.g. `%A[%i0, %i1, %i2, %i3]`).
+    starting point of the transfer (e.g. `%A[%i0, %i1, %i2, %i3]`). All indices
+    must be non-negative.
 
     The permutation_map [attribute](../LangRef.md#attributes) is an
     [affine-map](Affine.md#affine-maps) which specifies the transposition on the
@@ -1466,13 +1466,11 @@
     An optional boolean array attribute `in_bounds` specifies for every tensor/
     memref dimension if the transfer is guaranteed to be within the source
-    bounds. While the starting point of the transfer has to be in-bounds,
-    accesses may run out-of-bounds as indices increase. If specified, the
-    `in_bounds` array length has to be equal to the rank of the source. In
-    absence of the attribute, accesses along all dimensions may run
-    out-of-bounds. A `vector.transfer_write` can be lowered to a simple store
-    if all dimensions are specified to be within bounds and no `mask` was
-    specified.
+    bounds. If specified, the `in_bounds` array length has to be equal to the
+    rank of the source. In the absence of the attribute, accesses along all
+    dimensions may run out-of-bounds. A `vector.transfer_write` can be lowered
+    to a simple store if all dimensions are specified to be within bounds and
+    no `mask` was specified.
 
     This operation is called 'write' by opposition to 'store' because the
     super-vector granularity is generally not representable with a single
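Illustration (not part of the patch; function and value names are hypothetical): under the relaxed semantics documented above, even the starting point of a transfer may be out-of-bounds along a dimension whose `in_bounds` entry is `false`. A minimal read that is now valid, assuming `%i` may exceed dim 0 of `%A`:

```mlir
func.func @oob_start(%A: memref<?x?xf32>, %i: index, %j: index,
                     %pad: f32) -> vector<4xf32> {
  // Valid even if %i >= dim(%A, 0): dim 0 is declared maybe-out-of-bounds,
  // so out-of-bounds lanes are filled with the padding value %pad.
  %v = vector.transfer_read %A[%i, %j], %pad {in_bounds = [false, true]}
      : memref<?x?xf32>, vector<4xf32>
  return %v : vector<4xf32>
}
```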
diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
--- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
+++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
@@ -47,8 +47,9 @@
 template <typename OpTy>
 struct VectorToSCFPattern : public OpRewritePattern<OpTy> {
   explicit VectorToSCFPattern(MLIRContext *context,
-                              VectorTransferToSCFOptions opt)
-      : OpRewritePattern<OpTy>(context), options(opt) {}
+                              VectorTransferToSCFOptions opt,
+                              PatternBenefit benefit = 1)
+      : OpRewritePattern<OpTy>(context, benefit), options(opt) {}
 
   VectorTransferToSCFOptions options;
 };
@@ -173,6 +174,9 @@
 /// This function variant returns the value returned by `inBoundsCase` or
 /// `outOfBoundsCase`. The MLIR type of the return value must be specified in
 /// `resultTypes`.
+///
+/// Note: `iv` is optional. If it is not provided, an "if (%a < %d)" check
+/// will be generated.
 template <typename OpTy>
 static Value generateInBoundsCheck(
     OpBuilder &b, OpTy xferOp, Value iv, std::optional<int64_t> dim,
@@ -189,21 +193,38 @@
   if (!isBroadcast && !xferOp.isDimInBounds(*dim)) {
     Value memrefDim =
         vector::createOrFoldDimOp(b, loc, xferOp.getSource(), *dim);
-    AffineExpr d0, d1;
-    bindDims(xferOp.getContext(), d0, d1);
-    Value base = xferOp.getIndices()[*dim];
-    Value memrefIdx =
-        affine::makeComposedAffineApply(b, loc, d0 + d1, {base, iv});
+    Value memrefIdx;
+    if (iv) {
+      AffineExpr d0, d1;
+      bindDims(xferOp.getContext(), d0, d1);
+      Value base = xferOp.getIndices()[*dim];
+      memrefIdx = affine::makeComposedAffineApply(b, loc, d0 + d1, {base, iv});
+    } else {
+      memrefIdx = xferOp.getIndices()[*dim];
+    }
     cond = lb.create<arith::CmpIOp>(arith::CmpIPredicate::sgt, memrefDim,
                                     memrefIdx);
   }
 
-  // Condition check 2: Masked in?
-  if (auto maskCond = generateMaskCheck(b, xferOp, iv)) {
-    if (cond)
-      cond = lb.create<arith::AndIOp>(cond, maskCond);
-    else
-      cond = maskCond;
+  // Condition check 2: Masked in? If no `iv` is provided, this is a check for
+  // a non-transfer dimension (which does not have a mask).
+  if (iv) {
+    if (auto maskCond = generateMaskCheck(b, xferOp, iv)) {
+      if (cond)
+        cond = lb.create<arith::AndIOp>(cond, maskCond);
+      else
+        cond = maskCond;
+    }
+  } else {
+#ifndef NDEBUG
+    if (dim.has_value()) {
+      for (AffineExpr expr : xferOp.getPermutationMap().getResults()) {
+        if (auto dimExpr = expr.dyn_cast<AffineDimExpr>()) {
+          assert(dimExpr.getPosition() != *dim && "expected non-transfer dim");
+        }
+      }
+    }
+#endif // NDEBUG
   }
 
   // If the condition is non-empty, generate an SCF::IfOp.
@@ -1243,9 +1264,17 @@
       return failure();
     if (xferOp.getVectorType().getRank() != 1)
       return failure();
+    assert(map.getNumResults() == 1 && "Expected 1 permutation map result");
     if (map.isMinorIdentity() && isLastMemrefDimUnitStride(memRefType))
       return failure(); // Handled by ConvertVectorToLLVM
 
+    // Only the transfer dimension may be out of bounds.
+    for (int64_t i = 0, e = xferOp.getShapedType().getRank(); i < e; ++i)
+      if (!xferOp.isDimInBounds(i))
+        if (auto dimExpr = map.getResult(0).template dyn_cast<AffineDimExpr>())
+          if (dimExpr.getPosition() != i)
+            return failure();
+
     // Loop bounds, step, state...
     Location loc = xferOp.getLoc();
     auto vecType = xferOp.getVectorType();
@@ -1272,6 +1301,100 @@
+
+/// This pattern turns an out-of-bounds non-transfer dimension into an
+/// in-bounds dimension by inserting the corresponding in-bounds check as an
+/// scf.if op.
+///
+/// Example:
+/// ```
+/// %vec = vector.transfer_read %A[%a, %b], %cst {in_bounds = [false, false]}
+///     : memref<?x?xf32>, vector<4xf32>
+/// ```
+/// Generated IR:
+/// ```
+/// %d = memref.dim %A, %c0 : memref<?x?xf32>
+/// if (%a < %d) {
+///   %vec = vector.transfer_read %A[%a, %b], %cst {in_bounds = [true, false]}
+///       : memref<?x?xf32>, vector<4xf32>
+///   scf.yield %vec
+/// } else {
+///   %vec = vector.splat %cst
+///   scf.yield %vec
+/// }
+/// ```
+///
+/// This pattern ignores all transfer dims, such as %b in the example above.
+/// Such dims are handled by other patterns (e.g., as part of unrolling or
+/// materialization of masks).
+template <typename OpTy>
+struct TransferOpOutOfBoundsNonTransferDims : public VectorToSCFPattern<OpTy> {
+  using VectorToSCFPattern<OpTy>::VectorToSCFPattern;
+
+  /// Out-of-bounds read: return padding value.
+  static Value handleOutOfBoundsDim(OpBuilder &b, TransferReadOp xferOp) {
+    return b.create<vector::SplatOp>(xferOp.getLoc(), xferOp.getVectorType(),
+                                     xferOp.getPadding());
+  }
+
+  /// Out-of-bounds write: nothing to do.
+  static Value handleOutOfBoundsDim(OpBuilder &b, TransferWriteOp xferOp) {
+    if (xferOp.getNumResults() == 0)
+      return Value();
+    assert(xferOp.getNumResults() == 1 && "expected 0 or 1 result");
+    return xferOp.getSource();
+  }
+
+  LogicalResult matchAndRewrite(OpTy xferOp,
+                                PatternRewriter &rewriter) const override {
+    // Find an out-of-bounds non-transfer dim.
+    int64_t dim = -1;
+    DenseSet<int64_t> xferDims;
+    for (AffineExpr expr : xferOp.getPermutationMap().getResults()) {
+      if (auto dimExpr = expr.template dyn_cast<AffineDimExpr>())
+        xferDims.insert(dimExpr.getPosition());
+    }
+    for (int64_t i = 0, e = xferOp.getShapedType().getRank(); i < e; ++i) {
+      if (!xferDims.contains(i) && !xferOp.isDimInBounds(i)) {
+        dim = i;
+        break;
+      }
+    }
+
+    // Nothing to do if all non-transfer dims are in-bounds.
+    if (dim == -1)
+      return failure();
+
+    Value result = generateInBoundsCheck(
+        rewriter, xferOp, /*iv=*/Value(), dim, xferOp->getResultTypes(),
+        /*inBoundsCase=*/
+        [&](OpBuilder &b, Location loc) -> Value {
+          // Clone the xfer op and set in_bounds[dim] to "true".
+          OpTy cloned = cast<OpTy>(b.clone(*xferOp.getOperation()));
+          SmallVector<bool> inBounds(xferOp.getShapedType().getRank(), false);
+          if (xferOp.getInBounds().has_value())
+            inBounds =
+                extractFromIntegerArrayAttr<bool>(xferOp.getInBoundsAttr());
+          inBounds[dim] = true;
+          cloned.setInBoundsAttr(b.getBoolArrayAttr(inBounds));
+          if (cloned->getNumResults() == 0)
+            return Value();
+          assert(cloned->getNumResults() == 1 && "expected 0 or 1 result");
+          return cloned->getResult(0);
+        },
+        /*outOfBoundsCase=*/
+        [&](OpBuilder &b, Location loc) -> Value {
+          return handleOutOfBoundsDim(b, xferOp);
+        });
+
+    if (xferOp->getNumResults() == 0) {
+      assert(!result && "expected that check has no result");
+      rewriter.eraseOp(xferOp);
+    } else {
+      rewriter.replaceOp(xferOp, result);
+    }
+    return success();
+  }
+};
 } // namespace
@@ -1293,6 +1416,10 @@ void mlir::populateVectorToSCFConversionPatterns(
         lowering_1_d::TransferOp1dConversion>(
         patterns.getContext(), options);
   }
+
+  patterns.add<TransferOpOutOfBoundsNonTransferDims<TransferReadOp>,
+               TransferOpOutOfBoundsNonTransferDims<TransferWriteOp>>(
+      patterns.getContext(), options);
 }
 
 namespace {
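To make the new iv-less code path concrete: for a `vector.transfer_read` whose non-transfer dim 0 may be out-of-bounds, `generateInBoundsCheck` (called with an empty `iv`) produces IR of roughly this shape. A sketch only; `%A`, `%a`, `%b`, `%pad`, and `%c0` are assumed to be defined in the enclosing function:

```mlir
%d = memref.dim %A, %c0 : memref<?x?xf32>
%in_bounds = arith.cmpi sgt, %d, %a : index
%vec = scf.if %in_bounds -> (vector<4xf32>) {
  // In-bounds case: rewritten transfer with in_bounds[0] = true.
  %r = vector.transfer_read %A[%a, %b], %pad {in_bounds = [true, false]}
      : memref<?x?xf32>, vector<4xf32>
  scf.yield %r : vector<4xf32>
} else {
  // Out-of-bounds case: the whole result is the padding value.
  %r = vector.splat %pad : vector<4xf32>
  scf.yield %r : vector<4xf32>
}
```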
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -3471,21 +3471,6 @@
              << " vs inBounds of size: " << inBounds.size();
   }
 
-  // Make sure that all non-transfer dimensions are in-bounds.
-  SmallVector<bool> inBoundsVals(op.getShapedType().getRank(), false);
-  if (inBounds)
-    inBoundsVals = extractFromIntegerArrayAttr<bool>(inBounds);
-  DenseSet<int64_t> xferDims;
-  for (AffineExpr expr : permutationMap.getResults()) {
-    if (auto dimExpr = expr.template dyn_cast<AffineDimExpr>())
-      xferDims.insert(dimExpr.getPosition());
-  }
-  for (int64_t i = 0, e = op.getShapedType().getRank(); i < e; ++i)
-    if (!xferDims.contains(i) && !op.isDimInBounds(i)) {
-      return op->emitOpError(
-          "expects that all non-transfer dims are in-bounds");
-    }
-
   return success();
 }
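With the verifier check above removed, IR like the following becomes valid; this is the exact case deleted from invalid.mlir further down, reproduced here without the expected-error annotation. The out-of-bounds non-transfer dim is instead legalized during lowering:

```mlir
func.func @out_of_bounds_non_transfer_dim(%arg0: tensor<?x?xf32>, %pos: index,
                                          %f: f32) -> vector<5xf32> {
  // No in_bounds attribute: dim 0 (the non-transfer dim) may be out-of-bounds.
  %0 = vector.transfer_read %arg0[%pos, %pos], %f
      : tensor<?x?xf32>, vector<5xf32>
  return %0 : vector<5xf32>
}
```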
diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
--- a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
+++ b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
@@ -548,3 +548,81 @@
 // CHECK: } else {
 // CHECK: }
 // CHECK: }
+
+// -----
+
+// CHECK-LABEL: func @read_non_transfer_dim_out_of_bounds(
+// CHECK: %[[r5:.*]] = scf.if {{.*}} {
+// CHECK: %[[r2:.*]] = scf.if {{.*}} {
+// CHECK-NOT: scf.if
+// CHECK: %[[r:.*]] = vector.transfer_read {{.*}} {in_bounds = [true, true, true, false]} : tensor<?x?x?x?xf32>, vector<5xf32>
+// CHECK: scf.yield %[[r]]
+// CHECK: } else {
+// CHECK: %[[r3:.*]] = vector.splat {{.*}} : vector<5xf32>
+// CHECK: scf.yield %[[r3]]
+// CHECK: }
+// CHECK: scf.yield %[[r2]]
+// CHECK: } else {
+// CHECK: %[[r4:.*]] = vector.splat {{.*}} : vector<5xf32>
+// CHECK: scf.yield %[[r4]]
+// CHECK: }
+// CHECK: return %[[r5]]
+func.func @read_non_transfer_dim_out_of_bounds(%t: tensor<?x?x?x?xf32>, %idx: index, %f: f32) -> vector<5xf32> {
+  %0 = vector.transfer_read %t[%idx, %idx, %idx, %idx], %f {in_bounds = [false, true, false, false]} : tensor<?x?x?x?xf32>, vector<5xf32>
+  return %0 : vector<5xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @write_non_transfer_dim_out_of_bounds(
+// CHECK-SAME: %[[t:.*]]: tensor<?x?x?x?xf32>
+// CHECK: scf.if {{.*}} {
+// CHECK: scf.if {{.*}} {
+// CHECK-NOT: scf.if
+// CHECK: %[[r:.*]] = vector.transfer_write {{.*}} {in_bounds = [true, true, true, false]} : vector<5xf32>, tensor<?x?x?x?xf32>
+// CHECK: scf.yield %[[r]]
+// CHECK: } else {
+// CHECK: scf.yield %[[t]]
+// CHECK: }
+// CHECK: } else {
+// CHECK: scf.yield %[[t]]
+// CHECK: }
+func.func @write_non_transfer_dim_out_of_bounds(%t: tensor<?x?x?x?xf32>, %idx: index, %f: f32, %v: vector<5xf32>) -> tensor<?x?x?x?xf32> {
+  %0 = vector.transfer_write %v, %t[%idx, %idx, %idx, %idx] {in_bounds = [false, true, false, false]} : vector<5xf32>, tensor<?x?x?x?xf32>
+  return %0 : tensor<?x?x?x?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @read_memref_non_transfer_dim_out_of_bounds(
+// CHECK: scf.if {{.*}} {
+// CHECK: scf.if {{.*}} {
+// CHECK-NOT: scf.if
+// CHECK: vector.transfer_read {{.*}} {in_bounds = [true, true, true, false]} : memref<?x?x?x?xf32>, vector<5xf32>
+// CHECK: } else {
+// CHECK: vector.splat {{.*}} : vector<5xf32>
+// CHECK: }
+// CHECK: } else {
+// CHECK: vector.splat {{.*}} : vector<5xf32>
+// CHECK: }
func.func @read_memref_non_transfer_dim_out_of_bounds(%t: memref<?x?x?x?xf32>, %idx: index, %f: f32) -> vector<5xf32> {
+  %0 = vector.transfer_read %t[%idx, %idx, %idx, %idx], %f {in_bounds = [false, true, false, false]} : memref<?x?x?x?xf32>, vector<5xf32>
+  return %0 : vector<5xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @memref_write_non_transfer_dim_out_of_bounds(
+// CHECK-SAME: %[[t:.*]]: memref<?x?x?x?xf32>
+// CHECK: scf.if {{.*}} {
+// CHECK: scf.if {{.*}} {
+// CHECK-NOT: scf.if
+// CHECK: vector.transfer_write {{.*}} {in_bounds = [true, true, true, false]} : vector<5xf32>, memref<?x?x?x?xf32>
+// CHECK: } else {
+// CHECK: }
+// CHECK: } else {
+// CHECK: }
+func.func @memref_write_non_transfer_dim_out_of_bounds(%t: memref<?x?x?x?xf32>, %idx: index,
+                                                       %f: f32, %v: vector<5xf32>) {
+  vector.transfer_write %v, %t[%idx, %idx, %idx, %idx] {in_bounds = [false, true, false, false]} : vector<5xf32>, memref<?x?x?x?xf32>
+  return
+}
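The CHECK lines above correspond to lowered IR of roughly this shape (a sketch; SSA names and the `%c0`/`%c2` constants are hypothetical): one `scf.if` per out-of-bounds non-transfer dim (dims 0 and 2), nested, with the innermost transfer marked in-bounds on those dims:

```mlir
%d0 = tensor.dim %t, %c0 : tensor<?x?x?x?xf32>
%ok0 = arith.cmpi sgt, %d0, %idx : index
%r = scf.if %ok0 -> (vector<5xf32>) {
  %d2 = tensor.dim %t, %c2 : tensor<?x?x?x?xf32>
  %ok2 = arith.cmpi sgt, %d2, %idx : index
  %inner = scf.if %ok2 -> (vector<5xf32>) {
    %v = vector.transfer_read %t[%idx, %idx, %idx, %idx], %f
        {in_bounds = [true, true, true, false]}
        : tensor<?x?x?x?xf32>, vector<5xf32>
    scf.yield %v : vector<5xf32>
  } else {
    %pad = vector.splat %f : vector<5xf32>
    scf.yield %pad : vector<5xf32>
  }
  scf.yield %inner : vector<5xf32>
} else {
  %pad = vector.splat %f : vector<5xf32>
  scf.yield %pad : vector<5xf32>
}
```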
diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir
--- a/mlir/test/Dialect/Vector/invalid.mlir
+++ b/mlir/test/Dialect/Vector/invalid.mlir
@@ -1675,11 +1675,3 @@
       } %arg0, %arg1, %arg2 : vector<16x32xsi8>, vector<32x16xsi8> into vector<16x16xsi32>
   return %0: vector<16x16xsi32>
 }
-
-// -----
-
-func.func @out_of_bounds_non_transfer_dim(%arg0: tensor<?x?xf32>, %pos: index, %f: f32) -> vector<5xf32> {
-  // expected-error @below{{expects that all non-transfer dims are in-bounds}}
-  %0 = vector.transfer_read %arg0[%pos, %pos], %f : tensor<?x?xf32>, vector<5xf32>
-  return %0 : vector<5xf32>
-}
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
@@ -99,6 +99,17 @@
   return
 }
 
+// Non-contiguous, strided load, where either dimension may start out-of-bounds.
+func.func @transfer_read_1d_out_of_bounds(
+    %A : memref<?x?xf32>, %base1 : index, %base2 : index) {
+  %fm42 = arith.constant -42.0: f32
+  %f = vector.transfer_read %A[%base1, %base2], %fm42
+      {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = [false, false]}
+      : memref<?x?xf32>, vector<3xf32>
+  vector.print %f: vector<3xf32>
+  return
+}
+
 // Non-contiguous, strided load.
 func.func @transfer_read_1d_mask(
     %A : memref<?x?xf32>, %base1 : index, %base2 : index) {
@@ -149,6 +160,7 @@
   %c1 = arith.constant 1: index
   %c2 = arith.constant 2: index
   %c3 = arith.constant 3: index
+  %c10 = arith.constant 10 : index
 
   %0 = memref.get_global @gv : memref<5x6xf32>
   %A = memref.cast %0 : memref<5x6xf32> to memref<?x?xf32>
@@ -169,6 +181,18 @@
   call @transfer_read_1d_non_static_unit_stride(%A) : (memref<?x?xf32>) -> ()
   // CHECK: ( 31, 32, 33, 34 )
 
+  // 2.c. Read 1D vector from 2D memref with out-of-bounds transfer dim
+  // starting point.
+  call @transfer_read_1d_out_of_bounds(%A, %c10, %c1)
+      : (memref<?x?xf32>, index, index) -> ()
+  // CHECK: ( -42, -42, -42 )
+
+  // 2.d. Read 1D vector from 2D memref with out-of-bounds non-transfer dim
+  // starting point.
+  call @transfer_read_1d_out_of_bounds(%A, %c1, %c10)
+      : (memref<?x?xf32>, index, index) -> ()
+  // CHECK: ( -42, -42, -42 )
+
   // 3. Read 1D vector from 2D memref with non-unit stride on second dim.
   call @transfer_read_1d_non_unit_stride(%A) : (memref<?x?xf32>) -> ()
   // CHECK: ( 22, 24, -42 )
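Both new calls print `( -42, -42, -42 )`, but for different reasons: the permutation map `(d0, d1) -> (d0)` makes dim 0 the transfer dim, so with `%base1 = 10` every element of the transfer is out-of-bounds and padded individually, while with `%base2 = 10` the whole transfer is rejected by the new non-transfer-dim guard. A sketch (assumed shape, per the lowering above) of that guard for dim 1:

```mlir
%dim1 = memref.dim %A, %c1 : memref<?x?xf32>
%ok = arith.cmpi sgt, %dim1, %base2 : index
%res = scf.if %ok -> (vector<3xf32>) {
  %r = vector.transfer_read %A[%base1, %base2], %fm42
      {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = [false, true]}
      : memref<?x?xf32>, vector<3xf32>
  scf.yield %r : vector<3xf32>
} else {
  %r = vector.splat %fm42 : vector<3xf32>
  scf.yield %r : vector<3xf32>
}
```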
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir
@@ -126,13 +126,24 @@
   %c1 = arith.constant 1: index
   %c2 = arith.constant 2: index
   %c3 = arith.constant 3: index
+  %c10 = arith.constant 10 : index
 
   %0 = memref.get_global @gv : memref<3x4xf32>
   %A = memref.cast %0 : memref<3x4xf32> to memref<?x?xf32>
 
-  // 1. Read 2D vector from 2D memref.
+  // 1.a. Read 2D vector from 2D memref.
   call @transfer_read_2d(%A, %c1, %c2) : (memref<?x?xf32>, index, index) -> ()
   // CHECK: ( ( 12, 13, -42, -42, -42, -42, -42, -42, -42 ), ( 22, 23, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
 
+  // 1.b. Read 2D vector from 2D memref. Starting position of first dim is
+  // out-of-bounds.
+  call @transfer_read_2d(%A, %c3, %c2) : (memref<?x?xf32>, index, index) -> ()
+  // CHECK: ( ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
+
+  // 1.c. Read 2D vector from 2D memref. Starting position of second dim is
+  // out-of-bounds.
+  call @transfer_read_2d(%A, %c1, %c10) : (memref<?x?xf32>, index, index) -> ()
+  // CHECK: ( ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) )
+
   // 2. Read 2D vector from 2D memref at specified location and transpose the
   //    result.
   call @transfer_read_2d_transposed(%A, %c1, %c2)
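For context, `@transfer_read_2d` is defined earlier in this test file and is unchanged by the patch; its assumed definition (a sketch reconstructed from the CHECK output, so treat the details as unverified) explains the 4x9 result shape and the `-42` padding:

```mlir
func.func @transfer_read_2d(%A : memref<?x?xf32>, %base1: index, %base2: index) {
  %fm42 = arith.constant -42.0: f32
  // Reads a 4x9 tile starting at (%base1, %base2); out-of-bounds elements
  // are replaced with the -42 padding value.
  %f = vector.transfer_read %A[%base1, %base2], %fm42
      : memref<?x?xf32>, vector<4x9xf32>
  vector.print %f : vector<4x9xf32>
  return
}
```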