diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td --- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td @@ -1224,14 +1224,15 @@ `0` are masked out and replaced with `padding`. An optional boolean array attribute `in_bounds` specifies for every vector - dimension if the transfer is guaranteed to be within the source bounds. - While the starting point of the transfer has to be in-bounds, accesses may - run out-of-bounds as indices increase. Broadcast dimensions must always be - in-bounds. If specified, the `in_bounds` array length has to be equal to the - vector rank. In absence of the attribute, accesses along all dimensions - (except for broadcasts) may run out-of-bounds. A `vector.transfer_read` can - be lowered to a simple load if all dimensions are specified to be within - bounds and no `mask` was specified. + dimension if the transfer is guaranteed to be within the source bounds. If + specified, the `in_bounds` array length has to be equal to the vector rank. + If set to "false", accesses (including the starting point) may run + out-of-bounds along the respective vector dimension as the index increases. + Broadcast dimensions must always be in-bounds. In absence of the attribute, + accesses along all vector dimensions (except for broadcasts) may run + out-of-bounds. A `vector.transfer_read` can be lowered to a simple load if + all dimensions are specified to be within bounds and no `mask` was + specified. Note that non-vector dimensions *must* always be in-bounds. This operation is called 'read' by opposition to 'load' because the super-vector granularity is generally not representable with a single @@ -1465,13 +1466,14 @@ is `0` are masked out. An optional boolean array attribute `in_bounds` specifies for every vector - dimension if the transfer is guaranteed to be within the source bounds. 
- While the starting point of the transfer has to be in-bounds, accesses may - run out-of-bounds as indices increase. If specified, the `in_bounds` array - length has to be equal to the vector rank. In absence of the attribute, - accesses along all dimensions may run out-of-bounds. A - `vector.transfer_write` can be lowered to a simple store if all dimensions - are specified to be within bounds and no `mask` was specified. + dimension if the transfer is guaranteed to be within the source bounds. If + specified, the `in_bounds` array length has to be equal to the vector rank. + If set to "false", accesses (including the starting point) may run + out-of-bounds along the respective vector dimension as the index increases. + In absence of the attribute, accesses along all vector dimensions may run + out-of-bounds. A `vector.transfer_write` can be lowered to a simple store if + all dimensions are specified to be within bounds and no `mask` was + specified. Note that non-vector dimensions *must* always be in-bounds. This operation is called 'write' by opposition to 'store' because the super-vector granularity is generally not representable with a single diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir --- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir @@ -111,6 +111,17 @@ return } +// Non-contiguous, out-of-bounds, strided load. +func.func @transfer_read_1d_out_of_bounds( + %A : memref<?x?xf32>, %base1 : index, %base2 : index) { + %fm42 = arith.constant -42.0: f32 + %f = vector.transfer_read %A[%base1, %base2], %fm42 + {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = [false]} + : memref<?x?xf32>, vector<3xf32> + vector.print %f: vector<3xf32> + return +} + // Non-contiguous, strided load. 
func.func @transfer_read_1d_mask_in_bounds( %A : memref<?x?xf32>, %base1 : index, %base2 : index) { @@ -149,6 +160,7 @@ %c1 = arith.constant 1: index %c2 = arith.constant 2: index %c3 = arith.constant 3: index + %c10 = arith.constant 10 : index %0 = memref.get_global @gv : memref<5x6xf32> %A = memref.cast %0 : memref<5x6xf32> to memref<?x?xf32> @@ -169,6 +181,12 @@ call @transfer_read_1d_non_static_unit_stride(%A) : (memref<?x?xf32>) -> () // CHECK: ( 31, 32, 33, 34 ) + // 2.c. Read 1D vector from 2D memref with out-of-bounds transfer dim starting + // point. + call @transfer_read_1d_out_of_bounds(%A, %c10, %c1) + : (memref<?x?xf32>, index, index) -> () + // CHECK: ( -42, -42, -42 ) + // 3. Read 1D vector from 2D memref with non-unit stride on second dim. call @transfer_read_1d_non_unit_stride(%A) : (memref<?x?xf32>) -> () // CHECK: ( 22, 24, -42 ) diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir --- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir @@ -123,13 +123,24 @@ %c1 = arith.constant 1: index %c2 = arith.constant 2: index %c3 = arith.constant 3: index + %c10 = arith.constant 10 : index %0 = memref.get_global @gv : memref<3x4xf32> %A = memref.cast %0 : memref<3x4xf32> to memref<?x?xf32> - // 1. Read 2D vector from 2D memref. + // 1.a. Read 2D vector from 2D memref. call @transfer_read_2d(%A, %c1, %c2) : (memref<?x?xf32>, index, index) -> () // CHECK: ( ( 12, 13, -42, -42, -42, -42, -42, -42, -42 ), ( 22, 23, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) ) + // 1.b. Read 2D vector from 2D memref. Starting position of first dim is + // out-of-bounds. 
+ call @transfer_read_2d(%A, %c3, %c2) : (memref<?x?xf32>, index, index) -> () + // CHECK: ( ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) ) + + // 1.c. Read 2D vector from 2D memref. Starting position of second dim is + // out-of-bounds. + call @transfer_read_2d(%A, %c1, %c10) : (memref<?x?xf32>, index, index) -> () + // CHECK: ( ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) ) + // 2. Read 2D vector from 2D memref at specified location and transpose the // result. call @transfer_read_2d_transposed(%A, %c1, %c2)