diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -637,14 +637,33 @@ LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize << ", size: " << size << ": make sure in bound with affine.min\n"); + AffineExpr dim0, dim1, dim2; bindDims(builder.getContext(), dim0, dim1, dim2); - // Compute min(size, dim - offset) to avoid out-of-bounds accesses. - AffineMap minMap = - AffineMap::inferFromExprList( - ArrayRef>{{dim0, dim1 - dim2}}) + + // Get the dimension size for this dimension. We need to first calculate + // the max index and then add one. This is important because for + // convolution ops, we have its input window dimension's affine map of the + // form `(d0 * s0 + d1)`, where `d0`/`d1` is an output/filter window + // dimension and `s0` is stride. Directly using the dimension size of + // output/filter window dimensions will cause incorrect calculation. + AffineMap minusOneMap = + AffineMap::inferFromExprList({ArrayRef{dim0 - 1}}) .front(); - Value d = applyMapToValues(builder, loc, m, ubs).front(); + AffineMap plusOneMap = + AffineMap::inferFromExprList({ArrayRef{dim0 + 1}}) + .front(); + auto maxIndices = llvm::to_vector<8>(llvm::map_range(ubs, [&](Value ub) { + return makeComposedAffineApply(builder, loc, minusOneMap, {ub}) + .getResult(); + })); + Value maxIndex = applyMapToValues(builder, loc, m, maxIndices).front(); + Value d = makeComposedAffineApply(builder, loc, plusOneMap, {maxIndex}); + + // Compute min(size, dim - offset) to avoid out-of-bounds accesses. 
+ AffineMap minMap = AffineMap::inferFromExprList( + {ArrayRef{dim0, dim1 - dim2}}) + .front(); SmallVector operands{size, d, offset}; fullyComposeAffineMapAndOperands(&minMap, &operands); canonicalizeMapAndOperands(&minMap, &operands); diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir --- a/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir @@ -233,7 +233,7 @@ // ----- // CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0, d1) -> (d0 + d1)> -// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (8, -d0 - d1 + 18)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (8, -d0 - d1 + 17)> // CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0, d1, d2) -> (d0, -d1 - d2 + 18)> #map0 = affine_map<(d0, d1) -> (d0, d0 + d1)> #map1 = affine_map<(d0, d1) -> (d0, d1)> @@ -245,13 +245,13 @@ %cst = constant 0.000000e+00 : f32 %0 = linalg.fill(%cst, %arg0) : f32, tensor<10x18xf32> -> tensor<10x18xf32> - // CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] = - // CHECK: scf.for %[[IV1:[0-9a-zA-Z]*]] = + // CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] = %c0 to %c8 step %c4 + // CHECK: scf.for %[[IV1:[0-9a-zA-Z]*]] = %c0 to %c10 step %c5 // Compute producer on a hyper rectangular bounding box. Along the second dimenson, - // the offset is set to the sum of the induction variables and the upper bound - // to either eight (sum of the tile sizes) or eighteen (sum of the domain sizes) - // minus the induction variables. + // the offset is set to the sum of the induction variables, and the upper bound + // to either 8 (tile size) or 17 (sum of max indices (9+7) then + 1) minus the + // induction variables. 
// CHECK: %[[SUM:.*]] = affine.apply #[[MAP0]](%[[IV1]], %[[IV0]] // CHECK: %[[TS1:.*]] = affine.min #[[MAP1]](%[[IV1]], %[[IV0]] // CHECK: %[[UB1:.*]] = affine.min #[[MAP2]](%[[TS1]], %[[IV1]], %[[IV0]] diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir --- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir @@ -203,7 +203,7 @@ // CHECK: #[[BOUND8_MAP_2:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s0, 8, -d0 + s1)> // CHECK: #[[BOUND16_MAP:.+]] = affine_map<(d0)[s0] -> (16, -d0 + s0)> // CHECK: #[[X2_MAP:.+]] = affine_map<(d0) -> (d0 * 2)> -// CHECK: #[[INPUT_BOUND:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * 2 + s0 - 2, d1 * -2 + s0 + s1 * 2)> +// CHECK: #[[INPUT_BOUND:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * 2 + s0 - 2, d1 * -2 + s0 + s1 * 2 - 2)> // CHECK: #[[BOUND16_MAP_2:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s0, 16, -d0 + s1)> // CHECK: #[[BOUND4_MAP:.+]] = affine_map<(d0)[s0] -> (4, -d0 + s0)> // CHECK: #[[BOUND2_MAP:.+]] = affine_map<(d0)[s0] -> (2, -d0 + s0)> diff --git a/mlir/test/Dialect/Linalg/tile-conv.mlir b/mlir/test/Dialect/Linalg/tile-conv.mlir --- a/mlir/test/Dialect/Linalg/tile-conv.mlir +++ b/mlir/test/Dialect/Linalg/tile-conv.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,0,0,4" | FileCheck %s -check-prefix=TILE-23004 // TILE-23004-DAG: #[[$D0x30pS0x10:.*]] = affine_map<(d0) -> (d0 * 30)> -// TILE-23004-DAG: #[[$S0x10p90D0x30pS1:.*]] = affine_map<(d0)[s0, s1] -> (s0 * 10 + 51, d0 * -30 + s0 * 10 + s1 * 30)> +// TILE-23004-DAG: #[[$S0x10p90D0x30pS1:.*]] = affine_map<(d0)[s0, s1] -> (s0 * 10 + 51, d0 * -30 + s0 * 10 + s1 * 30 - 39)> // TILE-23004-DAG: #[[$strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)> // TILE-23004-DAG: #[[$bound_map_2:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)> // TILE-23004-DAG: #[[$bound_map_3:.*]] = affine_map<(d0)[s0] 
-> (3, -d0 + s0)> diff --git a/mlir/test/Dialect/Linalg/tile-simple-conv.mlir b/mlir/test/Dialect/Linalg/tile-simple-conv.mlir --- a/mlir/test/Dialect/Linalg/tile-simple-conv.mlir +++ b/mlir/test/Dialect/Linalg/tile-simple-conv.mlir @@ -1,8 +1,8 @@ // RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,4" | FileCheck %s // CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)> -// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 2, -d0 + s0 + s1)> -// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 3, -d0 + s0 + s1)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 2, -d0 + s0 + s1 - 1)> +// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 3, -d0 + s0 + s1 - 1)> // CHECK-DAG: #[[MAP4:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)> // CHECK-DAG: #[[MAP5:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>