diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -1171,22 +1171,21 @@ PadTensorOp PadTensorOp::createPadHighOp(Type type, Value source, Value pad, bool nofold, Location loc, - OpBuilder &builder) { + OpBuilder &b) { SmallVector<Value> low, high; auto rankedTensorType = type.cast<RankedTensorType>(); assert(rankedTensorType.hasStaticShape()); - int rank = rankedTensorType.getRank(); - for (int i = 0; i < rank; ++i) { - auto dimOp = builder.createOrFold<tensor::DimOp>(loc, source, i); - auto resultDimSize = builder.createOrFold<arith::ConstantIndexOp>( - loc, rankedTensorType.getDimSize(i)); - auto highValue = - builder.createOrFold<arith::SubIOp>(loc, resultDimSize, dimOp); - high.push_back(highValue); - low.push_back(builder.createOrFold<arith::ConstantIndexOp>(loc, 0)); + for (auto en : enumerate(rankedTensorType.getShape())) { + AffineExpr d0; + bindDims(b.getContext(), d0); + auto dimOp = b.createOrFold<tensor::DimOp>(loc, source, en.index()); + Value paddingWidth = + makeComposedAffineApply(b, loc, en.value() - d0, {dimOp}); + high.push_back(paddingWidth); + low.push_back(b.createOrFold<arith::ConstantIndexOp>(loc, 0)); } return PadTensorOp::createPadScalarOp(type, source, pad, low, high, nofold, - loc, builder); + loc, b); } LogicalResult PadTensorOp::reifyResultShapes( diff --git a/mlir/test/Dialect/Linalg/pad-and-hoist.mlir b/mlir/test/Dialect/Linalg/pad-and-hoist.mlir --- a/mlir/test/Dialect/Linalg/pad-and-hoist.mlir +++ b/mlir/test/Dialect/Linalg/pad-and-hoist.mlir @@ -3,6 +3,8 @@ // CHECK-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<(d0) -> (5, -d0 + 24)> // CHECK-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<(d0) -> (7, -d0 + 25)> +// CHECK-DAG: #[[MAP2:[0-9a-z]+]] = affine_map<(d0) -> (-d0 + 5)> +// CHECK-DAG: #[[MAP3:[0-9a-z]+]] = affine_map<(d0) -> (-d0 + 7)> // CHECK-DAG: #[[DIV6:[0-9a-z]+]] = affine_map<(d0) -> (d0 ceildiv 6)> #map0 = affine_map<(d0) -> (5, -d0 + 24)> #map1 = affine_map<(d0) -> (7, -d0 + 25)> @@ -37,7 +39,7 @@ // CHECK: %[[T0:.*]] = 
tensor.extract_slice %[[ARG0]] // CHECK-SAME: %[[IV0]], %[[PIV0]] // CHECK-SAME: %[[TS0]], 6 - // CHECK: %[[V0:.*]] = arith.subi %[[C5]], %[[TS0]] + // CHECK: %[[V0:.*]] = affine.apply #[[MAP2]](%[[TS0]]) // CHECK: %[[T1:.*]] = linalg.pad_tensor %[[T0]] nofold {{.*}} high[%[[V0]] // CHECK: %[[T2:.*]] = tensor.insert_slice %[[T1:.*]] into %{{.*}}[%[[PIDX0]], 0, 0] // CHECK: scf.yield %[[T2:.*]] @@ -53,7 +55,7 @@ // CHECK: %[[T3:.*]] = tensor.extract_slice %[[ARG1]] // CHECK-SAME: %[[PIV1]], %[[IV1]] // CHECK-SAME: 6, %[[TS1]] - // CHECK: %[[V1:.*]] = arith.subi %[[C7]], %[[TS1]] + // CHECK: %[[V1:.*]] = affine.apply #[[MAP3]](%[[TS1]]) // CHECK: %[[T4:.*]] = linalg.pad_tensor %[[T3]] nofold {{.*}} high[%[[C0]], %[[V1]] // CHECK: %[[T5:.*]] = tensor.insert_slice %[[T4:.*]] into %{{.*}}[%[[PIDX1]], 0, 0] // CHECK: scf.yield %[[T5:.*]] @@ -93,6 +95,9 @@ // CHECK-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<(d0)[s0] -> (5, -d0 + s0)> // CHECK-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<(d0)[s0] -> (6, -d0 + s0)> // CHECK-DAG: #[[MAP2:[0-9a-z]+]] = affine_map<(d0)[s0] -> (7, -d0 + s0)> +// CHECK-DAG: #[[MAP3:[0-9a-z]+]] = affine_map<(d0) -> (-d0 + 5)> +// CHECK-DAG: #[[MAP4:[0-9a-z]+]] = affine_map<(d0) -> (-d0 + 6)> +// CHECK-DAG: #[[MAP5:[0-9a-z]+]] = affine_map<(d0) -> (-d0 + 7)> // CHECK-DAG: #[[SDIV6:[0-9a-z]+]] = affine_map<()[s0] -> (s0 ceildiv 6)> // CHECK-DAG: #[[DDIV6:[0-9a-z]+]] = affine_map<(d0) -> (d0 ceildiv 6)> #map0 = affine_map<(d0)[s0] -> (5, -d0 + s0)> @@ -137,8 +142,8 @@ // CHECK: %[[T0:.*]] = tensor.extract_slice %[[ARG0]] // CHECK-SAME: %[[IV0]], %[[PIV0]] // CHECK-SAME: %[[TS0]], %[[TS1]] - // CHECK: %[[V0:.*]] = arith.subi %[[C5]], %[[TS0]] - // CHECK: %[[V1:.*]] = arith.subi %[[C6]], %[[TS1]] + // CHECK: %[[V0:.*]] = affine.apply #[[MAP3]](%[[TS0]]) + // CHECK: %[[V1:.*]] = affine.apply #[[MAP4]](%[[TS1]]) // CHECK: %[[T1:.*]] = linalg.pad_tensor %[[T0]] nofold {{.*}} high[%[[V0]], %[[V1]] // CHECK: %[[T2:.*]] = tensor.insert_slice %[[T1:.*]] into 
%{{.*}}[%[[PIDX0]], 0, 0] // CHECK: scf.yield %[[T2:.*]] @@ -155,8 +160,8 @@ // CHECK: %[[T3:.*]] = tensor.extract_slice %[[ARG1]] // CHECK-SAME: %[[PIV1]], %[[IV1]] // CHECK-SAME: %[[TS2]], %[[TS3]] - // CHECK: %[[V2:.*]] = arith.subi %[[C6]], %[[TS2]] - // CHECK: %[[V3:.*]] = arith.subi %[[C7]], %[[TS3]] + // CHECK: %[[V2:.*]] = affine.apply #[[MAP4]](%[[TS2]]) + // CHECK: %[[V3:.*]] = affine.apply #[[MAP5]](%[[TS3]]) // CHECK: %[[T4:.*]] = linalg.pad_tensor %[[T3]] nofold {{.*}} high[%[[V2]], %[[V3]] // CHECK: %[[T5:.*]] = tensor.insert_slice %[[T4:.*]] into %{{.*}}[%[[PIDX1]], 0, 0] // CHECK: scf.yield %[[T5:.*]] diff --git a/mlir/test/Dialect/Linalg/pad.mlir b/mlir/test/Dialect/Linalg/pad.mlir --- a/mlir/test/Dialect/Linalg/pad.mlir +++ b/mlir/test/Dialect/Linalg/pad.mlir @@ -1,6 +1,7 @@ // RUN: mlir-opt %s -test-linalg-transform-patterns="test-pad-pattern pack-paddings=1,1,0 hoist-paddings=0,0,0" -cse -canonicalize -split-input-file | FileCheck %s // CHECK-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<(d0) -> (7, -d0 + 12)> +// CHECK-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<(d0) -> (-d0 + 7)> #map = affine_map<(d0) -> (7, -d0 + 12)> // CHECK: static_sizes_output_divisible @@ -39,7 +40,7 @@ %6 = tensor.extract_slice %arg8[%arg3, %arg5] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> // Check statically sized matmul inputs with partially divisible sizes are padded. 
- // CHECK: %[[V0:.*]] = arith.subi %[[C7]], %[[TS2]] + // CHECK: %[[V0:.*]] = affine.apply #[[MAP1]](%[[TS2]]) // CHECK: %[[T3:.*]] = linalg.pad_tensor %[[T0]] nofold // CHECK-SAME: [%[[C0]], %[[C0]]] // CHECK-SAME: [%[[C0]], %[[V0]] @@ -66,6 +67,7 @@ // ----- // CHECK-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<(d0) -> (7, -d0 + 25)> +// CHECK-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<(d0) -> (-d0 + 7)> #map = affine_map<(d0) -> (7, -d0 + 25)> // CHECK: static_sizes_input_divisible @@ -100,7 +102,7 @@ %6 = tensor.extract_slice %arg8[%arg3, %arg5] [4, %4] [1, 1] : tensor<24x25xf32> to tensor<4x?xf32> // Check the statically sized matmul output with partially divisible sizes is padded. - // CHECK: %[[V0:.*]] = arith.subi %[[C7]], %[[TS1]] + // CHECK: %[[V0:.*]] = affine.apply #[[MAP1]](%[[TS1]]) // CHECK: %[[T1:.*]] = linalg.pad_tensor %[[T0]] low // CHECK-SAME: [%[[C0]], %[[C0]]] // CHECK-SAME: [%[[C0]], %[[V0]] @@ -127,6 +129,9 @@ // CHECK-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<(d0)[s0] -> (5, -d0 + s0)> // CHECK-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<(d0)[s0] -> (7, -d0 + s0)> // CHECK-DAG: #[[MAP2:[0-9a-z]+]] = affine_map<(d0)[s0] -> (6, -d0 + s0)> +// CHECK-DAG: #[[MAP3:[0-9a-z]+]] = affine_map<(d0) -> (-d0 + 5)> +// CHECK-DAG: #[[MAP4:[0-9a-z]+]] = affine_map<(d0) -> (-d0 + 6)> + #map0 = affine_map<(d0)[s0] -> (5, -d0 + s0)> #map1 = affine_map<(d0)[s0] -> (6, -d0 + s0)> #map2 = affine_map<(d0)[s0] -> (7, -d0 + s0)> @@ -175,8 +180,8 @@ %11 = tensor.extract_slice %arg8[%arg3, %arg5] [%6, %9] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32> // Check all matmul operands are padded. - // CHECK: %[[V0:.*]] = arith.subi %[[C5]], %[[TS0]] - // CHECK: %[[V1:.*]] = arith.subi %[[C6]], %[[TS2]] + // CHECK: %[[V0:.*]] = affine.apply #[[MAP3]](%[[TS0]]) + // CHECK: %[[V1:.*]] = affine.apply #[[MAP4]](%[[TS2]]) // CHECK: %[[T3:.*]] = linalg.pad_tensor %{{.*}} nofold // CHECK-SAME: [%[[C0]], %[[C0]]] // CHECK-SAME: [%[[V0]], %[[V1]]