diff --git a/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp b/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp
@@ -180,9 +180,12 @@
       createAllocationForTensor(rewriter, loc, padOp.getResult(), memorySpace);
   rewriter.setInsertionPointAfter(alloc.getDefiningOp());
 
-  // Create linalg.fill or linalg.generic.
-  Operation *fillOp = movePaddingToFillOrGenericOp(rewriter, loc, padOp, alloc);
-  rewriter.setInsertionPointAfter(fillOp);
+  if (!padOp.hasZeroLowPad() || !padOp.hasZeroHighPad()) {
+    // Create linalg.fill or linalg.generic. Not needed if there is no padding.
+    Operation *fillOp =
+        movePaddingToFillOrGenericOp(rewriter, loc, padOp, alloc);
+    rewriter.setInsertionPointAfter(fillOp);
+  }
 
   // Create memref.tensor_store.
   SmallVector<OpFoldResult> sizes =
diff --git a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
--- a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
@@ -36,8 +36,7 @@
     bindDims(b.getContext(), d0);
     OpFoldResult sz = tensor::getMixedSize(b, loc, source, en.index());
     high[en.index()] =
-        affine::makeComposedAffineApply(b, loc, en.value() - d0, {sz})
-            .getResult();
+        affine::makeComposedFoldedAffineApply(b, loc, en.value() - d0, {sz});
   }
   return b.create<PadOp>(loc, type, source, low, high, pad, nofold);
 }
diff --git a/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir b/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir
--- a/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir
@@ -28,9 +28,7 @@
 // CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]
 // CHECK-SAME: %[[PAD_VAL:[a-zA-Z0-9]+]]
 // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
-// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
-// CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index
-// CHECK: %[[PAD:.+]] = tensor.pad %[[SRC]] low[%[[C0]], %[[C0]]] high[%[[C3]], %[[C1]]]
+// CHECK: %[[PAD:.+]] = tensor.pad %[[SRC]] low[%[[C0]], %[[C0]]] high[3, 1]
 // CHECK: tensor.yield %[[PAD_VAL]]
 // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x2xf32>
 // CHECK: %[[TRANSP:.+]] = linalg.transpose
diff --git a/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir b/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir
--- a/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir
+++ b/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir
@@ -32,7 +32,7 @@
   // CHECK: memref.copy %[[s1]], %[[alloc1_view]]
 
   // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3>
-  // CHECK: linalg.fill {{.*}} outs(%[[alloc2]]
+  // CHECK-NOT: linalg.fill {{.*}} outs(%[[alloc2]]
   // No subview because there is 0 padding
   // CHECK: memref.copy %[[s2]], %[[alloc2]]
diff --git a/mlir/test/Dialect/Linalg/transform-op-pad.mlir b/mlir/test/Dialect/Linalg/transform-op-pad.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-pad.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-pad.mlir
@@ -188,7 +188,6 @@
 // linalg op is not produced by an empty op or an extract_slice op.
 
 // CHECK-DAG: #[[$MAP_MIN:.*]] = affine_map<(d0) -> (-d0 + 2044, 16)>
-// CHECK-DAG: #[[$MAP_C0:.*]] = affine_map<() -> (0)>
 // CHECK-DAG: #[[$MAP_TO_16:.*]] = affine_map<(d0) -> (-d0 + 16)>
 // CHECK-LABEL: @outs_not_produced_by_empty_or_extract_slice(
 // CHECK-SAME: %[[A:[^: ]*]]: tensor<128x2044xf32>,
@@ -212,16 +211,13 @@
   // CHECK-DAG: %[[CST:.*]] = arith.constant 0.
   // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-  // CHECK-DAG: %[[ZERO:.*]] = affine.apply #[[$MAP_C0]]()
   // CHECK-DAG: %[[TO_16:.*]] = affine.apply #[[$MAP_TO_16]](%[[MIN]])
-  // CHECK: %[[PADDED_A_SLICE:.*]] = tensor.pad %[[A_SLICE]] nofold low[%[[C0]], %[[C0]]] high[%[[ZERO]], %[[TO_16]]]
+  // CHECK: %[[PADDED_A_SLICE:.*]] = tensor.pad %[[A_SLICE]] nofold low[%[[C0]], %[[C0]]] high[0, %[[TO_16]]]
   // CHECK: tensor.yield %[[CST]]
   // CHECK: %[[PADDED_B_SLICE:.*]] = tensor.pad %[[B_SLICE]] nofold
   // The output shape is already padded, so actually we shouldn't
   // add anything to the upper bound.
-  // CHECK: %[[ZERO0:.*]] = affine.apply #[[$MAP_C0]]()
-  // CHECK: %[[ZERO1:.*]] = affine.apply #[[$MAP_C0]]()
-  // CHECK: %[[PADDED_ARG4:.*]] = tensor.pad %[[ARG4]] nofold low[{{.*}}] high[%[[ZERO0]], %[[ZERO1]]]
+  // CHECK: %[[PADDED_ARG4:.*]] = tensor.pad %[[ARG4]] nofold low[{{.*}}] high[0, 0]
   // CHECK: %[[T5:.*]] = linalg.matmul
   // CHECK-SAME: ins(%[[PADDED_A_SLICE]], %[[PADDED_B_SLICE]] : tensor<128x16xf32>, tensor<16x128xf32>)