diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -147,12 +147,11 @@
              dimension, i.e `low`.
       * high: A list contains the padding along the end of each
               dimension, i.e. `high`.
-      * output: An optional output operand.
 
     The result tensor dimensions are `low` + `dim` + `high` along that
     dimension. The number of elements of `low` and `high` must match
-    the rank of the input tensor (which is also the rank of the output
-    tensor). They can be either a constant or a dynamic value.
+    the rank of the input tensor. They can be either a constant or a
+    dynamic value.
 
     The region of the `pad_tensor` operation returns the value to use
     for the padding. The arguments of the region represent the index
@@ -196,8 +195,7 @@
     Variadic<Index>:$low,
     Variadic<Index>:$high,
     I64ArrayAttr:$static_low,
-    I64ArrayAttr:$static_high,
-    Optional<AnyTensor>:$output);
+    I64ArrayAttr:$static_high);
 
   let regions = (region SizedRegion<1>:$region);
 
@@ -208,9 +206,7 @@
     $source
     `low` `` custom<OperandsOrIntegersSizesList>($low, $static_low)
     `high` `` custom<OperandsOrIntegersSizesList>($high, $static_high)
-    (`into` $output^ )?
     $region attr-dict `:` type($source) `to` type($result)
-    custom<InferType>(ref($output), type($output), ref(type($result)))
   }];
 
   let extraClassDeclaration = [{
@@ -300,11 +296,6 @@
     OpBuilder<(ins "Type":$resultType, "Value":$source,
       "ArrayRef<OpFoldResult>":$low, "ArrayRef<OpFoldResult>":$high,
       CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
-    // Build a PadTensorOp with mixed static and dynamic entries and custom
-    // result type.
-    OpBuilder<(ins "Type":$resultType, "Value":$source,
-      "ArrayRef<Value>":$low, "ArrayRef<Value>":$high, "ArrayAttr":$staticLow,
-      "ArrayAttr":$staticHigh)>
   ];
 
   let hasCanonicalizer = 1;
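With the `output` operand and the `into` clause gone, `linalg.pad_tensor` is always written in its inferred-result form. A minimal sketch of the remaining syntax, adapted from the roundtrip tests further down in this patch (the function name is hypothetical):

  func @pad_example(%arg0: tensor<3x4xf32>, %pad_value: f32) -> tensor<6x9xf32> {
    // Result dims are low + dim + high: 1+3+2 = 6 and 2+4+3 = 9.
    %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] {
    ^bb0(%arg1: index, %arg2: index):
      linalg.yield %pad_value : f32
    } : tensor<3x4xf32> to tensor<6x9xf32>
    return %0 : tensor<6x9xf32>
  }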
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -1040,9 +1040,6 @@
            << resultType << " does not match the inferred type "
            << expectedType;
   }
-  if (op.output() && op.output().getType() != op.getResultType()) {
-    op.emitError("expected that output operand type equals result type");
-  }
 
   auto &region = op.region();
   unsigned rank = resultType.getRank();
@@ -1089,7 +1086,7 @@
   auto sourceType = source.getType().cast<RankedTensorType>();
   auto resultType = inferResultType(sourceType, staticLow, staticHigh);
   build(b, result, resultType, source, low, high, b.getI64ArrayAttr(staticLow),
-        b.getI64ArrayAttr(staticHigh), /*output=*/Value());
+        b.getI64ArrayAttr(staticHigh));
   result.addAttributes(attrs);
 }
 
@@ -1126,15 +1123,7 @@
         PadTensorOp::inferResultType(sourceType, staticLow, staticHigh);
   }
   build(b, result, resultType, source, dynamicLow, dynamicHigh,
-        b.getI64ArrayAttr(staticLow), b.getI64ArrayAttr(staticHigh),
-        /*output=*/Value());
-}
-
-void PadTensorOp::build(OpBuilder &b, OperationState &result, Type resultType,
-                        Value source, ArrayRef<Value> low, ArrayRef<Value> high,
-                        ArrayAttr staticLow, ArrayAttr staticHigh) {
-  build(b, result, resultType, source, low, high, staticLow, staticHigh,
-        /*output=*/{});
+        b.getI64ArrayAttr(staticLow), b.getI64ArrayAttr(staticHigh));
 }
 
 PadTensorOp PadTensorOp::createPadScalarOp(Type type, Value source, Value pad,
@@ -1221,7 +1210,8 @@
 SmallVector<Value> PadTensorOp::getDestinationOperands(OpBuilder &b) {
   ReifiedRankedShapedTypeDims reifiedShapes;
   (void)reifyResultShapes(b, reifiedShapes);
-  Value initTensor = b.create<InitTensorOp>(getLoc(), reifiedShapes[0],
+  SmallVector<OpFoldResult> mixedSizes = getAsOpFoldResult(reifiedShapes[0]);
+  Value initTensor = b.create<InitTensorOp>(getLoc(), mixedSizes,
                                             getResultType().getElementType());
   return {initTensor};
 }
@@ -1465,21 +1455,6 @@
   }
 };
 
-// Fold tensor.dim(pad_tensor(%input, %output)) to tensor.dim(%output).
-struct FoldToDimOfOutputOperand : public OpRewritePattern<tensor::DimOp> {
-  using OpRewritePattern<tensor::DimOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(tensor::DimOp dimOp,
-                                PatternRewriter &rewriter) const override {
-    auto padTensorOp = dimOp.source().getDefiningOp<PadTensorOp>();
-    if (!padTensorOp || !padTensorOp.output())
-      return failure();
-    rewriter.replaceOpWithNewOp<tensor::DimOp>(dimOp, padTensorOp.output(),
-                                               dimOp.index());
-    return success();
-  }
-};
-
 // Fold CastOp into PadTensorOp when adding static information.
 struct FoldSourceTensorCast : public OpRewritePattern<PadTensorOp> {
   using OpRewritePattern<PadTensorOp>::OpRewritePattern;
@@ -1503,7 +1478,7 @@
     auto newOp = rewriter.create<PadTensorOp>(
         padTensorOp->getLoc(), newResultType, padTensorOp.source(),
         padTensorOp.low(), padTensorOp.high(), padTensorOp.static_low(),
-        padTensorOp.static_high(), /*output=*/nullptr);
+        padTensorOp.static_high());
     BlockAndValueMapping mapper;
     padTensorOp.getRegion().cloneInto(&newOp.getRegion(), mapper);
 
@@ -1517,8 +1492,7 @@
 
 void PadTensorOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                               MLIRContext *context) {
-  results.add<FoldStaticZeroPadding, FoldToDimOfOutputOperand,
-              FoldSourceTensorCast>(context);
+  results.add<FoldStaticZeroPadding, FoldSourceTensorCast>(context);
 }
 
 /// Return the padding value of the PadTensorOp if it constant. In this context,
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -357,10 +357,6 @@
 static LogicalResult tilePadTensorOp(OpBuilder &builder, PadTensorOp op,
                                      PadTensorOp &newPadOp, LoopNest &loopNest,
                                      const LinalgTilingOptions &options) {
-  // Can tile only PadTensorOp that have an output operand.
-  if (!op.output())
-    return failure();
-
   Location loc = op.getLoc();
   OpBuilder::InsertionGuard g(builder);
   builder.setInsertionPoint(op);
@@ -383,8 +379,9 @@
     }
   }
   // Generate loop nest: One loop per dimension.
+  SmallVector<Value> destOperand = op.getDestinationOperands(builder);
   loopNest = mlir::scf::buildLoopNest(
-      builder, loc, lbs, /*ubs=*/dims, steps, ValueRange(op.output()),
+      builder, loc, lbs, /*ubs=*/dims, steps, ValueRange(destOperand),
       [&](OpBuilder &b, Location loc, ValueRange localIvs,
           ValueRange iterArgs) -> scf::ValueVector {
         // Compute offsets and sizes of ExtractSliceOp.
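Instead of reading a destination from the removed `output` operand, `tilePadTensorOp` now calls `getDestinationOperands`, which reifies the result shape and materializes a `linalg.init_tensor`. A minimal sketch of the IR this produces for the `low[3, 4] high[5, 3]` test case below (function and value names are hypothetical; the `s0 + 8`/`s0 + 7` maps match the TILE2-DAG checks):

  func @destination_sketch(%in: tensor<?x?xf32>) -> tensor<?x?xf32> {
    %c0 = constant 0 : index
    %c1 = constant 1 : index
    %d0_in = tensor.dim %in, %c0 : tensor<?x?xf32>
    %d1_in = tensor.dim %in, %c1 : tensor<?x?xf32>
    // low[3, 4] high[5, 3] grows dim 0 by 3+5 = 8 and dim 1 by 4+3 = 7.
    %d0 = affine.apply affine_map<()[s0] -> (s0 + 8)>()[%d0_in]
    %d1 = affine.apply affine_map<()[s0] -> (s0 + 7)>()[%d1_in]
    %init = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
    return %init : tensor<?x?xf32>
  }

This is also why the test RUN lines below gain `-resolve-shaped-type-result-dims`: that pass folds `tensor.dim` ops on the pad result into dims of the input, so the TILE1 checks now find `tensor.dim %[[IN]]` before the loop instead of `tensor.dim %[[OUT]]` inside it.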
diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir
--- a/mlir/test/Dialect/Linalg/canonicalize.mlir
+++ b/mlir/test/Dialect/Linalg/canonicalize.mlir
@@ -904,24 +904,6 @@
 
 // -----
 
-// CHECK-LABEL: func @dim_of_pad_tensor(
-//  CHECK-SAME:     %[[ARG0:.*]]: tensor<?x?xf32>, %[[ARG1:.*]]: tensor<?x?xf32>
-//       CHECK:   %[[C0:.*]] = constant 0 : index
-//       CHECK:   %[[RESULT:.*]] = tensor.dim %[[ARG1]], %[[C0]]
-//       CHECK:   return %[[RESULT]]
-func @dim_of_pad_tensor(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>,
-                        %pad_value: f32) -> index {
-  %c0 = constant 0 : index
-  %0 = linalg.pad_tensor %arg0 low[2, 3] high[4, 5] into %arg1 {
-  ^bb0(%arg2: index, %arg3: index):
-    linalg.yield %pad_value : f32
-  } : tensor<?x?xf32> to tensor<?x?xf32>
-  %r = tensor.dim %0, %c0 : tensor<?x?xf32>
-  return %r : index
-}
-
-// -----
-
 // CHECK-LABEL: func @dim_of_tiled_loop_input(
 //  CHECK-SAME:     %[[arg0:.*]]: tensor<?x?xf32>, %[[arg1:.*]]: tensor<?x?xf32>, %[[arg2:.*]]: tensor<?x?xf32>
 //       CHECK:   %[[c0:.*]] = constant 0 : index
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@@ -459,18 +459,6 @@
 
 // -----
 
-// expected-note@+1 {{prior use here}}
-func @pad_output_type(%arg0: tensor<?x?x?x?xi32>, %arg1: index, %arg2: i32, %output: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xi32> {
-  // expected-error @+1 {{use of value '%output' expects different type than prior uses: 'tensor<?x?x?x?xi32>' vs 'tensor<?x?x?x?xf32>'}}
-  %0 = linalg.pad_tensor %arg0 low[1, 1, 1, 1] high[2, 2, 2, 2] into %output {
-  ^bb0(%arg3: index, %arg4: index):  // no predecessors
-    linalg.yield %arg2 : i32
-  } : tensor<?x?x?x?xi32> to tensor<?x?x?x?xi32>
-  return %0 : tensor<?x?x?x?xi32>
-}
-
-// -----
-
 func @pad_number_of_block_args(%arg0: tensor<?x?xi32>, %arg1: i32) -> tensor<?x?xi32> {
   // expected-error @+1 {{expected the block to have 2 arguments}}
   %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] {
diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir
--- a/mlir/test/Dialect/Linalg/roundtrip.mlir
+++ b/mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -49,24 +49,6 @@
 
 // -----
 
-func @pad_static_with_output(%arg0: tensor<3x4xf32>,
-                             %out_tensor : tensor<6x9xf32>,
-                             %pad_value: f32)
-  -> tensor<6x9xf32> {
-  %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] into %out_tensor {
-  ^bb0(%arg1 : index, %arg2 : index):
-    linalg.yield %pad_value : f32
-  } : tensor<3x4xf32> to tensor<6x9xf32>
-  return %0 : tensor<6x9xf32>
-}
-// CHECK-LABEL: func @pad_static
-//  CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]*]]: tensor<3x4xf32>,
-//  CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]*]]: tensor<6x9xf32>,
-//       CHECK:   linalg.pad_tensor %[[ARG0]] low[1, 2] high[2, 3] into %[[ARG1]]
-//       CHECK:     : tensor<3x4xf32> to tensor<6x9xf32>
-
-// -----
-
 func @pad_asymmetrical(%arg0: tensor<2x3xf32>, %ub0: index, %ub1: index,
                        %pad_value: f32) -> tensor<?x?xf32> {
   %0 = linalg.pad_tensor %arg0 low[0, 0] high[%ub0, %ub1] {
diff --git a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir
--- a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir
+++ b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir
@@ -1,12 +1,12 @@
-// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3" -cse -split-input-file | \
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3" -resolve-shaped-type-result-dims -cse -split-input-file | \
 // RUN:   FileCheck %s -check-prefix=TILE2
-// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,3" -cse -split-input-file | \
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,3" -resolve-shaped-type-result-dims -cse -split-input-file | \
 // RUN:   FileCheck %s -check-prefix=TILE1
 
 //  TILE2-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 8)>
 //  TILE2-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 7)>
 //      TILE2: func @dynamic_pad_tensor(
-// TILE2-SAME:     %[[IN:.*]]: tensor<?x?xf32>, %[[OUT:.*]]: tensor<?x?xf32>
+// TILE2-SAME:     %[[IN:.*]]: tensor<?x?xf32>
 //  TILE2-DAG:   %[[C0:.*]] = constant 0 : index
 //  TILE2-DAG:   %[[C1:.*]] = constant 1 : index
 //  TILE2-DAG:   %[[C2:.*]] = constant 2 : index
@@ -25,16 +25,18 @@
 //      TILE2:     tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
 //      TILE2:   return %[[RESULT]]
 
-//  TILE1-DAG: #[[MAP:.*]] = affine_map<()[s0] -> (s0 + 7)>
+//  TILE1-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 7)>
+//  TILE1-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 8)>
 //      TILE1: func @dynamic_pad_tensor(
-// TILE1-SAME:     %[[IN:.*]]: tensor<?x?xf32>, %[[OUT:.*]]: tensor<?x?xf32>
+// TILE1-SAME:     %[[IN:.*]]: tensor<?x?xf32>
 //  TILE1-DAG:   %[[C0:.*]] = constant 0 : index
 //  TILE1-DAG:   %[[C1:.*]] = constant 1 : index
 //  TILE1-DAG:   %[[C3:.*]] = constant 3 : index
 //      TILE1:   %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]]
-//      TILE1:   %[[DIM1:.*]] = affine.apply #[[MAP]]()[%[[DIM_IN1]]]
+//      TILE1:   %[[DIM1:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN1]]]
+//      TILE1:   %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]]
+//      TILE1:   %[[DIM0:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN0]]]
 //      TILE1:   %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-//      TILE1:     %[[DIM0:.*]] = tensor.dim %[[OUT]], %[[C0]]
 //      TILE1:     %[[SWAP_RESULT:.*]] = scf.if
 //      TILE1:       tensor.generate
 //      TILE1:     else
@@ -44,10 +46,8 @@
 //      TILE1:   return %[[RESULT]]
 
 func @dynamic_pad_tensor(%input_tensor: tensor<?x?xf32>,
-                         %output_tensor: tensor<?x?xf32>,
                          %pad_value: f32) -> tensor<?x?xf32> {
-  %0 = linalg.pad_tensor %input_tensor
-    low[3, 4] high[5, 3] into %output_tensor {
+  %0 = linalg.pad_tensor %input_tensor low[3, 4] high[5, 3] {
     ^bb0(%arg1: index, %arg2: index):
       linalg.yield %pad_value : f32
   } : tensor<?x?xf32> to tensor<?x?xf32>
@@ -57,7 +57,7 @@
 // -----
 
 // TILE2-LABEL: func @static_pad_tensor(
-//  TILE2-SAME:     %[[IN:.*]]: tensor<7x9xf32>, %[[OUT:.*]]: tensor<15x16xf32>
+//  TILE2-SAME:     %[[IN:.*]]: tensor<7x9xf32>
 //   TILE2-DAG:   %[[C0:.*]] = constant 0 : index
 //   TILE2-DAG:   %[[C2:.*]] = constant 2 : index
 //   TILE2-DAG:   %[[C3:.*]] = constant 3 : index
@@ -75,7 +75,7 @@
 
 
 // TILE1-LABEL: func @static_pad_tensor(
-//  TILE1-SAME:     %[[IN:.*]]: tensor<7x9xf32>, %[[OUT:.*]]: tensor<15x16xf32>
+//  TILE1-SAME:     %[[IN:.*]]: tensor<7x9xf32>
 //   TILE1-DAG:   %[[C0:.*]] = constant 0 : index
 //   TILE1-DAG:   %[[C3:.*]] = constant 3 : index
 //   TILE1-DAG:   %[[C16:.*]] = constant 16 : index
@@ -89,10 +89,8 @@
 //       TILE1:   return %[[RESULT]]
 
 func @static_pad_tensor(%input_tensor: tensor<7x9xf32>,
-                        %output_tensor: tensor<15x16xf32>,
                         %pad_value: f32) -> tensor<15x16xf32> {
-  %0 = linalg.pad_tensor %input_tensor
-    low[3, 4] high[5, 3] into %output_tensor {
+  %0 = linalg.pad_tensor %input_tensor low[3, 4] high[5, 3] {
     ^bb0(%arg1: index, %arg2: index):
       linalg.yield %pad_value : f32
   } : tensor<7x9xf32> to tensor<15x16xf32>
@@ -112,7 +110,7 @@
 //       TILE1:       scf.yield %[[GEN]] : tensor<14x3xf32>
 //       TILE1:     else
 //       TILE1:       %[[SLICE:.*]] = tensor.extract_slice %arg0[0, %{{.*}}] [7, %{{.*}}] [1, 1] : tensor<7x9xf32> to tensor<7x?xf32>
-//       TILE1:       %[[PAD:.*]] = linalg.pad_tensor %8 low[0, 0] high[7, %{{.*}}]
+//       TILE1:       %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]] low[0, 0] high[7, %{{.*}}]
 //       TILE1:       %[[CAST:.*]] = tensor.cast %[[PAD]] : tensor<14x?xf32> to tensor<14x3xf32>
 //       TILE1:       scf.yield %[[CAST]] : tensor<14x3xf32>
 //       TILE1:   %[[R3:.*]] = tensor.insert_slice %[[R2]] into %[[INNER_OUT]][0, %[[IV]]] [14, 3] [1, 1] : tensor<14x3xf32> into tensor<14x15xf32>
@@ -121,8 +119,7 @@
 func @static_pad_tile_evenly(%input_tensor: tensor<7x9xf32>,
                              %output_tensor: tensor<14x15xf32>,
                              %pad_value: f32) -> tensor<14x15xf32> {
-  %0 = linalg.pad_tensor %input_tensor
-    low[0, 0] high[7, 6] into %output_tensor {
+  %0 = linalg.pad_tensor %input_tensor low[0, 0] high[7, 6] {
     ^bb0(%arg1: index, %arg2: index):
       linalg.yield %pad_value : f32
   } : tensor<7x9xf32> to tensor<14x15xf32>
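For reference, the destination-passing structure the tiling now produces (per the TILE1/TILE2 checks above): the loop nest threads the init tensor through `iter_args` and updates it with `tensor.insert_slice`, rather than writing into a function argument. A simplified, self-contained sketch with hypothetical static shapes and names:

  func @tiled_skeleton(%tile: tensor<2x3xf32>) -> tensor<4x6xf32> {
    %c0 = constant 0 : index
    %c2 = constant 2 : index
    %c4 = constant 4 : index
    // The destination comes from linalg.init_tensor, not an output operand.
    %init = linalg.init_tensor [4, 6] : tensor<4x6xf32>
    %result = scf.for %iv = %c0 to %c4 step %c2
        iter_args(%out = %init) -> (tensor<4x6xf32>) {
      // Each iteration inserts the computed tile into the running result.
      %updated = tensor.insert_slice %tile into %out[%iv, 0] [2, 3] [1, 1]
          : tensor<2x3xf32> into tensor<4x6xf32>
      scf.yield %updated : tensor<4x6xf32>
    }
    return %result : tensor<4x6xf32>
  }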