diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -147,12 +147,11 @@
       dimension, i.e `low`.
     * high: A list contains the padding along the end of each
      dimension, i.e. `high`.
-    * output: An optional output operand.

     The result tensor dimensions are `low` + `dim` + `high` along that
     dimension. The number of elements of `low` and `high` must match
-    the rank of the input tensor (which is also the rank of the output
-    tensor). They can be either a constant or a dynamic value.
+    the rank of the input tensor. They can be either a constant or a
+    dynamic value.

     The region of the `pad_tensor` operation returns the value to use
     for the padding. The arguments of the region represent the index
@@ -196,8 +195,7 @@
     Variadic<Index>:$low,
     Variadic<Index>:$high,
     I64ArrayAttr:$static_low,
-    I64ArrayAttr:$static_high,
-    Optional<AnyTensor>:$output);
+    I64ArrayAttr:$static_high);

   let regions = (region SizedRegion<1>:$region);

@@ -208,9 +206,7 @@
     $source
     `low` `` custom<OperandsOrIntegersSizesList>($low, $static_low)
     `high` `` custom<OperandsOrIntegersSizesList>($high, $static_high)
-    (`into` $output^ )?
     $region attr-dict `:` type($source) `to` type($result)
-    custom<InferType>(ref($output), type($output), ref(type($result)))
   }];

   let extraClassDeclaration = [{
@@ -300,11 +296,6 @@
     OpBuilder<(ins "Type":$resultType, "Value":$source,
       "ArrayRef<OpFoldResult>":$low, "ArrayRef<OpFoldResult>":$high,
       CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
-    // Build a PadTensorOp with mixed static and dynamic entries and custom
-    // result type.
-    OpBuilder<(ins "Type":$resultType, "Value":$source,
-      "ArrayRef<Value>":$low, "ArrayRef<Value>":$high, "ArrayAttr":$staticLow,
-      "ArrayAttr":$staticHigh)>
   ];

   let hasCanonicalizer = 1;
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -1035,9 +1035,6 @@
            << resultType << " does not match the inferred type "
            << expectedType;
   }
-  if (op.output() && op.output().getType() != op.getResultType()) {
-    op.emitError("expected that output operand type equals result type");
-  }

   auto &region = op.region();
   unsigned rank = resultType.getRank();
@@ -1084,7 +1081,7 @@
   auto sourceType = source.getType().cast<RankedTensorType>();
   auto resultType = inferResultType(sourceType, staticLow, staticHigh);
   build(b, result, resultType, source, low, high, b.getI64ArrayAttr(staticLow),
-        b.getI64ArrayAttr(staticHigh), /*output=*/Value());
+        b.getI64ArrayAttr(staticHigh));
   result.addAttributes(attrs);
 }

@@ -1121,15 +1118,7 @@
         PadTensorOp::inferResultType(sourceType, staticLow, staticHigh);
   }
   build(b, result, resultType, source, dynamicLow, dynamicHigh,
-        b.getI64ArrayAttr(staticLow), b.getI64ArrayAttr(staticHigh),
-        /*output=*/Value());
-}
-
-void PadTensorOp::build(OpBuilder &b, OperationState &result, Type resultType,
-                        Value source, ArrayRef<Value> low, ArrayRef<Value> high,
-                        ArrayAttr staticLow, ArrayAttr staticHigh) {
-  build(b, result, resultType, source, low, high, staticLow, staticHigh,
-        /*output=*/{});
+        b.getI64ArrayAttr(staticLow), b.getI64ArrayAttr(staticHigh));
 }

 PadTensorOp PadTensorOp::createPadScalarOp(Type type, Value source, Value pad,
@@ -1217,7 +1206,8 @@
 SmallVector<Value> PadTensorOp::getDestinationOperands(OpBuilder &b) {
   ReifiedRankedShapedTypeDims reifiedShapes;
   (void)reifyResultShapes(b, reifiedShapes);
-  Value initTensor = b.create<InitTensorOp>(getLoc(), reifiedShapes[0],
+  SmallVector<OpFoldResult> mixedSizes = getAsOpFoldResult(reifiedShapes[0]);
+  Value initTensor = b.create<InitTensorOp>(getLoc(), mixedSizes,
                                             getResultType().getElementType());
   return {initTensor};
 }
@@ -1460,21 +1450,6 @@
   }
 };

-// Fold tensor.dim(pad_tensor(%input, %output)) to tensor.dim(%output).
-struct FoldToDimOfOutputOperand : public OpRewritePattern<tensor::DimOp> {
-  using OpRewritePattern<tensor::DimOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(tensor::DimOp dimOp,
-                                PatternRewriter &rewriter) const override {
-    auto padTensorOp = dimOp.source().getDefiningOp<PadTensorOp>();
-    if (!padTensorOp || !padTensorOp.output())
-      return failure();
-    rewriter.replaceOpWithNewOp<tensor::DimOp>(dimOp, padTensorOp.output(),
-                                               dimOp.index());
-    return success();
-  }
-};
-
 // Fold CastOp into PadTensorOp when adding static information.
 struct FoldSourceTensorCast : public OpRewritePattern<PadTensorOp> {
   using OpRewritePattern<PadTensorOp>::OpRewritePattern;
@@ -1498,7 +1473,7 @@
     auto newOp = rewriter.create<PadTensorOp>(
         padTensorOp->getLoc(), newResultType, padTensorOp.source(),
         padTensorOp.low(), padTensorOp.high(), padTensorOp.static_low(),
-        padTensorOp.static_high(), /*output=*/nullptr);
+        padTensorOp.static_high());
     BlockAndValueMapping mapper;
     padTensorOp.getRegion().cloneInto(&newOp.getRegion(), mapper);

@@ -1512,8 +1487,7 @@

 void PadTensorOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                               MLIRContext *context) {
-  results.add<FoldStaticZeroPadding, FoldSourceTensorCast,
-              FoldToDimOfOutputOperand>(context);
+  results.add<FoldStaticZeroPadding, FoldSourceTensorCast>(context);
 }

 /// Return the padding value of the PadTensorOp if it constant. In this context,
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -356,10 +356,6 @@
 static LogicalResult tilePadTensorOp(OpBuilder &builder, PadTensorOp op,
                                      PadTensorOp &newPadOp, LoopNest &loopNest,
                                      const LinalgTilingOptions &options) {
-  // Can tile only PadTensorOp that have an output operand.
-  if (!op.output())
-    return failure();
-
   Location loc = op.getLoc();
   OpBuilder::InsertionGuard g(builder);
   builder.setInsertionPoint(op);
@@ -382,8 +378,9 @@
     }
   }
   // Generate loop nest: One loop per dimension.
+  SmallVector<Value> destOperand = op.getDestinationOperands(builder);
   loopNest = mlir::scf::buildLoopNest(
-      builder, loc, lbs, /*ubs=*/dims, steps, ValueRange(op.output()),
+      builder, loc, lbs, /*ubs=*/dims, steps, ValueRange(destOperand),
       [&](OpBuilder &b, Location loc, ValueRange localIvs,
           ValueRange iterArgs) -> scf::ValueVector {
         // Compute offsets and sizes of ExtractSliceOp.
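For reference, the user-visible syntax change, taken from the test updates below: the `into` clause and its destination operand disappear, and consumers that need a destination (such as tiling) now call getDestinationOperands(), which builds a linalg.init_tensor of the reified result shape. Operand names here are illustrative, not from the patch:

  // Old form (removed): the destination is an explicit operand.
  %0 = linalg.pad_tensor %input low[3, 4] high[5, 3] into %output {
  ^bb0(%arg1: index, %arg2: index):
    linalg.yield %pad_value : f32
  } : tensor<?x?xf32> to tensor<?x?xf32>

  // New form: no destination operand.
  %0 = linalg.pad_tensor %input low[3, 4] high[5, 3] {
  ^bb0(%arg1: index, %arg2: index):
    linalg.yield %pad_value : f32
  } : tensor<?x?xf32> to tensor<?x?xf32>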
diff --git a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir
--- a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir
+++ b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir
@@ -1,12 +1,12 @@
-// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3" -cse -split-input-file | \
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3" -resolve-shaped-type-result-dims -cse -split-input-file | \
 // RUN: FileCheck %s -check-prefix=TILE2
-// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,3" -cse -split-input-file | \
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,3" -resolve-shaped-type-result-dims -cse -split-input-file | \
 // RUN: FileCheck %s -check-prefix=TILE1

 // TILE2-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 8)>
 // TILE2-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 7)>
 // TILE2: func @dynamic_pad_tensor(
-// TILE2-SAME: %[[IN:.*]]: tensor<?x?xf32>, %[[OUT:.*]]: tensor<?x?xf32>
+// TILE2-SAME: %[[IN:.*]]: tensor<?x?xf32>
 // TILE2-DAG: %[[C0:.*]] = constant 0 : index
 // TILE2-DAG: %[[C1:.*]] = constant 1 : index
 // TILE2-DAG: %[[C2:.*]] = constant 2 : index
@@ -25,16 +25,18 @@
 // TILE2: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
 // TILE2: return %[[RESULT]]

-// TILE1-DAG: #[[MAP:.*]] = affine_map<()[s0] -> (s0 + 7)>
+// TILE1-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 7)>
+// TILE1-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 8)>
 // TILE1: func @dynamic_pad_tensor(
-// TILE1-SAME: %[[IN:.*]]: tensor<?x?xf32>, %[[OUT:.*]]: tensor<?x?xf32>
+// TILE1-SAME: %[[IN:.*]]: tensor<?x?xf32>
 // TILE1-DAG: %[[C0:.*]] = constant 0 : index
 // TILE1-DAG: %[[C1:.*]] = constant 1 : index
 // TILE1-DAG: %[[C3:.*]] = constant 3 : index
 // TILE1: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]]
-// TILE1: %[[DIM1:.*]] = affine.apply #[[MAP]]()[%[[DIM_IN1]]]
+// TILE1: %[[DIM1:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN1]]]
+// TILE1: %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]]
+// TILE1: %[[DIM0:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN0]]]
 // TILE1: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// TILE1: %[[DIM0:.*]] = tensor.dim %[[OUT]], %[[C0]]
 // TILE1: %[[SWAP_RESULT:.*]] = scf.if
 // TILE1: tensor.generate
 // TILE1: else
@@ -44,10 +46,8 @@
 // TILE1: return %[[RESULT]]

 func @dynamic_pad_tensor(%input_tensor: tensor<?x?xf32>,
-                         %output_tensor: tensor<?x?xf32>,
                          %pad_value: f32) -> tensor<?x?xf32> {
-  %0 = linalg.pad_tensor %input_tensor
-    low[3, 4] high[5, 3] into %output_tensor {
+  %0 = linalg.pad_tensor %input_tensor low[3, 4] high[5, 3] {
     ^bb0(%arg1: index, %arg2: index):
       linalg.yield %pad_value : f32
   } : tensor<?x?xf32> to tensor<?x?xf32>
@@ -57,7 +57,7 @@
 // -----

 // TILE2-LABEL: func @static_pad_tensor(
-// TILE2-SAME: %[[IN:.*]]: tensor<7x9xf32>, %[[OUT:.*]]: tensor<15x16xf32>
+// TILE2-SAME: %[[IN:.*]]: tensor<7x9xf32>
 // TILE2-DAG: %[[C0:.*]] = constant 0 : index
 // TILE2-DAG: %[[C2:.*]] = constant 2 : index
 // TILE2-DAG: %[[C3:.*]] = constant 3 : index
@@ -75,7 +75,7 @@

 // TILE1-LABEL: func @static_pad_tensor(
-// TILE1-SAME: %[[IN:.*]]: tensor<7x9xf32>, %[[OUT:.*]]: tensor<15x16xf32>
+// TILE1-SAME: %[[IN:.*]]: tensor<7x9xf32>
 // TILE1-DAG: %[[C0:.*]] = constant 0 : index
 // TILE1-DAG: %[[C3:.*]] = constant 3 : index
 // TILE1-DAG: %[[C16:.*]] = constant 16 : index

@@ -89,10 +89,8 @@
 // TILE1: return %[[RESULT]]

 func @static_pad_tensor(%input_tensor: tensor<7x9xf32>,
-                        %output_tensor: tensor<15x16xf32>,
                         %pad_value: f32) -> tensor<15x16xf32> {
-  %0 = linalg.pad_tensor %input_tensor
-    low[3, 4] high[5, 3] into %output_tensor {
+  %0 = linalg.pad_tensor %input_tensor low[3, 4] high[5, 3] {
     ^bb0(%arg1: index, %arg2: index):
       linalg.yield %pad_value : f32
   } : tensor<7x9xf32> to tensor<15x16xf32>
@@ -112,7 +110,7 @@
 // TILE1: scf.yield %[[GEN]] : tensor<14x3xf32>
 // TILE1: else
 // TILE1: %[[SLICE:.*]] = tensor.extract_slice %arg0[0, %{{.*}}] [7, %{{.*}}] [1, 1] : tensor<7x9xf32> to tensor<7x?xf32>
-// TILE1: %[[PAD:.*]] = linalg.pad_tensor %8 low[0, 0] high[7, %{{.*}}]
+// TILE1: %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]] low[0, 0] high[7, %{{.*}}]
 // TILE1: %[[CAST:.*]] = tensor.cast %[[PAD]] : tensor<14x?xf32> to tensor<14x3xf32>
 // TILE1: scf.yield %[[CAST]] : tensor<14x3xf32>
 // TILE1: %[[R3:.*]] = tensor.insert_slice %[[R2]] into %[[INNER_OUT]][0, %[[IV]]] [14, 3] [1, 1] : tensor<14x3xf32> into tensor<14x15xf32>
@@ -121,8 +119,7 @@
 func @static_pad_tile_evenly(%input_tensor: tensor<7x9xf32>,
                              %output_tensor: tensor<14x15xf32>,
                              %pad_value: f32) -> tensor<14x15xf32> {
-  %0 = linalg.pad_tensor %input_tensor
-    low[0, 0] high[7, 6] into %output_tensor {
+  %0 = linalg.pad_tensor %input_tensor low[0, 0] high[7, 6] {
     ^bb0(%arg1: index, %arg2: index):
       linalg.yield %pad_value : f32
   } : tensor<7x9xf32> to tensor<14x15xf32>
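With the output operand gone, the tiled loop nest is seeded from getDestinationOperands() rather than from a function argument. A rough sketch of the IR this produces for the dynamic case, matching the CHECK patterns above (SSA names and the single-loop shape are illustrative):

  %init = linalg.init_tensor [%dim0, %dim1] : tensor<?x?xf32>
  %result = scf.for %iv = %c0 to %dim1 step %c3
      iter_args(%inner_out = %init) -> (tensor<?x?xf32>) {
    // scf.if yields either a tensor.generate (all-padding tile) or an
    // extract_slice + pad_tensor of the source, as in the checks above.
    %updated = tensor.insert_slice %tile into %inner_out[%off0, %off1] [%sz0, %sz1] [1, 1]
        : tensor<?x?xf32> into tensor<?x?xf32>
    scf.yield %updated : tensor<?x?xf32>
  }

The -resolve-shaped-type-result-dims pass added to the RUN lines is what rewrites tensor.dim of the pad result into the affine.apply over the source dims that the TILE1 checks now expect.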