diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td @@ -18,7 +18,6 @@ include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/Interfaces/LoopLikeInterface.td" include "mlir/Interfaces/SideEffectInterfaces.td" -include "mlir/Interfaces/TilingInterface.td" include "mlir/Interfaces/ViewLikeInterface.td" // Base class for Linalg dialect ops that do not correspond to library calls. @@ -130,207 +129,6 @@ let hasCanonicalizer = 1; } -def Linalg_PadTensorOp : Linalg_Op<"pad_tensor", - [AttrSizedOperandSegments, NoSideEffect, - DeclareOpInterfaceMethods, - DeclareOpInterfaceMethods]> { - let summary = "tensor pad operation"; - let description = [{ - `linalg.pad_tensor` is an operation that pads the `source` tensor - with given `low` and `high` padding config. - - The PadTensor operation supports the following arguments: - - * source: the "base" tensor on which to pad. - * low: A list contains the padding along the start of each - dimension, i.e `low`. - * high: A list contains the padding along the end of each - dimension, i.e. `high`. - * nofold: indicates that the operation should not be folded when source and - result types are equal. - - The result tensor dimensions are `low` + `dim` + `high` along that - dimension. The number of elements of `low` and `high` must match - the rank of the input tensor. They can be either a constant or a - dynamic value. - - The region of the `pad_tensor` operation returns the value to use - for the padding. The arguments of the region represent the index - of the source being accessed. There should be as many arguments as - the rank of the `source` tensor. The value `yield`-ed by the - region is used as the value of the view at the given position. - - If `nofold` is set, the padding operation will not be folded away even - if the source type and the padded type have the same static shape. This can - be used, e.g., for packing or promotion to faster memory. - - Example 1: - - ```mlir - %pad_value = ... : f32 - %0 = linalg.pad_tensor %0 low[1, 2] high[2, 3] { - ^bb0(%arg0 : index, %arg1 : index): - linalg.yield %pad_value : f32 - } : tensor to tensor - ``` - - Example 2: - - ```mlir - %pad_value = ... : f32 - %0 = linalg.pad_tensor %arg0 low[2, %arg1, 3, 3] high[3, 3, %arg1, 2] { - ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index): - linalg.yield %pad_value : f32 - } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32> - ``` - - Example 3: - - ```mlir - %pad_value = ... : f32 - %0 = linalg.pad_tensor %arg0 low[0, 0] high[%ub0, %ub1] { - ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad_value : f32 - } : tensor<2x3xf32> to tensor - ``` - - Example 4: - - ```mlir - // Force a padded value to be always exist with `nofold`. - %pad_value = ... : f32 - %0 = linalg.pad_tensor %arg0 nofold low[0, 0] high[0, 0] { - ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad_value : f32 - } : tensor<2x3xf32> to tensor<2x3xf32> - ``` - }]; - - let arguments = (ins - AnyTensor:$source, - Variadic:$low, - Variadic:$high, - I64ArrayAttr:$static_low, - I64ArrayAttr:$static_high, - UnitAttr:$nofold); - - let regions = (region SizedRegion<1>:$region); - - let results = (outs AnyTensor:$result); - - // TODO: Remove custom when AllTypesMatch supports opt. operands. - let assemblyFormat = [{ - $source - (`nofold` $nofold^)? 
- `low` `` custom($low, $static_low) - `high` `` custom($high, $static_high) - $region attr-dict `:` type($source) `to` type($result) - }]; - - let extraClassDeclaration = [{ - static StringRef getStaticLowAttrName() { - return "static_low"; - } - - static StringRef getStaticHighAttrName() { - return "static_high"; - } - - RankedTensorType getSourceType() { - return source().getType().cast(); - } - RankedTensorType getResultType() { - return getResult().getType().cast(); - } - - // Infer the shape of the result tensor given the type of the source tensor - // and paddings. Known result dimensions that cannot necessarily be inferred - // from low/high padding sizes can be optionally specified. Those will be - // considered when computing the result type. - static RankedTensorType inferResultType( - RankedTensorType sourceType, - ArrayRef staticLow, - ArrayRef staticHigh, - ArrayRef resultShape = {}); - - // Return a PadTensorOp that pads `source` to `type` size where the static - // sizes are assumed to be greater than the dynamic sizes. The op performs - // "high" padding (i.e. it adds trailing padding values until the desired - // size is met). - static linalg::PadTensorOp createPadHighOp( - Type type, Value source, Value pad, bool nofold, Location loc, - OpBuilder & builder); - - // Return a PadTensorOp that pads `source to `type` size with `pad` value. - // I.e., a block will be created and the `pad` value will be yielded - // directly. If the type passed is nullptr, it is inferred. - static linalg::PadTensorOp createPadScalarOp( - Type type, Value source, Value pad, ArrayRef low, - ArrayRef high, bool nofold, Location loc, - OpBuilder & builder); - - // Return the pad value if it is a constant. Return null value otherwise. - Value getConstantPaddingValue(); - - // Return a vector of all the static or dynamic values (low/high padding) of - // the op. - inline SmallVector getMixedPadImpl(ArrayAttr staticAttrs, - ValueRange values) { - SmallVector res; - unsigned numDynamic = 0; - unsigned count = staticAttrs.size(); - for (unsigned idx = 0; idx < count; ++idx) { - if (ShapedType::isDynamic(staticAttrs[idx].cast().getInt())) - res.push_back(values[numDynamic++]); - else - res.push_back(staticAttrs[idx]); - } - return res; - } - SmallVector getMixedLowPad() { - return getMixedPadImpl(static_low(), low()); - } - SmallVector getMixedHighPad() { - return getMixedPadImpl(static_high(), high()); - } - // Return true if low padding is guaranteed to be 0. - bool hasZeroLowPad() { - return llvm::all_of(getMixedLowPad(), [](OpFoldResult ofr) { - return getConstantIntValue(ofr) == static_cast(0); - }); - } - // Return true if high padding is guaranteed to be 0. - bool hasZeroHighPad() { - return llvm::all_of(getMixedHighPad(), [](OpFoldResult ofr) { - return getConstantIntValue(ofr) == static_cast(0); - }); - } - }]; - - let builders = [ - // Build a PadTensorOp with mixed static and dynamic entries. - OpBuilder<(ins "Value":$source, "ArrayRef":$staticLow, - "ArrayRef":$staticHigh, "ValueRange":$low, "ValueRange":$high, - CArg<"bool", "false">:$nofold, - CArg<"ArrayRef", "{}">:$attrs)>, - // Build a PadTensorOp with all dynamic entries. - OpBuilder<(ins "Value":$source, "ValueRange":$low, "ValueRange":$high, - CArg<"bool", "false">:$nofold, - CArg<"ArrayRef", "{}">:$attrs)>, - // Build a PadTensorOp with mixed static and dynamic entries and custom - // result type. If the type passed is nullptr, it is inferred. 
- OpBuilder<(ins "Type":$resultType, "Value":$source, - "ArrayRef":$low, "ArrayRef":$high, - CArg<"bool", "false">:$nofold, - CArg<"ArrayRef", "{}">:$attrs)>, - ]; - - let hasCanonicalizer = 1; - let hasFolder = 1; -} - def Linalg_YieldOp : Linalg_Op<"yield", [NoSideEffect, ReturnLike, Terminator]>, Arguments<(ins Variadic:$values)> { let summary = "Linalg yield operation"; diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h b/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h --- a/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h @@ -1,4 +1,5 @@ -//===- HoistPadding.h - Hoisting transformation for PadTensorOp -*- C++ -*-===// +//===- HoistPadding.h - Hoisting transformation for tensor::PadOp -*- C++ +//-*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -14,8 +15,11 @@ namespace mlir { class Value; +namespace tensor { +class PadOp; +} // namespace tensor + namespace linalg { -class PadTensorOp; /// Mechanically hoist padding operations on tensors by `numLoops` into a new, /// generally larger tensor. This achieves packing of multiple padding ops into @@ -59,8 +63,8 @@ /// } /// } /// ``` -FailureOr hoistPaddingOnTensors(PadTensorOp opToHoist, int numLoops, - PadTensorOp &hoistedOp); +FailureOr hoistPaddingOnTensors(tensor::PadOp opToHoist, int numLoops, + tensor::PadOp &hoistedOp); } // namespace linalg } // namespace mlir diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -1132,18 +1132,18 @@ // Op-specific patterns. //===----------------------------------------------------------------------===// -/// PadTensorOp is not canonicalized away yet, so we provide a transformation to -/// `linalg.generic`. -struct PadTensorOpTransformationPattern : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; +/// tensor::PadOp is not canonicalized away yet, so we provide a transformation +/// to `linalg.generic`. +struct PadOpTransformationPattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(PadTensorOp padOp, + LogicalResult matchAndRewrite(tensor::PadOp padOp, PatternRewriter &rewriter) const override; }; /// Pad the operands of `opToPad` to a static bounding box. Use `paddingFunc` /// and `nofoldFunc` to set the padding value and the nofold attribute of the -/// introduced PadTensorOps, respectively. Update `paddedOp` to the cloned +/// introduced tensor::PadOps, respectively. Update `paddedOp` to the cloned /// statically shaped operation and return the extracted dynamically shaped /// results. If padding fails, return failure. FailureOr> @@ -1153,23 +1153,23 @@ LinalgOp &paddedOp); using OptimizeCopyFn = - std::function; + std::function; -/// Rewrite a PadTensorOp into a sequence of InitTensorOp, FillOp and +/// Rewrite a tensor::PadOp into a sequence of InitTensorOp, FillOp and /// InsertSliceOp. For now, only constant padding values are supported. /// `OptimizeCopyFn` can be used to customize copying step optimization. 
-struct GeneralizePadTensorOpPattern : public OpRewritePattern<PadTensorOp> {
-  GeneralizePadTensorOpPattern(MLIRContext *context,
-                               OptimizeCopyFn optimizeCopyFn = nullptr,
-                               PatternBenefit benefit = 1)
-      : OpRewritePattern<PadTensorOp>(context, benefit),
+struct GeneralizePadOpPattern : public OpRewritePattern<tensor::PadOp> {
+  GeneralizePadOpPattern(MLIRContext *context,
+                         OptimizeCopyFn optimizeCopyFn = nullptr,
+                         PatternBenefit benefit = 1)
+      : OpRewritePattern<tensor::PadOp>(context, benefit),
         optimizeCopyFn(std::move(optimizeCopyFn)) {}
-  LogicalResult matchAndRewrite(PadTensorOp padOp,
+  LogicalResult matchAndRewrite(tensor::PadOp padOp,
                                 PatternRewriter &rewriter) const override;
 protected:
   OptimizeCopyFn optimizeCopyFn;
-  Value createFillOrGenerateOp(PatternRewriter &rewriter, PadTensorOp padOp,
+  Value createFillOrGenerateOp(PatternRewriter &rewriter, tensor::PadOp padOp,
                                Value dest,
                                const SmallVector<Value> &dynSizes) const;
 };
@@ -1179,9 +1179,9 @@
 /// are used to encode a certain ordering of pattern application. To avoid
 /// scattering magic constants throughout the code base, the patterns must be
 /// added with this function. `baseBenefit` can be used to offset the benefit
-/// of all PadTensorOp vectorization patterns by a certain value.
-void populatePadTensorOpVectorizationPatterns(RewritePatternSet &patterns,
-                                              PatternBenefit baseBenefit = 1);
+/// of all tensor::PadOp vectorization patterns by a certain value.
+void populatePadOpVectorizationPatterns(RewritePatternSet &patterns,
+                                        PatternBenefit baseBenefit = 1);
 /// Match and rewrite for the pattern:
 /// ```
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -107,12 +107,12 @@
     OpBuilder &b, Location loc, Value source, ArrayRef<OpFoldResult> offsets,
     ArrayRef<OpFoldResult> sizes, ArrayRef<OpFoldResult> strides);
-/// Create a PadTensorOp that pads `source` to the size of the statically sized
-/// `type` whose static sizes are assumed to be greater than the dynamic
+/// Create a tensor::PadOp that pads `source` to the size of the statically
+/// sized `type` whose static sizes are assumed to be greater than the dynamic
 /// `source` size. The padding introduces trailing `pad` values until the target
 /// size is met. If `source` is defined by one or more LinalgOps that have been
 /// padded with the same value and sizes, return their padded result instead of
-/// creating a PadTensorOp.
+/// creating a tensor::PadOp.
///
/// Example:
/// ```
diff --git a/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h b/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h
--- a/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h
+++ b/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h
@@ -19,6 +19,7 @@
 #include "mlir/Interfaces/ControlFlowInterfaces.h"
 #include "mlir/Interfaces/InferTypeOpInterface.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
+#include "mlir/Interfaces/TilingInterface.h"
 #include "mlir/Interfaces/ViewLikeInterface.h"
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
--- a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
+++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
@@ -14,6 +14,7 @@
 include "mlir/Interfaces/ControlFlowInterfaces.td"
 include "mlir/Interfaces/InferTypeOpInterface.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/TilingInterface.td"
 include "mlir/Interfaces/ViewLikeInterface.td"
 class Tensor_Op<string mnemonic, list<OpTrait> traits = []>
@@ -777,6 +778,190 @@
   let extraClassDeclaration = commonExtraClassDeclaration;
 }
+//===----------------------------------------------------------------------===//
+// PadOp
+//===----------------------------------------------------------------------===//
+
+def Tensor_PadOp : Tensor_Op<"pad", [AttrSizedOperandSegments, NoSideEffect]> {
+  let summary = "tensor pad operation";
+  let description = [{
+    `tensor.pad` is an operation that pads the `source` tensor
+    with given `low` and `high` padding config.
+
+    The `tensor.pad` operation supports the following arguments:
+
+    * source: the "base" tensor on which to pad.
+    * low: A list containing the padding along the start of each
+           dimension, i.e. `low`.
+    * high: A list containing the padding along the end of each
+            dimension, i.e. `high`.
+    * nofold: indicates that the operation should not be folded when source and
+      result types are equal.
+
+    The result tensor dimensions are `low` + `dim` + `high` along each
+    dimension. The number of elements of `low` and `high` must match
+    the rank of the input tensor. They can be either a constant or a
+    dynamic value.
+
+    The region of the `tensor.pad` operation returns the value to use
+    for the padding. The arguments of the region represent the index
+    of the source being accessed. There should be as many arguments as
+    the rank of the `source` tensor. The value `yield`-ed by the
+    region is used as the value of the view at the given position.
+
+    If `nofold` is set, the padding operation will not be folded away even
+    if the source type and the padded type have the same static shape. This can
+    be used, e.g., for packing or promotion to faster memory.
+
+    Example 1:
+
+    ```mlir
+      %pad_value = ... : f32
+      %1 = tensor.pad %0 low[1, 2] high[2, 3] {
+      ^bb0(%arg0 : index, %arg1 : index):
+        tensor.yield %pad_value : f32
+      } : tensor<?x?xf32> to tensor<?x?xf32>
+    ```
+
+    Example 2:
+
+    ```mlir
+      %pad_value = ... : f32
+      %0 = tensor.pad %arg0 low[2, %arg1, 3, 3] high[3, 3, %arg1, 2] {
+      ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index):
+        tensor.yield %pad_value : f32
+      } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32>
+    ```
+
+    Example 3:
+
+    ```mlir
+      %pad_value = ... : f32
+      %0 = tensor.pad %arg0 low[0, 0] high[%ub0, %ub1] {
+      ^bb0(%arg1: index, %arg2: index):
+        tensor.yield %pad_value : f32
+      } : tensor<2x3xf32> to tensor<?x?xf32>
+    ```
+
+    Example 4:
+
+    ```mlir
+      // Force the padded value to always exist with `nofold`.
+      %pad_value = ...
: f32 + %0 = tensor.pad %arg0 nofold low[0, 0] high[0, 0] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %pad_value : f32 + } : tensor<2x3xf32> to tensor<2x3xf32> + ``` + }]; + + let arguments = (ins + AnyTensor:$source, + Variadic:$low, + Variadic:$high, + I64ArrayAttr:$static_low, + I64ArrayAttr:$static_high, + UnitAttr:$nofold); + + let regions = (region SizedRegion<1>:$region); + + let results = (outs AnyTensor:$result); + + // TODO: Remove custom when AllTypesMatch supports opt. operands. + let assemblyFormat = [{ + $source + (`nofold` $nofold^)? + `low` `` custom($low, $static_low) + `high` `` custom($high, $static_high) + $region attr-dict `:` type($source) `to` type($result) + }]; + + let extraClassDeclaration = [{ + static StringRef getStaticLowAttrName() { + return "static_low"; + } + + static StringRef getStaticHighAttrName() { + return "static_high"; + } + + RankedTensorType getSourceType() { + return source().getType().cast(); + } + RankedTensorType getResultType() { + return getResult().getType().cast(); + } + + // Infer the shape of the result tensor given the type of the source tensor + // and paddings. Known result dimensions that cannot necessarily be inferred + // from low/high padding sizes can be optionally specified. Those will be + // considered when computing the result type. + static RankedTensorType inferResultType( + RankedTensorType sourceType, + ArrayRef staticLow, + ArrayRef staticHigh, + ArrayRef resultShape = {}); + + // Return the pad value if it is a constant. Return null value otherwise. + Value getConstantPaddingValue(); + + // Return a vector of all the static or dynamic values (low/high padding) of + // the op. + inline SmallVector getMixedPadImpl(ArrayAttr staticAttrs, + ValueRange values) { + SmallVector res; + unsigned numDynamic = 0; + unsigned count = staticAttrs.size(); + for (unsigned idx = 0; idx < count; ++idx) { + if (ShapedType::isDynamic(staticAttrs[idx].cast().getInt())) + res.push_back(values[numDynamic++]); + else + res.push_back(staticAttrs[idx]); + } + return res; + } + SmallVector getMixedLowPad() { + return getMixedPadImpl(static_low(), low()); + } + SmallVector getMixedHighPad() { + return getMixedPadImpl(static_high(), high()); + } + // Return true if low padding is guaranteed to be 0. + bool hasZeroLowPad() { + return llvm::all_of(getMixedLowPad(), [](OpFoldResult ofr) { + return getConstantIntValue(ofr) == static_cast(0); + }); + } + // Return true if high padding is guaranteed to be 0. + bool hasZeroHighPad() { + return llvm::all_of(getMixedHighPad(), [](OpFoldResult ofr) { + return getConstantIntValue(ofr) == static_cast(0); + }); + } + }]; + + let builders = [ + // Build a PadOp with mixed static and dynamic entries. + OpBuilder<(ins "Value":$source, "ArrayRef":$staticLow, + "ArrayRef":$staticHigh, "ValueRange":$low, "ValueRange":$high, + CArg<"bool", "false">:$nofold, + CArg<"ArrayRef", "{}">:$attrs)>, + // Build a PadOp with all dynamic entries. + OpBuilder<(ins "Value":$source, "ValueRange":$low, "ValueRange":$high, + CArg<"bool", "false">:$nofold, + CArg<"ArrayRef", "{}">:$attrs)>, + // Build a PadOp with mixed static and dynamic entries and custom + // result type. If the type passed is nullptr, it is inferred. 
+ OpBuilder<(ins "Type":$resultType, "Value":$source, + "ArrayRef":$low, "ArrayRef":$high, + CArg<"bool", "false">:$nofold, + CArg<"ArrayRef", "{}">:$attrs)>, + ]; + + let hasCanonicalizer = 1; + let hasFolder = 1; +} + //===----------------------------------------------------------------------===// // YieldOp @@ -784,16 +969,17 @@ def Tensor_YieldOp : Tensor_Op<"yield", [NoSideEffect, ReturnLike, Terminator, - HasParent<"::mlir::tensor::GenerateOp">]> { + HasParent<"::mlir::tensor::GenerateOp, ::mlir::tensor::PadOp">]> { let summary = "Yield a value from a region"; let description = [{ This operation is used to yield a single value from a within a region. It is used to create dynamically sized tensors - (see `tensor.generate` op). + (see `tensor.generate` and `tensor.pad` ops). }]; let arguments = (ins AnyType:$value); let assemblyFormat = "$value attr-dict `:` type($value)"; + // Dummy builder to appease code in templated ensureTerminator that // GenerateOp's auto-generated parser calls. let builders = [OpBuilder<(ins), [{ /* nothing to do */ }]>]; diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h b/mlir/include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h @@ -0,0 +1,36 @@ +//===- TensorTilingOpInterfaceImpl.h - ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements Tiling interface for TensorOps with ExternalModel. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_TENSOR_IR_TENSORTILINGINTERFACEIMPL_H_ +#define MLIR_DIALECT_TENSOR_IR_TENSORTILINGINTERFACEIMPL_H_ + +#include "mlir/IR/Dialect.h" + +namespace mlir { +namespace tensor { + +/// Registers external models for Tiling interface for tensor ops. +/// Currently, it registers: +/// +/// * TilingInterface for `tensor.pad`. +/// +/// Unfortunately, a "normal" internal registration is not possible at the +/// moment, because of the dependency of the interface implementation for these +/// ops on `affine.apply` and Affine dialect already depends on TensorOps. In +/// order to break the cyclic dependency (TensorOps->AffineOps->TensorOps) the +/// implementation is moved to a separate library. +void registerTilingOpInterfaceExternalModels(mlir::DialectRegistry ®istry); + +} // namespace tensor +} // namespace mlir + +#endif // MLIR_DIALECT_TENSOR_IR_TENSORTILINGINTERFACEIMPL_H_ diff --git a/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h @@ -0,0 +1,34 @@ +//===- Utils.h - Utilities to support the Tensor dialect -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_TENSOR_UTILS_UTILS_H_ +#define MLIR_DIALECT_TENSOR_UTILS_UTILS_H_ + +#include "mlir/Dialect/Tensor/IR/Tensor.h" + +namespace mlir { +namespace tensor { + +// Return a PadOp that pads `source` to `type` size where the static +// sizes are assumed to be greater than the dynamic sizes. The op performs +// "high" padding (i.e. it adds trailing padding values until the desired +// size is met). +PadOp createPadHighOp(Type type, Value source, Value pad, bool nofold, + Location loc, OpBuilder &builder); + +// Return a PadOp that pads `source to `type` size with `pad` value. +// I.e., a block will be created and the `pad` value will be yielded +// directly. If the type passed is nullptr, it is inferred. +PadOp createPadScalarOp(Type type, Value source, Value pad, + ArrayRef low, ArrayRef high, + bool nofold, Location loc, OpBuilder &builder); + +} // namespace tensor +} // namespace mlir + +#endif // MLIR_DIALECT_TENSOR_UTILS_UTILS_H_ diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h --- a/mlir/include/mlir/InitAllDialects.h +++ b/mlir/include/mlir/InitAllDialects.h @@ -43,6 +43,7 @@ #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.h" +#include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h" #include "mlir/Dialect/Tosa/IR/TosaOps.h" #include "mlir/Dialect/Vector/VectorOps.h" #include "mlir/Dialect/X86Vector/X86VectorDialect.h" @@ -86,6 +87,7 @@ x86vector::X86VectorDialect>(); // clang-format on tensor::registerInferTypeOpInterfaceExternalModels(registry); + tensor::registerTilingOpInterfaceExternalModels(registry); } /// Append all the MLIR dialects to the registry contained in the given context. 
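Editorial note, not part of the patch: since `tensor.pad` only gains TilingInterface through the external model registered above, a downstream tool that does not go through `registerAllDialects()` must attach the model itself. Below is a minimal, hypothetical C++ sketch of such a setup; the include paths and `TensorDialect`/`MLIRContext` usage assume the headers and library introduced by this change are linked in.

```c++
// Illustrative sketch only -- not part of this patch.
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/MLIRContext.h"

static void setupRegistry(mlir::DialectRegistry &registry) {
  registry.insert<mlir::tensor::TensorDialect>();
  // Attach the external TilingInterface model for tensor.pad. Without this
  // call, dyn_cast<TilingInterface>(padOp.getOperation()) returns null.
  mlir::tensor::registerTilingOpInterfaceExternalModels(registry);
}

int main() {
  mlir::DialectRegistry registry;
  setupRegistry(registry);
  mlir::MLIRContext context(registry); // Dialects still load lazily on use.
  return 0;
}
```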
diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp @@ -17,6 +17,7 @@ #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Tensor/Utils/Utils.h" #include "mlir/Dialect/Tosa/IR/TosaOps.h" #include "mlir/Dialect/Tosa/Utils/CoversionUtils.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" @@ -1929,7 +1930,7 @@ highValues.push_back(highVal); } - auto newPadOp = linalg::PadTensorOp::createPadScalarOp( + auto newPadOp = tensor::createPadScalarOp( padOp.getType(), input, padConstant, lowValues, highValues, /*nofold=*/false, loc, rewriter); diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp @@ -17,6 +17,7 @@ #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Tensor/Utils/Utils.h" #include "mlir/Dialect/Tosa/IR/TosaOps.h" #include "mlir/Dialect/Tosa/Utils/CoversionUtils.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" @@ -55,9 +56,9 @@ Value padValue = rewriter.create(loc, padAttr); - return linalg::PadTensorOp::createPadScalarOp( - RankedTensorType::get(paddedShape, inputETy), input, padValue, - lowIndices, highIndices, /*nofold=*/false, loc, rewriter) + return tensor::createPadScalarOp(RankedTensorType::get(paddedShape, inputETy), + input, padValue, lowIndices, highIndices, + /*nofold=*/false, loc, rewriter) .result(); } diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -1074,561 +1074,6 @@ return success(); } -//===----------------------------------------------------------------------===// -// PadTensorOp -//===----------------------------------------------------------------------===// - -// TODO: Replace custom directive with AllTypesMatch as soon as it -// supports optional types. -void printInferType(OpAsmPrinter &printer, Operation *op, Value optOperand, - Type typeToInfer, Type typeToInferFrom) {} - -ParseResult parseInferType(OpAsmParser &parser, - Optional optOperand, - Type &typeToInfer, Type typeToInferFrom) { - if (optOperand) - typeToInfer = typeToInferFrom; - return success(); -} - -static LogicalResult verify(PadTensorOp op) { - auto sourceType = op.source().getType().cast(); - auto resultType = op.result().getType().cast(); - auto expectedType = PadTensorOp::inferResultType( - sourceType, extractFromI64ArrayAttr(op.static_low()), - extractFromI64ArrayAttr(op.static_high())); - for (int i = 0, e = sourceType.getRank(); i < e; ++i) { - if (resultType.getDimSize(i) == expectedType.getDimSize(i)) - continue; - if (expectedType.isDynamicDim(i)) - continue; - return op.emitError("specified type ") - << resultType << " does not match the inferred type " - << expectedType; - } - - auto ®ion = op.region(); - unsigned rank = resultType.getRank(); - Block &block = region.front(); - if (block.getNumArguments() != rank) - return op.emitError("expected the block to have ") << rank << " arguments"; - - // Note: the number and type of yield values are checked in the YieldOp. 
- for (const auto &en : llvm::enumerate(block.getArgumentTypes())) { - if (!en.value().isIndex()) - return op.emitOpError("expected block argument ") - << (en.index() + 1) << " to be an index"; - } - - return success(); -} - -RankedTensorType PadTensorOp::inferResultType(RankedTensorType sourceType, - ArrayRef staticLow, - ArrayRef staticHigh, - ArrayRef resultShape) { - unsigned rank = sourceType.getRank(); - assert(staticLow.size() == rank && "unexpected staticLow size mismatch"); - assert(staticHigh.size() == rank && "unexpected staticHigh size mismatch"); - assert((resultShape.empty() || resultShape.size() == rank) && - "unexpected resultShape size mismatch"); - - SmallVector inferredShape; - for (auto i : llvm::seq(0, rank)) { - if (sourceType.isDynamicDim(i) || - staticLow[i] == ShapedType::kDynamicSize || - staticHigh[i] == ShapedType::kDynamicSize) { - inferredShape.push_back(resultShape.empty() ? ShapedType::kDynamicSize - : resultShape[i]); - } else { - int64_t size = sourceType.getDimSize(i) + staticLow[i] + staticHigh[i]; - assert((resultShape.empty() || size == resultShape[i] || - resultShape[i] == ShapedType::kDynamicSize) && - "mismatch between inferred shape and result shape"); - inferredShape.push_back(size); - } - } - - return RankedTensorType::get(inferredShape, sourceType.getElementType()); -} - -void PadTensorOp::build(OpBuilder &b, OperationState &result, Value source, - ArrayRef staticLow, - ArrayRef staticHigh, ValueRange low, - ValueRange high, bool nofold, - ArrayRef attrs) { - auto sourceType = source.getType().cast(); - auto resultType = inferResultType(sourceType, staticLow, staticHigh); - build(b, result, resultType, source, low, high, b.getI64ArrayAttr(staticLow), - b.getI64ArrayAttr(staticHigh), nofold ? b.getUnitAttr() : UnitAttr()); - result.addAttributes(attrs); -} - -void PadTensorOp::build(OpBuilder &b, OperationState &result, Value source, - ValueRange low, ValueRange high, bool nofold, - ArrayRef attrs) { - auto sourceType = source.getType().cast(); - unsigned rank = sourceType.getRank(); - SmallVector staticVector(rank, ShapedType::kDynamicSize); - build(b, result, source, staticVector, staticVector, low, high, nofold, - attrs); -} - -void PadTensorOp::build(OpBuilder &b, OperationState &result, Type resultType, - Value source, ArrayRef low, - ArrayRef high, bool nofold, - ArrayRef attrs) { - assert(resultType.isa()); - auto sourceType = source.getType().cast(); - SmallVector dynamicLow, dynamicHigh; - SmallVector staticLow, staticHigh; - // staticLow and staticHigh have full information of the padding config. - // This will grow staticLow and staticHigh with 1 value. If the config is - // dynamic (ie not a constant), dynamicLow and dynamicHigh will grow with 1 - // value as well. - dispatchIndexOpFoldResults(low, dynamicLow, staticLow, - ShapedType::kDynamicSize); - dispatchIndexOpFoldResults(high, dynamicHigh, staticHigh, - ShapedType::kDynamicSize); - if (!resultType) { - resultType = - PadTensorOp::inferResultType(sourceType, staticLow, staticHigh); - } - build(b, result, resultType, source, dynamicLow, dynamicHigh, - b.getI64ArrayAttr(staticLow), b.getI64ArrayAttr(staticHigh), - nofold ? 
b.getUnitAttr() : UnitAttr()); - result.addAttributes(attrs); -} - -PadTensorOp PadTensorOp::createPadScalarOp(Type type, Value source, Value pad, - ArrayRef low, - ArrayRef high, - bool nofold, Location loc, - OpBuilder &builder) { - auto padTensorOp = - builder.create(loc, type, source, low, high, nofold); - int rank = padTensorOp.getResultType().getRank(); - SmallVector blockArgTypes; - blockArgTypes.assign(rank, builder.getIndexType()); - auto ®ion = padTensorOp.region(); - // `builder.createBlock` changes the insertion point within the block. Create - // a guard to reset the insertion point of the builder after it is destroyed. - OpBuilder::InsertionGuard guard(builder); - builder.createBlock(®ion, region.end(), blockArgTypes); - builder.create(loc, pad); - return padTensorOp; -} - -PadTensorOp PadTensorOp::createPadHighOp(Type type, Value source, Value pad, - bool nofold, Location loc, - OpBuilder &b) { - SmallVector low, high; - auto rankedTensorType = type.cast(); - assert(rankedTensorType.hasStaticShape()); - for (const auto &en : enumerate(rankedTensorType.getShape())) { - AffineExpr d0; - bindDims(b.getContext(), d0); - auto dimOp = b.createOrFold(loc, source, en.index()); - Value paddingWidth = - makeComposedAffineApply(b, loc, en.value() - d0, {dimOp}); - high.push_back(paddingWidth); - low.push_back(b.createOrFold(loc, 0)); - } - return PadTensorOp::createPadScalarOp(type, source, pad, low, high, nofold, - loc, b); -} - -LogicalResult PadTensorOp::reifyResultShapes( - OpBuilder &b, ReifiedRankedShapedTypeDims &reifiedReturnShapes) { - Location loc = getLoc(); - auto lowPad = getMixedLowPad(); - auto highPad = getMixedHighPad(); - SmallVector shapes; - for (auto dim : llvm::seq(0, getSourceType().getRank())) { - // Shape along each dimension is source dim + low pad + high pad. - SmallVector mapOperands; - mapOperands.push_back(b.createOrFold(loc, source(), dim)); - AffineExpr expr = b.getAffineDimExpr(0); - unsigned numSymbols = 0; - auto addOpFoldResult = [&](OpFoldResult valueOrAttr) { - if (Value v = valueOrAttr.dyn_cast()) { - expr = expr + b.getAffineSymbolExpr(numSymbols++); - mapOperands.push_back(v); - return; - } - int64_t staticValue = - valueOrAttr.get().cast().getInt(); - expr = expr + staticValue; - }; - addOpFoldResult(lowPad[dim]); - addOpFoldResult(highPad[dim]); - shapes.push_back(applyMapToValues( - b, loc, AffineMap::get(1, numSymbols, expr), mapOperands)[0]); - } - reifiedReturnShapes.emplace_back(std::move(shapes)); - return success(); -} - -//===----------------------------------------------------------------------===// -// Methods related to PadTensor tiling. -//===----------------------------------------------------------------------===// - -SmallVector PadTensorOp::getDestinationOperands(OpBuilder &b) { - ReifiedRankedShapedTypeDims reifiedShapes; - (void)reifyResultShapes(b, reifiedShapes); - SmallVector mixedSizes = getAsOpFoldResult(reifiedShapes[0]); - Value initTensor = b.create(getLoc(), mixedSizes, - getResultType().getElementType()); - return {initTensor}; -} - -SmallVector PadTensorOp::getLoopIteratorTypes() { - SmallVector iteratorTypes(getResultType().getRank(), - getParallelIteratorTypeName()); - return iteratorTypes; -} - -SmallVector PadTensorOp::getIterationDomain(OpBuilder &b) { - ReifiedRankedShapedTypeDims reifiedShapes; - (void)reifyResultShapes(b, reifiedShapes); - Value zero = b.create(getLoc(), 0); - Value one = b.create(getLoc(), 1); - // Initialize all the ranges to {zero, one, one}. All the `ub`s are - // overwritten. 
- SmallVector loopRanges(reifiedShapes[0].size(), {zero, one, one}); - for (const auto &ub : enumerate(reifiedShapes[0])) - loopRanges[ub.index()].size = ub.value(); - return loopRanges; -} - -SmallVector PadTensorOp::getTiledImplementation( - OpBuilder &b, ValueRange dest, ArrayRef offsets, - ArrayRef sizes, bool /*tileDestOperands*/) { - // Only constant padding value supported. - Value padValue = getConstantPaddingValue(); - if (!padValue) - return {}; - - // Helper variables and functions for various arithmetic operations. These are - // used extensively for computing new offset/length and padding values. - Location loc = getLoc(); - AffineExpr dim0, dim1; - bindDims(b.getContext(), dim0, dim1); - // Add two integers. - auto addMap = AffineMap::get(2, 0, {dim0 + dim1}); - auto add = [&](Value v1, Value v2) { - return b.createOrFold(loc, addMap, ValueRange{v1, v2}); - }; - // Subtract two integers. - auto subMap = AffineMap::get(2, 0, {dim0 - dim1}); - auto sub = [&](Value v1, Value v2) { - return b.createOrFold(loc, subMap, ValueRange{v1, v2}); - }; - // Take the minimum of two integers. - auto idMap = AffineMap::getMultiDimIdentityMap(2, b.getContext()); - auto min = [&](Value v1, Value v2) { - return b.createOrFold(loc, idMap, ValueRange{v1, v2}); - }; - // Take the maximum of two integers. - auto max = [&](Value v1, Value v2) { - return b.createOrFold(loc, idMap, ValueRange{v1, v2}); - }; - // Zero index-typed integer. - auto zero = b.create(loc, 0); - - // Helper function for filling static/dynamic low/high padding indices vectors - // of PadTensorOp. - auto appendIndex = [&](Value val, SmallVector &dynIndices, - SmallVector &staticIndices) { - if (auto constInt = getConstantIntValue(val)) { - staticIndices.push_back(*constInt); - } else { - staticIndices.push_back(ShapedType::kDynamicSize); - dynIndices.push_back(val); - } - }; - - // Compute new offsets, lengths, low padding, high padding. - SmallVector newOffsets, newLengths, newStrides; - SmallVector newLows, newHighs; - SmallVector staticNewLows, staticNewHighs; - // Set to true if the original data source is not read at all. - bool hasZeroLen = false; - // Same as hasZeroLen, but for dynamic dimension sizes. This condition - // is true if the original data source turns out to be unused at runtime. - Value dynHasZeroLenCond; - - int64_t rank = getSourceType().getRank(); - for (unsigned dim = 0; dim < rank; ++dim) { - auto low = getValueOrCreateConstantIndexOp(b, loc, getMixedLowPad()[dim]); - bool hasLowPad = getConstantIntValue(low) != static_cast(0); - auto high = getValueOrCreateConstantIndexOp(b, loc, getMixedHighPad()[dim]); - bool hasHighPad = getConstantIntValue(high) != static_cast(0); - auto offset = getValueOrCreateConstantIndexOp(b, loc, offsets[dim]); - auto length = getValueOrCreateConstantIndexOp(b, loc, sizes[dim]); - auto srcSize = b.createOrFold(loc, source(), dim); - - // The new amount of low padding is `low - offset`. Except for the case - // where none of the low padding is read. In that case, the new amount of - // low padding is zero. - // - // Optimization: If low = 0, then newLow = 0. - Value newLow = hasLowPad ? max(zero, sub(low, offset)) : zero; - appendIndex(newLow, newLows, staticNewLows); - - // Start reading the data from position `offset - low`. Since the original - // read may have started in the low padding zone, this value could be - // negative. Therefore, start reading from: - // - // max(offset - low, 0) - // - // The original read could also have started in the high padding zone. 
- // In that case, set the offset to the end of source tensor. The new - // ExtractSliceOp length will be zero in that case. (Effectively reading no - // data from the source.) - // - // Optimization: If low = 0, then the formula can be simplified. - Value newOffset = hasLowPad ? min(max(sub(offset, low), zero), srcSize) - : min(offset, srcSize); - newOffsets.push_back(getAsOpFoldResult(newOffset)); - - // The original ExtractSliceOp was reading until position `offset + length`. - // Therefore, the corresponding position within the source tensor is: - // - // offset + length - low - // - // In case the original ExtractSliceOp stopped reading within the low - // padding zone, this value can be negative. In that case, the end position - // of the read should be zero. (Similar to newOffset.) - // - // The original read could also have stopped in the high padding zone. - // In that case, set the end positition of the read should be the end of the - // source tensor. (Similar to newOffset.) - // - // endLoc = min(max(offset - low + length, 0), srcSize) - // - // The new ExtractSliceOp length is `endLoc - newOffset`. - // - // Optimization: If low = 0, then the formula can be simplified. - Value endLoc = hasLowPad - ? min(max(add(sub(offset, low), length), zero), srcSize) - : min(add(offset, length), srcSize); - Value newLength = sub(endLoc, newOffset); - newLengths.push_back(getAsOpFoldResult(newLength)); - - // Check if newLength is zero. In that case, no SubTensorOp should be - // executed. - if (auto newLengthInt = getConstantIntValue(newLength)) { - hasZeroLen |= *newLengthInt == 0; - } else { - Value check = b.create(loc, arith::CmpIPredicate::eq, - newLength, zero); - dynHasZeroLenCond = - dynHasZeroLenCond - ? b.create(loc, check, dynHasZeroLenCond) - : check; - } - - // The amount of high padding is simply the number of elements remaining, - // so that the result has the same length as the original ExtractSliceOp. - // As an optimization, if the original high padding is zero, then the new - // high padding must also be zero. - Value newHigh = hasHighPad ? sub(sub(length, newLength), newLow) : zero; - appendIndex(newHigh, newHighs, staticNewHighs); - - // Only unit stride supported. - newStrides.push_back(b.getIndexAttr(1)); - } - - // The shape of the result can be obtained from the sizes passed in. - SmallVector dynDims; - SmallVector shape; - dispatchIndexOpFoldResults(sizes, dynDims, shape, ShapedType::kDynamicSize); - RankedTensorType resultType = - RankedTensorType::get(shape, getResultType().getElementType()); - - // Insert cast to ensure that types match. (May be folded away.) - auto castResult = [&](Value val) -> Operation * { - auto castOp = b.create(loc, resultType, val); - return castOp; - }; - - // In cases where the original data source is unused: Emit a GenerateOp and - // do not generate a SliceOp. (The result shape of the SliceOp would - // have a dimension of size 0, the semantics of which is unclear.) - auto createGenerateOp = [&]() { - // Create GenerateOp. - auto generateOp = b.create( - loc, resultType, dynDims, - [&](OpBuilder &builder, Location gLoc, ValueRange indices) { - builder.create(gLoc, padValue); - }); - return castResult(generateOp); - }; - - // Emit a SliceOp and a PadTensorOp. Should not be used in cases where - // the result shape of the new SliceOp has a zero dimension. - auto createPadTensorOfSubTensor = [&]() { - // Create pad_tensor(subtensor(x)). 
- auto newSliceOp = b.create( - loc, source(), newOffsets, newLengths, newStrides); - auto newPadTensorOp = b.create( - loc, newSliceOp, staticNewLows, staticNewHighs, newLows, newHighs); - - // Copy region to new PadTensorOp. - BlockAndValueMapping bvm; - region().cloneInto(&newPadTensorOp.getRegion(), bvm); - - // Cast result and return. - return castResult(newPadTensorOp); - }; - - // Rewrite subtensor(pad_tensor(x)) into a GenerateOp it is statically known - // that the original data source x is not used. - if (hasZeroLen) { - return {createGenerateOp()}; - } - - // If there are dynamic dimensions: Generate an scf.if check to avoid creating - // SliceOps with result dimensions of size 0 at runtime. - if (dynHasZeroLenCond) { - auto result = b.create( - loc, resultType, dynHasZeroLenCond, - /*thenBuilder=*/ - [&](OpBuilder &b, Location loc) { - b.create(loc, createGenerateOp()->getResult(0)); - }, - /*elseBuilder=*/ - [&](OpBuilder &b, Location loc) { - b.create(loc, - createPadTensorOfSubTensor()->getResult(0)); - }); - return {result}; - } - return {createPadTensorOfSubTensor()}; -} - -namespace { -// Folds linalg.pad_tensor when padding is static zeros and the attribute -// doesn't request otherwise. -struct FoldStaticZeroPadding : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(PadTensorOp padTensorOp, - PatternRewriter &rewriter) const override { - if (!padTensorOp.hasZeroLowPad() || !padTensorOp.hasZeroHighPad()) - return failure(); - if (padTensorOp.nofold()) - return failure(); - rewriter.replaceOpWithNewOp( - padTensorOp, padTensorOp.result().getType(), padTensorOp.source()); - return success(); - } -}; - -// Fold CastOp into PadTensorOp when adding static information. -struct FoldSourceTensorCast : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(PadTensorOp padTensorOp, - PatternRewriter &rewriter) const override { - auto castOp = padTensorOp.source().getDefiningOp(); - if (!tensor::canFoldIntoConsumerOp(castOp)) - return failure(); - - auto newResultType = PadTensorOp::inferResultType( - castOp.source().getType().cast(), - extractFromI64ArrayAttr(padTensorOp.static_low()), - extractFromI64ArrayAttr(padTensorOp.static_high()), - padTensorOp.getResultType().getShape()); - - if (newResultType == padTensorOp.getResultType()) { - rewriter.updateRootInPlace(padTensorOp, [&]() { - padTensorOp.sourceMutable().assign(castOp.source()); - }); - } else { - auto newOp = rewriter.create( - padTensorOp->getLoc(), newResultType, padTensorOp.source(), - padTensorOp.low(), padTensorOp.high(), padTensorOp.static_low(), - padTensorOp.static_high(), padTensorOp.nofold()); - BlockAndValueMapping mapper; - padTensorOp.getRegion().cloneInto(&newOp.getRegion(), mapper); - - rewriter.replaceOpWithNewOp( - padTensorOp, padTensorOp.getResultType(), newOp); - } - return success(); - } -}; - -// Fold CastOp using the result of PadTensorOp back into the latter if it adds -// static information. 
-struct FoldTargetTensorCast : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(PadTensorOp padTensorOp, - PatternRewriter &rewriter) const override { - if (!padTensorOp.result().hasOneUse()) - return failure(); - auto tensorCastOp = - dyn_cast(*padTensorOp->getUsers().begin()); - if (!tensorCastOp) - return failure(); - if (!tensor::preservesStaticInformation(padTensorOp.result().getType(), - tensorCastOp.dest().getType())) - return failure(); - - auto replacementOp = rewriter.create( - padTensorOp.getLoc(), tensorCastOp.dest().getType(), - padTensorOp.source(), padTensorOp.low(), padTensorOp.high(), - padTensorOp.static_low(), padTensorOp.static_high(), - padTensorOp.nofold()); - replacementOp.region().takeBody(padTensorOp.region()); - - rewriter.replaceOp(padTensorOp, replacementOp.result()); - rewriter.replaceOp(tensorCastOp, replacementOp.result()); - return success(); - } -}; -} // namespace - -void PadTensorOp::getCanonicalizationPatterns(RewritePatternSet &results, - MLIRContext *context) { - results.add(context); - results.add(context); -} - -/// Return the padding value of the PadTensorOp if it constant. In this context, -/// "constant" means an actual constant or "defined outside of the block". -/// -/// Values are considered constant in three cases: -/// - A ConstantLike value. -/// - A basic block argument from a different block. -/// - A value defined outside of the block. -/// -/// If the padding value is not constant, an empty Value is returned. -Value PadTensorOp::getConstantPaddingValue() { - auto yieldOp = dyn_cast(getRegion().front().getTerminator()); - if (!yieldOp || yieldOp.values().size() != 1) - return {}; - Value padValue = yieldOp.values().front(); - // Check if yield value is a constant. - if (matchPattern(padValue, m_Constant())) - return padValue; - // Check if yield value is defined inside the PadTensorOp block. - if (padValue.getParentBlock() == &getRegion().front()) - return {}; - // Else: Yield value defined outside of the PadTensorOp block. - return padValue; -} - -OpFoldResult PadTensorOp::fold(ArrayRef) { - if (getResultType().hasStaticShape() && getResultType() == getSourceType() && - !nofold()) - return source(); - return {}; -} - //===----------------------------------------------------------------------===// // YieldOp //===----------------------------------------------------------------------===// @@ -1682,16 +1127,6 @@ if (auto linalgOp = dyn_cast(parentOp)) return verifyYield(op, cast(parentOp)); - if (auto padTensorOp = dyn_cast(parentOp)) { - if (op.getNumOperands() != 1) - return op.emitOpError("expected single yield operand (got ") - << op->getNumOperands() << ")"; - if (op.getOperand(0).getType() != - padTensorOp.getType().cast().getElementType()) - return op.emitOpError("expected yield type to match shape element type"); - return success(); - } - if (auto tiledLoopOp = dyn_cast(parentOp)) { // Check if output args with tensor types match results types. 
SmallVector tensorOuts; diff --git a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp @@ -320,7 +320,7 @@ target.addLegalDialect(); - target.addIllegalOp(); @@ -363,5 +363,5 @@ VectorTransferWriteOpConverter >(typeConverter, patterns.getContext()); // clang-format on - patterns.add(patterns.getContext()); + patterns.add(patterns.getContext()); } diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp @@ -66,7 +66,7 @@ static void applyEnablingTransformations(ModuleOp moduleOp) { RewritePatternSet patterns(moduleOp.getContext()); - patterns.add(moduleOp.getContext()); + patterns.add(moduleOp.getContext()); (void)applyPatternsAndFoldGreedily(moduleOp, std::move(patterns)); } diff --git a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp @@ -1,4 +1,5 @@ -//===- HoistPadding.cpp - Hoisting transformation for PadTensorOp ---------===// +//===- HoistPadding.cpp - Hoisting transformation for tensor::PadOp +//---------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -37,7 +38,7 @@ using namespace mlir; using namespace mlir::linalg; -/// Analysis class to support PadTensorOp hoisting across multiple enclosing +/// Analysis class to support tensor::PadOp hoisting across multiple enclosing /// loops. The failure conditions are: /// 1. Pad op has a use that is not an input of a LinalgOp. /// 2. Pad op does not have a constant padding value. @@ -53,7 +54,7 @@ /// 8. There is no enclosing scf::ForOp that indexes the padded data. /// Other cases succeed and will trigger hoisting of the pad op. struct HoistingAnalysis { - HoistingAnalysis(PadTensorOp padTensorOp, int numLoops); + HoistingAnalysis(tensor::PadOp padTensorOp, int numLoops); bool isValid() { return valid; } @@ -98,7 +99,7 @@ /// ``` /// dropNonIndexDependencies(%padded_slice, %slice) /// removes [scf.for %k, linalg.fill(%cst, %arg1)] from backwardSlice. - LogicalResult dropNonIndexDependencies(PadTensorOp padTensorOp, + LogicalResult dropNonIndexDependencies(tensor::PadOp padTensorOp, tensor::ExtractSliceOp sliceOp); /// Encodes whether the analysis is valid and hoisting can proceed. @@ -107,7 +108,7 @@ /// Return true if all uses of `padTensorOp` are an input tensor of some /// LinalgOp. -static bool isOnlyUsedAsInputOfLinalgOp(PadTensorOp padTensorOp) { +static bool isOnlyUsedAsInputOfLinalgOp(tensor::PadOp padTensorOp) { for (OpOperand &use : padTensorOp.result().getUses()) { auto linalgUser = dyn_cast(use.getOwner()); if (!linalgUser || !linalgUser.isInputTensor(&use)) { @@ -126,7 +127,7 @@ /// Multi-loops such as scf.parallel or linalg.tiled_loop are not modeled atm. /// Control-flow and other containing ops with regions are not modeled atm. 
static void -getAtMostNEnclosingLoops(PadTensorOp padTensorOp, int nLevels, +getAtMostNEnclosingLoops(tensor::PadOp padTensorOp, int nLevels, SmallVector &reverseEnclosingLoops) { AsmState state(padTensorOp->getParentOfType()); (void)state; @@ -143,7 +144,7 @@ } } -HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int numLoops) { +HoistingAnalysis::HoistingAnalysis(tensor::PadOp padTensorOp, int numLoops) { valid = false; // Bail on any use that isn't an input of a Linalg op. @@ -232,7 +233,7 @@ } LogicalResult -HoistingAnalysis::dropNonIndexDependencies(PadTensorOp padTensorOp, +HoistingAnalysis::dropNonIndexDependencies(tensor::PadOp padTensorOp, tensor::ExtractSliceOp sliceOp) { // Set of all values used for index computation. SetVector indexEdges; @@ -373,9 +374,9 @@ ValueRange{ivVal, lbVal, stepVal}); } -FailureOr mlir::linalg::hoistPaddingOnTensors(PadTensorOp opToHoist, +FailureOr mlir::linalg::hoistPaddingOnTensors(tensor::PadOp opToHoist, int numLoops, - PadTensorOp &hoistedOp) { + tensor::PadOp &hoistedOp) { LLVM_DEBUG(DBGS() << "Try to hoist " << *(opToHoist) << " by " << numLoops << " loops\n"); HoistingAnalysis analysis(opToHoist, numLoops); @@ -399,7 +400,7 @@ // Create the packed tensor into which we amortize // padding. SmallVector packedShape(nPackedLoops, ShapedType::kDynamicSize); - // TODO: go grab dims when necessary, for now PadTensorOp returns a static + // TODO: go grab dims when necessary, for now tensor::PadOp returns a static // tensor. llvm::append_range(packedShape, paddedTensorType.getShape()); auto packedTensorType = @@ -463,7 +464,7 @@ // sizes = [1 .. 1, paddedShape]. SmallVector sizes(nPackedLoops, b.getIndexAttr(1)); for (int64_t sz : paddedTensorType.getShape()) { - // TODO: go grab dims when necessary, for now PadTensorOp returns a static + // TODO: go grab dims when necessary, for now tensor::PadOp returns a static // tensor. assert(!ShapedType::isDynamic(sz) && "padded tensor needs static sizes"); sizes.push_back(b.getIndexAttr(sz)); @@ -506,6 +507,7 @@ loc, opToHoist.getResultType(), packedTensor, offsets, sizes, strides); // Make the newly cloned `opToHoist` available to the caller. - hoistedOp = cast(bvm.lookup(opToHoist.result()).getDefiningOp()); + hoistedOp = + cast(bvm.lookup(opToHoist.result()).getDefiningOp()); return newResult; } diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp @@ -100,7 +100,7 @@ filter); else tilingPattern.add(ctx, options, filter); - if (anchorOpName == linalg::PadTensorOp::getOperationName()) + if (anchorOpName == tensor::PadOp::getOperationName()) populatePadTensorTilingPatterns(tilingPattern, options); (void)applyPatternsAndFoldGreedily(funcOp, std::move(tilingPattern)); } @@ -302,12 +302,12 @@ std::move(vectorizationPatterns)); // Apply the pad tensor op vectorization separately to avoid running the - // GenericPadTensorOpVectorizationPattern too early. + // GenericPadOpVectorizationPattern too early. // TODO: Improve once we have better infrastructure to control pattern // application. 
if (vectorizePadding) { RewritePatternSet patterns(funcOp.getContext()); - linalg::populatePadTensorOpVectorizationPatterns(patterns); + linalg::populatePadOpVectorizationPatterns(patterns); (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); } } diff --git a/mlir/lib/Dialect/Linalg/Transforms/PadOpInterchange.cpp b/mlir/lib/Dialect/Linalg/Transforms/PadOpInterchange.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/PadOpInterchange.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/PadOpInterchange.cpp @@ -38,9 +38,9 @@ /// ``` /// /// if the `linalg.generic` has all parallel iterator types. -struct FusePadTensorOp : OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(PadTensorOp padOp, +struct FusePadOp : OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + LogicalResult matchAndRewrite(tensor::PadOp padOp, PatternRewriter &rewriter) const override { // Only works on padding op that sets the padded value to a constant. Value padValue = padOp.getConstantPaddingValue(); @@ -61,7 +61,10 @@ padOp, "only supported for ops with all parallel iterator types"); } ReifiedRankedShapedTypeDims resultShape; - if (failed(padOp.reifyResultShapes(rewriter, resultShape)) || + ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface = + dyn_cast(padOp.getOperation()); + if (failed(reifyShapedTypeInterface.reifyResultShapes(rewriter, + resultShape)) || resultShape.size() != 1) { return rewriter.notifyMatchFailure( padOp, "failed to get shape of pad op result"); @@ -118,5 +121,5 @@ void mlir::linalg::populateFusePadTensorWithProducerLinalgOpPatterns( RewritePatternSet &patterns) { - patterns.add(patterns.getContext()); + patterns.add(patterns.getContext()); } diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -338,18 +338,18 @@ return failure(); } -/// Generate a loop nest around a given PadTensorOp (for tiling). `newPadOp` -/// and `loopNest` are output parameters that return the new (tiled) PadTensorOp -/// and the loop nest. -static LogicalResult tilePadTensorOp(RewriterBase &builder, PadTensorOp op, - PadTensorOp &newPadOp, LoopNest &loopNest, - const LinalgTilingOptions &options) { +/// Generate a loop nest around a given tensor::PadOp (for tiling). `newPadOp` +/// and `loopNest` are output parameters that return the new (tiled) +/// tensor::PadOp and the loop nest. +static LogicalResult tilePadOp(RewriterBase &builder, tensor::PadOp op, + tensor::PadOp &newPadOp, LoopNest &loopNest, + const LinalgTilingOptions &options) { Location loc = op.getLoc(); OpBuilder::InsertionGuard g(builder); builder.setInsertionPoint(op); - // Clone PadTensorOp so that the existing op can be replaced more easily. - newPadOp = cast(builder.clone(*op.getOperation())); + // Clone tensor::PadOp so that the existing op can be replaced more easily. + newPadOp = cast(builder.clone(*op.getOperation())); // Get rank and tile sizes. int64_t rank = op.getResultType().getRank(); SmallVector tileSizes = @@ -358,7 +358,9 @@ Value zero = builder.create(loc, 0); tileSizes.append(rank - tileSizes.size(), zero); // Compute lower and upper bounds of the loop nest. 
- SmallVector ranges = op.getIterationDomain(builder); + TilingInterface tilingInterface = + dyn_cast(op.getOperation()); + SmallVector ranges = tilingInterface.getIterationDomain(builder); SmallVector lbs, dims, allDims, steps; for (int64_t i = 0; i < rank; ++i) { allDims.push_back(ranges[i].size); @@ -369,7 +371,8 @@ } } // Generate loop nest: One loop per dimension. - SmallVector destOperand = op.getDestinationOperands(builder); + SmallVector destOperand = + tilingInterface.getDestinationOperands(builder); loopNest = mlir::scf::buildLoopNest( builder, loc, lbs, /*ubs=*/dims, steps, ValueRange(destOperand), [&](OpBuilder &b, Location loc, ValueRange localIvs, @@ -379,8 +382,8 @@ computeTileOffsets(b, loc, localIvs, tileSizes); SmallVector sizes = computeTileSizes(b, loc, localIvs, tileSizes, allDims); - // Create ExtractSliceOp: Extract a tile from the PadTensorOp. - // Note: The PadTensorOp is located outside of the loop nest. It is + // Create ExtractSliceOp: Extract a tile from the tensor::PadOp. + // Note: The tensor::PadOp is located outside of the loop nest. It is // later moved inside by ExtractSliceOfPadTensorSwapPattern. auto map = AffineMap::getMultiDimIdentityMap(rank, b.getContext()); Value tiledOutput = @@ -399,21 +402,21 @@ } namespace { -struct PadTensorOpTilingPattern : public OpRewritePattern { - PadTensorOpTilingPattern(MLIRContext *ctx, LinalgTilingOptions opt) - : OpRewritePattern(ctx), options(std::move(opt)) {} +struct PadOpTilingPattern : public OpRewritePattern { + PadOpTilingPattern(MLIRContext *ctx, LinalgTilingOptions opt) + : OpRewritePattern(ctx), options(std::move(opt)) {} - LogicalResult matchAndRewrite(PadTensorOp op, + LogicalResult matchAndRewrite(tensor::PadOp op, PatternRewriter &rewriter) const override { if (op->hasAttr(LinalgTransforms::kLinalgTransformMarker)) return failure(); - PadTensorOp newPadOp; + tensor::PadOp newPadOp; LoopNest loopNest; - if (failed(tilePadTensorOp(rewriter, op, newPadOp, loopNest, options))) + if (failed(tilePadOp(rewriter, op, newPadOp, loopNest, options))) return failure(); newPadOp->setAttr(LinalgTransforms::kLinalgTransformMarker, rewriter.getUnitAttr()); - // Replace all uses of the original PadTensorOp. + // Replace all uses of the original tensor::PadOp. rewriter.replaceOp(op, loopNest.getResults()[0]); return success(); } @@ -470,7 +473,7 @@ tensor::InsertSliceOp::getCanonicalizationPatterns(patterns, ctx); InitTensorOp::getCanonicalizationPatterns(patterns, ctx); - PadTensorOp::getCanonicalizationPatterns(patterns, ctx); + tensor::PadOp::getCanonicalizationPatterns(patterns, ctx); ctx->getLoadedDialect()->getCanonicalizationPatterns(patterns); CanonicalizationPatternList< @@ -489,13 +492,13 @@ #define GET_OP_LIST #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc" >::insert(patterns, options, f); - patterns.add(ctx, options); + patterns.add(ctx, options); } void mlir::linalg::populatePadTensorTilingPatterns( RewritePatternSet &patterns, const LinalgTilingOptions &options) { auto *ctx = patterns.getContext(); - patterns.add(ctx, options); + patterns.add(ctx, options); } static void applyExtractSliceOfPadTensorSwapPattern(FuncOp funcOp) { diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -160,9 +160,9 @@ /// Helper function that tries to pad `opOperand`. 
Exit early for scalar /// operands, if `paddingFunc` returns failure, or if `opOperand` is not defined /// by an ExtractSliceOp. Otherwise, try to pad the operand even if it already -/// has a static shape. Set `result` to the result of the created PadTensorOp or -/// and return success if the operand either has been padded to a static shape -/// or already had a static shape and failure otherwise. +/// has a static shape. Set `result` to the result of the created tensor::PadOp +/// and return success if the operand either has been padded to a static +/// shape or already had a static shape and failure otherwise. static LogicalResult padOperandToSmallestStaticBoundingBox( OpBuilder &b, linalg::LinalgOp opToPad, OpOperand *opOperand, const PaddingValueComputationFunction &paddingFunc, @@ -528,10 +528,10 @@ // Hoist the padding. for (const auto &en : enumerate(depths)) { OpOperand &opOperand = paddedOp->getOpOperand(en.index()); - auto padTensorOp = opOperand.get().getDefiningOp(); + auto padTensorOp = opOperand.get().getDefiningOp(); if (!padTensorOp || en.value() == 0) continue; - PadTensorOp hoistedOp; + tensor::PadOp hoistedOp; FailureOr newResult = hoistPaddingOnTensors(padTensorOp, en.value(), hoistedOp); if (failed(newResult)) @@ -749,10 +749,11 @@ return SmallVector(nParallelLoops, getParallelIteratorTypeName()); } -/// Rewrite a PadTensorOp into a sequence of InitTensorOp, FillOp (to +/// Rewrite a tensor::PadOp into a sequence of InitTensorOp, FillOp (to /// initialize with pad_val) and GenericOp (to copy contents). -LogicalResult PadTensorOpTransformationPattern::matchAndRewrite( - linalg::PadTensorOp padOp, PatternRewriter &rewriter) const { +LogicalResult +PadOpTransformationPattern::matchAndRewrite(tensor::PadOp padOp, + PatternRewriter &rewriter) const { auto inputShapedType = padOp.source().getType().cast(); auto resultShapedType = padOp.result().getType().cast(); @@ -767,9 +768,8 @@ // 1. A BBarg from a different block. // 2. A value defined outside of the current block. Block &block = padOp.region().front(); - auto yieldOp = cast(block.getTerminator()); - assert(yieldOp.getNumOperands() == 1 && "expected single operand yield"); - Value padValue = yieldOp.values().front(); + auto yieldOp = cast(block.getTerminator()); + Value padValue = yieldOp.value(); Operation *definingOp = padValue.getDefiningOp(); if (definingOp && definingOp->getBlock() == &block) return failure(); @@ -812,8 +812,8 @@ /// Filling `dest` using FillOp constant padding value if possible. /// Otherwise, generate a tensor::GenerateOp. -Value GeneralizePadTensorOpPattern::createFillOrGenerateOp( - PatternRewriter &rewriter, PadTensorOp padOp, Value dest, +Value GeneralizePadOpPattern::createFillOrGenerateOp( + PatternRewriter &rewriter, tensor::PadOp padOp, Value dest, const SmallVector &dynSizes) const { auto padValue = padOp.getConstantPaddingValue(); if (padValue) @@ -825,20 +825,12 @@ // Copy region to new op. BlockAndValueMapping bvm; padOp.region().cloneInto(&generateOp.getRegion(), bvm); - // Rewrite linalg::YieldOp to tensor::YieldOp.
- OpBuilder::InsertionGuard guard(rewriter); - auto yieldOp = - dyn_cast(generateOp.getRegion().front().getTerminator()); - assert(yieldOp && "malformed PadTensorOp: expected YieldOp terminator"); - assert(yieldOp.values().size() == 1); - rewriter.setInsertionPoint(yieldOp); - rewriter.replaceOpWithNewOp(yieldOp, yieldOp.values()[0]); return generateOp; } LogicalResult -GeneralizePadTensorOpPattern::matchAndRewrite(PadTensorOp padOp, - PatternRewriter &rewriter) const { +GeneralizePadOpPattern::matchAndRewrite(tensor::PadOp padOp, + PatternRewriter &rewriter) const { // Given an OpFoldResult, return an index-typed value. auto getIdxValue = [&](OpFoldResult ofr) { if (auto val = ofr.dyn_cast()) @@ -877,10 +869,10 @@ if (optimizeCopyFn && optimizeCopyFn(rewriter, padOp, fill).succeeded()) return success(); - // PadTensorOps cannot be optimized. Generate a InsertSliceOp instead + // tensor::PadOps cannot be optimized. Generate a InsertSliceOp instead // for copying the PadOp source. auto sourceType = padOp.getSourceType(); - // Compute size of source of PadTensorOp. + // Compute size of source of tensor::PadOp. SmallVector srcSizes; for (unsigned dim = 0; dim < sourceType.getRank(); ++dim) { if (sourceType.isDynamicDim(dim)) { @@ -901,15 +893,17 @@ LogicalResult ExtractSliceOfPadTensorSwapPattern::matchAndRewrite( tensor::ExtractSliceOp sliceOp, PatternRewriter &rewriter) const { - auto padOp = sliceOp.source().getDefiningOp(); + auto padOp = sliceOp.source().getDefiningOp(); if (!padOp) return failure(); // Only unit stride supported. if (!sliceOp.hasUnitStride()) return failure(); + TilingInterface tilingInterface = + dyn_cast(padOp.getOperation()); Operation *tiledPadOp = - padOp + tilingInterface .getTiledImplementation( rewriter, /*dest=*/ValueRange{}, sliceOp.getMixedOffsets(), sliceOp.getMixedSizes(), /*tileDestOperands=*/false) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -682,20 +682,19 @@ return result; } -/// Rewrite a PadTensorOp into a sequence of InitTensorOp, FillOp and +/// Rewrite a tensor::PadOp into a sequence of InitTensorOp, FillOp and /// InsertSliceOp. For now, only constant padding values are supported. /// If there is enough static type information, TransferReadOps and /// TransferWriteOps may be generated instead of InsertSliceOps. -struct GenericPadTensorOpVectorizationPattern - : public GeneralizePadTensorOpPattern { - GenericPadTensorOpVectorizationPattern(MLIRContext *context, - PatternBenefit benefit = 1) - : GeneralizePadTensorOpPattern(context, tryVectorizeCopy, benefit) {} - /// Vectorize the copying of a PadTensorOp's source. This is possible if +struct GenericPadOpVectorizationPattern : public GeneralizePadOpPattern { + GenericPadOpVectorizationPattern(MLIRContext *context, + PatternBenefit benefit = 1) + : GeneralizePadOpPattern(context, tryVectorizeCopy, benefit) {} + /// Vectorize the copying of a tensor::PadOp's source. This is possible if /// each dimension size is statically know in the source type or the result /// type (or both). 
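+ /// Illustrative sketch (assumed shapes, not from this patch): padding a
+ /// tensor<?x?xf32> source to a tensor<17x5xf32> result can be copied by a
+ /// vector.transfer_read of the source that uses the constant pad value for
+ /// the out-of-bounds lanes, followed by a vector.transfer_write into the
+ /// filled destination tensor.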
static LogicalResult tryVectorizeCopy(PatternRewriter &rewriter, - PadTensorOp padOp, Value dest) { + tensor::PadOp padOp, Value dest) { auto sourceType = padOp.getSourceType(); auto resultType = padOp.getResultType(); @@ -767,13 +766,13 @@ } }; -/// Base pattern for rewriting PadTensorOps whose result is consumed by a +/// Base pattern for rewriting tensor::PadOps whose result is consumed by a /// given operation type OpTy. template -struct VectorizePadTensorOpUserPattern : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; +struct VectorizePadOpUserPattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(PadTensorOp padOp, + LogicalResult matchAndRewrite(tensor::PadOp padOp, PatternRewriter &rewriter) const final { bool changed = false; // Insert users in vector, because some users may be replaced/removed. @@ -785,10 +784,10 @@ protected: virtual LogicalResult rewriteUser(PatternRewriter &rewriter, - PadTensorOp padOp, OpTy op) const = 0; + tensor::PadOp padOp, OpTy op) const = 0; }; -/// Rewrite use of PadTensorOp result in TransferReadOp. E.g.: +/// Rewrite use of tensor::PadOp result in TransferReadOp. E.g.: /// ``` /// %0 = linalg.pad_tensor %src ... : tensor to tensor<17x5xf32> /// %r = vector.transfer_read %0[%c0, %c0], %cst @@ -807,12 +806,12 @@ /// - `xferOp` has no out-of-bounds dims or mask. /// - Low padding is static 0. /// - Single, scalar padding value. -struct PadTensorOpVectorizationWithTransferReadPattern - : public VectorizePadTensorOpUserPattern { - using VectorizePadTensorOpUserPattern< - vector::TransferReadOp>::VectorizePadTensorOpUserPattern; +struct PadOpVectorizationWithTransferReadPattern + : public VectorizePadOpUserPattern { + using VectorizePadOpUserPattern< + vector::TransferReadOp>::VectorizePadOpUserPattern; - LogicalResult rewriteUser(PatternRewriter &rewriter, PadTensorOp padOp, + LogicalResult rewriteUser(PatternRewriter &rewriter, tensor::PadOp padOp, vector::TransferReadOp xferOp) const override { // Low padding must be static 0. if (!padOp.hasZeroLowPad()) @@ -837,7 +836,7 @@ } }; -/// Rewrite use of PadTensorOp result in TransferWriteOp. +/// Rewrite use of tensor::PadOp result in TransferWriteOp. /// This pattern rewrites TransferWriteOps that write to a padded tensor /// value, where the same amount of padding is immediately removed again after /// the write. In such cases, the TransferWriteOp can write to the non-padded @@ -869,12 +868,12 @@ /// ExtractSliceOp trims the same amount of padding that was added /// beforehand. /// - Single, scalar padding value. -struct PadTensorOpVectorizationWithTransferWritePattern - : public VectorizePadTensorOpUserPattern { - using VectorizePadTensorOpUserPattern< - vector::TransferWriteOp>::VectorizePadTensorOpUserPattern; +struct PadOpVectorizationWithTransferWritePattern + : public VectorizePadOpUserPattern { + using VectorizePadOpUserPattern< + vector::TransferWriteOp>::VectorizePadOpUserPattern; - LogicalResult rewriteUser(PatternRewriter &rewriter, PadTensorOp padOp, + LogicalResult rewriteUser(PatternRewriter &rewriter, tensor::PadOp padOp, vector::TransferWriteOp xferOp) const override { // TODO: support 0-d corner case. if (xferOp.getTransferRank() == 0) @@ -925,7 +924,7 @@ /// sizes may turn out to be equal at runtime. 
bool hasSameTensorSize(Value beforePadding, tensor::ExtractSliceOp afterTrimming) const { - // If the input to PadTensorOp is a CastOp, try with with both CastOp + // If the input to tensor::PadOp is a CastOp, try with with both CastOp // result and CastOp operand. if (auto castOp = beforePadding.getDefiningOp()) if (hasSameTensorSize(castOp.source(), afterTrimming)) @@ -1000,7 +999,7 @@ } }; -/// Rewrite use of PadTensorOp result in InsertSliceOp. E.g.: +/// Rewrite use of tensor::PadOp result in InsertSliceOp. E.g.: /// ``` /// %0 = linalg.pad_tensor %src ... : tensor to tensor<17x5xf32> /// %r = tensor.insert_slice %0 @@ -1023,12 +1022,12 @@ /// - Only unit strides in `insertOp`. /// - Single, scalar padding value. /// - `padOp` result not used as destination. -struct PadTensorOpVectorizationWithInsertSlicePattern - : public VectorizePadTensorOpUserPattern { - using VectorizePadTensorOpUserPattern< - tensor::InsertSliceOp>::VectorizePadTensorOpUserPattern; +struct PadOpVectorizationWithInsertSlicePattern + : public VectorizePadOpUserPattern { + using VectorizePadOpUserPattern< + tensor::InsertSliceOp>::VectorizePadOpUserPattern; - LogicalResult rewriteUser(PatternRewriter &rewriter, PadTensorOp padOp, + LogicalResult rewriteUser(PatternRewriter &rewriter, tensor::PadOp padOp, tensor::InsertSliceOp insertOp) const override { // Low padding must be static 0. if (!padOp.hasZeroLowPad()) @@ -1087,14 +1086,14 @@ } }; -void mlir::linalg::populatePadTensorOpVectorizationPatterns( +void mlir::linalg::populatePadOpVectorizationPatterns( RewritePatternSet &patterns, PatternBenefit baseBenefit) { - patterns.add(patterns.getContext(), - baseBenefit); + patterns.add(patterns.getContext(), + baseBenefit); // Try these specialized patterns first before resorting to the generic one. - patterns.add( + patterns.add( patterns.getContext(), baseBenefit.getBenefit() + 1); } diff --git a/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt --- a/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt @@ -12,5 +12,6 @@ MLIRSCF MLIRPass MLIRStandard + MLIRTensorUtils MLIRTransformUtils ) diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -23,6 +23,7 @@ #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/StandardOps/Utils/Utils.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Tensor/Utils/Utils.h" #include "mlir/Dialect/Utils/StaticValueUtils.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/AffineExprVisitor.h" @@ -328,7 +329,7 @@ // Exit if `source` is not defined by an ExtractSliceOp. auto sliceOp = source.getDefiningOp(); if (!sliceOp) - return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b); + return tensor::createPadHighOp(type, source, pad, nofold, loc, b); // Search the `source` use-def chain for padded LinalgOps. Value current = sliceOp.source(); @@ -339,22 +340,22 @@ OpResult opResult = current.cast(); current = linalgOp.getOutputOperand(opResult.getResultNumber())->get(); } - auto padTensorOp = current ? current.getDefiningOp() : nullptr; + auto padTensorOp = current ? current.getDefiningOp() : nullptr; - // Exit if the search fails to match a PadTensorOp at the end of the matched + // Exit if the search fails to match a tensor::PadOp at the end of the matched // LinalgOp sequence. 
if (!padTensorOp) - return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b); + return tensor::createPadHighOp(type, source, pad, nofold, loc, b); // Exit if the padded result type does not match. if (sliceOp.source().getType() != type) - return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b); + return tensor::createPadHighOp(type, source, pad, nofold, loc, b); // Exit if the LinalgOps are not high padded. if (llvm::any_of(padTensorOp.getMixedLowPad(), [](OpFoldResult ofr) { return getConstantIntValue(ofr) != static_cast(0); })) - return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b); + return tensor::createPadHighOp(type, source, pad, nofold, loc, b); // Exit if `padTensorOpSliceOp`, which defines the slice used by // `padTensorOp`, is rank-reducing. @@ -362,7 +363,7 @@ padTensorOp.source().getDefiningOp(); if (!padTensorOpSliceOp || sliceOp.getMixedSizes().size() != padTensorOpSliceOp.getMixedSizes().size()) - return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b); + return tensor::createPadHighOp(type, source, pad, nofold, loc, b); // Exit if the sizes of the dynamic sizes of `sliceOp` do not match the size // of the slice padded by `padTensorOp`. @@ -372,7 +373,7 @@ return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it)); })) - return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b); + return tensor::createPadHighOp(type, source, pad, nofold, loc, b); // Exit if the padding values do not match. Attribute padTensorOpPadAttr, padAttr; @@ -380,7 +381,7 @@ if (!padTensorOpPad || !matchPattern(padTensorOpPad, m_Constant(&padTensorOpPadAttr)) || !matchPattern(pad, m_Constant(&padAttr)) || padTensorOpPadAttr != padAttr) - return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b); + return tensor::createPadHighOp(type, source, pad, nofold, loc, b); // Return the padded result if the padding values and sizes match. 
return sliceOp.source(); diff --git a/mlir/lib/Dialect/Tensor/CMakeLists.txt b/mlir/lib/Dialect/Tensor/CMakeLists.txt --- a/mlir/lib/Dialect/Tensor/CMakeLists.txt +++ b/mlir/lib/Dialect/Tensor/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(IR) add_subdirectory(Transforms) +add_subdirectory(Utils) diff --git a/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt b/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt --- a/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt @@ -2,6 +2,7 @@ TensorDialect.cpp TensorInferTypeOpInterfaceImpl.cpp TensorOps.cpp + TensorTilingInterfaceImpl.cpp ) add_mlir_dialect_library(MLIRTensor @@ -43,3 +44,20 @@ MLIRSupport MLIRTensor ) + +add_mlir_dialect_library(MLIRTensorTilingInterfaceImpl + TensorTilingInterfaceImpl.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Tensor + + LINK_LIBS PUBLIC + MLIRAffine + MLIRIR + MLIRLinalg + MLIRSCF + MLIRStandard + MLIRSupport + MLIRTensor + MLIRTilingInterface + ) diff --git a/mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp --- a/mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp @@ -161,6 +161,48 @@ } }; +namespace { + +struct ReifyPadOp + : public ReifyRankedShapedTypeOpInterface::ExternalModel { + LogicalResult + reifyResultShapes(Operation *op, OpBuilder &b, + ReifiedRankedShapedTypeDims &reifiedReturnShapes) const { + auto padOp = cast(op); + Location loc = padOp.getLoc(); + auto lowPad = padOp.getMixedLowPad(); + auto highPad = padOp.getMixedHighPad(); + SmallVector shapes; + for (auto dim : llvm::seq(0, padOp.getSourceType().getRank())) { + // Shape along each dimension is source dim + low pad + high pad. + SmallVector mapOperands; + mapOperands.push_back( + b.createOrFold(loc, padOp.source(), dim)); + AffineExpr expr = b.getAffineDimExpr(0); + unsigned numSymbols = 0; + auto addOpFoldResult = [&](OpFoldResult valueOrAttr) { + if (Value v = valueOrAttr.dyn_cast()) { + expr = expr + b.getAffineSymbolExpr(numSymbols++); + mapOperands.push_back(v); + return; + } + int64_t staticValue = + valueOrAttr.get().cast().getInt(); + expr = expr + staticValue; + }; + addOpFoldResult(lowPad[dim]); + addOpFoldResult(highPad[dim]); + shapes.push_back(applyMapToValues( + b, loc, AffineMap::get(1, numSymbols, expr), mapOperands)[0]); + } + reifiedReturnShapes.emplace_back(std::move(shapes)); + return success(); + } +}; + +} // namespace + void mlir::tensor::registerInferTypeOpInterfaceExternalModels( DialectRegistry ®istry) { registry @@ -169,4 +211,5 @@ registry .addOpInterface>(); + registry.addOpInterface(); } diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -476,6 +476,7 @@ // Ensure that the region yields an element of the right type. auto yieldOp = llvm::cast(op.body().getBlocks().front().getTerminator()); + if (yieldOp.value().getType() != resultTy.getElementType()) return op.emitOpError( "body must be terminated with a `yield` operation of the tensor " @@ -1481,6 +1482,258 @@ sizes, strides); } +//===----------------------------------------------------------------------===// +// PadOp +//===----------------------------------------------------------------------===// + +// TODO: Replace custom directive with AllTypesMatch as soon as it +// supports optional types. 
+void printInferType(OpAsmPrinter &printer, Operation *op, Value optOperand, + Type typeToInfer, Type typeToInferFrom) {} + +ParseResult parseInferType(OpAsmParser &parser, + Optional optOperand, + Type &typeToInfer, Type typeToInferFrom) { + if (optOperand) + typeToInfer = typeToInferFrom; + return success(); +} + +static LogicalResult verify(PadOp op) { + auto sourceType = op.source().getType().cast(); + auto resultType = op.result().getType().cast(); + auto expectedType = PadOp::inferResultType( + sourceType, extractFromI64ArrayAttr(op.static_low()), + extractFromI64ArrayAttr(op.static_high())); + for (int i = 0, e = sourceType.getRank(); i < e; ++i) { + if (resultType.getDimSize(i) == expectedType.getDimSize(i)) + continue; + if (expectedType.isDynamicDim(i)) + continue; + return op.emitError("specified type ") + << resultType << " does not match the inferred type " + << expectedType; + } + + auto ®ion = op.region(); + unsigned rank = resultType.getRank(); + Block &block = region.front(); + if (block.getNumArguments() != rank) + return op.emitError("expected the block to have ") << rank << " arguments"; + + // Note: the number and type of yield values are checked in the YieldOp. + for (const auto &en : llvm::enumerate(block.getArgumentTypes())) { + if (!en.value().isIndex()) + return op.emitOpError("expected block argument ") + << (en.index() + 1) << " to be an index"; + } + + // Ensure that the region yields an element of the right type. + auto yieldOp = llvm::cast(block.getTerminator()); + if (yieldOp.value().getType() != + op.getType().cast().getElementType()) + return op.emitOpError("expected yield type to match shape element type"); + + return success(); +} + +RankedTensorType PadOp::inferResultType(RankedTensorType sourceType, + ArrayRef staticLow, + ArrayRef staticHigh, + ArrayRef resultShape) { + unsigned rank = sourceType.getRank(); + assert(staticLow.size() == rank && "unexpected staticLow size mismatch"); + assert(staticHigh.size() == rank && "unexpected staticHigh size mismatch"); + assert((resultShape.empty() || resultShape.size() == rank) && + "unexpected resultShape size mismatch"); + + SmallVector inferredShape; + for (auto i : llvm::seq(0, rank)) { + if (sourceType.isDynamicDim(i) || + staticLow[i] == ShapedType::kDynamicSize || + staticHigh[i] == ShapedType::kDynamicSize) { + inferredShape.push_back(resultShape.empty() ? ShapedType::kDynamicSize + : resultShape[i]); + } else { + int64_t size = sourceType.getDimSize(i) + staticLow[i] + staticHigh[i]; + assert((resultShape.empty() || size == resultShape[i] || + resultShape[i] == ShapedType::kDynamicSize) && + "mismatch between inferred shape and result shape"); + inferredShape.push_back(size); + } + } + + return RankedTensorType::get(inferredShape, sourceType.getElementType()); +} + +void PadOp::build(OpBuilder &b, OperationState &result, Value source, + ArrayRef staticLow, ArrayRef staticHigh, + ValueRange low, ValueRange high, bool nofold, + ArrayRef attrs) { + auto sourceType = source.getType().cast(); + auto resultType = inferResultType(sourceType, staticLow, staticHigh); + build(b, result, resultType, source, low, high, b.getI64ArrayAttr(staticLow), + b.getI64ArrayAttr(staticHigh), nofold ? 
b.getUnitAttr() : UnitAttr()); + result.addAttributes(attrs); +} + +void PadOp::build(OpBuilder &b, OperationState &result, Value source, + ValueRange low, ValueRange high, bool nofold, + ArrayRef attrs) { + auto sourceType = source.getType().cast(); + unsigned rank = sourceType.getRank(); + SmallVector staticVector(rank, ShapedType::kDynamicSize); + build(b, result, source, staticVector, staticVector, low, high, nofold, + attrs); +} + +void PadOp::build(OpBuilder &b, OperationState &result, Type resultType, + Value source, ArrayRef low, + ArrayRef high, bool nofold, + ArrayRef attrs) { + assert(resultType.isa()); + auto sourceType = source.getType().cast(); + SmallVector dynamicLow, dynamicHigh; + SmallVector staticLow, staticHigh; + // staticLow and staticHigh have full information of the padding config. + // This will grow staticLow and staticHigh with 1 value. If the config is + // dynamic (ie not a constant), dynamicLow and dynamicHigh will grow with 1 + // value as well. + dispatchIndexOpFoldResults(low, dynamicLow, staticLow, + ShapedType::kDynamicSize); + dispatchIndexOpFoldResults(high, dynamicHigh, staticHigh, + ShapedType::kDynamicSize); + if (!resultType) { + resultType = PadOp::inferResultType(sourceType, staticLow, staticHigh); + } + build(b, result, resultType, source, dynamicLow, dynamicHigh, + b.getI64ArrayAttr(staticLow), b.getI64ArrayAttr(staticHigh), + nofold ? b.getUnitAttr() : UnitAttr()); + result.addAttributes(attrs); +} + +namespace { +// Folds tensor.pad when padding is static zeros and the attribute +// doesn't request otherwise. +struct FoldStaticZeroPadding : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(PadOp padTensorOp, + PatternRewriter &rewriter) const override { + if (!padTensorOp.hasZeroLowPad() || !padTensorOp.hasZeroHighPad()) + return failure(); + if (padTensorOp.nofold()) + return failure(); + rewriter.replaceOpWithNewOp( + padTensorOp, padTensorOp.result().getType(), padTensorOp.source()); + return success(); + } +}; + +// Fold CastOp into PadOp when adding static information. +struct FoldSourceTensorCast : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(PadOp padTensorOp, + PatternRewriter &rewriter) const override { + auto castOp = padTensorOp.source().getDefiningOp(); + if (!tensor::canFoldIntoConsumerOp(castOp)) + return failure(); + + auto newResultType = PadOp::inferResultType( + castOp.source().getType().cast(), + extractFromI64ArrayAttr(padTensorOp.static_low()), + extractFromI64ArrayAttr(padTensorOp.static_high()), + padTensorOp.getResultType().getShape()); + + if (newResultType == padTensorOp.getResultType()) { + rewriter.updateRootInPlace(padTensorOp, [&]() { + padTensorOp.sourceMutable().assign(castOp.source()); + }); + } else { + auto newOp = rewriter.create( + padTensorOp->getLoc(), newResultType, padTensorOp.source(), + padTensorOp.low(), padTensorOp.high(), padTensorOp.static_low(), + padTensorOp.static_high(), padTensorOp.nofold()); + BlockAndValueMapping mapper; + padTensorOp.getRegion().cloneInto(&newOp.getRegion(), mapper); + + rewriter.replaceOpWithNewOp( + padTensorOp, padTensorOp.getResultType(), newOp); + } + return success(); + } +}; + +// Fold CastOp using the result of PadOp back into the latter if it adds +// static information. 
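+// Illustrative sketch (assumed types, mirroring the cast_of_pad_more_static
+// test):
+//   %0 = tensor.pad %src low[%l, %l] high[0, 0] {...}
+//          : tensor<?x?xf32> to tensor<?x?xf32>
+//   %1 = tensor.cast %0 : tensor<?x?xf32> to tensor<32x32xf32>
+// is rewritten into a single tensor.pad whose result type is tensor<32x32xf32>.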
+struct FoldTargetTensorCast : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(PadOp padTensorOp, + PatternRewriter &rewriter) const override { + if (!padTensorOp.result().hasOneUse()) + return failure(); + auto tensorCastOp = + dyn_cast(*padTensorOp->getUsers().begin()); + if (!tensorCastOp) + return failure(); + if (!tensor::preservesStaticInformation(padTensorOp.result().getType(), + tensorCastOp.dest().getType())) + return failure(); + + auto replacementOp = rewriter.create( + padTensorOp.getLoc(), tensorCastOp.dest().getType(), + padTensorOp.source(), padTensorOp.low(), padTensorOp.high(), + padTensorOp.static_low(), padTensorOp.static_high(), + padTensorOp.nofold()); + replacementOp.region().takeBody(padTensorOp.region()); + + rewriter.replaceOp(padTensorOp, replacementOp.result()); + rewriter.replaceOp(tensorCastOp, replacementOp.result()); + return success(); + } +}; +} // namespace + +void PadOp::getCanonicalizationPatterns(RewritePatternSet &results, + MLIRContext *context) { + results + .add( + context); +} + +/// Return the padding value of the PadOp if it constant. In this context, +/// "constant" means an actual constant or "defined outside of the block". +/// +/// Values are considered constant in three cases: +/// - A ConstantLike value. +/// - A basic block argument from a different block. +/// - A value defined outside of the block. +/// +/// If the padding value is not constant, an empty Value is returned. +Value PadOp::getConstantPaddingValue() { + auto yieldOp = dyn_cast(getRegion().front().getTerminator()); + if (!yieldOp) + return {}; + Value padValue = yieldOp.value(); + // Check if yield value is a constant. + if (matchPattern(padValue, m_Constant())) + return padValue; + // Check if yield value is defined inside the PadOp block. + if (padValue.getParentBlock() == &getRegion().front()) + return {}; + // Else: Yield value defined outside of the PadOp block. + return padValue; +} + +OpFoldResult PadOp::fold(ArrayRef) { + if (getResultType().hasStaticShape() && getResultType() == getSourceType() && + !nofold()) + return source(); + return {}; +} + //===----------------------------------------------------------------------===// // TableGen'd op method definitions //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp @@ -0,0 +1,279 @@ +//===- TensorTilingInterface.cpp - Tiling Interface models *- C++ ------*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/SCF/SCF.h" +#include "mlir/Dialect/StandardOps/Utils/Utils.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Interfaces/TilingInterface.h" + +using namespace mlir; +using namespace mlir::tensor; + +namespace { + +struct PadOpTiling : public TilingInterface::ExternalModel { + + SmallVector getDestinationOperands(Operation *op, OpBuilder &b) const { + ReifiedRankedShapedTypeDims reifiedShapes; + ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface = + dyn_cast(op); + (void)reifyShapedTypeInterface.reifyResultShapes(b, reifiedShapes); + + auto padOp = cast(op); + SmallVector mixedSizes = getAsOpFoldResult(reifiedShapes[0]); + Value initTensor = b.create( + op->getLoc(), mixedSizes, padOp.getResultType().getElementType()); + return {initTensor}; + } + + SmallVector getLoopIteratorTypes(Operation *op) const { + auto padOp = cast(op); + SmallVector iteratorTypes(padOp.getResultType().getRank(), + getParallelIteratorTypeName()); + return iteratorTypes; + } + + SmallVector getIterationDomain(Operation *op, OpBuilder &b) const { + ReifiedRankedShapedTypeDims reifiedShapes; + ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface = + dyn_cast(op); + (void)reifyShapedTypeInterface.reifyResultShapes(b, reifiedShapes); + + Location loc = op->getLoc(); + Value zero = b.create(loc, 0); + Value one = b.create(loc, 1); + // Initialize all the ranges to {zero, one, one}. All the `ub`s are + // overwritten. + SmallVector loopRanges(reifiedShapes[0].size(), {zero, one, one}); + for (const auto &ub : enumerate(reifiedShapes[0])) + loopRanges[ub.index()].size = ub.value(); + return loopRanges; + } + + SmallVector + getTiledImplementation(Operation *op, OpBuilder &b, ValueRange dest, + ArrayRef offsets, + ArrayRef sizes, + bool /*tileDestOperands*/) const { + auto padOp = cast(op); + // Only constant padding value supported. + Value padValue = padOp.getConstantPaddingValue(); + if (!padValue) + return {}; + + // Helper variables and functions for various arithmetic operations. These + // are used extensively for computing new offset/length and padding values. + Location loc = op->getLoc(); + AffineExpr dim0, dim1; + bindDims(b.getContext(), dim0, dim1); + // Add two integers. + auto addMap = AffineMap::get(2, 0, {dim0 + dim1}); + auto add = [&](Value v1, Value v2) { + return b.createOrFold(loc, addMap, ValueRange{v1, v2}); + }; + // Subtract two integers. + auto subMap = AffineMap::get(2, 0, {dim0 - dim1}); + auto sub = [&](Value v1, Value v2) { + return b.createOrFold(loc, subMap, ValueRange{v1, v2}); + }; + // Take the minimum of two integers. + auto idMap = AffineMap::getMultiDimIdentityMap(2, b.getContext()); + auto min = [&](Value v1, Value v2) { + return b.createOrFold(loc, idMap, ValueRange{v1, v2}); + }; + // Take the maximum of two integers. + auto max = [&](Value v1, Value v2) { + return b.createOrFold(loc, idMap, ValueRange{v1, v2}); + }; + // Zero index-typed integer. + auto zero = b.create(loc, 0); + + // Helper function for filling static/dynamic low/high padding indices + // vectors of PadOp. 
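+    // If `val` is a constant it is recorded as a static index; otherwise the
+    // static entry is ShapedType::kDynamicSize and `val` goes into the dynamic
+    // index vector, matching PadOp's mixed static/dynamic operand encoding.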
+ auto appendIndex = [&](Value val, SmallVector &dynIndices, + SmallVector &staticIndices) { + if (auto constInt = getConstantIntValue(val)) { + staticIndices.push_back(*constInt); + } else { + staticIndices.push_back(ShapedType::kDynamicSize); + dynIndices.push_back(val); + } + }; + + // Compute new offsets, lengths, low padding, high padding. + SmallVector newOffsets, newLengths, newStrides; + SmallVector newLows, newHighs; + SmallVector staticNewLows, staticNewHighs; + // Set to true if the original data source is not read at all. + bool hasZeroLen = false; + // Same as hasZeroLen, but for dynamic dimension sizes. This condition + // is true if the original data source turns out to be unused at runtime. + Value dynHasZeroLenCond; + + int64_t rank = padOp.getSourceType().getRank(); + for (unsigned dim = 0; dim < rank; ++dim) { + auto low = + getValueOrCreateConstantIndexOp(b, loc, padOp.getMixedLowPad()[dim]); + bool hasLowPad = getConstantIntValue(low) != static_cast(0); + auto high = + getValueOrCreateConstantIndexOp(b, loc, padOp.getMixedHighPad()[dim]); + bool hasHighPad = getConstantIntValue(high) != static_cast(0); + auto offset = getValueOrCreateConstantIndexOp(b, loc, offsets[dim]); + auto length = getValueOrCreateConstantIndexOp(b, loc, sizes[dim]); + auto srcSize = b.createOrFold(loc, padOp.source(), dim); + + // The new amount of low padding is `low - offset`. Except for the case + // where none of the low padding is read. In that case, the new amount of + // low padding is zero. + // + // Optimization: If low = 0, then newLow = 0. + Value newLow = hasLowPad ? max(zero, sub(low, offset)) : zero; + appendIndex(newLow, newLows, staticNewLows); + + // Start reading the data from position `offset - low`. Since the original + // read may have started in the low padding zone, this value could be + // negative. Therefore, start reading from: + // + // max(offset - low, 0) + // + // The original read could also have started in the high padding zone. + // In that case, set the offset to the end of the source tensor. The new + // ExtractSliceOp length will be zero in that case. (Effectively reading + // no data from the source.) + // + // Optimization: If low = 0, then the formula can be simplified. + Value newOffset = hasLowPad ? min(max(sub(offset, low), zero), srcSize) + : min(offset, srcSize); + newOffsets.push_back(getAsOpFoldResult(newOffset)); + + // The original ExtractSliceOp was reading until position `offset + + // length`. Therefore, the corresponding position within the source tensor + // is: + // + // offset + length - low + // + // In case the original ExtractSliceOp stopped reading within the low + // padding zone, this value can be negative. In that case, the end + // position of the read should be zero. (Similar to newOffset.) + // + // The original read could also have stopped in the high padding zone. + // In that case, the end position of the read should be the end of + // the source tensor. (Similar to newOffset.) + // + // endLoc = min(max(offset - low + length, 0), srcSize) + // + // The new ExtractSliceOp length is `endLoc - newOffset`. + // + // Optimization: If low = 0, then the formula can be simplified. + Value endLoc = + hasLowPad ? min(max(add(sub(offset, low), length), zero), srcSize) + : min(add(offset, length), srcSize); + Value newLength = sub(endLoc, newOffset); + newLengths.push_back(getAsOpFoldResult(newLength)); + + // Check if newLength is zero. In that case, no SubTensorOp should be + // executed.
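+    // Worked example with assumed values (not from this code): for low = 2,
+    // srcSize = 10, offset = 1, length = 4 the formulas above give
+    // newLow = max(0, 2 - 1) = 1, newOffset = min(max(1 - 2, 0), 10) = 0,
+    // endLoc = min(max(1 - 2 + 4, 0), 10) = 3, newLength = 3 - 0 = 3; the
+    // high padding computed below is then (4 - 3) - 1 = 0.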
+ if (auto newLengthInt = getConstantIntValue(newLength)) { + hasZeroLen |= *newLengthInt == 0; + } else { + Value check = b.create(loc, arith::CmpIPredicate::eq, + newLength, zero); + dynHasZeroLenCond = + dynHasZeroLenCond + ? b.create(loc, check, dynHasZeroLenCond) + : check; + } + + // The amount of high padding is simply the number of elements remaining, + // so that the result has the same length as the original ExtractSliceOp. + // As an optimization, if the original high padding is zero, then the new + // high padding must also be zero. + Value newHigh = hasHighPad ? sub(sub(length, newLength), newLow) : zero; + appendIndex(newHigh, newHighs, staticNewHighs); + + // Only unit stride supported. + newStrides.push_back(b.getIndexAttr(1)); + } + + // The shape of the result can be obtained from the sizes passed in. + SmallVector dynDims; + SmallVector shape; + dispatchIndexOpFoldResults(sizes, dynDims, shape, ShapedType::kDynamicSize); + RankedTensorType resultType = + RankedTensorType::get(shape, padOp.getResultType().getElementType()); + + // Insert cast to ensure that types match. (May be folded away.) + auto castResult = [&](Value val) -> Operation * { + auto castOp = b.create(loc, resultType, val); + return castOp; + }; + + // In cases where the original data source is unused: Emit a GenerateOp and + // do not generate a SliceOp. (The result shape of the SliceOp would + // have a dimension of size 0, the semantics of which is unclear.) + auto createGenerateOp = [&]() { + // Create GenerateOp. + auto generateOp = b.create( + loc, resultType, dynDims, + [&](OpBuilder &builder, Location gLoc, ValueRange indices) { + builder.create(gLoc, padValue); + }); + return castResult(generateOp); + }; + + // Emit a SliceOp and a PadOp. Should not be used in cases where + // the result shape of the new SliceOp has a zero dimension. + auto createPadTensorOfSubTensor = [&]() { + // Create pad_tensor(subtensor(x)). + auto newSliceOp = b.create( + loc, padOp.source(), newOffsets, newLengths, newStrides); + auto newPadOp = b.create(loc, newSliceOp, staticNewLows, + staticNewHighs, newLows, newHighs); + + // Copy region to new PadOp. + BlockAndValueMapping bvm; + padOp.region().cloneInto(&newPadOp.getRegion(), bvm); + + // Cast result and return. + return castResult(newPadOp); + }; + + // Rewrite subtensor(pad_tensor(x)) into a GenerateOp if it is statically known + // that the original data source x is not used. + if (hasZeroLen) + return {createGenerateOp()}; + + // If there are dynamic dimensions: Generate an scf.if check to avoid + // creating SliceOps with result dimensions of size 0 at runtime.
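+    // The then-branch yields the GenerateOp result and the else-branch yields
+    // the padded slice of the source; both are cast to the common static
+    // result type beforehand.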
+ if (dynHasZeroLenCond) { + auto result = b.create( + loc, resultType, dynHasZeroLenCond, + /*thenBuilder=*/ + [&](OpBuilder &b, Location loc) { + b.create(loc, createGenerateOp()->getResult(0)); + }, + /*elseBuilder=*/ + [&](OpBuilder &b, Location loc) { + b.create(loc, + createPadTensorOfSubTensor()->getResult(0)); + }); + return {result}; + } + return {createPadTensorOfSubTensor()}; + } +}; + +} // namespace + +void mlir::tensor::registerTilingOpInterfaceExternalModels( + DialectRegistry ®istry) { + registry.addOpInterface(); +} diff --git a/mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt @@ -0,0 +1,12 @@ +add_mlir_dialect_library(MLIRTensorUtils + Utils.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Tensor + + LINK_LIBS PUBLIC + MLIRAffine + MLIRArithmetic + MLIRIR + MLIRTensor +) diff --git a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp @@ -0,0 +1,53 @@ +//===- Utils.cpp - Utilities to support the Tensor dialect ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements utilities for the Tensor dialect. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Tensor/Utils/Utils.h" + +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" + +using namespace mlir; +using namespace mlir::tensor; + +PadOp mlir::tensor::createPadScalarOp(Type type, Value source, Value pad, + ArrayRef low, + ArrayRef high, bool nofold, + Location loc, OpBuilder &builder) { + auto padTensorOp = + builder.create(loc, type, source, low, high, nofold); + int rank = padTensorOp.getResultType().getRank(); + SmallVector blockArgTypes(rank, builder.getIndexType()); + auto ®ion = padTensorOp.region(); + // `builder.createBlock` changes the insertion point within the block. Create + // a guard to reset the insertion point of the builder after it is destroyed. 
+ OpBuilder::InsertionGuard guard(builder); + builder.createBlock(®ion, region.end(), blockArgTypes); + builder.create(loc, pad); + return padTensorOp; +} + +PadOp mlir::tensor::createPadHighOp(Type type, Value source, Value pad, + bool nofold, Location loc, OpBuilder &b) { + SmallVector low, high; + auto rankedTensorType = type.cast(); + assert(rankedTensorType.hasStaticShape()); + for (const auto &en : enumerate(rankedTensorType.getShape())) { + AffineExpr d0; + bindDims(b.getContext(), d0); + auto dimOp = b.createOrFold(loc, source, en.index()); + Value paddingWidth = + makeComposedAffineApply(b, loc, en.value() - d0, {dimOp}); + high.push_back(paddingWidth); + low.push_back(b.createOrFold(loc, 0)); + } + return createPadScalarOp(type, source, pad, low, high, nofold, loc, b); +} diff --git a/mlir/lib/Interfaces/TilingInterface.cpp b/mlir/lib/Interfaces/TilingInterface.cpp --- a/mlir/lib/Interfaces/TilingInterface.cpp +++ b/mlir/lib/Interfaces/TilingInterface.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "mlir/Interfaces/TilingInterface.h" -#include "mlir/Dialect/Tensor/IR/Tensor.h" using namespace mlir; diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir @@ -153,8 +153,8 @@ // CHECK-LABEL: @max_pool_padded func @max_pool_padded(%arg0: tensor<1x6x34x62xf32>) -> () { // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38 : f32 - // CHECK-DAG: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 0, 0, 0] high[0, 0, 1, 0] - // CHECK-DAG: linalg.yield [[CONST]] + // CHECK-DAG: [[PAD:%.+]] = tensor.pad %arg0 low[0, 0, 0, 0] high[0, 0, 1, 0] + // CHECK-DAG: tensor.yield [[CONST]] // CHECK-DAG: [[INITVAL:%.+]] = arith.constant -3.40282347E+38 : f32 // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 33, 62] // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[INITVAL]], [[INIT]]) @@ -206,7 +206,7 @@ func @avg_pool(%arg0: tensor<1x6x34x62xf32>) -> (tensor<1x5x33x62xf32>) { // Initial piece computes the sum of the pooling region, with appropriate padding. 
// CHECK: [[CONST:%.+]] = arith.constant 0 - // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: [[PAD:%.+]] = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] // CHECK: [[CONST:%.+]] = arith.constant 0 // CHECK: [[POOLINIT:%.+]] = linalg.init_tensor [1, 5, 33, 62] // CHECK: [[FILL:%.+]] = linalg.fill([[CONST]], [[POOLINIT]]) @@ -268,7 +268,7 @@ // The calculations remain the same as above, only testing for dyn behavior // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]] - // CHECK: %[[PAD:.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: %[[PAD:.+]] = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] // CHECK: %[[POOLINIT:.+]] = linalg.init_tensor [%[[BATCH]], 5, 33, 62] // CHECK: %[[FILL:.+]] = linalg.fill // CHECK: %[[KERNEL:.+]] = linalg.init_tensor [4, 4] @@ -386,8 +386,8 @@ // CHECK-LABEL: @conv2d_padded_f32 func @conv2d_padded_f32(%input: tensor<1x47x40x28xf32>, %weights: tensor<28x3x3x28xf32>, %bias: tensor<28xf32>) -> () { // CHECK: %[[C0:.+]] = arith.constant 0 - // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] - // CHECK: linalg.yield %[[C0]] + // CHECK: tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: tensor.yield %[[C0]] // CHECK: linalg.conv_2d_nhwc_hwcf %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [1, 1, 1, 1], stride = [1, 1], dilation = [2, 1]} : (tensor<1x47x40x28xf32>, tensor<28x3x3x28xf32>, tensor<28xf32>) -> (tensor<1x45x40x28xf32>) return @@ -398,8 +398,8 @@ // CHECK-LABEL: @conv2d_quant func @conv2d_quant(%arg0 : tensor<1x12x12x1xi8>, %arg1 : tensor<1024x3x3x1xi8>, %arg2 : tensor<1024xi32>) -> () { // CHECK: %[[C22:.+]] = arith.constant -22 - // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] - // CHECK: linalg.yield %[[C22]] + // CHECK: tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: tensor.yield %[[C22]] // CHECK: linalg.conv_2d_nhwc_hwcf_q %0 = "tosa.conv2d"(%arg0, %arg1, %arg2) {dilation = [1, 1], pad = [1, 1, 1, 1], quantization_info = {input_zp = -22 : i32, weight_zp = 42 : i32}, stride = [1, 1]} : (tensor<1x12x12x1xi8>, tensor<1024x3x3x1xi8>, tensor<1024xi32>) -> tensor<1x12x12x1024xi32> return @@ -481,8 +481,8 @@ // CHECK-LABEL: @depthwise_conv_quant func @depthwise_conv_quant(%arg0 : tensor<1x12x12x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () { // CHECK: [[PADV:%.+]] = arith.constant -128 - // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] - // CHECK: linalg.yield [[PADV]] + // CHECK: [[PAD:%.+]] = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: tensor.yield [[PADV]] // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 12, 12, 4, 128] // CHECK: [[CST0:%.+]] = arith.constant 0 diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir @@ -1158,9 +1158,9 @@ // CHECK-DAG: [[INDEX3:%.+]] = arith.constant 3 : index // CHECK-DAG: [[INDEX4:%.+]] = arith.constant 4 : index // CHECK-DAG: [[CST:%.+]] = arith.constant 0.000000e+00 : f32 - // CHECK: linalg.pad_tensor %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]] { + // CHECK: tensor.pad %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]] { // CHECK: ^bb0(%arg1: index, %arg2: index): - // CHECK: linalg.yield [[CST]] + // CHECK: tensor.yield [[CST]] // CHECK: } : 
tensor<1x2xf32> to tensor<4x9xf32> %1 = "tosa.pad"(%arg0, %0) : (tensor<1x2xf32>, tensor<2x2xi32>) -> (tensor<4x9xf32>) return %1 : tensor<4x9xf32> @@ -1169,8 +1169,8 @@ func @pad_int(%arg0 : tensor<1x2xi32>) -> (tensor<4x9xi32>) { %0 = arith.constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32> // CHECK: [[CST:%.+]] = arith.constant 0 : i32 - // CHECK: linalg.pad_tensor - // CHECK: linalg.yield [[CST]] + // CHECK: tensor.pad + // CHECK: tensor.yield [[CST]] %1 = "tosa.pad"(%arg0, %0) : (tensor<1x2xi32>, tensor<2x2xi32>) -> (tensor<4x9xi32>) return %1 : tensor<4x9xi32> } @@ -1178,8 +1178,8 @@ func @pad_quant(%arg0 : tensor<1x2xi32>) -> (tensor<4x9xi32>) { %0 = arith.constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32> // CHECK: [[CST:%.+]] = arith.constant 42 : i32 - // CHECK: linalg.pad_tensor - // CHECK: linalg.yield [[CST]] + // CHECK: tensor.pad + // CHECK: tensor.yield [[CST]] %1 = "tosa.pad"(%arg0, %0) { quantization_info = { input_zp = 42 : i32}} : (tensor<1x2xi32>, tensor<2x2xi32>) -> (tensor<4x9xi32>) return %1 : tensor<4x9xi32> } @@ -1194,9 +1194,9 @@ // CHECK-DAG: [[INDEX3:%.+]] = arith.constant 3 : index // CHECK-DAG: [[INDEX4:%.+]] = arith.constant 4 : index // CHECK-DAG: [[CST:%.+]] = arith.constant 4.200000e+01 : f32 - // CHECK: linalg.pad_tensor %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]] { + // CHECK: tensor.pad %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]] { // CHECK: ^bb0(%arg1: index, %arg2: index): - // CHECK: linalg.yield [[CST]] + // CHECK: tensor.yield [[CST]] // CHECK: } : tensor<1x2xf32> to tensor<4x9xf32> %1 = arith.constant dense<42.0> : tensor %2 = "tosa.pad"(%arg0, %0, %1) : (tensor<1x2xf32>, tensor<2x2xi32>, tensor) -> (tensor<4x9xf32>) diff --git a/mlir/test/Dialect/Linalg/bufferize.mlir b/mlir/test/Dialect/Linalg/bufferize.mlir --- a/mlir/test/Dialect/Linalg/bufferize.mlir +++ b/mlir/test/Dialect/Linalg/bufferize.mlir @@ -277,9 +277,9 @@ func @pad_tensor_dynamic_shape(%arg0: tensor<4x?x2x?xf32>, %arg1: index) -> tensor<4x?x?x?xf32> { %c0 = arith.constant 0 : index %cst = arith.constant 0.0 : f32 - %out = linalg.pad_tensor %arg0 low[%c0, %c0, %arg1, %c0] high[%c0, %c0, %c0, %arg1] { + %out = tensor.pad %arg0 low[%c0, %c0, %arg1, %c0] high[%c0, %c0, %c0, %arg1] { ^bb0(%gen_arg1: index, %gen_arg2: index, %gen_arg3: index, %gen_arg4: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<4x?x2x?xf32> to tensor<4x?x?x?xf32> return %out : tensor<4x?x?x?xf32> } diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir --- a/mlir/test/Dialect/Linalg/canonicalize.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize.mlir @@ -282,7 +282,7 @@ // CHECK-NOT: linalg.fill // CHECK-NOT: linalg.matmul // CHECK-NOT: linalg.generic -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK: return func @dead_linalg_tensor(%arg0 : tensor<7x7xi32>, %arg1 : tensor<7x7xf32>, %arg2: tensor, %high : index) { @@ -296,146 +296,15 @@ ^bb(%3: i32) : linalg.yield %3 : i32 } -> tensor<7x7xi32> - %3 = linalg.pad_tensor %arg2 low[%c0, %c0] high[%high, %high] { - ^bb0(%arg9: index, %arg10: index): - linalg.yield %cst : f32 + %3 = tensor.pad %arg2 low[%c0, %c0] high[%high, %high] { + ^bb0(%arg9: index, %arg10: index): + tensor.yield %cst : f32 } : tensor to tensor<2x4xf32> return } // ----- -// CHECK-LABEL: func @pad_tensor_same_static_shape( -// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> -// CHECK-NOT: linalg.pad_tensor -// CHECK: return %[[ARG0]] -func 
@pad_tensor_same_static_shape(%arg0: tensor<5x6xf32>, %a: index) - -> tensor<5x6xf32> { - %cst = arith.constant 0.000000e+00 : f32 - %0 = linalg.pad_tensor %arg0 low[%a, 0] high[0, %a] { - ^bb0(%arg1: index, %arg2: index): - linalg.yield %cst : f32 - } : tensor<5x6xf32> to tensor<5x6xf32> - return %0 : tensor<5x6xf32> -} - -// ----- - -// CHECK-LABEL: func @pad_tensor_nofold_same_static_shape( -// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> -// CHECK: %[[PAD:.*]] = linalg.pad_tensor -// CHECK: return %[[PAD]] -func @pad_tensor_nofold_same_static_shape(%arg0: tensor<5x6xf32>, %a: index) - -> tensor<5x6xf32> { - %cst = arith.constant 0.000000e+00 : f32 - %0 = linalg.pad_tensor %arg0 nofold low[%a, 0] high[0, %a] { - ^bb0(%arg1: index, %arg2: index): - linalg.yield %cst : f32 - } : tensor<5x6xf32> to tensor<5x6xf32> - return %0 : tensor<5x6xf32> -} - -// ----- - -// CHECK-LABEL: func @pad_tensor_after_cast_different_shape( -// CHECK-SAME: %[[INPUT:.*]]: tensor) -> tensor { -// CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[PADDED:.*]] = linalg.pad_tensor %[[INPUT]] -// CHECK-SAME: low[0, 0, 1, 1] high[0, 0, 1, 1] { -// CHECK: ^bb0(%[[ARG1:.*]]: index, %[[ARG2:.*]]: index, %[[ARG3:.*]]: index, %[[ARG4:.*]]: index): -// CHECK: linalg.yield %[[CST]] : f32 -// CHECK: } : tensor to tensor -// CHECK: %[[DYNAMIC:.*]] = tensor.cast %[[PADDED:.*]] : -// CHECK-SAME: tensor to tensor -// CHECK: return %[[DYNAMIC]] : tensor -// CHECK: } -func @pad_tensor_after_cast_different_shape(%arg0: tensor) - -> tensor { - %cst = arith.constant 0.000000e+00 : f32 - %dynamic = tensor.cast %arg0 : tensor to tensor - %padded = linalg.pad_tensor %dynamic low[0, 0, 1, 1] high[0, 0, 1, 1] { - ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): - linalg.yield %cst: f32 - } : tensor to tensor - return %padded: tensor -} - -// ----- - -// CHECK-LABEL: func @pad_tensor_after_cast_same_shape( -// CHECK-SAME: %[[INPUT:.*]]: tensor, -// CHECK-SAME: %[[PADDING:.*]]: index) -> tensor { -// CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[PADDED:.*]] = linalg.pad_tensor %[[INPUT]] -// CHECK-SAME: low[0, %[[PADDING]], 1, 1] high[0, %[[PADDING]], 1, 1] { -// CHECK: ^bb0(%[[ARG1:.*]]: index, %[[ARG2:.*]]: index, %[[ARG3:.*]]: index, %[[ARG4:.*]]: index): -// CHECK: linalg.yield %[[CST]] : f32 -// CHECK: } : tensor to tensor -// CHECK: return %[[PADDED:.*]] : tensor -// CHECK: } -func @pad_tensor_after_cast_same_shape(%arg0: tensor, %padding : index) - -> tensor { - %cst = arith.constant 0.000000e+00 : f32 - %dynamic = tensor.cast %arg0 : tensor to tensor - %padded = linalg.pad_tensor %dynamic low[0, %padding, 1, 1] high[0, %padding, 1, 1] { - ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): - linalg.yield %cst: f32 - } : tensor to tensor - return %padded: tensor -} - -// ----- - -// CHECK-LABEL: func @pad_tensor_of_cast( -// CHECK-NOT: tensor.cast -// CHECK: linalg.pad_tensor -// CHECK: tensor<8x?xf32> to tensor<8x32xf32> -func @pad_tensor_of_cast(%t: tensor<8x?xf32>, %s: index) -> tensor<8x32xf32> { - %c0 = arith.constant 0 : index - %cst = arith.constant 0.000000e+00 : f32 - %0 = tensor.cast %t : tensor<8x?xf32> to tensor - %1 = linalg.pad_tensor %0 low[%c0, %c0] high[%c0, %s] { - ^bb0(%arg9: index, %arg10: index): - linalg.yield %cst : f32 - } : tensor to tensor<8x32xf32> - return %1 : tensor<8x32xf32> -} - -// ----- - -// CHECK-LABEL: @cast_of_pad_more_static -func @cast_of_pad_more_static(%arg0: tensor, %padding: index) -> tensor<32x32xf32> { - %cst = arith.constant 
0.000000e+00 : f32 - // CHECK: %[[PAD:.*]] = linalg.pad_tensor - // CHECK: tensor to tensor<32x32xf32> - %padded = linalg.pad_tensor %arg0 low[%padding, %padding] high[0, 0] { - ^bb0(%arg1: index, %arg2: index): - linalg.yield %cst : f32 - } : tensor to tensor - // CHECK-NOT: tensor.cast - %casted = tensor.cast %padded : tensor to tensor<32x32xf32> - // CHECK: return %[[PAD]] - return %casted : tensor<32x32xf32> -} - -// ----- - -// CHECK-LABEL: @cast_of_pad_less_static -func @cast_of_pad_less_static(%arg0: tensor<32x?x?xf32>, %padding: index) -> tensor { - %cst = arith.constant 0.000000e+00 : f32 - // CHECK: linalg.pad_tensor - %padded = linalg.pad_tensor %arg0 low[%padding, %padding, %padding] high[0, 0, 0] { - ^bb0(%arg1: index, %arg2: index, %arg3: index): - linalg.yield %cst : f32 - } : tensor<32x?x?xf32> to tensor<32x?x?xf32> - // CHECK: %[[CAST:.*]] = tensor.cast - %casted = tensor.cast %padded : tensor<32x?x?xf32> to tensor - // CHECK: return %[[CAST]] - return %casted : tensor -} - -// ----- - func @propogate_casts(%arg0 : tensor, %arg1 : f32, %arg2 : index, %arg3 : index) -> tensor { %c0 = arith.constant 0 : index @@ -579,71 +448,6 @@ // ----- -func @tensor_pad_cast_fold(%arg0: tensor<4x4xf32>) -> tensor<4x4xf32> { - %c0 = arith.constant 0 : index - %cst = arith.constant 0.0 : f32 - %0 = tensor.cast %arg0 : tensor<4x4xf32> to tensor - %1 = linalg.pad_tensor %0 low[%c0, %c0] high[%c0, %c0] { - ^bb0(%arg1: index, %arg2: index): - linalg.yield %cst : f32 - } : tensor to tensor<4x4xf32> - return %1 : tensor<4x4xf32> -} -// CHECK-LABEL: @tensor_pad_cast -// CHECK-SAME: %[[ARG0:.+]]: tensor<4x4xf32> -// CHECK: return %[[ARG0]] - -// ----- - -// CHECK-LABEL: func @fold_pad_tensor_source_cast( -// CHECK-SAME: %[[ARG0:.*]]: tensor<4x?xf32> -// CHECK-NOT: tensor.cast -// CHECK: %[[RESULT:.*]] = linalg.pad_tensor %[[ARG0]] -func @fold_pad_tensor_source_cast(%arg0: tensor<4x?xf32>) -> tensor<4x4xf32> { - %cst = arith.constant 0.0 : f32 - %0 = tensor.cast %arg0 : tensor<4x?xf32> to tensor - %1 = linalg.pad_tensor %0 low[0, 0] high[0, 1] { - ^bb0(%arg1: index, %arg2: index): - linalg.yield %cst : f32 - } : tensor to tensor<4x4xf32> - return %1 : tensor<4x4xf32> -} - -// ----- - -// CHECK-LABEL: func @pad_static_zero_cast( -// CHECK-SAME: %[[ARG0:.*]]: tensor -// CHECK-NOT: linalg.pad_tensor -// CHECK: %[[RESULT:.*]] = tensor.cast %[[ARG0]] : tensor to tensor<2x3x4xf32> -// CHECK: return %[[RESULT]] -func @pad_static_zero_cast(%arg0: tensor, %pad_value: f32) -> tensor<2x3x4xf32> { - %c0 = arith.constant 0 : index - %0 = linalg.pad_tensor %arg0 low[0, %c0, 0] high[0, 0, %c0] { - ^bb0(%arg1: index, %arg2: index, %arg3: index): - linalg.yield %pad_value : f32 - } : tensor to tensor<2x3x4xf32> - - return %0 : tensor<2x3x4xf32> -} - -// ----- - -// CHECK-LABEL: func @pad_nofold_static_zero( -// CHECK-SAME: %[[ARG0:.*]]: tensor -// CHECK: %[[PAD:.*]] = linalg.pad_tensor -// CHECK: return %[[PAD]] -func @pad_nofold_static_zero(%arg0: tensor, %pad_value: f32) -> tensor<2x3x4xf32> { - %c0 = arith.constant 0 : index - %0 = linalg.pad_tensor %arg0 nofold low[0, %c0, 0] high[0, 0, %c0] { - ^bb0(%arg1: index, %arg2: index, %arg3: index): - linalg.yield %pad_value : f32 - } : tensor to tensor<2x3x4xf32> - - return %0 : tensor<2x3x4xf32> -} - -// ----- - func private @some_use(%i : index, %j : index) // CHECK-LABEL: func @init_canonicalize diff --git a/mlir/test/Dialect/Linalg/codegen-strategy.mlir b/mlir/test/Dialect/Linalg/codegen-strategy.mlir --- a/mlir/test/Dialect/Linalg/codegen-strategy.mlir +++ 
b/mlir/test/Dialect/Linalg/codegen-strategy.mlir @@ -48,7 +48,7 @@ func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<72x72xf32>) -> tensor<72x72xf32> { // Check the padding of the input operands has been hoisted out of the tile loop nest. - // CHECK-PAD-COUNT=2: linalg.pad_tensor %{{.*}} nofold + // CHECK-PAD-COUNT=2: tensor.pad %{{.*}} nofold // CHECK-PAD: scf.for // Check CSE eliminates the duplicate min operations introduced by tiling. // CHECK-PAD: affine.min #[[MAP0]] diff --git a/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir b/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir --- a/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir +++ b/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir @@ -9,9 +9,9 @@ // CHECK: return %[[PADDED]] : tensor<1x32x32x1xf32> func @generalize_pad_tensor_static_shape(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> { %cst = arith.constant 0.000000e+00 : f32 - %0 = linalg.pad_tensor %arg0 low[0, 2, 2, 0] high[0, 2, 2, 0] { + %0 = tensor.pad %arg0 low[0, 2, 2, 0] high[0, 2, 2, 0] { ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<1x28x28x1xf32> to tensor<1x32x32x1xf32> return %0 : tensor<1x32x32x1xf32> } @@ -38,9 +38,9 @@ func @generalize_pad_tensor_dynamic_shape(%arg0: tensor<4x?x2x?xf32>, %arg1: index) -> tensor<4x?x?x?xf32> { %c0 = arith.constant 0 : index %cst = arith.constant 0.0 : f32 - %out = linalg.pad_tensor %arg0 low[%c0, %c0, %arg1, %c0] high[%c0, %c0, %c0, %arg1] { + %out = tensor.pad %arg0 low[%c0, %c0, %arg1, %c0] high[%c0, %c0, %c0, %arg1] { ^bb0(%gen_arg1: index, %gen_arg2: index, %gen_arg3: index, %gen_arg4: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<4x?x2x?xf32> to tensor<4x?x?x?xf32> return %out : tensor<4x?x?x?xf32> } diff --git a/mlir/test/Dialect/Linalg/hoist-padding.mlir b/mlir/test/Dialect/Linalg/hoist-padding.mlir --- a/mlir/test/Dialect/Linalg/hoist-padding.mlir +++ b/mlir/test/Dialect/Linalg/hoist-padding.mlir @@ -18,7 +18,7 @@ // MATVEC: %[[T0:.*]] = scf.for %[[PIV0:[0-9a-z]+]] = // MATVEC: %[[PIDX0:.*]] = affine.apply #[[DIV4]](%[[PIV0]]) // MATVEC: %[[T1:.*]] = tensor.extract_slice %[[ARG1]][%[[PIV0]]] [4] - // MATVEC: %[[T2:.*]] = linalg.pad_tensor %[[T1]] + // MATVEC: %[[T2:.*]] = tensor.pad %[[T1]] // MATVEC: %[[T3:.*]] = tensor.insert_slice %[[T1:.*]]{{.*}}[%[[PIDX0]] // MATVEC: scf.for %[[IV0:[0-9a-zA-Z]*]] = @@ -29,9 +29,9 @@ // MATVEC-DAG: %[[IDX0:.*]] = affine.apply #[[DIV4]](%[[IV0]]) // MATVEC-DAG: %[[T4:.*]] = tensor.extract_slice %[[T0]][%[[IDX0]] %2 = tensor.extract_slice %arg1[%arg3] [4] [1] : tensor<12xf32> to tensor<4xf32> - %3 = linalg.pad_tensor %2 nofold low[%c0] high[%c0] { + %3 = tensor.pad %2 nofold low[%c0] high[%c0] { ^bb0(%arg5: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<4xf32> to tensor<4xf32> // Check matvec uses the packed input vector. 
@@ -67,7 +67,7 @@ // MATVEC: %[[TS0:.*]] = affine.min #[[MAP0]](%[[PIV0]]) // MATVEC: %[[T1:.*]] = tensor.extract_slice %[[ARG1]][%[[PIV0]]] [%[[TS0]]] // MATVEC: %[[HPD0:.*]] = affine.apply #[[MAP1]](%[[TS0]]) - // MATVEC: %[[T2:.*]] = linalg.pad_tensor %[[T1]]{{.*}}high[%[[HPD0]] + // MATVEC: %[[T2:.*]] = tensor.pad %[[T1]]{{.*}}high[%[[HPD0]] // MATVEC: %[[T3:.*]] = tensor.insert_slice %[[T1:.*]]{{.*}}[%[[PIDX0]] // MATVEC: scf.for %[[IV0:[0-9a-zA-Z]*]] = @@ -80,13 +80,13 @@ // MATVEC-DAG: %[[T4:.*]] = tensor.extract_slice %[[T0]][%[[IDX0]] %3 = tensor.extract_slice %arg1[%arg3] [%1] [1] : tensor<12xf32> to tensor %4 = affine.apply #map1(%1) - %5 = linalg.pad_tensor %2 low[%c0, %c0] high[%c0, %4] { + %5 = tensor.pad %2 low[%c0, %c0] high[%c0, %4] { ^bb0(%arg5: index, %arg6: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<24x?xf32> to tensor<24x5xf32> - %6 = linalg.pad_tensor %3 low[%c0] high[%4] { + %6 = tensor.pad %3 low[%c0] high[%4] { ^bb0(%arg5: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor to tensor<5xf32> // Check matvec uses the packed input vector. @@ -127,7 +127,7 @@ // MATVEC: %[[TS0:.*]] = affine.min #[[MAP0]](%[[PIV0]])[%[[D0]]] // MATVEC: %[[T1:.*]] = tensor.extract_slice %[[ARG1]][%[[PIV0]]] [%[[TS0]]] // MATVEC: %[[HPD0:.*]] = affine.apply #[[MAP1]](%[[TS0]]) - // MATVEC: %[[T2:.*]] = linalg.pad_tensor %[[T1]]{{.*}}high[%[[HPD0]] + // MATVEC: %[[T2:.*]] = tensor.pad %[[T1]]{{.*}}high[%[[HPD0]] // MATVEC: %[[T3:.*]] = tensor.insert_slice %[[T1:.*]]{{.*}}[%[[PIDX0]] // MATVEC: scf.for %[[IV0:[0-9a-zA-Z]*]] = @@ -140,13 +140,13 @@ // MATVEC-DAG: %[[T4:.*]] = tensor.extract_slice %[[T0]][%[[IDX0]] %4 = tensor.extract_slice %arg1[%arg3] [%2] [1] : tensor to tensor %5 = affine.apply #map1(%2) - %6 = linalg.pad_tensor %3 low[%c0, %c0] high[%c0, %5] { + %6 = tensor.pad %3 low[%c0, %c0] high[%c0, %5] { ^bb0(%arg5: index, %arg6: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<24x?xf32> to tensor<24x4xf32> - %7 = linalg.pad_tensor %4 nofold low[%c0] high[%5] { + %7 = tensor.pad %4 nofold low[%c0] high[%5] { ^bb0(%arg5: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor to tensor<4xf32> // Check matvec uses the packed input vector. @@ -174,13 +174,13 @@ // Check the non constant padding is not hoisted. // MATVEC: %[[T0:.*]] = tensor.extract_slice %[[ARG1]][%[[IV0]] - // MATVEC: %[[T1:.*]] = linalg.pad_tensor %[[T0]] + // MATVEC: %[[T1:.*]] = tensor.pad %[[T0]] %2 = tensor.extract_slice %arg1[%arg3] [4] [1] : tensor<12xf32> to tensor<4xf32> - %3 = linalg.pad_tensor %2 nofold low[%c0] high[%c0] { + %3 = tensor.pad %2 nofold low[%c0] high[%c0] { ^bb0(%arg5: index): %5 = arith.index_cast %arg3 : index to i32 %6 = arith.sitofp %5 : i32 to f32 - linalg.yield %6 : f32 + tensor.yield %6 : f32 } : tensor<4xf32> to tensor<4xf32> // Check matvec uses the padded input vector. @@ -209,13 +209,13 @@ // Check the non constant op padding is not hoisted. 
// MATVEC: %[[T0:.*]] = tensor.extract_slice %[[ARG1]][%[[IV0]] // MATVEC: %[[V0:.*]] = tensor.extract %[[ARG1]][%[[IV0]] - // MATVEC: %[[T1:.*]] = linalg.pad_tensor %[[T0]] - // MATVEC: linalg.yield %[[V0]] + // MATVEC: %[[T1:.*]] = tensor.pad %[[T0]] + // MATVEC: tensor.yield %[[V0]] %2 = tensor.extract_slice %arg1[%arg3] [4] [1] : tensor<12xf32> to tensor<4xf32> %3 = tensor.extract %arg1[%arg3] : tensor<12xf32> - %4 = linalg.pad_tensor %2 nofold low[%c0] high[%c0] { + %4 = tensor.pad %2 nofold low[%c0] high[%c0] { ^bb0(%arg5: index): - linalg.yield %3 : f32 + tensor.yield %3 : f32 } : tensor<4xf32> to tensor<4xf32> // Check matvec uses the padded input vector. @@ -247,12 +247,12 @@ // Check the index_cast prevents hoisting due to its non index operand. // MATVEC: %[[T0:.*]] = tensor.extract_slice %[[ARG1]][%[[IV0]] // MATVEC: %[[IDX0:.*]] = arith.index_cast %[[ARG3]] - // MATVEC: %[[T1:.*]] = linalg.pad_tensor %[[T0]]{{.*}}%[[IDX0]] + // MATVEC: %[[T1:.*]] = tensor.pad %[[T0]]{{.*}}%[[IDX0]] %2 = tensor.extract_slice %arg1[%arg4] [4] [1] : tensor<12xf32> to tensor<4xf32> %3 = arith.index_cast %arg3 : i32 to index - %4 = linalg.pad_tensor %2 nofold low[%3] high[%3] { + %4 = tensor.pad %2 nofold low[%3] high[%3] { ^bb0(%arg6: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<4xf32> to tensor<4xf32> // Check matvec uses the padded input vector. @@ -284,12 +284,12 @@ // Check the load prevents hoisting due to its memory effect. // MATVEC: %[[T0:.*]] = tensor.extract_slice %[[ARG1]][%[[IV0]] // MATVEC: %[[IDX0:.*]] = memref.load %[[ARG3]] - // MATVEC: %[[T1:.*]] = linalg.pad_tensor %[[T0]]{{.*}}%[[IDX0]] + // MATVEC: %[[T1:.*]] = tensor.pad %[[T0]]{{.*}}%[[IDX0]] %2 = tensor.extract_slice %arg1[%arg4] [4] [1] : tensor<12xf32> to tensor<4xf32> %3 = memref.load %arg3[%c0] : memref - %4 = linalg.pad_tensor %2 nofold low[%3] high[%3] { + %4 = tensor.pad %2 nofold low[%3] high[%3] { ^bb0(%arg6: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<4xf32> to tensor<4xf32> // Check matvec uses the padded input vector. @@ -321,15 +321,15 @@ // Check the unexpected operation with a region prevents hoisting. // MATVEC: %[[T0:.*]] = tensor.extract_slice %[[ARG1]][%[[IV0]] // MATVEC: %[[IDX0:.*]] = scf.for {{.*}} step %[[ARG3]] - // MATVEC: %[[T1:.*]] = linalg.pad_tensor %[[T0]]{{.*}}%[[IDX0]] + // MATVEC: %[[T1:.*]] = tensor.pad %[[T0]]{{.*}}%[[IDX0]] %2 = tensor.extract_slice %arg1[%arg4] [4] [1] : tensor<12xf32> to tensor<4xf32> %3 = scf.for %arg6 = %c0 to %c12 step %arg3 iter_args(%arg7 = %c0) -> (index) { %6 = arith.addi %arg3, %arg7 : index scf.yield %6 : index } - %4 = linalg.pad_tensor %2 nofold low[%3] high[%3] { + %4 = tensor.pad %2 nofold low[%3] high[%3] { ^bb0(%arg6: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<4xf32> to tensor<4xf32> // Check matvec uses the padded input vector. @@ -361,7 +361,7 @@ // Check the second input operand is hoisted by two loop nests. // MATMUL: %[[T0:.*]] = scf.for %[[PIV0:[0-9a-z]+]] = // MATMUL: %[[T1:.*]] = tensor.extract_slice %[[ARG1]] - // MATMUL: %[[T2:.*]] = linalg.pad_tensor %[[T1]] + // MATMUL: %[[T2:.*]] = tensor.pad %[[T1]] // MATMUL: scf.for %[[IV0:[0-9a-zA-Z]*]] = %0 = scf.for %arg3 = %c0 to %c12 step %c5 iter_args(%arg4 = %arg2) -> (tensor<12x24xf32>) { @@ -372,9 +372,9 @@ %3 = affine.apply #map1(%1) // Check the fused and padded fill op does not prevent hoisting. 
-  %4 = linalg.pad_tensor %2 nofold low[%c0, %c0] high[%3, %c0] {
+  %4 = tensor.pad %2 nofold low[%c0, %c0] high[%3, %c0] {
   ^bb0(%arg5: index, %arg6: index):
-    linalg.yield %cst : f32
+    tensor.yield %cst : f32
   } : tensor to tensor<5x24xf32>
   %5 = linalg.fill(%cst, %4) : f32, tensor<5x24xf32> -> tensor<5x24xf32>
   %6 = tensor.extract_slice %5[0, 0] [%1, 24] [1, 1] : tensor<5x24xf32> to tensor
@@ -382,7 +382,7 @@
   // Check the first input operand is hoisted by one loop nest.
   // MATMUL: %[[T3:.*]] = scf.for %[[PIV1:[0-9a-z]+]] =
   // MATMUL: %[[T4:.*]] = tensor.extract_slice %[[ARG0]]
-  // MATMUL: %[[T5:.*]] = linalg.pad_tensor %[[T4]]
+  // MATMUL: %[[T5:.*]] = tensor.pad %[[T4]]
   // MATMUL: scf.for %[[IV1:[0-9a-zA-Z]*]] =
   %7 = scf.for %arg5 = %c0 to %c6 step %c3 iter_args(%arg6 = %6) -> (tensor) {
@@ -393,20 +393,20 @@
   %9 = tensor.extract_slice %arg0[%arg3, %arg5] [%1, 3] [1, 1] : tensor<12x6xf32> to tensor
   %10 = tensor.extract_slice %arg1[%arg5, 0] [3, 24] [1, 1] : tensor<6x24xf32> to tensor<3x24xf32>
   %11 = tensor.extract_slice %arg6[0, 0] [%1, 24] [1, 1] : tensor to tensor
-  %12 = linalg.pad_tensor %9 nofold low[%c0, %c0] high[%3, %c0] {
+  %12 = tensor.pad %9 nofold low[%c0, %c0] high[%3, %c0] {
   ^bb0(%arg7: index, %arg8: index):
-    linalg.yield %cst : f32
+    tensor.yield %cst : f32
   } : tensor to tensor<5x3xf32>
-  %13 = linalg.pad_tensor %10 nofold low[%c0, %c0] high[%c0, %c0] {
+  %13 = tensor.pad %10 nofold low[%c0, %c0] high[%c0, %c0] {
   ^bb0(%arg7: index, %arg8: index):
-    linalg.yield %cst : f32
+    tensor.yield %cst : f32
   } : tensor<3x24xf32> to tensor<3x24xf32>
   // Check the output padding is not hoisted.
-  // MATMUL: %[[T8:.*]] = linalg.pad_tensor
-  %14 = linalg.pad_tensor %11 nofold low[%c0, %c0] high[%3, %c0] {
+  // MATMUL: %[[T8:.*]] = tensor.pad
+  %14 = tensor.pad %11 nofold low[%c0, %c0] high[%3, %c0] {
   ^bb0(%arg7: index, %arg8: index):
-    linalg.yield %cst : f32
+    tensor.yield %cst : f32
   } : tensor to tensor<5x24xf32>
   // Check matmul uses the padded operands.
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir --- a/mlir/test/Dialect/Linalg/invalid.mlir +++ b/mlir/test/Dialect/Linalg/invalid.mlir @@ -353,71 +353,6 @@ // ----- - -func @pad_result_type(%arg0: tensor, %arg1: index, %arg2: i32) -> tensor { - // expected-error @+1 {{specified type 'tensor' does not match the inferred type 'tensor}} - %0 = linalg.pad_tensor %arg0 low[1, %arg1, 2, 2] high[1, 2, %arg1, 3] { - ^bb0(%arg3: index, %arg4: index): - linalg.yield %arg2 : i32 - } : tensor to tensor - return %0 : tensor -} - -// ----- - -func @pad_number_of_block_args(%arg0: tensor, %arg1: i32) -> tensor { - // expected-error @+1 {{expected the block to have 2 arguments}} - %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] { - ^bb0(%arg2: index, %arg3: index, %arg4: index): - linalg.yield %arg1 : i32 - } : tensor to tensor - return %0 : tensor -} - -// ----- - -func @pad_no_block(%arg0: tensor, %arg1: i32) -> tensor { - // expected-error @+1 {{op region #0 ('region') failed to verify constraint: region with 1 blocks}} - %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] { - } : tensor to tensor - return %0 : tensor -} - -// ----- - -func @pad_block_args(%arg0: tensor, %arg1: i32) -> tensor { - // expected-error @+1 {{op expected block argument 1 to be an index}} - %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] { - ^bb0(%arg2: i32, %arg3: i32): - linalg.yield %arg1 : i32 - } : tensor to tensor - return %0 : tensor -} - -// ----- - -func @pad_num_yields(%arg0: tensor, %arg1: i32) -> tensor { - // expected-error @+3 {{op expected single yield operand (got 2)}} - %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] { - ^bb0(%arg2: index, %arg3: index): - linalg.yield %arg1, %arg1 : i32, i32 - } : tensor to tensor - return %0 : tensor -} - -// ----- - -func @pad_yield_type(%arg0: tensor, %arg1: i8) -> tensor { - // expected-error @+3 {{op expected yield type to match shape element type}} - %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] { - ^bb0(%arg2: index, %arg3: index): - linalg.yield %arg1 : i8 - } : tensor to tensor - return %0 : tensor -} - -// ----- - func @illegal_fill_tensor_no_return(%arg0 : index, %arg1 : index, %arg2 : f32) { %0 = linalg.init_tensor [%arg0, %arg1] : tensor diff --git a/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir b/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir --- a/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir +++ b/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir @@ -6,9 +6,9 @@ func @pad_tensor_with_memrefs(%arg0: memref<1x28x28x1xf32>) -> memref<2x31x31x3xf32> { %cst = arith.constant 0.000000e+00 : f32 %0 = bufferization.to_tensor %arg0 : memref<1x28x28x1xf32> - %1 = linalg.pad_tensor %0 low[1, 1, 1, 2] high[0, 2, 2, 0] { + %1 = tensor.pad %0 low[1, 1, 1, 2] high[0, 2, 2, 0] { ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<1x28x28x1xf32> to tensor<2x31x31x3xf32> %2 = bufferization.to_memref %1 : memref<2x31x31x3xf32> return %2 : memref<2x31x31x3xf32> @@ -25,9 +25,9 @@ // CHECK-LABEL: func @pad_tensor_no_memrefs func @pad_tensor_no_memrefs(%arg0: tensor<1x28x28xf32>) -> tensor<2x32x32xf32> { %cst = arith.constant 0.000000e+00 : f32 - %0 = linalg.pad_tensor %arg0 low[1, 2, 2] high[0, 2, 2] { + %0 = tensor.pad %arg0 low[1, 2, 2] high[0, 2, 2] { ^bb0(%arg1: index, %arg2: index, %arg3: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<1x28x28xf32> to tensor<2x32x32xf32> return %0 : tensor<2x32x32xf32> } @@ -43,9 +43,9 @@ // 
CHECK-LABEL: func @pad_tensor_detailed
 func @pad_tensor_detailed(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> {
   %cst = arith.constant 0.000000e+00 : f32
-  %0 = linalg.pad_tensor %arg0 low[0, 2, 2, 0] high[0, 2, 2, 0] {
+  %0 = tensor.pad %arg0 low[0, 2, 2, 0] high[0, 2, 2, 0] {
   ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
-    linalg.yield %cst : f32
+    tensor.yield %cst : f32
   } : tensor<1x28x28x1xf32> to tensor<1x32x32x1xf32>
   return %0 : tensor<1x32x32x1xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/pad.mlir b/mlir/test/Dialect/Linalg/pad.mlir
--- a/mlir/test/Dialect/Linalg/pad.mlir
+++ b/mlir/test/Dialect/Linalg/pad.mlir
@@ -31,10 +31,10 @@
   // Check statically sized matmul inputs with partially divisible sizes are padded.
   // MATMUL: %[[V0:.*]] = affine.apply #[[MAP1]]()[%[[TS2]]]
-  // MATMUL: %[[T3:.*]] = linalg.pad_tensor %[[T0]] nofold
+  // MATMUL: %[[T3:.*]] = tensor.pad %[[T0]] nofold
   // MATMUL-SAME: [%[[C0]], %[[C0]]]
   // MATMUL-SAME: [%[[C0]], %[[V0]]
-  // MATMUL: %[[T4:.*]] = linalg.pad_tensor %[[T1]] nofold
+  // MATMUL: %[[T4:.*]] = tensor.pad %[[T1]] nofold
   // Check the statically sized matmul output with fully divisible sizes is not padded.
   // MATMUL: %[[T5:.*]] = linalg.matmul
@@ -74,7 +74,7 @@
   // Check the statically sized matmul output with partially divisible sizes is padded.
   // MATMUL: %[[V0:.*]] = affine.apply #[[MAP1]]()[%[[TS1]]]
-  // MATMUL: %[[T1:.*]] = linalg.pad_tensor %[[T0]] low
+  // MATMUL: %[[T1:.*]] = tensor.pad %[[T0]] low
   // MATMUL-SAME: [%[[C0]], %[[C0]]]
   // MATMUL-SAME: [%[[C0]], %[[V0]]
@@ -137,11 +137,11 @@
   // Check all matmul operands are padded.
   // MATMUL: %[[V0:.*]] = affine.apply #[[MAP3]]()[%[[TS0]]]
   // MATMUL: %[[V1:.*]] = affine.apply #[[MAP4]]()[%[[TS2]]]
-  // MATMUL: %[[T3:.*]] = linalg.pad_tensor %{{.*}} nofold
+  // MATMUL: %[[T3:.*]] = tensor.pad %{{.*}} nofold
   // MATMUL-SAME: [%[[C0]], %[[C0]]]
   // MATMUL-SAME: [%[[V0]], %[[V1]]
-  // MATMUL: %[[T4:.*]] = linalg.pad_tensor %{{.*}} nofold
-  // MATMUL: %[[T5:.*]] = linalg.pad_tensor %{{.*}} low
+  // MATMUL: %[[T4:.*]] = tensor.pad %{{.*}} nofold
+  // MATMUL: %[[T5:.*]] = tensor.pad %{{.*}} low
   // Check the dynamic matmul has been erased.
   // MATMUL-NOT: = linalg.matmul {{.*}} tensor
@@ -172,7 +172,7 @@
   %0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor
   // Check both fill operations are padded by the same pad tensor operation.
- // FILL: %[[T0:.*]] = linalg.pad_tensor + // FILL: %[[T0:.*]] = tensor.pad // FILL: %[[T1:.*]] = linalg.fill(%{{.*}}, %[[T0]]) // FILL: %[[T2:.*]] = linalg.fill(%{{.*}}, %[[T1]]) // FILL: = tensor.extract_slice %[[T2]] @@ -197,20 +197,20 @@ // MATMUL: %[[T0:.*]] = tensor.extract_slice %[[ARG0]] // MATMUL-SAME: [0, 0] // MATMUL-SAME: [%[[SIZE]], %[[SIZE]]] - // MATMUL: %[[T1:.*]] = linalg.pad_tensor %[[T0]] + // MATMUL: %[[T1:.*]] = tensor.pad %[[T0]] // MATMUL: %[[T2:.*]] = linalg.fill(%{{.*}}, %[[T1]] // MATMUL: %[[T3:.*]] = linalg.fill(%{{.*}}, %[[T2]] %0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor - %1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] { + %1 = tensor.pad %0 low[0, 0] high[%iv0, %iv0] { ^bb0(%arg3: index, %arg4: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor to tensor<64x64xf32> %2 = linalg.fill(%cst, %1) : f32, tensor<64x64xf32> -> tensor<64x64xf32> %3 = linalg.fill(%cst, %2) : f32, tensor<64x64xf32> -> tensor<64x64xf32> %4 = tensor.extract_slice %3[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor // Check there are no additional pad tensor operations. - // MATMUL-NOT: linalg.pad_tensor + // MATMUL-NOT: tensor.pad // Check the matmul directly uses the result of the fill operation. // MATMUL: %[[T4:.*]] = linalg.matmul ins(%[[T3]] @@ -233,16 +233,16 @@ %cst = arith.constant 42.0 : f32 %size = affine.min #map0()[%iv0] %0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor - %1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] { + %1 = tensor.pad %0 low[0, 0] high[%iv0, %iv0] { ^bb0(%arg3: index, %arg4: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor to tensor<64x64xf32> %2 = linalg.fill(%cst, %1) : f32, tensor<64x64xf32> -> tensor<64x64xf32> %4 = tensor.extract_slice %2[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor // Different padding values prevent composing the paddings (42.0 vs. 0.0). // MATMUL: = linalg.fill - // MATMUL: = linalg.pad_tensor + // MATMUL: = tensor.pad // MATMUL: = linalg.matmul %5 = linalg.matmul ins(%4, %4 : tensor, tensor) outs(%4 : tensor) -> tensor return %5 : tensor @@ -258,16 +258,16 @@ %cst = arith.constant 0.0 : f32 %size = affine.min #map0()[%iv0] %0 = tensor.extract_slice %arg0[0, 0] [%iv0, %iv0] [1, 1] : tensor<64x64xf32> to tensor - %1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] { + %1 = tensor.pad %0 low[0, 0] high[%iv0, %iv0] { ^bb0(%arg3: index, %arg4: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor to tensor<64x64xf32> %2 = linalg.fill(%cst, %1) : f32, tensor<64x64xf32> -> tensor<64x64xf32> %4 = tensor.extract_slice %2[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor // Different dynamic sizes prevent composing the paddings (%iv0 vs %size). 
// MATMUL: = linalg.fill - // MATMUL: = linalg.pad_tensor + // MATMUL: = tensor.pad // MATMUL: = linalg.matmul %5 = linalg.matmul ins(%4, %4 : tensor, tensor) outs(%4 : tensor) -> tensor return %5 : tensor @@ -283,16 +283,16 @@ %cst = arith.constant 0.0 : f32 %size = affine.min #map0()[%iv0] %0 = tensor.extract_slice %arg0[0, 0, 0] [%size, %size, 1] [1, 1, 1] : tensor<64x64x1xf32> to tensor - %1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] { + %1 = tensor.pad %0 low[0, 0] high[%iv0, %iv0] { ^bb0(%arg3: index, %arg4: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor to tensor<64x64xf32> %2 = linalg.fill(%cst, %1) : f32, tensor<64x64xf32> -> tensor<64x64xf32> %3 = tensor.extract_slice %2[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor // Different dynamic ranks prevent composing the paddings ([%size, %size, 1] vs [%size, %size]). // MATMUL: = linalg.fill - // MATMUL: = linalg.pad_tensor + // MATMUL: = tensor.pad // MATMUL: = linalg.matmul %4 = linalg.matmul ins(%3, %3 : tensor, tensor) outs(%3 : tensor) -> tensor return %4 : tensor @@ -308,16 +308,16 @@ %cst = arith.constant 0.0 : f32 %size = affine.min #map0()[%iv0] %0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<62x62xf32> to tensor - %1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] { + %1 = tensor.pad %0 low[0, 0] high[%iv0, %iv0] { ^bb0(%arg3: index, %arg4: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor to tensor<62x62xf32> %2 = linalg.fill(%cst, %1) : f32, tensor<62x62xf32> -> tensor<62x62xf32> %4 = tensor.extract_slice %2[0, 0] [%size, %size] [1, 1] : tensor<62x62xf32> to tensor // Different static sizes prevent composing the paddings (62 vs 64 derived from #map0). // MATMUL: = linalg.fill - // MATMUL: = linalg.pad_tensor + // MATMUL: = tensor.pad // MATMUL: = linalg.matmul %5 = linalg.matmul ins(%4, %4 : tensor, tensor) outs(%4 : tensor) -> tensor return %5 : tensor @@ -336,7 +336,7 @@ %0 = affine.min #map0()[%iv0] // FILL: %[[T0:.*]] = tensor.extract_slice %[[ARG1]] - // FILL: %[[T1:.*]] = linalg.pad_tensor %[[T0]] nofold + // FILL: %[[T1:.*]] = tensor.pad %[[T0]] nofold %1 = tensor.extract_slice %arg1[0, 0] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> // Check only the fill output operand is padded. @@ -361,8 +361,8 @@ %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor // Check the matmul inputs are padded despite the missing slice for the static output. - // MATMUL: %[[T0:.*]] = linalg.pad_tensor - // MATMUL: %[[T1:.*]] = linalg.pad_tensor + // MATMUL: %[[T0:.*]] = tensor.pad + // MATMUL: %[[T1:.*]] = tensor.pad // MATMUL: = linalg.matmul ins(%[[T0]], %[[T1]] // MATMUL-SAME: outs(%[[ARG2]] %3 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%arg2 : tensor<4x5xf32>) -> tensor<4x5xf32> @@ -414,8 +414,8 @@ %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> // Check the matmul inputs are padded despite the failure to compute a padding value for the static output. 
- // INPUTS-ONLY: %[[T1:.*]] = linalg.pad_tensor - // INPUTS-ONLY: %[[T2:.*]] = linalg.pad_tensor + // INPUTS-ONLY: %[[T1:.*]] = tensor.pad + // INPUTS-ONLY: %[[T2:.*]] = tensor.pad // INPUTS-ONLY: = linalg.matmul ins(%[[T1]], %[[T2]] // INPUTS-ONLY-SAME: outs(%[[T0]] %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> @@ -465,7 +465,7 @@ %0 = tensor.extract_slice %arg0[0, 0, 0, 0] [1, %size, 1, %size] [1, 1, 1, 1] : tensor<1x64x1x64xf32> to tensor<1x?x?xf32> // Check the fill is padded despite the rank-reducing slice operation. - // FILL: %[[T0:.*]] = linalg.pad_tensor + // FILL: %[[T0:.*]] = tensor.pad // FILL: %[[T1:.*]] = linalg.fill(%{{.*}}, %[[T0]]) // FILL-SAME: tensor<1x64x64xf32> // FILL: = tensor.extract_slice %[[T1]] diff --git a/mlir/test/Dialect/Linalg/pad_fusion.mlir b/mlir/test/Dialect/Linalg/pad_fusion.mlir --- a/mlir/test/Dialect/Linalg/pad_fusion.mlir +++ b/mlir/test/Dialect/Linalg/pad_fusion.mlir @@ -15,9 +15,9 @@ %1 = arith.mulf %arg6, %arg6 : f32 linalg.yield %1 : f32 } -> tensor - %1 = linalg.pad_tensor %0 low [%arg1, %arg2] high [%arg3, %arg4] { + %1 = tensor.pad %0 low [%arg1, %arg2] high [%arg3, %arg4] { ^bb0(%arg6: index, %arg7 : index): - linalg.yield %arg5 : f32 + tensor.yield %arg5 : f32 } : tensor to tensor return %1 : tensor } @@ -64,9 +64,9 @@ %1 = arith.mulf %arg4, %arg4 : f32 linalg.yield %1 : f32 } -> tensor<42x?xf32> - %1 = linalg.pad_tensor %0 low [3, %arg1] high [4, %arg2] { + %1 = tensor.pad %0 low [3, %arg1] high [4, %arg2] { ^bb0(%arg4: index, %arg5 : index): - linalg.yield %arg3 : f32 + tensor.yield %arg3 : f32 } : tensor<42x?xf32> to tensor<49x?xf32> return %1 : tensor<49x?xf32> } diff --git a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir b/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir --- a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir +++ b/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir @@ -253,9 +253,9 @@ %c3 = arith.constant 3 : index %c4 = arith.constant 4 : index %c5 = arith.constant 5 : index - %0 = linalg.pad_tensor %arg0 low[%c3, %arg1, %c4] high[7, %c5, %arg2] { + %0 = tensor.pad %arg0 low[%c3, %arg1, %c4] high[7, %c5, %arg2] { ^bb0(%arg4: index, %arg5: index, %arg6: index): - linalg.yield %arg3 : f32 + tensor.yield %arg3 : f32 } : tensor<2x?x?xf32> to tensor %1 = tensor.dim %0, %c0 : tensor %2 = tensor.dim %0, %c1 : tensor diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir --- a/mlir/test/Dialect/Linalg/roundtrip.mlir +++ b/mlir/test/Dialect/Linalg/roundtrip.mlir @@ -15,77 +15,6 @@ // CHECK-DAG: #[[$strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)> // CHECK-DAG: #[[$strided3DT:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2 * s1 + s0 + d1 * s2 + d0)> -func @pad_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: index, - %pad_value: f32) -> tensor<6x?x?x?xf32> { - %0 = linalg.pad_tensor %arg0 low[2, %low, 3, 3] high[3, 3, %high, 2] { - ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): - linalg.yield %pad_value : f32 - } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32> - return %0 : tensor<6x?x?x?xf32> -} -// CHECK-LABEL: func @pad_dynamic -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]] -// CHECK-SAME: %[[LOW:[a-zA-Z0-9_]*]] -// CHECK-SAME: %[[HIGH:[a-zA-Z0-9_]*]] -// CHECK: linalg.pad_tensor %[[ARG0]] -// CHECK-SAME: low[2, %[[LOW]], 3, 3] -// CHECK-SAME: high[3, 3, %[[HIGH]], 2] -// CHECK: : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32> - 
-// ----- - -func @pad_static(%arg0: tensor<3x4xf32>, %pad_value: f32) -> tensor<6x9xf32> { - %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] { - ^bb0(%arg1 : index, %arg2 : index): - linalg.yield %pad_value : f32 - } : tensor<3x4xf32> to tensor<6x9xf32> - return %0 : tensor<6x9xf32> -} -// CHECK-LABEL: func @pad_static -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]] -// CHECK: linalg.pad_tensor %[[ARG0]] low[1, 2] high[2, 3] -// CHECK: : tensor<3x4xf32> to tensor<6x9xf32> - -// ----- - -func @pad_asymmetrical(%arg0: tensor<2x3xf32>, %ub0: index, %ub1: index, - %pad_value: f32) -> tensor { - %0 = linalg.pad_tensor %arg0 low[0, 0] high[%ub0, %ub1] { - ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad_value : f32 - } : tensor<2x3xf32> to tensor - return %0 : tensor -} -// CHECK-LABEL: func @pad_asymmetrical -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]] -// CHECK-SAME: %[[UB0:[a-zA-Z0-9_]*]] -// CHECK-SAME: %[[UB1:[a-zA-Z0-9_]*]] -// CHECK: linalg.pad_tensor %[[ARG0]] -// CHECK-SAME: low[0, 0] -// CHECK-SAME: high[%[[UB0]], %[[UB1]]] -// CHECK: : tensor<2x3xf32> to tensor - -// ----- - -func @pad_to_static_size(%arg0: tensor, %ub0: index, %ub1: index, - %pad_value: f32) -> tensor<2x3xf32> { - %0 = linalg.pad_tensor %arg0 low[0, 0] high[%ub0, %ub1] { - ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad_value : f32 - } : tensor to tensor<2x3xf32> - return %0 : tensor<2x3xf32> -} -// CHECK-LABEL: func @pad_to_static_size -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]] -// CHECK-SAME: %[[UB0:[a-zA-Z0-9_]*]] -// CHECK-SAME: %[[UB1:[a-zA-Z0-9_]*]] -// CHECK: linalg.pad_tensor %[[ARG0]] -// CHECK-SAME: low[0, 0] -// CHECK-SAME: high[%[[UB0]], %[[UB1]]] -// CHECK: : tensor to tensor<2x3xf32> - -// ----- - func @views(%arg0: index) { %c0 = arith.constant 0 : index %0 = arith.muli %arg0, %arg0 : index diff --git a/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir b/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir --- a/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir +++ b/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir @@ -6,9 +6,9 @@ // CHECK: return %[[RESULT]] func @static_data_only(%arg0 : tensor<4x5xf32>, %pad : f32) -> tensor<2x1xf32> { - %0 = linalg.pad_tensor %arg0 low[0, 0] high[7, 8] { + %0 = tensor.pad %arg0 low[0, 0] high[7, 8] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad : f32 + tensor.yield %pad : f32 } : tensor<4x5xf32> to tensor<11x13xf32> %1 = tensor.extract_slice %0[1, 2] [2, 1] [1, 1] : tensor<11x13xf32> to tensor<2x1xf32> return %1 : tensor<2x1xf32> @@ -18,16 +18,16 @@ // CHECK-LABEL: @static_high_pad_only // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32 -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK-NOT: tensor.extract_slice // CHECK: %[[RESULT:.*]] = tensor.generate // CHECK: tensor.yield %[[PAD]] // CHECK: return %[[RESULT]] : tensor<2x4xf32> func @static_high_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32) -> tensor<2x4xf32> { - %0 = linalg.pad_tensor %arg0 low[0, 0] high[7, 8] { + %0 = tensor.pad %arg0 low[0, 0] high[7, 8] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad : f32 + tensor.yield %pad : f32 } : tensor<4x5xf32> to tensor<11x13xf32> %1 = tensor.extract_slice %0[4, 5] [2, 4] [1, 1] : tensor<11x13xf32> to tensor<2x4xf32> return %1 : tensor<2x4xf32> @@ -37,16 +37,16 @@ // CHECK-LABEL: @static_low_pad_only // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32 -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK-NOT: tensor.extract_slice // CHECK: %[[RESULT:.*]] = tensor.generate // CHECK: 
tensor.yield %[[PAD]] // CHECK: return %[[RESULT]] : tensor<2x3xf32> func @static_low_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32) -> tensor<2x3xf32> { - %0 = linalg.pad_tensor %arg0 low[3, 7] high[7, 8] { + %0 = tensor.pad %arg0 low[3, 7] high[7, 8] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad : f32 + tensor.yield %pad : f32 } : tensor<4x5xf32> to tensor<14x20xf32> %1 = tensor.extract_slice %0[1, 3] [2, 3] [1, 1] : tensor<14x20xf32> to tensor<2x3xf32> return %1 : tensor<2x3xf32> @@ -56,16 +56,16 @@ // CHECK-LABEL: @static_low_pad_only_2 // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32 -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK-NOT: tensor.extract_slice // CHECK: %[[RESULT:.*]] = tensor.generate // CHECK: tensor.yield %[[PAD]] // CHECK: return %[[RESULT]] : tensor<1x3xf32> func @static_low_pad_only_2(%arg0 : tensor<4x5xf32>, %pad : f32) -> tensor<1x3xf32> { - %0 = linalg.pad_tensor %arg0 low[3, 7] high[7, 8] { + %0 = tensor.pad %arg0 low[3, 7] high[7, 8] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad : f32 + tensor.yield %pad : f32 } : tensor<4x5xf32> to tensor<14x20xf32> %1 = tensor.extract_slice %0[1, 3] [1, 3] [1, 1] : tensor<14x20xf32> to tensor<1x3xf32> return %1 : tensor<1x3xf32> @@ -75,16 +75,16 @@ // CHECK-LABEL: @static_mixed_data_high_pad // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32 -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][2, 4] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32> -// CHECK: %[[RESULT:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[0, 0] high[1, 3] -// CHECK: linalg.yield %[[PAD]] +// CHECK: %[[RESULT:.*]] = tensor.pad %[[SUBTENSOR]] low[0, 0] high[1, 3] +// CHECK: tensor.yield %[[PAD]] // CHECK: return %[[RESULT]] : tensor<3x4xf32> func @static_mixed_data_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32) -> tensor<3x4xf32> { - %0 = linalg.pad_tensor %arg0 low[0, 0] high[7, 8] { + %0 = tensor.pad %arg0 low[0, 0] high[7, 8] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad : f32 + tensor.yield %pad : f32 } : tensor<4x5xf32> to tensor<11x13xf32> %1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1] : tensor<11x13xf32> to tensor<3x4xf32> return %1 : tensor<3x4xf32> @@ -94,16 +94,16 @@ // CHECK-LABEL: @static_mixed_data_low_pad // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32 -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32> -// CHECK: %[[RESULT:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[1, 3] high[0, 0] -// CHECK: linalg.yield %[[PAD]] +// CHECK: %[[RESULT:.*]] = tensor.pad %[[SUBTENSOR]] low[1, 3] high[0, 0] +// CHECK: tensor.yield %[[PAD]] // CHECK: return %[[RESULT]] : tensor<3x4xf32> func @static_mixed_data_low_pad(%arg0 : tensor<4x5xf32>, %pad : f32) -> tensor<3x4xf32> { - %0 = linalg.pad_tensor %arg0 low[3, 7] high[7, 8] { + %0 = tensor.pad %arg0 low[3, 7] high[7, 8] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad : f32 + tensor.yield %pad : f32 } : tensor<4x5xf32> to tensor<14x20xf32> %1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1] : tensor<14x20xf32> to tensor<3x4xf32> return %1 : tensor<3x4xf32> @@ -113,15 +113,15 @@ // CHECK-LABEL: @static_mixed_data_low_high_pad // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32 -// CHECK-NOT: linalg.pad_tensor -// CHECK: %[[RESULT:.*]] = linalg.pad_tensor %[[ARG0]] low[1, 1] high[2, 3] -// CHECK: linalg.yield 
%[[PAD]] +// CHECK-NOT: tensor.pad +// CHECK: %[[RESULT:.*]] = tensor.pad %[[ARG0]] low[1, 1] high[2, 3] +// CHECK: tensor.yield %[[PAD]] // CHECK: return %[[RESULT]] : tensor<7x9xf32> func @static_mixed_data_low_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32) -> tensor<7x9xf32> { - %0 = linalg.pad_tensor %arg0 low[2, 3] high[7, 8] { + %0 = tensor.pad %arg0 low[2, 3] high[7, 8] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad : f32 + tensor.yield %pad : f32 } : tensor<4x5xf32> to tensor<13x16xf32> %1 = tensor.extract_slice %0[1, 2] [7, 9] [1, 1] : tensor<13x16xf32> to tensor<7x9xf32> return %1 : tensor<7x9xf32> @@ -131,7 +131,7 @@ // CHECK-LABEL: @dynamic_high_pad // CHECK-SAME: %[[ARG0:.*]]: tensor -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK: %[[C0:.*]] = arith.constant 0 : index // CHECK: tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[RESULT:.*]] = scf.if %{{.*}} -> (tensor<3x4xf32>) { @@ -139,14 +139,14 @@ // CHECK: scf.yield %[[GEN]] // CHECK: } else { // CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][%{{.*}}, 4] [%{{.*}}, 1] [1, 1] : tensor to tensor -// CHECK: %[[PADTENSOR:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3] +// CHECK: %[[PADTENSOR:.*]] = tensor.pad %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3] // CHECK: scf.yield %[[PADTENSOR]] // CHECK: } // CHECK: return %[[RESULT]] func @dynamic_high_pad(%arg0 : tensor, %h1: index, %pad : f32) -> tensor<3x4xf32> { - %0 = linalg.pad_tensor %arg0 low[0, 0] high[%h1, 8] { + %0 = tensor.pad %arg0 low[0, 0] high[%h1, 8] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad : f32 + tensor.yield %pad : f32 } : tensor to tensor %1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1] : tensor to tensor<3x4xf32> return %1 : tensor<3x4xf32> @@ -156,7 +156,7 @@ // CHECK-LABEL: @dynamic_extract_size // CHECK-SAME: %[[ARG0:.*]]: tensor, %[[ARG1:.*]]: index -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK: %[[C0:.*]] = arith.constant 0 : index // CHECK: tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[RESULT:.*]] = scf.if %{{.*}} -> (tensor) { @@ -164,14 +164,14 @@ // CHECK: scf.yield %[[GEN]] // CHECK: } else { // CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][%{{.*}}, 4] [%{{.*}}, 1] [1, 1] : tensor to tensor -// CHECK: %[[PADTENSOR:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3] +// CHECK: %[[PADTENSOR:.*]] = tensor.pad %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3] // CHECK: scf.yield %[[PADTENSOR]] // CHECK: } // CHECK: return %[[RESULT]] func @dynamic_extract_size(%arg0 : tensor, %s1: index, %pad : f32) -> tensor { - %0 = linalg.pad_tensor %arg0 low[0, 0] high[7, 8] { + %0 = tensor.pad %arg0 low[0, 0] high[7, 8] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad : f32 + tensor.yield %pad : f32 } : tensor to tensor %1 = tensor.extract_slice %0[2, 4] [%s1, 4] [1, 1] : tensor to tensor return %1 : tensor @@ -184,14 +184,14 @@ // CHECK: tensor.generate // CHECK: else // CHECK: %[[SLICE:.*]] = tensor.extract_slice -// CHECK: linalg.pad_tensor %[[SLICE]] low[0, 0] +// CHECK: tensor.pad %[[SLICE]] low[0, 0] func @dynamic_zero_low_padding(%arg0 : tensor, %pad : f32, %o1 : index, %o2 : index, %s1 : index, %s2 : index) -> tensor { - %0 = linalg.pad_tensor %arg0 low[0, 0] high[7, 8] { + %0 = tensor.pad %arg0 low[0, 0] high[7, 8] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad : f32 + tensor.yield %pad : f32 } : tensor to tensor %1 = tensor.extract_slice %0[%o1, %o2] [%s1, %s2] [1, 1] : tensor to tensor return %1 : tensor @@ -204,14 +204,14 @@ // CHECK: 
tensor.generate // CHECK: else // CHECK: %[[SLICE:.*]] = tensor.extract_slice -// CHECK: linalg.pad_tensor %[[SLICE]] low[%{{.*}}, %{{.*}}] high[0, 0] +// CHECK: tensor.pad %[[SLICE]] low[%{{.*}}, %{{.*}}] high[0, 0] func @dynamic_zero_high_padding(%arg0 : tensor, %pad : f32, %o1 : index, %o2 : index, %s1 : index, %s2 : index) -> tensor { - %0 = linalg.pad_tensor %arg0 low[7, 8] high[0, 0] { + %0 = tensor.pad %arg0 low[7, 8] high[0, 0] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad : f32 + tensor.yield %pad : f32 } : tensor to tensor %1 = tensor.extract_slice %0[%o1, %o2] [%s1, %s2] [1, 1] : tensor to tensor return %1 : tensor diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir --- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir @@ -288,7 +288,7 @@ // CHECK: tensor.generate // CHECK: else // CHECK: tensor.extract_slice -// CHECK: linalg.pad_tensor +// CHECK: tensor.pad // CHECK: tensor.extract_slice // CHECK: tensor.extract_slice // CHECK: linalg.generic @@ -303,9 +303,9 @@ %d0 = tensor.dim %large_input, %c0 : tensor<64x128xf32> %d1 = tensor.dim %large_input, %c1 : tensor<64x128xf32> - %pad = linalg.pad_tensor %small_input low[4, 60] high[2, 67] { + %pad = tensor.pad %small_input low[4, 60] high[2, 67] { ^bb0(%arg0: index, %arg1: index): - linalg.yield %zero : f32 + tensor.yield %zero : f32 } : tensor<58x1xf32> to tensor<64x128xf32> %fill = linalg.fill(%zero, %large_input) : f32, tensor<64x128xf32> -> tensor<64x128xf32> diff --git a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir --- a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir +++ b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir @@ -23,7 +23,7 @@ // TILE2: tensor.generate // TILE2: else // TILE2: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] -// TILE2: %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]] +// TILE2: %[[PAD:.*]] = tensor.pad %[[SLICE]] // TILE2: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] // TILE2: return %[[RESULT]] @@ -43,15 +43,15 @@ // TILE1: tensor.generate // TILE1: else // TILE1: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] -// TILE1: %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}] +// TILE1: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}] // TILE1: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [%[[DIM0]], {{.*}}] [1, 1] // TILE1: return %[[RESULT]] func @dynamic_pad_tensor(%input_tensor: tensor, %pad_value: f32) -> tensor { - %0 = linalg.pad_tensor %input_tensor low[3, 4] high[5, 3] { + %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad_value : f32 + tensor.yield %pad_value : f32 } : tensor to tensor return %0 : tensor } @@ -71,7 +71,7 @@ // TILE2: tensor.generate // TILE2: else // TILE2: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] -// TILE2: %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]] +// TILE2: %[[PAD:.*]] = tensor.pad %[[SLICE]] // TILE2: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] // TILE2: return %[[RESULT]] @@ -86,15 +86,15 @@ // TILE1: tensor.generate // TILE1: else // TILE1: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1] -// TILE1: 
%[[PAD:.*]] = linalg.pad_tensor %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}] +// TILE1: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}] // TILE1: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 1] // TILE1: return %[[RESULT]] func @static_pad_tensor(%input_tensor: tensor<7x9xf32>, %pad_value: f32) -> tensor<15x16xf32> { - %0 = linalg.pad_tensor %input_tensor low[3, 4] high[5, 3] { + %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad_value : f32 + tensor.yield %pad_value : f32 } : tensor<7x9xf32> to tensor<15x16xf32> return %0 : tensor<15x16xf32> } @@ -112,7 +112,7 @@ // TILE1: scf.yield %[[GEN]] : tensor<14x3xf32> // TILE1: else // TILE1: %[[SLICE:.*]] = tensor.extract_slice %arg0[0, %{{.*}}] [7, %{{.*}}] [1, 1] : tensor<7x9xf32> to tensor<7x?xf32> -// TILE1: %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]] low[0, 0] high[7, %{{.*}}] +// TILE1: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[0, 0] high[7, %{{.*}}] // TILE1: scf.yield %[[PAD]] : tensor<14x3xf32> // TILE1: %[[R3:.*]] = tensor.insert_slice %[[R2]] into %[[INNER_OUT]][0, %[[IV]]] [14, 3] [1, 1] : tensor<14x3xf32> into tensor<14x15xf32> // TILE1: scf.yield %[[R3]] : tensor<14x15xf32> @@ -120,9 +120,9 @@ func @static_pad_tile_evenly(%input_tensor: tensor<7x9xf32>, %output_tensor: tensor<14x15xf32>, %pad_value: f32) -> tensor<14x15xf32> { - %0 = linalg.pad_tensor %input_tensor low[0, 0] high[7, 6] { + %0 = tensor.pad %input_tensor low[0, 0] high[7, 6] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad_value : f32 + tensor.yield %pad_value : f32 } : tensor<7x9xf32> to tensor<14x15xf32> return %0 : tensor<14x15xf32> } diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -537,7 +537,7 @@ // CHECK-LABEL: func @pad_static( // CHECK-SAME: %[[ARG0:.*]]: tensor<2x?x2xf32>, %[[PAD:.*]]: f32 -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index // CHECK-DAG: %[[INIT:.*]] = linalg.init_tensor [2, 3, 4] : tensor<2x3x4xf32> @@ -547,9 +547,9 @@ // CHECK: %[[RESULT:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C0]], %[[C0]], %[[C2]]] {in_bounds = [true, true, true]} : vector<2x3x2xf32>, tensor<2x3x4xf32> // CHECK: return %[[RESULT]] func @pad_static(%arg0: tensor<2x?x2xf32>, %pad_value: f32) -> tensor<2x3x4xf32> { - %0 = linalg.pad_tensor %arg0 low[0, 0, 2] high[0, 1, 0] { + %0 = tensor.pad %arg0 low[0, 0, 2] high[0, 1, 0] { ^bb0(%arg1: index, %arg2: index, %arg3: index): - linalg.yield %pad_value : f32 + tensor.yield %pad_value : f32 } : tensor<2x?x2xf32> to tensor<2x3x4xf32> return %0 : tensor<2x3x4xf32> } @@ -558,7 +558,7 @@ // CHECK-LABEL: func @pad_static_source( // CHECK-SAME: %[[ARG0:.*]]: tensor<2x5x2xf32>, %[[PAD:.*]]: f32 -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index // CHECK: %[[INIT:.*]] = linalg.init_tensor [2, 6, 4] : tensor<2x6x4xf32> @@ -568,9 +568,9 @@ // CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C0]], %[[C0]], %[[C2]]] {in_bounds = [true, true, true]} : vector<2x5x2xf32>, tensor<2x6x4xf32> // CHECK: return %[[WRITE]] func @pad_static_source(%arg0: tensor<2x5x2xf32>, %pad_value: f32) -> tensor<2x6x4xf32> { - %0 = 
linalg.pad_tensor %arg0 low[0, 0, 2] high[0, 1, 0] { + %0 = tensor.pad %arg0 low[0, 0, 2] high[0, 1, 0] { ^bb0(%arg1: index, %arg2: index, %arg3: index): - linalg.yield %pad_value : f32 + tensor.yield %pad_value : f32 } : tensor<2x5x2xf32> to tensor<2x6x4xf32> return %0 : tensor<2x6x4xf32> } @@ -579,7 +579,7 @@ // CHECK-LABEL: func @pad_static_dynamic( // CHECK-SAME: %[[SRC:.*]]: tensor<1x2x2x?xf32>, %[[LOW:.*]]: index, %[[HIGH:.*]]: index -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index // CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index // CHECK-DAG: %[[C5:.*]] = arith.constant 5 : index @@ -596,9 +596,9 @@ // CHECK: return %[[RESULT]] func @pad_static_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: index, %pad_value: f32) -> tensor<6x?x?x?xf32> { - %0 = linalg.pad_tensor %arg0 low[2, %low, 3, 3] high[3, 3, %high, 2] { + %0 = tensor.pad %arg0 low[2, %low, 3, 3] high[3, 3, %high, 2] { ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): - linalg.yield %pad_value : f32 + tensor.yield %pad_value : f32 } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32> return %0 : tensor<6x?x?x?xf32> } @@ -607,7 +607,7 @@ // CHECK-LABEL: func @pad_and_transfer_read // CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C5:.*]] = arith.constant 5.0 // CHECK: %[[RESULT:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %[[C5]] : tensor<5x6xf32>, vector<7x9xf32> @@ -616,9 +616,9 @@ %c0 = arith.constant 0 : index %c5 = arith.constant 5.0 : f32 %c6 = arith.constant 6.0 : f32 - %0 = linalg.pad_tensor %arg0 low[0, 0] high[5, 7] { + %0 = tensor.pad %arg0 low[0, 0] high[5, 7] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %c5 : f32 + tensor.yield %c5 : f32 } : tensor<5x6xf32> to tensor<10x13xf32> %1 = vector.transfer_read %0[%c0, %c0], %c6 : tensor<10x13xf32>, vector<7x9xf32> @@ -631,7 +631,7 @@ // CHECK-LABEL: func @pad_and_transfer_write_static // CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK: %[[C0:.*]] = arith.constant 0 : index // CHECK: %[[VEC0:.*]] = call @make_vector() : () -> vector<7x9xf32> // CHECK: %[[RESULT:.*]] = vector.transfer_write %[[VEC0]], %[[ARG0]][%[[C0]], %[[C0]]] : vector<7x9xf32>, tensor<5x6xf32> @@ -640,9 +640,9 @@ %arg0: tensor<5x6xf32>) -> tensor<5x6xf32> { %c0 = arith.constant 0 : index %c5 = arith.constant 5.0 : f32 - %0 = linalg.pad_tensor %arg0 low[0, 0] high[5, 7] { + %0 = tensor.pad %arg0 low[0, 0] high[5, 7] { ^bb0(%arg2: index, %arg3: index): - linalg.yield %c5 : f32 + tensor.yield %c5 : f32 } : tensor<5x6xf32> to tensor<10x13xf32> %1 = call @make_vector() : () -> vector<7x9xf32> %2 = vector.transfer_write %1, %0[%c0, %c0] @@ -657,7 +657,7 @@ // CHECK-LABEL: func @pad_and_transfer_write_dynamic_static // CHECK-SAME: %[[ARG0:.*]]: tensor, %[[SIZE:.*]]: index, %[[PADDING:.*]]: index -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK: %[[C0:.*]] = arith.constant 0 : index // CHECK: %[[SUB:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [%[[SIZE]], 6] [1, 1] : tensor to tensor // CHECK: %[[VEC0:.*]] = call @make_vector() : () -> vector<7x9xf32> @@ -669,9 +669,9 @@ %c5 = arith.constant 5.0 : f32 %s = tensor.extract_slice %arg0[0, 0] [%size, 6] [1, 1] : tensor to tensor - %0 = linalg.pad_tensor %s low[0, 0] high[%padding, 7] { + %0 = tensor.pad %s low[0, 0] high[%padding, 7] { ^bb0(%arg2: index, %arg3: index): - 
linalg.yield %c5 : f32 + tensor.yield %c5 : f32 } : tensor to tensor %1 = call @make_vector() : () -> vector<7x9xf32> %2 = vector.transfer_write %1, %0[%c0, %c0] @@ -686,7 +686,7 @@ // CHECK-LABEL: func @pad_and_insert_slice_source // CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C5:.*]] = arith.constant 5.0 // CHECK: %[[VEC0:.*]] = call @make_vector() : () -> tensor<12x13xf32> @@ -697,9 +697,9 @@ %arg0: tensor<5x6xf32>) -> tensor<12x13xf32> { %c0 = arith.constant 0 : index %c5 = arith.constant 5.0 : f32 - %0 = linalg.pad_tensor %arg0 low[0, 0] high[2, 3] { + %0 = tensor.pad %arg0 low[0, 0] high[2, 3] { ^bb0(%arg2: index, %arg3: index): - linalg.yield %c5 : f32 + tensor.yield %c5 : f32 } : tensor<5x6xf32> to tensor<7x9xf32> %1 = call @make_vector() : () -> tensor<12x13xf32> %r = tensor.insert_slice %0 into %1[0, 0][7, 9][1, 1] : tensor<7x9xf32> into tensor<12x13xf32> @@ -717,9 +717,9 @@ func @pad_and_insert_slice_dest( %arg0: tensor<1x5x6xf32>) -> tensor<1x12x13xf32> { %c5 = arith.constant 5.0 : f32 - %0 = linalg.pad_tensor %arg0 low[0, 0, 0] high[0, 7, 7] { + %0 = tensor.pad %arg0 low[0, 0, 0] high[0, 7, 7] { ^bb0(%arg2: index, %arg3: index, %arg4: index): - linalg.yield %c5 : f32 + tensor.yield %c5 : f32 } : tensor<1x5x6xf32> to tensor<1x12x13xf32> %1 = call @make_vector() : () -> tensor<12x13xf32> %r = tensor.insert_slice %1 into %0[0, 0, 0][1, 12, 13][1, 1, 1] : tensor<12x13xf32> into tensor<1x12x13xf32> @@ -730,7 +730,7 @@ // CHECK-LABEL: func @pad_tensor_non_const_pad_value // CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index // CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index @@ -743,14 +743,14 @@ func @pad_tensor_non_const_pad_value(%arg0: tensor<5x6xf32>) -> tensor<12x13xf32> { %c0 = arith.constant 0 : index %c5 = arith.constant 5.0 : f32 - %0 = linalg.pad_tensor %arg0 low[3, 4] high[4, 3] { + %0 = tensor.pad %arg0 low[3, 4] high[4, 3] { ^bb0(%arg1: index, %arg2: index): %i1 = arith.index_cast %arg1 : index to i32 %i2 = arith.index_cast %arg2 : index to i32 %f1 = arith.sitofp %i1 : i32 to f32 %f2 = arith.sitofp %i2 : i32 to f32 %m = arith.mulf %f1, %f2 : f32 - linalg.yield %m : f32 + tensor.yield %m : f32 } : tensor<5x6xf32> to tensor<12x13xf32> return %0 : tensor<12x13xf32> } diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir --- a/mlir/test/Dialect/Tensor/canonicalize.mlir +++ b/mlir/test/Dialect/Tensor/canonicalize.mlir @@ -982,3 +982,199 @@ // CHECK-NEXT: return [[C3]] return %rank_0 : index } + +// ----- + +// CHECK-LABEL: func @pad_tensor_same_static_shape( +// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> +// CHECK-NOT: tensor.pad +// CHECK: return %[[ARG0]] +func @pad_tensor_same_static_shape(%arg0: tensor<5x6xf32>, %a: index) + -> tensor<5x6xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %0 = tensor.pad %arg0 low[%a, 0] high[0, %a] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %cst : f32 + } : tensor<5x6xf32> to tensor<5x6xf32> + return %0 : tensor<5x6xf32> +} + +// ----- + +// CHECK-LABEL: func @pad_tensor_nofold_same_static_shape( +// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> +// CHECK: %[[PAD:.*]] = tensor.pad +// CHECK: return %[[PAD]] +func @pad_tensor_nofold_same_static_shape(%arg0: tensor<5x6xf32>, %a: index) + -> tensor<5x6xf32> { + 
%cst = arith.constant 0.000000e+00 : f32 + %0 = tensor.pad %arg0 nofold low[%a, 0] high[0, %a] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %cst : f32 + } : tensor<5x6xf32> to tensor<5x6xf32> + return %0 : tensor<5x6xf32> +} + +// ----- + +// CHECK-LABEL: func @pad_tensor_after_cast_different_shape( +// CHECK-SAME: %[[INPUT:.*]]: tensor) -> tensor { +// CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[PADDED:.*]] = tensor.pad %[[INPUT]] +// CHECK-SAME: low[0, 0, 1, 1] high[0, 0, 1, 1] { +// CHECK: ^bb0(%[[ARG1:.*]]: index, %[[ARG2:.*]]: index, %[[ARG3:.*]]: index, %[[ARG4:.*]]: index): +// CHECK: tensor.yield %[[CST]] : f32 +// CHECK: } : tensor to tensor +// CHECK: %[[DYNAMIC:.*]] = tensor.cast %[[PADDED:.*]] : +// CHECK-SAME: tensor to tensor +// CHECK: return %[[DYNAMIC]] : tensor +// CHECK: } +func @pad_tensor_after_cast_different_shape(%arg0: tensor) + -> tensor { + %cst = arith.constant 0.000000e+00 : f32 + %dynamic = tensor.cast %arg0 : tensor to tensor + %padded = tensor.pad %dynamic low[0, 0, 1, 1] high[0, 0, 1, 1] { + ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): + tensor.yield %cst: f32 + } : tensor to tensor + return %padded: tensor +} + +// ----- + +// CHECK-LABEL: func @pad_tensor_after_cast_same_shape( +// CHECK-SAME: %[[INPUT:.*]]: tensor, +// CHECK-SAME: %[[PADDING:.*]]: index) -> tensor { +// CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[PADDED:.*]] = tensor.pad %[[INPUT]] +// CHECK-SAME: low[0, %[[PADDING]], 1, 1] high[0, %[[PADDING]], 1, 1] { +// CHECK: ^bb0(%[[ARG1:.*]]: index, %[[ARG2:.*]]: index, %[[ARG3:.*]]: index, %[[ARG4:.*]]: index): +// CHECK: tensor.yield %[[CST]] : f32 +// CHECK: } : tensor to tensor +// CHECK: return %[[PADDED:.*]] : tensor +// CHECK: } +func @pad_tensor_after_cast_same_shape(%arg0: tensor, %padding : index) + -> tensor { + %cst = arith.constant 0.000000e+00 : f32 + %dynamic = tensor.cast %arg0 : tensor to tensor + %padded = tensor.pad %dynamic low[0, %padding, 1, 1] high[0, %padding, 1, 1] { + ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): + tensor.yield %cst: f32 + } : tensor to tensor + return %padded: tensor +} + +// ----- + +// CHECK-LABEL: func @pad_tensor_of_cast( +// CHECK-NOT: tensor.cast +// CHECK: tensor.pad +// CHECK: tensor<8x?xf32> to tensor<8x32xf32> +func @pad_tensor_of_cast(%t: tensor<8x?xf32>, %s: index) -> tensor<8x32xf32> { + %c0 = arith.constant 0 : index + %cst = arith.constant 0.000000e+00 : f32 + %0 = tensor.cast %t : tensor<8x?xf32> to tensor + %1 = tensor.pad %0 low[%c0, %c0] high[%c0, %s] { + ^bb0(%arg9: index, %arg10: index): + tensor.yield %cst : f32 + } : tensor to tensor<8x32xf32> + return %1 : tensor<8x32xf32> +} + +// ----- + +// CHECK-LABEL: @cast_of_pad_more_static +func @cast_of_pad_more_static(%arg0: tensor, %padding: index) -> tensor<32x32xf32> { + %cst = arith.constant 0.000000e+00 : f32 + // CHECK: %[[PAD:.*]] = tensor.pad + // CHECK: tensor to tensor<32x32xf32> + %padded = tensor.pad %arg0 low[%padding, %padding] high[0, 0] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %cst : f32 + } : tensor to tensor + // CHECK-NOT: tensor.cast + %casted = tensor.cast %padded : tensor to tensor<32x32xf32> + // CHECK: return %[[PAD]] + return %casted : tensor<32x32xf32> +} + +// ----- + +// CHECK-LABEL: @cast_of_pad_less_static +func @cast_of_pad_less_static(%arg0: tensor<32x?x?xf32>, %padding: index) -> tensor { + %cst = arith.constant 0.000000e+00 : f32 + // CHECK: tensor.pad + %padded = tensor.pad %arg0 low[%padding, %padding, 
%padding] high[0, 0, 0] { + ^bb0(%arg1: index, %arg2: index, %arg3: index): + tensor.yield %cst : f32 + } : tensor<32x?x?xf32> to tensor<32x?x?xf32> + // CHECK: %[[CAST:.*]] = tensor.cast + %casted = tensor.cast %padded : tensor<32x?x?xf32> to tensor + // CHECK: return %[[CAST]] + return %casted : tensor +} + +// ----- + +func @tensor_pad_cast_fold(%arg0: tensor<4x4xf32>) -> tensor<4x4xf32> { + %c0 = arith.constant 0 : index + %cst = arith.constant 0.0 : f32 + %0 = tensor.cast %arg0 : tensor<4x4xf32> to tensor + %1 = tensor.pad %0 low[%c0, %c0] high[%c0, %c0] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %cst : f32 + } : tensor to tensor<4x4xf32> + return %1 : tensor<4x4xf32> +} +// CHECK-LABEL: @tensor_pad_cast +// CHECK-SAME: %[[ARG0:.+]]: tensor<4x4xf32> +// CHECK: return %[[ARG0]] + +// ----- + +// CHECK-LABEL: func @fold_pad_tensor_source_cast( +// CHECK-SAME: %[[ARG0:.*]]: tensor<4x?xf32> +// CHECK-NOT: tensor.cast +// CHECK: %[[RESULT:.*]] = tensor.pad %[[ARG0]] +func @fold_pad_tensor_source_cast(%arg0: tensor<4x?xf32>) -> tensor<4x4xf32> { + %cst = arith.constant 0.0 : f32 + %0 = tensor.cast %arg0 : tensor<4x?xf32> to tensor + %1 = tensor.pad %0 low[0, 0] high[0, 1] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %cst : f32 + } : tensor to tensor<4x4xf32> + return %1 : tensor<4x4xf32> +} + +// ----- + +// CHECK-LABEL: func @pad_static_zero_cast( +// CHECK-SAME: %[[ARG0:.*]]: tensor +// CHECK-NOT: tensor.pad +// CHECK: %[[RESULT:.*]] = tensor.cast %[[ARG0]] : tensor to tensor<2x3x4xf32> +// CHECK: return %[[RESULT]] +func @pad_static_zero_cast(%arg0: tensor, %pad_value: f32) -> tensor<2x3x4xf32> { + %c0 = arith.constant 0 : index + %0 = tensor.pad %arg0 low[0, %c0, 0] high[0, 0, %c0] { + ^bb0(%arg1: index, %arg2: index, %arg3: index): + tensor.yield %pad_value : f32 + } : tensor to tensor<2x3x4xf32> + + return %0 : tensor<2x3x4xf32> +} + +// ----- + +// CHECK-LABEL: func @pad_nofold_static_zero( +// CHECK-SAME: %[[ARG0:.*]]: tensor +// CHECK: %[[PAD:.*]] = tensor.pad +// CHECK: return %[[PAD]] +func @pad_nofold_static_zero(%arg0: tensor, %pad_value: f32) -> tensor<2x3x4xf32> { + %c0 = arith.constant 0 : index + %0 = tensor.pad %arg0 nofold low[0, %c0, 0] high[0, 0, %c0] { + ^bb0(%arg1: index, %arg2: index, %arg3: index): + tensor.yield %pad_value : f32 + } : tensor to tensor<2x3x4xf32> + + return %0 : tensor<2x3x4xf32> +} diff --git a/mlir/test/Dialect/Tensor/invalid.mlir b/mlir/test/Dialect/Tensor/invalid.mlir --- a/mlir/test/Dialect/Tensor/invalid.mlir +++ b/mlir/test/Dialect/Tensor/invalid.mlir @@ -317,3 +317,58 @@ %0 = tensor.insert_slice %arg0 into %arg1[0, 0] [%arg2, %arg3] [1, 1] : tensor into tensor return } + +// ----- + + +func @pad_result_type(%arg0: tensor, %arg1: index, %arg2: i32) -> tensor { + // expected-error @+1 {{specified type 'tensor' does not match the inferred type 'tensor}} + %0 = tensor.pad %arg0 low[1, %arg1, 2, 2] high[1, 2, %arg1, 3] { + ^bb0(%arg3: index, %arg4: index): + tensor.yield %arg2 : i32 + } : tensor to tensor + return %0 : tensor +} + +// ----- + +func @pad_number_of_block_args(%arg0: tensor, %arg1: i32) -> tensor { + // expected-error @+1 {{expected the block to have 2 arguments}} + %0 = tensor.pad %arg0 low[1, 2] high[2, 3] { + ^bb0(%arg2: index, %arg3: index, %arg4: index): + tensor.yield %arg1 : i32 + } : tensor to tensor + return %0 : tensor +} + +// ----- + +func @pad_no_block(%arg0: tensor, %arg1: i32) -> tensor { + // expected-error @+1 {{op region #0 ('region') failed to verify constraint: region with 1 blocks}} + %0 = 
+  } : tensor<?x4xi32> to tensor<?x9xi32>
+  return %0 : tensor<?x9xi32>
+}
+
+// -----
+
+func @pad_block_args(%arg0: tensor<?x4xi32>, %arg1: i32) -> tensor<?x9xi32> {
+  // expected-error @+1 {{op expected block argument 1 to be an index}}
+  %0 = tensor.pad %arg0 low[1, 2] high[2, 3] {
+    ^bb0(%arg2: i32, %arg3: i32):
+      tensor.yield %arg1 : i32
+  } : tensor<?x4xi32> to tensor<?x9xi32>
+  return %0 : tensor<?x9xi32>
+}
+
+// -----
+
+func @pad_yield_type(%arg0: tensor<?x4xi32>, %arg1: i8) -> tensor<?x9xi32> {
+  // expected-error @+1 {{op expected yield type to match shape element type}}
+  %0 = tensor.pad %arg0 low[1, 2] high[2, 3] {
+    ^bb0(%arg2: index, %arg3: index):
+      tensor.yield %arg1 : i8
+  } : tensor<?x4xi32> to tensor<?x9xi32>
+  return %0 : tensor<?x9xi32>
+}
+
diff --git a/mlir/test/Dialect/Tensor/ops.mlir b/mlir/test/Dialect/Tensor/ops.mlir
--- a/mlir/test/Dialect/Tensor/ops.mlir
+++ b/mlir/test/Dialect/Tensor/ops.mlir
@@ -176,3 +176,77 @@
   %1 = tensor.rank %t : tensor<4x4x?xf32>
   return
 }
+
+// -----
+
+func @pad_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: index,
+                  %pad_value: f32) -> tensor<6x?x?x?xf32> {
+  %0 = tensor.pad %arg0 low[2, %low, 3, 3] high[3, 3, %high, 2] {
+    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
+      tensor.yield %pad_value : f32
+  } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32>
+  return %0 : tensor<6x?x?x?xf32>
+}
+// CHECK-LABEL: func @pad_dynamic
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]
+// CHECK-SAME: %[[LOW:[a-zA-Z0-9_]*]]
+// CHECK-SAME: %[[HIGH:[a-zA-Z0-9_]*]]
+// CHECK: tensor.pad %[[ARG0]]
+// CHECK-SAME: low[2, %[[LOW]], 3, 3]
+// CHECK-SAME: high[3, 3, %[[HIGH]], 2]
+// CHECK: : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32>
+
+// -----
+
+func @pad_static(%arg0: tensor<3x4xf32>, %pad_value: f32) -> tensor<6x9xf32> {
+  %0 = tensor.pad %arg0 low[1, 2] high[2, 3] {
+    ^bb0(%arg1 : index, %arg2 : index):
+      tensor.yield %pad_value : f32
+  } : tensor<3x4xf32> to tensor<6x9xf32>
+  return %0 : tensor<6x9xf32>
+}
+// CHECK-LABEL: func @pad_static
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]
+// CHECK: tensor.pad %[[ARG0]] low[1, 2] high[2, 3]
+// CHECK: : tensor<3x4xf32> to tensor<6x9xf32>
+
+// -----
+
+func @pad_asymmetrical(%arg0: tensor<2x3xf32>, %ub0: index, %ub1: index,
+                       %pad_value: f32) -> tensor<?x?xf32> {
+  %0 = tensor.pad %arg0 low[0, 0] high[%ub0, %ub1] {
+    ^bb0(%arg1: index, %arg2: index):
+      tensor.yield %pad_value : f32
+  } : tensor<2x3xf32> to tensor<?x?xf32>
+  return %0 : tensor<?x?xf32>
+}
+// CHECK-LABEL: func @pad_asymmetrical
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]
+// CHECK-SAME: %[[UB0:[a-zA-Z0-9_]*]]
+// CHECK-SAME: %[[UB1:[a-zA-Z0-9_]*]]
+// CHECK: tensor.pad %[[ARG0]]
+// CHECK-SAME: low[0, 0]
+// CHECK-SAME: high[%[[UB0]], %[[UB1]]]
+// CHECK: : tensor<2x3xf32> to tensor<?x?xf32>
+
+// -----
+
+func @pad_to_static_size(%arg0: tensor<?x?xf32>, %ub0: index, %ub1: index,
+                         %pad_value: f32) -> tensor<2x3xf32> {
+  %0 = tensor.pad %arg0 low[0, 0] high[%ub0, %ub1] {
+    ^bb0(%arg1: index, %arg2: index):
+      tensor.yield %pad_value : f32
+  } : tensor<?x?xf32> to tensor<2x3xf32>
+  return %0 : tensor<2x3xf32>
+}
+// CHECK-LABEL: func @pad_to_static_size
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]
+// CHECK-SAME: %[[UB0:[a-zA-Z0-9_]*]]
+// CHECK-SAME: %[[UB1:[a-zA-Z0-9_]*]]
+// CHECK: tensor.pad %[[ARG0]]
+// CHECK-SAME: low[0, 0]
+// CHECK-SAME: high[%[[UB0]], %[[UB1]]]
+// CHECK: : tensor<?x?xf32> to tensor<2x3xf32>
+
+// -----
+
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir
@@ -21,9 +21,9 @@
     %8 = affine.apply #map1(%arg3, %c0)[%c2]
     %9 = tensor.extract_slice %arg1[%arg3] [2] [1] : tensor<64xf32> to tensor<2xf32>
     %10 = tensor.cast %9 : tensor<2xf32> to tensor<?xf32>
-    %11 = linalg.pad_tensor %10 low[%c0] high[%c0] {
+    %11 = tensor.pad %10 low[%c0] high[%c0] {
     ^bb0(%arg5: index):
-      linalg.yield %cst : f32
+      tensor.yield %cst : f32
     } : tensor<?xf32> to tensor<2xf32>
     %12 = tensor.insert_slice %11 into %arg4[%8, 0] [1, 2] [1, 1] : tensor<2xf32> into tensor<?x2xf32>
     scf.yield %12 : tensor<?x2xf32>
@@ -38,9 +38,9 @@
     %8 = affine.apply #map1(%arg3, %c0)[%c2]
     %9 = tensor.extract_slice %arg0[%arg3] [2] [1] : tensor<64xf32> to tensor<2xf32>
     %10 = tensor.cast %9 : tensor<2xf32> to tensor<?xf32>
-    %11 = linalg.pad_tensor %10 low[%c0] high[%c0] {
+    %11 = tensor.pad %10 low[%c0] high[%c0] {
     ^bb0(%arg5: index):
-      linalg.yield %cst : f32
+      tensor.yield %cst : f32
     } : tensor<?xf32> to tensor<2xf32>
     %12 = tensor.insert_slice %11 into %arg4[%8, 0] [1, 2] [1, 1] : tensor<2xf32> into tensor<?x2xf32>
     scf.yield %12 : tensor<?x2xf32>
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
@@ -13,9 +13,9 @@
   %offset = arith.constant 2 : index
   %cst = arith.constant 2.3 : f32
   %c0 = arith.constant 0 : index
-  %out = linalg.pad_tensor %dynamic low[%c0, %offset, %c0] high[%c0, %c0, %offset] {
+  %out = tensor.pad %dynamic low[%c0, %offset, %c0] high[%c0, %c0, %offset] {
     ^bb0(%gen_arg1: index, %gen_arg2: index, %gen_arg3: index):
-      linalg.yield %cst : f32
+      tensor.yield %cst : f32
   } : tensor<1x?x3xf32> to tensor<1x?x?xf32>
   %unranked = tensor.cast %out: tensor<1x?x?xf32> to tensor<*xf32>
   call @print_memref_f32(%unranked) : (tensor<*xf32>) -> ()
diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
@@ -42,6 +42,7 @@
                     memref::MemRefDialect,
                     scf::SCFDialect,
                     StandardOpsDialect,
+                    linalg::LinalgDialect,
                     vector::VectorDialect,
                     gpu::GPUDialect>();
     // clang-format on
@@ -549,20 +550,20 @@
           funcOp.getContext(),
           LinalgTransformationFilter()
               .addOpFilter());
-  populatePadTensorOpVectorizationPatterns(patterns);
+  populatePadOpVectorizationPatterns(patterns);
   populateConvolutionVectorizationPatterns(patterns);
   (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
 }
 
 static void applyPadTensorToGenericPatterns(FuncOp funcOp) {
   RewritePatternSet patterns(funcOp.getContext());
-  patterns.add<PadTensorOpTransformationPattern>(funcOp.getContext());
+  patterns.add<PadOpTransformationPattern>(funcOp.getContext());
   (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
 }
 
 static void applyGeneralizePadTensorPatterns(FuncOp funcOp) {
   RewritePatternSet patterns(funcOp.getContext());
-  patterns.add<GeneralizePadTensorOpPattern>(funcOp.getContext());
+  patterns.add<GeneralizePadOpPattern>(funcOp.getContext());
   (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
 }
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -4280,6 +4280,7 @@
         ":InferTypeOpInterfaceTdFiles",
         ":OpBaseTdFiles",
        ":SideEffectInterfacesTdFiles",
+        ":TilingInterfaceTdFiles",
        ":ViewLikeInterfaceTdFiles",
    ],
 )
@@ -4336,6 +4337,7 @@
        ":StandardOps",
":Support", ":TensorOpsIncGen", + ":TilingInterface", ":ViewLikeInterface", "//llvm:Support", ], @@ -4356,6 +4358,38 @@ ], ) +cc_library( + name = "TensorTilingInterfaceImpl", + srcs = ["lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp"], + hdrs = ["include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"], + includes = ["include"], + deps = [ + ":Affine", + ":IR", + ":LinalgOps", + ":SCFDialect", + ":StandardOps", + ":TensorDialect", + ":TilingInterface", + "//llvm:Support", + ], +) + +cc_library( + name = "TensorUtils", + srcs = ["lib/Dialect/Tensor/Utils/Utils.cpp"], + hdrs = ["include/mlir/Dialect/Tensor/Utils/Utils.h"], + includes = ["include"], + deps = [ + ":Affine", + ":ArithmeticDialect", + ":IR", + ":Support", + ":TensorDialect", + "//llvm:Support", + ], +) + gentbl_cc_library( name = "TensorPassIncGen", strip_include_prefix = "include", @@ -5634,6 +5668,7 @@ ":StandardToSPIRV", ":TensorDialect", ":TensorInferTypeOpInterfaceImpl", + ":TensorTilingInterfaceImpl", ":TensorTransforms", ":TosaDialect", ":TosaToLinalg", @@ -6911,6 +6946,7 @@ ":Support", ":TensorBufferizableOpInterfaceImpl", ":TensorDialect", + ":TensorUtils", ":TransformUtils", ":VectorBufferizableOpInterfaceImpl", ":VectorOps", @@ -6957,7 +6993,6 @@ deps = [ ":IR", ":Support", - ":TensorDialect", ":TilingInterfaceIncGen", ":ViewLikeInterface", "//llvm:Support", @@ -7260,6 +7295,7 @@ ":SCFDialect", ":StandardOps", ":TensorDialect", + ":TensorUtils", ":TosaDialect", ":Transforms", ],