diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -367,7 +367,8 @@ let dependentDialects = [ "memref::MemRefDialect", "StandardOpsDialect", - "scf::SCFDialect" + "scf::SCFDialect", + "tensor::TensorDialect" ]; } @@ -504,7 +505,7 @@ def TosaToStandard : Pass<"tosa-to-standard"> { let summary = "Lower TOSA to the Standard dialect"; - let dependentDialects = ["StandardOpsDialect"]; + let dependentDialects = ["StandardOpsDialect", "tensor::TensorDialect"]; let description = [{ Pass that converts TOSA operations to the equivalent operations using the operations in the Standard dialect. diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td @@ -579,12 +579,11 @@ Tensor-based version: - The body region of the loop contains `subtensor` operations applied to + The body region of the loop contains `extract_slice` operations applied to every tensor argument of TiledLoopOp. The body region must contain exactly one block that terminates with - `linalg.yield` with the operands resulting from `subtensor_insert` - operations. + `linalg.yield` with the operands resulting from `insert_slice` operations. Example: @@ -594,16 +593,16 @@ outs(%out : tensor<24x64xi8>) iterators("parallel") distribution("block_x") { - %lhs_sub = subtensor %lhs[%i, 0] [%c4, %c64] [1, 1] + %lhs_sub = tensor.extract_slice %lhs[%i, 0] [%c4, %c64] [1, 1] : tensor<24x64xi8> to tensor - %rhs_sub = subtensor %rhs[%i, 0] [%c4, %c64] [1, 1] + %rhs_sub = tensor.extract_slice %rhs[%i, 0] [%c4, %c64] [1, 1] : tensor<24x64xi8> to tensor - %out_sub = subtensor %out[%i, 0] [%c4, %c64] [1, 1] + %out_sub = tensor.extract_slice %out[%i, 0] [%c4, %c64] [1, 1] : tensor<24x64xi8> to tensor %result_sub = linalg.generic ... - %result = subtensor_insert %result_sub into %out[%i, 0][%c4, %c64][1, 1] + %result = tensor.insert_slice %result_sub into %out[%i, 0][%c4, %c64][1, 1] : tensor into tensor<24x64xi8> linalg.yield %result : tensor<24x64xi8> } diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h @@ -47,7 +47,7 @@ /// If hoistPaddingOnTensors is called with `nLoops` = 2 on the following IR. /// ``` /// scf.for (%i, %j, %k) -/// %st0 = subtensor f(%i, %k) : ... to tensor +/// %st0 = tensor.extract_slice f(%i, %k) : ... to tensor /// %0 = linalg.pad_tensor %st0 low[0, 0] high[...] { /// ^bb0( ... ): /// linalg.yield %pad @@ -61,16 +61,17 @@ /// scf.for (%i) { /// %packed_init = linalg.init_tensor range(%j) : tensor /// %packed = scf.for (%k) iter_args(%p : %packed_init) { -/// %st0 = subtensor f(%i, %k) : ... to tensor +/// %st0 = tensor.extract_slice f(%i, %k) : ... to tensor /// %0 = linalg.pad_tensor %st0 low[0, 0] high[...] { /// ^bb0( ... ): /// linalg.yield %pad /// } : tensor to tensor<4x8xf32> -/// %1 = subtensor_insert %0 ... : tensor<4x8xf32> to tensor +/// %1 = tensor.insert_slice %0 ... 
+/// : tensor<4x8xf32> to tensor /// scf.yield %1: tensor /// } -> tensor /// scf.for (%j, %k) { -/// %st0 = subtensor %packed [%k, 0, 0][1, 4, 8][1, 1, 1] : +/// %st0 = tensor.extract_slice %packed [%k, 0, 0][1, 4, 8][1, 1, 1] : /// tensor to tensor<4x8xf32> /// compute(%st0) /// } diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -12,6 +12,7 @@ #include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/Utils.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Vector/VectorOps.h" #include "mlir/IR/Identifier.h" #include "mlir/IR/PatternMatch.h" @@ -1077,12 +1078,12 @@ const FrozenRewritePatternSet &stage2Patterns, function_ref stage3Lambda = nullptr); -/// Rewrite subtensor(pad_tensor(x)) into pad_tensor(subtensor(x)). -struct SubTensorOfPadTensorSwapPattern - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; +/// Rewrite extract_slice(pad_tensor(x)) into pad_tensor(extract_slice(x)). +struct ExtractSliceOfPadTensorSwapPattern + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(SubTensorOp subTensorOp, + LogicalResult matchAndRewrite(tensor::ExtractSliceOp sliceOp, PatternRewriter &rewriter) const override; }; diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -78,7 +78,7 @@ bool isFusableInto(const LinalgDependenceGraph &graph, LinalgOp consumer, Value consumedView, LinalgOp producer); -/// Creates subtensor/subview ops for all `tiledOperands` of the given +/// Creates extract_slice/subview ops for all `tiledOperands` of the given /// `linalgOp` with `builder`, assuming `linalgOp` is being fused into a loop /// nest for tiling with the given induction variables `ivs` and tile sizes /// `tileSizes`. `sizeBounds` are the iteration space bounds for *all* the @@ -118,15 +118,17 @@ const LinalgDependenceGraph &graph); /// Tensor counterpart of `fuseProducerOfBuffer`. /// This implements the fusion part of the "tileAndFuse on tensors" -/// transformation and thus requires the `consumerOpOperand` to be a `subtensor` -/// op (generally obtained by applying the tiling transformation). +/// transformation and thus requires the `consumerOpOperand` to be a +/// `extract_slice` op (generally obtained by applying the tiling +/// transformation). Optional fuseProducerOfTensor(OpBuilder &b, OpOperand &consumerOpOperand); /// Tensor counterpart of `fuseProducerOfBuffer`. /// This implements the fusion part of the "tileAndFuse on tensors" -/// transformation and thus requires the `consumerOpOperand` to be a `subtensor` -/// op (generally obtained by applying the tiling transformation). -/// Assumes `producerOfTensor` is a Linalg op that produces `consumerOpOperand`. +/// transformation and thus requires the `consumerOpOperand` to be a +/// `extract_slice` op (generally obtained by applying the tiling +/// transformation). Assumes `producerOfTensor` is a Linalg op that produces +/// `consumerOpOperand`. 
Optional fuseProducerOfTensor(OpBuilder &b, OpResult producerOpResult, OpOperand &consumerOpOperand); diff --git a/mlir/include/mlir/Dialect/Shape/IR/Shape.h b/mlir/include/mlir/Dialect/Shape/IR/Shape.h --- a/mlir/include/mlir/Dialect/Shape/IR/Shape.h +++ b/mlir/include/mlir/Dialect/Shape/IR/Shape.h @@ -14,6 +14,7 @@ #ifndef MLIR_SHAPE_IR_SHAPE_H #define MLIR_SHAPE_IR_SHAPE_H +#include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" diff --git a/mlir/include/mlir/Dialect/Shape/IR/ShapeBase.td b/mlir/include/mlir/Dialect/Shape/IR/ShapeBase.td --- a/mlir/include/mlir/Dialect/Shape/IR/ShapeBase.td +++ b/mlir/include/mlir/Dialect/Shape/IR/ShapeBase.td @@ -35,6 +35,7 @@ }]; let cppNamespace = "::mlir::shape"; + let dependentDialects = ["tensor::TensorDialect"]; let hasConstantMaterializer = 1; let hasOperationAttrVerify = 1; diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h @@ -23,7 +23,6 @@ #include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/SideEffectInterfaces.h" #include "mlir/Interfaces/VectorInterfaces.h" -#include "mlir/Interfaces/ViewLikeInterface.h" // Pull in all enum type definitions and utility function declarations. #include "mlir/Dialect/StandardOps/IR/OpsEnums.h.inc" @@ -34,12 +33,6 @@ class FuncOp; class OpBuilder; class PatternRewriter; - -/// Return the list of Range (i.e. offset, size, stride). Each Range -/// entry contains either the dynamic value or a ConstantIndexOp constructed -/// with `b` at location `loc`. -SmallVector getOrCreateRanges(OffsetSizeAndStrideOpInterface op, - OpBuilder &b, Location loc); } // namespace mlir #define GET_OP_CLASSES diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -21,7 +21,6 @@ include "mlir/Interfaces/ControlFlowInterfaces.td" include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/Interfaces/VectorInterfaces.td" -include "mlir/Interfaces/ViewLikeInterface.td" def StandardOps_Dialect : Dialect { let name = "std"; @@ -1754,245 +1753,6 @@ let hasCanonicalizer = 1; } -//===----------------------------------------------------------------------===// -// SubTensorOp -//===----------------------------------------------------------------------===// - -def SubTensorOp : BaseOpWithOffsetSizesAndStrides< - StandardOps_Dialect, "subtensor", [NoSideEffect, AttrSizedOperandSegments, - OffsetSizeAndStrideOpInterface]> { - let summary = "subtensor operation"; - let description = [{ - The "subtensor" operation extract a tensor from another tensor as - specified by the operation's offsets, sizes and strides arguments. - - The subtensor operation supports the following arguments: - - * source: the "base" tensor from which to extract a subtensor. - * offsets: tensor-rank number of offsets into the "base" tensor from which - to extract the subtensor. - * sizes: tensor-rank number of sizes which specify the sizes of the result - tensor type. - * strides: tensor-rank number of strides specifying subsampling in each - dimension. 
- - The representation based on offsets, sizes and strides support a - partially-static specification via attributes specified through the - `static_offsets`, `static_sizes` and `static_strides` arguments. A special - sentinel value ShapedType::kDynamicSize and - ShapedType::kDynamicStrideOrOffset encodes that the corresponding entry has - a dynamic value. - - After buffer-allocation, the "subtensor" op is expected to lower into a - "subview" op. - - A subtensor operation may additionally reduce the rank of the resulting - tensor by removing dimensions that are statically known to be of size 1. - - Example: - - ``` - // Rank-reducing subtensor. - %1 = subtensor %0[0, 0, 0][1, 16, 4][1, 1, 1] : - tensor<8x16x4xf32> to tensor<16x4xf32> - %3 = subtensor %2[3, 4, 2][1, 6, 3][1, 1, 1] : - tensor<8x16x4xf32> to tensor<6x3xf32> - ``` - }]; - - let arguments = (ins - AnyRankedTensor:$source, - Variadic:$offsets, - Variadic:$sizes, - Variadic:$strides, - I64ArrayAttr:$static_offsets, - I64ArrayAttr:$static_sizes, - I64ArrayAttr:$static_strides - ); - let results = (outs AnyRankedTensor:$result); - - let assemblyFormat = [{ - $source `` - custom($offsets, $static_offsets) - custom($sizes, $static_sizes) - custom($strides, $static_strides) - attr-dict `:` type($source) `to` type($result) - }]; - - let builders = [ - // Build a SubTensorOp with mixed static and dynamic entries and inferred - // result type. - OpBuilder<(ins "Value":$source, "ArrayRef":$offsets, - "ArrayRef":$sizes, "ArrayRef":$strides, - CArg<"ArrayRef", "{}">:$attrs)>, - // Build a SubTensorOp with mixed static and dynamic entries and custom - // result type. If the type passed is nullptr, it is inferred. - OpBuilder<(ins "RankedTensorType":$resultType, "Value":$source, - "ArrayRef":$offsets, "ArrayRef":$sizes, - "ArrayRef":$strides, - CArg<"ArrayRef", "{}">:$attrs)>, - // Build a SubTensorOp with dynamic entries and custom result type. If the - // type passed is nullptr, it is inferred. - OpBuilder<(ins "Value":$source, "ValueRange":$offsets, - "ValueRange":$sizes, "ValueRange":$strides, - CArg<"ArrayRef", "{}">:$attrs)>, - // Build a SubTensorOp with dynamic entries and inferred result type. - OpBuilder<(ins "RankedTensorType":$resultType, "Value":$source, - "ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides, - CArg<"ArrayRef", "{}">:$attrs)> - ]; - - let extraClassDeclaration = extraBaseClassDeclaration # [{ - /// Returns the type of the base tensor operand. - RankedTensorType getSourceType() { - return source().getType().cast(); - } - - /// The result of a subtensor is always a tensor. - RankedTensorType getType() { - return getResult().getType().cast(); - } - - /// A subtensor result type can be fully inferred from the source type and - /// the static representation of offsets, sizes and strides. Special - /// sentinels encode the dynamic case. 
- static Type inferResultType(RankedTensorType sourceRankedTensorType, - ArrayRef staticOffsets, - ArrayRef staticSizes, - ArrayRef staticStrides); - static Type inferResultType(RankedTensorType sourceRankedTensorType, - ArrayRef staticOffsets, - ArrayRef staticSizes, - ArrayRef staticStrides); - static Type inferRankReducedResultType(unsigned resultRank, - RankedTensorType sourceRankedTensorType, - ArrayRef staticOffsets, - ArrayRef staticSizes, - ArrayRef staticStrides); - static Type inferRankReducedResultType(unsigned resultRank, - RankedTensorType sourceRankedTensorType, - ArrayRef staticOffsets, - ArrayRef staticSizes, - ArrayRef staticStrides); - - /// Return the expected rank of each of the`static_offsets`, `static_sizes` - /// and `static_strides` attributes. - std::array getArrayAttrMaxRanks() { - unsigned rank = getSourceType().getRank(); - return {rank, rank, rank}; - } - - /// Return the number of leading operands before the `offsets`, `sizes` and - /// and `strides` operands. - static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; } - }]; - - let hasCanonicalizer = 1; - let hasFolder = 1; -} - -//===----------------------------------------------------------------------===// -// SubTensorInsertOp -//===----------------------------------------------------------------------===// - -def SubTensorInsertOp : BaseOpWithOffsetSizesAndStrides< - StandardOps_Dialect, "subtensor_insert", - [NoSideEffect, AttrSizedOperandSegments, OffsetSizeAndStrideOpInterface, - TypesMatchWith<"expected result type to match dest type", - "dest", "result", "$_self">]> { - let summary = "subtensor_insert operation"; - let description = [{ - The "subtensor_insert" operation insert a tensor `source` into another - tensor `dest` as specified by the operation's offsets, sizes and strides - arguments. - - It returns a copy of `dest` with the proper subtensor updated with the value - of `source`. - - The subtensor_insert operation has the encodes the following information: - - * source: the tensor that is inserted. - * dest: the tensor into which the source tensor is inserted. - * offsets: tensor-rank number of offsets into the "base" tensor from which - to extract the subtensor. - * sizes: tensor-rank number of sizes which specify the sizes of the result - tensor type. - * strides: tensor-rank number of strides that specify subsampling in each - dimension. - - The representation based on offsets, sizes and strides support a - partially-static specification via attributes specified through the - `static_offsets`, `static_sizes` and `static_strides` arguments. A special - sentinel value ShapedType::kDynamicSize and - ShapedType::kDynamicStrideOrOffset encodes that the corresponding entry has - a dynamic value. - - After buffer-allocation, the "subtensor_insert" op is expected to become - an in-place buffer update. - }]; - - let arguments = (ins - AnyRankedTensor:$source, - AnyRankedTensor:$dest, - Variadic:$offsets, - Variadic:$sizes, - Variadic:$strides, - I64ArrayAttr:$static_offsets, - I64ArrayAttr:$static_sizes, - I64ArrayAttr:$static_strides - ); - let results = (outs AnyRankedTensor:$result); - - let assemblyFormat = [{ - $source `into` $dest `` - custom($offsets, $static_offsets) - custom($sizes, $static_sizes) - custom($strides, $static_strides) - attr-dict `:` type($source) `into` type($dest) - }]; - - let verifier = ?; - - let builders = [ - // Build a SubTensorInsertOp with mixed static and dynamic entries. 
- OpBuilder<(ins "Value":$source, "Value":$dest, - "ArrayRef":$offsets, "ArrayRef":$sizes, - "ArrayRef":$strides, - CArg<"ArrayRef", "{}">:$attrs)>, - // Build a SubTensorInsertOp with dynamic entries. - OpBuilder<(ins "Value":$source, "Value":$dest, - "ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides, - CArg<"ArrayRef", "{}">:$attrs)> - ]; - - let extraClassDeclaration = extraBaseClassDeclaration # [{ - /// Returns the type of the base tensor operand. - RankedTensorType getSourceType() { - return source().getType().cast(); - } - - /// The result of a subtensor_insert is always a tensor. - RankedTensorType getType() { - return getResult().getType().cast(); - } - - /// Return the expected rank of each of the`static_offsets`, `static_sizes` - /// and `static_strides` attributes. - std::array getArrayAttrMaxRanks() { - unsigned rank = getType().getRank(); - return {rank, rank, rank}; - } - - /// Return the number of leading operands before the `offsets`, `sizes` and - /// and `strides` operands. - static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 2; } - }]; - - let hasCanonicalizer = 1; - let hasFolder = 1; -} - - //===----------------------------------------------------------------------===// // SwitchOp //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h b/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h --- a/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h +++ b/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h @@ -16,6 +16,21 @@ #include "mlir/Interfaces/CastInterfaces.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/SideEffectInterfaces.h" +#include "mlir/Interfaces/ViewLikeInterface.h" + +//===----------------------------------------------------------------------===// +// Tensor Dialect Helpers +//===----------------------------------------------------------------------===// + +namespace mlir { + +/// Return the list of Range (i.e. offset, size, stride). Each Range +/// entry contains either the dynamic value or a ConstantIndexOp constructed +/// with `b` at location `loc`. +SmallVector getOrCreateRanges(OffsetSizeAndStrideOpInterface op, + OpBuilder &b, Location loc); + +} // namespace mlir //===----------------------------------------------------------------------===// // Tensor Dialect @@ -41,8 +56,8 @@ /// source tensor. This is useful to fold a tensor.cast into a consuming op and /// implement canonicalization patterns for ops in different dialects that may /// consume the results of tensor.cast operations. Such foldable tensor.cast -/// operations are typically inserted as `subtensor` ops and are canonicalized, -/// to preserve the type compatibility of their uses. +/// operations are typically inserted as `extract_slice` ops and are +/// canonicalized, to preserve the type compatibility of their uses. /// /// Returns true when all conditions are met: /// 1. source and result are ranked tensors with same element type and rank. @@ -64,7 +79,6 @@ /// Performs folding of any operand of `op` if it comes from a tensor::CastOp /// that can be folded. 
LogicalResult foldTensorCast(Operation *op); - } // namespace tensor } // namespace mlir diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td --- a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td +++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td @@ -13,6 +13,7 @@ include "mlir/Interfaces/CastInterfaces.td" include "mlir/Interfaces/ControlFlowInterfaces.td" include "mlir/Interfaces/SideEffectInterfaces.td" +include "mlir/Interfaces/ViewLikeInterface.td" class Tensor_Op traits = []> : Op { @@ -99,6 +100,144 @@ let hasFolder = 1; } + +//===----------------------------------------------------------------------===// +// ExtractSliceOp +//===----------------------------------------------------------------------===// + +def Tensor_ExtractSliceOp : BaseOpWithOffsetSizesAndStrides< + Tensor_Dialect, "extract_slice", [NoSideEffect, AttrSizedOperandSegments, + OffsetSizeAndStrideOpInterface]> { + let summary = "extract slice operation"; + let description = [{ + The "extract_slice" operation extract a tensor from another tensor as + specified by the operation's offsets, sizes and strides arguments. + + The extract_slice operation supports the following arguments: + + * source: the "base" tensor from which to extract a slice. + * offsets: tensor-rank number of offsets into the "base" tensor from which + to extract the slice. + * sizes: tensor-rank number of sizes which specify the sizes of the result + tensor type. + * strides: tensor-rank number of strides specifying subsampling in each + dimension. + + The representation based on offsets, sizes and strides support a + partially-static specification via attributes specified through the + `static_offsets`, `static_sizes` and `static_strides` arguments. A special + sentinel value ShapedType::kDynamicSize and + ShapedType::kDynamicStrideOrOffset encodes that the corresponding entry has + a dynamic value. + + After buffer-allocation, the "extract_slice" op is expected to lower into a + "subview" op. + + An extract_slice operation may additionally reduce the rank of the resulting + tensor by removing dimensions that are statically known to be of size 1. + + Example: + + ``` + // Rank-reducing extract_slice. + %1 = tensor.extract_slice %0[0, 0, 0][1, 16, 4][1, 1, 1] : + tensor<8x16x4xf32> to tensor<16x4xf32> + %3 = tensor.extract_slice %2[3, 4, 2][1, 6, 3][1, 1, 1] : + tensor<8x16x4xf32> to tensor<6x3xf32> + ``` + }]; + + let arguments = (ins + AnyRankedTensor:$source, + Variadic:$offsets, + Variadic:$sizes, + Variadic:$strides, + I64ArrayAttr:$static_offsets, + I64ArrayAttr:$static_sizes, + I64ArrayAttr:$static_strides + ); + let results = (outs AnyRankedTensor:$result); + + let assemblyFormat = [{ + $source `` + custom($offsets, $static_offsets) + custom($sizes, $static_sizes) + custom($strides, $static_strides) + attr-dict `:` type($source) `to` type($result) + }]; + + let builders = [ + // Build an ExtractSliceOp with mixed static and dynamic entries and + // inferred result type. + OpBuilder<(ins "Value":$source, "ArrayRef":$offsets, + "ArrayRef":$sizes, "ArrayRef":$strides, + CArg<"ArrayRef", "{}">:$attrs)>, + // Build an ExtractSliceOp with mixed static and dynamic entries and custom + // result type. If the type passed is nullptr, it is inferred. + OpBuilder<(ins "RankedTensorType":$resultType, "Value":$source, + "ArrayRef":$offsets, "ArrayRef":$sizes, + "ArrayRef":$strides, + CArg<"ArrayRef", "{}">:$attrs)>, + // Build an ExtractSliceOp with dynamic entries and custom result type. 
If + // the type passed is nullptr, it is inferred. + OpBuilder<(ins "Value":$source, "ValueRange":$offsets, + "ValueRange":$sizes, "ValueRange":$strides, + CArg<"ArrayRef", "{}">:$attrs)>, + // Build an ExtractSliceOp with dynamic entries and inferred result type. + OpBuilder<(ins "RankedTensorType":$resultType, "Value":$source, + "ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides, + CArg<"ArrayRef", "{}">:$attrs)> + ]; + + let extraClassDeclaration = extraBaseClassDeclaration # [{ + /// Returns the type of the base tensor operand. + RankedTensorType getSourceType() { + return source().getType().cast(); + } + + /// The result of an extract_slice is always a tensor. + RankedTensorType getType() { + return getResult().getType().cast(); + } + + /// An extract_slice result type can be fully inferred from the source type + /// and the static representation of offsets, sizes and strides. Special + /// sentinels encode the dynamic case. + static Type inferResultType(RankedTensorType sourceRankedTensorType, + ArrayRef staticOffsets, + ArrayRef staticSizes, + ArrayRef staticStrides); + static Type inferResultType(RankedTensorType sourceRankedTensorType, + ArrayRef staticOffsets, + ArrayRef staticSizes, + ArrayRef staticStrides); + static Type inferRankReducedResultType(unsigned resultRank, + RankedTensorType sourceRankedTensorType, + ArrayRef staticOffsets, + ArrayRef staticSizes, + ArrayRef staticStrides); + static Type inferRankReducedResultType(unsigned resultRank, + RankedTensorType sourceRankedTensorType, + ArrayRef staticOffsets, + ArrayRef staticSizes, + ArrayRef staticStrides); + + /// Return the expected rank of each of the`static_offsets`, `static_sizes` + /// and `static_strides` attributes. + std::array getArrayAttrMaxRanks() { + unsigned rank = getSourceType().getRank(); + return {rank, rank, rank}; + } + + /// Return the number of leading operands before the `offsets`, `sizes` and + /// and `strides` operands. + static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; } + }]; + + let hasCanonicalizer = 1; + let hasFolder = 1; +} + //===----------------------------------------------------------------------===// // FromElementsOp //===----------------------------------------------------------------------===// @@ -200,7 +339,7 @@ The `tensor.insert` op writes a tensor into a tensor `dest`as specified by the operation's indices. - It returns a copy of `dest` with the proper subtensor updated with the value + It returns a copy of `dest` with the proper slice updated with the value of `scalar`. The arity of indices must match the rank of the tensor `dest` (i.e., if a @@ -234,6 +373,107 @@ let hasFolder = 1; } +//===----------------------------------------------------------------------===// +// InsertSliceOp +//===----------------------------------------------------------------------===// + +def Tensor_InsertSliceOp : BaseOpWithOffsetSizesAndStrides< + Tensor_Dialect, "insert_slice", + [NoSideEffect, AttrSizedOperandSegments, OffsetSizeAndStrideOpInterface, + TypesMatchWith<"expected result type to match dest type", + "dest", "result", "$_self">]> { + let summary = "insert_slice operation"; + let description = [{ + The "insert_slice" operation insert a tensor `source` into another + tensor `dest` as specified by the operation's offsets, sizes and strides + arguments. + + It returns a copy of `dest` with the proper slice updated with the value + of `source`. + + The insert_slice operation supports the following arguments: + + * source: the tensor that is inserted. 
+ * dest: the tensor into which the source tensor is inserted. + * offsets: tensor-rank number of offsets into the `dest` tensor into which + the slice is inserted. + * sizes: tensor-rank number of sizes which specify the sizes of the result + tensor type. + * strides: tensor-rank number of strides that specify subsampling in each + dimension. + + The representation based on offsets, sizes and strides support a + partially-static specification via attributes specified through the + `static_offsets`, `static_sizes` and `static_strides` arguments. A special + sentinel value ShapedType::kDynamicSize and + ShapedType::kDynamicStrideOrOffset encodes that the corresponding entry has + a dynamic value. + + After buffer-allocation, the "insert_slice" op is expected to become an + in-place buffer update. + }]; + + let arguments = (ins + AnyRankedTensor:$source, + AnyRankedTensor:$dest, + Variadic:$offsets, + Variadic:$sizes, + Variadic:$strides, + I64ArrayAttr:$static_offsets, + I64ArrayAttr:$static_sizes, + I64ArrayAttr:$static_strides + ); + let results = (outs AnyRankedTensor:$result); + + let assemblyFormat = [{ + $source `into` $dest `` + custom($offsets, $static_offsets) + custom($sizes, $static_sizes) + custom($strides, $static_strides) + attr-dict `:` type($source) `into` type($dest) + }]; + + let verifier = ?; + + let builders = [ + // Build a InsertSliceOp with mixed static and dynamic entries. + OpBuilder<(ins "Value":$source, "Value":$dest, + "ArrayRef":$offsets, "ArrayRef":$sizes, + "ArrayRef":$strides, + CArg<"ArrayRef", "{}">:$attrs)>, + // Build a InsertSliceOp with dynamic entries. + OpBuilder<(ins "Value":$source, "Value":$dest, + "ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides, + CArg<"ArrayRef", "{}">:$attrs)> + ]; + + let extraClassDeclaration = extraBaseClassDeclaration # [{ + /// Returns the type of the base tensor operand. + RankedTensorType getSourceType() { + return source().getType().cast(); + } + + /// The result of a insert_slice is always a tensor. + RankedTensorType getType() { + return getResult().getType().cast(); + } + + /// Return the expected rank of each of the`static_offsets`, `static_sizes` + /// and `static_strides` attributes. + std::array getArrayAttrMaxRanks() { + unsigned rank = getType().getRank(); + return {rank, rank, rank}; + } + + /// Return the number of leading operands before the `offsets`, `sizes` and + /// and `strides` operands. + static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 2; } + }]; + + let hasCanonicalizer = 1; + let hasFolder = 1; +} + //===----------------------------------------------------------------------===// // ReshapeOp //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td @@ -1384,7 +1384,7 @@ let summary = "Gather operation,"; let description = [{ - Generate a tensor for which each element in the output is a subtensor of the + Generate a tensor for which each element in the output is a slice of the values tensor based on the value of indices. 
}]; diff --git a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp --- a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp +++ b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp @@ -627,10 +627,11 @@ Value index = b.create(indexIsNegative, add, originalIndex); Value one = b.create(1); - Value head = b.create(transformed.operand(), zero, index, one); + Value head = + b.create(transformed.operand(), zero, index, one); Value tailSize = b.create(rank, index); - Value tail = - b.create(transformed.operand(), index, tailSize, one); + Value tail = b.create(transformed.operand(), index, + tailSize, one); rewriter.replaceOp(op, {head, tail}); return success(); } diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp @@ -1741,8 +1741,8 @@ for (auto arg : args) { sizes[axis] = rewriter.create(loc, arg, axisValue); - result = rewriter.create(loc, arg, result, offsets, - sizes, strides); + result = rewriter.create(loc, arg, result, offsets, + sizes, strides); offsets[axis] = rewriter.create(loc, offsets[axis], sizes[axis]); } rewriter.replaceOp(op, result); diff --git a/mlir/lib/Conversion/TosaToStandard/TosaToStandard.cpp b/mlir/lib/Conversion/TosaToStandard/TosaToStandard.cpp --- a/mlir/lib/Conversion/TosaToStandard/TosaToStandard.cpp +++ b/mlir/lib/Conversion/TosaToStandard/TosaToStandard.cpp @@ -12,6 +12,7 @@ #include "mlir/Conversion/TosaToStandard/TosaToStandard.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Tosa/IR/TosaOps.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -42,7 +43,7 @@ SmallVector strides; strides.resize(sliceOp.getType().template cast().getRank(), 1); - rewriter.replaceOpWithNewOp( + rewriter.replaceOpWithNewOp( sliceOp, sliceOp.getType(), input, ValueRange({}), ValueRange({}), ValueRange({}), sliceOp.start(), sliceOp.size(), rewriter.getI64ArrayAttr(strides)); diff --git a/mlir/lib/Conversion/TosaToStandard/TosaToStandardPass.cpp b/mlir/lib/Conversion/TosaToStandard/TosaToStandardPass.cpp --- a/mlir/lib/Conversion/TosaToStandard/TosaToStandardPass.cpp +++ b/mlir/lib/Conversion/TosaToStandard/TosaToStandardPass.cpp @@ -35,6 +35,7 @@ target.addIllegalOp(); target.addIllegalOp(); target.addLegalDialect(); + target.addLegalDialect(); mlir::tosa::populateTosaToStandardConversionPatterns(&patterns); if (failed(applyPartialConversion(getOperation(), target, diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -16,6 +16,7 @@ #include "mlir/Dialect/Linalg/IR/LinalgTypes.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/AffineExprVisitor.h" #include "mlir/IR/Matchers.h" #include "mlir/IR/OpImplementation.h" @@ -743,22 +744,23 @@ namespace { /// Since `init_tensor` operation creates a tensor needed only for its shape, a -/// subtensor of this is also needed only for its shape. The result can be -/// replaced by a new init_tensor operation of the same size as the subtensor -/// op. 
-struct FoldInitTensorWithSubTensorOp : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(SubTensorOp subtensorOp, +/// slice of this is also needed only for its shape. The result can be +/// replaced by a new init_tensor operation of the same size as the extract +/// slice op. +struct FoldInitTensorWithExtractSliceOp + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(tensor::ExtractSliceOp sliceOp, PatternRewriter &rewriter) const override { - if (!subtensorOp.source().getDefiningOp()) + if (!sliceOp.source().getDefiningOp()) return failure(); rewriter.replaceOpWithNewOp( - subtensorOp, subtensorOp.sizes(), + sliceOp, sliceOp.sizes(), llvm::to_vector<4>(llvm::map_range( - subtensorOp.static_sizes(), + sliceOp.static_sizes(), [](Attribute attr) { return attr.cast().getInt(); })), - subtensorOp.getSourceType().getElementType()); + sliceOp.getSourceType().getElementType()); return success(); } }; @@ -794,7 +796,7 @@ void InitTensorOp::getCanonicalizationPatterns(RewritePatternSet &results, MLIRContext *context) { - results.add, FoldInitTensorWithTensorReshapeOp, ReplaceStaticShapeDims>(context); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp @@ -15,6 +15,7 @@ #include "mlir/Dialect/Math/IR/Math.h" #include "mlir/Dialect/StandardOps/Transforms/Passes.h" #include "mlir/Dialect/StandardOps/Utils/Utils.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Vector/VectorOps.h" #include "mlir/IR/BuiltinDialect.h" #include "mlir/IR/Operation.h" @@ -232,8 +233,8 @@ } }; -/// Convert `subtensor %t [offsets][sizes][strides] -> %st` to an alloc + copy -/// pattern. +/// Convert `extract_slice %t [offsets][sizes][strides] -> %st` to an +/// alloc + copy pattern. /// ``` /// %a = alloc(sizes) /// %sv = subview %source [offsets][sizes][strides] @@ -242,21 +243,22 @@ /// /// This pattern is arguable a std pattern once linalg::CopyOp becomes /// std::CopyOp. -class SubTensorOpConverter : public OpConversionPattern { +class ExtractSliceOpConverter + : public OpConversionPattern { public: - using OpConversionPattern::OpConversionPattern; + using OpConversionPattern::OpConversionPattern; LogicalResult - matchAndRewrite(SubTensorOp op, ArrayRef operands, + matchAndRewrite(tensor::ExtractSliceOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { - SubTensorOpAdaptor adaptor(operands, op->getAttrDictionary()); + tensor::ExtractSliceOpAdaptor adaptor(operands, op->getAttrDictionary()); Value sourceMemref = adaptor.source(); assert(sourceMemref.getType().isa()); MemRefType subviewMemRefType = getTypeConverter()->convertType(op.getType()).cast(); // op.sizes() capture exactly the dynamic alloc operands matching the - // subviewMemRefType thanks to subview/subtensor canonicalization and + // subviewMemRefType thanks to subview/slice canonicalization and // verification. Value alloc = rewriter.create( op.getLoc(), subviewMemRefType, op.sizes()); @@ -269,7 +271,7 @@ } }; -/// Convert `subtensor_insert %source into %dest [offsets][sizes][strides] -> +/// Convert `insert_slice %source into %dest [offsets][sizes][strides] -> /// %t` to an buffer_cast + subview + copy + tensor_load pattern. 
/// buffer_cast and tensor_load are inserted automatically by the /// conversion infra: @@ -281,15 +283,15 @@ /// /// This pattern is arguable a std pattern once linalg::CopyOp becomes /// std::CopyOp. -class SubTensorInsertOpConverter - : public OpConversionPattern { +class InsertSliceOpConverter + : public OpConversionPattern { public: - using OpConversionPattern::OpConversionPattern; + using OpConversionPattern::OpConversionPattern; LogicalResult - matchAndRewrite(SubTensorInsertOp op, ArrayRef operands, + matchAndRewrite(tensor::InsertSliceOp op, ArrayRef operands, ConversionPatternRewriter &rewriter) const final { - SubTensorInsertOpAdaptor adaptor(operands, op->getAttrDictionary()); + tensor::InsertSliceOpAdaptor adaptor(operands, op->getAttrDictionary()); Value sourceMemRef = adaptor.source(); assert(sourceMemRef.getType().isa()); @@ -323,7 +325,8 @@ // Mark all Standard operations legal. target.addLegalDialect(); - target.addIllegalOp(); + target.addIllegalOp(); // Mark all Linalg operations illegal as long as they work on tensors. auto isLegalOperation = [&](Operation *op) { @@ -355,8 +358,8 @@ BufferizeInitTensorOp, BufferizeTensorReshapeOp, BufferizeTensorReshapeOp, - SubTensorOpConverter, - SubTensorInsertOpConverter + ExtractSliceOpConverter, + InsertSliceOpConverter >(typeConverter, patterns.getContext()); // clang-format on } diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp @@ -77,7 +77,7 @@ // out of the function at each call site. // // iii. as an optimization over ii., it may be possible to reuse an argument -// and only want to return a subtensor. +// and only want to return a slice. // This may forego allocation by letting *all* callers decide whether to // pass a new *aliasing* memref function argument (i.e. a subview). // Without loss of generality, callers may agree to allocate a new buffer @@ -284,7 +284,7 @@ // 5. Wheher an op bufferizes to a memory read. // 6. Wheher an op bufferizes to a memory write. // These interfaces are necessary to distinguish between various cases and allow -// special inplace behavior for (SubTensorOp, SubTensorInsertOp) pairs. +// special inplace behavior for (ExtractSliceOp, InsertSliceOp) pairs. //===----------------------------------------------------------------------===// /// Return `true` if the op is explicitly supported by bufferization or if it @@ -295,8 +295,8 @@ // clang-format off isa(op) // clang-format on || (none_of(op->getResultTypes(), @@ -339,8 +339,7 @@ /// Return the OpResult that may bufferize into the same buffer as `opOperand` /// when the op is bufferized inplace. /// Return null if no such result exists. -static OpResult getInplaceableOpResult(SubTensorInsertOp op, - OpOperand &opOperand) { +static OpResult getInplaceableOpResult(InsertSliceOp op, OpOperand &opOperand) { if (opOperand.get() != op.dest()) return OpResult(); return op->getResult(0); @@ -357,12 +356,12 @@ // Ops that perform destructive updates on operand(s) to produce // result(s). .Case( [&](auto op) { return getInplaceableOpResult(op, opOperand); }) - // SubTensorOp is special, when bufferized inplace it just returns an + // ExtractSliceOp is special, when bufferized inplace it just returns an // alias to its operand. Its result is never inplaceable on its operand. 
- .Case([&](SubTensorOp op) { return OpResult(); }) + .Case([&](ExtractSliceOp op) { return OpResult(); }) // Other ops. .Default([&](Operation *op) { return OpResult(); }); // clang-format on @@ -380,10 +379,10 @@ return TypeSwitch(opOperand.getOwner()) // ReturnOp has no result. .Case([&](ReturnOp op) { return OpResult(); }) - // SubTensorOp is different: its result is not inplaceable on op.source + // ExtractSliceOp is different: its result is not inplaceable on op.source // but when bufferized inplace, the result is an aliasing subregion of // op.source. - .Case([&](SubTensorOp op) { return op->getResult(0); }) + .Case([&](ExtractSliceOp op) { return op->getResult(0); }) .Default( [&](Operation *op) { return getInplaceableOpResult(opOperand); }); } @@ -395,8 +394,9 @@ // it. Conservatively return true. if (!maybeOpResult) return true; - // SubTensorOp alone doesn't bufferize to a memory read, one of its uses may. - if (isa(opOperand.getOwner())) + // ExtractSliceOp alone doesn't bufferize to a memory read, one of its uses + // may. + if (isa(opOperand.getOwner())) return false; if (auto linalgOp = dyn_cast(opOperand.getOwner())) return linalgOp.isInputTensor(&opOperand) || @@ -425,8 +425,9 @@ // A ReturnOp is not a write. if (isa(opOperand.getOwner())) return false; - // SubTensorOp alone doesn't bufferize to a memory write, one of its uses may. - if (maybeOpResult->getDefiningOp()) + // ExtractSliceOp alone doesn't bufferize to a memory write, one of its uses + // may. + if (maybeOpResult->getDefiningOp()) return false; // If we have a matching OpResult, this is a write. // Additionally allow to restrict to only inPlace write, if so specified. @@ -442,10 +443,10 @@ /// The BufferizationAliasInfo class maintains a list of buffer aliases and /// equivalence classes to support bufferization. -/// SubTensorOps have special behavior, they act as a level of indirection for -/// bufferization. They don't create reads or writes themselves and analysis +/// ExtractSliceOps have special behavior, they act as a level of indirection +/// for bufferization. They don't create reads or writes themselves and analysis /// needs to look through their uses. -/// SubTensorOp + SubTensorInsertOp have special joint behavior: they may +/// ExtractSliceOp + InsertSliceOp have special joint behavior: they may /// bufferize to the same buffer (i.e. subview), which is what introduces the /// need for bufferization classes. /// Some of these functionalities could be refactored in a Bufferizer class that @@ -469,7 +470,7 @@ /// Return true if the buffer to which `operand` would bufferize is equivalent /// to some use that would bufferize to a write to a buffer. - bool aliasesInPlaceWrite(SubTensorOp subTensorOp) const; + bool aliasesInPlaceWrite(ExtractSliceOp extractSliceOp) const; /// Merge result's and operand's aliasing sets and iterate to a fixed point. void bufferizeInPlace(OpResult result, OpOperand &operand, @@ -495,10 +496,10 @@ bool existsNonDominatingRead(OpOperand &opOperand, const DominanceInfo &domInfo) const; - /// Return true if the source of a `subTensorInsertOp` bufferizes to an - /// equivalent SubTensorOp. - bool isSourceEquivalentToAMatchingSubTensorOp( - SubTensorInsertOp subTensorInsertOp) const; + /// Return true if the source of a `insertSliceOp` bufferizes to an + /// equivalent ExtractSliceOp. + bool isSourceEquivalentToAMatchingExtractSliceOp( + InsertSliceOp insertSliceOp) const; /// Print to `os`. 
void print(raw_ostream &os) const; @@ -519,13 +520,13 @@ /// Iteratively merge alias sets until a fixed-point. void mergeAliasesToFixedPoint(); - /// Return true if the (SubTensorOp, SubTensorInsertOp) pair match (i.e. + /// Return true if the (ExtractSliceOp, InsertSliceOp) pair match (i.e. /// equivalent operand / result and same offset/sizes/strides specification). /// /// This is one particular type of relationship between ops on tensors that /// reduce to an equivalence on buffers. This should be generalized and /// exposed as interfaces on the proper types. - bool areEquivalentSubTensorOps(SubTensorOp st, SubTensorInsertOp sti) const; + bool areEquivalentExtractSliceOps(ExtractSliceOp st, InsertSliceOp sti) const; /// Return true if there is a `candidateOp` that would write to memory after /// bufferization and such that: @@ -658,10 +659,10 @@ /// Return true if the buffer to which `operand` would bufferize is equivalent /// to some use that would bufferize to a write to a buffer. bool BufferizationAliasInfo::aliasesInPlaceWrite( - SubTensorOp subTensorOp) const { + ExtractSliceOp extractSliceOp) const { LDBG("----Start aliasesInPlaceWrite\n"); - LDBG("-------for op: " << *subTensorOp.getOperation() << '\n'); - for (Value v : getAliasInfoRef(subTensorOp.result())) { + LDBG("-------for op: " << *extractSliceOp.getOperation() << '\n'); + for (Value v : getAliasInfoRef(extractSliceOp.result())) { for (auto &use : v.getUses()) { if (bufferizesToMemoryWrite(use, InPlaceSpec::True)) { LDBG("-----------wants to bufferize to inPlace write: " @@ -670,7 +671,7 @@ } } } - LDBG("----------->subtensor does not alias an inplace write"); + LDBG("----------->extract_slice does not alias an inplace write"); return false; } @@ -796,16 +797,16 @@ return false; } -/// Return true if the source of a `subTensorInsertOp` bufferizes to an -/// equivalent SubTensorOp. -bool BufferizationAliasInfo::isSourceEquivalentToAMatchingSubTensorOp( - SubTensorInsertOp subTensorInsertOp) const { - auto leaderIt = equivalentInfo.findLeader(subTensorInsertOp.source()); +/// Return true if the source of a `insertSliceOp` bufferizes to an +/// equivalent ExtractSliceOp. +bool BufferizationAliasInfo::isSourceEquivalentToAMatchingExtractSliceOp( + InsertSliceOp insertSliceOp) const { + auto leaderIt = equivalentInfo.findLeader(insertSliceOp.source()); for (auto mit = leaderIt, meit = equivalentInfo.member_end(); mit != meit; ++mit) { - if (areEquivalentSubTensorOps( - dyn_cast_or_null(mit->v.getDefiningOp()), - subTensorInsertOp)) + if (areEquivalentExtractSliceOps( + dyn_cast_or_null(mit->v.getDefiningOp()), + insertSliceOp)) return true; } return false; @@ -874,8 +875,8 @@ /// This is one particular type of relationship between ops on tensors that /// reduce to an equivalence on buffers. This should be generalized and exposed /// as interfaces on the proper types. -bool BufferizationAliasInfo::areEquivalentSubTensorOps( - SubTensorOp st, SubTensorInsertOp sti) const { +bool BufferizationAliasInfo::areEquivalentExtractSliceOps( + ExtractSliceOp st, InsertSliceOp sti) const { if (!st || !sti) return false; if (!equivalentInfo.isEquivalent(st.source(), sti.dest())) @@ -950,47 +951,47 @@ return false; } - // The case `opToBufferize` isa SubTensorOp is important enough that we look - // for it specifically. The key information to discover is whether the - // aliasing read or write come from a matching SubTensorInsertOp. + // The case `opToBufferize` isa ExtractSliceOp is important enough that we + // look for it specifically. 
The key information to discover is whether the + // aliasing read or write come from a matching InsertSliceOp. // Such a pattern is introduced by tiling and is the key inplace condition // not to miss. - if (auto subTensorOp = dyn_cast(opToBufferize)) { - if (auto subTensorInsertOp = dyn_cast(aliasingReadOp)) { - // %1 = subtensor %0[%offset_sizes_and_strides_1] + if (auto extractSliceOp = dyn_cast(opToBufferize)) { + if (auto insertSliceOp = dyn_cast(aliasingReadOp)) { + // %1 = extract_slice %0[%offset_sizes_and_strides_1] // // ... // 0 or more of inplace compute that reduces to: %X is an // // aliasingWrite equivalent to %1. // %W = inplace_write(%1) // - // // aliasingRead %Y in subtensor_insert - // ... = subtensor_insert %W into %R[%offset_sizes_and_strides_1] - if (aliasingRead.get() == subTensorInsertOp.dest() && + // // aliasingRead %Y in insert_slice + // ... = insert_slice %W into %R[%offset_sizes_and_strides_1] + if (aliasingRead.get() == insertSliceOp.dest() && // TODO: This is currently too restrictive and misses clobberings. // When available, use container-containee analysis: the condition // should be that the `aliasingWrite` is contained within - // `subTensorInsertOp.source()`. + // `insertSliceOp.source()`. equivalentInfo.isEquivalent(aliasingWrite.get(), - subTensorInsertOp.source()) && - areEquivalentSubTensorOps(subTensorOp, subTensorInsertOp)) { - LDBG("---->clobbering matching subtensor/subtensor_insert\n"); + insertSliceOp.source()) && + areEquivalentExtractSliceOps(extractSliceOp, insertSliceOp)) { + LDBG("---->clobbering matching extract_slice/insert_slice\n"); return true; } - // %1 = subtensor %0[%offset_sizes_and_strides_1] + // %1 = extract_slice %0[%offset_sizes_and_strides_1] // // ... // bunch of inplace ops that reduce to %X, equivalent to %1. // %X = inplace_write(%1) // - // // aliasingRead %X in subtensor_insert - // // aliasingWrite %Y in subtensor_insert - // ... = subtensor_insert %X into %Y[%offset_sizes_and_strides_1] + // // aliasingRead %X in insert_slice + // // aliasingWrite %Y in insert_slice + // ... = insert_slice %X into %Y[%offset_sizes_and_strides_1] if (aliasingReadOp == aliasingWriteOp) { - assert(aliasingRead.get() == subTensorInsertOp.source() && - "expected read to source of subtensor_insert"); - assert(aliasingWrite.get() == subTensorInsertOp.dest() && - "expected write to dest of subtensor_insert"); - if (areEquivalentSubTensorOps(subTensorOp, subTensorInsertOp)) { - LDBG("---->clobbering matching subtensor/subtensor_insert\n"); + assert(aliasingRead.get() == insertSliceOp.source() && + "expected read to source of insert_slice"); + assert(aliasingWrite.get() == insertSliceOp.dest() && + "expected write to dest of insert_slice"); + if (areEquivalentExtractSliceOps(extractSliceOp, insertSliceOp)) { + LDBG("---->clobbering matching extract_slice/insert_slice\n"); return true; } } @@ -1262,114 +1263,114 @@ return success(); } -/// Bufferize SubTensorOp to subview with optional alloc + copy depending on +/// Bufferize ExtractSliceOp to subview with optional alloc + copy depending on /// whether or not it is marked inplaceable. -/// Note that `getInplaceableOpResult` on a SubTensorOp always returns null. -/// As consequence a SubTensorOp always alloc + copy when taken in +/// Note that `getInplaceableOpResult` on a ExtractSliceOp always returns null. +/// As consequence a ExtractSliceOp always alloc + copy when taken in /// isolation. 
-static LogicalResult bufferize(OpBuilder &b, SubTensorOp subTensorOp, +static LogicalResult bufferize(OpBuilder &b, ExtractSliceOp extractSliceOp, BlockAndValueMapping &bvm, const BufferizationAliasInfo &aliasInfo) { - LDBG("bufferize: " << *subTensorOp << '\n'); + LDBG("bufferize: " << *extractSliceOp << '\n'); // Take a guard before anything else. OpBuilder::InsertionGuard g(b); - b.setInsertionPoint(subTensorOp); + b.setInsertionPoint(extractSliceOp); - Location loc = subTensorOp.getLoc(); + Location loc = extractSliceOp.getLoc(); // Bail if source was not bufferized. - Value srcMemref = lookup(bvm, subTensorOp.source()); + Value srcMemref = lookup(bvm, extractSliceOp.source()); if (!srcMemref) return failure(); auto srcMemrefType = srcMemref.getType().cast(); - auto dstTensorType = subTensorOp.result().getType().cast(); + auto dstTensorType = + extractSliceOp.result().getType().cast(); // If not inplaceable, alloc. Value alloc; - auto inPlace = getInPlace(subTensorOp->getResult(0)); + auto inPlace = getInPlace(extractSliceOp->getResult(0)); if (inPlace != InPlaceSpec::True) { - alloc = - createNewAllocDeallocPairForShapedValue(b, loc, subTensorOp.result()); + alloc = createNewAllocDeallocPairForShapedValue(b, loc, + extractSliceOp.result()); b.setInsertionPointAfter(alloc.getDefiningOp()); } // Bufferize to subview. auto subviewMemRefType = memref::SubViewOp::inferRankReducedResultType( - dstTensorType.getRank(), srcMemrefType, subTensorOp.getMixedOffsets(), - subTensorOp.getMixedSizes(), subTensorOp.getMixedStrides()) + dstTensorType.getRank(), srcMemrefType, + extractSliceOp.getMixedOffsets(), extractSliceOp.getMixedSizes(), + extractSliceOp.getMixedStrides()) .cast(); Value subView = b.create( - loc, subviewMemRefType, srcMemref, subTensorOp.getMixedOffsets(), - subTensorOp.getMixedSizes(), subTensorOp.getMixedStrides()); + loc, subviewMemRefType, srcMemref, extractSliceOp.getMixedOffsets(), + extractSliceOp.getMixedSizes(), extractSliceOp.getMixedStrides()); /// If not inplaceable, copy. if (alloc) { - b.create(subTensorOp.getLoc(), subView, alloc); + b.create(extractSliceOp.getLoc(), subView, alloc); subView = alloc; } - map(bvm, subTensorOp.result(), subView); + map(bvm, extractSliceOp.result(), subView); return success(); } -static LogicalResult bufferize(OpBuilder &b, - SubTensorInsertOp subTensorInsertOp, +static LogicalResult bufferize(OpBuilder &b, InsertSliceOp insertSliceOp, BlockAndValueMapping &bvm, const BufferizationAliasInfo &aliasInfo) { - LDBG("bufferize: " << *subTensorInsertOp << '\n'); + LDBG("bufferize: " << *insertSliceOp << '\n'); // Take a guard before anything else. OpBuilder::InsertionGuard g(b); - b.setInsertionPoint(subTensorInsertOp); - Location loc = subTensorInsertOp.getLoc(); + b.setInsertionPoint(insertSliceOp); + Location loc = insertSliceOp.getLoc(); - Value dstMemref = lookup(bvm, subTensorInsertOp.dest()); + Value dstMemref = lookup(bvm, insertSliceOp.dest()); if (!dstMemref) return failure(); - auto inPlace = getInPlace(subTensorInsertOp->getResult(0)); + auto inPlace = getInPlace(insertSliceOp->getResult(0)); if (inPlace != InPlaceSpec::True) { - // Since subtensor_insert arise from tiling and introducing loops, this + // Since insert_slice arise from tiling and introducing loops, this // case is generally a deal breaker. When used with loops, this ends up // cloning the whole tensor on every single iteration and is a symptom // of a catastrophically bad scheduling decision. // TODO: be very loud about it or even consider failing the pass. 
- Value newDstMemref = createNewAllocDeallocPairForShapedValue( - b, loc, subTensorInsertOp.result()); + Value newDstMemref = + createNewAllocDeallocPairForShapedValue(b, loc, insertSliceOp.result()); b.setInsertionPointAfter(newDstMemref.getDefiningOp()); - b.create(subTensorInsertOp.getLoc(), dstMemref, newDstMemref); + b.create(insertSliceOp.getLoc(), dstMemref, newDstMemref); dstMemref = newDstMemref; } auto dstMemrefType = dstMemref.getType().cast(); - Value srcMemref = lookup(bvm, subTensorInsertOp.source()); + Value srcMemref = lookup(bvm, insertSliceOp.source()); if (!srcMemref) return failure(); auto subviewMemRefType = memref::SubViewOp::inferRankReducedResultType( - subTensorInsertOp.getSourceType().getRank(), dstMemrefType, - subTensorInsertOp.getMixedOffsets(), - subTensorInsertOp.getMixedSizes(), - subTensorInsertOp.getMixedStrides()) + insertSliceOp.getSourceType().getRank(), dstMemrefType, + insertSliceOp.getMixedOffsets(), insertSliceOp.getMixedSizes(), + insertSliceOp.getMixedStrides()) .cast(); // A copy of the source buffer is needed if either: // - The producer of `source` is not inplace. This is the case where a - // subtensor is computed out of place into the inplace full tensor. + // slice is computed out of place into the inplace full tensor. // - The result is not inplace. This is the case where the whole tensor is // cloned and the clone needs to be updated. - if (!aliasInfo.isSourceEquivalentToAMatchingSubTensorOp(subTensorInsertOp) || + if (!aliasInfo.isSourceEquivalentToAMatchingExtractSliceOp(insertSliceOp) || inPlace != InPlaceSpec::True) { - LDBG("subtensor_insert needs extra source copy: " - << subTensorInsertOp.source() << " -> copy\n"); + LDBG("insert_slice needs extra source copy: " << insertSliceOp.source() + << " -> copy\n"); // Take a subview of the dst. Value subView = b.create( - loc, subviewMemRefType, dstMemref, subTensorInsertOp.getMixedOffsets(), - subTensorInsertOp.getMixedSizes(), subTensorInsertOp.getMixedStrides()); - b.create(subTensorInsertOp.getLoc(), srcMemref, subView); + loc, subviewMemRefType, dstMemref, insertSliceOp.getMixedOffsets(), + insertSliceOp.getMixedSizes(), insertSliceOp.getMixedStrides()); + b.create(insertSliceOp.getLoc(), srcMemref, subView); } - map(bvm, subTensorInsertOp.result(), dstMemref); + map(bvm, insertSliceOp.result(), dstMemref); return success(); } @@ -1433,54 +1434,54 @@ //===----------------------------------------------------------------------===// /// -/// Rationale for bufferizing `%1 = subtensor %0[...]` inplace. +/// Rationale for bufferizing `%1 = tensor.extract_slice %0[...]` inplace. /// =========================================================== /// -/// When bufferized out of place, a SubTensorOp lowers to alloc + copy. This +/// When bufferized out of place, a ExtractSlice lowers to alloc + copy. This /// cannot change the flow of information for either the source or the /// result buffers. /// -/// When bufferized inplace, a SubTensorOp does not by itself create any read or -/// write from memory. Instead, it has the effect of merging the alias sets of -/// the source and the result buffers. +/// When bufferized inplace, a ExtractSliceOp does not by itself create any read +/// or write from memory. Instead, it has the effect of merging the alias sets +/// of the source and the result buffers. /// /// An analysis is required to ensure inplace bufferization would not result in /// RaW dependence violations. 
-static void bufferizableInPlaceAnalysis(SubTensorOp subTensorOp, +static void bufferizableInPlaceAnalysis(ExtractSliceOp extractSliceOp, BufferizationAliasInfo &aliasInfo, const DominanceInfo &domInfo) { LDBG('\n'); - LDBG("Try to bufferize subtensor inplace: " << *subTensorOp << '\n'); + LDBG("Try to bufferize extract_slice inplace: " << *extractSliceOp << '\n'); - // If `subTensorOp` were to be bufferized inplace, it cannot end up + // If `extractSliceOp` were to be bufferized inplace, it cannot end up // aliasing a write into a non-writeable buffer. bool wouldCreateAliasingWriteToNonWriteableBuffer = - aliasInfo.aliasesInPlaceWrite(subTensorOp) && - aliasInfo.aliasesNonWriteableBuffer(subTensorOp->getOpOperand(0)); + aliasInfo.aliasesInPlaceWrite(extractSliceOp) && + aliasInfo.aliasesNonWriteableBuffer(extractSliceOp->getOpOperand(0)); if (wouldCreateAliasingWriteToNonWriteableBuffer) LDBG("->the corresponding buffer is not writeable\n"); LDBG("->bufferizes to writeable inplace buffer\n"); - // In any of subTensorOp.result's aliases, can we find 2 such that we hit + // In any of extractSliceOp.result's aliases, can we find 2 such that we hit // an interfering write? - Value s = subTensorOp.source(), r = subTensorOp.result(); + Value s = extractSliceOp.source(), r = extractSliceOp.result(); bool foundInterference = wouldCreateAliasingWriteToNonWriteableBuffer || // Do not consider (s, s) and (r, r) as all the // aliasings already exist by construction; we are // interested in new interfering aliases only. aliasInfo.wouldCreateReadAfterWriteInterference( - s, r, subTensorOp, domInfo) || + s, r, extractSliceOp, domInfo) || aliasInfo.wouldCreateReadAfterWriteInterference( - r, s, subTensorOp, domInfo); + r, s, extractSliceOp, domInfo); if (foundInterference) { - setInPlaceOpResult(subTensorOp->getResult(0), InPlaceSpec::False); + setInPlaceOpResult(extractSliceOp->getResult(0), InPlaceSpec::False); } else { - setInPlaceOpResult(subTensorOp->getResult(0), InPlaceSpec::True); - aliasInfo.bufferizeInPlace(subTensorOp->getResult(0), - subTensorOp->getOpOperand(0)); + setInPlaceOpResult(extractSliceOp->getResult(0), InPlaceSpec::True); + aliasInfo.bufferizeInPlace(extractSliceOp->getResult(0), + extractSliceOp->getOpOperand(0)); } - LDBG("Done bufferizing subtensor\n"); + LDBG("Done bufferizing extract_slice\n"); } /// Analyze the (opOperand, result) pair to determine whether the result can @@ -1490,8 +1491,8 @@ BufferizationAliasInfo &aliasInfo, const DominanceInfo &domInfo) { Operation *op = result.getDefiningOp(); - assert(result && !isa(op) && - "expected OpResult not coming from a SubTensorOp"); + assert(result && !isa(op) && + "expected OpResult not coming from a ExtractSliceOp"); int64_t resultNumber = result.getResultNumber(); (void)resultNumber; @@ -1541,48 +1542,47 @@ "expected a funcOp definition with a body"); // Collect ops so we can build our own traversal. 
- SmallVector subTensorOps; - SmallVector subTensorInsertOps; - SmallVector nonSubTensorOps; + SmallVector extractSliceOps; + SmallVector insertSliceOps; + SmallVector nonSliceOps; funcOp.walk([&](Operation *op) { - if (auto subTensorOp = dyn_cast(op)) - return subTensorOps.push_back(subTensorOp); - if (auto subTensorInsertOp = dyn_cast(op)) - return subTensorInsertOps.push_back(subTensorInsertOp); + if (auto extractSliceOp = dyn_cast(op)) + return extractSliceOps.push_back(extractSliceOp); + if (auto insertSliceOp = dyn_cast(op)) + return insertSliceOps.push_back(insertSliceOp); auto isaTensor = [](Type t) { return t.isa(); }; // No tensors => no buffers. if (none_of(op->getOperandTypes(), isaTensor) && none_of(op->getResultTypes(), isaTensor)) return; - nonSubTensorOps.push_back(op); + nonSliceOps.push_back(op); }); - // Bufferize SubTensorInsertOp greedily: we almost never want to bufferize + // Bufferize InsertSliceOp greedily: we almost never want to bufferize // the tensor "inserted into" to become out-of-place. This implementation - // does not distinguish between different SubTensorInsertOps. If we want - // finer-grained behavior, we could order the SubTensorInsertOps with some - // metric. - // Walk SubTensorInsertOps in reverse for better interference behavior. - for (SubTensorInsertOp subTensorInsertOp : reverse(subTensorInsertOps)) { - OpOperand &destOpOperand = subTensorInsertOp->getOpOperand(1); + // does not distinguish between different InsertSliceOp. If we want + // finer-grained behavior, we could order the InsertSliceOp with some metric. + // Walk InsertSliceOp in reverse for better interference behavior. + for (InsertSliceOp insertSliceOp : reverse(insertSliceOps)) { + OpOperand &destOpOperand = insertSliceOp->getOpOperand(1); bufferizableInPlaceAnalysis(destOpOperand, getInplaceableOpResult(destOpOperand), aliasInfo, domInfo); } - // Bufferize all ops except SubTensorOp and SubTensorInsertOp which are - // handled separately. + // Bufferize all ops except ExtractSliceOp and InsertSliceOp which are handled + // separately. // Walk other ops in reverse for better interference behavior. - for (Operation *op : reverse(nonSubTensorOps)) + for (Operation *op : reverse(nonSliceOps)) for (OpOperand &opOperand : op->getOpOperands()) if (OpResult result = getInplaceableOpResult(opOperand)) bufferizableInPlaceAnalysis(opOperand, result, aliasInfo, domInfo); - // Finally, bufferize SubTensorOp. - // Walk SubTensorOps in reverse for better clobbering behavior: it is easier - // to detect clobbers of smaller subtensors before larger ones. - for (SubTensorOp subTensorOp : reverse(subTensorOps)) - bufferizableInPlaceAnalysis(subTensorOp, aliasInfo, domInfo); + // Finally, bufferize ExtractSliceOp. + // Walk ExtractSliceOps in reverse for better clobbering behavior: it is + // easier to detect clobbers of smaller slices before larger ones. 
+ for (ExtractSliceOp extractSliceOp : reverse(extractSliceOps)) + bufferizableInPlaceAnalysis(extractSliceOp, aliasInfo, domInfo); LDBG("End InPlaceAnalysisFuncOpInternals:\n" << funcOp << '\n'); } @@ -1611,8 +1611,8 @@ .Case( [&](auto op) { return bufferize(b, op, bvm, aliasInfo); }) // clang-format on diff --git a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp @@ -18,6 +18,7 @@ #include "mlir/Dialect/Linalg/Passes.h" #include "mlir/Dialect/Linalg/Transforms/Transforms.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/AffineMap.h" #include "mlir/IR/BuiltinTypes.h" @@ -457,8 +458,8 @@ }; } // namespace -/// Get the reassociation maps to fold the result of a subtensor (or source of a -/// subtensor_insert) operation with given offsets, and sizes to its +/// Get the reassociation maps to fold the result of a extract_slice (or source +/// of a insert_slice) operation with given offsets, and sizes to its /// rank-reduced version. This is only done for the cases where the size is 1 /// and offset is 0. Strictly speaking the offset 0 is not required in general, /// but non-zero offsets are not handled by SPIR-V backend at this point (and @@ -486,41 +487,41 @@ } namespace { -/// Convert `subtensor` operations to rank-reduced versions. -struct UseRankReducedSubTensorOp : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; +/// Convert `extract_slice` operations to rank-reduced versions. +struct UseRankReducedExtractSliceOp + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(SubTensorOp subTensorOp, + LogicalResult matchAndRewrite(tensor::ExtractSliceOp sliceOp, PatternRewriter &rewriter) const override { - RankedTensorType resultType = subTensorOp.getType(); - SmallVector offsets = subTensorOp.getMixedOffsets(); - SmallVector sizes = subTensorOp.getMixedSizes(); - SmallVector strides = subTensorOp.getMixedStrides(); + RankedTensorType resultType = sliceOp.getType(); + SmallVector offsets = sliceOp.getMixedOffsets(); + SmallVector sizes = sliceOp.getMixedSizes(); + SmallVector strides = sliceOp.getMixedStrides(); auto reassociation = getReassociationMapForFoldingUnitDims(sizes); if (!reassociation || reassociation->size() == static_cast(resultType.getRank())) return failure(); - auto rankReducedType = - SubTensorOp::inferRankReducedResultType(reassociation->size(), - subTensorOp.getSourceType(), - offsets, sizes, strides) - .cast(); - - Location loc = subTensorOp.getLoc(); - Value newSubTensor = rewriter.create( - loc, rankReducedType, subTensorOp.source(), offsets, sizes, strides); - rewriter.replaceOpWithNewOp( - subTensorOp, resultType, newSubTensor, *reassociation); + auto rankReducedType = tensor::ExtractSliceOp::inferRankReducedResultType( + reassociation->size(), sliceOp.getSourceType(), + offsets, sizes, strides) + .cast(); + + Location loc = sliceOp.getLoc(); + Value newSlice = rewriter.create( + loc, rankReducedType, sliceOp.source(), offsets, sizes, strides); + rewriter.replaceOpWithNewOp(sliceOp, resultType, + newSlice, *reassociation); return success(); } }; -/// Convert `subtensor_insert` operations to rank-reduced versions. 
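For illustration, a minimal sketch of what this rank reduction aims at (shapes and the reassociation indices are invented, and the exact reshape op depends on the input): unit dimensions with offset 0 and size 1 are dropped from the slice type and re-introduced by an expanding reshape.

```mlir
// Before: the slice result keeps two unit dimensions.
%0 = tensor.extract_slice %t[0, 0, %i, 0] [1, 1, 4, 8] [1, 1, 1, 1]
    : tensor<1x1x20x8xf32> to tensor<1x1x4x8xf32>

// After: rank-reduced slice followed by an expanding reshape.
%1 = tensor.extract_slice %t[0, 0, %i, 0] [1, 1, 4, 8] [1, 1, 1, 1]
    : tensor<1x1x20x8xf32> to tensor<4x8xf32>
%2 = linalg.tensor_expand_shape %1 [[0, 1, 2], [3]]
    : tensor<4x8xf32> into tensor<1x1x4x8xf32>
```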
-struct UseRankReducedSubTensorInsertOp - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; +/// Convert `insert_slice` operations to rank-reduced versions. +struct UseRankReducedInsertSliceOp + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(SubTensorInsertOp insertOp, + LogicalResult matchAndRewrite(tensor::InsertSliceOp insertOp, PatternRewriter &rewriter) const override { RankedTensorType sourceType = insertOp.getSourceType(); SmallVector offsets = insertOp.getMixedOffsets(); @@ -533,7 +534,7 @@ Location loc = insertOp.getLoc(); auto reshapedSource = rewriter.create( loc, insertOp.source(), *reassociation); - rewriter.replaceOpWithNewOp( + rewriter.replaceOpWithNewOp( insertOp, reshapedSource, insertOp.dest(), insertOp.getMixedOffsets(), insertOp.getMixedSizes(), insertOp.getMixedStrides()); return success(); @@ -546,8 +547,9 @@ void mlir::linalg::populateFoldUnitExtentDimsPatterns( RewritePatternSet &patterns) { auto *context = patterns.getContext(); - patterns.add(context); + patterns.add( + context); TensorCollapseShapeOp::getCanonicalizationPatterns(patterns, context); TensorExpandShapeOp::getCanonicalizationPatterns(patterns, context); } diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp @@ -48,8 +48,8 @@ /// are 2 cases: /// a) buffer case: use the SSA value of the views and a simple alias /// analysis on subview ops to determine producer-consumer dependences; -/// b) tensor case: use SSA use-def chains on subtensor ops; -/// 2. greedily fuse the linalg ops that produce the subview/subtensor. +/// b) tensor case: use SSA use-def chains on extract_slice ops; +/// 2. greedily fuse the linalg ops that produce the subview/extract_slice. /// 3. inspect the fused ops and determine whether they have other remaining /// LinalgOp uses. If not, then erase the original producing linalg op. /// @@ -73,13 +73,14 @@ // Extract the subranges from the linearized ranges. for (OpOperand *opOperand : op.getInputAndOutputOperands()) { // The method `getRangeFromOperandShape` requires using SubViewOp or - // SubTensorOps. If the value isnt defined from there continue. + // ExtractSliceOps. If the value isn't defined from there continue. // todo: The method should be adapted to get the values from // `ViewInterface`. The interface needs a `getOrCreateRanges` method which // currently returns a `linalg.range`. The fix here is to move this op to // `std` dialect and add the method to `ViewInterface`. - if (fromSubViewOpOnly && !isa_and_nonnull( - opOperand->get().getDefiningOp())) + if (fromSubViewOpOnly && + !isa_and_nonnull( + opOperand->get().getDefiningOp())) continue; AffineMap map = op.getTiedIndexingMap(opOperand); @@ -221,7 +222,7 @@ SmallVector staticSizesVector(rank, ShapedType::kDynamicSize); SmallVector staticStridesVector( rank, ShapedType::kDynamicStrideOrOffset); - resultTypes.push_back(SubTensorOp::inferResultType( + resultTypes.push_back(tensor::ExtractSliceOp::inferResultType( t.cast(), staticOffsetsVector, staticSizesVector, staticStridesVector)); } @@ -252,15 +253,15 @@ } /// Get the loop range for a dimension `dim` based on the `shapedOperand`. It is -/// expected to be defined by a subview op or a subtensor op. +/// expected to be defined by a subview op or an extract_slice op. 
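For illustration, a minimal sketch of the use-def chain this tensor-side fusion follows (op choice and shapes are invented): the candidate consumer operand is the result of a `tensor.extract_slice` whose source is produced by another Linalg op, so that producer can be recomputed on just the tile.

```mlir
// Producer on the full tensors.
%prod = linalg.matmul ins(%a, %b : tensor<24x32xf32>, tensor<32x64xf32>)
                      outs(%c : tensor<24x64xf32>) -> tensor<24x64xf32>

// Tiling the consumer introduced this slice; fusion walks the use-def edge
// from the consumer operand through it back to %prod and clones the matmul
// so it only computes the 4x64 tile.
%tile = tensor.extract_slice %prod[%i, 0] [4, 64] [1, 1]
    : tensor<24x64xf32> to tensor<4x64xf32>
```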
static Range getRangeFromOperandShape(OpBuilder &b, Location loc, Value shapedOperand, unsigned dim) { Operation *shapeProducingOp = shapedOperand.getDefiningOp(); if (auto subViewOp = dyn_cast(shapeProducingOp)) return subViewOp.getOrCreateRanges(b, loc)[dim]; - if (auto subTensorOp = dyn_cast(shapeProducingOp)) - return subTensorOp.getOrCreateRanges(b, loc)[dim]; - llvm_unreachable("SubviewOp or SubTensorOp expected"); + if (auto sliceOp = dyn_cast(shapeProducingOp)) + return sliceOp.getOrCreateRanges(b, loc)[dim]; + llvm_unreachable("SubviewOp or ExtractSliceOp expected"); } /// Fuses the producer into the loop immediately enclosing the consumer. @@ -439,8 +440,8 @@ if (!producerMap) return llvm::None; - // Must be a subview or a slice to guarantee there are loops we can fuse - // into. + // Must be a subview or an extract_slice to guarantee there are loops we can + // fuse into. auto subView = consumerOpOperand.get().getDefiningOp(); if (!subView) { LLVM_DEBUG(llvm::dbgs() << "\nNot fusable (not a subview)"); @@ -473,8 +474,8 @@ opResult = tensor.cast(); return; } - if (auto subTensorOp = tensor.getDefiningOp()) { - tensor = subTensorOp.source(); + if (auto sliceOp = tensor.getDefiningOp()) { + tensor = sliceOp.source(); continue; } if (auto blockArg = tensor.dyn_cast()) { @@ -512,11 +513,11 @@ Value inputTensor = consumerOpOperand.get(); - // Must be a subtensor to guarantee there are loops we can fuse into. - auto subTensor = inputTensor.getDefiningOp(); - if (!subTensor) { + // Must be an extract_slice op to guarantee there are loops we can fuse into. + auto sliceOp = inputTensor.getDefiningOp(); + if (!sliceOp) { LLVM_DEBUG(llvm::dbgs() - << "\nNot fusable, not a subtensor: " << inputTensor); + << "\nNot fusable, not an extract_slice op: " << inputTensor); return {}; } diff --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp @@ -19,6 +19,7 @@ #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/SCF/Utils.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Vector/VectorOps.h" #include "mlir/Dialect/Vector/VectorUtils.h" #include "mlir/IR/BuiltinOps.h" @@ -42,13 +43,13 @@ /// instructions that need to be hoisted too. struct HoistableWrite { vector::TransferWriteOp transferWriteOp; - SubTensorInsertOp subTensorInsertOp; + tensor::InsertSliceOp insertSliceOp; }; /// Represents a unit of hoistable TransferReadOp. This may comprise other /// instructions that need to be hoisted too. struct HoistableRead { vector::TransferReadOp transferReadOp; - SubTensorOp subTensorOp; + tensor::ExtractSliceOp extractSliceOp; }; } // namespace @@ -71,7 +72,8 @@ } /// Return true is all offsets, sizes and strides are equal. 
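For illustration, a minimal sketch of the loop structure the HoistableRead / HoistableWrite pairs are meant to capture (names, shapes and the payload op are invented): a loop-invariant `tensor.extract_slice` + `vector.transfer_read` of an `iter_args` tensor, paired with a `vector.transfer_write` + `tensor.insert_slice` using identical offsets, sizes and strides, can be hoisted so that only the vector value travels through the loop.

```mlir
%r = scf.for %i = %lb to %ub step %c1 iter_args(%t = %init)
    -> (tensor<128x128xf32>) {
  %slice = tensor.extract_slice %t[0, 0] [4, 8] [1, 1]
      : tensor<128x128xf32> to tensor<4x8xf32>
  %v = vector.transfer_read %slice[%c0, %c0], %pad
      : tensor<4x8xf32>, vector<4x8xf32>
  %v2 = "payload.compute"(%v) : (vector<4x8xf32>) -> vector<4x8xf32>
  %w = vector.transfer_write %v2, %slice[%c0, %c0]
      : vector<4x8xf32>, tensor<4x8xf32>
  %u = tensor.insert_slice %w into %t[0, 0] [4, 8] [1, 1]
      : tensor<4x8xf32> into tensor<128x128xf32>
  scf.yield %u : tensor<128x128xf32>
}
```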
-static bool sameOffsetsSizesAndStrides(SubTensorOp s, SubTensorInsertOp si) { +static bool sameOffsetsSizesAndStrides(tensor::ExtractSliceOp s, + tensor::InsertSliceOp si) { if (s.static_offsets().size() != si.static_offsets().size()) return false; if (s.static_sizes().size() != si.static_sizes().size()) @@ -99,38 +101,37 @@ LLVM_DEBUG(DBGS() << "findMatchingTransferRead for: " << *write.transferWriteOp.getOperation() << "\n"); - if (write.subTensorInsertOp) - LLVM_DEBUG(DBGS() << "findMatchingTransferRead subTensorInsertOp: " - << *write.subTensorInsertOp.getOperation() << "\n"); + if (write.insertSliceOp) + LLVM_DEBUG(DBGS() << "findMatchingTransferRead inserSliceOp: " + << *write.insertSliceOp.getOperation() << "\n"); for (Operation *user : srcTensor.getUsers()) { LLVM_DEBUG(DBGS() << "findMatchingTransferRead inspect user: " << *user << "\n"); - // If HoistableWrite involves a SubTensorInsertOp, we need to find a - // matching SubTensorOp. - SubTensorOp subTensorOp; + // If HoistableWrite involves a InsertSliceOp, we need to find a + // matching ExtractSliceOp. + tensor::ExtractSliceOp sliceOp; Operation *maybeTransferReadUser = user; - if (write.subTensorInsertOp) { - subTensorOp = dyn_cast(user); - if (!subTensorOp || subTensorOp.getResult().getType() != - write.subTensorInsertOp.source().getType()) + if (write.insertSliceOp) { + sliceOp = dyn_cast(user); + if (!sliceOp || sliceOp.getResult().getType() != + write.insertSliceOp.source().getType()) continue; LLVM_DEBUG(DBGS() << "check whether sameOffsetsSizesAndStrides: " - << *subTensorOp << " vs " << *write.subTensorInsertOp - << "\n"); - if (!sameOffsetsSizesAndStrides(subTensorOp, write.subTensorInsertOp)) + << *sliceOp << " vs " << *write.insertSliceOp << "\n"); + if (!sameOffsetsSizesAndStrides(sliceOp, write.insertSliceOp)) continue; LLVM_DEBUG(DBGS() << "sameOffsetsSizesAndStrides: SUCCESS\n"); - // If we got here, subTensorOp is hoistable iff it has exactly 2 uses: + // If we got here, sliceOp is hoistable iff it has exactly 2 uses: // 1. the transfer_write we want to hoist. // 2. a matching transfer_read. // Anything else, we skip. bool skip = false; Operation *otherUser = nullptr; - for (Operation *u : subTensorOp->getUsers()) { + for (Operation *u : sliceOp->getUsers()) { if (u == write.transferWriteOp) continue; if (otherUser) { @@ -149,7 +150,7 @@ auto read = dyn_cast(maybeTransferReadUser); if (read && read.indices() == write.transferWriteOp.indices() && read.getVectorType() == write.transferWriteOp.getVectorType()) - return HoistableRead{read, subTensorOp}; + return HoistableRead{read, sliceOp}; } return HoistableRead(); } @@ -168,13 +169,13 @@ Operation *user = use.getOwner(); // Skip the candidate use, only inspect the "other" uses. if (user == candidateRead.transferReadOp || - user == candidateRead.subTensorOp || user == write.transferWriteOp || - user == write.subTensorInsertOp) + user == candidateRead.extractSliceOp || + user == write.transferWriteOp || user == write.insertSliceOp) continue; - // Consider all transitive uses through a subtensor / subtensor_insert. + // Consider all transitive uses through a extract_slice / insert_slice. // TODO: atm we just bail because a stronger analysis is needed for these // cases. - if (isa(user)) + if (isa(user)) return true; // Consider all transitive uses through a vector.transfer_write. if (auto writeUser = dyn_cast(user)) { @@ -214,7 +215,7 @@ /// Return the `forOp`-invariant HoistableWrite that produces `yieldOperand`. 
/// Return the null HoistableWrite() if it is not comprised of a -/// vector.transfer_write + optional subtensor_insert or if any of the indexings +/// vector.transfer_write + optional insert_slice or if any of the indexings /// is `forOp`-dependent. static HoistableWrite getLoopInvariantTransferWriteOpDefining(scf::ForOp forOp, @@ -229,26 +230,26 @@ return HoistableWrite{write, nullptr}; } - if (auto subTensorInsertOp = v.getDefiningOp()) { - // Inserted subTensor must come from vector.transfer_write. + if (auto insertSliceOp = v.getDefiningOp()) { + // Inserted slice must come from vector.transfer_write. auto write = - subTensorInsertOp.source().getDefiningOp(); + insertSliceOp.source().getDefiningOp(); if (!write) return HoistableWrite(); // Tensor inserted into must be a BBArg at position matching yieldOperand's. - auto bbArg = subTensorInsertOp.dest().dyn_cast(); + auto bbArg = insertSliceOp.dest().dyn_cast(); if (!bbArg || bbArg.getOwner()->getParentOp() != forOp || bbArg.getArgNumber() != /*num iv=*/1 + yieldOperand.getOperandNumber()) return HoistableWrite(); // Indexing inserted into must not depend on `forOp`. - for (Value operand : subTensorInsertOp->getOperands().drop_front( - SubTensorInsertOp::getOffsetSizeAndStrideStartOperandIndex())) + for (Value operand : insertSliceOp->getOperands().drop_front( + tensor::InsertSliceOp::getOffsetSizeAndStrideStartOperandIndex())) if (!forOp.isDefinedOutsideOfLoop(operand)) return HoistableWrite(); - return HoistableWrite{write, subTensorInsertOp}; + return HoistableWrite{write, insertSliceOp}; } return HoistableWrite(); @@ -260,18 +261,18 @@ scf::ForOp forOp = cast(tensorBBArg.getOwner()->getParentOp()); assert(read.transferReadOp && write.transferWriteOp && "expected transfer_read and transfer_write ops to be set"); - assert(((read.subTensorOp && write.subTensorInsertOp) || - (!read.subTensorOp && !write.subTensorInsertOp)) && - "expected matching subtensor / subtensor_insert"); + assert(((read.extractSliceOp && write.insertSliceOp) || + (!read.extractSliceOp && !write.insertSliceOp)) && + "expected matching extract_slice / insert_slice"); LLVM_DEBUG(DBGS() << "In forOp:\n" << *forOp.getOperation() << "\nHoist: " << *read.transferReadOp.getOperation() << "\nHoist: " << *write.transferWriteOp.getOperation() << "\nInvolving: " << tensorBBArg << "\n"); - // If a read subtensor is present, hoist it. - if (read.subTensorOp && failed(forOp.moveOutOfLoop({read.subTensorOp}))) - llvm_unreachable("Unexpected failure moving subtensor out of loop"); + // If a read slice is present, hoist it. + if (read.extractSliceOp && failed(forOp.moveOutOfLoop({read.extractSliceOp}))) + llvm_unreachable("Unexpected failure moving extract_slice out of loop"); // Hoist the transfer_read op. if (failed(forOp.moveOutOfLoop({read.transferReadOp}))) @@ -282,20 +283,20 @@ unsigned initArgNumber = tensorBBArg.getArgNumber() - /*numIvs=*/1; // Update the source tensor. - if (read.subTensorOp) - read.subTensorOp.sourceMutable().assign(forOp.initArgs()[initArgNumber]); + if (read.extractSliceOp) + read.extractSliceOp.sourceMutable().assign(forOp.initArgs()[initArgNumber]); else read.transferReadOp.sourceMutable().assign(forOp.initArgs()[initArgNumber]); // Hoist write after. - if (write.subTensorInsertOp) - write.subTensorInsertOp->moveAfter(forOp); + if (write.insertSliceOp) + write.insertSliceOp->moveAfter(forOp); write.transferWriteOp->moveAfter(forOp); // Update the yield. 
auto yieldOp = cast(forOp.region().front().getTerminator()); - if (write.subTensorInsertOp) - yieldOp->setOperand(initArgNumber, write.subTensorInsertOp.dest()); + if (write.insertSliceOp) + yieldOp->setOperand(initArgNumber, write.insertSliceOp.dest()); else yieldOp->setOperand(initArgNumber, write.transferWriteOp.source()); @@ -306,13 +307,13 @@ // Transfer write has been hoisted, need to update the vector and tensor // source. Replace the result of the loop to use the new tensor created // outside the loop. - // Depending on whether a subtensor_insert is present or not, it carries the + // Depending on whether a insert_slice is present or not, it carries the // update on the tensor operands. - if (write.subTensorInsertOp) { + if (write.insertSliceOp) { newForOp.getResult(initArgNumber) - .replaceAllUsesWith(write.subTensorInsertOp.getResult()); - write.transferWriteOp.sourceMutable().assign(read.subTensorOp.result()); - write.subTensorInsertOp.destMutable().assign(read.subTensorOp.source()); + .replaceAllUsesWith(write.insertSliceOp.getResult()); + write.transferWriteOp.sourceMutable().assign(read.extractSliceOp.result()); + write.insertSliceOp.destMutable().assign(read.extractSliceOp.source()); } else { newForOp.getResult(initArgNumber) .replaceAllUsesWith(write.transferWriteOp.getResult(0)); @@ -350,9 +351,9 @@ LLVM_DEBUG(dbgs() << "\n"; DBGS() << "Candidate write for hoisting: " << *write.transferWriteOp.getOperation() << "\n"); - if (write.subTensorInsertOp) - LLVM_DEBUG(DBGS() << "Candidate subtensor_insert for hoisting: " - << *write.subTensorInsertOp.getOperation() << "\n"); + if (write.insertSliceOp) + LLVM_DEBUG(DBGS() << "Candidate insert_slice for hoisting: " + << *write.insertSliceOp.getOperation() << "\n"); if (llvm::any_of(write.transferWriteOp.indices(), [&forOp](Value index) { return !forOp.isDefinedOutsideOfLoop(index); @@ -788,8 +789,8 @@ // The implementation proceeds in a stack-like fashion: // 1. Iteratively clone and step into the loops, pushing the `packedTensor` // deeper in the stack. - // 2. Create a SubTensorInsert at the top of the stack. - // 3. Iteratively pop and yield the result of the SubTensorInsertOp across + // 2. Create a InsertSliceOp at the top of the stack. + // 3. Iteratively pop and yield the result of the InsertSliceOp across // the cloned loops. SmallVector clonedLoopIvs, leadingPackedTensorIndexings; clonedLoopIvs.reserve(nLoops); @@ -799,10 +800,10 @@ backwardSlice.insert(padTensorOp); // Stack step 1. iteratively clone loops and push `packedTensor`. for (Operation *op : backwardSlice) { - // Specifically sit out in the subtenso(packedTensor) case: this is the + // Specifically sit out in the extract_slice(packedTensor) case: this is the // piece we seek to replace. - if (auto subTensor = dyn_cast(op)) - if (bvm.lookupOrDefault(subTensor.source()) == packedTensor) + if (auto sliceOp = dyn_cast(op)) + if (bvm.lookupOrDefault(sliceOp.source()) == packedTensor) continue; auto effects = dyn_cast(op); bool hasNoEffects = !effects || effects.hasNoEffect(); @@ -839,7 +840,7 @@ packedTensor = clonedForOp.getRegionIterArgs().front(); } - // Stack step 2. create SubTensorInsertOp at the top of the stack. + // Stack step 2. create InsertSliceOp at the top of the stack. // offsets = [clonedLoopIvs, 0 .. 0]. 
SmallVector offsets(leadingPackedTensorIndexings.begin(), leadingPackedTensorIndexings.end()); @@ -856,8 +857,8 @@ SmallVector strides(nLoops + paddedRank, b.getIndexAttr(1)); Value inserted = - b.create(loc, bvm.lookup(padTensorOp.result()), - packedTensor, offsets, sizes, strides); + b.create(loc, bvm.lookup(padTensorOp.result()), + packedTensor, offsets, sizes, strides); // Stack step 3. iteratively pop the stack and propagate the yield. Value valueToYield = inserted; @@ -869,7 +870,7 @@ } // Now the packed tensor is ready, replace the original padding op by a - // 1x..x1 SubTensor [originalLoopIvs, 0 .. 0][1 .. 1, paddedShape][1 .. 1]. + // 1x..x1 slice [originalLoopIvs, 0 .. 0][1 .. 1, paddedShape][1 .. 1]. b.setInsertionPoint(padTensorOp); SmallVector loopIterationCounts = llvm::to_vector<4>(llvm::map_range(packingLoops, [&](Operation *loop) { @@ -888,8 +889,8 @@ packedTensor = scf::getForInductionVarOwner(clonedLoopIvs.front())->getResult(0); padTensorOp.replaceAllUsesWith( - b.create(loc, padTensorOp.getResultType(), packedTensor, - offsets, sizes, strides) + b.create(loc, padTensorOp.getResultType(), + packedTensor, offsets, sizes, strides) ->getResult(0)); Operation *toErase = padTensorOp; diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -254,18 +254,18 @@ res = op.clone(b, loc, resultTensorTypes, tiledOperands); - // Insert a subtensor_insert for each output tensor. + // Insert a insert_slice for each output tensor. unsigned resultIdx = 0; for (OpOperand *opOperand : op.getOutputTensorOperands()) { // TODO: use an interface/adaptor to avoid leaking position in // `tiledOperands`. 
Value outputTensor = tiledOperands[opOperand->getOperandNumber()]; - if (auto subtensor = outputTensor.getDefiningOp()) { - tensorResults.push_back(b.create( - loc, subtensor.source().getType(), res->getResult(resultIdx), - subtensor.source(), subtensor.offsets(), subtensor.sizes(), - subtensor.strides(), subtensor.static_offsets(), - subtensor.static_sizes(), subtensor.static_strides())); + if (auto sliceOp = outputTensor.getDefiningOp()) { + tensorResults.push_back(b.create( + loc, sliceOp.source().getType(), res->getResult(resultIdx), + sliceOp.source(), sliceOp.offsets(), sliceOp.sizes(), + sliceOp.strides(), sliceOp.static_offsets(), sliceOp.static_sizes(), + sliceOp.static_strides())); } else { tensorResults.push_back(res->getResult(resultIdx)); } @@ -406,7 +406,7 @@ scf::ForOp::getCanonicalizationPatterns(patterns, ctx); scf::ParallelOp::getCanonicalizationPatterns(patterns, ctx); ConstantIndexOp::getCanonicalizationPatterns(patterns, ctx); - SubTensorOp::getCanonicalizationPatterns(patterns, ctx); + tensor::ExtractSliceOp::getCanonicalizationPatterns(patterns, ctx); memref::SubViewOp::getCanonicalizationPatterns(patterns, ctx); tensor::CastOp::getCanonicalizationPatterns(patterns, ctx); memref::ViewOp::getCanonicalizationPatterns(patterns, ctx); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -16,6 +16,7 @@ #include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Utils/StructuredOpsUtils.h" #include "mlir/Dialect/Vector/VectorOps.h" #include "mlir/IR/AffineExpr.h" @@ -128,14 +129,13 @@ // Already static shape, no need to pad. if (llvm::none_of(opToPad.getShape(opOperand), ShapedType::isDynamic)) return success(); - auto subtensor = opOperand->get().getDefiningOp(); - // Not a subtensor, cannot construct a static bounding box. - if (!subtensor) + auto sliceOp = opOperand->get().getDefiningOp(); + // Not a slice op, cannot construct a static bounding box. + if (!sliceOp) return failure(); SmallVector staticSizes; staticSizes.reserve(opToPad.getRank(opOperand)); - auto shapedOp = - cast(subtensor.getOperation()); + auto shapedOp = cast(sliceOp.getOperation()); for (auto size : shapedOp.getMixedSizes()) { auto indexAttr = size.is() ? size.get().dyn_cast() @@ -195,8 +195,8 @@ linalg::LinalgOp paddedOp = opToPad.clone(rewriter, loc, resultTensorTypes, newOperands); - // Recover the subtensor out of the new static results. This keeps the - // original linalg op around because it uses the dims of the original results. + // Recover the slice out of the new static results. This keeps the original + // linalg op around because it uses the dims of the original results. // This later folds away. 
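For illustration, a minimal sketch of the pad-to-static flow (shapes, bounding box and padding value are invented): the dynamically sized tile is padded up to a static bounding box derived from the defining `tensor.extract_slice`, the Linalg op runs on static shapes, and a final slice trims the result back to the dynamic size.

```mlir
%tile = tensor.extract_slice %t[%i, 0] [%sz, 16] [1, 1]
    : tensor<?x16xf32> to tensor<?x16xf32>
%padded = linalg.pad_tensor %tile low[0, 0] high[%h, 0] {
^bb0(%a: index, %b: index):
  linalg.yield %cst : f32
} : tensor<?x16xf32> to tensor<4x16xf32>
// ... run the linalg op on the statically shaped %padded, giving %res ...
%trim = tensor.extract_slice %res[0, 0] [%sz, 16] [1, 1]
    : tensor<4x16xf32> to tensor<?x16xf32>
```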
SmallVector paddedSubviewResults; paddedSubviewResults.reserve(opToPad->getNumResults()); @@ -211,7 +211,7 @@ return dimOp.getResult(); })); SmallVector strides(rank, rewriter.getIndexAttr(1)); - paddedSubviewResults.push_back(rewriter.create( + paddedSubviewResults.push_back(rewriter.create( loc, std::get<1>(it), offsets, sizes, strides)); } // Replace the transient `opToPad` locally, except for uses that we just @@ -679,7 +679,7 @@ rewriter.create(loc, initTensor, padValue).result(); // Copy original contents into new tensor - // Uses linalg.generic, but could be done with std.subtensor_insert + // Uses linalg.generic, but could be done with tensor.insert_slice SmallVector outputExprs; for (unsigned i = 0; i < resultShapedType.getRank(); ++i) { outputExprs.push_back(getAffineDimExpr(i, rewriter.getContext()) + @@ -719,13 +719,13 @@ return val; } -LogicalResult SubTensorOfPadTensorSwapPattern::matchAndRewrite( - SubTensorOp subTensorOp, PatternRewriter &rewriter) const { - auto padOp = subTensorOp.source().getDefiningOp(); +LogicalResult ExtractSliceOfPadTensorSwapPattern::matchAndRewrite( + tensor::ExtractSliceOp sliceOp, PatternRewriter &rewriter) const { + auto padOp = sliceOp.source().getDefiningOp(); if (!padOp) return failure(); // Only unit stride supported. - if (!subTensorOp.hasUnitStride()) + if (!sliceOp.hasUnitStride()) return failure(); // Only constant padding value supported. Value padValue = padOp.getConstantPaddingValue(); @@ -734,7 +734,7 @@ // Helper variables and functions for various arithmetic operations. These are // used extensively for computing new offset/length and padding values. - Location loc = subTensorOp.getLoc(); + Location loc = sliceOp.getLoc(); AffineExpr dim0, dim1; bindDims(rewriter.getContext(), dim0, dim1); // Add two integers. @@ -786,8 +786,8 @@ int64_t rank = padOp.getSourceType().getRank(); for (unsigned dim = 0; dim < rank; ++dim) { auto low = asValue(rewriter, loc, padOp.getMixedLowPad()[dim]); - auto offset = asValue(rewriter, loc, subTensorOp.getMixedOffsets()[dim]); - auto length = asValue(rewriter, loc, subTensorOp.getMixedSizes()[dim]); + auto offset = asValue(rewriter, loc, sliceOp.getMixedOffsets()[dim]); + auto length = asValue(rewriter, loc, sliceOp.getMixedSizes()[dim]); auto srcSize = rewriter.createOrFold( loc, padOp.source(), dim); @@ -805,19 +805,19 @@ // // The original read could also have started in the high padding zone. // In that case, set the offset to the end of source tensor. The new - // SubTensorOp length will be zero in that case. (Effectively reading no + // ExtractSliceOp length will be zero in that case. (Effectively reading no // data from the source.) Value newOffset = min(max(sub(offset, low), zero), srcSize); newOffsets.push_back(asOpFoldResult(rewriter, newOffset)); - // The original SubTensorOp was reading until position `offset + length`. + // The original ExtractSliceOp was reading until position `offset + length`. // Therefore, the corresponding position within the source tensor is: // // offset + length - low // - // In case the original SubTensorOp stopped reading within the low padding - // zone, this value can be negative. In that case, the end position of the - // read should be zero. (Similar to newOffset.) + // In case the original ExtractSliceOp stopped reading within the low + // padding zone, this value can be negative. In that case, the end position + // of the read should be zero. (Similar to newOffset.) // // The original read could also have stopped in the high padding zone. 
// In that case, set the end positition of the read should be the end of the @@ -825,7 +825,7 @@ // // endLoc = min(max(offset - low + length, 0), srcSize) // - // The new SubTensorOp length is `endLoc - newOffset`. + // The new ExtractSliceOp length is `endLoc - newOffset`. Value endLoc = min(max(add(sub(offset, low), length), zero), srcSize); Value newLength = sub(endLoc, newOffset); newLengths.push_back(asOpFoldResult(rewriter, newLength)); @@ -844,7 +844,7 @@ } // The amount of high padding is simply the number of elements remaining, - // so that the result has the same length as the original SubTensorOp. + // so that the result has the same length as the original ExtractSliceOp. Value newHigh = sub(sub(length, newLength), newLow); appendIndex(newHigh, newHighs, staticNewHighs); @@ -854,22 +854,20 @@ // Insert cast to ensure that types match. (May be folded away.) auto castResult = [&](Value val) -> Value { - auto castOp = rewriter.create( - loc, subTensorOp.getType(), val); + auto castOp = rewriter.create(loc, sliceOp.getType(), val); return castOp; }; // In cases where the original data source is unused: Emit a GenerateOp and - // do not generate a SubTensorOp. (The result shape of the SubTensorOp would + // do not generate a SliceOp. (The result shape of the SliceOp would // have a dimension of size 0, the semantics of which is unclear.) auto createGenerateOp = [&]() { - // The shape of the GenerateOp is the same as the existing SubTensorOp. - RankedTensorType type = subTensorOp.getType(); + // The shape of the GenerateOp is the same as the existing SliceOp. + RankedTensorType type = sliceOp.getType(); SmallVector dynDims; for (unsigned i = 0; i < type.getRank(); ++i) { if (type.isDynamicDim(i)) - dynDims.push_back( - asValue(rewriter, loc, subTensorOp.getMixedOffsets()[i])); + dynDims.push_back(asValue(rewriter, loc, sliceOp.getMixedOffsets()[i])); } // Create GenerateOp. @@ -893,14 +891,14 @@ return castResult(generateOp); }; - // Emit a SubTensorOp and a PadTensorOp. Should not be used in cases where - // the result shape of the new SubTensorOp has a zero dimension. + // Emit a SliceOp and a PadTensorOp. Should not be used in cases where + // the result shape of the new SliceOp has a zero dimension. auto createPadTensorOfSubTensor = [&]() { // Create pad_tensor(subtensor(x)). - auto newSubTensorOp = rewriter.create( + auto newSliceOp = rewriter.create( loc, padOp.source(), newOffsets, newLengths, newStrides); auto newPadTensorOp = rewriter.create( - loc, newSubTensorOp, staticNewLows, staticNewHighs, newLows, newHighs); + loc, newSliceOp, staticNewLows, staticNewHighs, newLows, newHighs); // Copy region to new PadTensorOp. BlockAndValueMapping bvm; @@ -913,27 +911,29 @@ // Rewrite subtensor(pad_tensor(x)) into a GenerateOp it is statically known // that the original data source x is not used. if (hasZeroLen) { - rewriter.replaceOp(subTensorOp, createGenerateOp()); + rewriter.replaceOp(sliceOp, createGenerateOp()); return success(); } // If there are dynamic dimensions: Generate an scf.if check to avoid creating - // SubTensorOps with result dimensions of size 0 at runtime. + // SliceOps with result dimensions of size 0 at runtime. 
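A small worked instance of this arithmetic (concrete numbers chosen for the example: low padding 2, slice offset 1, slice length 5, source size 6): newOffset = min(max(1 - 2, 0), 6) = 0, endLoc = min(max(1 - 2 + 5, 0), 6) = 4, so newLength = 4; the remaining low padding is 1 and the high padding is 5 - 4 - 1 = 0, so the rewritten pad still yields 5 elements.

```mlir
// Original: slice of the padded tensor.
%p = linalg.pad_tensor %src low[2] high[3] {
^bb0(%i: index):
  linalg.yield %cst : f32
} : tensor<6xf32> to tensor<11xf32>
%s = tensor.extract_slice %p[1] [5] [1] : tensor<11xf32> to tensor<5xf32>

// Rewritten: slice of the source, then a smaller pad.
%s0 = tensor.extract_slice %src[0] [4] [1] : tensor<6xf32> to tensor<4xf32>
%r = linalg.pad_tensor %s0 low[1] high[0] {
^bb0(%i: index):
  linalg.yield %cst : f32
} : tensor<4xf32> to tensor<5xf32>
```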
if (dynHasZeroLenCond) { auto result = rewriter.create( - loc, subTensorOp.getType(), dynHasZeroLenCond, - /*thenBuilder=*/[&](OpBuilder &b, Location loc) { + loc, sliceOp.getType(), dynHasZeroLenCond, + /*thenBuilder=*/ + [&](OpBuilder &b, Location loc) { b.create(loc, createGenerateOp()); }, - /*elseBuilder=*/[&](OpBuilder &b, Location loc) { + /*elseBuilder=*/ + [&](OpBuilder &b, Location loc) { b.create(loc, createPadTensorOfSubTensor()); }); - rewriter.replaceOp(subTensorOp, result.getResult(0)); + rewriter.replaceOp(sliceOp, result.getResult(0)); return success(); } // All shapes are static and the data source is actually used. Rewrite into // pad_tensor(subtensor(x)). - rewriter.replaceOp(subTensorOp, createPadTensorOfSubTensor()); + rewriter.replaceOp(sliceOp, createPadTensorOfSubTensor()); return success(); } diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -15,6 +15,7 @@ #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/Dialect/Linalg/Transforms/Transforms.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Utils/StructuredOpsUtils.h" #include "mlir/Dialect/Vector/VectorOps.h" #include "mlir/IR/AffineExpr.h" @@ -677,9 +678,9 @@ } /// Rewrite a PadTensorOp into a sequence of InitTensorOp, FillOp and -/// SubTensorInsertOp. For now, only constant padding values are supported. +/// InsertSliceOp. For now, only constant padding values are supported. /// If there is enough static type information, TransferReadOps and -/// TransferWriteOps may be generated instead of SubTensorInsertOps. +/// TransferWriteOps may be generated instead of InsertSliceOps. struct GenericPadTensorOpVectorizationPattern : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -723,7 +724,7 @@ return success(); // Neither source type nor PadTensorOp result type have static shape. Such - // PadTensorOps cannot be vectorized. Generate a SubTensorInsertOp instead + // PadTensorOps cannot be vectorized. Generate a InsertSliceOp instead // for copying the PadOp source. auto sourceType = padOp.getSourceType(); @@ -737,10 +738,10 @@ srcSizes.push_back(rewriter.getIndexAttr(sourceType.getDimSize(dim))); } } - // Strides of SubTensorInsertOp are all 1. + // Strides of InsertSliceOp are all 1. SmallVector strides(sourceType.getRank(), rewriter.getIndexAttr(1)); - rewriter.replaceOpWithNewOp( + rewriter.replaceOpWithNewOp( padOp, padOp.source(), fill, padOp.getMixedLowPad(), srcSizes, strides); return success(); @@ -913,27 +914,29 @@ /// write. In such cases, the TransferWriteOp can write to the non-padded tensor /// value and apply out-of-bounds masking. E.g.: /// ``` -/// %0 = subtensor ...[...] [%s0, %s1] [1, 1] : tensor<...> to tensor +/// %0 = tensor.extract_slice ...[...] [%s0, %s1] [1, 1] +/// : tensor<...> to tensor /// %1 = linalg.pad_tensor %0 ... : tensor to tensor<17x5xf32> /// %2 = vector.transfer_write %vec, %1[...] /// : vector<17x5xf32>, tensor<17x5xf32> -/// %r = subtensor %2[0, 0] [%s0, %s1] [1, 1] +/// %r = tensor.extract_slice %2[0, 0] [%s0, %s1] [1, 1] /// : tensor<17x5xf32> to tensor /// ``` /// is rewritten to: /// ``` -/// %0 = subtensor ...[...] [%s0, %s1] [1, 1] : tensor<...> to tensor +/// %0 = tensor.extract_slice ...[...] [%s0, %s1] [1, 1] +/// : tensor<...> to tensor /// %r = vector.transfer_write %vec, %0[...] 
: vector<17x5xf32>, tensor /// ``` -/// Note: It is important that the SubTensorOp %r resizes the result of the +/// Note: It is important that the ExtractSliceOp %r resizes the result of the /// TransferWriteOp to the same size as the input of the TensorPadOp (or an even /// smaller size). Otherwise, %r's new (dynamic) dimensions would differ from /// %r's old dimensions. /// /// This rewrite is possible if: /// - Low padding is static 0. -/// - `xferOp` has exactly one use, which is a SubTensorOp. This SubTensorOp -/// trims the same amount of padding that was added beforehand. +/// - `xferOp` has exactly one use, which is an ExtractSliceOp. This +/// ExtractSliceOp trims the same amount of padding that was added beforehand. /// - Single, scalar padding value. struct PadTensorOpVectorizationWithTransferWritePattern : public VectorizePadTensorOpUserPattern { @@ -947,9 +950,9 @@ // Pad value must be a constant. auto padValue = padOp.getConstantPaddingValue(); if (!padValue) return failure(); - // TransferWriteOp result must be directly consumed by a SubTensorOp. + // TransferWriteOp result must be directly consumed by an ExtractSliceOp. if (!xferOp->hasOneUse()) return failure(); - auto trimPadding = dyn_cast(*xferOp->user_begin()); + auto trimPadding = dyn_cast(*xferOp->user_begin()); if (!trimPadding) return failure(); // Only static zero offsets supported when trimming padding. if (!trimPadding.hasZeroOffset()) return failure(); @@ -976,7 +979,8 @@ /// This is a conservative analysis. In case equal tensor sizes cannot be /// proven statically, this analysis returns `false` even though the tensor /// sizes may turn out to be equal at runtime. - bool hasSameTensorSize(Value beforePadding, SubTensorOp afterTrimming) const { + bool hasSameTensorSize(Value beforePadding, + tensor::ExtractSliceOp afterTrimming) const { // If the input to PadTensorOp is a CastOp, try with with both CastOp result // and CastOp operand. if (auto castOp = beforePadding.getDefiningOp()) @@ -1002,21 +1006,22 @@ if (t1.getNumDynamicDims() == 0) return true; // All dynamic sizes must be the same. The only supported case at the moment - // is when `beforePadding` is a SubTensorOp (or a cast thereof). + // is when `beforePadding` is an ExtractSliceOp (or a cast thereof). - // Apart from CastOp, only SubTensorOp is supported. - auto beforeSubtensor = beforePadding.getDefiningOp(); - if (!beforeSubtensor) return false; + // Apart from CastOp, only ExtractSliceOp is supported. + auto beforeSlice = beforePadding.getDefiningOp(); + if (!beforeSlice) + return false; - assert(static_cast(t1.getRank()) - == beforeSubtensor.getMixedSizes().size()); + assert(static_cast(t1.getRank()) == + beforeSlice.getMixedSizes().size()); assert(static_cast(t2.getRank()) == afterTrimming.getMixedSizes().size()); for (unsigned i = 0; i < t1.getRank(); ++i) { // Skip static dimensions. if (!t1.isDynamicDim(i)) continue; - auto size1 = beforeSubtensor.getMixedSizes()[i]; + auto size1 = beforeSlice.getMixedSizes()[i]; auto size2 = afterTrimming.getMixedSizes()[i]; // Case 1: Same value or same constant int. @@ -1042,10 +1047,11 @@ } }; -/// Rewrite use of PadTensorOp result in SubtensorInsertOp. E.g.: +/// Rewrite use of PadTensorOp result in InsertSliceOp. E.g.: /// ``` /// %0 = linalg.pad_tensor %src ... 
: tensor to tensor<17x5xf32> -/// %r = subtensor_insert %0 into %dest[%a, %b, 0, 0] [1, 1, 17, 5] [1, 1, 1, 1] +/// %r = tensor.insert_slice %0 +/// into %dest[%a, %b, 0, 0] [1, 1, 17, 5] [1, 1, 1, 1] /// : tensor<17x5xf32> into tensor /// ``` /// is rewritten to: @@ -1063,13 +1069,13 @@ /// (Implies that sizes of `insertOp` are all static.) /// - Only unit strides in `insertOp`. /// - Single, scalar padding value. -struct PadTensorOpVectorizationWithSubTensorInsertPattern - : public VectorizePadTensorOpUserPattern { - using VectorizePadTensorOpUserPattern - ::VectorizePadTensorOpUserPattern; +struct PadTensorOpVectorizationWithInsertSlicePattern + : public VectorizePadTensorOpUserPattern { + using VectorizePadTensorOpUserPattern< + tensor::InsertSliceOp>::VectorizePadTensorOpUserPattern; LogicalResult rewriteUser(PatternRewriter &rewriter, PadTensorOp padOp, - SubTensorInsertOp insertOp) const override { + tensor::InsertSliceOp insertOp) const override { // Low padding must be static 0. if (!padOp.hasZeroLowPad()) return failure(); // Only unit stride supported. @@ -1103,8 +1109,8 @@ auto read = rewriter.create( padOp.getLoc(), vecType, padOp.source(), readIndices, padValue); - // Generate TransferWriteOp: Write to SubTensorInsertOp's dest tensor at - // specified offsets. Write is fully in-bounds because a SubTensorInsertOp's + // Generate TransferWriteOp: Write to InsertSliceOp's dest tensor at + // specified offsets. Write is fully in-bounds because a InsertSliceOp's // source must fit into the destination at the specified offsets. auto writeIndices = ofrToIndexValues(rewriter, padOp.getLoc(), insertOp.getMixedOffsets()); @@ -1123,7 +1129,7 @@ // Try these specialized patterns first before resorting to the generic one. patterns.add( + PadTensorOpVectorizationWithInsertSlicePattern>( patterns.getContext(), baseBenefit.getBenefit() + 1); } diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -19,6 +19,7 @@ #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/StandardOps/Utils/Utils.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/AffineExprVisitor.h" #include "mlir/IR/AffineMap.h" @@ -556,7 +557,7 @@ } LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n"); - // Construct a new subview / subtensor for the tile. + // Construct a new subview / extract_slice for the tile. SmallVector offsets, sizes, strides; offsets.reserve(rank); sizes.reserve(rank); @@ -585,7 +586,7 @@ Value size = makeComposedAffineApply(b, loc, s0 + 1, closedIntSize); LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: raw size: " << size << "\n"); - // The size of the subview / subtensor should be trimmed to avoid + // The size of the subview / extract_slice should be trimmed to avoid // out-of-bounds accesses, unless we statically know the subshape size // divides the shape size evenly. 
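The trimming usually materializes as an `affine.min` that clamps the tile size before it feeds the slice; a rough sketch (the exact affine map and operand order produced here may differ):

```mlir
// size = min(tileSize, dim - iv) so the last tile stays in bounds.
%sz = affine.min affine_map<(d0)[s0, s1] -> (s0, s1 - d0)>(%iv)[%ts, %dim0]
%tile = tensor.extract_slice %t[%iv, 0] [%sz, 64] [1, 1]
    : tensor<?x64xf32> to tensor<?x64xf32>
```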
int64_t shapeSize = shape[r]; @@ -619,8 +620,8 @@ tiledShapes.push_back( b.create(loc, shapedOp, offsets, sizes, strides)); else - tiledShapes.push_back( - b.create(loc, shapedOp, offsets, sizes, strides)); + tiledShapes.push_back(b.create( + loc, shapedOp, offsets, sizes, strides)); } return tiledShapes; diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp --- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp +++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp @@ -717,10 +717,10 @@ // The size at the given index is now known to be a dynamic size. unsigned unsignedIndex = index.getValue().getZExtValue(); - if (auto subtensor = dyn_cast_or_null(definingOp)) { - assert(subtensor.isDynamicSize(unsignedIndex) && - "Expected dynamic subtensor size"); - return subtensor.getDynamicSize(unsignedIndex); + if (auto sliceOp = dyn_cast_or_null(definingOp)) { + assert(sliceOp.isDynamicSize(unsignedIndex) && + "Expected dynamic slice size"); + return sliceOp.getDynamicSize(unsignedIndex); } // Fold dim to the size argument for an `AllocOp`, `ViewOp`, or `SubViewOp`. @@ -1314,7 +1314,7 @@ } // TODO: ponder whether we want to allow missing trailing sizes/strides that are -// completed automatically, like we have for subview and subtensor. +// completed automatically, like we have for subview and extract_slice. static LogicalResult verify(ReinterpretCastOp op) { // The source and result memrefs should be in the same memory space. auto srcType = op.source().getType().cast(); diff --git a/mlir/lib/Dialect/StandardOps/CMakeLists.txt b/mlir/lib/Dialect/StandardOps/CMakeLists.txt --- a/mlir/lib/Dialect/StandardOps/CMakeLists.txt +++ b/mlir/lib/Dialect/StandardOps/CMakeLists.txt @@ -14,9 +14,7 @@ MLIRControlFlowInterfaces MLIRIR MLIRSideEffectInterfaces - MLIRTensor MLIRVectorInterfaces - MLIRViewLikeInterface ) add_subdirectory(Transforms) diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -10,7 +10,6 @@ #include "mlir/Dialect/CommonFolders.h" #include "mlir/Dialect/StandardOps/Utils/Utils.h" -#include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/AffineMap.h" #include "mlir/IR/BlockAndValueMapping.h" @@ -34,32 +33,6 @@ using namespace mlir; -/// Helper function to dispatch an OpFoldResult into either the `dynamicVec` if -/// it is a Value or into `staticVec` if it is an IntegerAttr. -/// In the case of a Value, a copy of the `sentinel` value is also pushed to -/// `staticVec`. This is useful to extract mixed static and dynamic entries that -/// come from an AttrSizedOperandSegments trait. -static void dispatchIndexOpFoldResult(OpFoldResult ofr, - SmallVectorImpl &dynamicVec, - SmallVectorImpl &staticVec, - int64_t sentinel) { - if (auto v = ofr.dyn_cast()) { - dynamicVec.push_back(v); - staticVec.push_back(sentinel); - return; - } - APInt apInt = ofr.dyn_cast().cast().getValue(); - staticVec.push_back(apInt.getSExtValue()); -} - -static void dispatchIndexOpFoldResults(ArrayRef ofrs, - SmallVectorImpl &dynamicVec, - SmallVectorImpl &staticVec, - int64_t sentinel) { - for (auto ofr : ofrs) - dispatchIndexOpFoldResult(ofr, dynamicVec, staticVec, sentinel); -} - /// If ofr is a constant integer, i.e., an IntegerAttr or a ConstantOp with an /// IntegerAttr, return the integer. 
llvm::Optional mlir::getConstantIntValue(OpFoldResult ofr) { @@ -227,7 +200,6 @@ } void StandardOpsDialect::initialize() { - getContext()->loadDialect(); addOperations< #define GET_OP_LIST #include "mlir/Dialect/StandardOps/IR/Ops.cpp.inc" @@ -286,14 +258,6 @@ [](APInt a, APInt b) { return a + b; }); } -/// Extract int64_t values from the assumed ArrayAttr of IntegerAttr. -static SmallVector extractFromI64ArrayAttr(Attribute attr) { - return llvm::to_vector<4>( - llvm::map_range(attr.cast(), [](Attribute a) -> int64_t { - return a.cast().getInt(); - })); -} - /// Canonicalize a sum of a constant and (constant - something) to simply be /// a sum of constants minus something. This transformation does similar /// transformations for additions of a constant with a subtract/add of @@ -2082,499 +2046,6 @@ return areVectorCastSimpleCompatible(a, b, areCastCompatible); } -//===----------------------------------------------------------------------===// -// SubTensorOp -//===----------------------------------------------------------------------===// - -/// A subtensor result type can be fully inferred from the source type and the -/// static representation of offsets, sizes and strides. Special sentinels -/// encode the dynamic case. -Type SubTensorOp::inferResultType(RankedTensorType sourceRankedTensorType, - ArrayRef leadingStaticOffsets, - ArrayRef leadingStaticSizes, - ArrayRef leadingStaticStrides) { - // A subtensor may specify only a leading subset of offset/sizes/strides in - // which case we complete with offset=0, sizes from memref type and strides=1. - unsigned rank = sourceRankedTensorType.getRank(); - assert(leadingStaticSizes.size() <= rank && - "unexpected leadingStaticSizes overflow"); - auto staticSizes = llvm::to_vector<4>(leadingStaticSizes); - unsigned numTrailingSizes = rank - staticSizes.size(); - llvm::append_range(staticSizes, sourceRankedTensorType.getShape().take_back( - numTrailingSizes)); - return RankedTensorType::get(staticSizes, - sourceRankedTensorType.getElementType()); -} - -Type SubTensorOp::inferResultType(RankedTensorType sourceRankedTensorType, - ArrayRef leadingStaticOffsets, - ArrayRef leadingStaticSizes, - ArrayRef leadingStaticStrides) { - SmallVector staticOffsets, staticSizes, staticStrides; - SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; - dispatchIndexOpFoldResults(leadingStaticOffsets, dynamicOffsets, - staticOffsets, ShapedType::kDynamicStrideOrOffset); - dispatchIndexOpFoldResults(leadingStaticSizes, dynamicSizes, staticSizes, - ShapedType::kDynamicSize); - dispatchIndexOpFoldResults(leadingStaticStrides, dynamicStrides, - staticStrides, ShapedType::kDynamicStrideOrOffset); - return SubTensorOp::inferResultType(sourceRankedTensorType, staticOffsets, - staticSizes, staticStrides); -} - -/// A subtensor result type can be fully inferred from the source type and the -/// static representation of offsets, sizes and strides. Special sentinels -/// encode the dynamic case. 
-Type SubTensorOp::inferRankReducedResultType( - unsigned resultRank, RankedTensorType sourceRankedTensorType, - ArrayRef leadingStaticOffsets, - ArrayRef leadingStaticSizes, - ArrayRef leadingStaticStrides) { - auto inferredType = - inferResultType(sourceRankedTensorType, leadingStaticOffsets, - leadingStaticSizes, leadingStaticStrides) - .cast(); - int rankDiff = inferredType.getRank() - resultRank; - if (rankDiff > 0) { - auto shape = inferredType.getShape(); - llvm::SmallDenseSet dimsToProject; - mlir::getPositionsOfShapeOne(rankDiff, shape, dimsToProject); - SmallVector projectedShape; - for (unsigned pos = 0, e = shape.size(); pos < e; ++pos) - if (!dimsToProject.contains(pos)) - projectedShape.push_back(shape[pos]); - inferredType = - RankedTensorType::get(projectedShape, inferredType.getElementType()); - } - return inferredType; -} - -Type SubTensorOp::inferRankReducedResultType( - unsigned resultRank, RankedTensorType sourceRankedTensorType, - ArrayRef leadingStaticOffsets, - ArrayRef leadingStaticSizes, - ArrayRef leadingStaticStrides) { - SmallVector staticOffsets, staticSizes, staticStrides; - SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; - dispatchIndexOpFoldResults(leadingStaticOffsets, dynamicOffsets, - staticOffsets, ShapedType::kDynamicStrideOrOffset); - dispatchIndexOpFoldResults(leadingStaticSizes, dynamicSizes, staticSizes, - ShapedType::kDynamicSize); - dispatchIndexOpFoldResults(leadingStaticStrides, dynamicStrides, - staticStrides, ShapedType::kDynamicStrideOrOffset); - return SubTensorOp::inferRankReducedResultType( - resultRank, sourceRankedTensorType, staticOffsets, staticSizes, - staticStrides); -} - -// Build a SubTensorOp with mixed static and dynamic entries and custom result -// type. If the type passed is nullptr, it is inferred. -void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result, - RankedTensorType resultType, Value source, - ArrayRef offsets, - ArrayRef sizes, - ArrayRef strides, - ArrayRef attrs) { - SmallVector staticOffsets, staticSizes, staticStrides; - SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; - dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets, - ShapedType::kDynamicStrideOrOffset); - dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes, - ShapedType::kDynamicSize); - dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides, - ShapedType::kDynamicStrideOrOffset); - auto sourceRankedTensorType = source.getType().cast(); - // Structuring implementation this way avoids duplication between builders. - if (!resultType) { - resultType = - SubTensorOp::inferResultType(sourceRankedTensorType, staticOffsets, - staticSizes, staticStrides) - .cast(); - } - build(b, result, resultType, source, dynamicOffsets, dynamicSizes, - dynamicStrides, b.getI64ArrayAttr(staticOffsets), - b.getI64ArrayAttr(staticSizes), b.getI64ArrayAttr(staticStrides)); - result.addAttributes(attrs); -} - -// Build a SubTensorOp with mixed static and dynamic entries and inferred result -// type. -void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result, - Value source, ArrayRef offsets, - ArrayRef sizes, - ArrayRef strides, - ArrayRef attrs) { - build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs); -} - -// Build a SubTensorOp with dynamic entries and custom result type. If the type -// passed is nullptr, it is inferred. 
-void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result, - RankedTensorType resultType, Value source, - ValueRange offsets, ValueRange sizes, - ValueRange strides, - ArrayRef attrs) { - SmallVector offsetValues = llvm::to_vector<4>( - llvm::map_range(offsets, [](Value v) -> OpFoldResult { return v; })); - SmallVector sizeValues = llvm::to_vector<4>( - llvm::map_range(sizes, [](Value v) -> OpFoldResult { return v; })); - SmallVector strideValues = llvm::to_vector<4>( - llvm::map_range(strides, [](Value v) -> OpFoldResult { return v; })); - build(b, result, resultType, source, offsetValues, sizeValues, strideValues); -} - -// Build a SubTensorOp with dynamic entries and inferred result type. -void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result, - Value source, ValueRange offsets, - ValueRange sizes, ValueRange strides, - ArrayRef attrs) { - build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs); -} - -enum SubTensorVerificationResult { - Success, - RankTooLarge, - SizeMismatch, - ElemTypeMismatch, -}; - -/// Checks if `original` Type type can be rank reduced to `reduced` type. -/// This function is slight variant of `is subsequence` algorithm where -/// not matching dimension must be 1. -static SubTensorVerificationResult -isRankReducedType(Type originalType, Type candidateReducedType, - std::string *errMsg = nullptr) { - if (originalType == candidateReducedType) - return SubTensorVerificationResult::Success; - if (!originalType.isa()) - return SubTensorVerificationResult::Success; - if (originalType.isa() && - !candidateReducedType.isa()) - return SubTensorVerificationResult::Success; - - ShapedType originalShapedType = originalType.cast(); - ShapedType candidateReducedShapedType = - candidateReducedType.cast(); - - // Rank and size logic is valid for all ShapedTypes. - ArrayRef originalShape = originalShapedType.getShape(); - ArrayRef candidateReducedShape = - candidateReducedShapedType.getShape(); - unsigned originalRank = originalShape.size(), - candidateReducedRank = candidateReducedShape.size(); - if (candidateReducedRank > originalRank) - return SubTensorVerificationResult::RankTooLarge; - - auto optionalUnusedDimsMask = - computeRankReductionMask(originalShape, candidateReducedShape); - - // Sizes cannot be matched in case empty vector is returned. - if (!optionalUnusedDimsMask.hasValue()) - return SubTensorVerificationResult::SizeMismatch; - - if (originalShapedType.getElementType() != - candidateReducedShapedType.getElementType()) - return SubTensorVerificationResult::ElemTypeMismatch; - - // We are done for the tensor case. - if (originalType.isa()) - return SubTensorVerificationResult::Success; - - return SubTensorVerificationResult::Success; -} - -template -static LogicalResult -produceSubTensorErrorMsg(SubTensorVerificationResult result, OpTy op, - Type expectedType, StringRef errMsg = "") { - auto memrefType = expectedType.cast(); - switch (result) { - case SubTensorVerificationResult::Success: - return success(); - case SubTensorVerificationResult::RankTooLarge: - return op.emitError("expected result rank to be smaller or equal to ") - << "the source rank. " << errMsg; - case SubTensorVerificationResult::SizeMismatch: - return op.emitError("expected result type to be ") - << expectedType - << " or a rank-reduced version. 
(mismatch of result sizes) " - << errMsg; - case SubTensorVerificationResult::ElemTypeMismatch: - return op.emitError("expected result element type to be ") - << memrefType.getElementType() << errMsg; - } - llvm_unreachable("unexpected subtensor verification result"); -} -/// Verifier for SubTensorOp. -static LogicalResult verify(SubTensorOp op) { - // Verify result type against inferred type. - auto expectedType = SubTensorOp::inferResultType( - op.getSourceType(), extractFromI64ArrayAttr(op.static_offsets()), - extractFromI64ArrayAttr(op.static_sizes()), - extractFromI64ArrayAttr(op.static_strides())); - auto result = isRankReducedType(expectedType, op.getType()); - return produceSubTensorErrorMsg(result, op, expectedType); -} - -/// Infer the canonical type of the result of a subtensor operation. Returns a -/// type with rank `resultRank` that is either the rank of the rank-reduced -/// type, or the non-rank-reduced type. -static RankedTensorType getCanonicalSubTensorResultType( - unsigned resultRank, RankedTensorType sourceType, - ArrayRef mixedOffsets, ArrayRef mixedSizes, - ArrayRef mixedStrides) { - auto resultType = - SubTensorOp::inferRankReducedResultType( - resultRank, sourceType, mixedOffsets, mixedSizes, mixedStrides) - .cast(); - if (resultType.getRank() != resultRank) { - resultType = SubTensorOp::inferResultType(sourceType, mixedOffsets, - mixedSizes, mixedStrides) - .cast(); - } - return resultType; -} - -namespace { -/// Pattern to rewrite a subtensor op with tensor::Cast arguments. -/// This essentially pushes memref_cast past its consuming subtensor when -/// `canFoldIntoConsumerOp` is true. -/// -/// Example: -/// ``` -/// %0 = tensorcast %V : tensor<16x16xf32> to tensor -/// %1 = subtensor %0[0, 0][3, 4][1, 1] : tensor to tensor<3x4xf32> -/// ``` -/// is rewritten into: -/// ``` -/// %0 = subtensor %V[0, 0][3, 4][1, 1] : tensor<16x16xf32> to tensor<3x4xf32> -/// %1 = tensor.cast %0: tensor<3x4xf32> to tensor<3x4xf32> -/// ``` -class SubTensorOpCastFolder final : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(SubTensorOp subTensorOp, - PatternRewriter &rewriter) const override { - // Any constant operand, just return to let SubViewOpConstantFolder kick in. - if (llvm::any_of(subTensorOp.getOperands(), [](Value operand) { - return matchPattern(operand, matchConstantIndex()); - })) - return failure(); - - auto castOp = subTensorOp.source().getDefiningOp(); - if (!castOp) - return failure(); - - if (!canFoldIntoConsumerOp(castOp)) - return failure(); - - /// Deduce the type of the result to use for the canonicalized operation. - RankedTensorType resultType = getCanonicalSubTensorResultType( - subTensorOp.getType().getRank(), subTensorOp.getSourceType(), - subTensorOp.getMixedOffsets(), subTensorOp.getMixedSizes(), - subTensorOp.getMixedStrides()); - Value newSubTensor = rewriter.create( - subTensorOp.getLoc(), resultType, castOp.source(), - subTensorOp.offsets(), subTensorOp.sizes(), subTensorOp.strides(), - subTensorOp.static_offsets(), subTensorOp.static_sizes(), - subTensorOp.static_strides()); - rewriter.replaceOpWithNewOp( - subTensorOp, subTensorOp.getType(), newSubTensor); - return success(); - } -}; -} // namespace - -/// Return the canonical type of the result of a subtensor. 
-struct SubTensorReturnTypeCanonicalizer { - RankedTensorType operator()(SubTensorOp op, - ArrayRef mixedOffsets, - ArrayRef mixedSizes, - ArrayRef mixedStrides) { - return getCanonicalSubTensorResultType(op.getType().getRank(), - op.getSourceType(), mixedOffsets, - mixedSizes, mixedStrides); - } -}; - -/// A canonicalizer wrapper to replace SubTensorOps. -struct SubTensorCanonicalizer { - void operator()(PatternRewriter &rewriter, SubTensorOp op, - SubTensorOp newOp) { - Value replacement = newOp.getResult(); - if (replacement.getType() != op.getType()) - replacement = rewriter.create(op.getLoc(), op.getType(), - replacement); - rewriter.replaceOp(op, replacement); - } -}; - -void SubTensorOp::getCanonicalizationPatterns(RewritePatternSet &results, - MLIRContext *context) { - results.add, - SubTensorOpCastFolder>(context); -} - -// -static LogicalResult -foldIdentityOffsetSizeAndStrideOpInterface(OffsetSizeAndStrideOpInterface op, - ShapedType shapedType) { - OpBuilder b(op.getContext()); - for (OpFoldResult ofr : op.getMixedOffsets()) - if (!isEqualConstantIntOrValue(ofr, b.getIndexAttr(0))) - return failure(); - // Rank-reducing noops only need to inspect the leading dimensions: llvm::zip - // is appropriate. - auto shape = shapedType.getShape(); - for (auto it : llvm::zip(op.getMixedSizes(), shape)) - if (!isEqualConstantIntOrValue(std::get<0>(it), - b.getIndexAttr(std::get<1>(it)))) - return failure(); - for (OpFoldResult ofr : op.getMixedStrides()) - if (!isEqualConstantIntOrValue(ofr, b.getIndexAttr(1))) - return failure(); - return success(); -} - -OpFoldResult SubTensorOp::fold(ArrayRef) { - if (getSourceType() == getType() && - succeeded(foldIdentityOffsetSizeAndStrideOpInterface(*this, getType()))) - return this->source(); - return OpFoldResult(); -} - -//===----------------------------------------------------------------------===// -// SubTensorInsertOp -//===----------------------------------------------------------------------===// - -// Build a SubTensorInsertOp with mixed static and dynamic entries. -void mlir::SubTensorInsertOp::build(OpBuilder &b, OperationState &result, - Value source, Value dest, - ArrayRef offsets, - ArrayRef sizes, - ArrayRef strides, - ArrayRef attrs) { - SmallVector staticOffsets, staticSizes, staticStrides; - SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; - dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets, - ShapedType::kDynamicStrideOrOffset); - dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes, - ShapedType::kDynamicSize); - dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides, - ShapedType::kDynamicStrideOrOffset); - build(b, result, dest.getType(), source, dest, dynamicOffsets, dynamicSizes, - dynamicStrides, b.getI64ArrayAttr(staticOffsets), - b.getI64ArrayAttr(staticSizes), b.getI64ArrayAttr(staticStrides)); - result.addAttributes(attrs); -} - -// Build a SubTensorInsertOp with dynamic entries. 
-void mlir::SubTensorInsertOp::build(OpBuilder &b, OperationState &result, - Value source, Value dest, - ValueRange offsets, ValueRange sizes, - ValueRange strides, - ArrayRef attrs) { - SmallVector offsetValues = llvm::to_vector<4>( - llvm::map_range(offsets, [](Value v) -> OpFoldResult { return v; })); - SmallVector sizeValues = llvm::to_vector<4>( - llvm::map_range(sizes, [](Value v) -> OpFoldResult { return v; })); - SmallVector strideValues = llvm::to_vector<4>( - llvm::map_range(strides, [](Value v) -> OpFoldResult { return v; })); - build(b, result, source, dest, offsetValues, sizeValues, strideValues); -} - -OpFoldResult SubTensorInsertOp::fold(ArrayRef) { - if (getSourceType().hasStaticShape() && getType().hasStaticShape() && - getSourceType() == getType() && - succeeded(foldIdentityOffsetSizeAndStrideOpInterface(*this, getType()))) - return this->source(); - return OpFoldResult(); -} - -namespace { -/// Pattern to rewrite a subtensor_insert op with constant arguments. -class SubTensorInsertOpConstantArgumentFolder final - : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(SubTensorInsertOp subTensorInsertOp, - PatternRewriter &rewriter) const override { - // No constant operand, just return. - if (llvm::none_of(subTensorInsertOp.getOperands(), [](Value operand) { - return matchPattern(operand, matchConstantIndex()); - })) - return failure(); - - // At least one of offsets/sizes/strides is a new constant. - // Form the new list of operands and constant attributes from the - // existing. - SmallVector mixedOffsets(subTensorInsertOp.getMixedOffsets()); - SmallVector mixedSizes(subTensorInsertOp.getMixedSizes()); - SmallVector mixedStrides(subTensorInsertOp.getMixedStrides()); - canonicalizeSubViewPart(mixedOffsets, ShapedType::isDynamicStrideOrOffset); - canonicalizeSubViewPart(mixedSizes, ShapedType::isDynamic); - canonicalizeSubViewPart(mixedStrides, ShapedType::isDynamicStrideOrOffset); - - // Create the new op in canonical form. - rewriter.replaceOpWithNewOp( - subTensorInsertOp, subTensorInsertOp.source(), subTensorInsertOp.dest(), - mixedOffsets, mixedSizes, mixedStrides); - return success(); - } -}; - -/// Fold tensor_casts with subtensor_insert operations. -struct SubTensorInsertOpCastFolder final - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(SubTensorInsertOp subTensorInsertOp, - PatternRewriter &rewriter) const override { - if (llvm::any_of(subTensorInsertOp.getOperands(), [](Value operand) { - return matchPattern(operand, matchConstantIndex()); - })) - return failure(); - - auto getSourceOfCastOp = [](Value v) -> Optional { - auto castOp = v.getDefiningOp(); - if (!castOp || !canFoldIntoConsumerOp(castOp)) - return llvm::None; - return castOp.source(); - }; - Optional sourceCastSource = - getSourceOfCastOp(subTensorInsertOp.source()); - Optional destCastSource = - getSourceOfCastOp(subTensorInsertOp.dest()); - if (!sourceCastSource && !destCastSource) - return failure(); - - Value replacement = rewriter.create( - subTensorInsertOp.getLoc(), - (sourceCastSource ? *sourceCastSource : subTensorInsertOp.source()), - (destCastSource ? 
*destCastSource : subTensorInsertOp.dest()), - subTensorInsertOp.getMixedOffsets(), subTensorInsertOp.getMixedSizes(), - subTensorInsertOp.getMixedStrides()); - - if (replacement.getType() != subTensorInsertOp.getType()) { - replacement = rewriter.create( - subTensorInsertOp.getLoc(), subTensorInsertOp.getType(), replacement); - } - rewriter.replaceOp(subTensorInsertOp, replacement); - return success(); - } -}; -} // namespace - -void SubTensorInsertOp::getCanonicalizationPatterns(RewritePatternSet &results, - MLIRContext *context) { - results.add(context); -} - //===----------------------------------------------------------------------===// // SwitchOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt b/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt --- a/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt @@ -16,4 +16,6 @@ MLIRIR MLIRSideEffectInterfaces MLIRSupport + MLIRStandard + MLIRViewLikeInterface ) diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "mlir/Dialect/StandardOps/Utils/Utils.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/BlockAndValueMapping.h" #include "mlir/IR/Builders.h" @@ -25,7 +26,7 @@ /// source tensor. This is useful to fold a tensor.cast into a consuming op and /// implement canonicalization patterns for ops in different dialects that may /// consume the results of tensor.cast operations. Such foldable tensor.cast -/// operations are typically inserted as `subtensor` ops and are canonicalized, +/// operations are typically inserted as `slice` ops and are canonicalized, /// to preserve the type compatibility of their uses. /// /// Returns true when all conditions are met: @@ -511,6 +512,530 @@ return success(); } +//===----------------------------------------------------------------------===// +// ExtractSliceOp +//===----------------------------------------------------------------------===// + +/// Helper function to dispatch an OpFoldResult into either the `dynamicVec` if +/// it is a Value or into `staticVec` if it is an IntegerAttr. +/// In the case of a Value, a copy of the `sentinel` value is also pushed to +/// `staticVec`. This is useful to extract mixed static and dynamic entries that +/// come from an AttrSizedOperandSegments trait. +static void dispatchIndexOpFoldResult(OpFoldResult ofr, + SmallVectorImpl &dynamicVec, + SmallVectorImpl &staticVec, + int64_t sentinel) { + if (auto v = ofr.dyn_cast()) { + dynamicVec.push_back(v); + staticVec.push_back(sentinel); + return; + } + APInt apInt = ofr.dyn_cast().cast().getValue(); + staticVec.push_back(apInt.getSExtValue()); +} + +static void dispatchIndexOpFoldResults(ArrayRef ofrs, + SmallVectorImpl &dynamicVec, + SmallVectorImpl &staticVec, + int64_t sentinel) { + for (auto ofr : ofrs) + dispatchIndexOpFoldResult(ofr, dynamicVec, staticVec, sentinel); +} + +/// An extract_slice op result type can be fully inferred from the source type +/// and the static representation of offsets, sizes and strides. Special +/// sentinels encode the dynamic case. 
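As a concrete illustration of that inference rule (invented value names and shapes, not part of this patch), fully static offsets/sizes/strides yield a static result type, while a dynamic size becomes a `?` dimension:

```mlir
func @infer_result_type_example(%t: tensor<16x16xf32>, %sz: index)
    -> (tensor<4x8xf32>, tensor<?x8xf32>) {
  // All offsets, sizes and strides are static: the result is tensor<4x8xf32>.
  %0 = tensor.extract_slice %t[0, 0] [4, 8] [1, 1]
      : tensor<16x16xf32> to tensor<4x8xf32>
  // The first size is the SSA value %sz, encoded with the dynamic sentinel:
  // the corresponding result dimension is dynamic.
  %1 = tensor.extract_slice %t[0, 0] [%sz, 8] [1, 1]
      : tensor<16x16xf32> to tensor<?x8xf32>
  return %0, %1 : tensor<4x8xf32>, tensor<?x8xf32>
}
```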
+Type ExtractSliceOp::inferResultType(RankedTensorType sourceRankedTensorType,
+                                     ArrayRef<int64_t> leadingStaticOffsets,
+                                     ArrayRef<int64_t> leadingStaticSizes,
+                                     ArrayRef<int64_t> leadingStaticStrides) {
+  // An extract_slice op may specify only a leading subset of offset/sizes/
+  // strides in which case we complete with offset=0, sizes from the source
+  // tensor type and strides=1.
+  unsigned rank = sourceRankedTensorType.getRank();
+  assert(leadingStaticSizes.size() <= rank &&
+         "unexpected leadingStaticSizes overflow");
+  auto staticSizes = llvm::to_vector<4>(leadingStaticSizes);
+  unsigned numTrailingSizes = rank - staticSizes.size();
+  llvm::append_range(staticSizes, sourceRankedTensorType.getShape().take_back(
+                                      numTrailingSizes));
+  return RankedTensorType::get(staticSizes,
+                               sourceRankedTensorType.getElementType());
+}
+
+/// Extract int64_t values from the assumed ArrayAttr of IntegerAttr.
+static SmallVector<int64_t, 4> extractFromI64ArrayAttr(Attribute attr) {
+  return llvm::to_vector<4>(
+      llvm::map_range(attr.cast<ArrayAttr>(), [](Attribute a) -> int64_t {
+        return a.cast<IntegerAttr>().getInt();
+      }));
+}
+
+Type ExtractSliceOp::inferResultType(
+    RankedTensorType sourceRankedTensorType,
+    ArrayRef<OpFoldResult> leadingStaticOffsets,
+    ArrayRef<OpFoldResult> leadingStaticSizes,
+    ArrayRef<OpFoldResult> leadingStaticStrides) {
+  SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
+  SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
+  dispatchIndexOpFoldResults(leadingStaticOffsets, dynamicOffsets,
+                             staticOffsets, ShapedType::kDynamicStrideOrOffset);
+  dispatchIndexOpFoldResults(leadingStaticSizes, dynamicSizes, staticSizes,
+                             ShapedType::kDynamicSize);
+  dispatchIndexOpFoldResults(leadingStaticStrides, dynamicStrides,
+                             staticStrides, ShapedType::kDynamicStrideOrOffset);
+  return ExtractSliceOp::inferResultType(sourceRankedTensorType, staticOffsets,
+                                         staticSizes, staticStrides);
+}
+
+/// An extract_slice op result type can be fully inferred from the source type
+/// and the static representation of offsets, sizes and strides. Special
+/// sentinels encode the dynamic case.
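The rank-reducing variant defined next additionally drops unit dimensions from the inferred type; a minimal sketch with invented shapes:

```mlir
func @rank_reduced_result_type_example(%t: tensor<8x16x4xf32>) -> tensor<16x4xf32> {
  // The non-rank-reduced inference gives tensor<1x16x4xf32>; requesting a
  // rank-2 result projects out the leading unit dimension.
  %0 = tensor.extract_slice %t[2, 0, 0] [1, 16, 4] [1, 1, 1]
      : tensor<8x16x4xf32> to tensor<16x4xf32>
  return %0 : tensor<16x4xf32>
}
```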
+Type ExtractSliceOp::inferRankReducedResultType( + unsigned resultRank, RankedTensorType sourceRankedTensorType, + ArrayRef leadingStaticOffsets, + ArrayRef leadingStaticSizes, + ArrayRef leadingStaticStrides) { + auto inferredType = + inferResultType(sourceRankedTensorType, leadingStaticOffsets, + leadingStaticSizes, leadingStaticStrides) + .cast(); + int rankDiff = inferredType.getRank() - resultRank; + if (rankDiff > 0) { + auto shape = inferredType.getShape(); + llvm::SmallDenseSet dimsToProject; + mlir::getPositionsOfShapeOne(rankDiff, shape, dimsToProject); + SmallVector projectedShape; + for (unsigned pos = 0, e = shape.size(); pos < e; ++pos) + if (!dimsToProject.contains(pos)) + projectedShape.push_back(shape[pos]); + inferredType = + RankedTensorType::get(projectedShape, inferredType.getElementType()); + } + return inferredType; +} + +Type ExtractSliceOp::inferRankReducedResultType( + unsigned resultRank, RankedTensorType sourceRankedTensorType, + ArrayRef leadingStaticOffsets, + ArrayRef leadingStaticSizes, + ArrayRef leadingStaticStrides) { + SmallVector staticOffsets, staticSizes, staticStrides; + SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; + dispatchIndexOpFoldResults(leadingStaticOffsets, dynamicOffsets, + staticOffsets, ShapedType::kDynamicStrideOrOffset); + dispatchIndexOpFoldResults(leadingStaticSizes, dynamicSizes, staticSizes, + ShapedType::kDynamicSize); + dispatchIndexOpFoldResults(leadingStaticStrides, dynamicStrides, + staticStrides, ShapedType::kDynamicStrideOrOffset); + return ExtractSliceOp::inferRankReducedResultType( + resultRank, sourceRankedTensorType, staticOffsets, staticSizes, + staticStrides); +} + +/// Build an ExtractSliceOp with mixed static and dynamic entries and custom +/// result type. If the type passed is nullptr, it is inferred. +void ExtractSliceOp::build(OpBuilder &b, OperationState &result, + RankedTensorType resultType, Value source, + ArrayRef offsets, + ArrayRef sizes, + ArrayRef strides, + ArrayRef attrs) { + SmallVector staticOffsets, staticSizes, staticStrides; + SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; + dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets, + ShapedType::kDynamicStrideOrOffset); + dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes, + ShapedType::kDynamicSize); + dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides, + ShapedType::kDynamicStrideOrOffset); + auto sourceRankedTensorType = source.getType().cast(); + // Structuring implementation this way avoids duplication between builders. + if (!resultType) { + resultType = + ExtractSliceOp::inferResultType(sourceRankedTensorType, staticOffsets, + staticSizes, staticStrides) + .cast(); + } + build(b, result, resultType, source, dynamicOffsets, dynamicSizes, + dynamicStrides, b.getI64ArrayAttr(staticOffsets), + b.getI64ArrayAttr(staticSizes), b.getI64ArrayAttr(staticStrides)); + result.addAttributes(attrs); +} + +/// Build an ExtractSliceOp with mixed static and dynamic entries and inferred +/// result type. +void ExtractSliceOp::build(OpBuilder &b, OperationState &result, Value source, + ArrayRef offsets, + ArrayRef sizes, + ArrayRef strides, + ArrayRef attrs) { + build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs); +} + +/// Build an ExtractSliceOp with dynamic entries and custom result type. If the +/// type passed is nullptr, it is inferred. 
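In the printed IR, entries handled by the dynamic-value builders appear as SSA operands while static entries stay in the bracketed lists; a hedged sketch with invented names:

```mlir
func @mixed_static_dynamic_example(%t: tensor<?x16xf32>, %i: index, %n: index)
    -> tensor<?x8xf32> {
  // %i and %n are dynamic offset/size operands; the remaining entries are
  // static attributes.
  %0 = tensor.extract_slice %t[%i, 4] [%n, 8] [1, 1]
      : tensor<?x16xf32> to tensor<?x8xf32>
  return %0 : tensor<?x8xf32>
}
```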
+void ExtractSliceOp::build(OpBuilder &b, OperationState &result, + RankedTensorType resultType, Value source, + ValueRange offsets, ValueRange sizes, + ValueRange strides, ArrayRef attrs) { + SmallVector offsetValues = llvm::to_vector<4>( + llvm::map_range(offsets, [](Value v) -> OpFoldResult { return v; })); + SmallVector sizeValues = llvm::to_vector<4>( + llvm::map_range(sizes, [](Value v) -> OpFoldResult { return v; })); + SmallVector strideValues = llvm::to_vector<4>( + llvm::map_range(strides, [](Value v) -> OpFoldResult { return v; })); + build(b, result, resultType, source, offsetValues, sizeValues, strideValues); +} + +/// Build an ExtractSliceOp with dynamic entries and inferred result type. +void ExtractSliceOp::build(OpBuilder &b, OperationState &result, Value source, + ValueRange offsets, ValueRange sizes, + ValueRange strides, ArrayRef attrs) { + build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs); +} + +enum SliceVerificationResult { + Success, + RankTooLarge, + SizeMismatch, + ElemTypeMismatch, +}; + +/// Checks if `original` Type type can be rank reduced to `reduced` type. +/// This function is slight variant of `is subsequence` algorithm where +/// not matching dimension must be 1. +static SliceVerificationResult +isRankReducedType(Type originalType, Type candidateReducedType, + std::string *errMsg = nullptr) { + if (originalType == candidateReducedType) + return SliceVerificationResult::Success; + if (!originalType.isa()) + return SliceVerificationResult::Success; + if (originalType.isa() && + !candidateReducedType.isa()) + return SliceVerificationResult::Success; + + ShapedType originalShapedType = originalType.cast(); + ShapedType candidateReducedShapedType = + candidateReducedType.cast(); + + // Rank and size logic is valid for all ShapedTypes. + ArrayRef originalShape = originalShapedType.getShape(); + ArrayRef candidateReducedShape = + candidateReducedShapedType.getShape(); + unsigned originalRank = originalShape.size(), + candidateReducedRank = candidateReducedShape.size(); + if (candidateReducedRank > originalRank) + return SliceVerificationResult::RankTooLarge; + + auto optionalUnusedDimsMask = + computeRankReductionMask(originalShape, candidateReducedShape); + + // Sizes cannot be matched in case empty vector is returned. + if (!optionalUnusedDimsMask.hasValue()) + return SliceVerificationResult::SizeMismatch; + + if (originalShapedType.getElementType() != + candidateReducedShapedType.getElementType()) + return SliceVerificationResult::ElemTypeMismatch; + + // We are done for the tensor case. + if (originalType.isa()) + return SliceVerificationResult::Success; + + return SliceVerificationResult::Success; +} + +template +static LogicalResult produceSliceErrorMsg(SliceVerificationResult result, + OpTy op, Type expectedType, + StringRef errMsg = "") { + auto memrefType = expectedType.cast(); + switch (result) { + case SliceVerificationResult::Success: + return success(); + case SliceVerificationResult::RankTooLarge: + return op.emitError("expected result rank to be smaller or equal to ") + << "the source rank. " << errMsg; + case SliceVerificationResult::SizeMismatch: + return op.emitError("expected result type to be ") + << expectedType + << " or a rank-reduced version. 
(mismatch of result sizes) " + << errMsg; + case SliceVerificationResult::ElemTypeMismatch: + return op.emitError("expected result element type to be ") + << memrefType.getElementType() << errMsg; + } + llvm_unreachable("unexpected extract_slice op verification result"); +} + +/// Verifier for ExtractSliceOp. +static LogicalResult verify(ExtractSliceOp op) { + // Verify result type against inferred type. + auto expectedType = ExtractSliceOp::inferResultType( + op.getSourceType(), extractFromI64ArrayAttr(op.static_offsets()), + extractFromI64ArrayAttr(op.static_sizes()), + extractFromI64ArrayAttr(op.static_strides())); + auto result = isRankReducedType(expectedType, op.getType()); + return produceSliceErrorMsg(result, op, expectedType); +} + +/// Infer the canonical type of the result of an extract_slice op. Returns a +/// type with rank `resultRank` that is either the rank of the rank-reduced +/// type, or the non-rank-reduced type. +static RankedTensorType +getCanonicalSliceResultType(unsigned resultRank, RankedTensorType sourceType, + ArrayRef mixedOffsets, + ArrayRef mixedSizes, + ArrayRef mixedStrides) { + auto resultType = + ExtractSliceOp::inferRankReducedResultType( + resultRank, sourceType, mixedOffsets, mixedSizes, mixedStrides) + .cast(); + if (resultType.getRank() != resultRank) { + resultType = ExtractSliceOp::inferResultType(sourceType, mixedOffsets, + mixedSizes, mixedStrides) + .cast(); + } + return resultType; +} + +namespace { +/// Pattern to rewrite an extract_slice op with tensor::Cast arguments. +/// This essentially pushes memref_cast past its consuming slice when +/// `canFoldIntoConsumerOp` is true. +/// +/// Example: +/// ``` +/// %0 = tensor.cast %V : tensor<16x16xf32> to tensor +/// %1 = tensor.extract_slice %0[0, 0][3, 4][1, 1] : tensor to +/// tensor<3x4xf32> +/// ``` +/// is rewritten into: +/// ``` +/// %0 = tensor.extract_slice %V[0, 0][3, 4][1, 1] : tensor<16x16xf32> to +/// tensor<3x4xf32> %1 = tensor.cast %0: tensor<3x4xf32> to tensor<3x4xf32> +/// ``` +class ExtractSliceOpCastFolder final : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(ExtractSliceOp sliceOp, + PatternRewriter &rewriter) const override { + // Any constant operand, just return to let SubViewOpConstantFolder kick in. + if (llvm::any_of(sliceOp.getOperands(), [](Value operand) { + return matchPattern(operand, matchConstantIndex()); + })) + return failure(); + + auto castOp = sliceOp.source().getDefiningOp(); + if (!castOp) + return failure(); + + if (!canFoldIntoConsumerOp(castOp)) + return failure(); + + /// Deduce the type of the result to use for the canonicalized operation. + RankedTensorType resultType = getCanonicalSliceResultType( + sliceOp.getType().getRank(), sliceOp.getSourceType(), + sliceOp.getMixedOffsets(), sliceOp.getMixedSizes(), + sliceOp.getMixedStrides()); + Value newSlice = rewriter.create( + sliceOp.getLoc(), resultType, castOp.source(), sliceOp.offsets(), + sliceOp.sizes(), sliceOp.strides(), sliceOp.static_offsets(), + sliceOp.static_sizes(), sliceOp.static_strides()); + rewriter.replaceOpWithNewOp(sliceOp, sliceOp.getType(), + newSlice); + return success(); + } +}; +} // namespace + +/// Return the canonical type of the result of an extract_slice op. 
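The canonicalization patterns registered just below use this canonical type when folding constant index operands into the static attribute form; a hedged before/after sketch with invented shapes:

```mlir
// Before canonicalization: the offsets are constant SSA operands.
func @fold_constant_operands_example(%t: tensor<8x8xf32>) -> tensor<2x2xf32> {
  %c0 = constant 0 : index
  %0 = tensor.extract_slice %t[%c0, %c0] [2, 2] [1, 1]
      : tensor<8x8xf32> to tensor<2x2xf32>
  return %0 : tensor<2x2xf32>
}
// After canonicalization the constants become static attributes:
//   %0 = tensor.extract_slice %t[0, 0] [2, 2] [1, 1]
//       : tensor<8x8xf32> to tensor<2x2xf32>
```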
+struct SliceReturnTypeCanonicalizer { + RankedTensorType operator()(ExtractSliceOp op, + ArrayRef mixedOffsets, + ArrayRef mixedSizes, + ArrayRef mixedStrides) { + return getCanonicalSliceResultType(op.getType().getRank(), + op.getSourceType(), mixedOffsets, + mixedSizes, mixedStrides); + } +}; + +/// A canonicalizer wrapper to replace ExtractSliceOps. +struct SliceCanonicalizer { + void operator()(PatternRewriter &rewriter, ExtractSliceOp op, + ExtractSliceOp newOp) { + Value replacement = newOp.getResult(); + if (replacement.getType() != op.getType()) + replacement = rewriter.create(op.getLoc(), op.getType(), + replacement); + rewriter.replaceOp(op, replacement); + } +}; + +void ExtractSliceOp::getCanonicalizationPatterns(RewritePatternSet &results, + MLIRContext *context) { + results.add< + OpWithOffsetSizesAndStridesConstantArgumentFolder< + ExtractSliceOp, SliceReturnTypeCanonicalizer, SliceCanonicalizer>, + ExtractSliceOpCastFolder>(context); +} + +// +static LogicalResult +foldIdentityOffsetSizeAndStrideOpInterface(OffsetSizeAndStrideOpInterface op, + ShapedType shapedType) { + OpBuilder b(op.getContext()); + for (OpFoldResult ofr : op.getMixedOffsets()) + if (!isEqualConstantIntOrValue(ofr, b.getIndexAttr(0))) + return failure(); + // Rank-reducing noops only need to inspect the leading dimensions: llvm::zip + // is appropriate. + auto shape = shapedType.getShape(); + for (auto it : llvm::zip(op.getMixedSizes(), shape)) + if (!isEqualConstantIntOrValue(std::get<0>(it), + b.getIndexAttr(std::get<1>(it)))) + return failure(); + for (OpFoldResult ofr : op.getMixedStrides()) + if (!isEqualConstantIntOrValue(ofr, b.getIndexAttr(1))) + return failure(); + return success(); +} + +OpFoldResult ExtractSliceOp::fold(ArrayRef) { + if (getSourceType() == getType() && + succeeded(foldIdentityOffsetSizeAndStrideOpInterface(*this, getType()))) + return this->source(); + return OpFoldResult(); +} + +//===----------------------------------------------------------------------===// +// InsertSliceOp +//===----------------------------------------------------------------------===// + +// Build a InsertSliceOp with mixed static and dynamic entries. +void InsertSliceOp::build(OpBuilder &b, OperationState &result, Value source, + Value dest, ArrayRef offsets, + ArrayRef sizes, + ArrayRef strides, + ArrayRef attrs) { + SmallVector staticOffsets, staticSizes, staticStrides; + SmallVector dynamicOffsets, dynamicSizes, dynamicStrides; + dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets, + ShapedType::kDynamicStrideOrOffset); + dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes, + ShapedType::kDynamicSize); + dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides, + ShapedType::kDynamicStrideOrOffset); + build(b, result, dest.getType(), source, dest, dynamicOffsets, dynamicSizes, + dynamicStrides, b.getI64ArrayAttr(staticOffsets), + b.getI64ArrayAttr(staticSizes), b.getI64ArrayAttr(staticStrides)); + result.addAttributes(attrs); +} + +// Build a InsertSliceOp with dynamic entries. 
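For reference, the IR produced by these insert_slice builders, together with the identity case that the fold hook defined below turns into a no-op (a hedged sketch, invented shapes):

```mlir
func @insert_slice_example(%src: tensor<4x8xf32>, %dst: tensor<4x8xf32>,
                           %big: tensor<16x16xf32>, %i: index)
    -> (tensor<16x16xf32>, tensor<4x8xf32>) {
  // Mixed entries: %i is a dynamic offset operand, the rest are static.
  %0 = tensor.insert_slice %src into %big[%i, 0] [4, 8] [1, 1]
      : tensor<4x8xf32> into tensor<16x16xf32>
  // Identity insertion (zero offsets, full sizes, unit strides, equal static
  // types) folds to %src.
  %1 = tensor.insert_slice %src into %dst[0, 0] [4, 8] [1, 1]
      : tensor<4x8xf32> into tensor<4x8xf32>
  return %0, %1 : tensor<16x16xf32>, tensor<4x8xf32>
}
```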
+void InsertSliceOp::build(OpBuilder &b, OperationState &result, Value source, + Value dest, ValueRange offsets, ValueRange sizes, + ValueRange strides, ArrayRef attrs) { + SmallVector offsetValues = llvm::to_vector<4>( + llvm::map_range(offsets, [](Value v) -> OpFoldResult { return v; })); + SmallVector sizeValues = llvm::to_vector<4>( + llvm::map_range(sizes, [](Value v) -> OpFoldResult { return v; })); + SmallVector strideValues = llvm::to_vector<4>( + llvm::map_range(strides, [](Value v) -> OpFoldResult { return v; })); + build(b, result, source, dest, offsetValues, sizeValues, strideValues); +} + +OpFoldResult InsertSliceOp::fold(ArrayRef) { + if (getSourceType().hasStaticShape() && getType().hasStaticShape() && + getSourceType() == getType() && + succeeded(foldIdentityOffsetSizeAndStrideOpInterface(*this, getType()))) + return this->source(); + return OpFoldResult(); +} + +namespace { +/// Pattern to rewrite a insert_slice op with constant arguments. +class InsertSliceOpConstantArgumentFolder final + : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(InsertSliceOp insertSliceOp, + PatternRewriter &rewriter) const override { + // No constant operand, just return. + if (llvm::none_of(insertSliceOp.getOperands(), [](Value operand) { + return matchPattern(operand, matchConstantIndex()); + })) + return failure(); + + // At least one of offsets/sizes/strides is a new constant. + // Form the new list of operands and constant attributes from the + // existing. + SmallVector mixedOffsets(insertSliceOp.getMixedOffsets()); + SmallVector mixedSizes(insertSliceOp.getMixedSizes()); + SmallVector mixedStrides(insertSliceOp.getMixedStrides()); + canonicalizeSubViewPart(mixedOffsets, ShapedType::isDynamicStrideOrOffset); + canonicalizeSubViewPart(mixedSizes, ShapedType::isDynamic); + canonicalizeSubViewPart(mixedStrides, ShapedType::isDynamicStrideOrOffset); + + // Create the new op in canonical form. + rewriter.replaceOpWithNewOp( + insertSliceOp, insertSliceOp.source(), insertSliceOp.dest(), + mixedOffsets, mixedSizes, mixedStrides); + return success(); + } +}; + +/// Fold tensor_casts with insert_slice operations. +struct InsertSliceOpCastFolder final : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(InsertSliceOp insertSliceOp, + PatternRewriter &rewriter) const override { + if (llvm::any_of(insertSliceOp.getOperands(), [](Value operand) { + return matchPattern(operand, matchConstantIndex()); + })) + return failure(); + + auto getSourceOfCastOp = [](Value v) -> Optional { + auto castOp = v.getDefiningOp(); + if (!castOp || !canFoldIntoConsumerOp(castOp)) + return llvm::None; + return castOp.source(); + }; + Optional sourceCastSource = + getSourceOfCastOp(insertSliceOp.source()); + Optional destCastSource = getSourceOfCastOp(insertSliceOp.dest()); + if (!sourceCastSource && !destCastSource) + return failure(); + + Value replacement = rewriter.create( + insertSliceOp.getLoc(), + (sourceCastSource ? *sourceCastSource : insertSliceOp.source()), + (destCastSource ? 
*destCastSource : insertSliceOp.dest()), + insertSliceOp.getMixedOffsets(), insertSliceOp.getMixedSizes(), + insertSliceOp.getMixedStrides()); + + if (replacement.getType() != insertSliceOp.getType()) { + replacement = rewriter.create( + insertSliceOp.getLoc(), insertSliceOp.getType(), replacement); + } + rewriter.replaceOp(insertSliceOp, replacement); + return success(); + } +}; +} // namespace + +void InsertSliceOp::getCanonicalizationPatterns(RewritePatternSet &results, + MLIRContext *context) { + results.add( + context); +} + //===----------------------------------------------------------------------===// // TableGen'd op method definitions //===----------------------------------------------------------------------===// diff --git a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir --- a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir +++ b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir @@ -616,9 +616,9 @@ // CHECK-NEXT: %[[ISNEG:.*]] = cmpi slt, %[[INDEX]], %[[C0]] : index // CHECK-NEXT: %[[SELECT:.*]] = select %[[ISNEG]], %[[POSINDEX]], %[[INDEX]] : index // CHECK-NEXT: %[[C1:.*]] = constant 1 : index - // CHECK-NEXT: %[[HEAD:.*]] = subtensor %[[SHAPE]][%[[C0]]] [%[[SELECT]]] [%[[C1]]] : tensor to tensor + // CHECK-NEXT: %[[HEAD:.*]] = tensor.extract_slice %[[SHAPE]][%[[C0]]] [%[[SELECT]]] [%[[C1]]] : tensor to tensor // CHECK-NEXT: %[[TAIL_SIZE:.*]] = subi %[[RANK]], %[[SELECT]] : index - // CHECK-NEXT: %[[TAIL:.*]] = subtensor %[[SHAPE]][%[[SELECT]]] [%[[TAIL_SIZE]]] [%[[C1]]] : tensor to tensor + // CHECK-NEXT: %[[TAIL:.*]] = tensor.extract_slice %[[SHAPE]][%[[SELECT]]] [%[[TAIL_SIZE]]] [%[[C1]]] : tensor to tensor // CHECK-NEXT: return %[[HEAD]], %[[TAIL]] : tensor, tensor %head, %tail = "shape.split_at"(%shape, %index) : (tensor, index) -> (tensor, tensor) return %head, %tail : tensor, tensor diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir @@ -679,10 +679,10 @@ // CHECK: [[CST:%.+]] = constant 0.0 // CHECK: [[FILL:%.+]] = linalg.fill([[INIT]], [[CST]]) // CHECK: [[ARG0_DIM0:%.+]] = memref.dim %arg0, [[AXIS]] - // CHECK: [[INSERT0:%.+]] = subtensor_insert %arg0 into [[FILL]]{{\[}}[[OFFSET]], [[OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]] + // CHECK: [[INSERT0:%.+]] = tensor.insert_slice %arg0 into [[FILL]]{{\[}}[[OFFSET]], [[OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]] // CHECK: [[NEW_OFFSET:%.+]] = addi [[OFFSET]], [[ARG0_DIM0]] // CHECK: [[ARG1_DIM0:%.+]] = memref.dim %arg1, [[AXIS]] - // CHECK: [[INSERT1:%.+]] = subtensor_insert %arg1 into [[INSERT0]]{{\[}}[[NEW_OFFSET]], [[OFFSET]]] {{\[}}[[ARG1_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]] + // CHECK: [[INSERT1:%.+]] = tensor.insert_slice %arg1 into [[INSERT0]]{{\[}}[[NEW_OFFSET]], [[OFFSET]]] {{\[}}[[ARG1_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]] %0 = "tosa.concat"(%arg0, %arg1) { axis = 0 : i64} : (tensor<5x1xf32>, tensor<6x1xf32>) -> (tensor<11x1xf32>) // CHECK: [[AXIS:%.+]] = constant 1 @@ -698,10 +698,10 @@ // CHECK: [[CST:%.+]] = constant 0.0 // CHECK: [[FILL:%.+]] = linalg.fill([[INIT]], [[CST]]) // CHECK: [[ARG0_DIM1:%.+]] = memref.dim %arg0, [[AXIS]] - // CHECK: [[INSERT0:%.+]] = subtensor_insert %arg0 into [[FILL]]{{\[}}[[OFFSET]], [[OFFSET]]] 
{{\[}}[[ARG0_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]] + // CHECK: [[INSERT0:%.+]] = tensor.insert_slice %arg0 into [[FILL]]{{\[}}[[OFFSET]], [[OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]] // CHECK: [[NEW_OFFSET:%.+]] = addi [[OFFSET]], [[ARG0_DIM1]] // CHECK: [[ARG1_DIM1:%.+]] = memref.dim %arg0, [[AXIS]] - // CHECK: [[INSERT1:%.+]] = subtensor_insert %arg0 into [[INSERT0]]{{\[}}[[OFFSET]], [[NEW_OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG1_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]] + // CHECK: [[INSERT1:%.+]] = tensor.insert_slice %arg0 into [[INSERT0]]{{\[}}[[OFFSET]], [[NEW_OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG1_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]] %1 = "tosa.concat"(%arg0, %arg0) { axis = 1 : i64} : (tensor<5x1xf32>, tensor<5x1xf32>) -> (tensor<5x2xf32>) return } diff --git a/mlir/test/Conversion/TosaToStandard/tosa-to-standard.mlir b/mlir/test/Conversion/TosaToStandard/tosa-to-standard.mlir --- a/mlir/test/Conversion/TosaToStandard/tosa-to-standard.mlir +++ b/mlir/test/Conversion/TosaToStandard/tosa-to-standard.mlir @@ -12,7 +12,7 @@ // ----- func @slice(%arg0: tensor<6xf32>) ->() { - // CHECK: [[SLICE:%.+]] = subtensor %arg0[2] [1] [1] + // CHECK: [[SLICE:%.+]] = tensor.extract_slice %arg0[2] [1] [1] %0 = "tosa.slice"(%arg0) {start = [2], size = [1]} : (tensor<6xf32>) -> (tensor<1xf32>) return } diff --git a/mlir/test/Dialect/Linalg/bufferize.mlir b/mlir/test/Dialect/Linalg/bufferize.mlir --- a/mlir/test/Dialect/Linalg/bufferize.mlir +++ b/mlir/test/Dialect/Linalg/bufferize.mlir @@ -166,9 +166,9 @@ func private @make_index() -> index -// CHECK-LABEL: func @bufferize_subtensor( +// CHECK-LABEL: func @bufferize_slice( // CHECK-SAME: %[[T:[0-9a-z]*]]: tensor -func @bufferize_subtensor(%t : tensor) -> (tensor<2x3xf32>, tensor<2x?xf32>) { +func @bufferize_slice(%t : tensor) -> (tensor<2x3xf32>, tensor<2x?xf32>) { // CHECK: %[[IDX:.*]] = call @make_index() : () -> index %i0 = call @make_index() : () -> index @@ -178,14 +178,14 @@ // CHECK-SAME: memref to memref<2x3xf32, #[[$MAP0]]> // CHECK-NEXT: linalg.copy(%[[SM0]], %[[A0]]) : memref<2x3xf32, #[[$MAP0]]>, memref<2x3xf32> // CHECK-NEXT: %[[RT0:.*]] = memref.tensor_load %[[A0]] : memref<2x3xf32> - %st0 = subtensor %t[0, 0][2, 3][1, 1] : tensor to tensor<2x3xf32> + %st0 = tensor.extract_slice %t[0, 0][2, 3][1, 1] : tensor to tensor<2x3xf32> // CHECK-NEXT: %[[A1:.*]] = memref.alloc(%[[IDX]]) : memref<2x?xf32> // CHECK-NEXT: %[[SM1:.*]] = memref.subview %[[M]][0, %[[IDX]]] [2, %[[IDX]]] [1, 2] // CHECK-SAME: memref to memref<2x?xf32, #[[$MAP1]]> // CHECK-NEXT: linalg.copy(%[[SM1]], %[[A1]]) : memref<2x?xf32, #[[$MAP1]]>, memref<2x?xf32> // CHECK-NEXT: %[[RT1:.*]] = memref.tensor_load %[[A1]] : memref<2x?xf32> - %st1 = subtensor %t[0, %i0][2, %i0][1, 2] : tensor to tensor<2x?xf32> + %st1 = tensor.extract_slice %t[0, %i0][2, %i0][1, 2] : tensor to tensor<2x?xf32> // CHECK-NEXT: return %[[RT0]], %[[RT1]] return %st0, %st1 : tensor<2x3xf32>, tensor<2x?xf32> @@ -198,11 +198,11 @@ func private @make_index() -> index -// CHECK-LABEL: func @bufferize_subtensor_insert( +// CHECK-LABEL: func @bufferize_insert_slice( // CHECK-SAME: %[[T:[0-9a-z]*]]: tensor // CHECK-SAME: %[[ST0:[0-9a-z]*]]: tensor<2x3xf32> // CHECK-SAME: %[[ST1:[0-9a-z]*]]: tensor<2x?xf32> -func @bufferize_subtensor_insert(%t : tensor, %st0 : tensor<2x3xf32>, %st1 : tensor<2x?xf32>) -> +func @bufferize_insert_slice(%t : tensor, %st0 : tensor<2x3xf32>, %st1 : tensor<2x?xf32>) -> (tensor, tensor) { %c0 = constant 0 : index %c1 = constant 1 : index @@ -222,7 
+222,7 @@ // CHECK-SAME: memref to memref<2x3xf32, #[[$MAP0]]> // CHECK-NEXT: linalg.copy(%[[SM0]], %[[SUBVIEW0]]) : memref<2x3xf32>, memref<2x3xf32, #[[$MAP0]]> // CHECK-NEXT: %[[RT0:.*]] = memref.tensor_load %[[M_COPY0]] : memref - %t0 = subtensor_insert %st0 into %t[0, 0][2, 3][1, 1] : tensor<2x3xf32> into tensor + %t0 = tensor.insert_slice %st0 into %t[0, 0][2, 3][1, 1] : tensor<2x3xf32> into tensor // CHECK-DAG: %[[SM1:.*]] = memref.buffer_cast %[[ST1]] : memref<2x?xf32> // CHECK-NEXT: %[[M_COPY1:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref @@ -231,7 +231,7 @@ // CHECK-SAME: memref to memref<2x?xf32, #[[$MAP1]]> // CHECK-NEXT: linalg.copy(%[[SM1]], %[[SUBVIEW1]]) : memref<2x?xf32>, memref<2x?xf32, #[[$MAP1]]> // CHECK-NEXT: %[[RT1:.*]] = memref.tensor_load %[[M_COPY1]] : memref - %t1 = subtensor_insert %st1 into %t[0, %i0][2, %i0][1, 2] : tensor<2x?xf32> into tensor + %t1 = tensor.insert_slice %st1 into %t[0, %i0][2, %i0][1, 2] : tensor<2x?xf32> into tensor // CHECK: return %[[RT0]], %[[RT1]] return %t0, %t1: tensor, tensor diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir --- a/mlir/test/Dialect/Linalg/canonicalize.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize.mlir @@ -648,15 +648,15 @@ // ----- -func @fold_init_tensor_with_subtensor +func @fold_init_tensor_with_slice (%arg0 : index, %arg1 : index) -> tensor<5x?x20xf32> { %0 = linalg.init_tensor[%arg0, 10, 40] : tensor - %1 = subtensor %0[0, 0, 0] [5, %arg1, 20] [1, 1, 1] + %1 = tensor.extract_slice %0[0, 0, 0] [5, %arg1, 20] [1, 1, 1] : tensor to tensor<5x?x20xf32> return %1 : tensor<5x?x20xf32> } -// CHECK: func @fold_init_tensor_with_subtensor +// CHECK: func @fold_init_tensor_with_slice // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index // CHECK: %[[T0:.+]] = linalg.init_tensor [5, %[[ARG1]], 20] @@ -723,13 +723,13 @@ %1 = linalg.fill(%0, %arg1) : tensor, f32 -> tensor %2 = memref.dim %arg0, %c0 : tensor %3 = memref.dim %arg0, %c1 : tensor - %4 = subtensor_insert %arg0 into %1[%arg2, %arg3] [%2, %3] [1, 1] : tensor into tensor + %4 = tensor.insert_slice %arg0 into %1[%arg2, %arg3] [%2, %3] [1, 1] : tensor into tensor return %4 : tensor } // CHECK-LABEL: func @propogate_casts // CHECK: %[[INIT:.+]] = linalg.init_tensor [21, 42] // CHECK: %[[FILL:.+]] = linalg.fill(%[[INIT]], %{{.+}}) -// CHECK: %[[INSERTED:.+]] = subtensor_insert %{{.+}} into %[[FILL]] +// CHECK: %[[INSERTED:.+]] = tensor.insert_slice %{{.+}} into %[[FILL]] // CHECK: %[[RESULT:.+]] = tensor.cast %[[INSERTED]] // CHECK: return %[[RESULT]] diff --git a/mlir/test/Dialect/Linalg/comprehensive-func-bufferize-analysis.mlir b/mlir/test/Dialect/Linalg/comprehensive-func-bufferize-analysis.mlir --- a/mlir/test/Dialect/Linalg/comprehensive-func-bufferize-analysis.mlir +++ b/mlir/test/Dialect/Linalg/comprehensive-func-bufferize-analysis.mlir @@ -6,43 +6,43 @@ // ----- -// CHECK-LABEL: func @subtensor_fun -func @subtensor_fun(%A : tensor, %B : tensor {linalg.inplaceable = true}) +// CHECK-LABEL: func @extract_slice_fun +func @extract_slice_fun(%A : tensor, %B : tensor {linalg.inplaceable = true}) -> (tensor<4xf32>, tensor<8xf32>) { - // subtensor is not used in a write, it is not compelled to bufferize out of - // place. Let callers decide whether they want to create aliasing subviews at - // all call sites or whether they allocate. + // tensor.extract_slice is not used in a write, it is not compelled to + // bufferize out of place. 
Let callers decide whether they want to create + // aliasing subviews at all call sites or whether they allocate. // This is true irrespective of whether the function argument is inplaceable. - // CHECK: subtensor + // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %r0 = subtensor %A[0][4][1] : tensor to tensor<4xf32> + %r0 = tensor.extract_slice %A[0][4][1] : tensor to tensor<4xf32> - // CHECK: subtensor + // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %r1 = subtensor %B[0][8][1] : tensor to tensor<8xf32> + %r1 = tensor.extract_slice %B[0][8][1] : tensor to tensor<8xf32> return %r0, %r1: tensor<4xf32>, tensor<8xf32> } // ----- -// CHECK-LABEL: func @subtensor_insert_fun -func @subtensor_insert_fun( +// CHECK-LABEL: func @insert_slice_fun +func @insert_slice_fun( %A : tensor, %B : tensor {linalg.inplaceable = true}, %C : tensor<4xf32>) -> (tensor, tensor) { // must bufferize out of place. - // CHECK: subtensor_insert + // CHECK: tensor.insert_slice // CHECK-SAME: {__inplace_results_attr__ = ["false"]} - %r0 = subtensor_insert %C into %A[0][4][1] : tensor<4xf32> into tensor + %r0 = tensor.insert_slice %C into %A[0][4][1] : tensor<4xf32> into tensor // bufferizes inplace. - // CHECK: subtensor_insert + // CHECK: tensor.insert_slice // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %r1 = subtensor_insert %C into %B[0][4][1] : tensor<4xf32> into tensor + %r1 = tensor.insert_slice %C into %B[0][4][1] : tensor<4xf32> into tensor return %r0, %r1: tensor, tensor } @@ -85,34 +85,34 @@ // ----- -// CHECK-LABEL: func @subtensor_subtensor -func @subtensor_subtensor( +// CHECK-LABEL: func @extract_slice_extract_slice +func @extract_slice_extract_slice( %A : tensor {linalg.inplaceable = true}, %B : tensor) -> (tensor<2xf32>, tensor<2xf32>) { - // subtensor is not used in a write, it is not compelled to bufferize out of - // place. Let callers decide whether they want to create aliasing subviews at - // all call sites or whether they allocate. + // tensor.extract_slice is not used in a write, it is not compelled to + // bufferize out of place. Let callers decide whether they want to create + // aliasing subviews at all call sites or whether they allocate. // This is true irrespective of whether the function argument is inplaceable. 
// CHECK: {__inplace_results_attr__ = ["true"]} - %r0 = subtensor %A[0][4][1] : tensor to tensor<4xf32> + %r0 = tensor.extract_slice %A[0][4][1] : tensor to tensor<4xf32> // CHECK: {__inplace_results_attr__ = ["true"]} - %r1 = subtensor %r0[0][2][1] : tensor<4xf32> to tensor<2xf32> + %r1 = tensor.extract_slice %r0[0][2][1] : tensor<4xf32> to tensor<2xf32> // CHECK: {__inplace_results_attr__ = ["true"]} - %r2 = subtensor %B[0][4][1] : tensor to tensor<4xf32> + %r2 = tensor.extract_slice %B[0][4][1] : tensor to tensor<4xf32> // CHECK: {__inplace_results_attr__ = ["true"]} - %r3 = subtensor %r2[0][2][1] : tensor<4xf32> to tensor<2xf32> + %r3 = tensor.extract_slice %r2[0][2][1] : tensor<4xf32> to tensor<2xf32> return %r1, %r3: tensor<2xf32>, tensor<2xf32> } // ----- -// CHECK-LABEL: func @subtensor_insert_subtensor_insert -func @subtensor_insert_subtensor_insert( +// CHECK-LABEL: func @insert_slice_insert_slice +func @insert_slice_insert_slice( %A : tensor {linalg.inplaceable = true}, %A2 : tensor<4xf32> {linalg.inplaceable = true}, %A3 : tensor<2xf32> {linalg.inplaceable = true}, @@ -120,102 +120,106 @@ -> (tensor, tensor) { // CHECK: {__inplace_results_attr__ = ["true"]} - %r0 = subtensor_insert %A3 into %A2[0][2][1] : tensor<2xf32> into tensor<4xf32> + %r0 = tensor.insert_slice %A3 into %A2[0][2][1] : tensor<2xf32> into tensor<4xf32> // CHECK: {__inplace_results_attr__ = ["true"]} - %r1 = subtensor_insert %r0 into %A[0][4][1] : tensor<4xf32> into tensor + %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor // CHECK: {__inplace_results_attr__ = ["false"]} - %r2 = subtensor_insert %B3 into %B2[0][2][1] : tensor<2xf32> into tensor<4xf32> + %r2 = tensor.insert_slice %B3 into %B2[0][2][1] : tensor<2xf32> into tensor<4xf32> // CHECK: {__inplace_results_attr__ = ["false"]} - %r3 = subtensor_insert %r2 into %B[0][4][1] : tensor<4xf32> into tensor + %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor return %r1, %r3: tensor, tensor } // ----- -// CHECK-LABEL: func @subtensor_nonmatching_subtensor_insert -func @subtensor_nonmatching_subtensor_insert( +// CHECK-LABEL: func @extract_slice_nonmatching_insert_slice +func @extract_slice_nonmatching_insert_slice( %A : tensor {linalg.inplaceable = true}, %B : tensor, %idx: index) -> (tensor, tensor) { // %r1 bufferizes inplace because %A is inplaceable. - // %r0 is an overlapping subtensor that does not match, it must be out of place. - // CHECK: subtensor + // %r0 is an overlapping tensor.extract_slice that does not match, it must be + // out of place. + // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_results_attr__ = ["false"]} - %r0 = subtensor %A[0][4][1] : tensor to tensor<4xf32> + %r0 = tensor.extract_slice %A[0][4][1] : tensor to tensor<4xf32> // %r1 can bufferize inplace fine. - // CHECK: subtensor_insert + // CHECK: tensor.insert_slice // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %r1 = subtensor_insert %r0 into %A[%idx][4][1] : tensor<4xf32> into tensor + %r1 = tensor.insert_slice %r0 into %A[%idx][4][1] : tensor<4xf32> into tensor // %r3 does bufferizes inplace because %B is not inplaceable. - // %r0 is an overlapping subtensor that does not match, but does not alias with - // the buffer coming from %r3 so it can actually bufferize inplace. - // CHECK: subtensor + // %r0 is an overlapping tensor.extract_slice that does not match, but does + // not alias with the buffer coming from %r3 so it can actually bufferize + // inplace. 
+ // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %r2 = subtensor %B[0][4][1] : tensor to tensor<4xf32> + %r2 = tensor.extract_slice %B[0][4][1] : tensor to tensor<4xf32> // %r3 cannot bufferize inplace since %B is not inplaceable. - // CHECK: subtensor_insert + // CHECK: tensor.insert_slice // CHECK-SAME: {__inplace_results_attr__ = ["false"]} - %r3 = subtensor_insert %r2 into %B[%idx][4][1] : tensor<4xf32> into tensor + %r3 = tensor.insert_slice %r2 into %B[%idx][4][1] : tensor<4xf32> into tensor return %r1, %r3: tensor, tensor } // ----- -// CHECK-LABEL: func @subtensor_matching_subtensor_insert -func @subtensor_matching_subtensor_insert( +// CHECK-LABEL: func @extract_slice_matching_insert_slice +func @extract_slice_matching_insert_slice( %A : tensor {linalg.inplaceable = true}, %B : tensor) -> (tensor, tensor) { // %r1 bufferizes inplace because %A is inplaceable. - // %r0 is a subtensor that matches, it can also be bufferized inplace. - // CHECK: subtensor + // %r0 is a tensor.extract_slice that matches, it can also be bufferized + // inplace. + // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %r0 = subtensor %A[0][4][1] : tensor to tensor<4xf32> + %r0 = tensor.extract_slice %A[0][4][1] : tensor to tensor<4xf32> - // CHECK: subtensor_insert + // CHECK: tensor.insert_slice // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %r1 = subtensor_insert %r0 into %A[0][4][1] : tensor<4xf32> into tensor + %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor - // %r2 is a subtensor that matches %r3, it can be bufferized inplace. - // CHECK: subtensor + // %r2 is a tensor.extract_slice that matches %r3, it can be bufferized + // inplace. + // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %r2 = subtensor %B[0][4][1] : tensor to tensor<4xf32> + %r2 = tensor.extract_slice %B[0][4][1] : tensor to tensor<4xf32> - // subtensor_insert cannot bufferize inplace. + // tensor.insert_slice cannot bufferize inplace. // This should have been captured by a canonicalization pattern and it would // be unproductive to have special logic in bufferization to encode matching - // subtensor_insert(subtensor(A), A). - // CHECK: subtensor_insert + // insert_slice(extract_slice(A), A). + // CHECK: tensor.insert_slice // CHECK-SAME: {__inplace_results_attr__ = ["false"]} - %r3 = subtensor_insert %r2 into %B[0][4][1] : tensor<4xf32> into tensor + %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor return %r1, %r3: tensor, tensor } // ----- -// CHECK-LABEL: func @subtensor_linalg_readonly_use -func @subtensor_linalg_readonly_use( +// CHECK-LABEL: func @extract_slice_linalg_readonly_use +func @extract_slice_linalg_readonly_use( %A : tensor, %B : tensor<4x4xf32>, %C : tensor<4x4xf32> {linalg.inplaceable = true}) -> (tensor<4x4xf32>, tensor<4x4xf32>) { - // subtensor is only used as a read, no interference irrespective of user's - // inplace status. - // CHECK: subtensor + // tensor.extract_slice is only used as a read, no interference irrespective + // of user's inplace status. + // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %sA = subtensor %A[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> + %sA = tensor.extract_slice %A[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> // matmul output operand is not inplaceable at the function boundary. 
// CHECK: linalg.matmul @@ -236,8 +240,8 @@ // ----- -// CHECK-LABEL: func @subtensor_to_linalg_write_use -func @subtensor_to_linalg_write_use( +// CHECK-LABEL: func @extract_slice_to_linalg_write_use +func @extract_slice_to_linalg_write_use( %A : tensor<4x4xf32>, %B : tensor, %C : tensor {linalg.inplaceable = true}) @@ -245,9 +249,9 @@ { // Step 3. %sB forward propagates to a write in %D but it is not inplace. // So this is only ever read and can bufferize inplace. - // CHECK: subtensor + // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %sB = subtensor %B[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> + %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> // Step 2. %sB has a read interference in %E, it does not bufferize inplace. // CHECK: linalg.matmul @@ -259,12 +263,12 @@ // Step 4. %sC forward propagates to an inplace write in %E. // %sC backward propagates to %C which is inplaceable. // As a consequence this is bufferized inplace. - // CHECK: subtensor + // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %sC = subtensor %C[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> + %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> - // Step 1. %sC backprops to the subtensor producer which is not considered an - // interference. This bufferizes inplace. + // Step 1. %sC backprops to the tensor.extract_slice producer which is not + // considered an interference. This bufferizes inplace. // CHECK: linalg.matmul // CHECK-SAME: {__inplace_results_attr__ = ["true"]} %E = linalg.matmul ins(%A, %sB: tensor<4x4xf32>, tensor<4x4xf32>) @@ -280,8 +284,8 @@ // ----- -// CHECK-LABEL: func @subtensor_to_linalg_write_use -func @subtensor_to_linalg_write_use( +// CHECK-LABEL: func @extract_slice_to_linalg_write_use +func @extract_slice_to_linalg_write_use( %A : tensor<4x4xf32>, %B : tensor, %C : tensor {linalg.inplaceable = true}) @@ -290,12 +294,12 @@ // Step 4. %sB forward propagates to an inplace write in %D. // %sB backward propagates to %B which is not inplaceable. // As a consequence this is bufferized out of place. - // CHECK: subtensor + // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_results_attr__ = ["false"]} - %sB = subtensor %B[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> + %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> - // Step 1. %sB backprops to the subtensor producer which is not considered an - // interference. This bufferizes inplace. + // Step 1. %sB backprops to the tensor.extract_slice producer which is not + // considered an interference. This bufferizes inplace. // CHECK: linalg.matmul // CHECK-SAME: {__inplace_results_attr__ = ["true"]} %D = linalg.matmul ins(%B, %C: tensor, tensor) @@ -305,12 +309,12 @@ // Step 3. %sC forward propagates to an inplace write in %E. // %sC backward propagates to %C which is inplaceable. // As a consequence this is bufferized inplace. - // CHECK: subtensor + // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - %sC = subtensor %C[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> + %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> - // Step 1. %sC backprops to the subtensor producer which is not considered an - // interference. This bufferizes inplace. + // Step 1. %sC backprops to the tensor.extract_slice producer which is not + // considered an interference. This bufferizes inplace. 
   // CHECK: linalg.matmul
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
   %E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
@@ -322,8 +326,8 @@

 // -----

-// CHECK-LABEL: func @nested_subtensor_and_insert
-func @nested_subtensor_and_insert(
+// CHECK-LABEL: func @nested_extract_slice_and_insert
+func @nested_extract_slice_and_insert(
     %A : tensor,
     %B : tensor {linalg.inplaceable = true},
     %C : tensor {linalg.inplaceable = true},
@@ -332,75 +336,78 @@
 {
   %f0 = constant 0.0 : f32

-  // 2-level matching subtensor / subtensor_insert into non inplaceable %A.
+  // 2-level matching tensor.extract_slice / tensor.insert_slice into non
+  // inplaceable %A.
   // - %rA is not inplaceable because %A is not inplaceable at function boundary.
   // - once %rA is deemed not inplaceable, nothing prevent %rsA to be inplaceable
   // - this propagates to %FA and %ssA being inplaceable.
   // - %sA would then bufferize to an inplace write (i.e. %FA) but %A is not
   //   inplaceable and so %sA is not inplaceable.
-  // CHECK: subtensor
+  // CHECK: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
-  // CHECK-NEXT: subtensor
+  // CHECK-NEXT: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
   // CHECK-NEXT: fill
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  // CHECK-NEXT: subtensor_insert
+  // CHECK-NEXT: tensor.insert_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  // CHECK-NEXT: subtensor_insert
+  // CHECK-NEXT: tensor.insert_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
-  %sA = subtensor %A[0, 0][%idx, %idx][1, 1] : tensor to tensor
-  %ssA = subtensor %sA[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32>
+  %sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor to tensor
+  %ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32>
   %FA = linalg.fill(%ssA, %f0) : tensor<4x4xf32>, f32 -> tensor<4x4xf32>
-  %rsA = subtensor_insert %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor
-  %rA = subtensor_insert %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor into tensor
+  %rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor
+  %rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor into tensor

-  // 3-level matching subtensor / subtensor_insert into inplaceable %B.
-  // CHECK-NEXT: subtensor
+  // 3-level matching tensor.extract_slice / tensor.insert_slice into
+  // inplaceable %B.
+  // CHECK-NEXT: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  // CHECK-NEXT: subtensor
-  // Atm, this 2nd subtensor fails to bufferize inplace because clobbering
-  // analysis conservatively test for equivalent buffers.
+  // CHECK-NEXT: tensor.extract_slice
+  // Atm, this 2nd tensor.extract_slice fails to bufferize inplace because
+  // clobbering analysis conservatively tests for equivalent buffers.
   // TODO: This is currently too restrictive and misses clobberings.
   // When available, use container-containee analysis.
   // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
-  // CHECK-NEXT: subtensor
+  // CHECK-NEXT: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
   // CHECK-NEXT: fill
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  // CHECK-NEXT: subtensor_insert
+  // CHECK-NEXT: tensor.insert_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  // CHECK-NEXT: subtensor_insert
+  // CHECK-NEXT: tensor.insert_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  // CHECK-NEXT: subtensor_insert
+  // CHECK-NEXT: tensor.insert_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  %sB = subtensor %B[0, 0][%idx, %idx][1, 1] : tensor to tensor
-  %ssB = subtensor %sB[0, 0][4, %idx][1, 1] : tensor to tensor<4x?xf32>
-  %sssB = subtensor %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32>
+  %sB = tensor.extract_slice %B[0, 0][%idx, %idx][1, 1] : tensor to tensor
+  %ssB = tensor.extract_slice %sB[0, 0][4, %idx][1, 1] : tensor to tensor<4x?xf32>
+  %sssB = tensor.extract_slice %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32>
   %FB = linalg.fill(%sssB, %f0) : tensor<4x4xf32>, f32 -> tensor<4x4xf32>
-  %rssB = subtensor_insert %FB into %ssB[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<4x?xf32>
-  %rsB = subtensor_insert %rssB into %sB[0, 0][4, %idx][1, 1] : tensor<4x?xf32> into tensor
-  %rB = subtensor_insert %rsB into %B[0, 0][%idx, %idx][1, 1] : tensor into tensor
+  %rssB = tensor.insert_slice %FB into %ssB[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<4x?xf32>
+  %rsB = tensor.insert_slice %rssB into %sB[0, 0][4, %idx][1, 1] : tensor<4x?xf32> into tensor
+  %rB = tensor.insert_slice %rsB into %B[0, 0][%idx, %idx][1, 1] : tensor into tensor

-  // 2-level matching subtensor / subtensor_insert into inplaceable %C with a twist.
+  // 2-level matching tensor.extract_slice / tensor.insert_slice into
+  // inplaceable %C with a twist.
   // Throw a wrench in the system: %rsC production sizes do not match %ssC.
-  // CHECK-NEXT: subtensor
+  // CHECK-NEXT: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  // The subtensor_insert that would be candidate for matching does not actually
-  // match. That subtensor_insert can still be bufferized inplace nonetheless
-  // but this subtensor, which bufferizes to an inplace write, cannot.
-  // CHECK-NEXT: subtensor
+  // The tensor.insert_slice that would be a candidate for matching does not actually
+  // match. That tensor.insert_slice can still be bufferized inplace nonetheless
+  // but this tensor.extract_slice, which bufferizes to an inplace write, cannot.
+  // CHECK-NEXT: tensor.extract_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
   // CHECK-NEXT: fill
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  // CHECK-NEXT: subtensor_insert
+  // CHECK-NEXT: tensor.insert_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  // CHECK-NEXT: subtensor_insert
+  // CHECK-NEXT: tensor.insert_slice
   // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  %sC = subtensor %C[0, 0][%idx, %idx][1, 1] : tensor to tensor
-  %ssC = subtensor %sC[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32>
+  %sC = tensor.extract_slice %C[0, 0][%idx, %idx][1, 1] : tensor to tensor
+  %ssC = tensor.extract_slice %sC[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32>
   %FC = linalg.fill(%ssC, %f0) : tensor<4x4xf32>, f32 -> tensor<4x4xf32>
-  %rsC = subtensor_insert %FC into %sC[0, 0][12345, 67890][1, 1] : tensor<4x4xf32> into tensor
-  %rC = subtensor_insert %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor into tensor
+  %rsC = tensor.insert_slice %FC into %sC[0, 0][12345, 67890][1, 1] : tensor<4x4xf32> into tensor
+  %rC = tensor.insert_slice %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor into tensor

   return %rA, %rB, %rC: tensor, tensor, tensor
 }
diff --git a/mlir/test/Dialect/Linalg/comprehensive-func-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-func-bufferize.mlir
--- a/mlir/test/Dialect/Linalg/comprehensive-func-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-func-bufferize.mlir
@@ -118,8 +118,8 @@

 // -----

-// CHECK-LABEL: func @subtensor_insert_fun
-func @subtensor_insert_fun(%A0 : tensor, %A1 : tensor {linalg.inplaceable = true},
+// CHECK-LABEL: func @insert_slice_fun
+func @insert_slice_fun(%A0 : tensor, %A1 : tensor {linalg.inplaceable = true},
                            %t0 : tensor<4xf32>, %t1 : tensor<4xf32> {linalg.inplaceable = true})
   -> (tensor, tensor, tensor, tensor)
 {
@@ -128,40 +128,40 @@
   // CHECK: %[[BUFFER_CAST_t0:.*]] = memref.buffer_cast {{.*}} : memref<4xf32
   // CHECK: %[[BUFFER_CAST_t1:.*]] = memref.buffer_cast {{.*}} : memref<4xf32

-  // Alloc and copy the whole result tensor. Copy the subtensor.
+  // Alloc and copy the whole result tensor. Copy the tensor.extract_slice.
   // CHECK: %[[REALLOC_A0:.*]] = memref.alloc
   // CHECK: linalg.copy(%[[BUFFER_CAST_A0]]
   // CHECK: %[[SV_A0:.*]] = memref.subview %[[REALLOC_A0]]
   // CHECK: linalg.copy(%[[BUFFER_CAST_t0]], %[[SV_A0]])
-  %r0 = subtensor_insert %t0 into %A0[0][4][1] : tensor<4xf32> into tensor
+  %r0 = tensor.insert_slice %t0 into %A0[0][4][1] : tensor<4xf32> into tensor

-  // Alloc and copy the whole result tensor. Copy the subtensor.
+  // Alloc and copy the whole result tensor. Copy the tensor.extract_slice.
   // CHECK: %[[REALLOC_A0_2:.*]] = memref.alloc
   // CHECK: linalg.copy(%[[BUFFER_CAST_A0]]
   // CHECK: %[[SV_A0_2:.*]] = memref.subview %[[REALLOC_A0_2]]
   // CHECK: linalg.copy(%[[BUFFER_CAST_t1]], %[[SV_A0_2]])
-  %r1 = subtensor_insert %t1 into %A0[0][4][1] : tensor<4xf32> into tensor
+  %r1 = tensor.insert_slice %t1 into %A0[0][4][1] : tensor<4xf32> into tensor

-  // Still alloc the large tensor because %A1 is read after. Copy the subtensor.
+  // Still alloc the large tensor because %A1 is read after. Copy the tensor.extract_slice.
   // CHECK: %[[REALLOC_A1:.*]] = memref.alloc
   // CHECK: linalg.copy(%[[BUFFER_CAST_A1]]
   // CHECK: %[[SV_A1:.*]] = memref.subview %[[REALLOC_A1]]
   // CHECK: linalg.copy(%[[BUFFER_CAST_t0]], %[[SV_A1]])
-  %r2 = subtensor_insert %t0 into %A1[0][4][1] : tensor<4xf32> into tensor
+  %r2 = tensor.insert_slice %t0 into %A1[0][4][1] : tensor<4xf32> into tensor

-  // Do not realloc the large tensor. Copy the subtensor.
+  // Do not realloc the large tensor. Copy the tensor.extract_slice.
   // CHECK-NOT: alloc
   // CHECK: %[[SV_A1_2:.*]] = memref.subview %[[BUFFER_CAST_A1]]
   // CHECK: linalg.copy(%[[BUFFER_CAST_t1]], %[[SV_A1_2]])
-  %r3 = subtensor_insert %t1 into %A1[0][4][1] : tensor<4xf32> into tensor
+  %r3 = tensor.insert_slice %t1 into %A1[0][4][1] : tensor<4xf32> into tensor

   return %r0, %r1, %r2, %r3: tensor, tensor, tensor, tensor
 }

 // -----

-// CHECK-LABEL: func @subtensor_insert_fun
-func @subtensor_insert_fun(%A : tensor {linalg.inplaceable = true}, %t : tensor<4xf32>)
+// CHECK-LABEL: func @insert_slice_fun
+func @insert_slice_fun(%A : tensor {linalg.inplaceable = true}, %t : tensor<4xf32>)
   -> tensor
 {
   %f0 = constant 0.0 : f32
@@ -172,7 +172,7 @@
   // CHECK-NOT: alloc
   // CHECK: %[[SV:.*]] = memref.subview %[[BUFFER_CAST_A]]
   // CHECK: linalg.copy(%[[BUFFER_CAST_B]], %[[SV]])
-  %r0 = subtensor_insert %t into %A[0][4][1] : tensor<4xf32> into tensor
+  %r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor

   /// Overwrite BUFFER_CAST_A inplace.
   // CHECK: linalg.fill(%[[BUFFER_CAST_A]]
@@ -182,8 +182,8 @@

 // -----

-// CHECK-LABEL: func @subtensor_insert_fun
-func @subtensor_insert_fun(%A : tensor {linalg.inplaceable = true}, %t : tensor<4xf32>)
+// CHECK-LABEL: func @insert_slice_fun
+func @insert_slice_fun(%A : tensor {linalg.inplaceable = true}, %t : tensor<4xf32>)
   -> tensor
 {
   %f0 = constant 0.0 : f32
@@ -198,15 +198,15 @@
   // CHECK: %[[SV:.*]] = memref.subview %[[BUFFER_CAST_A]]
   /// Overwrite BUFFER_CAST_A inplace by copying into the subview.
// CHECK: linalg.copy(%[[BUFFER_CAST_B]], %[[SV]]) - %r1 = subtensor_insert %t into %r0[0][4][1] : tensor<4xf32> into tensor + %r1 = tensor.insert_slice %t into %r0[0][4][1] : tensor<4xf32> into tensor return %r1: tensor } // ----- -// CHECK-LABEL: func @subtensor_insert_fun_not_inplace -func @subtensor_insert_fun_not_inplace(%A : tensor, %t : tensor<4xf32>) +// CHECK-LABEL: func @insert_slice_fun_not_inplace +func @insert_slice_fun_not_inplace(%A : tensor, %t : tensor<4xf32>) -> tensor { // CHECK: %[[BUFFER_CAST_A:.*]] = memref.buffer_cast {{.*}} : memref to memref<4xf32> // CHECK: linalg.copy(%[[BUFFER_CAST_B]], %[[SV]]) : memref<4xf32, #map>, memref<4xf32> // CHECK: memref.dealloc %[[ALLOC]] : memref - %r0 = subtensor_insert %t into %A[0][4][1] : tensor<4xf32> into tensor + %r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor return %r0: tensor } // ----- -// CHECK-LABEL: func @subtensor_insert_fun_not_inplace -func @subtensor_insert_fun_not_inplace(%A : tensor {linalg.inplaceable = true}, %t : tensor<4xf32>) +// CHECK-LABEL: func @insert_slice_fun_not_inplace +func @insert_slice_fun_not_inplace(%A : tensor {linalg.inplaceable = true}, %t : tensor<4xf32>) -> (tensor, tensor) { %f0 = constant 0.0 : f32 @@ -232,10 +232,10 @@ // CHECK-DAG: %[[BUFFER_CAST_A:.*]] = memref.buffer_cast {{.*}} : memref to memref<4xf32, {{.*}}> // CHECK-DAG: linalg.copy(%[[BUFFER_CAST_B]], %[[SV]]) : memref<4xf32, {{.*}}>, memref<4xf32, {{.*}}> - %r0 = subtensor_insert %t into %A[0][4][1] : tensor<4xf32> into tensor + %r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor // fill would interfere with %r0 that is also being returned. // So we need to bufferize it out of place and make a new alloc. @@ -253,8 +253,8 @@ // ----- -// CHECK-LABEL: func @subtensor_fun -func @subtensor_fun(%A : tensor {linalg.inplaceable = true}) +// CHECK-LABEL: func @extract_slice_fun +func @extract_slice_fun(%A : tensor {linalg.inplaceable = true}) -> tensor<4xf32> { // This bufferizes to a pattern that the cross-function boundary pass needs to @@ -268,9 +268,8 @@ // CHECK: %[[BUFFER_CAST_A:.*]] = memref.buffer_cast {{.*}} : memref to tensor<4xf32> + %r0 = tensor.extract_slice %A[0][4][1] : tensor to tensor<4xf32> // CHECK: return %[[RES]] return %r0: tensor<4xf32> } - diff --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir --- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir +++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir @@ -299,28 +299,28 @@ // ----- -func @fold_subtensor( +func @fold_slice( %arg0 : tensor<1x?x?x1x?x1x1xf32>, %arg1 : tensor<1x?x?x?x?x1x1xf32>, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, %arg6 : index, %arg7 : index) -> (tensor<1x?x?x1x?x1x1xf32>, tensor<1x?x?x1x?x1x1xf32>) { - %0 = subtensor %arg0[0, %arg2, %arg3, 0, %arg4, 0, 0] - [1, %arg5, %arg6, 1, %arg7, 1, 1] [1, 1, 1, 1, 1, 1, 1] : + %0 = tensor.extract_slice %arg0[0, %arg2, %arg3, 0, %arg4, 0, 0] + [1, %arg5, %arg6, 1, %arg7, 1, 1] [1, 1, 1, 1, 1, 1, 1] : tensor<1x?x?x1x?x1x1xf32> to tensor<1x?x?x1x?x1x1xf32> - %1 = subtensor %arg1[%arg2, 0, %arg3, 0, 0, %arg4, 0] - [1, %arg5, %arg6, 1, %arg7, 1, 1] [1, 1, 1, 1, 1, 1, 1] : + %1 = tensor.extract_slice %arg1[%arg2, 0, %arg3, 0, 0, %arg4, 0] + [1, %arg5, %arg6, 1, %arg7, 1, 1] [1, 1, 1, 1, 1, 1, 1] : tensor<1x?x?x?x?x1x1xf32> to tensor<1x?x?x1x?x1x1xf32> return %0, %1 : tensor<1x?x?x1x?x1x1xf32>, tensor<1x?x?x1x?x1x1xf32> } -// CHECK: func @fold_subtensor +// CHECK: func 
@fold_slice // CHECK-SAME: %[[ARG0:.+]]: tensor<1x?x?x1x?x1x1xf32> // CHECK-SAME: %[[ARG1:.+]]: tensor<1x?x?x?x?x1x1xf32> -// CHECK: %[[SUBTENSOR1:.+]] = subtensor %[[ARG0]] +// CHECK: %[[SLICE1:.+]] = tensor.extract_slice %[[ARG0]] // CHECK-SAME: to tensor -// CHECK: %[[RESULT1:.+]] = linalg.tensor_expand_shape %[[SUBTENSOR1]] +// CHECK: %[[RESULT1:.+]] = linalg.tensor_expand_shape %[[SLICE1]] // CHECK-SAME: [0, 1], [2], [3, 4, 5, 6] -// CHECK: %[[SUBTENSOR2:.+]] = subtensor %[[ARG1]] +// CHECK: %[[SLICE2:.+]] = tensor.extract_slice %[[ARG1]] // CHECK-SAME: to tensor -// CHECK: %[[RESULT2:.+]] = linalg.tensor_expand_shape %[[SUBTENSOR2]] +// CHECK: %[[RESULT2:.+]] = linalg.tensor_expand_shape %[[SLICE2]] // CHECK-SAME: [0, 1], [2], [3, 4, 5, 6] // CHECK: return %[[RESULT1]], %[[RESULT2]] @@ -430,25 +430,25 @@ // ----- -func @subtensor_unit_dims(%arg0: tensor<1x3xf32>) -> tensor<1x1xf32> { - %0 = subtensor %arg0[0, 2] [1, 1] [1, 1] : tensor<1x3xf32> to tensor<1x1xf32> +func @slice_unit_dims(%arg0: tensor<1x3xf32>) -> tensor<1x1xf32> { + %0 = tensor.extract_slice %arg0[0, 2] [1, 1] [1, 1] : tensor<1x3xf32> to tensor<1x1xf32> return %0 : tensor<1x1xf32> } -// CHECK-LABEL: func @subtensor_unit_dims -// CHECK: %[[SUBTENSOR:.+]] = subtensor +// CHECK-LABEL: func @slice_unit_dims +// CHECK: %[[SLICE:.+]] = tensor.extract_slice // CHECK-SAME: tensor<1x3xf32> to tensor -// CHECK: %[[RESULT:.+]] = linalg.tensor_expand_shape %[[SUBTENSOR]] [] +// CHECK: %[[RESULT:.+]] = linalg.tensor_expand_shape %[[SLICE]] [] // CHECK: return %[[RESULT]] // ----- -func @subtensor_insert_unit_dims(%arg0: tensor<1x3xf32>, %arg1: tensor<1x1xf32>) -> tensor<1x3xf32> { - %0 = subtensor_insert %arg1 into %arg0[0, 2] [1, 1] [1, 1] : tensor<1x1xf32> into tensor<1x3xf32> +func @insert_slice_unit_dims(%arg0: tensor<1x3xf32>, %arg1: tensor<1x1xf32>) -> tensor<1x3xf32> { + %0 = tensor.insert_slice %arg1 into %arg0[0, 2] [1, 1] [1, 1] : tensor<1x1xf32> into tensor<1x3xf32> return %0 : tensor<1x3xf32> } -// CHECK-LABEL: func @subtensor_insert_unit_dims +// CHECK-LABEL: func @insert_slice_unit_dims // CHECK: %[[RESHAPE:.+]] = linalg.tensor_collapse_shape %{{.+}} [] -// CHECK: %[[RESULT:.+]] = subtensor_insert %[[RESHAPE]] +// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[RESHAPE]] // CHECK-SAME: tensor into tensor<1x3xf32> // CHECK: return %[[RESULT]] diff --git a/mlir/test/Dialect/Linalg/fusion-sequence.mlir b/mlir/test/Dialect/Linalg/fusion-sequence.mlir --- a/mlir/test/Dialect/Linalg/fusion-sequence.mlir +++ b/mlir/test/Dialect/Linalg/fusion-sequence.mlir @@ -175,18 +175,18 @@ // CHECK: %[[INIT:.+]] = linalg.init_tensor // CHECK: %[[R0:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG5:.+]] = %[[INIT]]) -> (tensor) { // CHECK: %[[R1:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG7:.+]] = %[[ARG5]]) -> (tensor) { -// CHECK-DAG: %[[STARG3:.+]] = subtensor %[[ARG3]] -// CHECK-DAG: %[[STARG7:.+]] = subtensor %[[ARG7]] -// CHECK-DAG: %[[STARG0:.+]] = subtensor %[[ARG0]] -// CHECK-DAG: %[[STARG1:.+]] = subtensor %[[ARG1]] -// CHECK-DAG: %[[STARG2:.+]] = subtensor %[[ARG2]] +// CHECK-DAG: %[[STARG3:.+]] = tensor.extract_slice %[[ARG3]] +// CHECK-DAG: %[[STARG7:.+]] = tensor.extract_slice %[[ARG7]] +// CHECK-DAG: %[[STARG0:.+]] = tensor.extract_slice %[[ARG0]] +// CHECK-DAG: %[[STARG1:.+]] = tensor.extract_slice %[[ARG1]] +// CHECK-DAG: %[[STARG2:.+]] = tensor.extract_slice %[[ARG2]] // CHECK: %[[T0:.+]] = linalg.matmul // CHECK-SAME: ins(%[[STARG0]], %[[STARG1]] : tensor, tensor) // CHECK-SAME: 
outs(%[[STARG2]] : tensor) -> tensor // CHECK: %[[T1:.+]] = linalg.generic // CHECK-SAME: ins(%[[T0:.+]], %[[STARG3]] : tensor, tensor) // CHECK-SAME: outs(%[[STARG7]] : tensor) -// CHECK: %[[RESULT:.+]] = subtensor_insert %[[T1]] into %[[ARG7]] +// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[T1]] into %[[ARG7]] // CHECK: scf.yield %[[RESULT]] // CHECK: } // CHECK: scf.yield %[[R1]] @@ -229,21 +229,21 @@ // CHECK: %[[M_1:.+]] = memref.dim %[[ARG8]], %[[C0]] // CHECK: %[[TILE_M_1:.+]] = affine.min #[[MAP0]](%[[M_1]], %[[IV0]]) // CHECK: %[[N3:.+]] = memref.dim %[[ARG8]], %[[C1]] -// CHECK: %[[STARG6:.+]] = subtensor %[[ARG8]][%[[IV0]], 0] +// CHECK: %[[STARG6:.+]] = tensor.extract_slice %[[ARG8]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M_1]], %[[N3]]] // CHECK: %[[M_2:.+]] = memref.dim %[[ARG4]], %[[C0]] // CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP1]](%[[IV0]])[%[[M_2]], %[[M]]] // CHECK: %[[N2:.+]] = memref.dim %[[ARG4]], %[[C1]] -// CHECK: %[[STARG4:.+]] = subtensor %[[ARG4]][%[[IV0]], 0] +// CHECK: %[[STARG4:.+]] = tensor.extract_slice %[[ARG4]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M_2]], %[[N2]]] // CHECK: %[[TILE_M_3:.+]] = affine.min #[[MAP1]](%[[IV0]])[%[[M]], %[[M]]] // CHECK: %[[N0:.+]] = memref.dim %[[ARG0]], %[[C1]] -// CHECK: %[[STARG0:.+]] = subtensor %[[ARG0]][%[[IV0]], 0] +// CHECK: %[[STARG0:.+]] = tensor.extract_slice %[[ARG0]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M_3]], %[[N0]]] // CHECK: %[[M_3:.+]] = memref.dim %[[ARG2]], %[[C0]] // CHECK: %[[TILE_M_4:.+]] = affine.min #[[MAP1]](%[[IV0]])[%[[M_3]], %[[M]]] // CHECK: %[[N1:.+]] = memref.dim %[[ARG2]], %[[C1]] -// CHECK: %[[STARG2:.+]] = subtensor %[[ARG2]][%[[IV0]], 0] +// CHECK: %[[STARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M_4]], %[[N1]]] // CHECK: %[[T0:.+]] = linalg.matmul // CHECK-SAME: ins(%[[STARG0]], %[[ARG1]] : tensor, tensor @@ -254,7 +254,7 @@ // CHECK: %[[T2:.+]] = linalg.matmul // CHECK-SAME: ins(%[[T1]], %arg5 : tensor, tensor // CHECK-SAME: ) outs(%[[STARG6]] : tensor) -// CHECK: %[[R1:.+]] = subtensor_insert %[[T2]] +// CHECK: %[[R1:.+]] = tensor.insert_slice %[[T2]] // CHECK-SAME: into %[[ARG8]][%[[IV0]], 0] [%[[TILE_M_1]], %[[N3]]] // CHECK: scf.yield %[[R1]] : tensor // CHECK: } diff --git a/mlir/test/Dialect/Linalg/fusion-tensor-pattern.mlir b/mlir/test/Dialect/Linalg/fusion-tensor-pattern.mlir --- a/mlir/test/Dialect/Linalg/fusion-tensor-pattern.mlir +++ b/mlir/test/Dialect/Linalg/fusion-tensor-pattern.mlir @@ -38,16 +38,16 @@ // CHECK: %[[M_2:.+]] = memref.dim %[[ARG6]], %[[C0]] // CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP1]](%[[M_2]], %[[IV0]]) // CHECK: %[[N3:.+]] = memref.dim %[[ARG6]], %[[C1]] -// CHECK: %[[ST_ARG6:.+]] = subtensor %[[ARG6]][%[[IV0]], 0] +// CHECK: %[[ST_ARG6:.+]] = tensor.extract_slice %[[ARG6]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M_2]], %[[N3]]] // CHECK: %[[TILE_M_3:.+]] = affine.min #[[MAP5]](%[[IV0]])[%[[M]], %[[M]]] // CHECK: %[[N1:.+]] = memref.dim %[[ARG0]], %[[C1]] -// CHECK: %[[ST_ARG0:.+]] = subtensor %[[ARG0]][%[[IV0]], 0] +// CHECK: %[[ST_ARG0:.+]] = tensor.extract_slice %[[ARG0]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M_3]], %[[N1]]] // CHECK: %[[M_3:.+]] = memref.dim %[[ARG2]], %[[C0]] // CHECK: %[[TILE_M_4:.+]] = affine.min #[[MAP5]](%[[IV0]])[%[[M_3]], %[[M]]] // CHECK: %[[N2_2:.+]] = memref.dim %[[ARG2]], %[[C1]] -// CHECK: %[[ST_ARG2:.+]] = subtensor %[[ARG2]][%[[IV0]], 0] +// CHECK: %[[ST_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV0]], 0] // CHECK-SAME: [%[[TILE_M_4]], %[[N2_2]]] // CHECK: %[[LHS:.+]] = 
linalg.matmul // CHECK-SAME: __internal_linalg_transform__ = "after_lhs_fusion_producer" @@ -62,30 +62,30 @@ // CHECK-SAME: %[[C0]] to %[[N2]] step %[[C16]] // CHECK-SAME: iter_args(%[[ARG10:.+]] = %[[ARG8]]) -> (tensor) { // CHECK: %[[TILE_N2:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[N2]]] -// CHECK: %[[ST_LHS:.+]] = subtensor %[[LHS]][0, %[[IV2]]] +// CHECK: %[[ST_LHS:.+]] = tensor.extract_slice %[[LHS]][0, %[[IV2]]] // CHECK-SAME: [%[[TILE_M_3]], %[[TILE_N2]]] // CHECK: %[[N2_3:.+]] = memref.dim %[[ARG3]], %[[C0]] // CHECK: %[[TILE_N2_2:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[N2_3]]] // CHECK: %[[TILE_N3:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[N3_2]]] -// CHECK: %[[ST_ARG3:.+]] = subtensor %[[ARG3]][%[[IV2]], %[[IV1]]] +// CHECK: %[[ST_ARG3:.+]] = tensor.extract_slice %[[ARG3]][%[[IV2]], %[[IV1]]] // CHECK-SAME: [%[[TILE_N2_2]], %[[TILE_N3]]] // CHECK: %[[M_4:.+]] = memref.dim %[[ARG10]], %[[C0]] // CHECK: %[[N3_3:.+]] = memref.dim %[[ARG10]], %[[C1]] // CHECK: %[[TILE_N3_2:.+]] = affine.min #[[MAP4]](%[[N3_3]], %[[IV1]]) -// CHECK: %[[ST_ARG4:.+]] = subtensor %[[ARG10]][0, %[[IV1]]] +// CHECK: %[[ST_ARG4:.+]] = tensor.extract_slice %[[ARG10]][0, %[[IV1]]] // CHECK-SAME: [%[[M_4]], %[[TILE_N3_2]]] // CHECK: %[[ST_RESULT:.+]] = linalg.matmul // CHECK-SAME: __internal_linalg_transform__ = "after_lhs_fusion" // CHECK-SAME: ins(%[[ST_LHS]], %[[ST_ARG3]] // CHECK-SAME: : tensor, tensor) // CHECK-SAME: outs(%[[ST_ARG4]] : tensor) -// CHECK: %[[UPDATE1:.+]] = subtensor_insert %[[ST_RESULT]] +// CHECK: %[[UPDATE1:.+]] = tensor.insert_slice %[[ST_RESULT]] // CHECK-SAME: into %[[ARG10]][0, %[[IV1]]] [%[[M_4]], %[[TILE_N3_2]]] // CHECK: scf.yield %[[UPDATE1]] // CHECK: } // CHECK: scf.yield %[[YIELD1]] // CHECK: } -// CHECK: %[[UPDATE0:.+]] = subtensor_insert %[[YIELD0]] into +// CHECK: %[[UPDATE0:.+]] = tensor.insert_slice %[[YIELD0]] into // CHECK-SAME: %[[ARG6]][%[[IV0]], 0] [%[[TILE_M_2]], %[[N3]]] // CHECK: scf.yield %[[UPDATE0]] // CHECK: } @@ -114,9 +114,9 @@ // TLOOP-SAME: %[[AB_INIT_:.*]] = %[[AB_INIT]]: tensor) // TLOOP-SAME: outs (%[[ABC_INIT_:.*]] = %[[ABC_INIT]]: tensor) { -// TLOOP: %[[ABC_INIT_SUB:.*]] = subtensor %[[ABC_INIT_]][%[[IV0]], 0] -// TLOOP: %[[A_SUB:.*]] = subtensor %[[A_]][%[[IV0]], 0] -// TLOOP: %[[AB_INIT_SUB:.*]] = subtensor %[[AB_INIT_]][%[[IV0]], 0] +// TLOOP: %[[ABC_INIT_SUB:.*]] = tensor.extract_slice %[[ABC_INIT_]][%[[IV0]], 0] +// TLOOP: %[[A_SUB:.*]] = tensor.extract_slice %[[A_]][%[[IV0]], 0] +// TLOOP: %[[AB_INIT_SUB:.*]] = tensor.extract_slice %[[AB_INIT_]][%[[IV0]], 0] // TLOOP: %[[AB_SUB:.*]] = linalg.matmul // TLOOP-SAME: ins(%[[A_SUB]], %[[B_]] : {{.*}}) outs(%[[AB_INIT_SUB]] @@ -132,19 +132,19 @@ // TLOOP-SAME: outs (%[[ABC_INIT_SUB_:.*]] = %[[ABC_INIT_SUB]]: [[TY]]) // TLOOP-SAME: iterators["parallel", "reduction"] { -// TLOOP: %[[AB_SUB_SUB:.*]] = subtensor %[[AB_SUB_]][0, %[[IV2]]] -// TLOOP: %[[C__SUB:.*]] = subtensor %[[C__]][%[[IV2]], %[[IV1]]] -// TLOOP: %[[ABS_INIT_SUB_SUB:.*]] = subtensor %[[ABC_INIT_SUB_]][0, %[[IV1]]] +// TLOOP: %[[AB_SUB_SUB:.*]] = tensor.extract_slice %[[AB_SUB_]][0, %[[IV2]]] +// TLOOP: %[[C__SUB:.*]] = tensor.extract_slice %[[C__]][%[[IV2]], %[[IV1]]] +// TLOOP: %[[ABS_INIT_SUB_SUB:.*]] = tensor.extract_slice %[[ABC_INIT_SUB_]][0, %[[IV1]]] // TLOOP: %[[ABC_SUB_SUB:.*]] = linalg.matmul // TLOOP-SAME: ins(%[[AB_SUB_SUB]], %[[C__SUB]] : [[TY]], [[TY]]) // TLOOP-SAME: outs(%[[ABS_INIT_SUB_SUB]] : [[TY]]) -> [[TY]] -// TLOOP: %[[RES0:.*]] = subtensor_insert %[[ABC_SUB_SUB]] +// TLOOP: %[[RES0:.*]] = tensor.insert_slice 
%[[ABC_SUB_SUB]] // TLOOP-SAME: into %[[ABC_INIT_SUB_]][0, %[[IV1]]] // TLOOP: linalg.yield %[[RES0]] : [[TY]] // TLOOP: } -// TLOOP: %[[RES1:.*]] = subtensor_insert %[[ABC_SUB_]] into %[[ABC_INIT_]][%[[IV0]], 0] +// TLOOP: %[[RES1:.*]] = tensor.insert_slice %[[ABC_SUB_]] into %[[ABC_INIT_]][%[[IV0]], 0] // TLOOP: linalg.yield %[[RES1]] : [[TY]] // TLOOP: } // TLOOP: return %[[ABC]] : [[TY]] @@ -186,10 +186,10 @@ // CHECK-SAME: iter_args(%[[ARG4:.+]] = %{{[a-zA-Z0-9_]+}}) // CHECK: %[[YIELD:.+]] = scf.for %[[IV1:[a-zA-Z0-9_]+]] // CHECK-SAME: iter_args(%[[ARG6:.+]] = %[[ARG4]]) -// CHECK: %[[ST_ARG6:.+]] = subtensor %[[ARG6]][%[[IV0]], %[[IV1]]] -// CHECK: %[[ST_ARG0:.+]] = subtensor %[[ARG0]][%[[IV0]], 0] -// CHECK: %[[ST_ARG1:.+]] = subtensor %[[ARG1]][0, %[[IV1]]] -// CHECK: %[[ST_ARG2:.+]] = subtensor %[[ARG2]][%[[IV0]], %[[IV1]]] +// CHECK: %[[ST_ARG6:.+]] = tensor.extract_slice %[[ARG6]][%[[IV0]], %[[IV1]]] +// CHECK: %[[ST_ARG0:.+]] = tensor.extract_slice %[[ARG0]][%[[IV0]], 0] +// CHECK: %[[ST_ARG1:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]] +// CHECK: %[[ST_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV0]], %[[IV1]]] // CHECK: %[[LHS:.+]] = linalg.matmul // CHECK-SAME: ins(%[[ST_ARG0]], %[[ST_ARG1]] // CHECK-SAME: : tensor, tensor) @@ -197,7 +197,7 @@ // CHECK: %[[ST_RESULT:.+]] = linalg.generic // CHECK-SAME: ins(%[[LHS]] : tensor) // CHECK-SAME: outs(%[[ST_ARG6]] : tensor) -// CHECK: %[[UPDATE:.+]] = subtensor_insert %[[ST_RESULT]] +// CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[ST_RESULT]] // CHECK-SAME: into %[[ARG6]][%[[IV0]], %[[IV1]]] // CHECK: scf.yield %[[UPDATE]] // CHECK: scf.yield %[[YIELD]] @@ -226,10 +226,10 @@ // TLOOP-SAME: %[[AB_:.*]] = %[[AB]]: [[TY]]) // TLOOP-SAME: outs (%[[INIT_:.*]] = %[[INIT]]: [[TY]]) { -// TLOOP: %[[INIT_SUB:.*]] = subtensor %[[INIT_]][%[[IV0]], %[[IV1]]] -// TLOOP: %[[A_SUB:.*]] = subtensor %[[A_]][%[[IV0]], 0] -// TLOOP: %[[B_SUB:.*]] = subtensor %[[B_]][0, %[[IV1]]] -// TLOOP: %[[AB_SUB_INIT:.*]] = subtensor %[[AB_]][%[[IV0]], %[[IV1]]] +// TLOOP: %[[INIT_SUB:.*]] = tensor.extract_slice %[[INIT_]][%[[IV0]], %[[IV1]]] +// TLOOP: %[[A_SUB:.*]] = tensor.extract_slice %[[A_]][%[[IV0]], 0] +// TLOOP: %[[B_SUB:.*]] = tensor.extract_slice %[[B_]][0, %[[IV1]]] +// TLOOP: %[[AB_SUB_INIT:.*]] = tensor.extract_slice %[[AB_]][%[[IV0]], %[[IV1]]] // TLOOP: %[[AB_SUB:.*]] = linalg.matmul // TLOOP-SAME: ins(%[[A_SUB]], %[[B_SUB]] : [[TY]], [[TY]]) @@ -238,7 +238,7 @@ // TLOOP: %[[DOUBLE_AB:.*]] = linalg.generic // TLOOP-SAME: ins(%[[AB_SUB]] : [[TY]]) outs(%[[INIT_SUB]] : [[TY]]) -// TLOOP: %[[RESULT_SUB:.*]] = subtensor_insert +// TLOOP: %[[RESULT_SUB:.*]] = tensor.insert_slice // TLOOP-SAME: %[[DOUBLE_AB:.*]] into %[[INIT_]][%[[IV0]], %[[IV1]]] // TLOOP: linalg.yield %[[RESULT_SUB]] : [[TY]] @@ -267,13 +267,13 @@ // CHECK-NOT: fill // CHECK: scf.for %[[I:.*]]{{.*}}iter_args(%{{.*}} = %[[ARG0]]) -> (tensor) { // CHECK: scf.for %[[J:.*]] -// CHECK: %[[ST:.*]] = subtensor %[[ARG0]] +// CHECK: %[[ST:.*]] = tensor.extract_slice %[[ARG0]] // CHECK: %[[ST_FILL:.*]] = linalg.fill(%[[ST]], %[[C0]]) {__internal_linalg_transform__ = "after_out_fusion_producer"} : tensor, f32 -> tensor // CHECK: %[[ST_MM_RES:.*]] = scf.for %[[K:.*]]{{.*}}iter_args(%[[BB:.*]] = %[[ST_FILL]]) -> (tensor) { // CHECK-NOT: fill // CHECK: %[[ST_MM:.*]] = linalg.matmul {__internal_linalg_transform__ = "after_out_fusion"} ins(%{{.*}}, %{{.*}} : tensor, tensor) outs(%[[BB]] : tensor) -> tensor // CHECK: scf.yield %[[ST_MM]] : tensor -// CHECK: %[[MM:.*]] = subtensor_insert 
%[[ST_MM_RES]] into {{.*}} +// CHECK: %[[MM:.*]] = tensor.insert_slice %[[ST_MM_RES]] into {{.*}} // CHECK: scf.yield %[[MM]] : tensor @@ -301,9 +301,9 @@ // TLOOP-SAME: outs (%[[OUT_:.*]] = %[[OUT]]: [[TY]]) { // TLOOP: %[[DIM_A__1:.*]] = memref.dim %[[A_]], %[[C1]] : [[TY]] -// TLOOP: %[[A_SUB:.*]] = subtensor %[[A_]][%[[I]], 0] -// TLOOP: %[[B_SUB:.*]] = subtensor %[[B_]][0, %[[J]]] -// TLOOP: %[[OUT_SUB:.*]] = subtensor %[[OUT_]][%[[I]], %[[J]]] +// TLOOP: %[[A_SUB:.*]] = tensor.extract_slice %[[A_]][%[[I]], 0] +// TLOOP: %[[B_SUB:.*]] = tensor.extract_slice %[[B_]][0, %[[J]]] +// TLOOP: %[[OUT_SUB:.*]] = tensor.extract_slice %[[OUT_]][%[[I]], %[[J]]] // TLOOP: %[[INIT_SUB:.*]] = linalg.fill(%[[OUT_SUB]], %[[C0_F32_]]) // TLOOP: %[[AB_SUB:.*]] = linalg.tiled_loop (%[[K:.*]]) = (%[[C0]]) @@ -313,15 +313,15 @@ // TLOOP-SAME: outs (%[[INIT_SUB_:.*]] = %[[INIT_SUB]]: [[TY]]) // TLOOP-SAME: iterators["reduction"] { -// TLOOP: %[[A_SUB_SUB:.*]] = subtensor %[[A_SUB_]][0, %[[K]]] -// TLOOP: %[[B_SUB_SUB:.*]] = subtensor %[[B_SUB_]][%[[K]], 0] +// TLOOP: %[[A_SUB_SUB:.*]] = tensor.extract_slice %[[A_SUB_]][0, %[[K]]] +// TLOOP: %[[B_SUB_SUB:.*]] = tensor.extract_slice %[[B_SUB_]][%[[K]], 0] // TLOOP: %[[AB_SUB_SUB:.*]] = linalg.matmul // TLOOP-SAME: ins(%[[A_SUB_SUB]], %[[B_SUB_SUB]] : [[TY]], [[TY]]) // TLOOP-SAME: outs(%[[INIT_SUB_]] : [[TY]]) -> [[TY]] // TLOOP: linalg.yield %[[AB_SUB_SUB]] : [[TY]] // TLOOP: } -// TLOOP: %[[SUB_RESULT:.*]] = subtensor_insert %[[AB_SUB]] +// TLOOP: %[[SUB_RESULT:.*]] = tensor.insert_slice %[[AB_SUB]] // TLOOP-SAME: into %[[OUT_]][%[[I]], %[[J]]] // TLOOP: linalg.yield %[[SUB_RESULT]] : [[TY]] // TLOOP: } @@ -372,9 +372,9 @@ // TLOOP-SAME: outs (%[[OUT_:.*]] = %[[OUT]]: [[TY]]) { // TLOOP: %[[DIM_A__1:.*]] = memref.dim %[[A_]], %[[C1]] : [[TY]] -// TLOOP: %[[A_SUB:.*]] = subtensor %[[A_]][%[[I]], 0] -// TLOOP: %[[B_SUB:.*]] = subtensor %[[B_]][0, %[[J]]] -// TLOOP: %[[OUT_SUB:.*]] = subtensor %[[OUT_]][%[[I]], %[[J]]] +// TLOOP: %[[A_SUB:.*]] = tensor.extract_slice %[[A_]][%[[I]], 0] +// TLOOP: %[[B_SUB:.*]] = tensor.extract_slice %[[B_]][0, %[[J]]] +// TLOOP: %[[OUT_SUB:.*]] = tensor.extract_slice %[[OUT_]][%[[I]], %[[J]]] // TLOOP: %[[INIT_SUB:.*]] = linalg.generic // TLOOP-SAME: ins(%[[C0_F32_]] // TLOOP-SAME: outs(%[[OUT_SUB]] @@ -386,15 +386,15 @@ // TLOOP-SAME: outs (%[[INIT_SUB_:.*]] = %[[INIT_SUB]]: [[TY]]) // TLOOP-SAME: iterators["reduction"] { -// TLOOP: %[[A_SUB_SUB:.*]] = subtensor %[[A_SUB_]][0, %[[K]]] -// TLOOP: %[[B_SUB_SUB:.*]] = subtensor %[[B_SUB_]][%[[K]], 0] +// TLOOP: %[[A_SUB_SUB:.*]] = tensor.extract_slice %[[A_SUB_]][0, %[[K]]] +// TLOOP: %[[B_SUB_SUB:.*]] = tensor.extract_slice %[[B_SUB_]][%[[K]], 0] // TLOOP: %[[AB_SUB_SUB:.*]] = linalg.matmul // TLOOP-SAME: ins(%[[A_SUB_SUB]], %[[B_SUB_SUB]] : [[TY]], [[TY]]) // TLOOP-SAME: outs(%[[INIT_SUB_]] : [[TY]]) -> [[TY]] // TLOOP: linalg.yield %[[AB_SUB_SUB]] : [[TY]] // TLOOP: } -// TLOOP: %[[SUB_RESULT:.*]] = subtensor_insert %[[AB_SUB]] +// TLOOP: %[[SUB_RESULT:.*]] = tensor.insert_slice %[[AB_SUB]] // TLOOP-SAME: into %[[OUT_]][%[[I]], %[[J]]] // TLOOP: linalg.yield %[[SUB_RESULT]] : [[TY]] // TLOOP: } diff --git a/mlir/test/Dialect/Linalg/hoist-padding.mlir b/mlir/test/Dialect/Linalg/hoist-padding.mlir --- a/mlir/test/Dialect/Linalg/hoist-padding.mlir +++ b/mlir/test/Dialect/Linalg/hoist-padding.mlir @@ -53,10 +53,10 @@ // CHECK: %[[A:.*]] = scf.for %[[J1:[0-9a-z]+]] = // Iteration count along J1 // CHECK: %[[IDXpad0_K:[0-9]+]] = affine.apply #[[$DIV4]](%[[J1]]) - // CHECK: 
subtensor %{{.*}} [1, 1] : tensor to tensor + // CHECK: tensor.extract_slice %{{.*}} [1, 1] : tensor to tensor // CHECK: linalg.pad_tensor %{{.*}} // CHECK: : tensor to tensor<2x4xf32> - // CHECK: subtensor_insert %{{.*}} into %{{.*}}[%[[IDXpad0_K]], 0, 0] + // CHECK: tensor.insert_slice %{{.*}} into %{{.*}}[%[[IDXpad0_K]], 0, 0] // CHECK-SAME: [1, 2, 4] [1, 1, 1] : tensor<2x4xf32> into tensor // Second tensor is KxN but loop order is (M, N, K) so padded tensor is NxKx4x3 // CHECK: %[[SZpad1_N:[0-9]+]] = affine.apply #[[$DIVS3]]()[%[[dN]]] @@ -69,23 +69,23 @@ // CHECK: scf.for %[[J2:[0-9a-z]+]] = // Iteration count along J2 // CHECK: %[[IDXpad1_N:[0-9]+]] = affine.apply #[[$DIV4]](%[[J2]]) - // CHECK: subtensor %{{.*}} [1, 1] : tensor to tensor + // CHECK: tensor.extract_slice %{{.*}} [1, 1] : tensor to tensor // CHECK: linalg.pad_tensor %{{.*}} // CHECK: : tensor to tensor<4x3xf32> - // CHECK: subtensor_insert %{{.*}} into %{{.*}}[%[[IDXpad1_K]], %[[IDXpad1_N]], 0, 0] + // CHECK: tensor.insert_slice %{{.*}} into %{{.*}}[%[[IDXpad1_K]], %[[IDXpad1_N]], 0, 0] // CHECK-SAME: [1, 1, 4, 3] [1, 1, 1, 1] : tensor<4x3xf32> into tensor // 2-D loop // CHECK: scf.for %[[J:[0-9a-zA-Z]+]] // CHECK: scf.for %[[K:[0-9a-zA-Z]+]] // Iteration count along K // CHECK: %[[IDXpad0_K:[0-9]+]] = affine.apply #[[$DIV4]](%[[K]]) - // CHECK: %[[stA:.*]] = subtensor %[[A]][%[[IDXpad0_K]], 0, 0] [1, 2, 4] [1, 1, 1] : + // CHECK: %[[stA:.*]] = tensor.extract_slice %[[A]][%[[IDXpad0_K]], 0, 0] [1, 2, 4] [1, 1, 1] : // CHECK-SAME: tensor to tensor<2x4xf32> // Iteration count along J // CHECK: %[[IDXpad1_N:[0-9]+]] = affine.apply #[[$DIV3]](%[[J]]) // Iteration count along K // CHECK: %[[IDXpad1_K:[0-9]+]] = affine.apply #[[$DIV4]](%[[K]]) - // CHECK: %[[stB:.*]] = subtensor %[[B]][%[[IDXpad1_N]], %[[IDXpad1_K]], 0, 0] [1, 1, 4, 3] [1, 1, 1, 1] : + // CHECK: %[[stB:.*]] = tensor.extract_slice %[[B]][%[[IDXpad1_N]], %[[IDXpad1_K]], 0, 0] [1, 1, 4, 3] [1, 1, 1, 1] : // CHECK-SAME: tensor to tensor<4x3xf32> // CHECK: %[[stC:.*]] = linalg.pad_tensor %{{.*}} // CHECK: : tensor to tensor<2x3xf32> @@ -98,17 +98,17 @@ %7 = affine.min #map0(%arg3)[%6] %8 = memref.dim %arg0, %c1 : tensor %9 = affine.min #map1(%arg7)[%8] - %10 = subtensor %arg0[%arg3, %arg7] [%7, %9] [1, 1] : tensor to tensor + %10 = tensor.extract_slice %arg0[%arg3, %arg7] [%7, %9] [1, 1] : tensor to tensor %11 = memref.dim %arg1, %c0 : tensor %12 = affine.min #map1(%arg7)[%11] %13 = memref.dim %arg1, %c1 : tensor %14 = affine.min #map2(%arg5)[%13] - %15 = subtensor %arg1[%arg7, %arg5] [%12, %14] [1, 1] : tensor to tensor + %15 = tensor.extract_slice %arg1[%arg7, %arg5] [%12, %14] [1, 1] : tensor to tensor %16 = memref.dim %arg8, %c0 : tensor %17 = affine.min #map3(%16, %arg3) %18 = memref.dim %arg8, %c1 : tensor %19 = affine.min #map4(%18, %arg5) - %20 = subtensor %arg8[%arg3, %arg5] [%17, %19] [1, 1] : tensor to tensor + %20 = tensor.extract_slice %arg8[%arg3, %arg5] [%17, %19] [1, 1] : tensor to tensor %21 = subi %c2, %7 : index %22 = subi %c4, %9 : index %23 = linalg.pad_tensor %10 low[%c0, %c0] high[%21, %22] { @@ -128,8 +128,8 @@ linalg.yield %cst : f32 } : tensor to tensor<2x3xf32> %30 = linalg.matmul ins(%23, %26 : tensor<2x4xf32>, tensor<4x3xf32>) outs(%29 : tensor<2x3xf32>) -> tensor<2x3xf32> - %31 = subtensor %30[0, 0] [%7, %14] [1, 1] : tensor<2x3xf32> to tensor - %32 = subtensor_insert %31 into %arg8[%arg3, %arg5] [%17, %19] [%c1, %c1] : tensor into tensor + %31 = tensor.extract_slice %30[0, 0] [%7, %14] [1, 1] : tensor<2x3xf32> to tensor + %32 = 
tensor.insert_slice %31 into %arg8[%arg3, %arg5] [%17, %19] [%c1, %c1] : tensor into tensor scf.yield %32 : tensor } scf.yield %5 : tensor @@ -173,7 +173,7 @@ // CHECK: %[[INIT_PACKED_A:.*]] = linalg.init_tensor [%[[D0]], %[[D1]], 2] : tensor // CHECK: %[[PACKED_A:.*]] = scf.for %[[II:[0-9a-z]+]] = {{.*}} iter_args(%{{.*}} = %[[INIT_PACKED_A]]) -> (tensor) { // CHECK: scf.for %[[III:[0-9a-z]+]] = - // CHECK: subtensor_insert %{{.*}} into %{{.*}}[%{{.*}}, %{{.*}}, 0] [1, 1, 2] [1, 1, 1] : tensor<2xf32> into tensor + // CHECK: tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, %{{.*}}, 0] [1, 1, 2] [1, 1, 1] : tensor<2xf32> into tensor // // CHECK: %[[D0_2:.*]] = affine.apply #[[$DIV4]](%[[MR8]]) // CHECK: %[[MM4_2:.*]] = affine.min #[[$MIN_MOD4]](%[[MR8]]) @@ -182,33 +182,33 @@ // CHECK: %[[INIT_PACKED_B:.*]] = linalg.init_tensor [%[[D0_2]], %[[D1_2]], 2] : tensor // CHECK: %[[PACKED_B:.*]] = scf.for %[[II_2:[0-9a-z]+]] = {{.*}} iter_args(%{{.*}} = %[[INIT_PACKED_B]]) -> (tensor) { // CHECK: scf.for %[[III_2:[0-9a-z]+]] = - // CHECK: subtensor_insert %{{.*}} into %{{.*}}[%{{.*}}, %{{.*}}, 0] [1, 1, 2] [1, 1, 1] : tensor<2xf32> into tensor + // CHECK: tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, %{{.*}}, 0] [1, 1, 2] [1, 1, 1] : tensor<2xf32> into tensor // Compute. // CHECK: scf.for %[[II_3:[0-9a-z]+]] = // CHECK: scf.for %[[III_3:[0-9a-z]+]] = {{.*}} iter_args(%[[C:.*]] = %{{.*}}) -> (tensor) { // CHECK: %[[IDX0:.*]] = affine.apply #[[$DIV4]](%[[II_3]]) // CHECK: %[[IDX1:.*]] = affine.apply #[[$DIV2]](%[[III_3]]) - // CHECK: %[[A:.*]] = subtensor %[[PACKED_A]][%[[IDX0]], %[[IDX1]], 0] [1, 1, 2] [1, 1, 1] : tensor to tensor<2xf32> + // CHECK: %[[A:.*]] = tensor.extract_slice %[[PACKED_A]][%[[IDX0]], %[[IDX1]], 0] [1, 1, 2] [1, 1, 1] : tensor to tensor<2xf32> // CHECK: %[[IDX0_2:.*]] = affine.apply #[[$DIV4]](%[[II_3]]) // CHECK: %[[IDX1_2:.*]] = affine.apply #[[$DIV2]](%[[III_3]]) - // CHECK: %[[B:.*]] = subtensor %[[PACKED_B]][%[[IDX0_2]], %[[IDX1_2]], 0] [1, 1, 2] [1, 1, 1] : tensor to tensor<2xf32> + // CHECK: %[[B:.*]] = tensor.extract_slice %[[PACKED_B]][%[[IDX0_2]], %[[IDX1_2]], 0] [1, 1, 2] [1, 1, 1] : tensor to tensor<2xf32> // CHECK: linalg.dot ins(%[[A]], %[[B]] : tensor<2xf32>, tensor<2xf32>) outs(%[[C]] : tensor) -> tensor %4 = scf.for %arg3 = %c0 to %1 step %c8 iter_args(%arg4 = %arg2) -> (tensor) { %5 = affine.min #map0(%arg3)[%2] - %6 = subtensor %arg0[%arg3] [%5] [1] : tensor to tensor + %6 = tensor.extract_slice %arg0[%arg3] [%5] [1] : tensor to tensor %7 = affine.min #map0(%arg3)[%3] - %8 = subtensor %arg1[%arg3] [%7] [1] : tensor to tensor + %8 = tensor.extract_slice %arg1[%arg3] [%7] [1] : tensor to tensor %9 = scf.for %arg5 = %c0 to %5 step %c4 iter_args(%arg6 = %arg4) -> (tensor) { %10 = affine.min #map1(%5, %arg5) - %11 = subtensor %6[%arg5] [%10] [1] : tensor to tensor + %11 = tensor.extract_slice %6[%arg5] [%10] [1] : tensor to tensor %12 = affine.min #map1(%7, %arg5) - %13 = subtensor %8[%arg5] [%12] [1] : tensor to tensor + %13 = tensor.extract_slice %8[%arg5] [%12] [1] : tensor to tensor %14 = scf.for %arg7 = %c0 to %10 step %c2 iter_args(%arg8 = %arg6) -> (tensor) { %15 = affine.min #map2(%10, %arg7) - %16 = subtensor %11[%arg7] [%15] [1] : tensor to tensor + %16 = tensor.extract_slice %11[%arg7] [%15] [1] : tensor to tensor %17 = affine.min #map2(%12, %arg7) - %18 = subtensor %13[%arg7] [%17] [1] : tensor to tensor + %18 = tensor.extract_slice %13[%arg7] [%17] [1] : tensor to tensor %19 = subi %c2, %15 : index %20 = linalg.pad_tensor %16 low[%c0] high[%19] 
{ ^bb0(%arg9: index): // no predecessors @@ -245,17 +245,17 @@ %1 = scf.for %arg3 = %c0 to %c32 step %c16 iter_args(%arg4 = %arg2) -> (tensor<32x64xf32>) { %2 = scf.for %arg5 = %c0 to %c64 step %c32 iter_args(%arg6 = %arg4) -> (tensor<32x64xf32>) { %3 = scf.for %arg7 = %c0 to %c128 step %c32 iter_args(%arg8 = %arg6) -> (tensor<32x64xf32>) { - %4 = subtensor %arg0[%arg3, %arg7] [16, 32] [1, 1] : tensor<32x128xf32> to tensor<16x32xf32> - %5 = subtensor %arg1[%arg7, %arg5] [32, 32] [1, 1] : tensor<128x64xf32> to tensor<32x32xf32> - %6 = subtensor %arg8[%arg3, %arg5] [16, 32] [1, 1] : tensor<32x64xf32> to tensor<16x32xf32> + %4 = tensor.extract_slice %arg0[%arg3, %arg7] [16, 32] [1, 1] : tensor<32x128xf32> to tensor<16x32xf32> + %5 = tensor.extract_slice %arg1[%arg7, %arg5] [32, 32] [1, 1] : tensor<128x64xf32> to tensor<32x32xf32> + %6 = tensor.extract_slice %arg8[%arg3, %arg5] [16, 32] [1, 1] : tensor<32x64xf32> to tensor<16x32xf32> %7 = scf.for %arg9 = %c0 to %c16 step %c2 iter_args(%arg10 = %6) -> (tensor<16x32xf32>) { %10 = scf.for %arg11 = %c0 to %c32 step %c4 iter_args(%arg12 = %arg10) -> (tensor<16x32xf32>) { %11 = scf.for %arg13 = %c0 to %c32 step %c16 iter_args(%arg14 = %arg12) -> (tensor<16x32xf32>) { - %12 = subtensor %4[%arg9, %arg13] [2, 16] [1, 1] : tensor<16x32xf32> to tensor<2x16xf32> + %12 = tensor.extract_slice %4[%arg9, %arg13] [2, 16] [1, 1] : tensor<16x32xf32> to tensor<2x16xf32> %13 = tensor.cast %12 : tensor<2x16xf32> to tensor - %14 = subtensor %5[%arg13, %arg11] [16, 4] [1, 1] : tensor<32x32xf32> to tensor<16x4xf32> + %14 = tensor.extract_slice %5[%arg13, %arg11] [16, 4] [1, 1] : tensor<32x32xf32> to tensor<16x4xf32> %15 = tensor.cast %14 : tensor<16x4xf32> to tensor - %16 = subtensor %arg14[%arg9, %arg11] [2, 4] [1, 1] : tensor<16x32xf32> to tensor<2x4xf32> + %16 = tensor.extract_slice %arg14[%arg9, %arg11] [2, 4] [1, 1] : tensor<16x32xf32> to tensor<2x4xf32> %17 = tensor.cast %16 : tensor<2x4xf32> to tensor %18 = linalg.pad_tensor %13 low[%c0, %c0] high[%c0, %c0] { ^bb0(%arg15: index, %arg16: index): // no predecessors @@ -271,7 +271,7 @@ } : tensor to tensor<2x4xf32> %21 = linalg.matmul ins(%18, %19 : tensor<2x16xf32>, tensor<16x4xf32>) outs(%20 : tensor<2x4xf32>) -> tensor<2x4xf32> %22 = tensor.cast %21 : tensor<2x4xf32> to tensor - %23 = subtensor_insert %22 into %arg14[%arg9, %arg11] [%c2, %c4] [1, 1] : tensor into tensor<16x32xf32> + %23 = tensor.insert_slice %22 into %arg14[%arg9, %arg11] [%c2, %c4] [1, 1] : tensor into tensor<16x32xf32> scf.yield %23 : tensor<16x32xf32> } scf.yield %11 : tensor<16x32xf32> @@ -279,7 +279,7 @@ scf.yield %10 : tensor<16x32xf32> } %8 = tensor.cast %7 : tensor<16x32xf32> to tensor - %9 = subtensor_insert %8 into %arg8[%arg3, %arg5] [%c16, %c32] [1, 1] : tensor into tensor<32x64xf32> + %9 = tensor.insert_slice %8 into %arg8[%arg3, %arg5] [%c16, %c32] [1, 1] : tensor into tensor<32x64xf32> scf.yield %9 : tensor<32x64xf32> } scf.yield %3 : tensor<32x64xf32> diff --git a/mlir/test/Dialect/Linalg/hoisting.mlir b/mlir/test/Dialect/Linalg/hoisting.mlir --- a/mlir/test/Dialect/Linalg/hoisting.mlir +++ b/mlir/test/Dialect/Linalg/hoisting.mlir @@ -321,14 +321,14 @@ // ----- -// CHECK-LABEL: func @hoist_vector_transfer_pairs_tensor_and_subtensors +// CHECK-LABEL: func @hoist_vector_transfer_pairs_tensor_and_slices // CHECK-SAME: %[[TENSOR0:[a-zA-Z0-9]*]]: tensor, // CHECK-SAME: %[[TENSOR1:[a-zA-Z0-9]*]]: tensor, // CHECK-SAME: %[[TENSOR2:[a-zA-Z0-9]*]]: tensor, // CHECK-SAME: %[[TENSOR3:[a-zA-Z0-9]*]]: tensor, // CHECK-SAME: 
%[[TENSOR4:[a-zA-Z0-9]*]]: tensor, // CHECK-SAME: %[[TENSOR5:[a-zA-Z0-9]*]]: tensor -func @hoist_vector_transfer_pairs_tensor_and_subtensors( +func @hoist_vector_transfer_pairs_tensor_and_slices( %tensor0: tensor, %tensor1: tensor, %tensor2: tensor, %tensor3: tensor, %tensor4: tensor, %tensor5: tensor, %val: index, %lb : index, %ub : index, %step: index) -> @@ -349,7 +349,7 @@ -> (tensor, tensor, tensor) { // Hoisted - // CHECK: %[[ST0:.*]] = subtensor %[[TENSOR0_ARG]][%[[I]], %[[I]]]{{.*}}: tensor to tensor + // CHECK: %[[ST0:.*]] = tensor.extract_slice %[[TENSOR0_ARG]][%[[I]], %[[I]]]{{.*}}: tensor to tensor // CHECK: %[[V0:.*]] = vector.transfer_read %[[ST0]]{{.*}} : tensor, vector<1xf32> // CHECK: %[[R:.*]]:3 = scf.for %[[J:.*]] = {{.*}} iter_args( @@ -362,19 +362,19 @@ iter_args(%arg6 = %arg0, %arg7 = %arg1, %arg8 = %arg2) -> (tensor, tensor, tensor) { // Hoists. - %st0 = subtensor %arg6[%i, %i][%step, %step][1, 1] : tensor to tensor + %st0 = tensor.extract_slice %arg6[%i, %i][%step, %step][1, 1] : tensor to tensor %r0 = vector.transfer_read %st0[%c0, %c0], %cst: tensor, vector<1xf32> - // CHECK: %[[ST1:.*]] = subtensor %[[TENSOR1_ARG_L2]][%[[J]],{{.*}}: tensor to tensor + // CHECK: %[[ST1:.*]] = tensor.extract_slice %[[TENSOR1_ARG_L2]][%[[J]],{{.*}}: tensor to tensor // CHECK: %[[V1:.*]] = vector.transfer_read %[[ST1]]{{.*}} : tensor, vector<2xf32> - // Does not hoist (subtensor depends on %j) - %st1 = subtensor %arg7[%j, %c0][%step, %step][1, 1] : tensor to tensor + // Does not hoist (slice depends on %j) + %st1 = tensor.extract_slice %arg7[%j, %c0][%step, %step][1, 1] : tensor to tensor %r1 = vector.transfer_read %st1[%c0, %c0], %cst: tensor, vector<2xf32> - // CHECK: %[[ST2:.*]] = subtensor %[[TENSOR2_ARG_L2]][%[[I]],{{.*}}: tensor to tensor + // CHECK: %[[ST2:.*]] = tensor.extract_slice %[[TENSOR2_ARG_L2]][%[[I]],{{.*}}: tensor to tensor // CHECK: %[[V2:.*]] = vector.transfer_read %[[ST2]]{{.*}} : tensor, vector<3xf32> - // Does not hoist, 2 subtensor %arg8. - %st2 = subtensor %arg8[%i, %c0][%step, %step][1, 1] : tensor to tensor + // Does not hoist, 2 slice %arg8. + %st2 = tensor.extract_slice %arg8[%i, %c0][%step, %step][1, 1] : tensor to tensor %r2 = vector.transfer_read %st2[%c0, %c0], %cst: tensor, vector<3xf32> // CHECK: %[[U0:.*]] = "some_use"(%[[V0_ARG_L2]]) : (vector<1xf32>) -> vector<1xf32> @@ -388,25 +388,25 @@ %w0 = vector.transfer_write %u0, %st0[%c0, %c0] : vector<1xf32>, tensor // CHECK-DAG: %[[STI1:.*]] = vector.transfer_write %[[U1]], %{{.*}} : vector<2xf32>, tensor - // Does not hoist (associated subtensor depends on %j). + // Does not hoist (associated slice depends on %j). %w1 = vector.transfer_write %u1, %st1[%i, %i] : vector<2xf32>, tensor // CHECK-DAG: %[[STI2:.*]] = vector.transfer_write %[[U2]], %{{.*}} : vector<3xf32>, tensor - // Does not hoist, 2 subtensor / subtensor_insert for %arg8. + // Does not hoist, 2 slice / insert_slice for %arg8. %w2 = vector.transfer_write %u2, %st2[%c0, %c0] : vector<3xf32>, tensor // Hoists. - %sti0 = subtensor_insert %w0 into %arg6[%i, %i][%step, %step][1, 1] : tensor into tensor + %sti0 = tensor.insert_slice %w0 into %arg6[%i, %i][%step, %step][1, 1] : tensor into tensor - // CHECK-DAG: subtensor_insert %[[STI1]] into %[[TENSOR1_ARG_L2]][%[[J]],{{.*}}: tensor into tensor + // CHECK-DAG: tensor.insert_slice %[[STI1]] into %[[TENSOR1_ARG_L2]][%[[J]],{{.*}}: tensor into tensor // Does not hoist (depends on %j). 
- %sti1 = subtensor_insert %w1 into %arg7[%j, %c0][%step, %step][1, 1] : tensor into tensor + %sti1 = tensor.insert_slice %w1 into %arg7[%j, %c0][%step, %step][1, 1] : tensor into tensor - // CHECK-DAG: subtensor_insert %[[STI2]] into %[[TENSOR2_ARG_L2]][%[[I]],{{.*}}: tensor into tensor - // Does not hoist, 2 subtensor / subtensor_insert for %arg8. - %sti2 = subtensor_insert %w2 into %arg8[%i, %c0][%step, %step][1, 1] : tensor into tensor - %st22 = subtensor %sti2[%i, %c0][%step, %step][1, 1] : tensor to tensor - %sti22 = subtensor_insert %st22 into %arg8[%i, %c0][%step, %step][1, 1] : tensor into tensor + // CHECK-DAG: tensor.insert_slice %[[STI2]] into %[[TENSOR2_ARG_L2]][%[[I]],{{.*}}: tensor into tensor + // Does not hoist, 2 slice / insert_slice for %arg8. + %sti2 = tensor.insert_slice %w2 into %arg8[%i, %c0][%step, %step][1, 1] : tensor into tensor + %st22 = tensor.extract_slice %sti2[%i, %c0][%step, %step][1, 1] : tensor to tensor + %sti22 = tensor.insert_slice %st22 into %arg8[%i, %c0][%step, %step][1, 1] : tensor into tensor // CHECK: scf.yield {{.*}} : tensor, tensor, vector<1xf32> // CHECK: } @@ -416,7 +416,7 @@ // Hoisted // CHECK: %[[STI0:.*]] = vector.transfer_write %[[R]]#2, %[[ST0]]{{.*}} : vector<1xf32>, tensor - // CHECK: subtensor_insert %[[STI0]] into %[[TENSOR0_ARG]][%[[I]], %[[I]]]{{.*}} : tensor into tensor + // CHECK: tensor.insert_slice %[[STI0]] into %[[TENSOR0_ARG]][%[[I]], %[[I]]]{{.*}} : tensor into tensor // CHECK: scf.yield {{.*}} : tensor, tensor, tensor scf.yield %1#0, %1#1, %1#2 : diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir --- a/mlir/test/Dialect/Linalg/roundtrip.mlir +++ b/mlir/test/Dialect/Linalg/roundtrip.mlir @@ -732,11 +732,11 @@ %prod = linalg.tiled_loop (%i) = (%c0) to (%c24) step (%c4) ins(%lhs_ = %lhs: tensor<24x64xi8>, %rhs_ = %rhs: tensor<24x64xi8>) outs(%out_ = %out: tensor<24x64xi8>) { - %lhs_sub = subtensor %lhs_[%i, 0] [%c4, %c64] [1, 1] + %lhs_sub = tensor.extract_slice %lhs_[%i, 0] [%c4, %c64] [1, 1] : tensor<24x64xi8> to tensor - %rhs_sub = subtensor %rhs_[%i, 0] [%c4, %c64] [1, 1] + %rhs_sub = tensor.extract_slice %rhs_[%i, 0] [%c4, %c64] [1, 1] : tensor<24x64xi8> to tensor - %out_sub = subtensor %out_[%i, 0] [%c4, %c64] [1, 1] + %out_sub = tensor.extract_slice %out_[%i, 0] [%c4, %c64] [1, 1] : tensor<24x64xi8> to tensor %sum = linalg.generic #trait_4 @@ -747,7 +747,7 @@ linalg.yield %s : i8 } -> tensor - %sum_sub = subtensor_insert %sum into %out_[%i, 0][%c4, %c64][1, 1] + %sum_sub = tensor.insert_slice %sum into %out_[%i, 0][%c4, %c64][1, 1] : tensor into tensor<24x64xi8> linalg.yield %sum_sub : tensor<24x64xi8> } @@ -792,13 +792,13 @@ outs(%o_ = %output: tensor<24xf32>) iterators["reduction", "parallel", "reduction"] distribution["block_x", "block_y", "none"] { - %sub_3d = subtensor %i3d_[%i, %j, %k][2, 4, 8][1, 1, 1] + %sub_3d = tensor.extract_slice %i3d_[%i, %j, %k][2, 4, 8][1, 1, 1] : tensor<16x24x32xf32> to tensor<2x4x8xf32> - %sub_2d = subtensor %i2d_[%i, %k][2, 8][1, 1] + %sub_2d = tensor.extract_slice %i2d_[%i, %k][2, 8][1, 1] : tensor<16x32xf32> to tensor<2x8xf32> - %sub_1d = subtensor %i1d_[%j] [4] [1] + %sub_1d = tensor.extract_slice %i1d_[%j] [4] [1] : tensor<24xf32> to tensor<4xf32> - %sub_out = subtensor %o_[%j] [4] [1] + %sub_out = tensor.extract_slice %o_[%j] [4] [1] : tensor<24xf32> to tensor<4xf32> %acc = linalg.generic #trait_5 ins(%sub_3d, %sub_2d, %sub_1d @@ -810,7 +810,7 @@ linalg.yield %1 : f32 } -> tensor<4xf32> - %sum_sub = subtensor_insert %acc into 
%o_[%j][%c4][1] + %sum_sub = tensor.insert_slice %acc into %o_[%j][%c4][1] : tensor<4xf32> into tensor<24xf32> linalg.yield %sum_sub : tensor<24xf32> } diff --git a/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir b/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir --- a/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir +++ b/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir @@ -2,7 +2,7 @@ // CHECK-LABEL: @static_data_only( // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32> -// CHECK: %[[RESULT:.*]] = subtensor %[[ARG0]][1, 2] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32> +// CHECK: %[[RESULT:.*]] = tensor.extract_slice %[[ARG0]][1, 2] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32> // CHECK: return %[[RESULT]] func @static_data_only(%arg0 : tensor<4x5xf32>, %pad : f32) -> tensor<2x1xf32> { @@ -10,7 +10,7 @@ ^bb0(%arg1: index, %arg2: index): linalg.yield %pad : f32 } : tensor<4x5xf32> to tensor<11x13xf32> - %1 = subtensor %0[1, 2] [2, 1] [1, 1] : tensor<11x13xf32> to tensor<2x1xf32> + %1 = tensor.extract_slice %0[1, 2] [2, 1] [1, 1] : tensor<11x13xf32> to tensor<2x1xf32> return %1 : tensor<2x1xf32> } @@ -19,7 +19,7 @@ // CHECK-LABEL: @static_high_pad_only // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32 // CHECK-NOT: linalg.pad_tensor -// CHECK-NOT: subtensor +// CHECK-NOT: tensor.extract_slice // CHECK: %[[RESULT:.*]] = tensor.generate // CHECK: tensor.yield %[[PAD]] // CHECK: return %[[RESULT]] : tensor<2x4xf32> @@ -29,7 +29,7 @@ ^bb0(%arg1: index, %arg2: index): linalg.yield %pad : f32 } : tensor<4x5xf32> to tensor<11x13xf32> - %1 = subtensor %0[4, 5] [2, 4] [1, 1] : tensor<11x13xf32> to tensor<2x4xf32> + %1 = tensor.extract_slice %0[4, 5] [2, 4] [1, 1] : tensor<11x13xf32> to tensor<2x4xf32> return %1 : tensor<2x4xf32> } @@ -38,7 +38,7 @@ // CHECK-LABEL: @static_low_pad_only // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32 // CHECK-NOT: linalg.pad_tensor -// CHECK-NOT: subtensor +// CHECK-NOT: tensor.extract_slice // CHECK: %[[RESULT:.*]] = tensor.generate // CHECK: tensor.yield %[[PAD]] // CHECK: return %[[RESULT]] : tensor<2x3xf32> @@ -48,7 +48,7 @@ ^bb0(%arg1: index, %arg2: index): linalg.yield %pad : f32 } : tensor<4x5xf32> to tensor<14x20xf32> - %1 = subtensor %0[1, 3] [2, 3] [1, 1] : tensor<14x20xf32> to tensor<2x3xf32> + %1 = tensor.extract_slice %0[1, 3] [2, 3] [1, 1] : tensor<14x20xf32> to tensor<2x3xf32> return %1 : tensor<2x3xf32> } @@ -57,7 +57,7 @@ // CHECK-LABEL: @static_low_pad_only_2 // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32 // CHECK-NOT: linalg.pad_tensor -// CHECK-NOT: subtensor +// CHECK-NOT: tensor.extract_slice // CHECK: %[[RESULT:.*]] = tensor.generate // CHECK: tensor.yield %[[PAD]] // CHECK: return %[[RESULT]] : tensor<1x3xf32> @@ -67,7 +67,7 @@ ^bb0(%arg1: index, %arg2: index): linalg.yield %pad : f32 } : tensor<4x5xf32> to tensor<14x20xf32> - %1 = subtensor %0[1, 3] [1, 3] [1, 1] : tensor<14x20xf32> to tensor<1x3xf32> + %1 = tensor.extract_slice %0[1, 3] [1, 3] [1, 1] : tensor<14x20xf32> to tensor<1x3xf32> return %1 : tensor<1x3xf32> } @@ -76,7 +76,7 @@ // CHECK-LABEL: @static_mixed_data_high_pad // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32 // CHECK-NOT: linalg.pad_tensor -// CHECK: %[[SUBTENSOR:.*]] = subtensor %[[ARG0]][2, 4] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32> +// CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][2, 4] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32> // CHECK: %[[RESULT:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[0, 0] high[1, 3] // 
CHECK: linalg.yield %[[PAD]] // CHECK: return %[[RESULT]] : tensor<3x4xf32> @@ -86,7 +86,7 @@ ^bb0(%arg1: index, %arg2: index): linalg.yield %pad : f32 } : tensor<4x5xf32> to tensor<11x13xf32> - %1 = subtensor %0[2, 4] [3, 4] [1, 1] : tensor<11x13xf32> to tensor<3x4xf32> + %1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1] : tensor<11x13xf32> to tensor<3x4xf32> return %1 : tensor<3x4xf32> } @@ -95,7 +95,7 @@ // CHECK-LABEL: @static_mixed_data_low_pad // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32 // CHECK-NOT: linalg.pad_tensor -// CHECK: %[[SUBTENSOR:.*]] = subtensor %[[ARG0]][0, 0] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32> +// CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32> // CHECK: %[[RESULT:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[1, 3] high[0, 0] // CHECK: linalg.yield %[[PAD]] // CHECK: return %[[RESULT]] : tensor<3x4xf32> @@ -105,7 +105,7 @@ ^bb0(%arg1: index, %arg2: index): linalg.yield %pad : f32 } : tensor<4x5xf32> to tensor<14x20xf32> - %1 = subtensor %0[2, 4] [3, 4] [1, 1] : tensor<14x20xf32> to tensor<3x4xf32> + %1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1] : tensor<14x20xf32> to tensor<3x4xf32> return %1 : tensor<3x4xf32> } @@ -123,7 +123,7 @@ ^bb0(%arg1: index, %arg2: index): linalg.yield %pad : f32 } : tensor<4x5xf32> to tensor<13x16xf32> - %1 = subtensor %0[1, 2] [7, 9] [1, 1] : tensor<13x16xf32> to tensor<7x9xf32> + %1 = tensor.extract_slice %0[1, 2] [7, 9] [1, 1] : tensor<13x16xf32> to tensor<7x9xf32> return %1 : tensor<7x9xf32> } @@ -138,7 +138,7 @@ // CHECK: %[[GEN:.*]] = tensor.generate // CHECK: scf.yield %[[GEN]] // CHECK: } else { -// CHECK: %[[SUBTENSOR:.*]] = subtensor %[[ARG0]][%{{.*}}, 4] [%{{.*}}, 1] [1, 1] : tensor to tensor +// CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][%{{.*}}, 4] [%{{.*}}, 1] [1, 1] : tensor to tensor // CHECK: %[[PADTENSOR:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3] // CHECK: %[[CAST:.*]] = tensor.cast %[[PADTENSOR]] : tensor to tensor<3x4xf32> // CHECK: scf.yield %[[CAST]] @@ -149,7 +149,7 @@ ^bb0(%arg1: index, %arg2: index): linalg.yield %pad : f32 } : tensor to tensor - %1 = subtensor %0[2, 4] [3, 4] [1, 1] : tensor to tensor<3x4xf32> + %1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1] : tensor to tensor<3x4xf32> return %1 : tensor<3x4xf32> } diff --git a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir b/mlir/test/Dialect/Linalg/tile-and-distribute.mlir --- a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir +++ b/mlir/test/Dialect/Linalg/tile-and-distribute.mlir @@ -199,12 +199,12 @@ // CHECK: %[[STEPX:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSX]], %[[C8]]] // CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor) { // CHECK: %[[TD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[TC1]]) -> (tensor) { -// CHECK: %[[sTA:.*]] = subtensor %[[TA]][{{.*}}] : tensor to tensor -// CHECK: %[[sTB:.*]] = subtensor %[[TB]][{{.*}}] : tensor to tensor -// CHECK: %[[sTC:.*]] = subtensor %[[TC2]][{{.*}}] : tensor to tensor +// CHECK: %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor to tensor +// CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor to tensor +// CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor to tensor // CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor, tensor) // CHECK-SAME: outs(%[[sTC]] : tensor) -> tensor -// CHECK: %[[TD:.*]] = subtensor_insert %[[sTD]] into 
%[[TC2]][{{.*}}] : tensor into tensor +// CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor into tensor // CHECK: scf.yield %[[TD]] : tensor // CHECK: scf.yield %[[TD2]] : tensor // CHECK: scf.yield %[[TD1]] : tensor diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir --- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir @@ -16,11 +16,11 @@ %3 = scf.for %arg3 = %c0 to %0 step %c2 iter_args(%arg4 = %arg2) -> (tensor) { %4 = scf.for %arg5 = %c0 to %2 step %c3 iter_args(%arg6 = %arg4) -> (tensor) { %5 = scf.for %arg7 = %c0 to %1 step %c4 iter_args(%arg8 = %arg6) -> (tensor) { - %6 = subtensor %t0[%arg3, %arg7][%c2, 4][1, 1] : tensor to tensor - %7 = subtensor %arg1[%arg7, %arg5][4, %c3][1, 1] : tensor to tensor<4x?xf32> - %8 = subtensor %arg8[%arg3, %arg5][%c2, %c3][1, 1] : tensor to tensor + %6 = tensor.extract_slice %t0[%arg3, %arg7][%c2, 4][1, 1] : tensor to tensor + %7 = tensor.extract_slice %arg1[%arg7, %arg5][4, %c3][1, 1] : tensor to tensor<4x?xf32> + %8 = tensor.extract_slice %arg8[%arg3, %arg5][%c2, %c3][1, 1] : tensor to tensor %9 = linalg.matmul ins(%6, %7 : tensor, tensor<4x?xf32>) outs(%8 : tensor) -> tensor - %10 = subtensor_insert %9 into %arg8[%arg3, %arg5] [%c2, %c3] [1, 1] : tensor into tensor + %10 = tensor.insert_slice %9 into %arg8[%arg3, %arg5] [%c2, %c3] [1, 1] : tensor into tensor scf.yield %10 : tensor } scf.yield %5 : tensor @@ -48,22 +48,22 @@ // CHECK-DAG: %[[dC1:.*]] = memref.dim %[[C]], %[[C1]] : tensor // CHECK: scf.for %[[I:[0-9a-z]*]] // CHECK: %[[sizeA0:.*]] = affine.min #[[BOUND2_MAP]](%[[I]])[%[[dA0]]] -// CHECK: %[[stA:.*]] = subtensor %[[A]][%[[I]], 0] [%[[sizeA0]], %[[dA1]]] [1, 1] : tensor to tensor +// CHECK: %[[stA:.*]] = tensor.extract_slice %[[A]][%[[I]], 0] [%[[sizeA0]], %[[dA1]]] [1, 1] : tensor to tensor // CHECK: %[[sizeC0:.*]] = affine.min #[[BOUND2_MAP]](%[[I]])[%[[dC0]]] // CHECK-NEXT: scf.for %[[J:[0-9a-z]*]] // CHECK-NEXT: scf.for %[[K:[0-9a-z]*]] {{.*}} iter_args(%[[RES:[0-9a-z]*]] -// CHECK-DAG: %[[stB1:.*]] = subtensor %[[B]][%[[K]], %[[J]]] [4, 3] [1, 1] : tensor to tensor<4x3xf32> -// CHECK-DAG: %[[stF:.*]] = subtensor %[[RES]][%[[I]], %[[J]]] [2, 3] [1, 1] : tensor to tensor<2x3xf32> +// CHECK-DAG: %[[stB1:.*]] = tensor.extract_slice %[[B]][%[[K]], %[[J]]] [4, 3] [1, 1] : tensor to tensor<4x3xf32> +// CHECK-DAG: %[[stF:.*]] = tensor.extract_slice %[[RES]][%[[I]], %[[J]]] [2, 3] [1, 1] : tensor to tensor<2x3xf32> // -// subtensors of the producing matmul. +// slices of the producing matmul. 
// CHECK: %[[sizeB1:.*]] = affine.min #[[BOUND4_MAP]](%[[K]])[%[[dB1]]] -// CHECK: %[[stB2:.*]] = subtensor %[[B]][0, %[[K]]] [%[[dB0]], %[[sizeB1]]] [1, 1] : tensor to tensor +// CHECK: %[[stB2:.*]] = tensor.extract_slice %[[B]][0, %[[K]]] [%[[dB0]], %[[sizeB1]]] [1, 1] : tensor to tensor // CHECK: %[[sizeC1:.*]] = affine.min #[[BOUND4_MAP]](%[[K]])[%[[dC1]]] -// CHECK: %[[stC:.*]] = subtensor %[[C]][%[[I]], %[[K]]] [%[[sizeC0]], %[[sizeC1]]] [1, 1] : tensor to tensor +// CHECK: %[[stC:.*]] = tensor.extract_slice %[[C]][%[[I]], %[[K]]] [%[[sizeC0]], %[[sizeC1]]] [1, 1] : tensor to tensor // CHECK: %[[stD:.*]] = linalg.matmul ins(%[[stA]], %[[stB2]] : tensor, tensor) outs(%[[stC]] : tensor) -> tensor // CHECK: %[[CAST:.*]] = tensor.cast %[[stD]] : tensor to tensor<2x4xf32> // CHECK-NEXT: %[[stG:.*]] = linalg.matmul ins(%[[CAST]], %[[stB1]] : tensor<2x4xf32>, tensor<4x3xf32>) outs(%[[stF]] : tensor<2x3xf32>) -> tensor<2x3xf32> -// CHECK-NEXT: subtensor_insert %[[stG]] into %[[RES]][%[[I]], %[[J]]] +// CHECK-NEXT: tensor.insert_slice %[[stG]] into %[[RES]][%[[I]], %[[J]]] // ----- @@ -87,9 +87,9 @@ %for0 = scf.for %iv0 = %c0 to %c112 step %c8 iter_args(%arg0 = %fill) -> tensor<1x112x112x32xf32> { %for1 = scf.for %iv1 = %c0 to %c112 step %c16 iter_args(%arg1 = %arg0) -> tensor<1x112x112x32xf32> { %for2 = scf.for %iv2 = %c0 to %c32 step %c4 iter_args(%arg2 = %arg1) -> tensor<1x112x112x32xf32> { - %0 = subtensor %conv[0, %iv0, %iv1, %iv2][1, 8, 16, 4][1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32> - %1 = subtensor %elementwise[0, %iv0, %iv1, %iv2][1, 8, 16, 4][1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32> - %2 = subtensor %arg2[0, %iv0, %iv1, %iv2][1, 8, 16, 4][1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32> + %0 = tensor.extract_slice %conv[0, %iv0, %iv1, %iv2][1, 8, 16, 4][1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32> + %1 = tensor.extract_slice %elementwise[0, %iv0, %iv1, %iv2][1, 8, 16, 4][1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32> + %2 = tensor.extract_slice %arg2[0, %iv0, %iv1, %iv2][1, 8, 16, 4][1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32> %add = linalg.generic { indexing_maps = [ @@ -104,7 +104,7 @@ linalg.yield %result : f32 } -> tensor<1x8x16x4xf32> - %insert = subtensor_insert %add into %arg2[0, %iv0, %iv1, %iv2] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x8x16x4xf32> into tensor<1x112x112x32xf32> + %insert = tensor.insert_slice %add into %arg2[0, %iv0, %iv1, %iv2] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x8x16x4xf32> into tensor<1x112x112x32xf32> scf.yield %insert : tensor<1x112x112x32xf32> } scf.yield %for2 : tensor<1x112x112x32xf32> @@ -127,19 +127,19 @@ // CHECK-NEXT: %[[OFFSET_H:.+]] = affine.apply #[[MAP0]](%[[IV0]]) // CHECK-NEXT: scf.for %[[IV1:.+]] = %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG1:.+]] = %[[ARG0]]) // CHECK-NEXT: %[[OFFSET_W:.+]] = affine.apply #[[MAP0]](%[[IV1]]) -// CHECK-NEXT: %[[ST_INPUT:.+]] = subtensor %arg0[0, %[[OFFSET_H]], %[[OFFSET_W]], 0] [1, 17, 33, 3] [1, 1, 1, 1] : tensor<1x225x225x3xf32> to tensor<1x17x33x3xf32> +// CHECK-NEXT: %[[ST_INPUT:.+]] = tensor.extract_slice %arg0[0, %[[OFFSET_H]], %[[OFFSET_W]], 0] [1, 17, 33, 3] [1, 1, 1, 1] : tensor<1x225x225x3xf32> to tensor<1x17x33x3xf32> // CHECK-NEXT: scf.for %[[IV2:.+]] = %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG2:.+]] = %[[ARG1]]) -// CHECK-NEXT: %[[ST_ELEM:.+]] = subtensor %[[ELEM]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32> 
-// CHECK-NEXT: %[[ST_ARG2:.+]] = subtensor %[[ARG2]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32> -// CHECK-NEXT: %[[ST_FILTER:.+]] = subtensor %[[FILTER]][0, 0, 0, %[[IV2]]] [3, 3, 3, 4] [1, 1, 1, 1] : tensor<3x3x3x32xf32> to tensor<3x3x3x4xf32> -// CHECK-NEXT: %[[ST_FILL:.+]] = subtensor %[[FILL]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32> +// CHECK-NEXT: %[[ST_ELEM:.+]] = tensor.extract_slice %[[ELEM]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32> +// CHECK-NEXT: %[[ST_ARG2:.+]] = tensor.extract_slice %[[ARG2]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32> +// CHECK-NEXT: %[[ST_FILTER:.+]] = tensor.extract_slice %[[FILTER]][0, 0, 0, %[[IV2]]] [3, 3, 3, 4] [1, 1, 1, 1] : tensor<3x3x3x32xf32> to tensor<3x3x3x4xf32> +// CHECK-NEXT: %[[ST_FILL:.+]] = tensor.extract_slice %[[FILL]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32> // CHECK-NEXT: %[[ST_CONV:.+]] = linalg.conv_2d_input_nhwc_filter_hwcf // CHECK-SAME: ins(%[[ST_INPUT]], %[[ST_FILTER]] : tensor<1x17x33x3xf32>, tensor<3x3x3x4xf32>) // CHECK-SAME: outs(%[[ST_FILL]] : tensor<1x8x16x4xf32>) // CHECK-NEXT: %[[ADD:.+]] = linalg.generic // CHECK-SAME: ins(%[[ST_CONV]], %[[ST_ELEM]] : tensor<1x8x16x4xf32>, tensor<1x8x16x4xf32>) // CHECK-SAME: outs(%[[ST_ARG2]] : tensor<1x8x16x4xf32>) -// CHECK: subtensor_insert %[[ADD]] into %[[ARG2]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] +// CHECK: tensor.insert_slice %[[ADD]] into %[[ARG2]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] // ----- @@ -174,9 +174,9 @@ %oh_size = affine.min affine_map<(d0)[s0] -> (16, -d0 + s0)>(%iv1)[%oh] %ow_size = affine.min affine_map<(d0)[s0] -> (4, -d0 + s0)>(%iv2)[%ow] %oc_size = affine.min affine_map<(d0)[s0] -> (2, -d0 + s0)>(%iv2)[%oc] - %0 = subtensor %conv[%iv0, %iv1, %iv2, %iv3][%n_size, %oh_size, %ow_size, %oc_size][1, 1, 1, 1] : tensor to tensor - %1 = subtensor %elementwise[%iv0, %iv1, %iv2, %iv3][%n_size, %oh_size, %ow_size, %oc_size][1, 1, 1, 1] : tensor to tensor - %2 = subtensor %arg3[%iv0, %iv1, %iv2, %iv3][%n_size, %oh_size, %ow_size, %oc_size][1, 1, 1, 1] : tensor to tensor + %0 = tensor.extract_slice %conv[%iv0, %iv1, %iv2, %iv3][%n_size, %oh_size, %ow_size, %oc_size][1, 1, 1, 1] : tensor to tensor + %1 = tensor.extract_slice %elementwise[%iv0, %iv1, %iv2, %iv3][%n_size, %oh_size, %ow_size, %oc_size][1, 1, 1, 1] : tensor to tensor + %2 = tensor.extract_slice %arg3[%iv0, %iv1, %iv2, %iv3][%n_size, %oh_size, %ow_size, %oc_size][1, 1, 1, 1] : tensor to tensor %add = linalg.generic { indexing_maps = [ @@ -191,7 +191,7 @@ linalg.yield %result : f32 } -> tensor - %insert = subtensor_insert %add into %arg3[%iv0, %iv1, %iv2, %iv3] [%n_size, %oh_size, %ow_size, %oc_size] [1, 1, 1, 1] : tensor into tensor + %insert = tensor.insert_slice %add into %arg3[%iv0, %iv1, %iv2, %iv3] [%n_size, %oh_size, %ow_size, %oc_size] [1, 1, 1, 1] : tensor into tensor scf.yield %insert : tensor } scf.yield %for3 : tensor @@ -257,19 +257,19 @@ // CHECK-NEXT: %[[SIZE_ELEM_OC:.+]] = affine.min #[[BOUND2_MAP]](%[[IV2]])[%[[ELEM_OC]]] // CHECK-NEXT: %[[OFFSET_OW:.+]] = affine.apply #[[X2_MAP]](%[[IV2]]) // CHECK-NEXT: %[[SIZE_INPUT_W:.+]] = affine.min #[[INPUT_BOUND]](%[[SIZE_ELEM_OW]], %[[IV2]])[%[[FILTER_W]], %[[INPUT_W]]] -// CHECK-NEXT: 
%[[ST_INPUT:.+]] = subtensor %[[INPUT]][%[[IV0]], %[[OFFSET_OH]], %[[OFFSET_OW]], 0] +// CHECK-NEXT: %[[ST_INPUT:.+]] = tensor.extract_slice %[[INPUT]][%[[IV0]], %[[OFFSET_OH]], %[[OFFSET_OW]], 0] // CHECK-SAME: [%[[SIZE_INPUT_N]], %[[SIZE_INPUT_H]], %[[SIZE_INPUT_W]], %[[INPUT_C]]] // CHECK-NEXT: %[[SIZE_ELEM_OW_2:.+]] = affine.min #[[BOUND4_MAP_2]](%[[IV2]])[%[[FILL_W]], %[[ELEM_OW]]] // CHECK-NEXT: scf.for %[[IV3:.+]] = %{{.+}} to %[[ELEM_OC]] step %{{.+}} iter_args(%[[ARG:[a-z0-9]+]] -// CHECK-NEXT: %[[ST_ELEM:.+]] = subtensor %[[ELEM]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] +// CHECK-NEXT: %[[ST_ELEM:.+]] = tensor.extract_slice %[[ELEM]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] // CHECK-SAME: [%[[SIZE_ELEM_N]], %[[SIZE_ELEM_OH]], %[[SIZE_ELEM_OW]], %[[SIZE_ELEM_OC]]] -// CHECK-NEXT: %[[ST_ARG:.+]] = subtensor %[[ARG]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] +// CHECK-NEXT: %[[ST_ARG:.+]] = tensor.extract_slice %[[ARG]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] // CHECK-SAME: [%[[SIZE_ELEM_N]], %[[SIZE_ELEM_OH]], %[[SIZE_ELEM_OW]], %[[SIZE_ELEM_OC]]] // CHECK-NEXT: %[[SIZE_ELEM_OC_2:.+]] = affine.min #[[BOUND2_MAP_2]](%[[IV3]], %[[IV2]])[%[[FILTER_OC]], %[[ELEM_OC]]] -// CHECK-NEXT: %[[ST_FILTER:.+]] = subtensor %[[FILTER]][0, 0, 0, %[[IV3]]] +// CHECK-NEXT: %[[ST_FILTER:.+]] = tensor.extract_slice %[[FILTER]][0, 0, 0, %[[IV3]]] // CHECK-SAME: [%[[FILTER_H]], %[[FILTER_W]], %[[FILTER_IC]], %[[SIZE_ELEM_OC_2]]] // CHECK-NEXT: %[[SIZE_ELEM_OC_3:.+]] = affine.min #[[BOUND2_MAP_2]](%[[IV3]], %[[IV2]])[%[[FILL_C]], %[[ELEM_OC]]] -// CHECK-NEXT: %[[ST_FILL:.+]] = subtensor %[[FILL]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] +// CHECK-NEXT: %[[ST_FILL:.+]] = tensor.extract_slice %[[FILL]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] // CHECK-SAME: [%[[SIZE_ELEM_N_2]], %[[SIZE_ELEM_OH_2]], %[[SIZE_ELEM_OW_2]], %[[SIZE_ELEM_OC_3]]] // CHECK-NEXT: %[[ST_CONV:.+]] = linalg.conv_2d_input_nhwc_filter_hwcf // CHECK-SAME: ins(%[[ST_INPUT]], %[[ST_FILTER]] : tensor, tensor) @@ -277,7 +277,7 @@ // CHECK-NEXT: %[[ST_ADD:.+]] = linalg.generic // CHECK-SAME: ins(%[[ST_CONV]], %[[ST_ELEM]] : tensor, tensor) // CHECK-SAME: outs(%[[ST_ARG]] : tensor) -// CHECK: subtensor_insert %[[ST_ADD]] into %[[ARG]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] +// CHECK: tensor.insert_slice %[[ST_ADD]] into %[[ARG]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] // CHECK-SAME: [%[[SIZE_ELEM_N]], %[[SIZE_ELEM_OH]], %[[SIZE_ELEM_OW]], %[[SIZE_ELEM_OC]]] // ----- @@ -297,13 +297,13 @@ // CHECK: scf.if %[[HASZERO]] // CHECK: tensor.generate // CHECK: else -// CHECK: subtensor +// CHECK: tensor.extract_slice // CHECK: linalg.pad_tensor // CHECK: tensor.cast -// CHECK: subtensor -// CHECK: subtensor +// CHECK: tensor.extract_slice +// CHECK: tensor.extract_slice // CHECK: linalg.generic -// CHECK: subtensor_insert +// CHECK: tensor.insert_slice func @pad_generic_static(%small_input: tensor<58x1xf32>, %large_input: tensor<64x128xf32>) -> tensor<64x128xf32> { %c0 = constant 0 : index %c1 = constant 1 : index @@ -323,9 +323,9 @@ %for0 = scf.for %iv0 = %c0 to %d0 step %c16 iter_args(%arg0 = %fill) -> tensor<64x128xf32> { %for1 = scf.for %iv1 = %c0 to %d1 step %c32 iter_args(%arg1 = %arg0) -> tensor<64x128xf32> { - %0 = subtensor %pad[%iv0, %iv1][16, 32][1, 1] : tensor<64x128xf32> to tensor<16x32xf32> - %1 = subtensor %large_input[%iv0, %iv1][16, 32][1, 1] : tensor<64x128xf32> to tensor<16x32xf32> - %2 = subtensor %arg1[%iv0, %iv1][16, 32][1, 1] : tensor<64x128xf32> to tensor<16x32xf32> + %0 = tensor.extract_slice %pad[%iv0, %iv1][16, 32][1, 1] : 
tensor<64x128xf32> to tensor<16x32xf32> + %1 = tensor.extract_slice %large_input[%iv0, %iv1][16, 32][1, 1] : tensor<64x128xf32> to tensor<16x32xf32> + %2 = tensor.extract_slice %arg1[%iv0, %iv1][16, 32][1, 1] : tensor<64x128xf32> to tensor<16x32xf32> %add = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} @@ -335,7 +335,7 @@ linalg.yield %result : f32 } -> tensor<16x32xf32> - %insert = subtensor_insert %add into %arg1[%iv0, %iv1] [16, 32] [1, 1] : tensor<16x32xf32> into tensor<64x128xf32> + %insert = tensor.insert_slice %add into %arg1[%iv0, %iv1] [16, 32] [1, 1] : tensor<16x32xf32> into tensor<64x128xf32> scf.yield %insert : tensor<64x128xf32> } scf.yield %for1 : tensor<64x128xf32> diff --git a/mlir/test/Dialect/Linalg/tile-and-pad-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-pad-tensors.mlir --- a/mlir/test/Dialect/Linalg/tile-and-pad-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-and-pad-tensors.mlir @@ -12,9 +12,9 @@ // CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor) { // CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor) { // CHECK: %[[TD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[TC1]]) -> (tensor) { -// CHECK: %[[sTA:.*]] = subtensor %[[TA]][{{.*}}] : tensor to tensor -// CHECK: %[[sTB:.*]] = subtensor %[[TB]][{{.*}}] : tensor to tensor -// CHECK: %[[sTC:.*]] = subtensor %[[TC2]][{{.*}}] : tensor to tensor +// CHECK: %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor to tensor +// CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor to tensor +// CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor to tensor // Dynamic op has been canonicalized away. // CHECK-NOT: linalg.matmul {{.*}} tensor @@ -28,8 +28,8 @@ // CHECK: : tensor to tensor<2x3xi32> // CHECK: %[[pD:.*]] = linalg.matmul_i8_i8_i32 ins(%[[pA]], %[[pB]] : tensor<2x4xi8>, tensor<4x3xi8>) // CHECK-SAME: outs(%[[pC]] : tensor<2x3xi32>) -> tensor<2x3xi32> -// CHECK: %[[sTD:.*]] = subtensor %[[pD]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : tensor<2x3xi32> to tensor -// CHECK: %[[TD:.*]] = subtensor_insert %[[sTD]] into %[[TC2]][{{.*}}] : tensor into tensor +// CHECK: %[[sTD:.*]] = tensor.extract_slice %[[pD]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : tensor<2x3xi32> to tensor +// CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor into tensor // CHECK: scf.yield %[[TD]] : tensor // CHECK: scf.yield %[[TD2]] : tensor // CHECK: scf.yield %[[TD1]] : tensor @@ -52,15 +52,15 @@ // CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor) { // CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor) { // CHECK: %[[TD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[TC1]]) -> (tensor) { -// CHECK: %[[sTC:.*]] = subtensor %[[TC2]][{{.*}}] : tensor to tensor +// CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor to tensor // Padding injects static information. 
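The "Padding injects static information" comment is the crux of tile-and-pad: the boundary tile produced by `tensor.extract_slice` has dynamic sizes, `linalg.pad_tensor` widens it to the full static tile so the payload op runs on static shapes, and only the valid region is sliced back out and inserted into the accumulator. A minimal sketch of one such iteration follows; the 2x3 tile shape and all value names are assumptions for illustration only.

```mlir
// Sketch of one tile-and-pad iteration (shapes and names are illustrative).
func @pad_one_tile(%in: tensor<?x?xf32>, %acc: tensor<?x?xf32>,
                   %i: index, %j: index, %sz0: index, %sz1: index,
                   %h0: index, %h1: index, %cst: f32) -> tensor<?x?xf32> {
  // Dynamically sized boundary tile.
  %tile = tensor.extract_slice %in[%i, %j] [%sz0, %sz1] [1, 1]
      : tensor<?x?xf32> to tensor<?x?xf32>
  // Pad up to the static tile size so the payload op sees static shapes.
  %padded = linalg.pad_tensor %tile low[0, 0] high[%h0, %h1] {
    ^bb0(%a: index, %b: index):
      linalg.yield %cst : f32
  } : tensor<?x?xf32> to tensor<2x3xf32>
  // ... compute on %padded, which now has the static 2x3 shape ...
  // Strip the padding and write only the valid region back.
  %valid = tensor.extract_slice %padded[0, 0] [%sz0, %sz1] [1, 1]
      : tensor<2x3xf32> to tensor<?x?xf32>
  %res = tensor.insert_slice %valid into %acc[%i, %j] [%sz0, %sz1] [1, 1]
      : tensor<?x?xf32> into tensor<?x?xf32>
  return %res : tensor<?x?xf32>
}
```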
// CHECK: %[[pC:.*]] = linalg.pad_tensor %[[sTC]] low[%[[C0]], %[[C0]], %[[C0]]] high[%{{.*}}, %{{.*}}, %{{.*}}] // CHECK: : tensor to tensor<2x3x4xf32> // CHECK: %[[pD:.*]] = linalg.generic // CHECK-SAME: ins(%[[VAL]] : f32) outs(%[[pC]] : tensor<2x3x4xf32>) -// CHECK: %[[sTD:.*]] = subtensor %[[pD]][0, 0, 0] [%{{.*}}, %{{.*}}, %{{.*}}] [1, 1, 1] : tensor<2x3x4xf32> to tensor -// CHECK: %[[TD:.*]] = subtensor_insert %[[sTD]] into %[[TC2]][{{.*}}] : tensor into tensor +// CHECK: %[[sTD:.*]] = tensor.extract_slice %[[pD]][0, 0, 0] [%{{.*}}, %{{.*}}, %{{.*}}] [1, 1, 1] : tensor<2x3x4xf32> to tensor +// CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor into tensor // CHECK: scf.yield %[[TD]] : tensor // CHECK: scf.yield %[[TD2]] : tensor // CHECK: scf.yield %[[TD1]] : tensor @@ -104,11 +104,11 @@ // CHECK-1DIM-TILE: %[[C0:.*]] = constant 0 : index // CHECK-1DIM-TILE: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor) { // CHECK-1DIM-TILE: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor) { -// CHECK-1DIM-TILE: %[[sTA:.*]] = subtensor %[[TA]][{{.*}}] : tensor to tensor +// CHECK-1DIM-TILE: %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor to tensor // CHECK-1DIM-TILE: %[[sTAc:.*]] = tensor.cast %[[sTA]] : tensor to tensor -// CHECK-1DIM-TILE: %[[sTB:.*]] = subtensor %[[TB]][{{.*}}] : tensor<8x?xi8> to tensor<8x?xi8> +// CHECK-1DIM-TILE: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<8x?xi8> to tensor<8x?xi8> // CHECK-1DIM-TILE: %[[sTBc:.*]] = tensor.cast %[[sTB]] : tensor<8x?xi8> to tensor -// CHECK-1DIM-TILE: %[[sTC:.*]] = subtensor %[[TC1]][{{.*}}] : tensor to tensor +// CHECK-1DIM-TILE: %[[sTC:.*]] = tensor.extract_slice %[[TC1]][{{.*}}] : tensor to tensor // CHECK-1DIM-TILE: %[[pA:.*]] = linalg.pad_tensor %[[sTAc]] low[%[[C0]], %[[C0]]] high[%{{.*}}, %{{.*}}] // CHECK-1DIM-TILE: : tensor to tensor<2x8xi8> // CHECK-1DIM-TILE: %[[pB:.*]] = linalg.pad_tensor %[[sTBc]] low[%[[C0]], %[[C0]]] high[%{{.*}}, %{{.*}}] diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir --- a/mlir/test/Dialect/Linalg/tile-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir @@ -11,12 +11,12 @@ // CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor) { // CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor) { // CHECK: %[[TD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[TC1]]) -> (tensor) { -// CHECK: %[[sTA:.*]] = subtensor %[[TA]][{{.*}}] : tensor to tensor -// CHECK: %[[sTB:.*]] = subtensor %[[TB]][{{.*}}] : tensor to tensor -// CHECK: %[[sTC:.*]] = subtensor %[[TC2]][{{.*}}] : tensor to tensor +// CHECK: %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor to tensor +// CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor to tensor +// CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor to tensor // CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor, tensor) // CHECK-SAME: outs(%[[sTC]] : tensor) -> tensor -// CHECK: %[[TD:.*]] = subtensor_insert %[[sTD]] into %[[TC2]][{{.*}}] : tensor into tensor +// CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor into tensor // CHECK: scf.yield %[[TD]] : tensor // CHECK: scf.yield %[[TD2]] : tensor // CHECK: scf.yield %[[TD1]] : tensor @@ -51,14 +51,14 @@ // TLOOP-SAME: 
iterators["parallel", "parallel", "reduction"] // TLOOP-SAME: distribution["block_x", "block_y", "none"] { -// TLOOP: %[[SUB_ARG_0:.*]] = subtensor %[[A0]][%[[I]], %[[K]]] -// TLOOP: %[[SUB_ARG_1:.*]] = subtensor %[[A1]][%[[K]], %[[J]]] -// TLOOP: %[[SUB_ARG_2:.*]] = subtensor %[[A2]][%[[I]], %[[J]]] +// TLOOP: %[[SUB_ARG_0:.*]] = tensor.extract_slice %[[A0]][%[[I]], %[[K]]] +// TLOOP: %[[SUB_ARG_1:.*]] = tensor.extract_slice %[[A1]][%[[K]], %[[J]]] +// TLOOP: %[[SUB_ARG_2:.*]] = tensor.extract_slice %[[A2]][%[[I]], %[[J]]] // TLOOP: %[[PROD:.*]] = linalg.matmul ins(%[[SUB_ARG_0]], %[[SUB_ARG_1]] // TLOOP-SE: outs(%[[SUB_ARG_2]] : [[TY]]) -> [[TY]] -// TLOOP: %[[O:.*]] = subtensor_insert %[[PROD]] into %[[A2]][%[[I]], %[[J]]] +// TLOOP: %[[O:.*]] = tensor.insert_slice %[[PROD]] into %[[A2]][%[[I]], %[[J]]] // TLOOP: linalg.yield %[[O]] : [[TY]] // ----- @@ -93,13 +93,13 @@ // CHECK: %[[TD0:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[TC0:.+]] = %[[INIT]]) -> (tensor) { // CHECK: %[[TD1:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[TC1:.+]] = %[[TC0]]) -> (tensor) { // CHECK: %[[TD2:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[TC2:.+]] = %[[TC1]]) -> (tensor) { -// CHECK: %[[STARG0:.+]] = subtensor %[[ARG0]][{{.+}}] : tensor to tensor -// CHECK: %[[STARG1:.+]] = subtensor %[[ARG1]][{{.+}}] : tensor to tensor -// CHECK: %[[STARG2:.+]] = subtensor %[[TC2]][{{.+}}] : tensor to tensor +// CHECK: %[[STARG0:.+]] = tensor.extract_slice %[[ARG0]][{{.+}}] : tensor to tensor +// CHECK: %[[STARG1:.+]] = tensor.extract_slice %[[ARG1]][{{.+}}] : tensor to tensor +// CHECK: %[[STARG2:.+]] = tensor.extract_slice %[[TC2]][{{.+}}] : tensor to tensor // CHECK: %[[STRETURN:.+]] = linalg.generic // CHECK-SAME: ins(%[[STARG0]], %[[STARG1]] : tensor, tensor) // CHECK-SAME: outs(%[[STARG2]] : tensor) -// CHECK: %[[TD:.+]] = subtensor_insert %[[STRETURN]] into %[[TC2]] +// CHECK: %[[TD:.+]] = tensor.insert_slice %[[STRETURN]] into %[[TC2]] // CHECK: scf.yield %[[TD]] // CHECK: } // CHECK: scf.yield %[[TD2]] diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -586,7 +586,7 @@ // CHECK: %[[INIT:.*]] = linalg.init_tensor [6, %[[V1]], %[[V2]], %[[V5]]] : tensor<6x?x?x?xf32> // CHECK: %[[FILL:.*]] = linalg.fill(%[[INIT]], %{{.*}}) : tensor<6x?x?x?xf32>, f32 -> tensor<6x?x?x?xf32> // CHECK: %[[SRCDIM:.*]] = memref.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32> -// CHECK: %[[RESULT:.*]] = subtensor_insert %[[SRC]] into %[[FILL]][2, %[[LOW]], 3, 3] [1, 2, 2, %[[SRCDIM]]] [1, 1, 1, 1] : tensor<1x2x2x?xf32> into tensor<6x?x?x?xf32> +// CHECK: %[[RESULT:.*]] = tensor.insert_slice %[[SRC]] into %[[FILL]][2, %[[LOW]], 3, 3] [1, 2, 2, %[[SRCDIM]]] [1, 1, 1, 1] : tensor<1x2x2x?xf32> into tensor<6x?x?x?xf32> // CHECK: return %[[RESULT]] func @pad_static_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: index, %pad_value: f32) -> tensor<6x?x?x?xf32> { @@ -638,7 +638,7 @@ } : tensor<5x6xf32> to tensor<10x13xf32> %1 = vector.transfer_write %arg1, %0[%c0, %c0] : vector<7x9xf32>, tensor<10x13xf32> - %2 = subtensor %1[0, 0] [5, 6] [1, 1] : tensor<10x13xf32> to tensor<5x6xf32> + %2 = tensor.extract_slice %1[0, 0] [5, 6] [1, 1] : tensor<10x13xf32> to tensor<5x6xf32> return %2 : tensor<5x6xf32> } @@ -648,14 +648,14 @@ // CHECK-SAME: %[[ARG0:.*]]: tensor, %[[ARG1:.*]]: vector<7x9xf32>, %[[SIZE:.*]]: index, %[[PADDING:.*]]: index // CHECK-NOT: 
linalg.pad_tensor // CHECK: %[[C0:.*]] = constant 0 : index -// CHECK: %[[SUB:.*]] = subtensor %[[ARG0]][0, 0] [%[[SIZE]], 6] [1, 1] : tensor to tensor +// CHECK: %[[SUB:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [%[[SIZE]], 6] [1, 1] : tensor to tensor // CHECK: %[[RESULT:.*]] = vector.transfer_write %[[ARG1]], %[[SUB]][%[[C0]], %[[C0]]] : vector<7x9xf32>, tensor // CHECK: return %[[RESULT]] func @pad_and_transfer_write_dynamic_static( %arg0: tensor, %arg1: vector<7x9xf32>, %size: index, %padding: index) -> tensor { %c0 = constant 0 : index %c5 = constant 5.0 : f32 - %s = subtensor %arg0[0, 0] [%size, 6] [1, 1] + %s = tensor.extract_slice %arg0[0, 0] [%size, 6] [1, 1] : tensor to tensor %0 = linalg.pad_tensor %s low[0, 0] high[%padding, 7] { ^bb0(%arg2: index, %arg3: index): @@ -663,13 +663,13 @@ } : tensor to tensor %1 = vector.transfer_write %arg1, %0[%c0, %c0] : vector<7x9xf32>, tensor - %2 = subtensor %1[0, 0] [%size, 6] [1, 1] : tensor to tensor + %2 = tensor.extract_slice %1[0, 0] [%size, 6] [1, 1] : tensor to tensor return %2 : tensor } // ----- -// CHECK-LABEL: func @pad_and_subtensor_insert +// CHECK-LABEL: func @pad_and_insert_slice // CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32>, %[[ARG1:.*]]: tensor<12x13xf32> // CHECK-NOT: linalg.pad_tensor // CHECK-DAG: %[[C0:.*]] = constant 0 : index @@ -677,7 +677,7 @@ // CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %[[C5]] : tensor<5x6xf32>, vector<7x9xf32> // CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[ARG1]][%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<7x9xf32>, tensor<12x13xf32> // CHECK: return %[[WRITE]] -func @pad_and_subtensor_insert( +func @pad_and_insert_slice( %arg0: tensor<5x6xf32>, %arg1: tensor<12x13xf32>) -> tensor<12x13xf32> { %c0 = constant 0 : index %c5 = constant 5.0 : f32 @@ -685,7 +685,7 @@ ^bb0(%arg2: index, %arg3: index): linalg.yield %c5 : f32 } : tensor<5x6xf32> to tensor<7x9xf32> - %r = subtensor_insert %0 into %arg1[0, 0][7, 9][1, 1] : tensor<7x9xf32> into tensor<12x13xf32> + %r = tensor.insert_slice %0 into %arg1[0, 0][7, 9][1, 1] : tensor<7x9xf32> into tensor<12x13xf32> return %r : tensor<12x13xf32> } diff --git a/mlir/test/Dialect/MemRef/canonicalize.mlir b/mlir/test/Dialect/MemRef/canonicalize.mlir --- a/mlir/test/Dialect/MemRef/canonicalize.mlir +++ b/mlir/test/Dialect/MemRef/canonicalize.mlir @@ -367,27 +367,3 @@ %1 = memref.buffer_cast %0 : memref return %1 : memref } - -// ----- - -// TODO: Move this test to Tensor/canonicalize.mlir. 
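The TODO above is resolved by this patch: the test removed below reappears under `Tensor/canonicalize.mlir` as `@insert_slice_propagate_dest_cast`. Several of the canonicalization tests moved in the following hunks share one pattern: constant offset, size, and stride operands are folded into the op's static positions, and a `tensor.cast` reconciles the refined result type with the type the rest of the IR expects. A before/after sketch is below; the element type and the fully dynamic source shape are assumed, the folded `tensor<4x1x?xf32>` type matches the CHECK lines later in this patch.

```mlir
// Before canonicalization: constants are still passed as SSA operands.
func @slice_before(%t: tensor<?x?x?xf32>, %o: index, %sz: index)
    -> tensor<?x?x?xf32> {
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %c4 = constant 4 : index
  %0 = tensor.extract_slice %t[%c0, %o, %c1] [%c4, %c1, %sz] [%c1, %c1, %c1]
      : tensor<?x?x?xf32> to tensor<?x?x?xf32>
  return %0 : tensor<?x?x?xf32>
}

// After canonicalization: constants are folded into the static form and a
// cast restores the original result type for existing users.
func @slice_after(%t: tensor<?x?x?xf32>, %o: index, %sz: index)
    -> tensor<?x?x?xf32> {
  %0 = tensor.extract_slice %t[0, %o, 1] [4, 1, %sz] [1, 1, 1]
      : tensor<?x?x?xf32> to tensor<4x1x?xf32>
  %1 = tensor.cast %0 : tensor<4x1x?xf32> to tensor<?x?x?xf32>
  return %1 : tensor<?x?x?xf32>
}
```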
-func @subtensor_insert_propagate_dest_cast(%arg0 : tensor<2x?xi32>, %arg1 : tensor, - %arg2 : index, %arg3 : index) -> tensor { - %c0 = constant 0 : index - %c1 = constant 1 : index - %c2 = constant 2 : index - %c8 = constant 8 : index - %0 = memref.dim %arg0, %c1 : tensor<2x?xi32> - %1 = tensor.extract %arg1[] : tensor - %2 = tensor.generate %arg2, %c8 { - ^bb0(%arg4: index, %arg5: index): - tensor.yield %1 : i32 - } : tensor - %3 = subtensor_insert %arg0 into %2[%c0, %arg3] [%c2, %0] [%c1, %c1] : tensor<2x?xi32> into tensor - return %3 : tensor -} -// CHECK-LABEL: func @subtensor_insert_propagate_dest_cast -// CHECK: %[[UPDATED:.+]] = subtensor_insert %{{.+}} into %{{.+}}[0, %{{.+}}] [2, %{{.+}}] [1, 1] -// CHECK-SAME: tensor<2x?xi32> into tensor -// CHECK: %[[CAST:.+]] = tensor.cast %[[UPDATED]] -// CHECK: return %[[CAST]] diff --git a/mlir/test/Dialect/SCF/canonicalize.mlir b/mlir/test/Dialect/SCF/canonicalize.mlir --- a/mlir/test/Dialect/SCF/canonicalize.mlir +++ b/mlir/test/Dialect/SCF/canonicalize.mlir @@ -659,10 +659,10 @@ scf.yield %2 : tensor } // CHECK-NOT: tensor.cast -// CHECK: %[[RES:.*]] = subtensor_insert %[[FOR_RES]] into %[[T1]][0, 0] [32, 1024] [1, 1] : tensor<32x1024xf32> into tensor<1024x1024xf32> +// CHECK: %[[RES:.*]] = tensor.insert_slice %[[FOR_RES]] into %[[T1]][0, 0] [32, 1024] [1, 1] : tensor<32x1024xf32> into tensor<1024x1024xf32> // CHECK: return %[[RES]] : tensor<1024x1024xf32> %2 = tensor.cast %1 : tensor to tensor<32x1024xf32> - %res = subtensor_insert %2 into %t1[0, 0] [32, 1024] [1, 1] : tensor<32x1024xf32> into tensor<1024x1024xf32> + %res = tensor.insert_slice %2 into %t1[0, 0] [32, 1024] [1, 1] : tensor<32x1024xf32> into tensor<1024x1024xf32> return %res : tensor<1024x1024xf32> } diff --git a/mlir/test/Dialect/Standard/canonicalize.mlir b/mlir/test/Dialect/Standard/canonicalize.mlir --- a/mlir/test/Dialect/Standard/canonicalize.mlir +++ b/mlir/test/Dialect/Standard/canonicalize.mlir @@ -24,202 +24,6 @@ // ----- -func @subtensor_canonicalize(%arg0 : tensor, %arg1 : index, - %arg2 : index) -> tensor -{ - %c0 = constant 0 : index - %c1 = constant 1 : index - %c4 = constant 4 : index - %0 = subtensor %arg0[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor to tensor - return %0 : tensor -} -// CHECK-LABEL: func @subtensor_canonicalize -// CHECK-SAME: %[[ARG0:.+]]: tensor -// CHECK: %[[SUBTENSOR:.+]] = subtensor %[[ARG0]][0, %{{[a-zA-Z0-9_]+}}, 1] -// CHECK-SAME: [4, 1, %{{[a-zA-Z0-9_]+}}] [1, 1, 1] -// CHECK-SAME: : tensor to tensor<4x1x?xf32> -// CHECK: %[[RESULT:.+]] = tensor.cast %[[SUBTENSOR]] -// CHEKC: return %[[RESULT]] - -// ----- - -func @rank_reducing_subtensor_canonicalize(%arg0 : tensor, %arg1 : index, - %arg2 : index) -> tensor -{ - %c0 = constant 0 : index - %c1 = constant 1 : index - %c4 = constant 4 : index - %0 = subtensor %arg0[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor to tensor - return %0 : tensor -} -// CHECK-LABEL: func @rank_reducing_subtensor_canonicalize -// CHECK-SAME: %[[ARG0:.+]]: tensor -// CHECK: %[[SUBTENSOR:.+]] = subtensor %[[ARG0]][0, %{{[a-zA-Z0-9_]+}}, 1] -// CHECK-SAME: [4, 1, %{{[a-zA-Z0-9_]+}}] [1, 1, 1] -// CHECK-SAME: : tensor to tensor<4x?xf32> -// CHECK: %[[RESULT:.+]] = tensor.cast %[[SUBTENSOR]] -// CHEKC: return %[[RESULT]] - -// ----- - -// CHECK-LABEL: func @trivial_subtensor -// CHECK-SAME: %[[ARG0:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8> -// CHECK-NOT: subtensor -// CHECK: return %[[ARG0]] : tensor<4x6x16x32xi8> -func @trivial_subtensor(%arg0 : tensor<4x6x16x32xi8>) -> 
tensor<4x6x16x32xi8> { - %0 = subtensor %arg0[0, 0, 0, 0] [4, 6, 16, 32] [1, 1, 1, 1] : tensor<4x6x16x32xi8> to tensor<4x6x16x32xi8> - return %0 : tensor<4x6x16x32xi8> -} - -// ----- - -// CHECK-LABEL: func @trivial_subtensor_insert -// CHECK-SAME: %[[ARG0:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8> -// CHECK-NOT: subtensor -// CHECK: return %[[ARG0]] : tensor<4x6x16x32xi8> -func @trivial_subtensor_insert(%arg0 : tensor<4x6x16x32xi8>, %arg1 : tensor<4x6x16x32xi8>) -> tensor<4x6x16x32xi8> { - %0 = subtensor_insert %arg0 into %arg1[0, 0, 0, 0] [4, 6, 16, 32] [1, 1, 1, 1] : tensor<4x6x16x32xi8> into tensor<4x6x16x32xi8> - return %0 : tensor<4x6x16x32xi8> -} - -// ----- - -// CHECK-LABEL: func @rank_reducing_tensor_of_cast -// CHECK-SAME: %[[ARG0:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8> -// CHECK: %[[S:.+]] = subtensor %arg0[0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor<4x6x16x32xi8> to tensor<16x32xi8> -// Tensor cast is moved after subtensor and then gets canonicalized away. -// CHECK-NOT: tensor.cast -// CHECK: return %[[S]] : tensor<16x32xi8> -func @rank_reducing_tensor_of_cast(%arg : tensor<4x6x16x32xi8>) -> tensor<16x32xi8> { - %0 = tensor.cast %arg : tensor<4x6x16x32xi8> to tensor - %1 = subtensor %0[0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor to tensor<16x32xi8> - return %1 : tensor<16x32xi8> -} - -// ----- - -// CHECK-LABEL: func @rank_reducing_subtensor_insert_of_cast -// CHECK-SAME: %[[A:.[a-z0-9A-Z_]+]]: tensor<16x32xi8> -// CHECK-SAME: %[[B:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8> -// CHECK: %[[S:.+]] = subtensor_insert %[[A]] into %[[B]][0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor<16x32xi8> into tensor<4x6x16x32xi8> -// Tensor cast is folded away. -// CHECK-NOT: tensor.cast -// CHECK: return %[[S]] : tensor<4x6x16x32xi8> -func @rank_reducing_subtensor_insert_of_cast(%a : tensor<16x32xi8>, %b : tensor<4x6x16x32xi8>) -> tensor<4x6x16x32xi8> { - %cast = tensor.cast %a : tensor<16x32xi8> to tensor - %res = subtensor_insert %cast into %b[0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor into tensor<4x6x16x32xi8> - return %res : tensor<4x6x16x32xi8> -} - -// ----- - -func @subtensor_insert_canonicalize(%arg0 : tensor, %arg1 : index, - %arg2 : index, %arg3 : tensor) -> tensor -{ - %c0 = constant 0 : index - %c1 = constant 1 : index - %c4 = constant 4 : index - %0 = subtensor_insert %arg0 into %arg3[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor into tensor - return %0 : tensor -} -// CHECK-LABEL: func @subtensor_insert_canonicalize -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor -// CHECK: %[[RESULT:.+]] = subtensor_insert %[[ARG0]] -// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1] -// CHECK-SAME: : tensor into tensor -// CHEKC: return %[[RESULT]] - -// ----- - -func @subtensor_to_subtensor_insert_canonicalize(%arg0 : tensor, %arg1 : index, - %arg2 : index, %arg3 : tensor) -> tensor -{ - %c0 = constant 0 : index - %c1 = constant 1 : index - %c4 = constant 4 : index - %0 = subtensor %arg0[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor to tensor - %1 = subtensor_insert %0 into %arg3[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor into tensor - return %1 : tensor -} -// CHECK-LABEL: func @subtensor_to_subtensor_insert_canonicalize -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor -// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: tensor -// CHECK: %[[SUBTENSOR:.+]] = subtensor %[[ARG0]] -// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}} [1, 1, 1] -// CHECK-SAME: : tensor to tensor<4x1x?xf32> -// CHECK: %[[RESULT:.+]] = subtensor_insert %[[SUBTENSOR]] -// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, 
%{{.+}}] [1, 1, 1] -// CHECK-SAME: : tensor<4x1x?xf32> into tensor -// CHEKC: return %[[RESULT]] - -// ----- - -func @rank_reducing_subtensor_insert_canonicalize(%arg0 : tensor, %arg1 : index, - %arg2 : index, %arg3 : tensor) -> tensor -{ - %c0 = constant 0 : index - %c1 = constant 1 : index - %c4 = constant 4 : index - %0 = subtensor_insert %arg0 into %arg3[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor into tensor - return %0 : tensor -} -// CHECK-LABEL: func @rank_reducing_subtensor_insert_canonicalize -// CHECK-SAME: %[[ARG0:.+]]: tensor -// CHECK: %[[RESULT:.+]] = subtensor_insert %[[ARG0]] -// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1] -// CHECK-SAME: : tensor into tensor -// CHEKC: return %[[RESULT]] - -// ----- - -func @rank_reducing_subtensor_to_subtensor_insert_canonicalize(%arg0 : tensor, %arg1 : index, - %arg2 : index, %arg3 : tensor) -> tensor -{ - %c0 = constant 0 : index - %c1 = constant 1 : index - %c4 = constant 4 : index - %0 = subtensor %arg0[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor to tensor - %1 = subtensor_insert %0 into %arg3[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor into tensor - return %1 : tensor -} -// CHECK-LABEL: func @rank_reducing_subtensor_to_subtensor_insert_canonicalize -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor -// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: tensor -// CHECK: %[[SUBTENSOR:.+]] = subtensor %[[ARG0]] -// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1] -// CHECK-SAME: : tensor to tensor<4x?xf32> -// CHECK: %[[RESULT:.+]] = subtensor_insert %[[SUBTENSOR]] into %[[ARG3]] -// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1] -// CHECK-SAME: : tensor<4x?xf32> into tensor -// CHEKC: return %[[RESULT]] - -// ----- - -func @subtensor_insert_output_dest_canonicalize(%arg0 : tensor<2x3xi32>, %arg1 : tensor) -> tensor<3x9xi32> { - %c0 = constant 0 : index - %c1 = constant 1 : index - %c2 = constant 2 : index - %c9 = constant 9 : index - %c3 = constant 3 : index - %2 = tensor.extract %arg1[] : tensor - %4 = tensor.generate %c3, %c9 { - ^bb0(%arg2: index, %arg3: index): - tensor.yield %2 : i32 - } : tensor - %5 = subtensor_insert %arg0 into %4[%c0, %c1] [%c2, %c3] [1, 1] : tensor<2x3xi32> into tensor - %6 = tensor.cast %5 : tensor to tensor<3x9xi32> - return %6 : tensor<3x9xi32> -} -// CHECK-LABEL: func @subtensor_insert_output_dest_canonicalize -// CHECK-SAME: %[[ARG0:[a-zA-z0-9_]+]]: tensor<2x3xi32> -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor -// CHECK: %[[PAD:.+]] = tensor.extract %[[ARG1]] -// CHECK: %[[GENERATE:.+]] = tensor.generate -// CHECK: %[[RESULT:.+]] = subtensor_insert %[[ARG0]] into %[[GENERATE]] -// CHECK: return %[[RESULT]] - -// ----- - // CHECK-LABEL: @select_same_val // CHECK: return %arg1 func @select_same_val(%arg0: i1, %arg1: i64) -> i64 { diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir --- a/mlir/test/Dialect/Tensor/canonicalize.mlir +++ b/mlir/test/Dialect/Tensor/canonicalize.mlir @@ -263,3 +263,222 @@ %tensor = tensor.from_elements %c1, %c2, %c1 : tensor<3xindex> return %tensor : tensor<3xindex> } + +// ----- + +func @slice_canonicalize(%arg0 : tensor, %arg1 : index, + %arg2 : index) -> tensor +{ + %c0 = constant 0 : index + %c1 = constant 1 : index + %c4 = constant 4 : index + %0 = tensor.extract_slice %arg0[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor to tensor + return %0 : tensor +} +// CHECK-LABEL: func @slice_canonicalize +// CHECK-SAME: %[[ARG0:.+]]: tensor +// CHECK: %[[SLICE:.+]] = 
tensor.extract_slice %[[ARG0]][0, %{{[a-zA-Z0-9_]+}}, 1] +// CHECK-SAME: [4, 1, %{{[a-zA-Z0-9_]+}}] [1, 1, 1] +// CHECK-SAME: : tensor to tensor<4x1x?xf32> +// CHECK: %[[RESULT:.+]] = tensor.cast %[[SLICE]] +// CHEKC: return %[[RESULT]] + +// ----- + +func @rank_reducing_slice_canonicalize(%arg0 : tensor, %arg1 : index, + %arg2 : index) -> tensor +{ + %c0 = constant 0 : index + %c1 = constant 1 : index + %c4 = constant 4 : index + %0 = tensor.extract_slice %arg0[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor to tensor + return %0 : tensor +} +// CHECK-LABEL: func @rank_reducing_slice_canonicalize +// CHECK-SAME: %[[ARG0:.+]]: tensor +// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[ARG0]][0, %{{[a-zA-Z0-9_]+}}, 1] +// CHECK-SAME: [4, 1, %{{[a-zA-Z0-9_]+}}] [1, 1, 1] +// CHECK-SAME: : tensor to tensor<4x?xf32> +// CHECK: %[[RESULT:.+]] = tensor.cast %[[SLICE]] +// CHEKC: return %[[RESULT]] + +// ----- + +// CHECK-LABEL: func @trivial_slice +// CHECK-SAME: %[[ARG0:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8> +// CHECK-NOT: tensor.extract_slice +// CHECK: return %[[ARG0]] : tensor<4x6x16x32xi8> +func @trivial_slice(%arg0 : tensor<4x6x16x32xi8>) -> tensor<4x6x16x32xi8> { + %0 = tensor.extract_slice %arg0[0, 0, 0, 0] [4, 6, 16, 32] [1, 1, 1, 1] : tensor<4x6x16x32xi8> to tensor<4x6x16x32xi8> + return %0 : tensor<4x6x16x32xi8> +} + +// ----- + +// CHECK-LABEL: func @trivial_insert_slice +// CHECK-SAME: %[[ARG0:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8> +// CHECK-NOT: tensor.extract_slice +// CHECK: return %[[ARG0]] : tensor<4x6x16x32xi8> +func @trivial_insert_slice(%arg0 : tensor<4x6x16x32xi8>, %arg1 : tensor<4x6x16x32xi8>) -> tensor<4x6x16x32xi8> { + %0 = tensor.insert_slice %arg0 into %arg1[0, 0, 0, 0] [4, 6, 16, 32] [1, 1, 1, 1] : tensor<4x6x16x32xi8> into tensor<4x6x16x32xi8> + return %0 : tensor<4x6x16x32xi8> +} + +// ----- + +// CHECK-LABEL: func @rank_reducing_tensor_of_cast +// CHECK-SAME: %[[ARG0:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8> +// CHECK: %[[S:.+]] = tensor.extract_slice %arg0[0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor<4x6x16x32xi8> to tensor<16x32xi8> +// Tensor cast is moved after slice and then gets canonicalized away. +// CHECK-NOT: tensor.cast +// CHECK: return %[[S]] : tensor<16x32xi8> +func @rank_reducing_tensor_of_cast(%arg : tensor<4x6x16x32xi8>) -> tensor<16x32xi8> { + %0 = tensor.cast %arg : tensor<4x6x16x32xi8> to tensor + %1 = tensor.extract_slice %0[0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor to tensor<16x32xi8> + return %1 : tensor<16x32xi8> +} + +// ----- + +// CHECK-LABEL: func @rank_reducing_insert_slice_of_cast +// CHECK-SAME: %[[A:.[a-z0-9A-Z_]+]]: tensor<16x32xi8> +// CHECK-SAME: %[[B:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8> +// CHECK: %[[S:.+]] = tensor.insert_slice %[[A]] into %[[B]][0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor<16x32xi8> into tensor<4x6x16x32xi8> +// Tensor cast is folded away. 
+// CHECK-NOT: tensor.cast +// CHECK: return %[[S]] : tensor<4x6x16x32xi8> +func @rank_reducing_insert_slice_of_cast(%a : tensor<16x32xi8>, %b : tensor<4x6x16x32xi8>) -> tensor<4x6x16x32xi8> { + %cast = tensor.cast %a : tensor<16x32xi8> to tensor + %res = tensor.insert_slice %cast into %b[0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor into tensor<4x6x16x32xi8> + return %res : tensor<4x6x16x32xi8> +} + +// ----- + +func @insert_slice_canonicalize(%arg0 : tensor, %arg1 : index, + %arg2 : index, %arg3 : tensor) -> tensor +{ + %c0 = constant 0 : index + %c1 = constant 1 : index + %c4 = constant 4 : index + %0 = tensor.insert_slice %arg0 into %arg3[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor into tensor + return %0 : tensor +} +// CHECK-LABEL: func @insert_slice_canonicalize +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor +// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[ARG0]] +// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1] +// CHECK-SAME: : tensor into tensor +// CHEKC: return %[[RESULT]] + +// ----- + +func @slice_to_insert_slice_canonicalize(%arg0 : tensor, %arg1 : index, + %arg2 : index, %arg3 : tensor) -> tensor +{ + %c0 = constant 0 : index + %c1 = constant 1 : index + %c4 = constant 4 : index + %0 = tensor.extract_slice %arg0[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor to tensor + %1 = tensor.insert_slice %0 into %arg3[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor into tensor + return %1 : tensor +} +// CHECK-LABEL: func @slice_to_insert_slice_canonicalize +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor +// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: tensor +// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[ARG0]] +// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}} [1, 1, 1] +// CHECK-SAME: : tensor to tensor<4x1x?xf32> +// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[SLICE]] +// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1] +// CHECK-SAME: : tensor<4x1x?xf32> into tensor +// CHEKC: return %[[RESULT]] + +// ----- + +func @rank_reducing_insert_slice_canonicalize(%arg0 : tensor, %arg1 : index, + %arg2 : index, %arg3 : tensor) -> tensor +{ + %c0 = constant 0 : index + %c1 = constant 1 : index + %c4 = constant 4 : index + %0 = tensor.insert_slice %arg0 into %arg3[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor into tensor + return %0 : tensor +} +// CHECK-LABEL: func @rank_reducing_insert_slice_canonicalize +// CHECK-SAME: %[[ARG0:.+]]: tensor +// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[ARG0]] +// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1] +// CHECK-SAME: : tensor into tensor +// CHEKC: return %[[RESULT]] + +// ----- + +func @rank_reducing_slice_to_insert_slice_canonicalize(%arg0 : tensor, %arg1 : index, + %arg2 : index, %arg3 : tensor) -> tensor +{ + %c0 = constant 0 : index + %c1 = constant 1 : index + %c4 = constant 4 : index + %0 = tensor.extract_slice %arg0[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor to tensor + %1 = tensor.insert_slice %0 into %arg3[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor into tensor + return %1 : tensor +} +// CHECK-LABEL: func @rank_reducing_slice_to_insert_slice_canonicalize +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor +// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: tensor +// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[ARG0]] +// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1] +// CHECK-SAME: : tensor to tensor<4x?xf32> +// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[SLICE]] into %[[ARG3]] +// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1] +// 
CHECK-SAME: : tensor<4x?xf32> into tensor +// CHEKC: return %[[RESULT]] + +// ----- + +func @insert_slice_propagate_dest_cast(%arg0 : tensor<2x?xi32>, %arg1 : tensor, + %arg2 : index, %arg3 : index) -> tensor { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c2 = constant 2 : index + %c8 = constant 8 : index + %0 = memref.dim %arg0, %c1 : tensor<2x?xi32> + %1 = tensor.extract %arg1[] : tensor + %2 = tensor.generate %arg2, %c8 { + ^bb0(%arg4: index, %arg5: index): + tensor.yield %1 : i32 + } : tensor + %3 = tensor.insert_slice %arg0 into %2[%c0, %arg3] [%c2, %0] [%c1, %c1] : tensor<2x?xi32> into tensor + return %3 : tensor +} +// CHECK-LABEL: func @insert_slice_propagate_dest_cast +// CHECK: %[[UPDATED:.+]] = tensor.insert_slice %{{.+}} into %{{.+}}[0, %{{.+}}] [2, %{{.+}}] [1, 1] +// CHECK-SAME: tensor<2x?xi32> into tensor +// CHECK: %[[CAST:.+]] = tensor.cast %[[UPDATED]] +// CHECK: return %[[CAST]] + +// ----- + +func @insert_slice_output_dest_canonicalize(%arg0 : tensor<2x3xi32>, %arg1 : tensor) -> tensor<3x9xi32> { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c2 = constant 2 : index + %c9 = constant 9 : index + %c3 = constant 3 : index + %2 = tensor.extract %arg1[] : tensor + %4 = tensor.generate %c3, %c9 { + ^bb0(%arg2: index, %arg3: index): + tensor.yield %2 : i32 + } : tensor + %5 = tensor.insert_slice %arg0 into %4[%c0, %c1] [%c2, %c3] [1, 1] : tensor<2x3xi32> into tensor + %6 = tensor.cast %5 : tensor to tensor<3x9xi32> + return %6 : tensor<3x9xi32> +} +// CHECK-LABEL: func @insert_slice_output_dest_canonicalize +// CHECK-SAME: %[[ARG0:[a-zA-z0-9_]+]]: tensor<2x3xi32> +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor +// CHECK: %[[PAD:.+]] = tensor.extract %[[ARG1]] +// CHECK: %[[GENERATE:.+]] = tensor.generate +// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[ARG0]] into %[[GENERATE]] +// CHECK: return %[[RESULT]] diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir --- a/mlir/test/IR/core-ops.mlir +++ b/mlir/test/IR/core-ops.mlir @@ -825,31 +825,31 @@ return } -// CHECK-LABEL: func @subtensor({{.*}}) { -func @subtensor(%t: tensor<8x16x4xf32>, %idx : index) { +// CHECK-LABEL: func @slice({{.*}}) { +func @slice(%t: tensor<8x16x4xf32>, %idx : index) { %c0 = constant 0 : index %c1 = constant 1 : index - // CHECK: subtensor + // CHECK: tensor.extract_slice // CHECK-SAME: tensor<8x16x4xf32> to tensor - %1 = subtensor %t[%c0, %c0, %c0][%idx, %idx, %idx][%c1, %c1, %c1] + %1 = tensor.extract_slice %t[%c0, %c0, %c0][%idx, %idx, %idx][%c1, %c1, %c1] : tensor<8x16x4xf32> to tensor - // CHECK: subtensor + // CHECK: tensor.extract_slice // CHECK-SAME: tensor<8x16x4xf32> to tensor<4x4x4xf32> - %2 = subtensor %t[0, 2, 0][4, 4, 4][1, 1, 1] + %2 = tensor.extract_slice %t[0, 2, 0][4, 4, 4][1, 1, 1] : tensor<8x16x4xf32> to tensor<4x4x4xf32> - // CHECK: subtensor + // CHECK: tensor.extract_slice // CHECK-SAME: tensor<8x16x4xf32> to tensor<4x4xf32> - %3 = subtensor %t[0, 2, 0][4, 1, 4][1, 1, 1] + %3 = tensor.extract_slice %t[0, 2, 0][4, 1, 4][1, 1, 1] : tensor<8x16x4xf32> to tensor<4x4xf32> return } -// CHECK-LABEL: func @subtensor_insert({{.*}}) { -func @subtensor_insert( +// CHECK-LABEL: func @insert_slice({{.*}}) { +func @insert_slice( %t: tensor<8x16x4xf32>, %t2: tensor<16x32x8xf32>, %t3: tensor<4x4xf32>, @@ -857,19 +857,19 @@ %c0 = constant 0 : index %c1 = constant 1 : index - // CHECK: subtensor_insert + // CHECK: tensor.insert_slice // CHECK-SAME: tensor<8x16x4xf32> into tensor<16x32x8xf32> - %1 = subtensor_insert %t into %t2[%c0, %c0, %c0][%idx, %idx, 
%idx][%c1, %c1, %c1] + %1 = tensor.insert_slice %t into %t2[%c0, %c0, %c0][%idx, %idx, %idx][%c1, %c1, %c1] : tensor<8x16x4xf32> into tensor<16x32x8xf32> - // CHECK: subtensor_insert + // CHECK: tensor.insert_slice // CHECK-SAME: tensor<8x16x4xf32> into tensor<16x32x8xf32> - %2 = subtensor_insert %t into %t2[%c0, %idx, %c0][%idx, 4, %idx][%c1, 1, %c1] + %2 = tensor.insert_slice %t into %t2[%c0, %idx, %c0][%idx, 4, %idx][%c1, 1, %c1] : tensor<8x16x4xf32> into tensor<16x32x8xf32> - // CHECK: subtensor_insert + // CHECK: tensor.insert_slice // CHECK-SAME: tensor<4x4xf32> into tensor<8x16x4xf32> - %3 = subtensor_insert %t3 into %t[0, 2, 0][4, 1, 4][1, 1, 1] + %3 = tensor.insert_slice %t3 into %t[0, 2, 0][4, 1, 4][1, 1, 1] : tensor<4x4xf32> into tensor<8x16x4xf32> return diff --git a/mlir/test/IR/invalid-ops.mlir b/mlir/test/IR/invalid-ops.mlir --- a/mlir/test/IR/invalid-ops.mlir +++ b/mlir/test/IR/invalid-ops.mlir @@ -1214,9 +1214,9 @@ // ----- -func @subtensor_wrong_dynamic_type(%t: tensor<8x16x4xf32>, %idx : index) { +func @slice_wrong_dynamic_type(%t: tensor<8x16x4xf32>, %idx : index) { // expected-error @+1 {{expected result type to be 'tensor<4x4x4xf32>' or a rank-reduced version. (mismatch of result sizes)}} - %0 = subtensor %t[0, 2, 0][4, 4, 4][1, 1, 1] + %0 = tensor.extract_slice %t[0, 2, 0][4, 4, 4][1, 1, 1] : tensor<8x16x4xf32> to tensor return @@ -1224,9 +1224,9 @@ // ----- -func @subtensor_wrong_static_type(%t: tensor<8x16x4xf32>, %idx : index) { +func @slice_wrong_static_type(%t: tensor<8x16x4xf32>, %idx : index) { // expected-error @+1 {{expected result type to be 'tensor' or a rank-reduced version. (mismatch of result sizes)}} - %0 = subtensor %t[0, 0, 0][%idx, 3, %idx][1, 1, 1] + %0 = tensor.extract_slice %t[0, 0, 0][%idx, 3, %idx][1, 1, 1] : tensor<8x16x4xf32> to tensor<4x4x4xf32> return diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir @@ -10,12 +10,12 @@ %const = constant dense<10.0> : tensor<2xf32> %insert_val = constant dense<20.0> : tensor<1xf32> - // Both of these subtensor_insert ops insert into the same original tensor + // Both of these insert_slice ops insert into the same original tensor // value `%const`. This can easily cause bugs if at the memref level // we attempt to write in-place into the memref that %const has been // converted into. 
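The comment above is about aliasing under bufferization rather than about the ops themselves: at the tensor level the two results must stay independent. The annotated sketch below spells out the expected value semantics; the bracketed values are derived from the constants in this test, not copied from its printed output.

```mlir
// Expected value semantics: each insert_slice yields an independent tensor.
func @insert_twice() -> (tensor<2xf32>, tensor<2xf32>) {
  %const      = constant dense<10.0> : tensor<2xf32>   // [10, 10]
  %insert_val = constant dense<20.0> : tensor<1xf32>   // [20]
  %at0 = tensor.insert_slice %insert_val into %const[0] [1] [1]
      : tensor<1xf32> into tensor<2xf32>               // expected [20, 10]
  %at1 = tensor.insert_slice %insert_val into %const[1] [1] [1]
      : tensor<1xf32> into tensor<2xf32>               // expected [10, 20]
  // If bufferization mapped %const to a single buffer and wrote into it in
  // place, %at0 and %at1 would alias that buffer and observe each other's
  // writes instead of the values above.
  return %at0, %at1 : tensor<2xf32>, tensor<2xf32>
}
```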
- %inserted_at_position_0 = subtensor_insert %insert_val into %const[0][1][1] : tensor<1xf32> into tensor<2xf32> - %inserted_at_position_1 = subtensor_insert %insert_val into %const[1][1][1] : tensor<1xf32> into tensor<2xf32> + %inserted_at_position_0 = tensor.insert_slice %insert_val into %const[0][1][1] : tensor<1xf32> into tensor<2xf32> + %inserted_at_position_1 = tensor.insert_slice %insert_val into %const[1][1][1] : tensor<1xf32> into tensor<2xf32> %unranked_at_position_0 = tensor.cast %inserted_at_position_0 : tensor<2xf32> to tensor<*xf32> call @print_memref_f32(%unranked_at_position_0) : (tensor<*xf32>) -> () diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir @@ -9,7 +9,7 @@ func @main() { %const = constant dense<10.0> : tensor<2xf32> %insert_val = constant dense<20.0> : tensor<1xf32> - %inserted = subtensor_insert %insert_val into %const[0][1][1] : tensor<1xf32> into tensor<2xf32> + %inserted = tensor.insert_slice %insert_val into %const[0][1][1] : tensor<1xf32> into tensor<2xf32> %unranked = tensor.cast %inserted : tensor<2xf32> to tensor<*xf32> call @print_memref_f32(%unranked) : (tensor<*xf32>) -> () diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir --- a/mlir/test/Transforms/canonicalize.mlir +++ b/mlir/test/Transforms/canonicalize.mlir @@ -1065,9 +1065,9 @@ // ----- -// CHECK-LABEL: func @subtensor +// CHECK-LABEL: func @slice // CHECK-SAME: %[[ARG0:[0-9a-z]*]]: index, %[[ARG1:[0-9a-z]*]]: index -func @subtensor(%t: tensor<8x16x4xf32>, %arg0 : index, %arg1 : index) +func @slice(%t: tensor<8x16x4xf32>, %arg0 : index, %arg1 : index) -> tensor { %c0 = constant 0 : index @@ -1076,18 +1076,18 @@ %c7 = constant 7 : index %c11 = constant 11 : index - // CHECK: subtensor %{{.*}}[0, 0, 0] [7, 11, 2] [1, 1, 1] : + // CHECK: tensor.extract_slice %{{.*}}[0, 0, 0] [7, 11, 2] [1, 1, 1] : // CHECK-SAME: tensor<8x16x4xf32> to tensor<7x11x2xf32> // tensor.cast gets folded away in consumer. // CHECK-NOT: tensor.cast - %1 = subtensor %t[%c0, %c0, %c0] [%c7, %c11, %c2] [%c1, %c1, %c1] + %1 = tensor.extract_slice %t[%c0, %c0, %c0] [%c7, %c11, %c2] [%c1, %c1, %c1] : tensor<8x16x4xf32> to tensor - // Test: subtensor with one dynamic operand can also be folded. - // CHECK: subtensor %{{.*}}[0, 0, 0] [2, %[[ARG0]], 2] [1, 1, 1] : + // Test: slice with one dynamic operand can also be folded. 
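The partial-folding case introduced by the comment above keeps the remaining dynamic size as an operand while the constant ones become static, and a `tensor.cast` restores the fully dynamic type expected downstream. Roughly, reusing this test's own `%1` and `%arg0` (the cast's target type here is an assumption based on the surrounding test, since it is elided in the CHECK line):

```mlir
// Partial folding: only %arg0 stays dynamic in the slice sizes.
%s = tensor.extract_slice %1[0, 0, 0] [2, %arg0, 2] [1, 1, 1]
    : tensor<7x11x2xf32> to tensor<2x?x2xf32>
%r = tensor.cast %s : tensor<2x?x2xf32> to tensor<?x?x?xf32>
```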
+ // CHECK: tensor.extract_slice %{{.*}}[0, 0, 0] [2, %[[ARG0]], 2] [1, 1, 1] : // CHECK-SAME: tensor<7x11x2xf32> to tensor<2x?x2xf32> // CHECK: tensor.cast %{{.*}} : tensor<2x?x2xf32> to tensor - %2 = subtensor %1[%c0, %c0, %c0] [%c2, %arg0, %c2] [%c1, %c1, %c1] + %2 = tensor.extract_slice %1[%c0, %c0, %c0] [%c2, %arg0, %c2] [%c1, %c1, %c1] : tensor to tensor return %2 : tensor diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgFusionTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgFusionTransforms.cpp --- a/mlir/test/lib/Dialect/Linalg/TestLinalgFusionTransforms.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestLinalgFusionTransforms.cpp @@ -236,7 +236,7 @@ RewritePatternSet patterns = linalg::getLinalgTilingCanonicalizationPatterns(context); patterns.add(context); + ExtractSliceOfPadTensorSwapPattern>(context); FrozenRewritePatternSet frozenPatterns(std::move(patterns)); do { (void)applyPatternsAndFoldGreedily(getFunction(), frozenPatterns); diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp --- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp @@ -529,9 +529,9 @@ (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); } -static void applySubTensorOfPadTensorSwapPattern(FuncOp funcOp) { +static void applyExtractSliceOfPadTensorSwapPattern(FuncOp funcOp) { RewritePatternSet patterns(funcOp.getContext()); - patterns.add(funcOp.getContext()); + patterns.add(funcOp.getContext()); (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); } @@ -614,7 +614,7 @@ if (testTransformPadTensor) return applyPadTensorToGenericPatterns(getFunction()); if (testSwapSubTensorPadTensor) - return applySubTensorOfPadTensorSwapPattern(getFunction()); + return applyExtractSliceOfPadTensorSwapPattern(getFunction()); if (testAffineMinSCFCanonicalizationPatterns) return applyAffineMinSCFCanonicalizationPatterns(getFunction()); if (testTileAndPadPattern)
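The `ExtractSliceOfPadTensorSwapPattern` registered in the test passes updated just above is the pattern exercised by the pad-and-slice tests earlier in this section: a slice of a padded tensor is rewritten so that only the data actually read is sliced out of the source, and the (smaller) padding is applied afterwards. A rough before/after sketch follows; the slice geometry, padding amounts, and shapes are assumed rather than taken from any single test.

```mlir
// Before: pad the whole tensor, then slice the padded result.
func @slice_of_pad_before(%src: tensor<?x?xf32>, %cst: f32,
                          %l0: index, %l1: index, %h0: index, %h1: index)
    -> tensor<3x4xf32> {
  %0 = linalg.pad_tensor %src low[%l0, %l1] high[%h0, %h1] {
    ^bb0(%i: index, %j: index):
      linalg.yield %cst : f32
  } : tensor<?x?xf32> to tensor<?x?xf32>
  %1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1]
      : tensor<?x?xf32> to tensor<3x4xf32>
  return %1 : tensor<3x4xf32>
}

// After: slice only what the result reads from %src, then pad that slice.
// %off*/%sz*/%nl*/%nh* stand for the adjusted offsets, sizes, and padding
// amounts the rewrite computes; they are placeholders here.
func @slice_of_pad_after(%src: tensor<?x?xf32>, %cst: f32,
                         %off0: index, %off1: index, %sz0: index, %sz1: index,
                         %nl0: index, %nl1: index, %nh0: index, %nh1: index)
    -> tensor<3x4xf32> {
  %0 = tensor.extract_slice %src[%off0, %off1] [%sz0, %sz1] [1, 1]
      : tensor<?x?xf32> to tensor<?x?xf32>
  %1 = linalg.pad_tensor %0 low[%nl0, %nl1] high[%nh0, %nh1] {
    ^bb0(%i: index, %j: index):
      linalg.yield %cst : f32
  } : tensor<?x?xf32> to tensor<3x4xf32>
  return %1 : tensor<3x4xf32>
}
```

As the dynamic-shape tests in this patch show, the rewritten IR additionally guards with an `scf.if` and a `tensor.generate` for the case where the requested slice lies entirely in the padded region and reads nothing from the source.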