diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h b/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h --- a/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h @@ -12,6 +12,7 @@ #include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h" #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Interfaces/CopyOpInterface.h" +#include "mlir/Interfaces/InferTypeOpInterface.h" //===----------------------------------------------------------------------===// // Bufferization Dialect diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationBase.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationBase.td --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationBase.td +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationBase.td @@ -25,7 +25,9 @@ found in [bufferization](/docs/Bufferization/) and [buffer deallocation](/docs/BufferDeallocationInternals/). }]; - let dependentDialects = ["memref::MemRefDialect", "tensor::TensorDialect"]; + let dependentDialects = [ + "AffineDialect", "memref::MemRefDialect", "tensor::TensorDialect" + ]; let extraClassDeclaration = [{ /// An attribute that can override writability of buffers of tensor function diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td @@ -12,12 +12,128 @@ include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.td" include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td" include "mlir/Dialect/Bufferization/IR/BufferizationBase.td" +include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/Interfaces/CopyOpInterface.td" class Bufferization_Op traits = []> : Op; +//===----------------------------------------------------------------------===// +// AllocTensorOp +//===----------------------------------------------------------------------===// + +def Bufferization_AllocTensorOp : Bufferization_Op<"alloc_tensor", + [BufferizableOpInterface, + DeclareOpInterfaceMethods]> { + let summary = "buffer allocation in tensor land"; + + let description = [{ + `bufferization.alloc_tensor` is an operation that bufferizes to a buffer + allocation of a given shape. The shape could be dynamic or static. + Reading from the result of an `alloc_tensor` op yields an undefined value. + + `alloc_tensor` is a helper op for bufferization. It marks the beginning of + a new tensor SSA use-def chain and is used to control in-place bufferization + decisions during One-Shot Bufferize. + }]; + + let arguments = + (ins Variadic:$sizes, I64ArrayAttr:$static_sizes); + + let results = (outs AnyTensor:$result); + + let assemblyFormat = [{ + custom($sizes, $static_sizes) attr-dict + `:` type($result) + }]; + + let extraClassDeclaration = [{ + LogicalResult bufferize(RewriterBase &rewriter, BufferizationState &state); + + bool isMemoryWrite(OpResult opResult, const AnalysisState &state) const { + // AllocTensorOps allocate but do not write. + return false; + } + + static StringRef getStaticSizesAttrName() { + return "static_sizes"; + } + + RankedTensorType getType() { + return getResult().getType().cast(); + } + + // Infer the shape of the result tensor given the static shapes + // and element type of the result tensor. 
+ static Type inferResultType(ArrayRef staticSizes, Type elementType, + Attribute encoding = {}); + + // Return true if the size of the tensor is dynamic at `idx` + bool isDynamicSize(unsigned idx) { + APInt v = *(static_sizes().getAsValueRange().begin() + idx); + return ShapedType::isDynamic(v.getSExtValue()); + } + + // Assert that the size of the result tensor is static at `idx` + // and return the shape. + int64_t getStaticSize(unsigned idx) { + assert(!isDynamicSize(idx) && "expected static size"); + APInt v = *(static_sizes(). + template getAsValueRange().begin() + idx); + return v.getSExtValue(); + } + + // Return the argument position that contains the dynamic size of + // the tensor at dimension `idx`. Asserts that the shape is + // dynamic at that `idx`. + unsigned getIndexOfDynamicSize(unsigned idx) { + assert(isDynamicSize(idx) && "expected dynamic size"); + return std::count_if( + static_sizes().getValue().begin(), + static_sizes().getValue().begin() + idx, + [&](Attribute attr) { + return ShapedType::isDynamic(attr.cast().getInt()); + }); + } + + // Return both static and dynamic sizes as a list of `OpFoldResult`. + SmallVector getMixedSizes(); + + // Return the Value of the dynamic size of the tensor at dimension + // `idx`. Asserts that the shape is dynamic at that `idx. + Value getDynamicSize(unsigned idx) { + return getOperand(getIndexOfDynamicSize(idx)); + } + }]; + + let builders = [ + OpBuilder<(ins "ValueRange":$shape, + "ArrayRef":$staticShape, "Type":$elementType), + [{ + build($_builder, $_state, + AllocTensorOp::inferResultType(staticShape, elementType), + shape, $_builder.getI64ArrayAttr(staticShape)); + }]>, + OpBuilder<(ins "ValueRange":$shape, "Type":$elementType), + [{ + SmallVector staticShape( + shape.size(), ShapedType::kDynamicSize); + build($_builder, $_state, shape, staticShape, elementType); + }]>, + OpBuilder<(ins "ArrayRef":$staticShape, "Type":$elementType), + [{ + build($_builder, $_state, ValueRange{}, staticShape, elementType); + }]>, + OpBuilder<(ins "ArrayRef":$sizes, "Type":$elementType, + CArg<"ArrayRef", "{}">:$attrs)> + ]; + + let hasCanonicalizer = 1; + let hasCustomAssemblyFormat = 1; + let hasVerifier = 1; +} + //===----------------------------------------------------------------------===// // CloneOp //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/AllocTensorElimination.h copy from mlir/include/mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h copy to mlir/include/mlir/Dialect/Bufferization/Transforms/AllocTensorElimination.h --- a/mlir/include/mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/AllocTensorElimination.h @@ -1,4 +1,4 @@ -//===- BufferizableOpInterfaceImpl.h - Impl. of BufferizableOpInterface ---===// +//===- AllocTensorElimination.h - alloc_tensor op elimination -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,17 +6,15 @@ // //===----------------------------------------------------------------------===// -#ifndef MLIR_DIALECT_LINALG_BUFFERIZABLEOPINTERFACEIMPL_H -#define MLIR_DIALECT_LINALG_BUFFERIZABLEOPINTERFACEIMPL_H +#ifndef MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_ALLOCTENSORELIMINATION_H +#define MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_ALLOCTENSORELIMINATION_H #include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h" namespace mlir { -class DialectRegistry; +namespace bufferization { -namespace linalg { - -/// A function that matches anchor OpOperands for InitTensorOp elimination. +/// A function that matches anchor OpOperands for AllocTensorOp elimination. /// If an OpOperand is matched, the function should populate the SmallVector /// with all values that are needed during `RewriteFn` to produce the /// replacement value. @@ -25,28 +23,26 @@ /// A function that rewrites matched anchors. using RewriteFn = std::function; -/// Try to eliminate InitTensorOps inside `op`. +/// Try to eliminate AllocTensorOps inside `op`. /// -/// * `rewriteFunc` generates the replacement for the InitTensorOp. -/// * Only InitTensorOps that are anchored on a matching OpOperand as per +/// * `rewriteFunc` generates the replacement for the AllocTensorOp. +/// * Only AllocTensorOps that are anchored on a matching OpOperand as per /// `anchorMatchFunc` are considered. "Anchored" means that there is a path /// on the reverse SSA use-def chain, starting from the OpOperand and always /// following the aliasing OpOperand, that eventually ends at a single -/// InitTensorOp. -LogicalResult eliminateInitTensors(RewriterBase &rewriter, Operation *op, - bufferization::AnalysisState &state, - AnchorMatchFn anchorMatchFunc, - RewriteFn rewriteFunc); +/// AllocTensorOp. +LogicalResult eliminateAllocTensors(RewriterBase &rewriter, Operation *op, + bufferization::AnalysisState &state, + AnchorMatchFn anchorMatchFunc, + RewriteFn rewriteFunc); -/// Try to eliminate InitTensorOps inside `op` that are anchored on an +/// Try to eliminate AllocTensorOps inside `op` that are anchored on an /// InsertSliceOp, i.e., if it is eventually inserted into another tensor /// (and some other conditions are met). -LogicalResult insertSliceAnchoredInitTensorEliminationStep( +LogicalResult insertSliceAnchoredAllocTensorEliminationStep( RewriterBase &rewriter, Operation *op, bufferization::AnalysisState &state); -void registerBufferizableOpInterfaceExternalModels(DialectRegistry ®istry); - -} // namespace linalg +} // namespace bufferization } // namespace mlir -#endif // MLIR_DIALECT_LINALG_BUFFERIZABLEOPINTERFACEIMPL_H +#endif // MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_ALLOCTENSORELIMINATION_H diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h @@ -64,6 +64,13 @@ std::unique_ptr createPromoteBuffersToStackPass(std::function isSmallAlloc); +/// Create a pass that tries to eliminate alloc_tensor ops that are anchored on +/// insert_slice ops. +std::unique_ptr createAllocTensorEliminationPass(); + +/// Create a pass that bufferizes ops from the bufferization dialect. 
+std::unique_ptr createBufferizationBufferizePass(); + //===----------------------------------------------------------------------===// // Registration //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td @@ -149,6 +149,11 @@ let constructor = "mlir::bufferization::createFinalizingBufferizePass()"; } +def BufferizationBufferize : Pass<"bufferization-bufferize", "func::FuncOp"> { + let summary = "Bufferize the `bufferization` dialect"; + let constructor = "mlir::bufferization::createBufferizationBufferizePass()"; +} + def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> { let summary = "One-Shot Bufferize"; let description = [{ @@ -309,4 +314,16 @@ ]; } +def AllocTensorElimination : Pass<"eliminate-alloc-tensors"> { + let summary = "Try to eliminate all alloc_tensor ops."; + let description = [{ + This pass tries to eliminate all insert_slice op-anchored alloc_tensor ops. + I.e., when a value that is equivalent to an alloc_tensor op is inserted into + another tensor, this pass tries to rewrite the IR in such a way that the + destination tensor of the insert_slice op is used directly instead of the + alloc_tensor result. + }]; + let constructor = "mlir::bufferization::createAllocTensorEliminationPass()"; +} + #endif // MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_PASSES diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td @@ -27,11 +27,16 @@ def Linalg_InitTensorOp : Linalg_Op<"init_tensor", [NoSideEffect, DeclareOpInterfaceMethods]> { - let summary = "operation to define a tensor of particular value"; + let summary = "operation to define a tensor of particular shape"; let description = [{ - `linalg.init_tensor` is an operation that materializes a tensor of - a given shape. The shape could be dynamic or static. + `linalg.init_tensor` is an operation that defines a tensor of a particular + shape. The shape could be dynamic or static. The contents of the tensor are + unspecified and the only purpose of the op result is to materialize the + specified shape in IR and make it available to other transformations. + + Note: This op can be lowered to a `bufferization.alloc_tensor`, at which + point it turns into an explicit buffer allocation. }]; let arguments = diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h --- a/mlir/include/mlir/Dialect/Linalg/Passes.h +++ b/mlir/include/mlir/Dialect/Linalg/Passes.h @@ -62,9 +62,8 @@ std::unique_ptr> createConvertLinalgToAffineLoopsPass(); -/// Create a pass that tries to eliminate init_tensor ops that are anchored on -/// insert_slice ops. -std::unique_ptr createLinalgInitTensorEliminationPass(); +/// Create a pass that rewrites init_tensor to alloc_tensor. +std::unique_ptr createLinalgInitTensorToAllocTensorPass(); /// Create a pass to convert Linalg operations which work on tensors to use /// buffers instead. 
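
A minimal sketch of the rewrite performed by the new linalg-init-tensor-to-alloc-tensor pass declared above (illustrative IR only; the %sz value and surrounding context are assumed names, and the op syntax follows the tests updated later in this patch): the pass replaces each init_tensor op one-for-one with an alloc_tensor op, preserving the mixed sizes and the element type.

  // Before the pass:
  %0 = linalg.init_tensor [%sz] : tensor<?xf32>

  // After -linalg-init-tensor-to-alloc-tensor:
  %0 = bufferization.alloc_tensor [%sz] : tensor<?xf32>
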
diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td
--- a/mlir/include/mlir/Dialect/Linalg/Passes.td
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.td
@@ -24,16 +24,14 @@
   let dependentDialects = ["linalg::LinalgDialect", "memref::MemRefDialect"];
 }
-def LinalgInitTensorElimination : Pass<"linalg-eliminate-init-tensors"> {
-  let summary = "Try to eliminate all init_tensor ops.";
+def LinalgInitTensorToAllocTensor : Pass<"linalg-init-tensor-to-alloc-tensor"> {
+  let summary = "Replace all init_tensor ops by alloc_tensor ops.";
   let description = [{
-    This pass tries to eliminate all insert_slice op-anchored init_tensor ops.
-    I.e., when a value that is aliasing with an init_tensor op is inserted into
-    another tensor, this pass tries to rewrite the IR in such a way that the
-    destination tensor of the insert_slice op is used directly instead of the
-    init_tensor result.
+    init_tensor ops return a tensor of unspecified contents whose only purpose
+    is to carry the tensor shape. This pass converts such ops to
+    bufferization.alloc_tensor ops, which bufferize to buffer allocations.
   }];
-  let constructor = "mlir::createLinalgInitTensorEliminationPass()";
+  let constructor = "mlir::createLinalgInitTensorToAllocTensorPass()";
 }
 def LinalgFoldUnitExtentDims : Pass<"linalg-fold-unit-extent-dims", ""> {
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h b/mlir/include/mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h
@@ -9,43 +9,11 @@
 #ifndef MLIR_DIALECT_LINALG_BUFFERIZABLEOPINTERFACEIMPL_H
 #define MLIR_DIALECT_LINALG_BUFFERIZABLEOPINTERFACEIMPL_H
-#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
-
 namespace mlir {
 class DialectRegistry;
 namespace linalg {
-
-/// A function that matches anchor OpOperands for InitTensorOp elimination.
-/// If an OpOperand is matched, the function should populate the SmallVector
-/// with all values that are needed during `RewriteFn` to produce the
-/// replacement value.
-using AnchorMatchFn = std::function<bool(OpOperand &, SmallVector<Value> &)>;
-
-/// A function that rewrites matched anchors.
-using RewriteFn = std::function<Value(OpBuilder &, Location, OpOperand &)>;
-
-/// Try to eliminate InitTensorOps inside `op`.
-///
-/// * `rewriteFunc` generates the replacement for the InitTensorOp.
-/// * Only InitTensorOps that are anchored on a matching OpOperand as per
-///   `anchorMatchFunc` are considered. "Anchored" means that there is a path
-///   on the reverse SSA use-def chain, starting from the OpOperand and always
-///   following the aliasing OpOperand, that eventually ends at a single
-///   InitTensorOp.
-LogicalResult eliminateInitTensors(RewriterBase &rewriter, Operation *op,
-                                   bufferization::AnalysisState &state,
-                                   AnchorMatchFn anchorMatchFunc,
-                                   RewriteFn rewriteFunc);
-
-/// Try to eliminate InitTensorOps inside `op` that are anchored on an
-/// InsertSliceOp, i.e., if it is eventually inserted into another tensor
-/// (and some other conditions are met).
-LogicalResult insertSliceAnchoredInitTensorEliminationStep( - RewriterBase &rewriter, Operation *op, bufferization::AnalysisState &state); - void registerBufferizableOpInterfaceExternalModels(DialectRegistry ®istry); - } // namespace linalg } // namespace mlir diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp --- a/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp --- a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" +#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/MemRef/Utils/MemRefUtils.h" @@ -127,6 +128,167 @@ return success(); } +//===----------------------------------------------------------------------===// +// AllocTensorOp +//===----------------------------------------------------------------------===// + +LogicalResult AllocTensorOp::bufferize(RewriterBase &rewriter, + BufferizationState &state) { + // Nothing to do for dead AllocTensorOps. 
+  if (getOperation()->getUses().empty())
+    return success();
+
+  FailureOr<Value> alloc = state.createAlloc(rewriter, getLoc(), getResult());
+  if (failed(alloc))
+    return failure();
+  replaceOpWithBufferizedValues(rewriter, getOperation(), *alloc);
+  return success();
+}
+
+void AllocTensorOp::build(OpBuilder &b, OperationState &result,
+                          ArrayRef<OpFoldResult> sizes, Type elementType,
+                          ArrayRef<NamedAttribute> attrs) {
+  SmallVector<Value> dynamicSizes;
+  SmallVector<int64_t> staticSizes;
+  dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes,
+                             ShapedType::kDynamicSize);
+  auto resultType = RankedTensorType::get(staticSizes, elementType);
+  build(b, result, resultType, dynamicSizes, b.getI64ArrayAttr(staticSizes));
+  result.addAttributes(attrs);
+}
+
+LogicalResult AllocTensorOp::verify() {
+  RankedTensorType resultType = getType();
+  SmallVector<int64_t, 4> staticSizes = llvm::to_vector<4>(llvm::map_range(
+      static_sizes().cast<ArrayAttr>(),
+      [](Attribute a) -> int64_t { return a.cast<IntegerAttr>().getInt(); }));
+
+  if (failed(verifyListOfOperandsOrIntegers(
+          *this, "sizes", resultType.getRank(), static_sizes(), sizes(),
+          ShapedType::isDynamic)))
+    return failure();
+
+  if (static_sizes().size() != static_cast<unsigned>(resultType.getRank()))
+    return emitError("expected ") << resultType.getRank() << " sizes values";
+
+  Type expectedType = AllocTensorOp::inferResultType(
+      staticSizes, resultType.getElementType(), resultType.getEncoding());
+  if (resultType != expectedType) {
+    return emitError("specified type ")
+           << resultType << " does not match the inferred type "
+           << expectedType;
+  }
+  return success();
+}
+
+Type AllocTensorOp::inferResultType(ArrayRef<int64_t> staticSizes,
+                                    Type elementType, Attribute encoding) {
+  return RankedTensorType::get(staticSizes, elementType, encoding);
+}
+
+SmallVector<OpFoldResult> AllocTensorOp::getMixedSizes() {
+  SmallVector<OpFoldResult> mixedSizes;
+  mixedSizes.reserve(getType().getRank());
+  unsigned dynamicValIndex = 0;
+  for (Attribute attr : static_sizes()) {
+    auto intAttr = attr.cast<IntegerAttr>();
+    if (!ShapedType::isDynamic(intAttr.getInt())) {
+      mixedSizes.push_back(intAttr);
+      continue;
+    }
+    mixedSizes.push_back(sizes()[dynamicValIndex++]);
+  }
+  return mixedSizes;
+}
+
+namespace {
+/// Change the type of the result of a `bufferization.alloc_tensor` by making
+/// the result type statically sized along dimensions that were defined as
+/// dynamic in the original operation, but whose size was defined using a
+/// `constant` op. For example:
+///
+///  %c5 = arith.constant 5: index
+///  %0 = bufferization.alloc_tensor [%arg0, %c5] : tensor<?x?xf32>
+///
+///  to
+///
+///  %0 = bufferization.alloc_tensor [%arg0, 5] : tensor<?x5xf32>
+struct ReplaceStaticShapeDims : OpRewritePattern<AllocTensorOp> {
+  using OpRewritePattern<AllocTensorOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(AllocTensorOp op,
+                                PatternRewriter &rewriter) const override {
+    SmallVector<Value> dynamicSizes;
+    SmallVector<int64_t> staticSizes;
+    for (unsigned i = 0, e = op.getType().getRank(); i != e; ++i) {
+      // If the size is already static, nothing to do.
+      if (!op.isDynamicSize(i)) {
+        staticSizes.push_back(op.getStaticSize(i));
+        continue;
+      }
+
+      // If the size is dynamic but defined using a `constant` op, get the
+      // constant value to find the static size to use.
+      unsigned operandNum = op.getIndexOfDynamicSize(i);
+      Value sizeOperand = op.getOperand(operandNum);
+      if (auto constantIndexOp =
+              sizeOperand.getDefiningOp<arith::ConstantIndexOp>()) {
+        staticSizes.push_back(constantIndexOp.value());
+        continue;
+      }
+
+      // Fallback case. Keep the size dynamic.
+ dynamicSizes.push_back(sizeOperand); + staticSizes.push_back(ShapedType::kDynamicSize); + } + RankedTensorType newType = + RankedTensorType::get(staticSizes, op.getType().getElementType()); + if (newType == op.getType()) + return failure(); + auto newOp = + rewriter.create(op.getLoc(), newType, dynamicSizes, + rewriter.getI64ArrayAttr(staticSizes)); + rewriter.replaceOpWithNewOp(op, op.getType(), newOp); + return success(); + } +}; + +struct FoldDimOfAllocTensorOp : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(tensor::DimOp dimOp, + PatternRewriter &rewriter) const override { + Optional maybeConstantIndex = dimOp.getConstantIndex(); + auto allocTensorOp = dimOp.source().getDefiningOp(); + if (!allocTensorOp || !maybeConstantIndex) + return failure(); + if (!allocTensorOp.isDynamicSize(*maybeConstantIndex)) + return failure(); + rewriter.replaceOp(dimOp, + allocTensorOp.getDynamicSize(*maybeConstantIndex)); + return success(); + } +}; +} // namespace + +void AllocTensorOp::getCanonicalizationPatterns(RewritePatternSet &results, + MLIRContext *ctx) { + results.add(ctx); +} + +LogicalResult AllocTensorOp::reifyResultShapes( + OpBuilder &builder, ReifiedRankedShapedTypeDims &reifiedReturnShapes) { + auto shapes = llvm::to_vector<4>(llvm::map_range( + llvm::seq(0, getType().getRank()), [&](int64_t dim) -> Value { + if (isDynamicSize(dim)) + return getDynamicSize(dim); + return builder.create(getLoc(), + getStaticSize(dim)); + })); + reifiedReturnShapes.emplace_back(std::move(shapes)); + return success(); +} + //===----------------------------------------------------------------------===// // CloneOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt b/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt --- a/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt @@ -12,6 +12,7 @@ MLIRBufferizationOpsIncGen LINK_LIBS PUBLIC + MLIRAffine MLIRDialect MLIRFunc MLIRIR diff --git a/mlir/lib/Dialect/Bufferization/Transforms/AllocTensorElimination.cpp b/mlir/lib/Dialect/Bufferization/Transforms/AllocTensorElimination.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/Bufferization/Transforms/AllocTensorElimination.cpp @@ -0,0 +1,272 @@ +//===- AllocTensorElimination.cpp - alloc_tensor op elimination -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PassDetail.h" + +#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" +#include "mlir/Dialect/Bufferization/IR/Bufferization.h" +#include "mlir/Dialect/Bufferization/Transforms/AllocTensorElimination.h" +#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h" +#include "mlir/Dialect/Bufferization/Transforms/Passes.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/IR/Dominance.h" +#include "mlir/Pass/Pass.h" + +using namespace mlir; +using namespace mlir::bufferization; + +/// Return true if all `neededValues` are in scope at the given +/// `insertionPoint`. 
+static bool +neededValuesDominateInsertionPoint(const DominanceInfo &domInfo, + Operation *insertionPoint, + const SmallVector &neededValues) { + for (Value val : neededValues) { + if (auto bbArg = val.dyn_cast()) { + Block *owner = bbArg.getOwner(); + if (!owner->findAncestorOpInBlock(*insertionPoint)) + return false; + } else { + auto opResult = val.cast(); + if (!domInfo.dominates(opResult.getOwner(), insertionPoint)) + return false; + } + } + return true; +} + +/// Return true if the given `insertionPoint` dominates all uses of +/// `allocTensorOp`. +static bool insertionPointDominatesUses(const DominanceInfo &domInfo, + Operation *insertionPoint, + Operation *allocTensorOp) { + for (Operation *user : allocTensorOp->getUsers()) + if (!domInfo.dominates(insertionPoint, user)) + return false; + return true; +} + +/// Find a valid insertion point for a replacement of `allocTensorOp`, assuming +/// that the replacement may use any value from `neededValues`. +static Operation * +findValidInsertionPoint(Operation *allocTensorOp, + const SmallVector &neededValues) { + DominanceInfo domInfo; + + // Gather all possible insertion points: the location of `allocTensorOp` and + // right after the definition of each value in `neededValues`. + SmallVector insertionPointCandidates; + insertionPointCandidates.push_back(allocTensorOp); + for (Value val : neededValues) { + // Note: The anchor op is using all of `neededValues`, so: + // * in case of a block argument: There must be at least one op in the block + // (the anchor op or one of its parents). + // * in case of an OpResult: There must be at least one op right after the + // defining op (the anchor op or one of its + // parents). + if (auto bbArg = val.dyn_cast()) { + insertionPointCandidates.push_back( + &bbArg.getOwner()->getOperations().front()); + } else { + insertionPointCandidates.push_back(val.getDefiningOp()->getNextNode()); + } + } + + // Select first matching insertion point. + for (Operation *insertionPoint : insertionPointCandidates) { + // Check if all needed values are in scope. + if (!neededValuesDominateInsertionPoint(domInfo, insertionPoint, + neededValues)) + continue; + // Check if the insertion point is before all uses. + if (!insertionPointDominatesUses(domInfo, insertionPoint, allocTensorOp)) + continue; + return insertionPoint; + } + + // No suitable insertion point was found. + return nullptr; +} + +/// Try to eliminate AllocTensorOps inside `op`. An AllocTensorOp is replaced +/// with the result of `rewriteFunc` if it is anchored on a matching +/// OpOperand. "Anchored" means that there is a path on the reverse SSA use-def +/// chain, starting from the OpOperand and always following the aliasing +/// OpOperand, that eventually ends at a single AllocTensorOp. +LogicalResult mlir::bufferization::eliminateAllocTensors( + RewriterBase &rewriter, Operation *op, AnalysisState &state, + AnchorMatchFn anchorMatchFunc, RewriteFn rewriteFunc) { + OpBuilder::InsertionGuard g(rewriter); + + WalkResult status = op->walk([&](Operation *op) { + for (OpOperand &operand : op->getOpOperands()) { + // Skip operands that do not bufferize inplace. + if (!state.isInPlace(operand)) + continue; + // All values that are needed to create the replacement op. + SmallVector neededValues; + // Is this a matching OpOperand? + if (!anchorMatchFunc(operand, neededValues)) + continue; + SetVector maybeAllocTensor = + state.findValueInReverseUseDefChain(operand.get(), [&](Value val) { + // Continue traversal until this function returns true. 
+ OpResult opResult = val.dyn_cast(); + if (!opResult) + return true; + SmallVector opOperands = + state.getAliasingOpOperand(opResult); + if (!llvm::all_of(opOperands, [&](OpOperand *operand) { + return state.isInPlace(*operand); + })) + return true; + // Only equivalent tensors are supported at the moment. + // TODO: Support cases such as extract_slice(alloc_tensor) + return !llvm::all_of(opOperands, [&](OpOperand *operand) { + return state.areEquivalentBufferizedValues(operand->get(), + opResult); + }); + }); + + // Replace only if the reverse use-def chain ends at exactly one + // AllocTensorOp. + if (maybeAllocTensor.size() != 1 || + !maybeAllocTensor.front().getDefiningOp()) + return WalkResult::skip(); + Value allocTensor = maybeAllocTensor.front(); + + // Find a suitable insertion point. + Operation *insertionPoint = + findValidInsertionPoint(allocTensor.getDefiningOp(), neededValues); + if (!insertionPoint) + continue; + + // Create a replacement for the AllocTensorOp. + rewriter.setInsertionPoint(insertionPoint); + Value replacement = rewriteFunc(rewriter, allocTensor.getLoc(), operand); + if (!replacement) + continue; + + // Replace the AllocTensorOp. + rewriter.replaceOp(allocTensor.getDefiningOp(), replacement); + } + + // Advance to the next operation. + return WalkResult::advance(); + }); + + return failure(status.wasInterrupted()); +} + +/// Try to eliminate AllocTensorOps inside `op`. An AllocTensorOp can be +/// eliminated if it is eventually inserted into another tensor (and some other +/// conditions are met). +/// +/// E.g.: +/// %0 = linalg.alloc_tensor +/// %1 = linalg.fill(%cst, %0) {inplace = [true]} +/// %2 = tensor.insert_slice %1 into %t[10][20][1] +/// +/// AllocTensorOp elimination will try to fill %t inplace instead of filling a +/// new allocation %0 and inserting it into %t. This is done by replacing the +/// AllocTensorOp with: +/// +/// %0 = tensor.extract_slice %t[10][20][1] +/// +/// The analysis looks for matching ExtractSliceOp/InsertSliceOp pairs and lets +/// those bufferize inplace in the absence of other conflicts. +/// +/// Starting from an InsertSliceOp, an AllocTensorOp at the end of the insert +/// source's reverse use-def chain is eliminated if: +/// * On the reverse use-def chain path from the InsertSliceOp to the +/// AllocTensorOp, all ops were decided to bufferize inplace and the buffer +/// relation is "equivalent" (TODO: can be relaxed if needed). +/// * The reverse use-def chain has exactly one end, which is the AllocTensorOp. +LogicalResult +mlir::bufferization::insertSliceAnchoredAllocTensorEliminationStep( + RewriterBase &rewriter, Operation *op, AnalysisState &state) { + return eliminateAllocTensors( + rewriter, op, state, + /*anchorMatchFunc=*/ + [&](OpOperand &operand, SmallVector &neededValues) { + auto insertSliceOp = + dyn_cast(operand.getOwner()); + if (!insertSliceOp) + return false; + if (&operand != &insertSliceOp->getOpOperand(0) /*source*/) + return false; + + // Collect all values that are needed to construct the replacement op. 
+ neededValues.append(insertSliceOp.offsets().begin(), + insertSliceOp.offsets().end()); + neededValues.append(insertSliceOp.sizes().begin(), + insertSliceOp.sizes().end()); + neededValues.append(insertSliceOp.strides().begin(), + insertSliceOp.strides().end()); + neededValues.push_back(insertSliceOp.dest()); + + return true; + }, + /*rewriteFunc=*/ + [](OpBuilder &b, Location loc, OpOperand &operand) { + auto insertOp = cast(operand.getOwner()); + // Expand offsets, sizes and strides to the full rank to handle the + // rank-reducing case. + SmallVector mixedOffsets = insertOp.getMixedOffsets(); + SmallVector mixedSizes = insertOp.getMixedSizes(); + SmallVector mixedStrides = insertOp.getMixedStrides(); + OffsetSizeAndStrideOpInterface::expandToRank( + insertOp.dest(), mixedOffsets, mixedSizes, mixedStrides, + [&](Value target, int64_t dim) -> OpFoldResult { + auto shapedType = target.getType().cast(); + if (shapedType.isDynamicDim(dim)) + return b.create(loc, target, dim).result(); + return b.getIndexAttr(shapedType.getDimSize(dim)); + }); + auto t = tensor::ExtractSliceOp::inferRankReducedResultType( + insertOp.getSourceType().getRank(), + insertOp.dest().getType().cast(), mixedOffsets, + mixedSizes, mixedStrides); + auto extractOp = b.create( + loc, t, insertOp.dest(), mixedOffsets, mixedSizes, mixedStrides); + return extractOp.result(); + }); +} + +namespace { +struct AllocTensorElimination + : public AllocTensorEliminationBase { + AllocTensorElimination() = default; + + void runOnOperation() override; + + void getDependentDialects(DialectRegistry ®istry) const override { + registry + .insert(); + } +}; +} // namespace + +void AllocTensorElimination::runOnOperation() { + Operation *op = getOperation(); + OneShotBufferizationOptions options; + OneShotAnalysisState state(op, options); + if (failed(analyzeOp(op, state))) { + signalPassFailure(); + return; + } + + IRRewriter rewriter(op->getContext()); + if (failed(bufferization::insertSliceAnchoredAllocTensorEliminationStep( + rewriter, op, state))) + signalPassFailure(); +} + +std::unique_ptr mlir::bufferization::createAllocTensorEliminationPass() { + return std::make_unique(); +} diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp --- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp @@ -237,6 +237,28 @@ }; } // namespace +namespace { +struct BufferizationBufferizePass + : public BufferizationBufferizeBase { + void runOnOperation() override { + BufferizationOptions options = getPartialBufferizationOptions(); + options.allowDialectInFilter(); + + if (failed(bufferizeOp(getOperation(), options))) + signalPassFailure(); + } + + void getDependentDialects(DialectRegistry ®istry) const override { + registry + .insert(); + } +}; +} // namespace + +std::unique_ptr mlir::bufferization::createBufferizationBufferizePass() { + return std::make_unique(); +} + std::unique_ptr mlir::bufferization::createOneShotBufferizePass() { return std::make_unique(); } diff --git a/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt --- a/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt @@ -1,4 +1,5 @@ add_mlir_dialect_library(MLIRBufferizationTransforms + AllocTensorElimination.cpp Bufferize.cpp BufferDeallocation.cpp BufferOptimizations.cpp @@ -22,5 +23,6 @@ MLIRIR MLIRMemRef MLIRPass + 
MLIRTensor MLIRTransforms ) diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp @@ -379,7 +379,7 @@ /// Return `true` if the given tensor value is a memory write. Most values are /// tensor writes, but ops that define a tensor SSA value without specifying its -/// contents (e.g., init_tensor) are not. +/// contents (e.g., alloc_tensor) are not. static bool isMemoryWrite(Value value, const AnalysisState &state) { auto opResult = value.dyn_cast(); if (!opResult) @@ -855,7 +855,7 @@ /// %1 = scf.if %c -> (tensor) { /// scf.yield %0 : tensor /// } else { -/// %t = linalg.init_tensor : tensor +/// %t = linalg.alloc_tensor : tensor /// scf.yield %t : tensor /// } /// ``` diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp @@ -32,7 +32,7 @@ // Example: `foo` fails bufferization because %0 is not equivalent to any bbArg. // ``` // func @foo() -> tensor { -// %0 = linalg.init_tensor [...] : tensor +// %0 = linalg.alloc_tensor [...] : tensor // return %0 : tensor // } // ``` diff --git a/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp @@ -12,7 +12,6 @@ #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/Dialect.h" -#include "mlir/IR/Dominance.h" #include "mlir/IR/Operation.h" using namespace mlir; @@ -219,32 +218,6 @@ } }; -struct InitTensorOpInterface - : public BufferizableOpInterface::ExternalModel { - bool isMemoryWrite(Operation *op, OpResult opResult, - const AnalysisState &state) const { - // InitTensorOps allocate but do not write. - return false; - } - - LogicalResult bufferize(Operation *op, RewriterBase &rewriter, - BufferizationState &state) const { - auto initTensorOp = cast(op); - - // The InitTensorOp may have been eliminated. - if (initTensorOp->getUses().empty()) - return success(); - - FailureOr alloc = state.createAlloc(rewriter, initTensorOp->getLoc(), - initTensorOp.result()); - if (failed(alloc)) - return failure(); - replaceOpWithBufferizedValues(rewriter, op, *alloc); - return success(); - } -}; - /// Helper structure that iterates over all LinalgOps in `OpTys` and registers /// the `BufferizableOpInterface` with each of them. template @@ -256,230 +229,9 @@ }; } // namespace -/// Return true if all `neededValues` are in scope at the given -/// `insertionPoint`. -static bool -neededValuesDominateInsertionPoint(const DominanceInfo &domInfo, - Operation *insertionPoint, - const SmallVector &neededValues) { - for (Value val : neededValues) { - if (auto bbArg = val.dyn_cast()) { - Block *owner = bbArg.getOwner(); - if (!owner->findAncestorOpInBlock(*insertionPoint)) - return false; - } else { - auto opResult = val.cast(); - if (!domInfo.dominates(opResult.getOwner(), insertionPoint)) - return false; - } - } - return true; -} - -/// Return true if the given `insertionPoint` dominates all uses of -/// `initTensorOp`. 
-static bool insertionPointDominatesUses(const DominanceInfo &domInfo, - Operation *insertionPoint, - Operation *initTensorOp) { - for (Operation *user : initTensorOp->getUsers()) - if (!domInfo.dominates(insertionPoint, user)) - return false; - return true; -} - -/// Find a valid insertion point for a replacement of `initTensorOp`, assuming -/// that the replacement may use any value from `neededValues`. -static Operation * -findValidInsertionPoint(Operation *initTensorOp, - const SmallVector &neededValues) { - DominanceInfo domInfo; - - // Gather all possible insertion points: the location of `initTensorOp` and - // right after the definition of each value in `neededValues`. - SmallVector insertionPointCandidates; - insertionPointCandidates.push_back(initTensorOp); - for (Value val : neededValues) { - // Note: The anchor op is using all of `neededValues`, so: - // * in case of a block argument: There must be at least one op in the block - // (the anchor op or one of its parents). - // * in case of an OpResult: There must be at least one op right after the - // defining op (the anchor op or one of its - // parents). - if (auto bbArg = val.dyn_cast()) { - insertionPointCandidates.push_back( - &bbArg.getOwner()->getOperations().front()); - } else { - insertionPointCandidates.push_back(val.getDefiningOp()->getNextNode()); - } - } - - // Select first matching insertion point. - for (Operation *insertionPoint : insertionPointCandidates) { - // Check if all needed values are in scope. - if (!neededValuesDominateInsertionPoint(domInfo, insertionPoint, - neededValues)) - continue; - // Check if the insertion point is before all uses. - if (!insertionPointDominatesUses(domInfo, insertionPoint, initTensorOp)) - continue; - return insertionPoint; - } - - // No suitable insertion point was found. - return nullptr; -} - -/// Try to eliminate InitTensorOps inside `op`. An InitTensorOp is replaced -/// with the the result of `rewriteFunc` if it is anchored on a matching -/// OpOperand. "Anchored" means that there is a path on the reverse SSA use-def -/// chain, starting from the OpOperand and always following the aliasing -/// OpOperand, that eventually ends at a single InitTensorOp. -LogicalResult mlir::linalg::eliminateInitTensors(RewriterBase &rewriter, - Operation *op, - AnalysisState &state, - AnchorMatchFn anchorMatchFunc, - RewriteFn rewriteFunc) { - OpBuilder::InsertionGuard g(rewriter); - - WalkResult status = op->walk([&](Operation *op) { - for (OpOperand &operand : op->getOpOperands()) { - // Skip operands that do not bufferize inplace. - if (!state.isInPlace(operand)) - continue; - // All values that are needed to create the replacement op. - SmallVector neededValues; - // Is this a matching OpOperand? - if (!anchorMatchFunc(operand, neededValues)) - continue; - SetVector maybeInitTensor = - state.findValueInReverseUseDefChain(operand.get(), [&](Value val) { - // Continue traversal until this function returns true. - OpResult opResult = val.dyn_cast(); - if (!opResult) - return true; - SmallVector opOperands = - state.getAliasingOpOperand(opResult); - if (!llvm::all_of(opOperands, [&](OpOperand *operand) { - return state.isInPlace(*operand); - })) - return true; - // Only equivalent tensors are supported at the moment. 
- // TODO: Support cases such as extract_slice(init_tensor) - return !llvm::all_of(opOperands, [&](OpOperand *operand) { - return state.areEquivalentBufferizedValues(operand->get(), - opResult); - }); - }); - - // Replace only if the reverse use-def chain ends at exactly one - // InitTensorOp. - if (maybeInitTensor.size() != 1 || - !maybeInitTensor.front().getDefiningOp()) - return WalkResult::skip(); - Value initTensor = maybeInitTensor.front(); - - // Find a suitable insertion point. - Operation *insertionPoint = - findValidInsertionPoint(initTensor.getDefiningOp(), neededValues); - if (!insertionPoint) - continue; - - // Create a replacement for the InitTensorOp. - rewriter.setInsertionPoint(insertionPoint); - Value replacement = rewriteFunc(rewriter, initTensor.getLoc(), operand); - if (!replacement) - continue; - - // Replace the InitTensorOp. - rewriter.replaceOp(initTensor.getDefiningOp(), replacement); - } - - // Advance to the next operation. - return WalkResult::advance(); - }); - - return failure(status.wasInterrupted()); -} - -/// Try to eliminate InitTensorOps inside `op`. An InitTensorOp can be -/// eliminated if it is eventually inserted into another tensor (and some other -/// conditions are met). -/// -/// E.g.: -/// %0 = linalg.init_tensor -/// %1 = linalg.fill(%cst, %0) {inplace = [true]} -/// %2 = tensor.insert_slice %1 into %t[10][20][1] -/// -/// InitTensorOp elimination will try to fill %t inplace instead of filling a -/// new allocation %0 and inserting it into %t. This is done by replacing the -/// InitTensorOp with: -/// -/// %0 = tensor.extract_slice %t[10][20][1] -/// -/// The analysis looks for matching ExtractSliceOp/InsertSliceOp pairs and lets -/// those bufferize inplace in the absence of other conflicts. -/// -/// Starting from an InsertSliceOp, an InitTensorOp at the end of the insert -/// source's reverse use-def chain is eliminated if: -/// * On the reverse use-def chain path from the InsertSliceOp to the -/// InitTensorOp, all ops were decided to bufferize inplace and the buffer -/// relation is "equivalent" (TODO: can be relaxed if needed). -/// * The reverse use-def chain has exactly one end, which is the InitTensorOp. -LogicalResult mlir::linalg::insertSliceAnchoredInitTensorEliminationStep( - RewriterBase &rewriter, Operation *op, AnalysisState &state) { - return eliminateInitTensors( - rewriter, op, state, - /*anchorMatchFunc=*/ - [&](OpOperand &operand, SmallVector &neededValues) { - auto insertSliceOp = - dyn_cast(operand.getOwner()); - if (!insertSliceOp) - return false; - if (&operand != &insertSliceOp->getOpOperand(0) /*source*/) - return false; - - // Collect all values that are needed to construct the replacement op. - neededValues.append(insertSliceOp.offsets().begin(), - insertSliceOp.offsets().end()); - neededValues.append(insertSliceOp.sizes().begin(), - insertSliceOp.sizes().end()); - neededValues.append(insertSliceOp.strides().begin(), - insertSliceOp.strides().end()); - neededValues.push_back(insertSliceOp.dest()); - - return true; - }, - /*rewriteFunc=*/ - [](OpBuilder &b, Location loc, OpOperand &operand) { - auto insertOp = cast(operand.getOwner()); - // Expand offsets, sizes and strides to the full rank to handle the - // rank-reducing case. 
- SmallVector mixedOffsets = insertOp.getMixedOffsets(); - SmallVector mixedSizes = insertOp.getMixedSizes(); - SmallVector mixedStrides = insertOp.getMixedStrides(); - OffsetSizeAndStrideOpInterface::expandToRank( - insertOp.dest(), mixedOffsets, mixedSizes, mixedStrides, - [&](Value target, int64_t dim) -> OpFoldResult { - auto shapedType = target.getType().cast(); - if (shapedType.isDynamicDim(dim)) - return b.create(loc, target, dim).result(); - return b.getIndexAttr(shapedType.getDimSize(dim)); - }); - auto t = tensor::ExtractSliceOp::inferRankReducedResultType( - insertOp.getSourceType().getRank(), - insertOp.dest().getType().cast(), mixedOffsets, - mixedSizes, mixedStrides); - auto extractOp = b.create( - loc, t, insertOp.dest(), mixedOffsets, mixedSizes, mixedStrides); - return extractOp.result(); - }); -} - void mlir::linalg::registerBufferizableOpInterfaceExternalModels( DialectRegistry ®istry) { registry.addExtension(+[](MLIRContext *ctx, linalg::LinalgDialect *dialect) { - linalg::InitTensorOp::attachInterface(*ctx); - // Register all Linalg structured ops. `LinalgOp` is an interface and it is // not possible to attach an external interface to an existing interface. // Therefore, attach the `BufferizableOpInterface` to all ops one-by-one. diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt --- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt @@ -13,7 +13,7 @@ Generalization.cpp Hoisting.cpp HoistPadding.cpp - InitTensorElimination.cpp + InitTensorToAllocTensor.cpp InlineScalarOperands.cpp Interchange.cpp Loops.cpp diff --git a/mlir/lib/Dialect/Linalg/Transforms/InitTensorElimination.cpp b/mlir/lib/Dialect/Linalg/Transforms/InitTensorElimination.cpp deleted file mode 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/InitTensorElimination.cpp +++ /dev/null @@ -1,50 +0,0 @@ -//===- ComprehensiveBufferize.cpp - Single pass bufferization -------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "PassDetail.h" - -#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" -#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h" -#include "mlir/Dialect/Linalg/Passes.h" -#include "mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h" -#include "mlir/Pass/Pass.h" - -using namespace mlir; -using namespace mlir::bufferization; -using namespace mlir::linalg; - -namespace { -struct LinalgInitTensorElimination - : public LinalgInitTensorEliminationBase { - LinalgInitTensorElimination() = default; - - void runOnOperation() override; - - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); - } -}; -} // namespace - -void LinalgInitTensorElimination::runOnOperation() { - Operation *op = getOperation(); - OneShotBufferizationOptions options; - OneShotAnalysisState state(op, options); - if (failed(analyzeOp(op, state))) { - signalPassFailure(); - return; - } - - IRRewriter rewriter(op->getContext()); - if (failed(insertSliceAnchoredInitTensorEliminationStep(rewriter, op, state))) - signalPassFailure(); -} - -std::unique_ptr mlir::createLinalgInitTensorEliminationPass() { - return std::make_unique(); -} diff --git a/mlir/lib/Dialect/Linalg/Transforms/InitTensorToAllocTensor.cpp b/mlir/lib/Dialect/Linalg/Transforms/InitTensorToAllocTensor.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/Linalg/Transforms/InitTensorToAllocTensor.cpp @@ -0,0 +1,55 @@ +//===- InitTensorToAllocTensor.cpp - Lower init_tensor to alloc_tensor ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PassDetail.h"
+
+#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
+#include "mlir/Dialect/Linalg/Passes.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+
+using namespace mlir;
+using namespace mlir::bufferization;
+using namespace mlir::linalg;
+
+namespace {
+struct InitTensorLoweringPattern : public OpRewritePattern<InitTensorOp> {
+  using OpRewritePattern<InitTensorOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(InitTensorOp op,
+                                PatternRewriter &rewriter) const override {
+    rewriter.replaceOpWithNewOp<bufferization::AllocTensorOp>(
+        op, op.getMixedSizes(), op.getType().getElementType());
+    return success();
+  }
+};
+
+struct LinalgInitTensorToAllocTensor
+    : public LinalgInitTensorToAllocTensorBase<LinalgInitTensorToAllocTensor> {
+  LinalgInitTensorToAllocTensor() = default;
+
+  void runOnOperation() override;
+
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registry
+        .insert<bufferization::BufferizationDialect>();
+  }
+};
+} // namespace
+
+void LinalgInitTensorToAllocTensor::runOnOperation() {
+  Operation *op = getOperation();
+  RewritePatternSet patterns(op->getContext());
+  patterns.insert<InitTensorLoweringPattern>(op->getContext());
+  if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns))))
+    signalPassFailure();
+}
+
+std::unique_ptr<Pass> mlir::createLinalgInitTensorToAllocTensorPass() {
+  return std::make_unique<LinalgInitTensorToAllocTensor>();
+}
diff --git a/mlir/python/mlir/dialects/BufferizationOps.td b/mlir/python/mlir/dialects/BufferizationOps.td
new file mode 100644
--- /dev/null
+++ b/mlir/python/mlir/dialects/BufferizationOps.td
@@ -0,0 +1,15 @@
+//===-- BufferizationOps.td - Entry point for BufferizationOps bindings ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PYTHON_BINDINGS_BUFFERIZATION_OPS
+#define PYTHON_BINDINGS_BUFFERIZATION_OPS
+
+include "mlir/Bindings/Python/Attributes.td"
+include "mlir/Dialect/Bufferization/IR/BufferizationOps.td"
+
+#endif
diff --git a/mlir/python/mlir/dialects/_bufferization_ops_ext.py b/mlir/python/mlir/dialects/_bufferization_ops_ext.py
new file mode 100644
--- /dev/null
+++ b/mlir/python/mlir/dialects/_bufferization_ops_ext.py
@@ -0,0 +1,51 @@
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+try:
+  from typing import Sequence, Union
+  from ..ir import *
+  from ._ods_common import get_default_loc_context as _get_default_loc_context
+
+  from typing import Any, List, Union
+except ImportError as e:
+  raise RuntimeError("Error loading imports from extension module") from e
+
+
+class AllocTensorOp:
+  """Extends the bufferization.alloc_tensor op."""
+
+  def __init__(self,
+               sizes: Union[Sequence[int], Sequence[Value]],
+               element_type: Type,
+               *,
+               loc=None,
+               ip=None):
+    """Constructs an `alloc_tensor` with either static or dynamic sizes."""
+    context = _get_default_loc_context(loc)
+    operands = []
+    attributes = {}
+    # TODO: Refactor the AllocTensorOp to take an element type attribute and
+    # then use normal result type inference, unifying the Python and C++ side
+    # with a standard mechanism (versus stashing that in builders).
+ if sizes and isinstance(sizes[0], Value): + # Dynamic sizes. + operands.extend(sizes) + static_size_ints = [-1] * len(sizes) + result_type = RankedTensorType.get(static_size_ints, element_type) + else: + # Static sizes. + result_type = RankedTensorType.get(sizes, element_type) + static_size_ints = sizes + + i64_type = IntegerType.get_signless(64) + attributes["static_sizes"] = ArrayAttr.get( + [IntegerAttr.get(i64_type, s) for s in static_size_ints], + context=context) + op = self.build_generic( + results=[result_type], + operands=operands, + attributes=attributes, + loc=loc, + ip=ip) + OpView.__init__(self, op) diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-init-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir rename from mlir/test/Dialect/Linalg/one-shot-bufferize-init-tensor-elimination.mlir rename to mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir --- a/mlir/test/Dialect/Linalg/one-shot-bufferize-init-tensor-elimination.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-eliminate-init-tensors -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs" -canonicalize -split-input-file | FileCheck %s +// RUN: mlir-opt %s -eliminate-alloc-tensors -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs" -canonicalize -split-input-file | FileCheck %s // CHECK: func @buffer_forwarding_conflict( // CHECK-SAME: %[[FUNC_ARG:[0-9a-zA-Z]*]]: memref @@ -14,11 +14,11 @@ // This allocs the whole dim to allow for a full clone of t. // CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM]]) - // init_tensor itself does not alloc but forwards to the **second** - // insert_slice. InitTensorOp replaces the init_tensor with an out-of-place + // alloc_tensor itself does not alloc but forwards to the **second** + // insert_slice. AllocTensorOp replaces the alloc_tensor with an out-of-place // extract_slice. // CHECK: %[[EXTRACT_SLICE_ALLOC:.*]] = memref.alloc(%[[sz]]) - %a = linalg.init_tensor[%sz] : tensor + %a = bufferization.alloc_tensor[%sz] : tensor // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[EXTRACT_SLICE_ALLOC]] : memref) %f = linalg.fill ins(%f0 : f32) outs(%a : tensor) -> tensor @@ -47,10 +47,10 @@ { %f0 = arith.constant 0.0: f32 - // init_tensor itself does not alloc but forwards to the insert_slice. - // InitTensorOp replaces the init_tensor with an inplace extract_slice. + // alloc_tensor itself does not alloc but forwards to the insert_slice. + // InitTensorOp replaces the alloc_tensor with an inplace extract_slice. 
// CHECK: %[[T_SUBVIEW:.*]] = memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1] - %a = linalg.init_tensor[%sz] : tensor + %a = bufferization.alloc_tensor[%sz] : tensor // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[T_SUBVIEW]] : memref) -> tensor @@ -71,7 +71,7 @@ %c5 = arith.constant 5 : index // CHECK-NOT: memref.alloc - %blank = linalg.init_tensor [5] : tensor<5xf32> + %blank = bufferization.alloc_tensor [5] : tensor<5xf32> // CHECK: scf.for %[[iv:.*]] = %{{.*}} to %[[sz]] step %{{.*}} { %r = scf.for %iv = %c0 to %sz step %c5 iter_args(%bb = %t) -> (tensor) { @@ -102,7 +102,7 @@ // CHECK-NOT: memref.alloc // CHECK: %[[subview:.*]] = memref.subview %[[t]][%[[idx]]] [5] [1] - %blank = linalg.init_tensor [5] : tensor<5xf32> + %blank = bufferization.alloc_tensor [5] : tensor<5xf32> // CHECK: scf.for %[[iv:.*]] = %{{.*}} to %[[sz]] step %{{.*}} { %r = scf.for %iv = %c0 to %sz step %c5 iter_args(%bb = %t) -> (tensor) { diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir @@ -16,7 +16,7 @@ // CHECK-NOT: dealloc // CHECK: scf.yield %[[casted]] %sz = "test.some_op"() : () -> (index) - %0 = linalg.init_tensor[%sz] : tensor + %0 = bufferization.alloc_tensor[%sz] : tensor scf.yield %0 : tensor } else { // CHECK: } else { diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir @@ -138,11 +138,11 @@ %idx = arith.constant 0 : index %cst = arith.constant 5.0 : f32 - // One alloc for the init_tensor, another one because the transfer_write + // One alloc for the alloc_tensor, another one because the transfer_write // bufferizes out-of-place. 
// CHECK: %[[m1:.*]] = memref.alloc() {{.*}} : memref<10xf32> // CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10xf32> - %t1 = linalg.init_tensor [10] : tensor<10xf32> + %t1 = bufferization.alloc_tensor [10] : tensor<10xf32> // CHECK: linalg.fill ins(%{{.*}}{{.*}}outs(%[[m1]] // CHECK: %[[filled_tensor:.*]] = bufferization.to_tensor %[[m1]] diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir @@ -44,7 +44,7 @@ // CHECK-LABEL: func @func_without_tensor_args func.func @func_without_tensor_args(%v : vector<10xf32>) -> () { // CHECK: %[[alloc:.*]] = memref.alloc() - %0 = linalg.init_tensor[10] : tensor<10xf32> + %0 = bufferization.alloc_tensor[10] : tensor<10xf32> %c0 = arith.constant 0 : index // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] @@ -97,7 +97,7 @@ // CHECK-LABEL: func @copy_deallocated( func.func @copy_deallocated() -> tensor<10xf32> { // CHECK: %[[alloc:.*]] = memref.alloc() - %0 = linalg.init_tensor[10] : tensor<10xf32> + %0 = bufferization.alloc_tensor[10] : tensor<10xf32> // CHECK: %[[alloc_tensor:.*]] = bufferization.to_tensor %[[alloc]] // CHECK: memref.dealloc %[[alloc]] // CHECK: return %[[alloc_tensor]] @@ -111,7 +111,7 @@ func.func @select_different_tensors(%t: tensor, %sz: index, %c: i1) -> tensor { // CHECK-DAG: %[[m:.*]] = bufferization.to_memref %[[t]] : memref // CHECK-DAG: %[[alloc:.*]] = memref.alloc(%{{.*}}) {{.*}} : memref - %0 = linalg.init_tensor [%sz] : tensor + %0 = bufferization.alloc_tensor [%sz] : tensor // A cast must be inserted because %t and %0 have different memref types. 
// CHECK: %[[casted:.*]] = memref.cast %[[alloc]] : memref to memref diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir @@ -17,7 +17,7 @@ // CHECK: %[[alloc:.*]] = memref.alloc // CHECK: return %[[alloc]] func.func @create_tensor() -> tensor<10xf32> { - %0 = linalg.init_tensor [10] : tensor<10xf32> + %0 = bufferization.alloc_tensor [10] : tensor<10xf32> return %0 : tensor<10xf32> } diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir @@ -682,7 +682,7 @@ %cst_0 = arith.constant 0.000000e+00 : f32 %cst_1 = arith.constant 1.000000e+00 : f32 - %7 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %7 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} @@ -720,7 +720,7 @@ %cst_0 = arith.constant 0.000000e+00 : f32 %cst_1 = arith.constant 1.000000e+00 : f32 - %7 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %7 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} @@ -1246,19 +1246,19 @@ // ----- -// CHECK-LABEL: func @write_to_same_init_tensor_in_place( -func.func @write_to_same_init_tensor_in_place( +// CHECK-LABEL: func @write_to_same_alloc_tensor_in_place( +func.func @write_to_same_alloc_tensor_in_place( %A : tensor {linalg.inplaceable = true}, %lb : index, %ub : index, %step : index, %sz: index, %sz2: index) -> (tensor) { - %B = linalg.init_tensor [%sz2] : tensor + %B = bufferization.alloc_tensor [%sz2] : tensor // CHECK: scf.for {{.*}} { %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { %i2 = arith.index_cast %i : index to i32 %i3 = arith.sitofp %i2 : i32 to f32 - // %B is written multiple times inside a loop, but it is an init_tensor. + // %B is written multiple times inside a loop, but it is an alloc_tensor. // CHECK: tensor.insert // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]} %B2 = tensor.insert %i3 into %B[%i] : tensor @@ -1274,13 +1274,13 @@ // ----- -// CHECK-LABEL: func @write_to_same_init_tensor_out_of_place( -func.func @write_to_same_init_tensor_out_of_place( +// CHECK-LABEL: func @write_to_same_alloc_tensor_out_of_place( +func.func @write_to_same_alloc_tensor_out_of_place( %A : tensor {linalg.inplaceable = true}, %lb : index, %ub : index, %step : index, %sz: index, %sz2: index, %f: f32) -> (tensor) { - %B = linalg.init_tensor [%sz2] : tensor + %B = bufferization.alloc_tensor [%sz2] : tensor %C = tensor.insert %f into %B[%lb] : tensor // CHECK: scf.for {{.*}} { @@ -1288,8 +1288,8 @@ %i2 = arith.index_cast %i : index to i32 %i3 = arith.sitofp %i2 : i32 to f32 // %C is written multiple times inside a loop. Even though %C aliases with - // an init_tensor, out-of-bounds bufferization is necessary because there is - // another alias (%C) outside of the loop. 
+ // an alloc_tensor, out-of-bounds bufferization is necessary because there + // is another alias (%C) outside of the loop. // CHECK: tensor.insert // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]} %B2 = tensor.insert %i3 into %C[%i] : tensor diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir @@ -60,7 +60,7 @@ scf.yield %t1 : tensor } else { // This buffer aliases. - %t2 = linalg.init_tensor [%idx] : tensor + %t2 = bufferization.alloc_tensor [%idx] : tensor // expected-error @+1 {{operand #0 of ReturnLike op does not satisfy destination passing style}} scf.yield %t2 : tensor } @@ -221,7 +221,7 @@ func.func @mini_test_case1() -> tensor<10x20xf32> { %f0 = arith.constant 0.0 : f32 - %t = linalg.init_tensor [10, 20] : tensor<10x20xf32> + %t = bufferization.alloc_tensor [10, 20] : tensor<10x20xf32> %r = linalg.fill ins(%f0 : f32) outs(%t : tensor<10x20xf32>) -> tensor<10x20xf32> // expected-error @+1 {{operand #0 of ReturnLike op does not satisfy destination passing style}} return %r : tensor<10x20xf32> @@ -274,7 +274,7 @@ // ----- func.func @foo(%t : tensor<5xf32>) -> (tensor<5xf32>) { - %0 = linalg.init_tensor [5] : tensor<5xf32> + %0 = bufferization.alloc_tensor [5] : tensor<5xf32> // expected-error @+1 {{operand #0 of ReturnLike op does not satisfy destination passing style}} return %0 : tensor<5xf32> } @@ -291,7 +291,7 @@ func.func @destination_passing_style_dominance_test_1(%cst : f32, %idx : index, %idx2 : index) -> f32 { %0 = scf.execute_region -> tensor { - %1 = linalg.init_tensor [%idx] : tensor + %1 = bufferization.alloc_tensor [%idx] : tensor // expected-error @+1 {{operand #0 of ReturnLike op does not satisfy destination passing style}} scf.yield %1 : tensor } @@ -304,7 +304,7 @@ func.func @destination_passing_style_dominance_test_2(%cst : f32, %idx : index, %idx2 : index) -> f32 { - %1 = linalg.init_tensor [%idx] : tensor + %1 = bufferization.alloc_tensor [%idx] : tensor %0 = scf.execute_region -> tensor { // This YieldOp is in destination-passing style, thus no error. 
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir @@ -70,7 +70,7 @@ // CHECK-FULLY-DYNAMIC-LAYOUT-MAP-SAME: #[[$map2a]]> { func.func @return_extract_slice(%idx: index, %sz: index) -> (tensor<2x?xf32>) { - %t = linalg.init_tensor [20, 10] : tensor<20x10xf32> + %t = bufferization.alloc_tensor [20, 10] : tensor<20x10xf32> %0 = tensor.extract_slice %t[%idx, %idx][2, %sz][1, 1] : tensor<20x10xf32> to tensor<2x?xf32> return %0 : tensor<2x?xf32> @@ -120,7 +120,7 @@ // CHECK-LABEL: func @func_without_tensor_args func.func @func_without_tensor_args(%v : vector<10xf32>) -> () { // CHECK: %[[alloc:.*]] = memref.alloc() - %0 = linalg.init_tensor[10] : tensor<10xf32> + %0 = bufferization.alloc_tensor[10] : tensor<10xf32> %c0 = arith.constant 0 : index // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] @@ -456,9 +456,9 @@ // CHECK-DAG: %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]> // CHECK-DAG: %[[cB:.*]] = memref.cast %[[B]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]> // CHECK-DAG: %[[cC:.*]] = memref.cast %[[C]] : memref to memref - %A = linalg.init_tensor [64] : tensor<64xf32> - %B = linalg.init_tensor [64] : tensor<64xf32> - %C = linalg.init_tensor [] : tensor + %A = bufferization.alloc_tensor [64] : tensor<64xf32> + %B = bufferization.alloc_tensor [64] : tensor<64xf32> + %C = bufferization.alloc_tensor [] : tensor // CHECK-DAG: linalg.fill ins(%[[C1]] : f32) outs(%[[A]] : memref<64xf32>) // CHECK-DAG: linalg.fill ins(%[[C2]] : f32) outs(%[[B]] : memref<64xf32>) diff --git a/mlir/test/Dialect/Bufferization/canonicalize.mlir b/mlir/test/Dialect/Bufferization/canonicalize.mlir --- a/mlir/test/Dialect/Bufferization/canonicalize.mlir +++ b/mlir/test/Dialect/Bufferization/canonicalize.mlir @@ -243,3 +243,16 @@ // CHECK: %[[RES:.*]] = tensor.extract %[[TENSOR]][%[[IDX0]], %[[IDX1]]] // CHECK-NOT: memref.load // CHECK: return %[[RES]] : f32 + + +// ----- + +func.func @alloc_tensor_canonicalize() -> (tensor<4x5x?xf32>) { + %c6 = arith.constant 6 : index + %0 = bufferization.alloc_tensor [4, 5, %c6] : tensor<4x5x?xf32> + return %0 : tensor<4x5x?xf32> +} +// CHECK: func @alloc_tensor_canonicalize +// CHECK: %[[T0:.+]] = bufferization.alloc_tensor [4, 5, 6] : tensor<4x5x6xf32> +// CHECK: %[[T1:.+]] = tensor.cast %[[T0]] : tensor<4x5x6xf32> to tensor<4x5x?xf32> +// CHECK: return %[[T1]] diff --git a/mlir/test/Dialect/Bufferization/invalid.mlir b/mlir/test/Dialect/Bufferization/invalid.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Dialect/Bufferization/invalid.mlir @@ -0,0 +1,26 @@ +// RUN: mlir-opt %s -split-input-file -verify-diagnostics + +func.func @alloc_tensor_err(%arg0 : index, %arg1 : index) +{ + // expected-error @+1 {{specified type 'tensor<4x?x?x5xf32>' does not match the inferred type 'tensor<4x5x?x?xf32>'}} + %1 = bufferization.alloc_tensor [4, 5, %arg0, %arg1] : tensor<4x?x?x5xf32> + return +} + +// ----- + +func.func @alloc_tensor_err(%arg0 : index) +{ + // expected-error @+1 {{expected 4 sizes values}} + %1 = bufferization.alloc_tensor [4, 5, %arg0] : tensor<4x?x?x5xf32> + return +} + +// ----- + +func.func @alloc_tensor_err(%arg0 : index) +{ + // expected-error @+1 {{expected 2 dynamic sizes values}} + %1 = "bufferization.alloc_tensor"(%arg0) {static_sizes = [4, -1, -1, 
5]} : (index) -> tensor<4x?x?x5xf32> + return +} diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir --- a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir +++ b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only bufferize-function-boundaries" -split-input-file | FileCheck %s -/// All combinations of matmul(fill(extract(init_tensor)), fill(extract(%init_tensor)), %arg2) +/// All combinations of matmul(fill(extract(alloc_tensor)), fill(extract(%alloc_tensor)), %arg2) /// These should all be inplaceable except the first op. // ----- @@ -15,7 +15,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -42,7 +42,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -69,7 +69,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -96,7 +96,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -123,7 +123,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -150,7 +150,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -177,7 +177,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: 
{__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -204,7 +204,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -231,7 +231,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -258,7 +258,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -285,7 +285,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -312,7 +312,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> @@ -339,7 +339,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -366,7 +366,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -392,7 +392,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -419,7 +419,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = 
linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -446,7 +446,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -473,7 +473,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> @@ -500,7 +500,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> @@ -527,7 +527,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> @@ -554,7 +554,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> @@ -581,7 +581,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> @@ -608,7 +608,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> @@ -635,7 +635,7 @@ %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 %cst_0 = arith.constant 1.000000e+00 : f32 - %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> + %0 = bufferization.alloc_tensor [256, 256] : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> diff --git 
a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir --- a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir +++ b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-eliminate-init-tensors -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs" -split-input-file | FileCheck %s +// RUN: mlir-opt %s -eliminate-alloc-tensors -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs" -split-input-file | FileCheck %s //===----------------------------------------------------------------------===// // InitTensorOp elimination @@ -10,7 +10,7 @@ // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none"] // Instead of allocating, share buffer with some inplace bufferization? - %0 = linalg.init_tensor [%arg1] : tensor + %0 = bufferization.alloc_tensor [%arg1] : tensor // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"] @@ -37,7 +37,7 @@ // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"] // Instead of allocating, share buffer with some inplace bufferization? - %0 = linalg.init_tensor [%arg1] : tensor + %0 = bufferization.alloc_tensor [%arg1] : tensor // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"] diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir --- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir @@ -339,7 +339,7 @@ // ----- //===----------------------------------------------------------------------===// -// InitTensorOp elimination would produce SSA violations for the example below. +// AllocTensorOp elimination would produce SSA violations for the example below. 
//===----------------------------------------------------------------------===// func.func @depthwise_conv_1d_nwc_wc(%arg0: index, %arg1: index, %arg2: tensor<8x18x32xf32>) @@ -347,9 +347,9 @@ %c0 = arith.constant 0 : index %c32 = arith.constant 32 : index %c8 = arith.constant 8 : index - %0 = linalg.init_tensor [4, 1, 6, 8] : tensor<4x1x6x8xf32> + %0 = bufferization.alloc_tensor [4, 1, 6, 8] : tensor<4x1x6x8xf32> %1 = tensor.cast %0 : tensor<4x1x6x8xf32> to tensor - %2 = linalg.init_tensor [1, 6, 8] : tensor<1x6x8xf32> + %2 = bufferization.alloc_tensor [1, 6, 8] : tensor<1x6x8xf32> %3 = scf.for %arg3 = %c0 to %c32 step %c8 iter_args(%arg4 = %1) -> (tensor) { %4 = affine.apply affine_map<(d0) -> (d0 ceildiv 8)>(%arg3) %5 = tensor.insert_slice %2 into %arg4[%4,0, 0, 0] [1, 1, 6, 8] [1, 1, 1, 1] : @@ -361,8 +361,8 @@ // ----- -// CHECK-LABEL: func @do_not_copy_init_tensors( -func.func @do_not_copy_init_tensors(%f1: f32, %f2: f32, %idx: index) +// CHECK-LABEL: func @do_not_copy_alloc_tensors( +func.func @do_not_copy_alloc_tensors(%f1: f32, %f2: f32, %idx: index) -> (tensor<5xf32>, tensor<5xf32>) { // CHECK: memref.alloc @@ -370,7 +370,7 @@ // CHECK-NOT: copy // CHECK: memref.store // CHECK: memref.store - %0 = linalg.init_tensor [5] : tensor<5xf32> + %0 = bufferization.alloc_tensor [5] : tensor<5xf32> %1 = tensor.insert %f1 into %0[%idx] : tensor<5xf32> %2 = tensor.insert %f2 into %0[%idx] : tensor<5xf32> return %1, %2 : tensor<5xf32>, tensor<5xf32> diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir --- a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir @@ -583,7 +583,7 @@ { // CHECK: scf.for {{.*}} { %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { - %B = linalg.init_tensor [%sz] : tensor + %B = bufferization.alloc_tensor [%sz] : tensor %i2 = arith.index_cast %i : index to i32 %i3 = arith.sitofp %i2 : i32 to f32 // The tensor.insert is in-place because the %B is defined inside the loop. 
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir --- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir @@ -220,7 +220,7 @@ // CHECK: return %[[r]] func.func @scf_execute_region_yield_non_equivalent(%i: index, %j: index) -> f32 { %r = scf.execute_region -> (tensor) { - %t2 = linalg.init_tensor [%i] : tensor + %t2 = bufferization.alloc_tensor [%i] : tensor scf.yield %t2 : tensor } %f = tensor.extract %r[%j] : tensor @@ -261,7 +261,7 @@ // CHECK-SAME: %[[t:.*]]: memref, %lb : index, %ub : index, %step : index) -> tensor { %r = scf.for %i = %lb to %ub step %step iter_args(%a = %t) -> tensor { - %t2 = linalg.init_tensor [%i] : tensor + %t2 = bufferization.alloc_tensor [%i] : tensor scf.yield %t2 : tensor } diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir --- a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir @@ -179,9 +179,9 @@ %c8 = arith.constant 8 : index %c32 = arith.constant 32 : index %c0 = arith.constant 0 : index - %0 = linalg.init_tensor [4, 1, 6, 8] : tensor<4x1x6x8xf32> + %0 = bufferization.alloc_tensor [4, 1, 6, 8] : tensor<4x1x6x8xf32> %1 = tensor.cast %0 : tensor<4x1x6x8xf32> to tensor - %2 = linalg.init_tensor [1, 6, 8] : tensor<1x6x8xf32> + %2 = bufferization.alloc_tensor [1, 6, 8] : tensor<1x6x8xf32> %5 = scf.for %arg7 = %c0 to %c32 step %c8 iter_args(%arg8 = %1) -> (tensor) { %7 = affine.apply affine_map<(d0) -> (d0 ceildiv 8)>(%arg7) %8 = tensor.extract_slice %arg0[%i, %j, %arg7] [1, 6, 8] [1, 1, 1] : tensor<8x18x32xf32> to tensor<1x6x8xf32> diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir @@ -16,7 +16,7 @@ %c0 = arith.constant 0 : index %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor) -> tensor %1 = affine.apply #map0(%c0, %c64)[%c2] - %2 = linalg.init_tensor [%1, 2] : tensor + %2 = bufferization.alloc_tensor [%1, 2] : tensor %3 = scf.for %arg3 = %c0 to %c64 step %c2 iter_args(%arg4 = %2) -> (tensor) { %8 = affine.apply #map1(%arg3, %c0)[%c2] %9 = tensor.extract_slice %arg1[%arg3] [2] [1] : tensor<64xf32> to tensor<2xf32> @@ -33,7 +33,7 @@ // call @printMemrefF32(%B) : (tensor<*xf32>) -> () %4 = affine.apply #map0(%c0, %c64)[%c2] - %5 = linalg.init_tensor [%4, 2] : tensor + %5 = bufferization.alloc_tensor [%4, 2] : tensor %6 = scf.for %arg3 = %c0 to %c64 step %c2 iter_args(%arg4 = %5) -> (tensor) { %8 = affine.apply #map1(%arg3, %c0)[%c2] %9 = tensor.extract_slice %arg0[%arg3] [2] [1] : tensor<64xf32> to tensor<2xf32> @@ -80,9 +80,9 @@ %v1 = arith.constant 1.0 : f32 %v2 = arith.constant 2.0 : f32 - %A = linalg.init_tensor [64] : tensor<64xf32> - %B = linalg.init_tensor [64] : tensor<64xf32> - %C = linalg.init_tensor [] : tensor + %A = bufferization.alloc_tensor [64] : tensor<64xf32> + %B = bufferization.alloc_tensor [64] : tensor<64xf32> + %C = bufferization.alloc_tensor [] : tensor %AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>) -> tensor<64xf32> %BB = linalg.fill ins(%v2 : f32) outs(%B : tensor<64xf32>) -> tensor<64xf32> %CC = linalg.fill ins(%v0 : f32) outs(%C : tensor) -> tensor diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir 
b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir @@ -1,5 +1,6 @@ // RUN: mlir-opt %s -test-linalg-transform-patterns=test-linalg-to-vector-patterns \ -// RUN: -linalg-bufferize -arith-bufferize -tensor-bufferize -func-bufferize \ +// RUN: -linalg-init-tensor-to-alloc-tensor -linalg-bufferize -arith-bufferize \ +// RUN: -bufferization-bufferize -tensor-bufferize -func-bufferize \ // RUN: -finalizing-bufferize -buffer-deallocation \ // RUN: -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -8692,6 +8692,7 @@ deps = [ ":AllocationOpInterfaceTdFiles", ":CopyOpInterfaceTdFiles", + ":InferTypeOpInterfaceTdFiles", ":OpBaseTdFiles", ":SideEffectInterfacesTdFiles", ], @@ -8755,6 +8756,7 @@ ], includes = ["include"], deps = [ + ":Affine", ":AllocationOpInterface", ":ArithmeticDialect", ":BufferizableOpInterfaceIncGen", @@ -8763,6 +8765,7 @@ ":CopyOpInterface", ":FuncDialect", ":IR", + ":InferTypeOpInterface", ":MemRefDialect", ":Support", ":TensorDialect", @@ -8809,6 +8812,7 @@ ":LoopLikeInterface", ":MemRefDialect", ":Pass", + ":TensorDialect", ":Transforms", "//llvm:Support", ],
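
Usage note: the Python builder shown at the top of this section can be exercised roughly as below. This is a minimal sketch, assuming the builder is exposed as `bufferization.AllocTensorOp(sizes, element_type)` in a generated `mlir.dialects.bufferization` module; the module path, class name, and positional signature are inferred from the hunk above and not verified against upstream. Static sizes are plain Python ints; dynamic sizes would instead be `index`-typed `Value`s.

    # Sketch only; `mlir.dialects.bufferization.AllocTensorOp` and its
    # (sizes, element_type) signature are assumptions inferred from the
    # builder hunk at the start of this section.
    from mlir.ir import Context, Location, Module, InsertionPoint, F32Type
    from mlir.dialects import bufferization

    with Context(), Location.unknown():
        module = Module.create()
        with InsertionPoint(module.body):
            f32 = F32Type.get()
            # All sizes static: no operands, static_sizes = [4, 8], and the
            # result type is built as tensor<4x8xf32>. Expected to print
            # roughly as: bufferization.alloc_tensor [4, 8] : tensor<4x8xf32>
            bufferization.AllocTensorOp([4, 8], f32)
            # With dynamic sizes, `sizes` would be a list of index-typed
            # Values; the builder then records -1 (dynamic) entries in
            # static_sizes and produces a tensor<?x?xf32> result.
        print(module)

The static/dynamic split in the sketch mirrors the op definition: dynamic extents are SSA operands, while `static_sizes` is an I64ArrayAttr in which -1 marks the dynamic positions.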