diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td @@ -31,20 +31,54 @@ let description = [{ `bufferization.alloc_tensor` materializes an uninitialized tensor with a given shape (dynamic or static). It always bufferizes to a new buffer - allocation of the given shape. The optional `copy` operand specifies the - contents of the tensors. If no `copy` operand is specified, reading from the - result of an `alloc_tensor` op yields an undefined value. + allocation of the given shape. It is a helper op for bufferization. The + operation is provided as an anchor that marks the beginning of a new tensor + SSA use-def chain. It can be used to control in-place bufferization + decisions during One-Shot Bufferize: The bufferized result of a + `bufferization.alloc_tensor` does not alias with any other buffer, so it can + be used to resolve read-after-write conflicts that would have been + introduced by the in-place bufferization of another op. + + The region of the operation specifies how the contents of the tensor should + be initialized. The initializer region has a single block argument that + represents the allocated tensor. It can be used as a destination for DPS + operations. - If `copy` is specified, no dynamic sizes should be passed, since they are - the same as the dynamic sizes of the `copy` operand. + Example: + + ```mlir + %0 = bufferization.alloc_tensor(%sz) init { + ^bb0(%arg0: tensor): + bufferization.yield %t : tensor + } : tensor + ``` + + In the above example, the result is initialized with the contents of %t. + There is a shorter syntax for operations whose region has the terminator as + the only operation: + + ```mlir + %0 = bufferization.alloc_tensor(%sz) copy(%t) : tensor + ``` + + Initialization is optional: %arg0 could be yielded instead of %t. In that + case the result has no specified contents. The short syntax in that case is: + + ```mlir + %0 = bufferization.alloc_tensor(%sz) : tensor + ``` + + Note: This operation acts as a hoisting barrier. The first example is + similar to writing the following IR. + + ```mlir + %0 = bufferization.alloc_tensor(%sz) : tensor + %1 = tensor.insert_slice %t into %0[0] [%sz] [1] + ``` - `alloc_tensor` is a helper op for bufferization. The operation is provided - as an anchor that marks the beginning of a new tensor SSA use-def chain. It - can be used to control in-place bufferization decisions during One-Shot - Bufferize: The bufferized result of a `bufferization.alloc_tensor` does not - alias with any other buffer, so it can be used to resolve read-after-write - conflicts that would have been introduced by the in-place bufferization of - another op. + However, the `tensor.insert_slice` is subject to canonicalizations and + foldings, such that there is no guarantee the allocated tensor will be + initialized with %t. The optional `memory_space` attribute specifies the memory space when bufferizing this op. The memory space is inferred from `copy` if specified.
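For C++ clients, here is a minimal sketch of how the revised op could be constructed with the new builder signature (dynamic sizes, size hint, memory space, copy). The helper name `buildCopyAllocTensor` is hypothetical; the builder arguments mirror the call site updated in TensorCopyInsertion.cpp further down in this patch.

```cpp
// Sketch only, assuming the new AllocTensorOp builder introduced by this patch.
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"

using namespace mlir;

// Builds `bufferization.alloc_tensor(...) copy(%src)`: the builder creates
// the init region and terminates it with `bufferization.yield %src`.
static Value buildCopyAllocTensor(OpBuilder &b, Location loc, Value src,
                                  ValueRange dynamicSizes) {
  auto type = src.getType().cast<RankedTensorType>();
  // `dynamicSizes` must provide one index value per dynamic dimension of
  // `type`; the `copy` value no longer implies them.
  auto allocOp = b.create<bufferization::AllocTensorOp>(
      loc, type, dynamicSizes, /*sizeHint=*/Value(),
      /*memorySpace=*/Attribute(), /*copy=*/src);
  return allocOp.getResult();
}
```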
@@ -73,17 +107,16 @@ ``` ```mlir - %c = bufferization.alloc_tensor(%d1, %d2) size_hint = %noe + %c = bufferization.alloc_tensor(%d1, %d2) size_hint(%noe) : tensor ``` }]; let arguments = (ins Variadic:$dynamic_sizes, - Optional:$copy, Optional:$size_hint, OptionalAttr:$memory_space); - let results = (outs AnyTensor:$result); + let regions = (region SizedRegion<1>:$region); let extraClassDeclaration = [{ LogicalResult bufferize(RewriterBase &rewriter, @@ -94,19 +127,14 @@ bool bufferizesToAllocation(OpResult opResult) { return true; } - bool bufferizesToMemoryRead(OpOperand &opOperand, - const AnalysisState &state); - - bool bufferizesToMemoryWrite(OpOperand &opOperand, - const AnalysisState &state); - - AliasingOpResultList getAliasingOpResults( - OpOperand &opOperand, const AnalysisState &state); - FailureOr getBufferType( Value value, const BufferizationOptions &options, const DenseMap &fixedTypes); + bool isWritable(Value value, const AnalysisState &state) const { + return true; + } + RankedTensorType getType() { return getResult().getType().cast(); } @@ -120,7 +148,6 @@ // the tensor at dimension `idx`. Asserts that the shape is // dynamic at that `idx`. unsigned getIndexOfDynamicSize(unsigned idx) { - assert(!getCopy() && "no dim sizes specified when copying a tensor"); assert(isDynamicDim(idx) && "expected dynamic size"); ArrayRef shape = getType().getShape(); return std::count_if( @@ -132,29 +159,35 @@ // `idx`. Asserts that the shape is dynamic at that `idx. Value getDynamicSize(OpBuilder &b, unsigned idx); - // Assert that the size of the result tensor is static at `idx` - // and return the shape. - int64_t getStaticSize(unsigned idx) { - assert(!isDynamicDim(idx) && "expected static size"); - return getType().getShape()[idx]; - } + /// Return the terminator of the region. + YieldOp getTerminator(); + + /// Return the block argument of the region. + BlockArgument getBlockArgument() { return getRegion().getArgument(0); } + + /// Return true if the terminator is yielding a value that is different from + /// the region's block argument. I.e., the allocated tensor getting + /// initialized. + bool hasInitializer(); + + /// Return true if this op has an initializer and the terminator is the only + /// op in the region. + bool hasCopyInitializer(); }]; let builders = [ - // Build an op without `copy` or `memory_space` or `size_hint`. - OpBuilder<(ins "RankedTensorType":$type, "ValueRange":$dynamicSizes)>, - - // Build an op without `memory_space` or `size_hint`. + // Build an op without `size_hint`. OpBuilder<(ins "RankedTensorType":$type, "ValueRange":$dynamicSizes, - "Value":$copy)>, + CArg<"Attribute", "{}">:$memory_space)>, - // Build an op without `size_hint`. - OpBuilder<(ins "TensorType":$type, "ValueRange":$dynamicSizes, - "Value":$copy, "IntegerAttr":$memory_space)>, + OpBuilder<(ins "RankedTensorType":$type, "ValueRange":$dynamicSizes, + "Value":$size_hint, CArg<"Attribute", "{}">:$memory_space, + CArg<"Value", "{}">:$copy)>, ]; - let hasCanonicalizer = 1; + let skipDefaultBuilders = 1; let hasCustomAssemblyFormat = 1; + let hasCanonicalizer = 1; let hasVerifier = 1; } @@ -433,4 +466,39 @@ let hasCanonicalizer = 1; } +def Bufferization_YieldOp : Bufferization_Op<"yield", + [BufferizableOpInterface, Terminator]> { + let summary = "yield operation"; + let description = [{ + This is the terminator operation for `bufferization.alloc_tensor`. It yields + a single tensor value. 
+ }]; + + let arguments = (ins AnyTensor:$tensor); + let assemblyFormat = "$tensor attr-dict `:` type($tensor)"; + + let extraClassDeclaration = [{ + AliasingOpResultList getAliasingOpResults( + OpOperand &opOperand, const AnalysisState &state) const { + return {}; + } + + bool bufferizesToMemoryRead(OpOperand &opOperand, + const AnalysisState &state) const { + return true; + } + + bool bufferizesToMemoryWrite(OpOperand &opOperand, + const AnalysisState &state) const { + return false; + } + + LogicalResult bufferize(RewriterBase &rewriter, + const BufferizationOptions &options) { + // Bufferized as part of bufferization.alloc_tensor. + return success(); + } + }]; +} + #endif // BUFFERIZATION_OPS diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp --- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp @@ -132,34 +132,34 @@ } RankedTensorType tensorType = tensor.getType().cast(); SmallVector dynamicSizes; - if (!copy) { - // Compute the dynamic part of the shape. - // First try to query the shape via ReifyRankedShapedTypeOpInterface. - bool reifiedShapes = false; - if (shapedValue.getType().isa() && - shapedValue.isa()) { - if (auto rankedOp = dyn_cast_or_null( - shapedValue.getDefiningOp())) { - ReifiedRankedShapedTypeDims resultDims; - if (succeeded(rankedOp.reifyResultShapes(b, resultDims))) { - reifiedShapes = true; - auto &shape = - resultDims[shapedValue.cast().getResultNumber()]; - for (const auto &dim : enumerate(tensorType.getShape())) - if (ShapedType::isDynamic(dim.value())) - dynamicSizes.push_back(shape[dim.index()]); - } + // Compute the dynamic part of the shape. + // First try to query the shape via ReifyRankedShapedTypeOpInterface. + bool reifiedShapes = false; + if (shapedValue.getType().isa() && + shapedValue.isa()) { + if (auto rankedOp = dyn_cast_or_null( + shapedValue.getDefiningOp())) { + ReifiedRankedShapedTypeDims resultDims; + if (succeeded(rankedOp.reifyResultShapes(b, resultDims))) { + reifiedShapes = true; + auto &shape = + resultDims[shapedValue.cast().getResultNumber()]; + for (const auto &dim : enumerate(tensorType.getShape())) + if (ShapedType::isDynamic(dim.value())) + dynamicSizes.push_back(shape[dim.index()]); } } - - // If the shape could not be reified, create DimOps. - if (!reifiedShapes) - populateDynamicDimSizes(b, loc, tensor, dynamicSizes); } + // If the shape could not be reified, create DimOps. + if (!reifiedShapes) + populateDynamicDimSizes(b, loc, tensor, dynamicSizes); + // Create AllocTensorOp. - auto allocTensorOp = b.create(loc, tensorType, dynamicSizes, - copy ? tensor : Value()); + auto allocTensorOp = + b.create(loc, tensorType, dynamicSizes, + /*sizeHint=*/Value(), /*memorySpace=*/Attribute(), + /*copy=*/copy ? tensor : Value()); allocTensorOp->setAttr(BufferizationDialect::kEscapeAttrName, b.getBoolArrayAttr({escape})); @@ -261,7 +261,9 @@ opResult.getUses(), [](OpOperand &use) { return &use; })); for (OpOperand *use : uses) { // Do not update the alloc_tensor op that we just created. - if (use->getOwner() == copy->getDefiningOp()) + if (copy->getDefiningOp() + .getRegion() + .findAncestorOpInRegion(*use->getOwner())) continue; // tensor.dim ops may have been created to be used as alloc_tensor op // dynamic extents. Do not update these either. 
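Because the `copy` value no longer carries the dynamic extents, callers that previously relied on it now have to materialize the dynamic sizes themselves. A hedged sketch of that pattern, mirroring the `tensor.dim` construction added to TensorCopyInsertion.cpp below (the helper name `materializeDynamicSizes` is made up for illustration):

```cpp
// Sketch: collect one index value per dynamic dimension of `tensor`, so the
// values can be passed as `dynamicSizes` to the new AllocTensorOp builder.
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"

using namespace mlir;

static SmallVector<Value> materializeDynamicSizes(OpBuilder &b, Location loc,
                                                  Value tensor) {
  SmallVector<Value> dynamicSizes;
  auto type = tensor.getType().cast<RankedTensorType>();
  for (int64_t i = 0, e = type.getRank(); i < e; ++i)
    if (type.isDynamicDim(i))
      dynamicSizes.push_back(b.create<tensor::DimOp>(
          loc, tensor, b.create<arith::ConstantIndexOp>(loc, i)));
  return dynamicSizes;
}
```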
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp --- a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp @@ -153,6 +153,7 @@ const BufferizationOptions &options) { OpBuilder::InsertionGuard g(rewriter); Location loc = getLoc(); + YieldOp yieldOp = getTerminator(); // Nothing to do for dead AllocTensorOps. if (getOperation()->getUses().empty()) { @@ -160,33 +161,40 @@ return success(); } - // Get "copy" buffer. - Value copyBuffer; - if (getCopy()) { - FailureOr maybeCopyBuffer = getBuffer(rewriter, getCopy(), options); - if (failed(maybeCopyBuffer)) - return failure(); - copyBuffer = *maybeCopyBuffer; - } - // Create memory allocation. auto allocType = bufferization::getBufferType(getResult(), options); if (failed(allocType)) return failure(); SmallVector dynamicDims = getDynamicSizes(); - if (getCopy()) { - assert(dynamicDims.empty() && "expected either `copy` or `dynamicDims`"); - populateDynamicDimSizes(rewriter, loc, copyBuffer, dynamicDims); - } FailureOr alloc = options.createAlloc( rewriter, loc, allocType->cast(), dynamicDims); if (failed(alloc)) return failure(); - // Create memory copy (if any). - if (getCopy()) { - if (failed(options.createMemCpy(rewriter, loc, copyBuffer, *alloc))) + if (hasInitializer()) { + // Bufferize terminator to memcpy. + Operation *yieldedDef = yieldOp.getTensor().getDefiningOp(); + if (yieldedDef && getRegion().findAncestorOpInRegion(*yieldedDef)) { + rewriter.setInsertionPointAfter(yieldedDef); + } else { + rewriter.setInsertionPoint(yieldOp); + } + auto cpySrcBuffer = getBuffer(rewriter, yieldOp.getTensor(), options); + if (failed(cpySrcBuffer)) + return failure(); + if (failed(options.createMemCpy(rewriter, yieldOp->getLoc(), *cpySrcBuffer, + *alloc))) return failure(); + rewriter.eraseOp(yieldOp); + + // Move region after the alloc. + rewriter.setInsertionPointAfter(getOperation()); + if (!getBlockArgument().getUses().empty()) { + Value bbargReplacement = rewriter.create(loc, *alloc); + rewriter.replaceAllUsesWith(getBlockArgument(), bbargReplacement); + } + getOperation()->getBlock()->getOperations().splice( + getOperation()->getIterator(), getRegion().front().getOperations()); } // Should the buffer be deallocated? @@ -208,46 +216,25 @@ bool AllocTensorOp::resultBufferizesToMemoryWrite(OpResult opResult, const AnalysisState &state) { - // AllocTensorOps do not write unless they have a `copy` value. - return static_cast(getCopy()); -} - -bool AllocTensorOp::bufferizesToMemoryRead(OpOperand &opOperand, - const AnalysisState &state) { - assert(opOperand.getOperandNumber() == getNumOperands() - 1 && - "expected copy operand"); - return true; -} - -bool AllocTensorOp::bufferizesToMemoryWrite(OpOperand &opOperand, - const AnalysisState &state) { - assert(opOperand.getOperandNumber() == getNumOperands() - 1 && - "expected copy operand"); - return false; -} - -AliasingOpResultList -AllocTensorOp::getAliasingOpResults(OpOperand &opOperand, - const AnalysisState &state) { - // This is a new allocation. It does not alias with any other buffer. - return {}; + // AllocTensorOps do not write unless they have an initializer. 
+ return hasInitializer(); } FailureOr AllocTensorOp::getBufferType( Value value, const BufferizationOptions &options, const DenseMap &fixedTypes) { - assert(value == getResult() && "invalid value"); + assert(getOwnerOfValue(value) == getOperation() && "invalid value"); // Compute memory space of this allocation. Attribute memorySpace; if (getMemorySpace().has_value()) { memorySpace = *getMemorySpace(); - } else if (getCopy()) { - auto copyBufferType = - bufferization::getBufferType(getCopy(), options, fixedTypes); - if (failed(copyBufferType)) + } else if (hasCopyInitializer() && !value.isa()) { + FailureOr yieldedType = bufferization::getBufferType( + getTerminator().getTensor(), options, fixedTypes); + if (failed(yieldedType)) return failure(); - memorySpace = copyBufferType->getMemorySpace(); + memorySpace = yieldedType->getMemorySpace(); } else if (options.defaultMemorySpace.has_value()) { memorySpace = *options.defaultMemorySpace; } else { @@ -258,14 +245,10 @@ } LogicalResult AllocTensorOp::verify() { - if (getCopy() && !getDynamicSizes().empty()) - return emitError("dynamic sizes not needed when copying a tensor"); - if (!getCopy() && getType().getNumDynamicDims() != - static_cast(getDynamicSizes().size())) + if (getType().getNumDynamicDims() != + static_cast(getDynamicSizes().size())) return emitError("expected ") << getType().getNumDynamicDims() << " dynamic sizes"; - if (getCopy() && getCopy().getType() != getType()) - return emitError("expected that `copy` and return type match"); // For sparse tensor allocation, we require that none of its // uses escapes the function boundary directly. @@ -276,28 +259,44 @@ return emitError("sparse tensor allocation should not escape function"); } - return success(); -} + if (getRegion().getNumArguments() != 1) + return emitError("expected 1 block argument, found ") + << getRegion().getNumArguments(); -void AllocTensorOp::build(OpBuilder &builder, OperationState &result, - RankedTensorType type, ValueRange dynamicSizes) { - build(builder, result, type, dynamicSizes, /*copy=*/Value(), - /*size_hint=*/Value(), - /*memory_space=*/IntegerAttr()); + if (getRegion().getArgument(0).getType() != getType()) + return emitError("expected ") << getType() << " block argument"; + + return success(); } void AllocTensorOp::build(OpBuilder &builder, OperationState &result, RankedTensorType type, ValueRange dynamicSizes, - Value copy) { - build(builder, result, type, dynamicSizes, copy, /*size_hint=*/Value(), - /*memory_space=*/IntegerAttr()); + Attribute memorySpace) { + build(builder, result, type, dynamicSizes, /*size_hint=*/Value(), memorySpace, + /*copy=*/Value()); } void AllocTensorOp::build(OpBuilder &builder, OperationState &result, - TensorType type, ValueRange dynamicSizes, Value copy, - IntegerAttr memorySpace) { - build(builder, result, type, dynamicSizes, copy, /*size_hint=*/Value(), - memorySpace); + RankedTensorType type, ValueRange dynamicSizes, + Value sizeHint, Attribute memorySpace, Value copy) { + OpBuilder::InsertionGuard g(builder); + assert(type.getNumDynamicDims() == + static_cast(dynamicSizes.size()) && + "invalid number of dynamic dims"); + result.addOperands(dynamicSizes); + if (sizeHint) + result.addOperands(sizeHint); + result.addAttribute( + getOperandSegmentSizesAttrName(result.name), + builder.getDenseI32ArrayAttr( + {static_cast(dynamicSizes.size()), (sizeHint ? 
1 : 0)})); + if (memorySpace) { + result.addAttribute(getMemorySpaceAttrName(result.name), memorySpace); + } + result.addTypes(type); + Region *r = result.addRegion(); + Block *block = builder.createBlock(r, r->begin(), type, result.location); + builder.create(result.location, copy ? copy : block->getArgument(0)); } namespace { @@ -317,7 +316,7 @@ LogicalResult matchAndRewrite(AllocTensorOp op, PatternRewriter &rewriter) const override { - if (op.getCopy()) + if (op.hasInitializer()) return failure(); SmallVector newShape = llvm::to_vector(op.getType().getShape()); SmallVector newDynamicSizes; @@ -338,7 +337,9 @@ if (newType == op.getType()) return failure(); auto newOp = rewriter.create( - op.getLoc(), newType, newDynamicSizes, /*copy=*/Value()); + op.getLoc(), newType, newDynamicSizes, /*sizeHint=*/op.getSizeHint(), + /*memorySpace=*/op.getMemorySpace().has_value() ? *op.getMemorySpace() + : Attribute()); rewriter.replaceOpWithNewOp(op, op.getType(), newOp); return success(); } @@ -373,63 +374,100 @@ llvm::seq(0, getType().getRank()), [&](int64_t dim) -> Value { if (isDynamicDim(dim)) return getDynamicSize(builder, dim); - return builder.create(getLoc(), - getStaticSize(dim)); + return builder.create( + getLoc(), getType().getDimSize(dim)); })); reifiedReturnShapes.emplace_back(std::move(shapes)); return success(); } ParseResult AllocTensorOp::parse(OpAsmParser &parser, OperationState &result) { - SmallVector dynamicSizesOperands; - if (parser.parseLParen() || parser.parseOperandList(dynamicSizesOperands) || - parser.parseRParen()) + Type indexType = parser.getBuilder().getIndexType(); + + // Parse dynamic sizes. + if (parser.parseLParen()) return failure(); + SmallVector dynamicSizesOperands; + SMLoc dynamicSizesLoc = parser.getCurrentLocation(); + if (parser.parseOperandList(dynamicSizesOperands) || parser.parseRParen() || + parser.resolveOperands(dynamicSizesOperands, indexType, dynamicSizesLoc, + result.operands)) + return failure(); + + // Parse optional copy operand. ParseResult copyKeyword = parser.parseOptionalKeyword("copy"); OpAsmParser::UnresolvedOperand copyOperand; if (copyKeyword.succeeded()) if (parser.parseLParen() || parser.parseOperand(copyOperand) || parser.parseRParen()) return failure(); + + // Parse optional size_hint operand. ParseResult sizeHintKeyword = parser.parseOptionalKeyword("size_hint"); OpAsmParser::UnresolvedOperand sizeHintOperand; if (sizeHintKeyword.succeeded()) - if (parser.parseEqual() || parser.parseOperand(sizeHintOperand)) + if (parser.parseLParen() || parser.parseOperand(sizeHintOperand) || + parser.parseRParen() || + parser.resolveOperand(sizeHintOperand, indexType, result.operands)) return failure(); - if (parser.parseOptionalAttrDict(result.attributes) || parser.parseColon()) + + // Parse attributes. + if (parser.parseOptionalAttrDict(result.attributes)) return failure(); + // Parse region. + Region *region = result.addRegion(); + ParseResult initKeyword; + if (copyKeyword.failed()) { + initKeyword = parser.parseOptionalKeyword("init"); + if (initKeyword.succeeded()) + if (parser.parseRegion(*region)) + return failure(); + } + + // Parse result type. 
TensorType type; - if (parser.parseCustomTypeWithFallback(type)) + if (parser.parseColon() || parser.parseCustomTypeWithFallback(type)) return failure(); result.addTypes(type); - Type indexType = parser.getBuilder().getIndexType(); - if (parser.resolveOperands(dynamicSizesOperands, indexType, result.operands)) - return failure(); - if (copyKeyword.succeeded()) - if (parser.resolveOperand(copyOperand, type, result.operands)) - return failure(); - if (sizeHintKeyword.succeeded()) - if (parser.resolveOperand(sizeHintOperand, indexType, result.operands)) - return failure(); - result.addAttribute(AllocTensorOp::getOperandSegmentSizeAttr(), + // Create region if none was parsed. + if (copyKeyword.succeeded() || initKeyword.failed()) { + OpBuilder b(parser.getContext()); + Block *block = + b.createBlock(region, region->begin(), type, result.location); + SmallVector copyValue; + if (copyKeyword.succeeded()) { + if (parser.resolveOperand(copyOperand, type, copyValue)) + return failure(); + } else { + copyValue.push_back(block->getArgument(0)); + } + b.create(result.location, copyValue[0]); + } + + // Add operand_segment_sizes. + result.addAttribute("operand_segment_sizes", parser.getBuilder().getDenseI32ArrayAttr( {static_cast(dynamicSizesOperands.size()), - static_cast(copyKeyword.succeeded()), static_cast(sizeHintKeyword.succeeded())})); return success(); } void AllocTensorOp::print(OpAsmPrinter &p) { p << "(" << getDynamicSizes() << ")"; - if (getCopy()) - p << " copy(" << getCopy() << ")"; + if (hasCopyInitializer()) + p << " copy(" << getTerminator().getTensor() << ")"; if (getSizeHint()) - p << " size_hint=" << getSizeHint(); + p << " size_hint(" << getSizeHint() << ")"; + if (hasInitializer() && !hasCopyInitializer()) { + p << " init "; + p.printRegion(getRegion()); + } p.printOptionalAttrDict((*this)->getAttrs(), /*elidedAttrs=*/{ AllocTensorOp::getOperandSegmentSizeAttr()}); - p << " : "; + p << ' ' << ":"; + p << ' '; auto type = getResult().getType(); if (auto validType = type.dyn_cast<::mlir::TensorType>()) p.printStrippedAttrOrType(validType); @@ -439,11 +477,22 @@ Value AllocTensorOp::getDynamicSize(OpBuilder &b, unsigned idx) { assert(isDynamicDim(idx) && "expected dynamic dim"); - if (getCopy()) - return b.create(getLoc(), getCopy(), idx); return getOperand(getIndexOfDynamicSize(idx)); } +YieldOp AllocTensorOp::getTerminator() { + return cast(getRegion().begin()->getTerminator()); +} + +bool AllocTensorOp::hasInitializer() { + return getTerminator().getTensor() != getBlockArgument(); +} + +bool AllocTensorOp::hasCopyInitializer() { + return hasInitializer() && + &getRegion().front().front() == getTerminator().getOperation(); +} + //===----------------------------------------------------------------------===// // CloneOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp --- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp @@ -357,9 +357,12 @@ protected: void notifyOperationRemoved(Operation *op) override { IRRewriter::notifyOperationRemoved(op); - erasedOps.insert(op); - // Erase if present. - toMemrefOps.erase(op); + // TODO: Remove this walk once we get notifications for nested removals. + op->walk([&](Operation *op) { + erasedOps.insert(op); + // Erase if present. 
+ toMemrefOps.erase(op); + }); } void notifyOperationInserted(Operation *op) override { diff --git a/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt --- a/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt @@ -21,6 +21,7 @@ MLIRBufferizationEnumsIncGen LINK_LIBS PUBLIC + MLIRArithDialect MLIRBufferizationDialect MLIRControlFlowInterfaces MLIRFuncDialect diff --git a/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp b/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp --- a/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp @@ -8,6 +8,7 @@ #include "mlir/Dialect/Bufferization/Transforms/Passes.h" +#include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" @@ -15,6 +16,7 @@ #include "mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h" #include "mlir/Dialect/Bufferization/Transforms/Transforms.h" #include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" namespace mlir { namespace bufferization { @@ -84,10 +86,16 @@ // Insert a tensor copy and replace all uses inside of repetitive regions. rewriter.setInsertionPoint(bufferizableOp); + Location loc = bufferizableOp->getLoc(); + auto tensorType = operand.getType().cast(); + SmallVector dynamicDims; + for (int64_t i = 0; i < tensorType.getRank(); ++i) + if (tensorType.isDynamicDim(i)) + dynamicDims.push_back(rewriter.create( + loc, operand, rewriter.create(loc, i))); auto tensorCopy = rewriter.create( - bufferizableOp->getLoc(), operand.getType().cast(), - /*dynamicSizes=*/ValueRange(), - /*copy=*/operand, /*memory_space=*/IntegerAttr()); + loc, tensorType, dynamicDims, /*sizeHint=*/Value(), + /*memory_space=*/Attribute(), /*copy=*/operand); for (OpOperand *use : usesInsideRegion) use->set(tensorCopy); } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -736,7 +736,7 @@ const auto resType = getSparseTensorType(op); if (!resType.hasEncoding()) return failure(); - if (op.getCopy()) + if (op.hasInitializer()) return rewriter.notifyMatchFailure(op, "tensor copy not implemented"); // Construct allocation for each field. diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -833,7 +833,7 @@ LogicalResult matchAndRewrite(bufferization::AllocTensorOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - if (op.getCopy()) + if (op.hasInitializer()) return rewriter.notifyMatchFailure(op, "sparse tensor copy not implemented"); Location loc = op.getLoc(); @@ -849,7 +849,7 @@ dimSizes.push_back( stt.isDynamicDim(d) ? 
adaptor.getOperands()[operandCtr++] - : constantIndex(rewriter, loc, op.getStaticSize(d))); + : constantIndex(rewriter, loc, op.getType().getDimSize(d))); } // Generate the call to construct empty tensor. The sizes are // explicitly defined by the arguments to the alloc operator. diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp @@ -53,10 +53,9 @@ Value val = op->get(); // Check allocation, with zero alloc when required. if (auto alloc = val.getDefiningOp()) { - Value copy = alloc.getCopy(); - if (isZero) - return copy && isZeroValue(copy); - return !copy; + return isZero ? alloc.hasInitializer() && + isZeroValue(alloc.getTerminator().getTensor()) + : !alloc.hasInitializer(); } // Last resort for zero alloc: the whole value is zero. return isZero && isZeroValue(val); @@ -232,7 +231,9 @@ Value zero = constantZero(rewriter, op.getLoc(), op.getResult(0).getType()); AllocTensorOp a = op.getDpsInitOperand(0)->get().getDefiningOp(); - rewriter.updateRootInPlace(a, [&]() { a.getCopyMutable().assign(zero); }); + rewriter.updateRootInPlace(a.getTerminator(), [&]() { + a.getTerminator().getTensorMutable().assign(zero); + }); rewriter.replaceOp(op, op.getDpsInitOperand(0)->get()); return success(); } @@ -328,10 +329,13 @@ Value init = prod.getDpsInitOperand(0) ->get() .getDefiningOp() - .getCopy(); + .getTerminator() + .getTensor(); AllocTensorOp a = op.getDpsInitOperand(0)->get().getDefiningOp(); - rewriter.updateRootInPlace(a, [&]() { a.getCopyMutable().assign(init); }); + rewriter.updateRootInPlace(a.getTerminator(), [&]() { + a.getTerminator().getTensorMutable().assign(init); + }); } // Replace consumer with fused operation. Old producer // and consumer ops will be removed by DCE. @@ -390,11 +394,11 @@ // %t = sparse_tensor.cast %tmp Value nnz = rewriter.create(loc, srcTensor); RankedTensorType cooTp = getUnorderedCOOFromType(dstTp); - Value cooBuffer = - rewriter - .create(loc, cooTp, dstDynSizes, Value(), - /*sizeHint=*/nnz, Attribute()) - .getResult(); + Value cooBuffer = rewriter + .create(loc, cooTp, dstDynSizes, + /*sizeHint=*/nnz, + /*memorySpace=*/Attribute()) + .getResult(); ForeachOp foreachOp = rewriter.create( loc, srcTensor, cooBuffer, @@ -792,8 +796,9 @@ // Ensure that mutating `srcRTT` didn't invalidate `dimRank`. 
assert(static_cast(srcRTT.getRank()) == dimRank); tmpCoo = rewriter - .create(loc, srcRTT, dynSrcSizes, Value(), - /*sizeHint=*/nnz, Attribute()) + .create(loc, srcRTT, dynSrcSizes, + /*sizeHint=*/nnz, + /*memorySpace=*/Attribute()) .getResult(); auto foreachOp = rewriter.create( loc, src, tmpCoo, @@ -853,8 +858,9 @@ getDynamicSizes(dstTp, srcSizes, dynDstSizes); Value dst = rewriter .create(loc, dstTp.getRankedTensorType(), - dynDstSizes, Value(), - /*sizeHint=*/nnz, Attribute()) + dynDstSizes, + /*sizeHint=*/nnz, + /*memorySpace=*/Attribute()) .getResult(); SmallVector indices(dstLvlRank); auto foreachOp = rewriter.create( @@ -1044,11 +1050,11 @@ .getResult(0); RankedTensorType cooTp = getUnorderedCOOFromTypeWithOrdering(dstTp, dstTp.getDimToLvlMap()); - Value cooBuffer = - rewriter - .create(loc, cooTp, dynSizesArray, Value(), - /*sizeHint=*/nnz, Attribute()) - .getResult(); + Value cooBuffer = rewriter + .create(loc, cooTp, dynSizesArray, + /*sizeHint=*/nnz, + /*memorySpace=*/Attribute()) + .getResult(); Type eltTp = dstTp.getElementType(); Value value = genAllocaScalar(rewriter, loc, eltTp); diff --git a/mlir/python/mlir/dialects/_bufferization_ops_ext.py b/mlir/python/mlir/dialects/_bufferization_ops_ext.py --- a/mlir/python/mlir/dialects/_bufferization_ops_ext.py +++ b/mlir/python/mlir/dialects/_bufferization_ops_ext.py @@ -4,10 +4,11 @@ try: from typing import Sequence, Union + from ..dialects import bufferization from ..ir import * from ._ods_common import get_default_loc_context - from typing import Any, List, Union + from typing import Any, List, Optional, Union except ImportError as e: raise RuntimeError("Error loading imports from extension module") from e @@ -15,24 +16,41 @@ class AllocTensorOp: """Extends the bufferization.alloc_tensor op.""" - def __init__(self, - tensor_type: Type, - dynamic_sizes: Sequence[Value], - copy: Value, - size_hint: Value, - escape: BoolAttr, - *, - loc=None, - ip=None): + def __init__( + self, + tensor_type: Type, + dynamic_sizes: Sequence[Value], + copy: Optional[Value], + size_hint: Value, + escape: BoolAttr, + *, + loc=None, + ip=None + ): """Constructs an `alloc_tensor` with static and/or dynamic sizes.""" context = get_default_loc_context(loc) attributes = {} if escape: attributes["escape"] = escape op = self.build_generic( + regions=1, results=[tensor_type], - operands=[dynamic_sizes, copy, size_hint], + operands=[dynamic_sizes, size_hint], attributes=attributes, loc=loc, - ip=ip) + ip=ip, + ) OpView.__init__(self, op) + + # Create region with terminator. + self.regions[0].blocks.append(tensor_type) + with InsertionPoint(self.regions[0].blocks[0]): + if copy: + bufferization.YieldOp(copy) + else: + bufferization.YieldOp(self.regions[0].blocks[0].arguments[0]) + + @property + def block(self): + """Returns the then block of the operation.""" + return self.regions[0].blocks[0] diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir @@ -1,12 +1,12 @@ -// RUN: mlir-opt %s -one-shot-bufferize="allow-unknown-ops" -split-input-file | FileCheck %s +// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-unknown-ops" -split-input-file | FileCheck %s // Run fuzzer with different seeds. 
-// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null -// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null -// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null +// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null +// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null +// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null // Run with top-down analysis. -// RUN: mlir-opt %s -one-shot-bufferize="allow-unknown-ops analysis-heuristic=top-down" -split-input-file | FileCheck %s --check-prefix=CHECK-TOP-DOWN-ANALYSIS +// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-unknown-ops analysis-heuristic=top-down" -split-input-file | FileCheck %s --check-prefix=CHECK-TOP-DOWN-ANALYSIS // Test without analysis: Insert a copy on every buffer write. // RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-unknown-ops copy-before-write" -split-input-file | FileCheck %s --check-prefix=CHECK-COPY-BEFORE-WRITE @@ -199,3 +199,60 @@ %3 = tensor.extract %0[%pos3] : tensor<100xf32> return %2, %3 : f32, f32 } + +// ----- + +// CHECK-LABEL: @alloc_tensor_with_init( +func.func @alloc_tensor_with_init(%sz: index, %f: f32) { + // CHECK: %[[alloc:.*]] = memref.alloc{{.*}} : memref + // CHECK: linalg.fill {{.*}} outs(%[[alloc]] : memref) + %0 = bufferization.alloc_tensor(%sz) { memory_space = 4 } init { + ^bb0(%arg0: tensor): + %1 = linalg.fill ins(%f : f32) outs(%arg0 : tensor) -> tensor + bufferization.yield %1 : tensor + } : tensor + // CHECK: %[[t:.*]] = bufferization.to_tensor %[[alloc]] + // CHECK: "dummy.some_use"(%[[t]]) + "dummy.some_use"(%0) : (tensor) -> () + return +} + +// ----- + +// CHECK-LABEL: @alloc_tensor_with_init_conflict( +func.func @alloc_tensor_with_init_conflict( + %t: tensor<10xf32>, %sz: index, %f: f32) { + // CHECK: %[[alloc:.*]] = memref.alloc{{.*}} : memref<10xf32, 4> + + // A second alloc is needed because the linalg.fill bufferizes out-of-place. 
+ // CHECK: %[[alloc2:.*]] = memref.alloc{{.*}} : memref<10xf32> + // CHECK: linalg.fill {{.*}} outs(%[[alloc2]] : memref<10xf32>) + + // CHECK: memref.copy %[[alloc2]], %[[alloc]] + // CHECK: memref.dealloc %[[alloc2]] + %0 = bufferization.alloc_tensor() { memory_space = 4 } init { + ^bb0(%arg0: tensor<10xf32>): + %1 = linalg.fill ins(%f : f32) outs(%t : tensor<10xf32>) -> tensor<10xf32> + bufferization.yield %1 : tensor<10xf32> + } : tensor<10xf32> + + // CHECK: %[[t:.*]] = bufferization.to_tensor %[[alloc]] + // CHECK: "dummy.some_use"(%[[t]]) + "dummy.some_use"(%0) : (tensor<10xf32>) -> () + return +} + +// ----- + +// CHECK-LABEL: func @alloc_tensor_0d( +// CHECK-SAME: %[[t:.*]]: tensor +func.func @alloc_tensor_0d(%t: tensor) -> f32 { + // CHECK: %[[m:.*]] = bufferization.to_memref %[[t]] + // CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref + // CHECK: memref.copy %[[m]], %[[alloc]] + %0 = bufferization.alloc_tensor() copy(%t) : tensor + // CHECK: %[[r:.*]] = memref.load %[[alloc]][] + %1 = tensor.extract %0[] : tensor + // CHECK: return %[[r]] + return %1 : f32 +} diff --git a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir @@ -9,9 +9,10 @@ func.func @read_after_write_conflict(%t: tensor, %idx: index, %f: f32) -> (tensor, tensor) { - // CHECK: %[[copy:.*]] = bufferization.alloc_tensor() copy(%[[t]]) {bufferization.escape = [false]} : tensor - // CHECK-FUNC: bufferization.alloc_tensor() copy(%{{.*}}) {bufferization.escape = [true]} : tensor - // CHECK-NO-DEALLOC: bufferization.alloc_tensor() copy(%{{.*}}) {bufferization.escape = [true]} : tensor + // CHECK: %[[dim:.*]] = tensor.dim %[[t]] + // CHECK: %[[copy:.*]] = bufferization.alloc_tensor(%[[dim]]) copy(%[[t]]) {bufferization.escape = [false]} : tensor + // CHECK-FUNC: bufferization.alloc_tensor({{.*}}) copy(%{{.*}}) {bufferization.escape = [true]} : tensor + // CHECK-NO-DEALLOC: bufferization.alloc_tensor({{.*}}) copy(%{{.*}}) {bufferization.escape = [true]} : tensor // CHECK: %[[insert:.*]] = tensor.insert %{{.*}} into %[[copy]] %0 = tensor.insert %f into %t[%idx] : tensor // CHECK: return %[[insert]], %[[t]] diff --git a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --test-transform-dialect-interpreter %s -split-input-file -verify-diagnostics | FileCheck %s +// RUN: mlir-opt --test-transform-dialect-interpreter %s -cse -split-input-file -verify-diagnostics | FileCheck %s // Test One-Shot Bufferize. 
diff --git a/mlir/test/Dialect/Bufferization/invalid.mlir b/mlir/test/Dialect/Bufferization/invalid.mlir --- a/mlir/test/Dialect/Bufferization/invalid.mlir +++ b/mlir/test/Dialect/Bufferization/invalid.mlir @@ -19,8 +19,8 @@ // ----- func.func @alloc_tensor_copy_and_dims(%t: tensor, %sz: index) { - // expected-error @+1{{dynamic sizes not needed when copying a tensor}} - %0 = bufferization.alloc_tensor(%sz) copy(%t) : tensor + // expected-error @+1{{expected 1 dynamic sizes}} + %0 = bufferization.alloc_tensor() copy(%t) : tensor return } @@ -81,6 +81,28 @@ // ----- +func.func @alloc_tensor_type_mismatch(%sz: index) { + // expected-error @+1{{expected 'tensor' block argument}} + %0 = bufferization.alloc_tensor(%sz) init { + ^bb0(%arg0: tensor<5xf32>): + bufferization.yield %arg0 : tensor<5xf32> + } : tensor + return +} + +// ----- + +func.func @alloc_tensor_invalid_num_bbargs(%sz: index) { + // expected-error @+1{{expected 1 block argument, found 2}} + %0 = bufferization.alloc_tensor(%sz) init { + ^bb0(%arg0: tensor, %arg1: tensor): + bufferization.yield %arg0 : tensor + } : tensor + return +} + +// ----- + // expected-error @+1{{invalid value for 'bufferization.access'}} func.func private @invalid_buffer_access_type(tensor<*xf32> {bufferization.access = "foo"}) diff --git a/mlir/test/Dialect/Bufferization/ops.mlir b/mlir/test/Dialect/Bufferization/ops.mlir --- a/mlir/test/Dialect/Bufferization/ops.mlir +++ b/mlir/test/Dialect/Bufferization/ops.mlir @@ -27,28 +27,58 @@ return %tensor : tensor<2xf32> } -// CHECK-LABEL: func @test_alloc_tensor_op -func.func @test_alloc_tensor_op(%t: tensor, %sz: index) - -> tensor -{ +// CHECK-LABEL: func @test_alloc_tensor_op( +// CHECK-SAME: %[[t:.*]]: tensor, %[[sz:.*]]: index +func.func @test_alloc_tensor_op(%t: tensor, %sz: index, %f: f32) { // CHECK: bufferization.alloc_tensor(%{{.*}}) : tensor %0 = bufferization.alloc_tensor(%sz) : tensor - // CHECK: bufferization.alloc_tensor() copy(%{{.*}}) : tensor - %1 = bufferization.alloc_tensor() copy(%t) : tensor + + // CHECK: bufferization.alloc_tensor(%{{.*}}) copy(%{{.*}}) : tensor + %1 = bufferization.alloc_tensor(%sz) copy(%t) : tensor + // CHECK: bufferization.alloc_tensor() : tensor<5x6xf32> %2 = bufferization.alloc_tensor() : tensor<5x6xf32> + // CHECK: bufferization.alloc_tensor(%{{.*}}, %{{.*}}) : tensor %3 = bufferization.alloc_tensor(%sz, %sz) : tensor - // CHECK: bufferization.alloc_tensor() copy(%{{.*}}) {escape = true} : tensor - %4 = bufferization.alloc_tensor() copy(%t) {escape = true} : tensor - // CHECK: bufferization.alloc_tensor() copy(%{{.*}}) {escape = false} : tensor - %5 = bufferization.alloc_tensor() copy(%t) {escape = false} : tensor + + // CHECK: bufferization.alloc_tensor(%{{.*}}) copy(%{{.*}}) {escape = true} : tensor + %4 = bufferization.alloc_tensor(%sz) copy(%t) {escape = true} : tensor + + // CHECK: bufferization.alloc_tensor(%{{.*}}) copy(%{{.*}}) {escape = false} : tensor + %5 = bufferization.alloc_tensor(%sz) copy(%t) {escape = false} : tensor + + // CHECK: bufferization.alloc_tensor() size_hint( %c100 = arith.constant 100 : index - // CHECK: bufferization.alloc_tensor() size_hint= - %6 = bufferization.alloc_tensor() size_hint=%c100 : tensor<100x100xf64, #CSR> + %6 = bufferization.alloc_tensor() size_hint(%c100) : tensor<100x100xf64, #CSR> + // CHECK: bufferization.alloc_tensor(%{{.+}}) {memory_space = "foo"} : tensor %7 = bufferization.alloc_tensor(%sz) {memory_space = "foo"} : tensor - return %1 : tensor + + // CHECK: bufferization.alloc_tensor(%[[sz]]) : tensor + %8 = 
bufferization.alloc_tensor(%sz) init { + ^bb0(%arg0: tensor): + bufferization.yield %arg0 : tensor + } : tensor + + // CHECK: bufferization.alloc_tensor(%[[sz]]) copy(%[[t]]) : tensor + %9 = bufferization.alloc_tensor(%sz) init { + ^bb0(%arg0: tensor): + bufferization.yield %t : tensor + } : tensor + + // CHECK: bufferization.alloc_tensor(%[[sz]]) init { + // CHECK: ^{{.*}}(%[[bbarg:.*]]: tensor): + // CHECK: %[[filled:.*]] = linalg.fill {{.*}} outs(%[[bbarg]] : tensor) + // CHECK: bufferization.yield %[[filled]] + // CHECK: } + %10 = bufferization.alloc_tensor(%sz) init { + ^bb0(%arg0: tensor): + %filled = linalg.fill ins(%f : f32) outs(%arg0 : tensor) -> tensor + bufferization.yield %filled : tensor + } : tensor + + return } // CHECK-LABEL: func @test_dealloc_tensor_op diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-tensor-copy-insertion.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-tensor-copy-insertion.mlir --- a/mlir/test/Dialect/SCF/one-shot-bufferize-tensor-copy-insertion.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize-tensor-copy-insertion.mlir @@ -7,8 +7,8 @@ %lb : index, %ub : index, %step : index) -> (tensor, tensor) { - // CHECK: %[[A_copy:.*]] = bufferization.alloc_tensor() copy(%[[A]]) {bufferization.escape = [false]} : tensor - // CHECK: %[[B_copy:.*]] = bufferization.alloc_tensor() copy(%[[B]]) {bufferization.escape = [false]} : tensor + // CHECK: %[[A_copy:.*]] = bufferization.alloc_tensor(%{{.*}}) copy(%[[A]]) {bufferization.escape = [false]} : tensor + // CHECK: %[[B_copy:.*]] = bufferization.alloc_tensor(%{{.*}}) copy(%[[B]]) {bufferization.escape = [false]} : tensor // CHECK: %[[for:.*]]:2 = scf.for {{.*}} iter_args(%[[iter1:.*]] = %[[A_copy]], %[[iter2:.*]] = %[[B_copy]]) %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B) -> (tensor, tensor) @@ -28,15 +28,15 @@ %lb : index, %ub : index, %step : index) -> (tensor, tensor) { - // CHECK: %[[A_copy:.*]] = bufferization.alloc_tensor() copy(%[[A]]) {bufferization.escape = [false]} : tensor - // CHECK: %[[B_copy:.*]] = bufferization.alloc_tensor() copy(%[[B]]) {bufferization.escape = [false]} : tensor + // CHECK: %[[A_copy:.*]] = bufferization.alloc_tensor(%{{.*}}) copy(%[[A]]) {bufferization.escape = [false]} : tensor + // CHECK: %[[B_copy:.*]] = bufferization.alloc_tensor(%{{.*}}) copy(%[[B]]) {bufferization.escape = [false]} : tensor // CHECK: %[[for:.*]]:2 = scf.for {{.*}} iter_args(%[[iter1:.*]] = %[[A_copy]], %[[iter2:.*]] = %[[B_copy]]) %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B) -> (tensor, tensor) { // Yield tensors in different order. 
- // CHECK-DAG: %[[yield1:.*]] = bufferization.alloc_tensor() copy(%[[iter2]]) {bufferization.escape = [true]} : tensor - // CHECK-DAG: %[[yield2:.*]] = bufferization.alloc_tensor() copy(%[[iter1]]) {bufferization.escape = [true]} : tensor + // CHECK-DAG: %[[yield1:.*]] = bufferization.alloc_tensor(%{{.*}}) copy(%[[iter2]]) {bufferization.escape = [true]} : tensor + // CHECK-DAG: %[[yield2:.*]] = bufferization.alloc_tensor(%{{.*}}) copy(%[[iter1]]) {bufferization.escape = [true]} : tensor // CHECK: scf.yield %[[yield1]], %[[yield2]] scf.yield %tB, %tA : tensor, tensor } diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -371,7 +371,7 @@ // CHECK: %[[A2:.*]] = memref.alloc(%[[M2]]) : memref // CHECK: %[[A3:.*]] = memref.alloc(%[[HINT]]) : memref func.func @sparse_alloc_coo_with_size_hint(%arg0: index) -> tensor<10x20xf64, #Coo> { - %0 = bufferization.alloc_tensor() size_hint=%arg0 : tensor<10x20xf64, #Coo> + %0 = bufferization.alloc_tensor() size_hint(%arg0) : tensor<10x20xf64, #Coo> %1 = sparse_tensor.load %0 : tensor<10x20xf64, #Coo> return %1 : tensor<10x20xf64, #Coo> } diff --git a/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir b/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir --- a/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir @@ -21,7 +21,7 @@ // CHECK: %[[D0:.*]] = memref.load %[[DS]]{{\[}}%[[C0]]] // CHECK: %[[D1:.*]] = memref.load %[[DS]]{{\[}}%[[C1]]] // CHECK: %[[N:.*]] = call @getSparseTensorReaderNNZ(%[[R]]) -// CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]]) size_hint=%[[N]] +// CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]]) size_hint(%[[N]]) // CHECK: %[[VB:.*]] = memref.alloca() // CHECK: %[[T2:.*]] = scf.for %{{.*}} = %[[C0]] to %[[N]] step %[[C1]] iter_args(%[[A2:.*]] = %[[T]]) // CHECK: func.call @getSparseTensorReaderNextF32(%[[R]], %[[DS]], %[[VB]]) @@ -52,7 +52,7 @@ // CHECK: %[[D0:.*]] = memref.load %[[DS]]{{\[}}%[[C0]]] // CHECK: %[[D1:.*]] = memref.load %[[DS]]{{\[}}%[[C1]]] // CHECK: %[[N:.*]] = call @getSparseTensorReaderNNZ(%[[R]]) -// CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]]) size_hint=%[[N]] +// CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]]) size_hint(%[[N]]) // CHECK: %[[VB:.*]] = memref.alloca() // CHECK: %[[T2:.*]] = scf.for %{{.*}} = %[[C0]] to %[[N]] step %[[C1]] iter_args(%[[A2:.*]] = %[[T]]) // CHECK: func.call @getSparseTensorReaderNextF32(%[[R]], %[[DS]], %[[VB]]) diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir --- a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir @@ -267,9 +267,11 @@ // CHECK-SAME: %[[t:.*]]: memref> func.func @pad_memory_space(%t: tensor, %h1: index, %f: f32, %pos: index) -> f32 { + %c0 = arith.constant 0 : index + %d = tensor.dim %t, %c0 : tensor // CHECK: %[[alloc_tensor:.*]] = memref.alloc{{.*}} : memref // CHECK: memref.copy %[[t]], %[[alloc_tensor]] - %0 = bufferization.alloc_tensor() copy(%t) + %0 = bufferization.alloc_tensor(%d) copy(%t) {memory_space = 3 : i64} : tensor // CHECK: %[[padded_alloc:.*]] = memref.alloc() {{.*}} : memref<15xf32, 3> // CHECK: linalg.map