diff --git a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h
--- a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h
+++ b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h
@@ -204,10 +204,10 @@
   /// Set the inPlace bufferization spec to true.
   /// Merge result's and operand's aliasing sets and iterate to a fixed point.
-  void bufferizeInPlace(OpResult result, OpOperand &operand);
+  void bufferizeInPlace(OpOperand &operand, BufferizationState &state);

   /// Set the inPlace bufferization spec to false.
-  void bufferizeOutOfPlace(OpResult result);
+  void bufferizeOutOfPlace(OpOperand &operand);

   /// Return true if `v1` and `v2` bufferize to equivalent buffers.
   bool areEquivalentBufferizedValues(Value v1, Value v2) const {
@@ -234,10 +234,10 @@
   void applyOnAliases(Value v, function_ref<void(Value)> fun) const;

   /// Mark a value as in-place bufferized.
-  void markInPlace(OpResult v) { inplaceBufferized.insert(v); }
+  void markInPlace(OpOperand &o) { inplaceBufferized.insert(&o); }

   /// Return `true` if a value was marked as in-place bufferized.
-  bool isInPlace(OpResult opResult) const;
+  bool isInPlace(OpOperand &opOperand) const;

 private:
   /// llvm::EquivalenceClasses wants comparable elements. This comparator uses
@@ -255,7 +255,7 @@
   EquivalenceClassRangeType getAliases(Value v) const;

-  /// Set of all OpResults that were decided to bufferize in-place.
-  llvm::DenseSet<OpResult> inplaceBufferized;
+  /// Set of all OpOperands that were decided to bufferize in-place.
+  llvm::DenseSet<OpOperand *> inplaceBufferized;

   /// Auxiliary structure to store all the values a given value may alias with.
   /// Alias information is "may be" conservative: In the presence of branches, a
@@ -382,7 +382,7 @@
   Value lookupBuffer(RewriterBase &rewriter, Value tensor) const;

-  /// Return `true` if the given OpResult has been decided to bufferize inplace.
-  bool isInPlace(OpResult opResult) const;
+  /// Return `true` if the given OpOperand has been decided to bufferize inplace.
+  bool isInPlace(OpOperand &opOperand) const;

   /// Return the result buffer (memref) for a given OpResult (tensor). Allocate
   /// a new buffer and copy over data from the existing buffer if out-of-place
diff --git a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.td b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.td
--- a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.td
+++ b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.td
@@ -104,18 +104,14 @@
     >,
     InterfaceMethod<
       /*desc=*/[{
-        Return `true` if the given OpResult must bufferize in-place with its
-        corresponding aliasing OpOperand. Alias sets and inplace attributes
-        will be set up accordingly before making any other bufferization
-        decisions. This method will never be called on OpResults that do not
-        have a tensor type.
-
-        Note: This method may not return `true` if the given OpResult does not
-        have an aliasing OpOperand.
+        Return `true` if the given OpOperand must bufferize in-place. Alias
+        sets and inplace attributes will be set up accordingly before making
+        any other bufferization decisions. This method will never be called on
+        OpOperands that do not have a tensor type.
}], /*retType=*/"bool", /*methodName=*/"mustBufferizeInPlace", - /*args=*/(ins "OpResult":$opResult, + /*args=*/(ins "OpOperand &":$opOperand, "const BufferizationState &":$state), /*methodBody=*/"", /*defaultImplementation=*/[{ diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp --- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp @@ -129,27 +129,22 @@ } /// Return `true` if a value was marked as in-place bufferized. -bool BufferizationAliasInfo::isInPlace(OpResult opResult) const { - return inplaceBufferized.contains(opResult); +bool BufferizationAliasInfo::isInPlace(OpOperand &operand) const { + return inplaceBufferized.contains(&operand); } /// Set the inPlace bufferization spec to true. -void BufferizationAliasInfo::bufferizeInPlace(OpResult result, - OpOperand &operand) { - LLVM_DEBUG(llvm::dbgs() << "bufferizeInPlace: "); - LLVM_DEBUG(result.print(llvm::dbgs())); - - markInPlace(result); - aliasInfo.unionSets(result, operand.get()); +void BufferizationAliasInfo::bufferizeInPlace(OpOperand &operand, + BufferizationState &state) { + markInPlace(operand); + if (OpResult result = state.getAliasingOpResult(operand)) + aliasInfo.unionSets(result, operand.get()); } /// Set the inPlace bufferization spec to false. -void BufferizationAliasInfo::bufferizeOutOfPlace(OpResult result) { - LLVM_DEBUG(llvm::dbgs() << "bufferizeOutOfPlace: "); - LLVM_DEBUG(result.print(llvm::dbgs())); - - if (inplaceBufferized.contains(result)) - inplaceBufferized.erase(result); +void BufferizationAliasInfo::bufferizeOutOfPlace(OpOperand &operand) { + assert(!inplaceBufferized.contains(&operand) && + "OpOperand was already decided to bufferize inplace"); } /// Apply `fun` to all the members of the equivalence class of `v`. @@ -339,16 +334,13 @@ op->walk([&](BufferizableOpInterface bufferizableOp) { if (!options.isOpAllowed(bufferizableOp)) return WalkResult::skip(); - for (OpResult opResult : bufferizableOp->getOpResults()) { - if (opResult.getType().isa()) - if (bufferizableOp.mustBufferizeInPlace(opResult, *this)) { - SmallVector operands = - bufferizableOp.getAliasingOpOperand(opResult, *this); - assert(!operands.empty() && - "expected that OpResult has aliasing OpOperand"); - for (OpOperand *operand : operands) - aliasInfo.unionAliasSets(operand->get(), opResult); - aliasInfo.markInPlace(opResult); + for (OpOperand &opOperand : bufferizableOp->getOpOperands()) { + if (opOperand.get().getType().isa()) + if (bufferizableOp.mustBufferizeInPlace(opOperand, *this)) { + if (OpResult opResult = + bufferizableOp.getAliasingOpResult(opOperand, *this)) + aliasInfo.unionAliasSets(opOperand.get(), opResult); + aliasInfo.markInPlace(opOperand); } } return WalkResult::advance(); @@ -380,7 +372,7 @@ return FailureOr(op->emitError("result buffer is ambiguous")); // If bufferizing out-of-place, allocate a new buffer. - if (!aliasInfo.isInPlace(result)) { + if (!aliasInfo.isInPlace(*opOperand)) { // Ops with multiple aliasing operands can currently not bufferize // out-of-place. 
     assert(
@@ -624,8 +616,8 @@
 }

 bool mlir::linalg::comprehensive_bufferize::BufferizationState::isInPlace(
-    OpResult opResult) const {
-  return aliasInfo.isInPlace(opResult);
+    OpOperand &opOperand) const {
+  return aliasInfo.isInPlace(opOperand);
 }

 MemRefType mlir::linalg::comprehensive_bufferize::getContiguousMemRefType(
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizationInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizationInterfaceImpl.cpp
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizationInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizationInterfaceImpl.cpp
@@ -48,6 +48,19 @@
     return true;
   }

+  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
+                               const BufferizationState &state) const {
+    // It is unknown whether the resulting MemRef will be written or not.
+    return true;
+  }
+
+  bool mustBufferizeInPlace(Operation *op, OpOperand &opOperand,
+                            const BufferizationState &state) const {
+    // ToMemrefOps always bufferize inplace.
+    // TODO: Remove ToMemrefOps from the analysis.
+    return true;
+  }
+
   OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand,
                                const BufferizationState &state) const {
     return OpResult();
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp
@@ -74,24 +74,25 @@
 //===----------------------------------------------------------------------===//

-/// Attribute marker to specify op results that can be bufferized inPlace.
-constexpr StringLiteral kInPlaceResultsAttrName = "__inplace_results_attr__";
+/// Attribute marker to specify op operands that can be bufferized inPlace.
+constexpr StringLiteral kInPlaceResultsAttrName = "__inplace_operands_attr__";

-/// Mark whether OpResult can actually be bufferized inplace.
-/// If `inPlace` is `true`, the use-def chain analysis has guaranteed that no
-/// subsequent write would occur to the bufferized tensor value (i.e. the result
-/// can be bufferized inplace).
-static void setInPlaceOpResult(OpResult opResult, bool inPlace) {
-  if (!opResult)
-    return;
-
-  Operation *op = opResult.getOwner();
+/// Mark whether OpOperand will be bufferized inplace.
+static void setInPlaceOpOperand(OpOperand &opOperand, bool inPlace) {
+  Operation *op = opOperand.getOwner();
   auto attr = op->getAttr(kInPlaceResultsAttrName).dyn_cast_or_null<ArrayAttr>();
-  SmallVector<StringRef> inPlaceVector =
-      attr ? SmallVector<StringRef>(
-                 llvm::to_vector<4>(attr.getAsValueRange<StringAttr>()))
-           : SmallVector<StringRef>(op->getNumResults(), "false");
-  inPlaceVector[opResult.getResultNumber()] = inPlace ? "true" : "false";
+  SmallVector<StringRef> inPlaceVector;
+  if (attr) {
+    inPlaceVector = SmallVector<StringRef>(
+        llvm::to_vector<4>(attr.getAsValueRange<StringAttr>()));
+  } else {
+    inPlaceVector = SmallVector<StringRef>(op->getNumOperands(), "none");
+    for (OpOperand &opOperand : op->getOpOperands())
+      if (opOperand.get().getType().isa<TensorType>())
+        inPlaceVector[opOperand.getOperandNumber()] = "false";
+  }
+
+  inPlaceVector[opOperand.getOperandNumber()] = inPlace ? "true" : "false";
   op->setAttr(kInPlaceResultsAttrName,
               OpBuilder(op).getStrArrayAttr(inPlaceVector));
 }
@@ -104,21 +105,11 @@
 static bool isInplaceMemoryWrite(OpOperand &opOperand,
                                  const BufferizationAliasInfo &aliasInfo,
                                  BufferizationState &state) {
-  // The analysis does not know what happens to the result of a ToMemrefOp, so
-  // we assume that it is written to.
-  // TODO: This is a conservative implementation. This rule will have to be
-  // relaxed for partial bufferization.
-  if (isa<bufferization::ToMemrefOp>(opOperand.getOwner()))
-    return true;
-  // OpOperands without an aliasing OpResult do not write.
-  OpResult opResult = state.getAliasingOpResult(opOperand);
-  if (!opResult)
-    return false;
   // OpOperands that do not bufferize to a memory write do not write in-place.
   if (!state.bufferizesToMemoryWrite(opOperand))
     return false;
   // Check current bufferization decisions.
-  return aliasInfo.isInPlace(opResult);
+  return aliasInfo.isInPlace(opOperand);
 }

 /// Return true if, under current bufferization decisions, the buffer of `value`
@@ -128,8 +119,8 @@
                                 BufferizationState &state) {
   bool foundNonWritableBuffer = false;
   aliasInfo.applyOnAliases(value, [&](Value v) {
-    // Query BufferizableOpInterface to see if the OpResult is writable.
-    // TODO: Out-of-place bufferized OpResult could be considered writable.
+    // Query BufferizableOpInterface to see if the value is writable.
+    // TODO: Out-of-place bufferized value could be considered writable.
     if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(v))
       if (bufferizableOp && bufferizableOp.isWritable(v, state))
        return;
@@ -309,8 +300,8 @@
   return false;
 }

-/// Return true if bufferizing result inplace would create a conflict. A read R
-/// and a write W of the same alias set is a conflict if inplace bufferization
+/// Return true if bufferizing `operand` inplace would create a conflict. A read
+/// R and a write W of the same alias set is a conflict if inplace bufferization
 /// of W changes the value read by R to a value different from the one that
 /// would be expected by tracing back R's origin through SSA use-def chains.
 /// A conflict can only be introduced by a new alias and/or an inplace
@@ -338,21 +329,9 @@
-/// Note: If `checkConsistencyOnly`, this function may be called with a null
-/// OpResult. In that case, only the consistency of bufferization decisions
-/// involving aliases of the given OpOperand are checked.
-bool wouldCreateReadAfterWriteInterference(
-    OpOperand &operand, OpResult result, const DominanceInfo &domInfo,
-    BufferizationState &state, const BufferizationAliasInfo &aliasInfo,
+/// Note: If `checkConsistencyOnly`, only the consistency of bufferization
+/// decisions involving aliases of the given OpOperand is checked.
+static bool wouldCreateReadAfterWriteInterference(
+    OpOperand &operand, const DominanceInfo &domInfo, BufferizationState &state,
+    const BufferizationAliasInfo &aliasInfo,
     bool checkConsistencyOnly = false) {
-#ifndef NDEBUG
-  if (result) {
-    SmallVector<OpOperand *> opOperands = state.getAliasingOpOperand(result);
-    assert(llvm::find(opOperands, &operand) != opOperands.end() &&
-           "operand and result do not match");
-  } else {
-    assert(checkConsistencyOnly &&
-           "result not provided, can only check consistency");
-  }
-#endif // NDEBUG
-
   // Helper function to iterate on aliases of `root` and capture the reads.
   auto getAliasingReads = [&](DenseSet<OpOperand *> &res, Value root) {
     aliasInfo.applyOnAliases(root, [&](Value alias) {
@@ -376,11 +356,11 @@
   // Collect reads and writes of all aliases of OpOperand and OpResult.
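A concrete instance of the interference collected here, taken from the matmul tests further down: the matmul's output operand aliases %B, which the same op still reads as an input, so bufferizing the output inplace would clobber a value that is still read; the analysis therefore marks that operand "false".

  %C = linalg.matmul ins(%A, %B: tensor<4x4xf32>, tensor<4x4xf32>)
                    outs(%B: tensor<4x4xf32>) -> tensor<4x4xf32>
  // analysis result: {__inplace_operands_attr__ = ["true", "true", "false"]}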
   DenseSet<OpOperand *> usesRead, usesWrite;
   getAliasingReads(usesRead, operand.get());
-  if (result)
-    getAliasingReads(usesRead, result);
   getAliasingInplaceWrites(usesWrite, operand.get());
-  if (result)
+  if (OpResult result = state.getAliasingOpResult(operand)) {
+    getAliasingReads(usesRead, result);
     getAliasingInplaceWrites(usesWrite, result);
+  }
   if (!checkConsistencyOnly && state.bufferizesToMemoryWrite(operand))
     usesWrite.insert(&operand);
@@ -388,18 +368,12 @@
                                          aliasInfo);
 }

-/// Return true if bufferizing `opOperand` inplace with `opResult` would create
-/// a write to a non-writable buffer.
+/// Return true if bufferizing `opOperand` inplace would create a write to a
+/// non-writable buffer.
 static bool
-wouldCreateWriteToNonWritableBuffer(OpOperand &opOperand, OpResult opResult,
+wouldCreateWriteToNonWritableBuffer(OpOperand &opOperand,
                                     const BufferizationAliasInfo &aliasInfo,
                                     BufferizationState &state) {
-#ifndef NDEBUG
-  SmallVector<OpOperand *> opOperands = state.getAliasingOpOperand(opResult);
-  assert(llvm::find(opOperands, &opOperand) != opOperands.end() &&
-         "operand and result do not match");
-#endif // NDEBUG
-
   // Certain buffers are not writeable:
   //   1. A function bbArg that is not inplaceable or
   //   2. A constant op.
@@ -409,9 +383,12 @@
     return false;

   // This is a problem only if the buffer is written to via some alias.
-  bool hasWrite = aliasesInPlaceWrite(opResult, aliasInfo, state) ||
-                  aliasesInPlaceWrite(opOperand.get(), aliasInfo, state) ||
+  bool hasWrite = aliasesInPlaceWrite(opOperand.get(), aliasInfo, state) ||
                   state.bufferizesToMemoryWrite(opOperand);
+
+  if (OpResult opResult = state.getAliasingOpResult(opOperand))
+    hasWrite |= aliasesInPlaceWrite(opResult, aliasInfo, state);
+
   return hasWrite;
 }

@@ -419,30 +396,23 @@
 // Bufferization analyses.
 //===----------------------------------------------------------------------===//

-/// Determine if `operand` can be bufferized in-place with `result`.
+/// Determine if `operand` can be bufferized in-place.
 static LogicalResult bufferizableInPlaceAnalysisImpl(
-    OpOperand &operand, OpResult result, BufferizationAliasInfo &aliasInfo,
+    OpOperand &operand, BufferizationAliasInfo &aliasInfo,
     BufferizationState &state, const DominanceInfo &domInfo) {
-#ifndef NDEBUG
-  SmallVector<OpOperand *> opOperands = state.getAliasingOpOperand(result);
-  assert(llvm::find(opOperands, &operand) != opOperands.end() &&
-         "operand and result do not match");
-#endif // NDEBUG
-
   bool foundInterference =
-      wouldCreateWriteToNonWritableBuffer(operand, result, aliasInfo, state) ||
-      wouldCreateReadAfterWriteInterference(operand, result, domInfo, state,
-                                            aliasInfo);
+      wouldCreateWriteToNonWritableBuffer(operand, aliasInfo, state) ||
+      wouldCreateReadAfterWriteInterference(operand, domInfo, state, aliasInfo);

   if (foundInterference)
-    aliasInfo.bufferizeOutOfPlace(result);
+    aliasInfo.bufferizeOutOfPlace(operand);
   else
-    aliasInfo.bufferizeInPlace(result, operand);
+    aliasInfo.bufferizeInPlace(operand, state);

   return success();
 }

-/// Analyze the `ops` to determine which OpResults are inplaceable. Walk ops in
+/// Analyze the `ops` to determine which OpOperands are inplaceable. Walk ops in
 /// reverse and bufferize ops greedily. This is a good starter heuristic.
 ///
 /// Even if an op does not read or write, it may still create an alias when
@@ -478,11 +448,9 @@
     for (OpOperand &opOperand : op->getOpOperands())
       if (opOperand.get().getType().isa<TensorType>())
        if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(op))
-          if (OpResult opResult =
-                  bufferizableOp.getAliasingOpResult(opOperand, state))
-            if (failed(bufferizableInPlaceAnalysisImpl(
-                    opOperand, opResult, aliasInfo, state, domInfo)))
-              return failure();
+          if (failed(bufferizableInPlaceAnalysisImpl(opOperand, aliasInfo,
+                                                     state, domInfo)))
+            return failure();

   return success();
 }
@@ -520,15 +488,12 @@
   if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(op))
     for (OpResult opResult : op->getOpResults())
       if (opResult.getType().isa<TensorType>())
-        if (aliasInfo.isInPlace(opResult)) {
-          SmallVector<OpOperand *> opOperands =
-              bufferizableOp.getAliasingOpOperand(opResult, state);
-          if (!opOperands.empty())
+        for (OpOperand *opOperand :
+             bufferizableOp.getAliasingOpOperand(opResult, state))
+          if (state.isInPlace(*opOperand))
            if (bufferizableOp.bufferRelation(opResult, aliasInfo, state) ==
                BufferRelation::Equivalent)
-              for (OpOperand *opOperand : opOperands)
-                aliasInfo.unionEquivalenceClasses(opResult, opOperand->get());
-        }
+              aliasInfo.unionEquivalenceClasses(opResult, opOperand->get());
 }

 /// Analyze equivalence of tied OpResult/OpOperand pairs of all ops contained
 ///
@@ -559,15 +524,12 @@
   if (auto bufferizableOp = options.dynCastBufferizableOp(op))
     for (OpOperand &opOperand : op->getOpOperands())
       if (opOperand.get().getType().isa<TensorType>()) {
-        OpResult opResult =
-            bufferizableOp.getAliasingOpResult(opOperand, state);
         if (wouldCreateReadAfterWriteInterference(
-                opOperand, opResult, domInfo, state, aliasInfo,
+                opOperand, domInfo, state, aliasInfo,
                /*checkConsistencyOnly=*/true)) {
-          // This error can happen for two reasons. Either the input IR
-          // already has a read-after-write conflict. Or certain
-          // "mustBufferizeInPlace" interface methods are implemented
-          // incorrectly.
+          // This error can happen if certain "mustBufferizeInPlace" interface
+          // methods are implemented incorrectly, such that the IR already has
+          // a RaW conflict before making any bufferization decisions.
           inconsistentOp = op;
           return WalkResult::interrupt();
         }
    ...
  });
@@ -576,10 +538,6 @@
   if (walkResult.wasInterrupted())
-    // This can currently happen in one situation: When a tensor is passed into
-    // a ToMemrefOp and read by another op consecutively. ToMemrefOps are
-    // currently handled conservatively. Once a tensor is passed into a
-    // ToMemrefOp, it may no longer be read.
     return inconsistentOp->emitError("input IR has RaW conflict");
   return success();
 }

 /// Annotate the IR with the result of the analysis. For testing/debugging only.
 static void
 annotateOpsWithBufferizationMarkers(Operation *op,
-                                    const BufferizationAliasInfo &aliasInfo) {
+                                    const BufferizationAliasInfo &aliasInfo,
+                                    BufferizationState &state) {
   op->walk([&](Operation *op) {
-    for (OpResult opResult : op->getResults())
-      if (opResult.getType().isa<TensorType>())
-        setInPlaceOpResult(opResult, aliasInfo.isInPlace(opResult));
+    if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(op))
+      for (OpOperand &opOperand : op->getOpOperands())
+        if (opOperand.get().getType().isa<TensorType>())
+          setInPlaceOpOperand(opOperand, aliasInfo.isInPlace(opOperand));
   });
 }

@@ -688,7 +648,7 @@
   // Annotate operations if we only want to report the analysis.
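The annotation emitted in this mode now tags operands instead of results: one entry per operand, with "none" for non-tensor operands such as a scalar fill value (example taken from the test files below):

  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
  %2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>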
   if (options.testAnalysisOnly) {
-    annotateOpsWithBufferizationMarkers(op, aliasInfo);
+    annotateOpsWithBufferizationMarkers(op, aliasInfo, state);
     return success();
   }
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.cpp
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.cpp
@@ -397,6 +397,14 @@
     return OpResult();
   }

+  bool mustBufferizeInPlace(Operation *op, OpOperand &opOperand,
+                            const BufferizationState &state) const {
+    // Yield operands always bufferize inplace. Otherwise, an alloc + copy
+    // may be generated inside the block. We should not return/yield allocations
+    // when possible.
+    return true;
+  }
+
   LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                           const BufferizationState &state) const {
     auto yieldOp = cast<linalg::YieldOp>(op);
@@ -447,22 +455,26 @@
   WalkResult status = op->walk([&](Operation *op) {
     for (OpOperand &operand : op->getOpOperands()) {
+      // Skip operands that do not bufferize inplace.
+      if (!aliasInfo.isInPlace(operand))
+        continue;
       // Is this a matching OpOperand?
       if (!anchorMatchFunc(operand))
         continue;
-
       SetVector<Value> maybeInitTensor =
           state.findValueInReverseUseDefChain(operand.get(), [&](Value val) {
             // Continue traversal until this function returns true.
             OpResult opResult = val.dyn_cast<OpResult>();
             if (!opResult)
               return true;
-            if (!aliasInfo.isInPlace(opResult))
-              return true;
-            // Only equivalent tensors are supported at the moment.
-            // TODO: Support cases such as extract_slice(init_tensor).
             SmallVector<OpOperand *> opOperands =
                 state.getAliasingOpOperand(opResult);
+            if (!llvm::all_of(opOperands, [&](OpOperand *operand) {
+                  return aliasInfo.isInPlace(*operand);
+                }))
+              return true;
+            // Only equivalent tensors are supported at the moment.
+            // TODO: Support cases such as extract_slice(init_tensor)
             return !llvm::all_of(opOperands, [&](OpOperand *operand) {
               return aliasInfo.areEquivalentBufferizedValues(operand->get(),
                                                              opResult);
@@ -542,7 +554,7 @@
         if (!insertSliceOp)
           return false;
         // Only inplace bufferized InsertSliceOps are eligible.
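A reminder of tensor.insert_slice operand numbering, which the switch from getOpResult(0) to getOpOperand(1) below relies on (%src and %dst are placeholder names): operand #0 is the source, operand #1 is the destination, and the inplace decision now lives on the destination operand rather than on the op result.

  %r = tensor.insert_slice %src into %dst[0][4][1] : tensor<4xf32> into tensor<?xf32>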
-        if (!aliasInfo.isInPlace(insertSliceOp->getOpResult(0)))
+        if (!aliasInfo.isInPlace(insertSliceOp->getOpOperand(1) /*dest*/))
           return false;
         return &operand == &insertSliceOp->getOpOperand(0) /*source*/;
       },
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
@@ -590,6 +590,11 @@
     return true;
   }

+  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
+                               const BufferizationState &state) const {
+    return false;
+  }
+
   OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand,
                                const BufferizationState &state) const {
     // CallOpInterface is special, it needs to wait for the callee to be
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp
@@ -42,14 +42,6 @@
     return {&yieldOp->getOpOperand(resultNum)};
   }

-  bool mustBufferizeInPlace(Operation *op, OpResult opResult,
-                            const BufferizationState &state) const {
-    // ExecuteRegionOp results always bufferize in-place. Since they have no
-    // OpOperands, they are mostly ignored by the analysis once alias sets are
-    // set up.
-    return true;
-  }
-
   // TODO: For better bufferization results, this could return `true` only if
   // there is a memory write in the region.
   bool isMemoryWrite(Operation *op, OpResult opResult,
@@ -129,13 +121,6 @@
     return true;
   }

-  bool mustBufferizeInPlace(Operation *op, OpResult opResult,
-                            const BufferizationState &state) const {
-    // IfOp results always bufferize in-place. Since they have no OpOperands,
-    // they are mostly ignored by the analysis once alias sets are set up.
-    return true;
-  }
-
   LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                           const BufferizationState &state) const {
     auto ifOp = cast<scf::IfOp>(op);
@@ -430,9 +415,21 @@
   OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand,
                                const BufferizationState &state) const {
+    if (isa<scf::IfOp>(op->getParentOp()))
+      return op->getParentOp()->getResult(opOperand.getOperandNumber());
+    if (isa<scf::ForOp>(op->getParentOp()))
+      return op->getParentOp()->getResult(opOperand.getOperandNumber());
     return OpResult();
   }

+  bool mustBufferizeInPlace(Operation *op, OpOperand &opOperand,
+                            const BufferizationState &state) const {
+    // Yield operands always bufferize inplace. Otherwise, an alloc + copy
+    // may be generated inside the block. We should not return/yield allocations
+    // when possible.
+    return true;
+  }
+
   LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                           const BufferizationState &state) const {
     auto yieldOp = cast<scf::YieldOp>(op);
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp
@@ -149,7 +149,7 @@
         extractSliceOp.result().getType().cast<RankedTensorType>();

     // If not inplaceable, alloc.
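A sketch of what the two cases mean for tensor.extract_slice, assuming the usual lowering (inplace: the result becomes a view of the source buffer; out-of-place: the allocation below is created and data is copied over):

  // inplace:      %3 aliases the buffer of %1, no allocation
  // out-of-place: %3 receives a fresh buffer
  %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>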
-    bool inplace = state.isInPlace(extractSliceOp->getResult(0));
+    bool inplace = state.isInPlace(extractSliceOp->getOpOperand(0));
     Value alloc;
     if (!inplace) {
       FailureOr<Value> allocOrFailure =
diff --git a/mlir/test/Dialect/Linalg/comprehensive-bufferize-analysis-2fill-extract-matmul-all-perms.mlir b/mlir/test/Dialect/Linalg/comprehensive-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
--- a/mlir/test/Dialect/Linalg/comprehensive-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
@@ -17,14 +17,15 @@
   %cst_0 = arith.constant 1.000000e+00 : f32

   %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
-  // CHECK: {__inplace_results_attr__ = ["false"]}
-  // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
+  // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
   %1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
+  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
   %2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
-
+  // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
+  // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
-
+  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
   %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>)
     outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -43,14 +44,15 @@
   %cst_0 = arith.constant 1.000000e+00 : f32

   %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
-  // CHECK: {__inplace_results_attr__ = ["false"]}
-  // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
+  // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
   %1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
+  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
   %2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
-
+  // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
+  // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
-
+  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
   %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>)
     outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -68,14 +70,15 @@
   %cst_0 = arith.constant 1.000000e+00 : f32

   %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
-  // CHECK: {__inplace_results_attr__ = ["false"]}
-  // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
+  // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
   %1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
+  // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
-
+  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
   %2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
+  // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
-
+  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
   %5 = linalg.matmul ins(%3, %4 :
tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -93,14 +96,15 @@ %cst_0 = arith.constant 1.000000e+00 : f32 %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} - // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32> + // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> - + // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -118,15 +122,15 @@ %cst_0 = arith.constant 1.000000e+00 : f32 %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} - // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> - + // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -144,15 +148,15 @@ %cst_0 = arith.constant 1.000000e+00 : f32 %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} - // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> + // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> - + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -171,14 +175,15 @@ %cst_0 = arith.constant 1.000000e+00 : f32 %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} - // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill(%cst_0, %0) : 
f32, tensor<256x256xf32> -> tensor<256x256xf32> + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> + // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -197,14 +202,15 @@ %cst_0 = arith.constant 1.000000e+00 : f32 %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} - // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32> + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> + // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> - + // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -223,15 +229,15 @@ %cst_0 = arith.constant 1.000000e+00 : f32 %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} - // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32> - + // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -250,14 +256,15 @@ %cst_0 = arith.constant 1.000000e+00 : f32 %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} - // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> + // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %1 = linalg.fill(%cst, %3) : f32, 
tensor<256x16xf32> -> tensor<256x16xf32> - + // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -276,14 +283,15 @@ %cst_0 = arith.constant 1.000000e+00 : f32 %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} - // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32> + // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32> + // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> - + // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -302,14 +310,15 @@ %cst_0 = arith.constant 1.000000e+00 : f32 %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} - // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["none", "false"]} %2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32> + // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32> - + // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -328,14 +337,15 @@ %cst_0 = arith.constant 1.000000e+00 : f32 %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} - // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32> - + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32> + // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -354,14 +364,15 @@ %cst_0 = arith.constant 1.000000e+00 : f32 %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} - // 
CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32> - + // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -380,14 +391,15 @@ %cst_0 = arith.constant 1.000000e+00 : f32 %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} - // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32> + // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -406,14 +418,15 @@ %cst_0 = arith.constant 1.000000e+00 : f32 %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} - // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32> - + // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -432,14 +445,15 @@ %cst_0 = arith.constant 1.000000e+00 : f32 %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} - // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> + // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %1 = linalg.fill(%cst, %3) : f32, 
tensor<256x16xf32> -> tensor<256x16xf32> + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -458,14 +472,15 @@ %cst_0 = arith.constant 1.000000e+00 : f32 %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} - // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> + // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32> + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32> - + // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -484,15 +499,15 @@ %cst_0 = arith.constant 1.000000e+00 : f32 %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} - // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32> + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> - + // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -511,14 +526,15 @@ %cst_0 = arith.constant 1.000000e+00 : f32 %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} - // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -537,15 +553,15 @@ %cst_0 = arith.constant 
1.000000e+00 : f32 %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} - // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32> + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> - + // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -564,15 +580,15 @@ %cst_0 = arith.constant 1.000000e+00 : f32 %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} - // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32> - + // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -591,14 +607,15 @@ %cst_0 = arith.constant 1.000000e+00 : f32 %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} - // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> + // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> - + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32> + // CHECK: {__inplace_operands_attr__ = ["none", "true"]} %2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32> - + // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -617,14 +634,15 @@ %cst_0 = arith.constant 1.000000e+00 : f32 %0 = linalg.init_tensor [256, 256] : tensor<256x256xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} - // CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["false"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> + // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> 
to tensor<256x16xf32>
-
+  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
   %2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
+  // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
   %1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
-
+  // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
   %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>)
     outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
@@ -21,11 +21,11 @@
   // aliasing subviews at all call sites or whether they allocate.
   // This is true irrespective of whether the function argument is inplaceable.
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>

   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   %r1 = tensor.extract_slice %B[0][8][1] : tensor<?xf32> to tensor<8xf32>

   return %r0, %r1: tensor<4xf32>, tensor<8xf32>
@@ -41,16 +41,16 @@
 {
   // must bufferize out of place.
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false"]}
   %r0 = tensor.insert_slice %C into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>

   // bufferizes inplace.
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
   %r1 = tensor.insert_slice %C into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [-1, 1]}
+  // CHECK-SAME: __equivalent_func_args__ = [-1, 1]
   return %r0, %r1: tensor<?xf32>, tensor<?xf32>
 }

@@ -63,27 +63,27 @@
 {
   // matmul output operand interferes with input operand.
   // CHECK: linalg.matmul
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
   %C = linalg.matmul ins(%A, %B: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%B: tensor<4x4xf32>) -> tensor<4x4xf32>

   // matmul output operand interferes with input operand.
   // CHECK: linalg.matmul
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
   %D = linalg.matmul ins(%B, %A: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%B: tensor<4x4xf32>) -> tensor<4x4xf32>

   // matmul output operand does not interfere with input operand.
   // CHECK: linalg.matmul
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
   %E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%B: tensor<4x4xf32>) -> tensor<4x4xf32>

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [-1, -1, 1]}
+  // CHECK-SAME: __equivalent_func_args__ = [-1, -1, 1]
   return %C, %D, %E: tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>
 }

@@ -103,16 +103,16 @@
   // bufferize out of place. Let callers decide whether they want to create
   // aliasing subviews at all call sites or whether they allocate.
   // This is true irrespective of whether the function argument is inplaceable.
-  // CHECK: {__inplace_results_attr__ = ["true"]}
+  // CHECK: {__inplace_operands_attr__ = ["true"]}
   %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>

-  // CHECK: {__inplace_results_attr__ = ["true"]}
+  // CHECK: {__inplace_operands_attr__ = ["true"]}
   %r1 = tensor.extract_slice %r0[0][2][1] : tensor<4xf32> to tensor<2xf32>

-  // CHECK: {__inplace_results_attr__ = ["true"]}
+  // CHECK: {__inplace_operands_attr__ = ["true"]}
   %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>

-  // CHECK: {__inplace_results_attr__ = ["true"]}
+  // CHECK: {__inplace_operands_attr__ = ["true"]}
   %r3 = tensor.extract_slice %r2[0][2][1] : tensor<4xf32> to tensor<2xf32>

   return %r1, %r3: tensor<2xf32>, tensor<2xf32>
@@ -130,20 +130,20 @@
                  %B3 : tensor<2xf32> {linalg.inplaceable = false})
   -> (tensor<?xf32>, tensor<?xf32>)
 {
-  // CHECK: {__inplace_results_attr__ = ["true"]}
+  // CHECK: {__inplace_operands_attr__ = ["true", "true"]}
   %r0 = tensor.insert_slice %A3 into %A2[0][2][1] : tensor<2xf32> into tensor<4xf32>

-  // CHECK: {__inplace_results_attr__ = ["true"]}
+  // CHECK: {__inplace_operands_attr__ = ["true", "true"]}
   %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>

-  // CHECK: {__inplace_results_attr__ = ["false"]}
+  // CHECK: {__inplace_operands_attr__ = ["true", "false"]}
   %r2 = tensor.insert_slice %B3 into %B2[0][2][1] : tensor<2xf32> into tensor<4xf32>

-  // CHECK: {__inplace_results_attr__ = ["false"]}
+  // CHECK: {__inplace_operands_attr__ = ["true", "false"]}
   %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0, -1]}
+  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
   return %r1, %r3: tensor<?xf32>, tensor<?xf32>
 }

@@ -160,12 +160,12 @@
   // %r0 is an overlapping tensor.extract_slice that does not match, it must be
   // out of place.
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]}
   %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>

   // %r1 can bufferize inplace fine.
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]}
   %r1 = tensor.insert_slice %r0 into %A[%idx][4][1] : tensor<4xf32> into tensor<?xf32>

   // %r3 bufferizes out of place because %B is not inplaceable.
   // %r2 is a tensor.extract_slice that does
   // not alias with the buffer coming from %r3 so it can actually bufferize
   // inplace.
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>

   // %r3 cannot bufferize inplace since %B is not inplaceable.
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none"]}
   %r3 = tensor.insert_slice %r2 into %B[%idx][4][1] : tensor<4xf32> into tensor<?xf32>

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0, -1]}
+  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
   return %r1, %r3: tensor<?xf32>, tensor<?xf32>
 }

@@ -198,17 +198,17 @@
   // %r0 is a tensor.extract_slice that matches, it can also be bufferized
   // inplace.
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>

   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
   %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>

   // %r2 is a tensor.extract_slice that matches %r3, it can be bufferized
   // inplace.
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>

   // tensor.insert_slice cannot bufferize inplace. It would
   // be unproductive to have special logic in bufferization to encode matching
   // insert_slice(extract_slice(A), A).
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false"]}
   %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0, -1]}
+  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
   return %r1, %r3: tensor<?xf32>, tensor<?xf32>
 }

@@ -237,21 +237,21 @@
   %cst2 = arith.constant 1.0 : f32

   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
   %0 = tensor.extract_slice %A[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>

   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
   %1 = linalg.fill(%cst, %0) : f32, tensor<?xf32> -> tensor<?xf32>

   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
   %2 = tensor.insert_slice %1 into %A[%idx][%idx][1] : tensor<?xf32> into tensor<?xf32>

   %3 = vector.transfer_read %1[%idx2], %cst2 : tensor<?xf32>, vector<5xf32>

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0, -1]}
+  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
   return %2, %3 : tensor<?xf32>, vector<5xf32>
 }

@@ -269,33 +269,33 @@
   %cst2 = arith.constant 1.0 : f32

   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]}
   %0 = tensor.extract_slice %A[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>

   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
   %1 = linalg.fill(%cst, %0) : f32, tensor<?xf32> -> tensor<?xf32>

   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
   %2 = tensor.insert_slice %1 into %A[%idx][%idx][1] : tensor<?xf32> into tensor<?xf32>

   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
   %4 = tensor.extract_slice %2[%idx3][%idx3][1] : tensor<?xf32> to tensor<?xf32>

   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
   %5 = linalg.fill(%cst, %4) : f32, tensor<?xf32> -> tensor<?xf32>

   %3 = vector.transfer_read %1[%idx2], %cst2 : tensor<?xf32>, vector<5xf32>

   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
"true", "none", "none"]} %6 = tensor.insert_slice %5 into %2[%idx3][%idx3][1] : tensor into tensor // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [0, -1]} + // CHECK-SAME: __equivalent_func_args__ = [0, -1] return %6, %3 : tensor, vector<5xf32> } @@ -311,25 +311,25 @@ // tensor.extract_slice is only used as a read, no interference irrespective // of user's inplace status. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} %sA = tensor.extract_slice %A[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> // matmul output operand is not inplaceable at the function boundary. // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]} %D = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>) outs(%B: tensor<4x4xf32>) -> tensor<4x4xf32> // matmul output operand is inplaceable at the function boundary. // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %E = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>) outs(%C: tensor<4x4xf32>) -> tensor<4x4xf32> // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [-1, 2]} + // CHECK-SAME: __equivalent_func_args__ = [-1, 2] return %D, %E: tensor<4x4xf32>, tensor<4x4xf32> } @@ -345,12 +345,12 @@ // Step 4. %sB forward propagates to a write in %D but it is not inplace. // So this is only ever read and can bufferize inplace. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> // Step 3. %sB has a read interference in %E, it does not bufferize inplace. // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]} %D = linalg.matmul ins(%B, %C: tensor, tensor) outs(%sB: tensor<4x4xf32>) -> tensor<4x4xf32> @@ -359,13 +359,13 @@ // %sC backward propagates to %C which is inplaceable. // As a consequence this is bufferized inplace. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> // Step 1. %sC backprops to the tensor.extract_slice producer which is not // considered an interference. This bufferizes inplace. 
   // CHECK: linalg.matmul
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
   %E = linalg.matmul  ins(%A, %sB: tensor<4x4xf32>, tensor<4x4xf32>)
                      outs(%sC: tensor<4x4xf32>)
     -> tensor<4x4xf32>

@@ -387,23 +387,23 @@
     -> tensor<30x20xf32> {
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none", "none", "none"]}
   %15 = tensor.extract_slice %C[%s3, %s4] [%s1, %s2] [1, 1] : tensor<30x20xf32> to tensor<?x?xf32>

   // CHECK: linalg.matmul
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
   %18 = linalg.matmul  ins(%A, %B : tensor<8x6xf32>, tensor<6x6xf32>)
                       outs(%15 : tensor<?x?xf32>)
     -> tensor<?x?xf32>

   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
   %19 = tensor.extract_slice %18[0, 0] [%s1, %s2] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>

   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none", "none", "none"]}
   %20 = tensor.insert_slice %19 into %C[%s3, %s4] [%s1, %s2] [1, 1] : tensor<?x?xf32> into tensor<30x20xf32>

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [6]}
+  // CHECK-SAME: __equivalent_func_args__ = [6]
   return %20 : tensor<30x20xf32>
 }

@@ -424,13 +424,13 @@
   // %sB backward propagates to %B which is not inplaceable.
   // As a consequence this is bufferized out of place.
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]}
   %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>

   // Step 3. %sB backprops to the tensor.extract_slice producer which is not
   // considered an interference. This bufferizes inplace.
   // CHECK: linalg.matmul
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
   %D = linalg.matmul  ins(%B, %C: tensor<?x?xf32>, tensor<?x?xf32>)
                      outs(%sB: tensor<4x4xf32>)
     -> tensor<4x4xf32>

@@ -439,13 +439,13 @@
   // %sC backward propagates to %C which is inplaceable.
   // As a consequence this is bufferized inplace.
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>

   // Step 1. %sC backprops to the tensor.extract_slice producer which is not
   // considered an interference. This bufferizes inplace.
   // CHECK: linalg.matmul
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
   %E = linalg.matmul  ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
                      outs(%sC: tensor<4x4xf32>)
     -> tensor<4x4xf32>
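// NOTE: The "false" entries in the matmul tests above all come from one rule,
// as far as these cases show: an operand may not bufferize in-place if the
// data it would overwrite is still read later through some alias. A minimal
// sketch of such a read-after-write conflict (hypothetical IR, not part of
// this test):
//
//   %f = linalg.fill(%cst, %t) : f32, tensor<4xf32> -> tensor<4xf32>
//   %v = vector.transfer_read %t[%c0], %pad : tensor<4xf32>, vector<4xf32>
//
// Filling %t in-place would clobber the value the transfer_read still needs,
// so the fill's output operand would be annotated
// {__inplace_operands_attr__ = ["none", "false"]}.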
@@ -475,15 +475,15 @@
   // - %sA would then bufferize to an inplace write (i.e. %FA) but %A is not
   //   inplaceable and so %sA is not inplaceable.
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]}
   // CHECK-NEXT: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   // CHECK-NEXT: fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
   // CHECK-NEXT: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
   // CHECK-NEXT: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none", "none"]}
   %sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
   %ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
   %FA = linalg.fill(%f0, %ssA) : f32, tensor<4x4xf32> -> tensor<4x4xf32>
@@ -493,19 +493,19 @@
   // 3-level matching tensor.extract_slice / tensor.insert_slice into
   // inplaceable %B.
   // CHECK-NEXT: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
   // CHECK-NEXT: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
   // CHECK-NEXT: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   // CHECK-NEXT: fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
   // CHECK-NEXT: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
   // CHECK-NEXT: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]}
   // CHECK-NEXT: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
   %sB = tensor.extract_slice %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
   %ssB = tensor.extract_slice %sB[0, 0][4, %idx][1, 1] : tensor<?x?xf32> to tensor<4x?xf32>
   %sssB = tensor.extract_slice %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32>
@@ -518,18 +518,18 @@
   // inplaceable %C with a twist.
   // Throw a wrench in the system: %rsC production sizes do not match %ssC.
   // CHECK-NEXT: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
   // The tensor.insert_slice that would be candidate for matching does not actually
   // match. That tensor.insert_slice can still be bufferized inplace nonetheless
   // but this tensor.extract_slice, which bufferizes to an inplace write, cannot.
   // CHECK-NEXT: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none"]}
   // CHECK-NEXT: fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
   // CHECK-NEXT: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]}
   // CHECK-NEXT: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
   %sC = tensor.extract_slice %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
   %ssC = tensor.extract_slice %sC[0, 0][%sz1, 4][1, 1] : tensor<?x?xf32> to tensor<?x4xf32>
   %FC = linalg.fill(%f0, %ssC) : f32, tensor<?x4xf32> -> tensor<?x4xf32>
@@ -537,7 +537,7 @@
   %rC = tensor.insert_slice %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [-1, 1, 2]}
+  // CHECK-SAME: __equivalent_func_args__ = [-1, 1, 2]
   return %rA, %rB, %rC: tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>
 }

@@ -558,20 +558,22 @@
 {
   // CHECK: scf.for
   // CHECK-NEXT: scf.yield
-  // CHECK-NEXT: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+  // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "false"]}
   %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
     scf.yield %t : tensor<?xf32>
   }

   // CHECK: scf.for
   // CHECK-NEXT: scf.yield
-  // CHECK-NEXT: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+  // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}
   %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %B) -> (tensor<?xf32>) {
     scf.yield %t : tensor<?xf32>
   }

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [-1, 1]}
+  // CHECK-SAME: __equivalent_func_args__ = [-1, 1]
   return %r0, %r1: tensor<?xf32>, tensor<?xf32>
 }

@@ -593,11 +595,11 @@
   // which makes bbArg inplaceable.
   // 2. Or it is already inplaceable and so is bbArg.
   // CHECK-NEXT: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
   // CHECK-NEXT: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
-  // CHECK-NEXT: scf.yield
-  // CHECK-NEXT: {__inplace_results_attr__ = ["false", "true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
+  // CHECK-NEXT: scf.yield {__inplace_operands_attr__ = ["true", "true"]}
+  // CHECK-NEXT: } {__inplace_operands_attr__ = ["none", "none", "none", "false", "true"]}
   %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
     -> (tensor<?xf32>, tensor<?xf32>)
   {
@@ -607,7 +609,7 @@
   }

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [-1, 1]}
+  // CHECK-SAME: __equivalent_func_args__ = [-1, 1]
   return %r0#0, %r0#1: tensor<?xf32>, tensor<?xf32>
 }

@@ -628,8 +630,10 @@
   // of %r1 is read.
   // CHECK: scf.for
   // CHECK-NEXT: call
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   // CHECK-NEXT: scf.yield
-  // CHECK-NEXT: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+  // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "false"]}
   %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
     call @some_use(%t) : (tensor<?xf32>) -> ()
     scf.yield %t : tensor<?xf32>
   }
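// NOTE: For ops with regions such as scf.for, the annotations are split in two
// places, as the CHECK lines above suggest: the scf.yield carries one entry
// per yielded value, and the trailing `} {__inplace_operands_attr__ = [...]}`
// annotates the loop's own operands, where the three leading "none" entries
// correspond to %lb, %ub and %step and the remaining entries to the
// iter_args. A hypothetical loop whose single iter_arg bufferizes in-place
// would print roughly as:
//
//   %res = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
//     scf.yield %t : tensor<?xf32>
//   } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}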
   // %r1 bufferizes inplace fine.
   // CHECK: scf.for
   // CHECK-NEXT: call
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   // CHECK-NEXT: scf.yield
-  // CHECK-NEXT: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+  // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}
   %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
     call @some_use(%t) : (tensor<?xf32>) -> ()
     scf.yield %t : tensor<?xf32>
   }

@@ -649,8 +655,10 @@
   // of %r3 is read.
   // CHECK: linalg.tiled_loop
   // CHECK-NEXT: call
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   // CHECK-NEXT: linalg.yield
-  // CHECK-NEXT: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+  // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "false"]}
   %r2 = linalg.tiled_loop (%i) = (%lb) to (%ub) step (%step)
       ins()
       outs(%t = %B: tensor<?xf32>) {
@@ -661,8 +669,10 @@
   // %r3 bufferizes inplace fine.
   // CHECK: linalg.tiled_loop
   // CHECK-NEXT: call
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   // CHECK-NEXT: linalg.yield
-  // CHECK-NEXT: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
+  // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}
   %r3 = linalg.tiled_loop (%i) = (%lb) to (%ub) step (%step)
       ins()
       outs(%t = %B: tensor<?xf32>) {
@@ -671,7 +681,7 @@
   }

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0, 1]}
+  // CHECK-SAME: __equivalent_func_args__ = [0, 1]
   return %r1, %r3: tensor<?xf32>, tensor<?xf32>
 }

@@ -692,12 +702,12 @@
   // value. The calls to `foo` are determined to read conservatively, so %A
   // cannot bufferize inplace.
   // CHECK: fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
   %A = linalg.fill(%f1, %I) : f32, tensor<64xf32> -> tensor<64xf32>

   // 1. Bufferizes inplace: no alias to %A is yet possible.
   // CHECK: fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
   %B = linalg.fill(%f2, %I) : f32, tensor<64xf32> -> tensor<64xf32>

   call @foo(%A) : (tensor<64xf32>) -> ()
@@ -728,17 +738,19 @@
   // The calls to `foo` are determined to read conservatively, so %A cannot
   // bufferize inplace.
   // CHECK: fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
   %A = linalg.fill(%f1, %I) : f32, tensor<64xf32> -> tensor<64xf32>

   // 4. Bufferizes inplace: no alias to %A is yet possible.
   // CHECK: fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
   %B = linalg.fill(%f2, %I) : f32, tensor<64xf32> -> tensor<64xf32>

   // 3. Does not read or write, bufferizes inplace.
-  // CHECK: scf.for
-  // CHECK: {__inplace_results_attr__ = ["true", "true"]}
+  // CHECK: scf.for
+  // CHECK-NEXT: scf.yield
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
+  // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true", "true"]}
   %r:2 = scf.for %i = %c0 to %c10 step %c1 iter_args(%0 = %A, %1 = %B)
     -> (tensor<64xf32>, tensor<64xf32>)
   {
@@ -751,12 +763,12 @@
   // value. The calls to `foo` are determined to read conservatively, so %A2
   // cannot bufferize inplace.
   // CHECK: fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
   %A2 = linalg.fill(%f1, %I2) : f32, tensor<64xf32> -> tensor<64xf32>

   // 1. Bufferizes inplace: no alias to %A2 is yet possible.
   // CHECK: fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
   %B2 = linalg.fill(%f2, %I2) : f32, tensor<64xf32> -> tensor<64xf32>

   call @bar(%A2) : (tensor<64xf32>) -> ()
@@ -776,10 +788,10 @@
                                 %s3 : index) -> tensor<?xi32> {
   %A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]}
   %b = tensor.extract_slice %A[%s1][%s2][1] : tensor<4xi32> to tensor<?xi32>
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]}
   %r = vector.transfer_write %v, %b[%s3] : vector<5xi32>, tensor<?xi32>
   return %r : tensor<?xi32>
 }
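// NOTE: The "false" on the extract_slice of %A above follows from %A being a
// constant: memory holding a constant must not be written, so any write into
// an alias of it has to target a copy. Condensed to a hypothetical three-op
// example (not part of this test):
//
//   %c = arith.constant dense<0> : tensor<8xi32>
//   %s = tensor.extract_slice %c[0] [4] [1] : tensor<8xi32> to tensor<4xi32>
//   %w = vector.transfer_write %v, %s[%c0] : vector<4xi32>, tensor<4xi32>
//
// The transfer_write can still be in-place with respect to the copy, which is
// why only the slice of the constant is annotated "false".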
@@ -799,18 +811,18 @@
   %7 = linalg.init_tensor [256, 256] : tensor<256x256xf32>

   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
   %8 = linalg.fill(%cst_0, %7) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
   %11 = linalg.fill(%cst_1, %7) : f32, tensor<256x256xf32> -> tensor<256x256xf32>

   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   // CHECK: linalg.matmul
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
   %sA = tensor.extract_slice %8[0, 0][256, 16][1, 1]: tensor<256x256xf32> to tensor<256x16xf32>
   %sB = tensor.extract_slice %11[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32>
   %r = linalg.matmul
@@ -818,7 +830,7 @@
     outs(%arg2 : tensor<256x256xf32>)
     -> tensor<256x256xf32>

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [2]}
+  // CHECK-SAME: __equivalent_func_args__ = [2]
   return %r : tensor<256x256xf32>
 }

@@ -837,27 +849,27 @@
   %7 = linalg.init_tensor [256, 256] : tensor<256x256xf32>

   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
   %8 = linalg.fill(%cst_0, %7) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
   %9 = vector.transfer_read %arg0[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
   %10 = vector.transfer_write %9, %8[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>

   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
   %11 = linalg.fill(%cst_1, %7) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
   %12 = vector.transfer_read %arg1[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
   %13 = vector.transfer_write %12, %11[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>

   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   // CHECK: linalg.matmul
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
   %sA = tensor.extract_slice %10[0, 0][256, 16][1, 1]: tensor<256x256xf32> to tensor<256x16xf32>
   %sB = tensor.extract_slice %13[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32>
   %r = linalg.matmul
@@ -865,7 +877,7 @@
     outs(%arg2 : tensor<256x256xf32>)
     -> tensor<256x256xf32>

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [2]}
+  // CHECK-SAME: __equivalent_func_args__ = [2]
   return %r : tensor<256x256xf32>
 }

@@ -888,31 +900,31 @@
   %cst = arith.constant 0.000000e+00 : f32

   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]
   %0 = linalg.fill(%cst, %arg2) : f32, tensor<62x90xf32> -> tensor<62x90xf32>

   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
   %2 = tensor.extract_slice %0[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>

   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
   %7 = vector.transfer_write %v1, %2[%c0, %c0] {in_bounds = [true, true]} : vector<32x90xf32>, tensor<32x90xf32>

   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %8 = tensor.insert_slice %7 into %0[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>

   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
   %10 = tensor.extract_slice %8[32, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>

   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
   %14 = vector.transfer_write %v2, %10[%c0, %c0] {in_bounds = [true, true]} : vector<30x90xf32>, tensor<30x90xf32>

   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %15 = tensor.insert_slice %14 into %8[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [4]}
+  // CHECK-SAME: __equivalent_func_args__ = [4]
   return %15 : tensor<62x90xf32>
 }

@@ -940,7 +952,7 @@
   }

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %r : tensor<10x20xf32>
 }

@@ -962,7 +974,7 @@
     %t2: tensor<?xf32> {linalg.inplaceable = true}) -> (tensor<?xf32>, tensor<?xf32>){
   // CHECK: linalg.generic
-  // CHECK-SAME: {__inplace_results_attr__ = ["true", "true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]
   %o:2 = linalg.generic #trait ins(%t1 : tensor<?xf32>)
                               outs (%t2, %t2 : tensor<?xf32>, tensor<?xf32>) {
       ^bb(%0: f32, %1: f32, %2 : f32) :
@@ -970,7 +982,7 @@
   } -> (tensor<?xf32>, tensor<?xf32>)

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0, 1]}
+  // CHECK-SAME: __equivalent_func_args__ = [0, 1]
   return %o#0, %o#1 : tensor<?xf32>, tensor<?xf32>
 }

@@ -994,7 +1006,7 @@
     -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>){
   // CHECK: linalg.generic
-  // CHECK-SAME: {__inplace_results_attr__ = ["true", "true", "false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true", "false"]
   %o:3 = linalg.generic #trait
           ins(%t1 : tensor<?xf32>)
          outs (%t2, %t2, %t2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) {
@@ -1003,7 +1015,7 @@
   } -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0, 1, -1]}
+  // CHECK-SAME: __equivalent_func_args__ = [0, 1, -1]
   return %o#0, %o#1, %o#2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>
 }

@@ -1022,31 +1034,31 @@
   // Cannot bufferize inplace this extract_slice because both operand and result
   // are modified and returned separately.
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none", "none", "none"]
   %e = tensor.extract_slice %arg2[%s1, %s2][%s3, %s4][1, 1] : tensor<62x90xf32> to tensor<?x?xf32>

   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
   %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>

   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
   %7 = vector.transfer_write %v1, %2[%c0, %c0] {in_bounds = [true, true]} : vector<32x90xf32>, tensor<32x90xf32>

   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %8 = tensor.insert_slice %7 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>

   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
   %10 = tensor.extract_slice %e[32, 0] [30, 90] [1, 1] : tensor<?x?xf32> to tensor<30x90xf32>

   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
   %14 = vector.transfer_write %v2, %10[%c0, %c0] {in_bounds = [true, true]} : vector<30x90xf32>, tensor<30x90xf32>

   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %15 = tensor.insert_slice %14 into %e[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<?x?xf32>

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [2, -1]}
+  // CHECK-SAME: __equivalent_func_args__ = [2, -1]
   return %8, %15 : tensor<62x90xf32>, tensor<?x?xf32>
 }
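// NOTE: The out-of-place %e above is forced by the return: %arg2 and a
// modified alias of it are returned separately, so the writes into %e must
// not become visible through %arg2. The same situation on a minimal
// hypothetical example (not part of this test):
//
//   %e = tensor.extract_slice %t[0] [4] [1] : tensor<8xf32> to tensor<4xf32>
//   %w = vector.transfer_write %v, %e[%c0] : vector<4xf32>, tensor<4xf32>
//   return %t, %w : tensor<8xf32>, tensor<4xf32>
//
// Here %e would be annotated "false" so that %t is returned unmodified.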
@@ -1058,26 +1070,26 @@
     -> (tensor<62x90xf32>) {
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
   %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>

   // TODO: This should bufferize inplace once we have a proper range analysis.
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]
   %10 = tensor.extract_slice %arg2[32, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>

   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>

   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %15 = tensor.insert_slice %10 into %8[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %15 : tensor<62x90xf32>
 }

@@ -1089,26 +1101,26 @@
     -> (tensor<62x90xf32>) {
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
   %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>

   // The slices are overlapping, so this can never bufferize inplace.
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]
   %10 = tensor.extract_slice %arg2[31, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>

   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>

   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %15 = tensor.insert_slice %10 into %8[31, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %15 : tensor<62x90xf32>
 }

@@ -1120,19 +1132,19 @@
     -> (tensor<62x90xf32>) {
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]
   %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>

   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>

   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %15 = tensor.insert_slice %2 into %8[15, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %15 : tensor<62x90xf32>
 }

@@ -1156,25 +1168,25 @@
   // Write to %t1.
   // CHECK: vector.transfer_write
-  // CHECK-SAME: __inplace_results_attr__ = ["false"]
+  // CHECK-SAME: __inplace_operands_attr__ = ["none", "false", "none"]
   %t3 = vector.transfer_write %v, %t1[%s] : vector<5xf32>, tensor<?xf32>

   // Read the old value of %t1 inside the loop via an alias.
-  // CHECK: scf.for
+  // CHECK: scf.for {{.*}} {
   %r, %v3 = scf.for %i = %c0 to %s step %c1 iter_args(%t2 = %t1, %v0 = %v) -> (tensor<?xf32>, vector<5xf32>) {
     // CHECK: tensor.extract_slice
-    // CHECK-SAME: __inplace_results_attr__ = ["true"]
+    // CHECK-SAME: __inplace_operands_attr__ = ["true", "none", "none"]
     %e = tensor.extract_slice %t2[%s][%s][1] : tensor<?xf32> to tensor<?xf32>

     // Read from %t1 via alias %e.
     %v2 = vector.transfer_read %e[%s], %cst : tensor<?xf32>, vector<5xf32>
     scf.yield %t2, %v2 : tensor<?xf32>, vector<5xf32>
   }
-  // CHECK: __inplace_results_attr__ = ["true", "false"]
+  // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true", "none"]}

   // Use %t3 in some way without reading it, so that it does not get DCE'd.
   // CHECK: linalg.generic
-  // CHECK-SAME: __inplace_results_attr__ = ["true"]
+  // CHECK-SAME: __inplace_operands_attr__ = ["true"]
   %o = linalg.generic #trait outs (%t3 : tensor<?xf32>) {
       ^bb(%0: f32) :
         linalg.yield %cst : f32
@@ -1203,7 +1215,7 @@
   // Write to %t1.
   // CHECK: vector.transfer_write
-  // CHECK-SAME: __inplace_results_attr__ = ["true"]
+  // CHECK-SAME: __inplace_operands_attr__ = ["none", "true", "none"]
   %t3 = vector.transfer_write %v, %t1[%s] : vector<5xf32>, tensor<?xf32>

   // This loop does not read from %t1. It only writes to it.
@@ -1211,7 +1223,7 @@
   %r, %v3 = scf.for %i = %c0 to %s step %c1 iter_args(%t2 = %t1, %v0 = %v) -> (tensor<?xf32>, vector<5xf32>) {
     // Write to %t1 via %t2. (Overwrite %t3.)
     // CHECK: linalg.generic
-    // CHECK-SAME: __inplace_results_attr__ = ["true"]
+    // CHECK-SAME: __inplace_operands_attr__ = ["true"]
     %o2 = linalg.generic #trait outs (%t2 : tensor<?xf32>) {
         ^bb(%0: f32) :
           linalg.yield %cst : f32
@@ -1224,14 +1236,14 @@

   // Use %t3 in some way without reading it, so that it does not get DCE'd.
   // CHECK: linalg.generic
-  // CHECK-SAME: __inplace_results_attr__ = ["true"]
+  // CHECK-SAME: __inplace_operands_attr__ = ["true"]
   %o = linalg.generic #trait outs (%t3 : tensor<?xf32>) {
       ^bb(%0: f32) :
         linalg.yield %cst : f32
   } -> (tensor<?xf32>)

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0, -1]}
+  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
   return %o, %v3 : tensor<?xf32>, vector<5xf32>
 }
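// NOTE: The two loop tests above differ only in whether the iterations read
// the tensor, which is what the analysis appears to key on: a write may
// happen in-place when no alias of the overwritten value is read afterwards.
// Reduced to two hypothetical ops (not part of this test):
//
//   %w = vector.transfer_write %v, %t[%c0] : vector<4xf32>, tensor<8xf32>
//   %r = vector.transfer_read %t[%c0], %pad : tensor<8xf32>, vector<4xf32>
//
// The later read of %t forces {__inplace_operands_attr__ = ["none", "false",
// "none"]} on the transfer_write; without the read, it could be "true".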
@@ -1245,24 +1257,24 @@
 func @buffer_forwarding_conflict(%arg0: tensor<?xf32> {linalg.inplaceable = true}, %arg1: index) -> (tensor<?xf32>, tensor<?xf32>) {
   %cst = arith.constant 0.000000e+00 : f32
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none"]
   // Instead of allocating, share buffer with some inplace bufferization?
   %0 = linalg.init_tensor [%arg1] : tensor<?xf32>

   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]
   %1 = linalg.fill(%cst, %0) : f32, tensor<?xf32> -> tensor<?xf32>

   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none"]
   %2 = tensor.insert_slice %1 into %arg0[0] [%arg1] [1] : tensor<?xf32> into tensor<?xf32>

   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]
   %3 = tensor.insert_slice %1 into %arg0[42] [%arg1] [1] : tensor<?xf32> into tensor<?xf32>

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [-1, 0]}
+  // CHECK-SAME: __equivalent_func_args__ = [-1, 0]
   return %2, %3 : tensor<?xf32>, tensor<?xf32>
 }

@@ -1272,20 +1284,20 @@
 func @buffer_forwarding_no_conflict(%arg0: tensor<?xf32> {linalg.inplaceable = true}, %arg1: index) -> (tensor<?xf32>, tensor<?xf32>) {
   %cst = arith.constant 0.000000e+00 : f32
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]
   // Instead of allocating, share buffer with some inplace bufferization?
   %0 = linalg.init_tensor [%arg1] : tensor<?xf32>

   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]
   %1 = linalg.fill(%cst, %0) : f32, tensor<?xf32> -> tensor<?xf32>

   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]
   %2 = tensor.insert_slice %1 into %arg0[42] [%arg1] [1] : tensor<?xf32> into tensor<?xf32>

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0, 0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0, 0]
   return %2, %2 : tensor<?xf32>, tensor<?xf32>
 }

@@ -1301,27 +1313,35 @@
                      %t2: tensor<?xf32> {linalg.inplaceable = true}, %cond: i1) -> tensor<?xf32> {
   %r = scf.if %cond -> (tensor<?xf32>) {
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t1 : tensor<?xf32>
   } else {
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t2 : tensor<?xf32>
   }
   return %r : tensor<?xf32>
 }

+// -----
+
 // CHECK-LABEL: func @scf_if_inplace2
 func @scf_if_inplace2(%t1: tensor<?xf32> {linalg.inplaceable = true},
                       %v: vector<5xf32>, %idx: index, %cond: i1) -> tensor<?xf32> {
   %r = scf.if %cond -> (tensor<?xf32>) {
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t1 : tensor<?xf32>
   } else {
     // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
     %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
     scf.yield %t2 : tensor<?xf32>
   }
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %r : tensor<?xf32>
 }

@@ -1332,18 +1352,22 @@
                       %v1: vector<5xf32>, %v2: vector<5xf32>, %idx: index, %cond: i1) -> tensor<?xf32> {
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
   %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
   %r = scf.if %cond -> (tensor<?xf32>) {
     // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
     %t2 = vector.transfer_write %v1, %e[%idx] : vector<5xf32>, tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
= ["true"]} scf.yield %t2 : tensor } else { // Writing the same tensor through an alias. This is OK. // CHECK: vector.transfer_write - // CHECK-SAME: {__inplace_results_attr__ = ["true"] + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"] %t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} scf.yield %t3 : tensor } return %r : tensor @@ -1357,23 +1381,31 @@ %cond: i1, %cond2: i1) -> (tensor, vector<10xf32>) { %cst = arith.constant 0.0 : f32 %r = scf.if %cond -> (tensor) { + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} scf.yield %t1 : tensor } else { // CHECK: vector.transfer_write - // CHECK-SAME: {__inplace_results_attr__ = ["true"] + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"] %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} scf.yield %t2 : tensor } %r_alias = scf.if %cond2 -> (tensor) { // Reading %r is OK. No conflict. + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} scf.yield %r : tensor } else { + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} scf.yield %r : tensor } %v2 = vector.transfer_read %r_alias[%idx], %cst : tensor, vector<10xf32> // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [0, -1]} + // CHECK-SAME: __equivalent_func_args__ = [0, -1] return %r_alias, %v2 : tensor, vector<10xf32> } @@ -1384,24 +1416,28 @@ %idx: index, %cond: i1) -> tensor { %r = scf.if %cond -> (tensor) { // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"] + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"] %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor to tensor + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} scf.yield %e : tensor } else { // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"] + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"] %f = tensor.extract_slice %t1[%idx][%idx][1] : tensor to tensor + // CHECK: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} scf.yield %f : tensor } // Inserting into an equivalent tensor at the same offset. This bufferizes // inplace. 
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]
   %r2 = tensor.insert_slice %r into %t1[%idx][%idx][1] : tensor<?xf32> into tensor<?xf32>

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %r2 : tensor<?xf32>
 }

@@ -1416,25 +1452,33 @@
   %r = scf.if %cond -> (tensor<?xf32>) {
     %t2 = scf.if %cond2 -> (tensor<?xf32>) {
       // CHECK: vector.transfer_write
-      // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+      // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
       %t3 = vector.transfer_write %v1, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+      // CHECK: scf.yield
+      // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
      scf.yield %t3 : tensor<?xf32>
    } else {
      // CHECK: vector.transfer_write
-      // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+      // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
      %t4 = vector.transfer_write %v3, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+      // CHECK: scf.yield
+      // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
      scf.yield %t4 : tensor<?xf32>
    }
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
    scf.yield %t2 : tensor<?xf32>
  } else {
    // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
    %t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
    scf.yield %t3 : tensor<?xf32>
  }
  // CHECK: return
- // CHECK-SAME: {__equivalent_func_args__ = [0]}
+ // CHECK-SAME: __equivalent_func_args__ = [0]
  return %r : tensor<?xf32>
 }

@@ -1447,18 +1491,22 @@
   %cst = arith.constant 0.0 : f32
   %r, %v_r2 = scf.if %cond -> (tensor<?xf32>, vector<5xf32>) {
     // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
     %t2 = vector.transfer_write %v1, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
     scf.yield %t2, %v1 : tensor<?xf32>, vector<5xf32>
   } else {
     // Writing the same tensor through an alias.
     // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
     %t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor<?xf32>

     // Read the original value of %t1. This requires the write in this branch
     // to be out-of-place. But the write in the other branch can still be
     // inplace.
     %v_r = vector.transfer_read %t1[%idx2], %cst : tensor<?xf32>, vector<5xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
     scf.yield %t3, %v_r : tensor<?xf32>, vector<5xf32>
   }
   return %r, %v_r2 : tensor<?xf32>, vector<5xf32>

@@ -1472,17 +1520,21 @@
                           %cond: i1) -> tensor<?xf32> {
   %r = scf.if %cond -> (tensor<?xf32>) {
     // CHECK: tensor.extract_slice
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
     %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %e : tensor<?xf32>
   } else {
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t1 : tensor<?xf32>
   }

   // Reading from and writing to the same tensor via different args. This is a
   // conflict.
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none", "none"]
   %r2 = tensor.insert_slice %r into %t1[%idx2][%idx2][1] : tensor<?xf32> into tensor<?xf32>
   return %r2 : tensor<?xf32>
 }

@@ -1495,13 +1547,17 @@
                            %cond: i1) -> tensor<?xf32> {
   %r = scf.if %cond -> (tensor<?xf32>) {
     // CHECK: tensor.extract_slice
-    // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
     %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %e : tensor<?xf32>
   } else {
     // CHECK: tensor.extract_slice
-    // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
     %f = tensor.extract_slice %t1[%idx2][%idx2][1] : tensor<?xf32> to tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %f : tensor<?xf32>
   }

@@ -1511,11 +1567,11 @@
   // why the tensor.insert_slice is inplace and the two extract_slices are
   // out-of-place.
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]
   %r2 = tensor.insert_slice %r into %t1[%idx3][%idx3][1] : tensor<?xf32> into tensor<?xf32>

   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %r2 : tensor<?xf32>
 }

@@ -1526,23 +1582,27 @@
                            %idx: index, %idx2: index, %cond: i1) -> tensor<?xf32> {
   %r = scf.if %cond -> (tensor<?xf32>) {
     // CHECK: tensor.extract_slice
-    // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
     %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %e : tensor<?xf32>
   } else {
     // TODO: This one could bufferize inplace, but the analysis is too restrictive.
     // CHECK: tensor.extract_slice
-    // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
    %f = tensor.extract_slice %t1[%idx2][%idx2][1] : tensor<?xf32> to tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
    scf.yield %f : tensor<?xf32>
  }

  // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]
  %r2 = tensor.insert_slice %r into %t1[%idx2][%idx2][1] : tensor<?xf32> into tensor<?xf32>

  // CHECK: return
- // CHECK-SAME: {__equivalent_func_args__ = [0]}
+ // CHECK-SAME: __equivalent_func_args__ = [0]
  return %r2 : tensor<?xf32>
 }

@@ -1557,8 +1617,10 @@
     scf.yield %t1 : tensor<?xf32>
   } else {
     // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
     %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t2 : tensor<?xf32>
   }

@@ -1579,14 +1641,20 @@
     scf.yield %t1 : tensor<?xf32>
   } else {
     // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
     %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t2 : tensor<?xf32>
   }

   %t1_alias = scf.if %cond2 -> (tensor<?xf32>) {
     // scf.yield bufferizes to a read. That is a conflict in this example.
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t1 : tensor<?xf32>
   } else {
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t1 : tensor<?xf32>
   }
   %v2 = vector.transfer_read %t1_alias[%idx], %cst : tensor<?xf32>, vector<10xf32>
@@ -1600,7 +1668,7 @@
                 %v : vector<5xf32>) -> (tensor<?xf32>) {
   %idx = arith.constant 0 : index
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
   %0 = vector.transfer_write %v, %A[%idx] : vector<5xf32>, tensor<?xf32>
   return %0 : tensor<?xf32>
 }

@@ -1609,9 +1677,8 @@
 // CHECK-LABEL: func @main_func
 func @main_func(%A : tensor<?xf32> {linalg.inplaceable = true},
                 %v : vector<5xf32>) -> (tensor<?xf32>) {
-  // Function calls always bufferize out-of-place at the moment.
   // CHECK: call
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]
   %0 = call @some_use(%A, %v) : (tensor<?xf32>, vector<5xf32>) -> (tensor<?xf32>)
   return %0 : tensor<?xf32>
 }

@@ -1626,7 +1693,7 @@
   // Write to the tensor. Cannot be inplace due to tensor_load.
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
   %w = vector.transfer_write %v, %0[%idx1] : vector<5xf32>, tensor<?xf32>

   // Read from the tensor and return result.
@@ -1644,7 +1711,7 @@
     -> (vector<5xf32>, vector<5xf32>) {
   // Write + read to/from tensor.
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
   %1 = vector.transfer_write %v1, %t1[%idx2] : vector<5xf32>, tensor<?xf32>
   %cst = arith.constant 0.0 : f32
   %r1 = vector.transfer_read %1[%idx3], %cst : tensor<?xf32>, vector<5xf32>

@@ -1662,7 +1729,7 @@
 // CHECK-LABEL: func @inner_func
 func @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> {
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %t : tensor<?xf32>
 }

@@ -1684,7 +1751,7 @@
   %c0 = arith.constant 0 : index
   %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %0 : tensor<?xf32>
 }
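// NOTE: Throughout this file, `__equivalent_func_args__` annotates return ops
// with one entry per returned value: the index of the function argument whose
// buffer the value is equivalent to, or -1 if there is no such argument. For
// a hypothetical identity function (not part of this patch) this would read:
//
//   func @identity(%t : tensor<?xf32>) -> tensor<?xf32> {
//     // CHECK: return
//     // CHECK-SAME: __equivalent_func_args__ = [0]
//     return %t : tensor<?xf32>
//   }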