diff --git a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h
--- a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h
+++ b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h
@@ -194,10 +194,10 @@
   /// Set the inPlace bufferization spec to true.
   /// Merge result's and operand's aliasing sets and iterate to a fixed point.
-  void bufferizeInPlace(OpResult result, OpOperand &operand);
+  void bufferizeInPlace(OpOperand &operand, BufferizationState &state);
 
   /// Set the inPlace bufferization spec to false.
-  void bufferizeOutOfPlace(OpResult result);
+  void bufferizeOutOfPlace(OpOperand &operand);
 
   /// Return true if `v1` and `v2` bufferize to equivalent buffers.
   bool areEquivalentBufferizedValues(Value v1, Value v2) const {
@@ -224,10 +224,10 @@
   void applyOnAliases(Value v, function_ref<void(Value)> fun) const;
 
   /// Mark a value as in-place bufferized.
-  void markInPlace(OpResult v) { inplaceBufferized.insert(v); }
+  void markInPlace(OpOperand &o) { inplaceBufferized.insert(&o); }
 
   /// Return `true` if a value was marked as in-place bufferized.
-  bool isInPlace(OpResult opResult) const;
+  bool isInPlace(OpOperand &opOperand) const;
 
 private:
   /// llvm::EquivalenceClasses wants comparable elements. This comparator uses
@@ -245,7 +245,7 @@
   EquivalenceClassRangeType getAliases(Value v) const;
 
   /// Set of all OpResults that were decided to bufferize in-place.
-  llvm::DenseSet<OpResult> inplaceBufferized;
+  llvm::DenseSet<OpOperand *> inplaceBufferized;
 
   /// Auxiliary structure to store all the values a given value may alias with.
   /// Alias information is "may be" conservative: In the presence of branches, a
@@ -379,7 +379,7 @@
   Value lookupBuffer(Value tensor);
 
   /// Return `true` if the given OpResult has been decided to bufferize inplace.
-  bool isInPlace(OpResult opResult) const;
+  bool isInPlace(OpOperand &opOperand) const;
 
   /// Return `true` if the given value is mapped.
   bool isMapped(Value value) const;
diff --git a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.td b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.td
--- a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.td
+++ b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.td
@@ -104,18 +104,14 @@
       >,
       InterfaceMethod<
         /*desc=*/[{
-          Return `true` if the given OpResult must bufferize in-place with its
-          corresponding aliasing OpOperand. Alias sets and inplace attributes
-          will be set up accordingly before making any other bufferization
-          decisions. This method will never be called on OpResults that do not
-          have a tensor type.
-
-          Note: This method may not return `true` if the given OpResult does not
-          have an aliasing OpOperand.
+          Return `true` if the given OpOperand must bufferize in-place. Alias
+          sets and inplace attributes will be set up accordingly before making
+          any other bufferization decisions. This method will never be called on
+          OpOperands that do not have a tensor type.
}], /*retType=*/"bool", /*methodName=*/"mustBufferizeInPlace", - /*args=*/(ins "OpResult":$opResult, + /*args=*/(ins "OpOperand &":$opOperand, "BufferizationState &":$state), /*methodBody=*/"", /*defaultImplementation=*/[{ diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp --- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp @@ -114,27 +114,22 @@ } /// Return `true` if a value was marked as in-place bufferized. -bool BufferizationAliasInfo::isInPlace(OpResult opResult) const { - return inplaceBufferized.contains(opResult); +bool BufferizationAliasInfo::isInPlace(OpOperand &operand) const { + return inplaceBufferized.contains(&operand); } /// Set the inPlace bufferization spec to true. -void BufferizationAliasInfo::bufferizeInPlace(OpResult result, - OpOperand &operand) { - LLVM_DEBUG(llvm::dbgs() << "bufferizeInPlace: "); - LLVM_DEBUG(result.print(llvm::dbgs())); - - markInPlace(result); - aliasInfo.unionSets(result, operand.get()); +void BufferizationAliasInfo::bufferizeInPlace(OpOperand &operand, + BufferizationState &state) { + markInPlace(operand); + if (OpResult result = state.getAliasingOpResult(operand)) + aliasInfo.unionSets(result, operand.get()); } /// Set the inPlace bufferization spec to false. -void BufferizationAliasInfo::bufferizeOutOfPlace(OpResult result) { - LLVM_DEBUG(llvm::dbgs() << "bufferizeOutOfPlace: "); - LLVM_DEBUG(result.print(llvm::dbgs())); - - if (inplaceBufferized.contains(result)) - inplaceBufferized.erase(result); +void BufferizationAliasInfo::bufferizeOutOfPlace(OpOperand &operand) { + assert(!inplaceBufferized.contains(&operand) && + "OpOperand was already decided to bufferize inplace"); } /// Apply `fun` to all the members of the equivalence class of `v`. @@ -340,16 +335,13 @@ op->walk([&](BufferizableOpInterface bufferizableOp) { if (!options.isOpAllowed(bufferizableOp)) return WalkResult::skip(); - for (OpResult opResult : bufferizableOp->getOpResults()) { - if (opResult.getType().isa()) - if (bufferizableOp.mustBufferizeInPlace(opResult, *this)) { - SmallVector operands = - bufferizableOp.getAliasingOpOperand(opResult, *this); - assert(!operands.empty() && - "expected that OpResult has aliasing OpOperand"); - for (OpOperand *operand : operands) - aliasInfo.unionAliasSets(operand->get(), opResult); - aliasInfo.markInPlace(opResult); + for (OpOperand &opOperand : bufferizableOp->getOpOperands()) { + if (opOperand.get().getType().isa()) + if (bufferizableOp.mustBufferizeInPlace(opOperand, *this)) { + if (OpResult opResult = + bufferizableOp.getAliasingOpResult(opOperand, *this)) + aliasInfo.unionAliasSets(opOperand.get(), opResult); + aliasInfo.markInPlace(opOperand); } } return WalkResult::advance(); @@ -382,7 +374,7 @@ } // If bufferizing out-of-place, allocate a new buffer. - if (!aliasInfo.isInPlace(result)) { + if (!aliasInfo.isInPlace(*opOperand)) { // Ops with multiple aliasing operands can currently not bufferize // out-of-place. 
    assert(
@@ -694,8 +686,8 @@
 }
 
 bool mlir::linalg::comprehensive_bufferize::BufferizationState::isInPlace(
-    OpResult opResult) const {
-  return aliasInfo.isInPlace(opResult);
+    OpOperand &opOperand) const {
+  return aliasInfo.isInPlace(opOperand);
 }
 
 void mlir::linalg::comprehensive_bufferize::BufferizationState::markOpObsolete(
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizationInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizationInterfaceImpl.cpp
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizationInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizationInterfaceImpl.cpp
@@ -42,6 +42,19 @@
     return true;
   }
 
+  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
+                               BufferizationState &state) const {
+    // It is unknown whether the resulting MemRef will be written or not.
+    return true;
+  }
+
+  bool mustBufferizeInPlace(Operation *op, OpOperand &opOperand,
+                            BufferizationState &state) const {
+    // ToMemrefOps always bufferize inplace.
+    // TODO: Remove ToMemrefOps from the analysis.
+    return true;
+  }
+
   OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand,
                                BufferizationState &state) const {
     return OpResult();
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp
@@ -147,26 +147,27 @@
 //===----------------------------------------------------------------------===//
 
 /// Attribute marker to specify op results that can be bufferized inPlace.
-constexpr StringLiteral kInPlaceResultsAttrName = "__inplace_results_attr__";
-
-/// Mark whether OpResult can actually be bufferized inplace.
-/// If `inPlace` is `true`, the use-def chain analysis has guaranteed that no
-/// subsequent write would occur to the bufferized tensor value (i.e. the result
-/// can be bufferized inplace).
-static void setInPlaceOpResult(OpResult opResult, bool inPlace) {
-  if (!opResult)
-    return;
+constexpr StringLiteral kInPlaceResultsAttrName = "__inplace_operands_attr__";
 
-  Operation *op = opResult.getOwner();
+/// Mark whether OpOperand will be bufferized inplace.
+static void setInPlaceOpOperand(OpOperand &opOperand, bool inPlace) {
+  Operation *op = opOperand.getOwner();
   auto attr = op->getAttr(kInPlaceResultsAttrName).dyn_cast_or_null<ArrayAttr>();
-  SmallVector<StringRef> inPlaceVector =
-      attr ? SmallVector<StringRef>(
-                 llvm::to_vector<4>(attr.getAsValueRange<StringAttr>()))
-           : SmallVector<StringRef>(op->getNumResults(), "false");
-  LDBG("->set inPlace=" << inPlace << " <- #" << opResult.getResultNumber()
+  SmallVector<StringRef> inPlaceVector;
+  if (attr) {
+    inPlaceVector = SmallVector<StringRef>(
+        llvm::to_vector<4>(attr.getAsValueRange<StringAttr>()));
+  } else {
+    inPlaceVector = SmallVector<StringRef>(op->getNumOperands(), "none");
+    for (OpOperand &opOperand : op->getOpOperands())
+      if (opOperand.get().getType().isa<TensorType>())
+        inPlaceVector[opOperand.getOperandNumber()] = "false";
+  }
+
+  LDBG("->set inPlace=" << inPlace << " <- #" << opOperand.getOperandNumber()
                         << ": " << printOperationInfo(op) << "\n");
-  inPlaceVector[opResult.getResultNumber()] = inPlace ? "true" : "false";
+  inPlaceVector[opOperand.getOperandNumber()] = inPlace ?
"true" : "false"; op->setAttr(kInPlaceResultsAttrName, OpBuilder(op).getStrArrayAttr(inPlaceVector)); } @@ -230,21 +231,11 @@ static bool isInplaceMemoryWrite(OpOperand &opOperand, const BufferizationAliasInfo &aliasInfo, BufferizationState &state) { - // The analysis does not know what happens to the result of a ToMemrefOp, so - // we assume that it is written to. - // TODO: This is a conservative implementation. This rule will have to be - // relaxed for partial bufferization. - if (isa(opOperand.getOwner())) - return true; - // OpOperands without an aliasing OpResult do not write. - OpResult opResult = state.getAliasingOpResult(opOperand); - if (!opResult) - return false; // OpOperands that do not bufferize to a memory write do not write in-place. if (!state.bufferizesToMemoryWrite(opOperand)) return false; // Check current bufferization decisions. - return aliasInfo.isInPlace(opResult); + return aliasInfo.isInPlace(opOperand); } /// Return true if, under current bufferization decisions, the buffer of `value` @@ -255,8 +246,8 @@ LDBG("WRITABILITY ANALYSIS FOR " << printValueInfo(value) << "\n"); bool foundNonWritableBuffer = false; aliasInfo.applyOnAliases(value, [&](Value v) { - // Query BufferizableOpInterface to see if the OpResult is writable. - // TODO: Out-of-place bufferized OpResult could be considered writable. + // Query BufferizableOpInterface to see if the value is writable. + // TODO: Out-of-place bufferized value could be considered writable. if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(v)) if (bufferizableOp && bufferizableOp.isWritable(v, state)) return; @@ -424,8 +415,8 @@ return false; } -/// Return true if bufferizing result inplace would create a conflict. A read R -/// and a write W of the same alias set is a conflict if inplace bufferization +/// Return true if bufferizing `operand` inplace would create a conflict. A read +/// R and a write W of the same alias set is a conflict if inplace bufferization /// of W changes the value read by R to a value different from the one that /// would be expected by tracing back R's origin through SSA use-def chains. /// A conflict can only be introduced by a new alias and/or an inplace @@ -453,21 +444,10 @@ /// Note: If `checkConsistencyOnly`, this function may be called with a null /// OpResult. In that case, only the consistency of bufferization decisions /// involving aliases of the given OpOperand are checked. -bool wouldCreateReadAfterWriteInterference( - OpOperand &operand, OpResult result, const DominanceInfo &domInfo, - BufferizationState &state, const BufferizationAliasInfo &aliasInfo, +static bool wouldCreateReadAfterWriteInterference( + OpOperand &operand, const DominanceInfo &domInfo, BufferizationState &state, + const BufferizationAliasInfo &aliasInfo, bool checkConsistencyOnly = false) { -#ifndef NDEBUG - if (result) { - SmallVector opOperands = state.getAliasingOpOperand(result); - assert(llvm::find(opOperands, &operand) != opOperands.end() && - "operand and result do not match"); - } else { - assert(checkConsistencyOnly && - "result not provided, can only check consistency"); - } -#endif // NDEBUG - // Helper function to iterate on aliases of `root` and capture the reads. auto getAliasingReads = [&](DenseSet &res, Value root) { aliasInfo.applyOnAliases(root, [&](Value alias) { @@ -491,11 +471,11 @@ // Collect reads and writes of all aliases of OpOperand and OpResult. 
DenseSet usesRead, usesWrite; getAliasingReads(usesRead, operand.get()); - if (result) - getAliasingReads(usesRead, result); getAliasingInplaceWrites(usesWrite, operand.get()); - if (result) + if (OpResult result = state.getAliasingOpResult(operand)) { + getAliasingReads(usesRead, result); getAliasingInplaceWrites(usesWrite, result); + } if (!checkConsistencyOnly && state.bufferizesToMemoryWrite(operand)) usesWrite.insert(&operand); @@ -503,32 +483,25 @@ aliasInfo); } -/// Return true if bufferizing `opOperand` inplace with `opResult` would create -/// a write to a non-writable buffer. +/// Return true if bufferizing `opOperand` inplace would create a write to a +/// non-writable buffer. static bool -wouldCreateWriteToNonWritableBuffer(OpOperand &opOperand, OpResult opResult, +wouldCreateWriteToNonWritableBuffer(OpOperand &opOperand, const BufferizationAliasInfo &aliasInfo, BufferizationState &state) { -#ifndef NDEBUG - SmallVector opOperands = state.getAliasingOpOperand(opResult); - assert(llvm::find(opOperands, &opOperand) != opOperands.end() && - "operand and result do not match"); -#endif // NDEBUG - // Certain buffers are not writeable: // 1. A function bbArg that is not inplaceable or // 2. A constant op. - assert(!aliasesNonWritableBuffer(opResult, aliasInfo, state) && - "expected that opResult does not alias non-writable buffer"); bool nonWritable = aliasesNonWritableBuffer(opOperand.get(), aliasInfo, state); if (!nonWritable) return false; // This is a problem only if the buffer is written to via some alias. - bool hasWrite = aliasesInPlaceWrite(opResult, aliasInfo, state) || - aliasesInPlaceWrite(opOperand.get(), aliasInfo, state) || + bool hasWrite = aliasesInPlaceWrite(opOperand.get(), aliasInfo, state) || state.bufferizesToMemoryWrite(opOperand); + if (OpResult opResult = state.getAliasingOpResult(opOperand)) + hasWrite |= aliasesInPlaceWrite(opResult, aliasInfo, state); if (!hasWrite) return false; @@ -540,39 +513,23 @@ // Bufferization analyses. //===----------------------------------------------------------------------===// -/// Determine if `operand` can be bufferized in-place with `result`. +/// Determine if `operand` can be bufferized in-place. 
static LogicalResult bufferizableInPlaceAnalysisImpl( - OpOperand &operand, OpResult result, BufferizationAliasInfo &aliasInfo, + OpOperand &operand, BufferizationAliasInfo &aliasInfo, BufferizationState &state, const DominanceInfo &domInfo) { -#ifndef NDEBUG - SmallVector opOperands = state.getAliasingOpOperand(result); - assert(llvm::find(opOperands, &operand) != opOperands.end() && - "operand and result do not match"); -#endif // NDEBUG - - int64_t resultNumber = result.getResultNumber(); - (void)resultNumber; - LDBG('\n'); - LDBG("Inplace analysis for <- #" << resultNumber << " -> #" - << operand.getOperandNumber() << " in " - << printValueInfo(result) << '\n'); - bool foundInterference = - wouldCreateWriteToNonWritableBuffer(operand, result, aliasInfo, state) || - wouldCreateReadAfterWriteInterference(operand, result, domInfo, state, - aliasInfo); + wouldCreateWriteToNonWritableBuffer(operand, aliasInfo, state) || + wouldCreateReadAfterWriteInterference(operand, domInfo, state, aliasInfo); if (foundInterference) - aliasInfo.bufferizeOutOfPlace(result); + aliasInfo.bufferizeOutOfPlace(operand); else - aliasInfo.bufferizeInPlace(result, operand); - - LDBG("Done inplace analysis for result #" << resultNumber << '\n'); + aliasInfo.bufferizeInPlace(operand, state); return success(); } -/// Analyze the `ops` to determine which OpResults are inplaceable. Walk ops in +/// Analyze the `ops` to determine which OpOperands are inplaceable. Walk ops in /// reverse and bufferize ops greedily. This is a good starter heuristic. /// /// Even if an op does not read or write, it may still create an alias when @@ -608,11 +565,9 @@ for (OpOperand &opOperand : op->getOpOperands()) if (opOperand.get().getType().isa()) if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(op)) - if (OpResult opResult = - bufferizableOp.getAliasingOpResult(opOperand, state)) - if (failed(bufferizableInPlaceAnalysisImpl( - opOperand, opResult, aliasInfo, state, domInfo))) - return failure(); + if (failed(bufferizableInPlaceAnalysisImpl(opOperand, aliasInfo, + state, domInfo))) + return failure(); return success(); } @@ -644,15 +599,12 @@ if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(op)) for (OpResult opResult : op->getOpResults()) if (opResult.getType().isa()) - if (aliasInfo.isInPlace(opResult)) { - SmallVector opOperands = - bufferizableOp.getAliasingOpOperand(opResult, state); - if (!opOperands.empty()) + for (OpOperand *opOperand : + bufferizableOp.getAliasingOpOperand(opResult, state)) + if (state.isInPlace(*opOperand)) if (bufferizableOp.bufferRelation(opResult, aliasInfo, state) == BufferRelation::Equivalent) - for (OpOperand *opOperand : opOperands) - aliasInfo.unionEquivalenceClasses(opResult, opOperand->get()); - } + aliasInfo.unionEquivalenceClasses(opResult, opOperand->get()); } /// Analyze equivalence of tied OpResult/OpOperand pairs of all ops contained @@ -683,15 +635,12 @@ if (auto bufferizableOp = options.dynCastBufferizableOp(op)) for (OpOperand &opOperand : op->getOpOperands()) if (opOperand.get().getType().isa()) { - OpResult opResult = - bufferizableOp.getAliasingOpResult(opOperand, state); if (wouldCreateReadAfterWriteInterference( - opOperand, opResult, domInfo, state, aliasInfo, + opOperand, domInfo, state, aliasInfo, /*checkConsistencyOnly=*/true)) { - // This error can happen for two reasons. Either the input IR - // already has a read-after-write conflict. Or certain - // "mustBufferizeInPlace" interface methods are implemented - // incorrectly. 
+ // This error can happen if certain "mustBufferizeInPlace" interface + // methods are implemented incorrectly, such that the IR already has + // a RaW conflict before making any bufferization decisions. inconsistentOp = op; return WalkResult::interrupt(); } @@ -700,10 +649,6 @@ }); if (walkResult.wasInterrupted()) - // This can currently happen in one situation: When a tensor is passed into - // a ToMemrefOp and read by another op consecutively. ToMemrefOps are - // currently handled conservatively. Once a tensor is passed into a - // ToMemrefOp, it may longer be read. return inconsistentOp->emitError("input IR has RaW conflict"); return success(); } @@ -711,11 +656,13 @@ /// Annotate the IR with the result of the analysis. For testing/debugging only. static void annotateOpsWithBufferizationMarkers(Operation *op, - const BufferizationAliasInfo &aliasInfo) { + const BufferizationAliasInfo &aliasInfo, + BufferizationState &state) { op->walk([&](Operation *op) { - for (OpResult opResult : op->getResults()) - if (opResult.getType().isa()) - setInPlaceOpResult(opResult, aliasInfo.isInPlace(opResult)); + if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(op)) + for (OpOperand &opOperand : op->getOpOperands()) + if (opOperand.get().getType().isa()) + setInPlaceOpOperand(opOperand, aliasInfo.isInPlace(opOperand)); }); } @@ -762,7 +709,7 @@ // Annotate operations if we only want to report the analysis. if (options.testAnalysisOnly) { - annotateOpsWithBufferizationMarkers(op, aliasInfo); + annotateOpsWithBufferizationMarkers(op, aliasInfo, state); return success(); } diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.cpp --- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.cpp @@ -383,6 +383,14 @@ return OpResult(); } + bool mustBufferizeInPlace(Operation *op, OpOperand &opOperand, + BufferizationState &state) const { + // Yield operands always bufferize inplace. Otherwise, an alloc + copy + // may be generated inside the block. We should not return/yield allocations + // when possible. + return true; + } + LogicalResult bufferize(Operation *op, OpBuilder &b, BufferizationState &state) const { auto yieldOp = cast(op); @@ -444,15 +452,16 @@ OpResult opResult = val.dyn_cast(); if (!opResult) return true; - if (!aliasInfo.isInPlace(opResult)) - return true; + // if (!aliasInfo.isInPlace(opResult)) + // return true; // Only equivalent tensors are supported at the moment. // TODO: Support cases such as extract_slice(init_tensor). SmallVector opOperands = state.getAliasingOpOperand(opResult); if (!llvm::all_of(opOperands, [&](OpOperand *operand) { return aliasInfo.areEquivalentBufferizedValues(operand->get(), - opResult); + opResult) && + aliasInfo.isInPlace(*operand); })) return true; return false; @@ -530,7 +539,7 @@ if (!insertSliceOp) return false; // Only inplace bufferized InsertSliceOps are eligible. 
- if (!aliasInfo.isInPlace(insertSliceOp->getOpResult(0))) + if (!aliasInfo.isInPlace(operand)) return false; return &operand == &insertSliceOp->getOpOperand(0) /*source*/; }, diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp --- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp +++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp @@ -510,6 +510,11 @@ return true; } + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + BufferizationState &state) const { + return false; + } + OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand, BufferizationState &state) const { // CallOpInterface is special, it needs to wait for the callee to be diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp --- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp @@ -38,14 +38,6 @@ return {&yieldOp->getOpOperand(resultNum)}; } - bool mustBufferizeInPlace(Operation *op, OpResult opResult, - BufferizationState &state) const { - // ExecuteRegionOp results always bufferize in-place. Since they have no - // OpOperands, they are mostly ignored by the analysis once alias sets are - // set up. - return true; - } - // TODO: For better bufferization results, this could return `true` only if // there is a memory write in the region. bool isMemoryWrite(Operation *op, OpResult opResult, @@ -125,13 +117,6 @@ return true; } - bool mustBufferizeInPlace(Operation *op, OpResult opResult, - BufferizationState &state) const { - // IfOp results always bufferize in-place. Since they have no OpOperands, - // they are mostly ignored by the analysis once alias sets are set up. - return true; - } - LogicalResult bufferize(Operation *op, OpBuilder &b, BufferizationState &state) const { auto ifOp = cast(op); @@ -325,9 +310,21 @@ OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand, BufferizationState &state) const { + if (isa(op->getParentOp())) + return op->getParentOp()->getResult(opOperand.getOperandNumber()); + if (isa(op->getParentOp())) + return op->getParentOp()->getResult(opOperand.getOperandNumber()); return OpResult(); } + bool mustBufferizeInPlace(Operation *op, OpOperand &opOperand, + BufferizationState &state) const { + // Yield operands always bufferize inplace. Otherwise, an alloc + copy + // may be generated inside the block. We should not return/yield allocations + // when possible. + return true; + } + LogicalResult bufferize(Operation *op, OpBuilder &b, BufferizationState &state) const { auto yieldOp = cast(op); diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp --- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp @@ -155,7 +155,7 @@ extractSliceOp.result().getType().cast(); // If not inplaceable, alloc. 
- bool inplace = state.isInPlace(extractSliceOp->getResult(0)); + bool inplace = state.isInPlace(extractSliceOp->getOpOperand(0)); Value alloc; if (!inplace) alloc = state.createAllocDeallocPair(b, loc, extractSliceOp.result()); @@ -285,7 +285,7 @@ if (extractSliceOp && areEquivalentExtractSliceOps(aliasInfo, extractSliceOp, insertSliceOp) && - aliasInfo.isInPlace(extractSliceOp->getResult(0))) { + aliasInfo.isInPlace(extractSliceOp->getOpOperand(0))) { foundOp = true; } }); @@ -469,7 +469,7 @@ // cloned and the clone needs to be updated. if (isSourceEquivalentToAMatchingInplaceExtractSliceOp(aliasInfo, insertSliceOp) && - state.isInPlace(insertSliceOp->getResult(0))) + state.isInPlace(insertSliceOp->getOpOperand(0))) tensorState.insertSliceOpsWithoutCopy.insert(insertSliceOp); }); return success(); diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir --- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir +++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir @@ -20,11 +20,11 @@ // aliasing subviews at all call sites or whether they allocate. // This is true irrespective of whether the function argument is inplaceable. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} %r0 = tensor.extract_slice %A[0][4][1] : tensor to tensor<4xf32> // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} %r1 = tensor.extract_slice %B[0][8][1] : tensor to tensor<8xf32> return %r0, %r1: tensor<4xf32>, tensor<8xf32> @@ -41,16 +41,16 @@ { // must bufferize out of place. // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false"]} %r0 = tensor.insert_slice %C into %A[0][4][1] : tensor<4xf32> into tensor // bufferizes inplace. // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]} %r1 = tensor.insert_slice %C into %B[0][4][1] : tensor<4xf32> into tensor // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [-1, 1]} + // CHECK-SAME: __equivalent_func_args__ = [-1, 1] return %r0, %r1: tensor, tensor } @@ -64,27 +64,27 @@ { // matmul output operand interferes with input operand. // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]} %C = linalg.matmul ins(%A, %B: tensor<4x4xf32>, tensor<4x4xf32>) outs(%B: tensor<4x4xf32>) -> tensor<4x4xf32> // matmul output operand interferes with input operand. // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]} %D = linalg.matmul ins(%B, %A: tensor<4x4xf32>, tensor<4x4xf32>) outs(%B: tensor<4x4xf32>) -> tensor<4x4xf32> // matmul output operand does not interferes with input operand. 
// CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>) outs(%B: tensor<4x4xf32>) -> tensor<4x4xf32> // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [-1, -1, 1]} + // CHECK-SAME: __equivalent_func_args__ = [-1, -1, 1] return %C, %D, %E: tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32> } @@ -103,16 +103,16 @@ // bufferize out of place. Let callers decide whether they want to create // aliasing subviews at all call sites or whether they allocate. // This is true irrespective of whether the function argument is inplaceable. - // CHECK: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["true"]} %r0 = tensor.extract_slice %A[0][4][1] : tensor to tensor<4xf32> - // CHECK: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["true"]} %r1 = tensor.extract_slice %r0[0][2][1] : tensor<4xf32> to tensor<2xf32> - // CHECK: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["true"]} %r2 = tensor.extract_slice %B[0][4][1] : tensor to tensor<4xf32> - // CHECK: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["true"]} %r3 = tensor.extract_slice %r2[0][2][1] : tensor<4xf32> to tensor<2xf32> return %r1, %r3: tensor<2xf32>, tensor<2xf32> @@ -128,20 +128,20 @@ %B : tensor, %B2 : tensor<4xf32>, %B3 : tensor<2xf32>) -> (tensor, tensor) { - // CHECK: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["true", "true"]} %r0 = tensor.insert_slice %A3 into %A2[0][2][1] : tensor<2xf32> into tensor<4xf32> - // CHECK: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["true", "true"]} %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor - // CHECK: {__inplace_results_attr__ = ["false"]} + // CHECK: {__inplace_operands_attr__ = ["true", "false"]} %r2 = tensor.insert_slice %B3 into %B2[0][2][1] : tensor<2xf32> into tensor<4xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} + // CHECK: {__inplace_operands_attr__ = ["true", "false"]} %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [0, -1]} + // CHECK-SAME: __equivalent_func_args__ = [0, -1] return %r1, %r3: tensor, tensor } @@ -157,12 +157,12 @@ // %r0 is an overlapping tensor.extract_slice that does not match, it must be // out of place. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["false"]} %r0 = tensor.extract_slice %A[0][4][1] : tensor to tensor<4xf32> // %r1 can bufferize inplace fine. // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]} %r1 = tensor.insert_slice %r0 into %A[%idx][4][1] : tensor<4xf32> into tensor // %r3 does bufferizes inplace because %B is not inplaceable. @@ -170,16 +170,16 @@ // not alias with the buffer coming from %r3 so it can actually bufferize // inplace. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} %r2 = tensor.extract_slice %B[0][4][1] : tensor to tensor<4xf32> // %r3 cannot bufferize inplace since %B is not inplaceable. 
// CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none"]} %r3 = tensor.insert_slice %r2 into %B[%idx][4][1] : tensor<4xf32> into tensor // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [0, -1]} + // CHECK-SAME: __equivalent_func_args__ = [0, -1] return %r1, %r3: tensor, tensor } @@ -195,17 +195,17 @@ // %r0 is a tensor.extract_slice that matches, it can also be bufferized // inplace. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} %r0 = tensor.extract_slice %A[0][4][1] : tensor to tensor<4xf32> // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]} %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor // %r2 is a tensor.extract_slice that matches %r3, it can be bufferized // inplace. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} %r2 = tensor.extract_slice %B[0][4][1] : tensor to tensor<4xf32> // tensor.insert_slice cannot bufferize inplace. @@ -213,11 +213,11 @@ // be unproductive to have special logic in bufferization to encode matching // insert_slice(extract_slice(A), A). // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false"]} %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [0, -1]} + // CHECK-SAME: __equivalent_func_args__ = [0, -1] return %r1, %r3: tensor, tensor } @@ -232,21 +232,21 @@ %cst2 = arith.constant 1.0 : f32 // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]} %0 = tensor.extract_slice %A[%idx][%idx][1] : tensor to tensor // CHECK: linalg.fill - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} %1 = linalg.fill(%cst, %0) : f32, tensor -> tensor // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]} %2 = tensor.insert_slice %1 into %A[%idx][%idx][1] : tensor into tensor %3 = vector.transfer_read %1[%idx2], %cst2 : tensor, vector<5xf32> // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [0, -1]} + // CHECK-SAME: __equivalent_func_args__ = [0, -1] return %2, %3 : tensor, vector<5xf32> } @@ -262,33 +262,33 @@ %cst2 = arith.constant 1.0 : f32 // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]} %0 = tensor.extract_slice %A[%idx][%idx][1] : tensor to tensor // CHECK: linalg.fill - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} %1 = linalg.fill(%cst, %0) : f32, tensor -> tensor // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]} %2 = tensor.insert_slice %1 into %A[%idx][%idx][1] : tensor into tensor // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: 
{__inplace_operands_attr__ = ["true", "none", "none"]} %4 = tensor.extract_slice %2[%idx3][%idx3][1] : tensor to tensor // CHECK: linalg.fill - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} %5 = linalg.fill(%cst, %4) : f32, tensor -> tensor %3 = vector.transfer_read %1[%idx2], %cst2 : tensor, vector<5xf32> // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]} %6 = tensor.insert_slice %5 into %2[%idx3][%idx3][1] : tensor into tensor // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [0, -1]} + // CHECK-SAME: __equivalent_func_args__ = [0, -1] return %6, %3 : tensor, vector<5xf32> } @@ -304,25 +304,25 @@ // tensor.extract_slice is only used as a read, no interference irrespective // of user's inplace status. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} %sA = tensor.extract_slice %A[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> // matmul output operand is not inplaceable at the function boundary. // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]} %D = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>) outs(%B: tensor<4x4xf32>) -> tensor<4x4xf32> // matmul output operand is inplaceable at the function boundary. // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %E = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>) outs(%C: tensor<4x4xf32>) -> tensor<4x4xf32> // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [-1, 2]} + // CHECK-SAME: __equivalent_func_args__ = [-1, 2] return %D, %E: tensor<4x4xf32>, tensor<4x4xf32> } @@ -338,12 +338,12 @@ // Step 4. %sB forward propagates to a write in %D but it is not inplace. // So this is only ever read and can bufferize inplace. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> // Step 3. %sB has a read interference in %E, it does not bufferize inplace. // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]} %D = linalg.matmul ins(%B, %C: tensor, tensor) outs(%sB: tensor<4x4xf32>) -> tensor<4x4xf32> @@ -352,13 +352,13 @@ // %sC backward propagates to %C which is inplaceable. // As a consequence this is bufferized inplace. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> // Step 1. %sC backprops to the tensor.extract_slice producer which is not // considered an interference. This bufferizes inplace. 
// CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %E = linalg.matmul ins(%A, %sB: tensor<4x4xf32>, tensor<4x4xf32>) outs(%sC: tensor<4x4xf32>) -> tensor<4x4xf32> @@ -374,23 +374,23 @@ %B: tensor<6x6xf32>, %C: tensor<30x20xf32> {linalg.inplaceable = true}) -> tensor<30x20xf32> { // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none", "none", "none"]} %15 = tensor.extract_slice %C[%s3, %s4] [%s1, %s2] [1, 1] : tensor<30x20xf32> to tensor // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %18 = linalg.matmul ins(%A, %B : tensor<8x6xf32>, tensor<6x6xf32>) outs(%15 : tensor) -> tensor // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]} %19 = tensor.extract_slice %18[0, 0] [%s1, %s2] [1, 1] : tensor to tensor // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none", "none", "none"]} %20 = tensor.insert_slice %19 into %C[%s3, %s4] [%s1, %s2] [1, 1] : tensor into tensor<30x20xf32> // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [6]} + // CHECK-SAME: __equivalent_func_args__ = [6] return %20 : tensor<30x20xf32> } @@ -411,13 +411,13 @@ // %sB backward propagates to %B which is not inplaceable. // As a consequence this is bufferized out of place. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["false"]} %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> // Step 3. %sB backprops to the tensor.extract_slice producer which is not // considered an interference. This bufferizes inplace. // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %D = linalg.matmul ins(%B, %C: tensor, tensor) outs(%sB: tensor<4x4xf32>) -> tensor<4x4xf32> @@ -426,13 +426,13 @@ // %sC backward propagates to %C which is inplaceable. // As a consequence this is bufferized inplace. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> // Step 1. %sC backprops to the tensor.extract_slice producer which is not // considered an interference. This bufferizes inplace. // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>) outs(%sC: tensor<4x4xf32>) -> tensor<4x4xf32> @@ -462,15 +462,15 @@ // - %sA would then bufferize to an inplace write (i.e. %FA) but %A is not // inplaceable and so %sA is not inplaceable. 
// CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]} // CHECK-NEXT: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} // CHECK-NEXT: fill - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]} // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none", "none"]} %sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor to tensor %ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> %FA = linalg.fill(%f0, %ssA) : f32, tensor<4x4xf32> -> tensor<4x4xf32> @@ -480,19 +480,19 @@ // 3-level matching tensor.extract_slice / tensor.insert_slice into // inplaceable %B. // CHECK-NEXT: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]} // CHECK-NEXT: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]} // CHECK-NEXT: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} // CHECK-NEXT: fill - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]} // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]} // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]} %sB = tensor.extract_slice %B[0, 0][%idx, %idx][1, 1] : tensor to tensor %ssB = tensor.extract_slice %sB[0, 0][4, %idx][1, 1] : tensor to tensor<4x?xf32> %sssB = tensor.extract_slice %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32> @@ -505,18 +505,18 @@ // inplaceable %C with a twist. // Throw a wrench in the system: %rsC production sizes do not match %ssC. // CHECK-NEXT: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]} // The tensor.insert_slice that would be candidate for matching does not actually // match. That tensor.insert_slice can still be bufferized inplace nonetheless // but this tensor.extract_slice, which bufferizes to an inplace write, cannot. 
// CHECK-NEXT: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none"]} // CHECK-NEXT: fill - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]} // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]} %sC = tensor.extract_slice %C[0, 0][%idx, %idx][1, 1] : tensor to tensor %ssC = tensor.extract_slice %sC[0, 0][%sz1, 4][1, 1] : tensor to tensor %FC = linalg.fill(%f0, %ssC) : f32, tensor -> tensor @@ -524,7 +524,7 @@ %rC = tensor.insert_slice %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor into tensor // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [-1, 1, 2]} + // CHECK-SAME: __equivalent_func_args__ = [-1, 1, 2] return %rA, %rB, %rC: tensor, tensor, tensor } @@ -542,20 +542,22 @@ { // CHECK: scf.for // CHECK-NEXT: scf.yield - // CHECK-NEXT: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "false"]} %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { scf.yield %t : tensor } // CHECK: scf.for // CHECK-NEXT: scf.yield - // CHECK-NEXT: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]} %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %B) -> (tensor) { scf.yield %t : tensor } // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [-1, 1]} + // CHECK-SAME: __equivalent_func_args__ = [-1, 1] return %r0, %r1: tensor, tensor } @@ -574,11 +576,11 @@ // which makes bbArg inplaceable. // 2. Or it is already inplaceable and so is bbArg. // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]} // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - // CHECK-NEXT: scf.yield - // CHECK-NEXT: {__inplace_results_attr__ = ["false", "true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]} + // CHECK-NEXT: scf.yield {__inplace_operands_attr__ = ["true", "true"]} + // CHECK-NEXT: } {__inplace_operands_attr__ = ["none", "none", "none", "false", "true"]} %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B) -> (tensor, tensor) { @@ -588,7 +590,7 @@ } // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [-1, 1]} + // CHECK-SAME: __equivalent_func_args__ = [-1, 1] return %r0#0, %r0#1: tensor, tensor } @@ -606,8 +608,10 @@ // of %r1 is read. // CHECK: scf.for // CHECK-NEXT: call + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} // CHECK-NEXT: scf.yield - // CHECK-NEXT: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "false"]} %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { call @some_use(%t) : (tensor) -> () scf.yield %t : tensor @@ -616,8 +620,10 @@ // %r1 bufferizes inplace fine. 
// CHECK: scf.for // CHECK-NEXT: call + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} // CHECK-NEXT: scf.yield - // CHECK-NEXT: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]} %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { call @some_use(%t) : (tensor) -> () scf.yield %t : tensor @@ -627,8 +633,10 @@ // of %r3 is read. // CHECK: linalg.tiled_loop // CHECK-NEXT: call + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} // CHECK-NEXT: linalg.yield - // CHECK-NEXT: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "false"]} %r2 = linalg.tiled_loop (%i) = (%lb) to (%ub) step (%step) ins() outs(%t = %B: tensor) { @@ -639,8 +647,10 @@ // %r3 bufferizes inplace fine. // CHECK: linalg.tiled_loop // CHECK-NEXT: call + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} // CHECK-NEXT: linalg.yield - // CHECK-NEXT: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]} %r3 = linalg.tiled_loop (%i) = (%lb) to (%ub) step (%step) ins() outs(%t = %B: tensor) { @@ -649,7 +659,7 @@ } // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [0, 1]} + // CHECK-SAME: __equivalent_func_args__ = [0, 1] return %r1, %r3: tensor, tensor } @@ -670,12 +680,12 @@ // value. The calls to `foo` are determined to read conservatively, so %A // cannot bufferize inplace. // CHECK: fill - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} %A = linalg.fill(%f1, %I) : f32, tensor<64xf32> -> tensor<64xf32> // 1. Bufferizes inplace: no alias to %A is yet possible. // CHECK: fill - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} %B = linalg.fill(%f2, %I) : f32, tensor<64xf32> -> tensor<64xf32> call @foo(%A) : (tensor<64xf32>) -> () @@ -706,17 +716,19 @@ // The calls to `foo` are determined to read conservatively, so %A cannot // bufferize inplace. // CHECK: fill - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} %A = linalg.fill(%f1, %I) : f32, tensor<64xf32> -> tensor<64xf32> // 4. Bufferizes inplace: no alias to %A is yet possible. // CHECK: fill - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} %B = linalg.fill(%f2, %I) : f32, tensor<64xf32> -> tensor<64xf32> // 3. Does not read or write, bufferizes inplace. - // CHECK: scf.for - // CHECK: {__inplace_results_attr__ = ["true", "true"]} + // CHECK: scf.for + // CHECK-NEXT: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]} + // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true", "true"]} %r:2 = scf.for %i = %c0 to %c10 step %c1 iter_args(%0 = %A, %1 = %B) -> (tensor<64xf32>, tensor<64xf32>) { @@ -729,12 +741,12 @@ // value. The calls to `foo` are determined to read conservatively, so %A2 // cannot bufferize inplace. // CHECK: fill - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} %A2 = linalg.fill(%f1, %I2) : f32, tensor<64xf32> -> tensor<64xf32> // 1. Bufferizes inplace: no alias to %A2 is yet possible. 
   // CHECK: fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
   %B2 = linalg.fill(%f2, %I2) : f32, tensor<64xf32> -> tensor<64xf32>
   call @bar(%A2) : (tensor<64xf32>) -> ()
@@ -754,10 +766,10 @@
                         %s3 : index) -> tensor<?xi32> {
   %A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]}
   %b = tensor.extract_slice %A[%s1][%s2][1] : tensor<4xi32> to tensor<?xi32>
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]}
   %r = vector.transfer_write %v, %b[%s3] : vector<5xi32>, tensor<?xi32>
   return %r : tensor<?xi32>
 }
@@ -777,18 +789,18 @@
   %7 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
   %8 = linalg.fill(%cst_0, %7) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
   %11 = linalg.fill(%cst_1, %7) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   // CHECK: linalg.matmul
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
   %sA = tensor.extract_slice %8[0, 0][256, 16][1, 1]: tensor<256x256xf32> to tensor<256x16xf32>
   %sB = tensor.extract_slice %11[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32>
   %r = linalg.matmul
@@ -796,7 +808,7 @@
                      outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [2]}
+  // CHECK-SAME: __equivalent_func_args__ = [2]
   return %r : tensor<256x256xf32>
 }
@@ -815,27 +827,27 @@
   %7 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
   %8 = linalg.fill(%cst_0, %7) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
   %9 = vector.transfer_read %arg0[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
   %10 = vector.transfer_write %9, %8[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>
   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
   %11 = linalg.fill(%cst_1, %7) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
   %12 = vector.transfer_read %arg1[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
   %13 = vector.transfer_write %12, %11[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   // CHECK: linalg.matmul
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
   %sA = tensor.extract_slice %10[0, 0][256, 16][1, 1]: tensor<256x256xf32> to tensor<256x16xf32>
   %sB = tensor.extract_slice %13[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32>
   %r = linalg.matmul
@@ -843,7 +855,7 @@
                      outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [2]}
+  // CHECK-SAME: __equivalent_func_args__ = [2]
   return %r : tensor<256x256xf32>
 }
@@ -866,31 +878,31 @@
   %cst = arith.constant 0.000000e+00 : f32
   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]
   %0 = linalg.fill(%cst, %arg2) : f32, tensor<62x90xf32> -> tensor<62x90xf32>
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
   %2 = tensor.extract_slice %0[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
   %7 = vector.transfer_write %v1, %2[%c0, %c0] {in_bounds = [true, true]} : vector<32x90xf32>, tensor<32x90xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %8 = tensor.insert_slice %7 into %0[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
   %10 = tensor.extract_slice %8[32, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
   %14 = vector.transfer_write %v2, %10[%c0, %c0] {in_bounds = [true, true]} : vector<30x90xf32>, tensor<30x90xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %15 = tensor.insert_slice %14 into %8[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [4]}
+  // CHECK-SAME: __equivalent_func_args__ = [4]
   return %15 : tensor<62x90xf32>
 }
@@ -918,7 +930,7 @@
   }
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %r : tensor<10x20xf32>
 }
@@ -940,7 +952,7 @@
                     %t2: tensor<?xf32> {linalg.inplaceable = true}) -> (tensor<?xf32>, tensor<?xf32>){
   // CHECK: linalg.generic
-  // CHECK-SAME: {__inplace_results_attr__ = ["true", "true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]
   %o:2 = linalg.generic #trait ins(%t1 : tensor<?xf32>) outs (%t2, %t2 : tensor<?xf32>, tensor<?xf32>) {
       ^bb(%0: f32, %1: f32, %2 : f32) :
@@ -948,7 +960,7 @@
     } -> (tensor<?xf32>, tensor<?xf32>)
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0, 1]}
+  // CHECK-SAME: __equivalent_func_args__ = [0, 1]
   return %o#0, %o#1 : tensor<?xf32>, tensor<?xf32>
 }
@@ -972,7 +984,7 @@
   -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>){
   // CHECK: linalg.generic
-  // CHECK-SAME: {__inplace_results_attr__ = ["true", "true", "false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true", "false"]
   %o:3 = linalg.generic #trait ins(%t1 : tensor<?xf32>) outs (%t2, %t2, %t2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) {
@@ -981,7 +993,7 @@
     } -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0, 1, -1]}
+  // CHECK-SAME: __equivalent_func_args__ = [0, 1, -1]
   return %o#0, %o#1, %o#2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>
 }
@@ -1000,31 +1012,31 @@
   // Cannot bufferize inplace this extract_slice because both operand and result
   // are modified and returned separately.
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none", "none", "none"]
   %e = tensor.extract_slice %arg2[%s1, %s2][%s3, %s4][1, 1] : tensor<62x90xf32> to tensor<?x?xf32>
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
   %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
   %7 = vector.transfer_write %v1, %2[%c0, %c0] {in_bounds = [true, true]} : vector<32x90xf32>, tensor<32x90xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %8 = tensor.insert_slice %7 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
   %10 = tensor.extract_slice %e[32, 0] [30, 90] [1, 1] : tensor<?x?xf32> to tensor<30x90xf32>
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
   %14 = vector.transfer_write %v2, %10[%c0, %c0] {in_bounds = [true, true]} : vector<30x90xf32>, tensor<30x90xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %15 = tensor.insert_slice %14 into %e[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<?x?xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [2, -1]}
+  // CHECK-SAME: __equivalent_func_args__ = [2, -1]
   return %8, %15 : tensor<62x90xf32>, tensor<?x?xf32>
 }
@@ -1036,26 +1048,26 @@
   -> (tensor<62x90xf32>) {
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
   %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
   // TODO: This should bufferize inplace once we have a proper range analysis.
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]
   %10 = tensor.extract_slice %arg2[32, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %15 = tensor.insert_slice %10 into %8[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %15 : tensor<62x90xf32>
 }
@@ -1067,26 +1079,26 @@
   -> (tensor<62x90xf32>) {
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
   %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
   // The slices are overlapping, so this can never bufferize inplace.
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]
   %10 = tensor.extract_slice %arg2[31, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %15 = tensor.insert_slice %10 into %8[31, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %15 : tensor<62x90xf32>
 }
@@ -1098,19 +1110,19 @@
   -> (tensor<62x90xf32>) {
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]
   %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %15 = tensor.insert_slice %2 into %8[15, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %15 : tensor<62x90xf32>
 }
@@ -1134,25 +1146,25 @@
   // Write to %t1.
   // CHECK: vector.transfer_write
-  // CHECK-SAME: __inplace_results_attr__ = ["false"]
+  // CHECK-SAME: __inplace_operands_attr__ = ["none", "false", "none"]
   %t3 = vector.transfer_write %v, %t1[%s] : vector<5xf32>, tensor<?xf32>
   // Read the old value of %t1 inside the loop via an alias.
-  // CHECK: scf.for
+  // CHECK: scf.for {{.*}} {
   %r, %v3 = scf.for %i = %c0 to %s step %c1 iter_args(%t2 = %t1, %v0 = %v) -> (tensor<?xf32>, vector<5xf32>) {
     // CHECK: tensor.extract_slice
-    // CHECK-SAME: __inplace_results_attr__ = ["true"]
+    // CHECK-SAME: __inplace_operands_attr__ = ["true", "none", "none"]
     %e = tensor.extract_slice %t2[%s][%s][1] : tensor<?xf32> to tensor<?xf32>
     // Read from %t1 via alias %e.
     %v2 = vector.transfer_read %e[%s], %cst : tensor<?xf32>, vector<5xf32>
     scf.yield %t2, %v2 : tensor<?xf32>, vector<5xf32>
   }
-  // CHECK: __inplace_results_attr__ = ["true", "false"]
+  // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true", "none"]}
   // Use %t3 in some way without reading it, so that it does not get DCE'd.
   // CHECK: linalg.generic
-  // CHECK-SAME: __inplace_results_attr__ = ["true"]
+  // CHECK-SAME: __inplace_operands_attr__ = ["true"]
   %o = linalg.generic #trait outs (%t3 : tensor<?xf32>) {
       ^bb(%0: f32) :
         linalg.yield %cst : f32
@@ -1181,7 +1193,7 @@
   // Write to %t1.
   // CHECK: vector.transfer_write
-  // CHECK-SAME: __inplace_results_attr__ = ["true"]
+  // CHECK-SAME: __inplace_operands_attr__ = ["none", "true", "none"]
   %t3 = vector.transfer_write %v, %t1[%s] : vector<5xf32>, tensor<?xf32>
   // This loop does not read from %t1. It only writes to it.
@@ -1189,7 +1201,7 @@
   %r, %v3 = scf.for %i = %c0 to %s step %c1 iter_args(%t2 = %t1, %v0 = %v) -> (tensor<?xf32>, vector<5xf32>) {
     // Write to %t1 via %t2. (Overwrite %t3.)
     // CHECK: linalg.generic
-    // CHECK-SAME: __inplace_results_attr__ = ["true"]
+    // CHECK-SAME: __inplace_operands_attr__ = ["true"]
     %o2 = linalg.generic #trait outs (%t2 : tensor<?xf32>) {
         ^bb(%0: f32) :
           linalg.yield %cst : f32
@@ -1202,14 +1214,14 @@
   // Use %t3 in some way without reading it, so that it does not get DCE'd.
   // CHECK: linalg.generic
-  // CHECK-SAME: __inplace_results_attr__ = ["true"]
+  // CHECK-SAME: __inplace_operands_attr__ = ["true"]
   %o = linalg.generic #trait outs (%t3 : tensor<?xf32>) {
      ^bb(%0: f32) :
        linalg.yield %cst : f32
    } -> (tensor<?xf32>)
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0, -1]}
+  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
   return %o, %v3 : tensor<?xf32>, vector<5xf32>
 }
@@ -1223,24 +1235,24 @@
 func @buffer_forwarding_conflict(%arg0: tensor<?xf32> {linalg.inplaceable = true}, %arg1: index) -> (tensor<?xf32>, tensor<?xf32>) {
   %cst = arith.constant 0.000000e+00 : f32
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none"]
   // Instead of allocating, share buffer with some inplace bufferization?
   %0 = linalg.init_tensor [%arg1] : tensor<?xf32>
   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]
   %1 = linalg.fill(%cst, %0) : f32, tensor<?xf32> -> tensor<?xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none"]
   %2 = tensor.insert_slice %1 into %arg0[0] [%arg1] [1] : tensor<?xf32> into tensor<?xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]
   %3 = tensor.insert_slice %1 into %arg0[42] [%arg1] [1] : tensor<?xf32> into tensor<?xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [-1, 0]}
+  // CHECK-SAME: __equivalent_func_args__ = [-1, 0]
   return %2, %3 : tensor<?xf32>, tensor<?xf32>
 }
@@ -1250,20 +1262,20 @@
 func @buffer_forwarding_no_conflict(%arg0: tensor<?xf32> {linalg.inplaceable = true}, %arg1: index) -> (tensor<?xf32>, tensor<?xf32>) {
   %cst = arith.constant 0.000000e+00 : f32
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]
   // Instead of allocating, share buffer with some inplace bufferization?
   %0 = linalg.init_tensor [%arg1] : tensor<?xf32>
   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]
   %1 = linalg.fill(%cst, %0) : f32, tensor<?xf32> -> tensor<?xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]
   %2 = tensor.insert_slice %1 into %arg0[42] [%arg1] [1] : tensor<?xf32> into tensor<?xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0, 0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0, 0]
   return %2, %2 : tensor<?xf32>, tensor<?xf32>
 }
@@ -1279,27 +1291,35 @@
                      %t2: tensor<?xf32> {linalg.inplaceable = true}, %cond: i1) -> tensor<?xf32> {
   %r = scf.if %cond -> (tensor<?xf32>) {
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t1 : tensor<?xf32>
   } else {
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t2 : tensor<?xf32>
   }
   return %r : tensor<?xf32>
 }
+// -----
+
 // CHECK-LABEL: func @scf_if_inplace2
 func @scf_if_inplace2(%t1: tensor<?xf32> {linalg.inplaceable = true}, %v: vector<5xf32>, %idx: index, %cond: i1) -> tensor<?xf32> {
   %r = scf.if %cond -> (tensor<?xf32>) {
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t1 : tensor<?xf32>
   } else {
     // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
     %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
     scf.yield %t2 : tensor<?xf32>
   }
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %r : tensor<?xf32>
 }
@@ -1310,18 +1330,22 @@
                      %v1: vector<5xf32>, %v2: vector<5xf32>, %idx: index, %cond: i1) -> tensor<?xf32> {
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
   %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
   %r = scf.if %cond -> (tensor<?xf32>) {
     // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
     %t2 = vector.transfer_write %v1, %e[%idx] : vector<5xf32>, tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t2 : tensor<?xf32>
   } else {
     // Writing the same tensor through an alias. This is OK.
    // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
    %t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t3 : tensor<?xf32>
   }
   return %r : tensor<?xf32>
@@ -1335,23 +1359,31 @@
                      %cond: i1, %cond2: i1) -> (tensor<?xf32>, vector<10xf32>) {
   %cst = arith.constant 0.0 : f32
   %r = scf.if %cond -> (tensor<?xf32>) {
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t1 : tensor<?xf32>
   } else {
    // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
    %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t2 : tensor<?xf32>
   }
   %r_alias = scf.if %cond2 -> (tensor<?xf32>) {
    // Reading %r is OK. No conflict.
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %r : tensor<?xf32>
   } else {
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %r : tensor<?xf32>
   }
   %v2 = vector.transfer_read %r_alias[%idx], %cst : tensor<?xf32>, vector<10xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0, -1]}
+  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
   return %r_alias, %v2 : tensor<?xf32>, vector<10xf32>
 }
@@ -1362,24 +1394,28 @@
                      %idx: index, %cond: i1) -> tensor<?xf32> {
   %r = scf.if %cond -> (tensor<?xf32>) {
    // CHECK: tensor.extract_slice
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
    %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %e : tensor<?xf32>
   } else {
    // CHECK: tensor.extract_slice
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
    %f = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %f : tensor<?xf32>
   }
   // Inserting into an equivalent tensor at the same offset. This bufferizes
   // inplace.
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]
   %r2 = tensor.insert_slice %r into %t1[%idx][%idx][1] : tensor<?xf32> into tensor<?xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %r2 : tensor<?xf32>
 }
@@ -1394,25 +1430,33 @@
   %r = scf.if %cond -> (tensor<?xf32>) {
     %t2 = scf.if %cond2 -> (tensor<?xf32>) {
      // CHECK: vector.transfer_write
-      // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+      // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
      %t3 = vector.transfer_write %v1, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+      // CHECK: scf.yield
+      // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
       scf.yield %t3 : tensor<?xf32>
     } else {
      // CHECK: vector.transfer_write
-      // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+      // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
      %t4 = vector.transfer_write %v3, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+      // CHECK: scf.yield
+      // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
       scf.yield %t4 : tensor<?xf32>
     }
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t2 : tensor<?xf32>
   } else {
    // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
    %t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t3 : tensor<?xf32>
   }
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %r : tensor<?xf32>
 }
@@ -1425,18 +1469,22 @@
   %cst = arith.constant 0.0 : f32
   %r, %v_r2 = scf.if %cond -> (tensor<?xf32>, vector<5xf32>) {
    // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
    %t2 = vector.transfer_write %v1, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
     scf.yield %t2, %v1 : tensor<?xf32>, vector<5xf32>
  } else {
    // Writing the same tensor through an alias.
    // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
    %t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor<?xf32>
    // Read the original value of %t1. This requires the write in this branch
    // to be out-of-place. But the write in the other branch can still be
    // inplace.
    %v_r = vector.transfer_read %t1[%idx2], %cst : tensor<?xf32>, vector<5xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
     scf.yield %t3, %v_r : tensor<?xf32>, vector<5xf32>
   }
   return %r, %v_r2 : tensor<?xf32>, vector<5xf32>
@@ -1450,17 +1498,21 @@
                      %cond: i1) -> tensor<?xf32> {
   %r = scf.if %cond -> (tensor<?xf32>) {
    // CHECK: tensor.extract_slice
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
    %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %e : tensor<?xf32>
   } else {
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t1 : tensor<?xf32>
   }
   // Reading from and writing to the same tensor via different args. This is a
   // conflict.
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none", "none"]
   %r2 = tensor.insert_slice %r into %t1[%idx2][%idx2][1] : tensor<?xf32> into tensor<?xf32>
   return %r2 : tensor<?xf32>
 }
@@ -1473,13 +1525,17 @@
                      %cond: i1) -> tensor<?xf32> {
   %r = scf.if %cond -> (tensor<?xf32>) {
    // CHECK: tensor.extract_slice
-    // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
    %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %e : tensor<?xf32>
   } else {
    // CHECK: tensor.extract_slice
-    // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
    %f = tensor.extract_slice %t1[%idx2][%idx2][1] : tensor<?xf32> to tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %f : tensor<?xf32>
   }
@@ -1489,11 +1545,11 @@
   // why the tensor.insert_slice is inplace and the two extract_slices are
   // out-of-place.
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]
   %r2 = tensor.insert_slice %r into %t1[%idx3][%idx3][1] : tensor<?xf32> into tensor<?xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %r2 : tensor<?xf32>
 }
@@ -1504,23 +1560,27 @@
                      %idx: index, %idx2: index, %cond: i1) -> tensor<?xf32> {
   %r = scf.if %cond -> (tensor<?xf32>) {
    // CHECK: tensor.extract_slice
-    // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
    %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %e : tensor<?xf32>
   } else {
    // TODO: This one could bufferize inplace, but the analysis is too restrictive.
    // CHECK: tensor.extract_slice
-    // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
    %f = tensor.extract_slice %t1[%idx2][%idx2][1] : tensor<?xf32> to tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %f : tensor<?xf32>
   }
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]
   %r2 = tensor.insert_slice %r into %t1[%idx2][%idx2][1] : tensor<?xf32> into tensor<?xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %r2 : tensor<?xf32>
 }
@@ -1535,8 +1595,10 @@
     scf.yield %t1 : tensor<?xf32>
   } else {
    // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
    %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t2 : tensor<?xf32>
   }
@@ -1557,14 +1619,20 @@
     scf.yield %t1 : tensor<?xf32>
   } else {
    // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
    %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t2 : tensor<?xf32>
   }
   %t1_alias = scf.if %cond2 -> (tensor<?xf32>) {
    // scf.yield bufferizes to a read. That is a conflict in this example.
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t1 : tensor<?xf32>
   } else {
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t1 : tensor<?xf32>
   }
   %v2 = vector.transfer_read %t1_alias[%idx], %cst : tensor<?xf32>, vector<10xf32>
@@ -1578,7 +1646,7 @@
                %v : vector<5xf32>) -> (tensor<?xf32>) {
   %idx = arith.constant 0 : index
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
   %0 = vector.transfer_write %v, %A[%idx] : vector<5xf32>, tensor<?xf32>
   return %0 : tensor<?xf32>
 }
@@ -1587,9 +1655,8 @@
 // CHECK-LABEL: func @main_func
 func @main_func(%A : tensor<?xf32> {linalg.inplaceable = true}, %v : vector<5xf32>) -> (tensor<?xf32>) {
-  // Function calls always bufferize out-of-place at the moment.
   // CHECK: call
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]
   %0 = call @some_use(%A, %v) : (tensor<?xf32>, vector<5xf32>) -> (tensor<?xf32>)
   return %0 : tensor<?xf32>
 }
@@ -1604,7 +1671,7 @@
   // Write to the tensor. Cannot be inplace due to tensor_load.
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
   %w = vector.transfer_write %v, %0[%idx1] : vector<5xf32>, tensor<?xf32>
   // Read from the tensor and return result.
@@ -1622,7 +1689,7 @@
     -> (vector<5xf32>, vector<5xf32>) {
   // Write + read to/from tensor.
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
   %1 = vector.transfer_write %v1, %t1[%idx2] : vector<5xf32>, tensor<?xf32>
   %cst = arith.constant 0.0 : f32
   %r1 = vector.transfer_read %1[%idx3], %cst : tensor<?xf32>, vector<5xf32>
@@ -1640,7 +1707,7 @@
 // CHECK-LABEL: func @inner_func
 func @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> {
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %t : tensor<?xf32>
 }
@@ -1662,7 +1729,7 @@
   %c0 = arith.constant 0 : index
   %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %0 : tensor<?xf32>
 }