diff --git a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h
--- a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h
+++ b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h
@@ -194,10 +194,10 @@
   /// Set the inPlace bufferization spec to true.
   /// Merge result's and operand's aliasing sets and iterate to a fixed point.
-  void bufferizeInPlace(OpResult result, OpOperand &operand);
+  void bufferizeInPlace(OpOperand &operand, BufferizationState &state);
 
   /// Set the inPlace bufferization spec to false.
-  void bufferizeOutOfPlace(OpResult result);
+  void bufferizeOutOfPlace(OpOperand &operand);
 
   /// Return true if `v1` and `v2` bufferize to equivalent buffers.
   bool areEquivalentBufferizedValues(Value v1, Value v2) const {
@@ -224,10 +224,10 @@
   void applyOnAliases(Value v, function_ref<void(Value)> fun) const;
 
   /// Mark a value as in-place bufferized.
-  void markInPlace(OpResult v) { inplaceBufferized.insert(v); }
+  void markInPlace(OpOperand &o) { inplaceBufferized.insert(&o); }
 
   /// Return `true` if a value was marked as in-place bufferized.
-  bool isInPlace(OpResult opResult) const;
+  bool isInPlace(OpOperand &opOperand) const;
 
 private:
   /// llvm::EquivalenceClasses wants comparable elements. This comparator uses
@@ -245,7 +245,7 @@
   EquivalenceClassRangeType getAliases(Value v) const;
 
   /// Set of all OpResults that were decided to bufferize in-place.
-  llvm::DenseSet<OpResult> inplaceBufferized;
+  llvm::DenseSet<OpOperand *> inplaceBufferized;
 
   /// Auxiliary structure to store all the values a given value may alias with.
   /// Alias information is "may be" conservative: In the presence of branches, a
@@ -379,7 +379,7 @@
   Value lookupBuffer(Value tensor);
 
   /// Return `true` if the given OpResult has been decided to bufferize inplace.
-  bool isInPlace(OpResult opResult) const;
+  bool isInPlace(OpOperand &opOperand) const;
 
   /// Return `true` if the given value is mapped.
   bool isMapped(Value value) const;
diff --git a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.td b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.td
--- a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.td
+++ b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.td
@@ -104,18 +104,14 @@
       >,
       InterfaceMethod<
         /*desc=*/[{
-          Return `true` if the given OpResult must bufferize in-place with its
-          corresponding aliasing OpOperand. Alias sets and inplace attributes
-          will be set up accordingly before making any other bufferization
-          decisions. This method will never be called on OpResults that do not
-          have a tensor type.
-
-          Note: This method may not return `true` if the given OpResult does not
-          have an aliasing OpOperand.
+          Return `true` if the given OpOperand must bufferize in-place. Alias
+          sets and inplace attributes will be set up accordingly before making
+          any other bufferization decisions. This method will never be called on
+          OpOperands that do not have a tensor type.
}], /*retType=*/"bool", /*methodName=*/"mustBufferizeInPlace", - /*args=*/(ins "OpResult":$opResult, + /*args=*/(ins "OpOperand &":$opOperand, "BufferizationState &":$state), /*methodBody=*/"", /*defaultImplementation=*/[{ diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp --- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp @@ -114,27 +114,22 @@ } /// Return `true` if a value was marked as in-place bufferized. -bool BufferizationAliasInfo::isInPlace(OpResult opResult) const { - return inplaceBufferized.contains(opResult); +bool BufferizationAliasInfo::isInPlace(OpOperand &operand) const { + return inplaceBufferized.contains(&operand); } /// Set the inPlace bufferization spec to true. -void BufferizationAliasInfo::bufferizeInPlace(OpResult result, - OpOperand &operand) { - LLVM_DEBUG(llvm::dbgs() << "bufferizeInPlace: "); - LLVM_DEBUG(result.print(llvm::dbgs())); - - markInPlace(result); - aliasInfo.unionSets(result, operand.get()); +void BufferizationAliasInfo::bufferizeInPlace(OpOperand &operand, + BufferizationState &state) { + markInPlace(operand); + if (OpResult result = state.getAliasingOpResult(operand)) + aliasInfo.unionSets(result, operand.get()); } /// Set the inPlace bufferization spec to false. -void BufferizationAliasInfo::bufferizeOutOfPlace(OpResult result) { - LLVM_DEBUG(llvm::dbgs() << "bufferizeOutOfPlace: "); - LLVM_DEBUG(result.print(llvm::dbgs())); - - if (inplaceBufferized.contains(result)) - inplaceBufferized.erase(result); +void BufferizationAliasInfo::bufferizeOutOfPlace(OpOperand &operand) { + assert(!inplaceBufferized.contains(&operand) && + "OpOperand was already decided to bufferize inplace"); } /// Apply `fun` to all the members of the equivalence class of `v`. @@ -340,16 +335,13 @@ op->walk([&](BufferizableOpInterface bufferizableOp) { if (!options.isOpAllowed(bufferizableOp)) return WalkResult::skip(); - for (OpResult opResult : bufferizableOp->getOpResults()) { - if (opResult.getType().isa()) - if (bufferizableOp.mustBufferizeInPlace(opResult, *this)) { - SmallVector operands = - bufferizableOp.getAliasingOpOperand(opResult, *this); - assert(!operands.empty() && - "expected that OpResult has aliasing OpOperand"); - for (OpOperand *operand : operands) - aliasInfo.unionAliasSets(operand->get(), opResult); - aliasInfo.markInPlace(opResult); + for (OpOperand &opOperand : bufferizableOp->getOpOperands()) { + if (opOperand.get().getType().isa()) + if (bufferizableOp.mustBufferizeInPlace(opOperand, *this)) { + if (OpResult opResult = + bufferizableOp.getAliasingOpResult(opOperand, *this)) + aliasInfo.unionAliasSets(opOperand.get(), opResult); + aliasInfo.markInPlace(opOperand); } } return WalkResult::advance(); @@ -382,7 +374,7 @@ } // If bufferizing out-of-place, allocate a new buffer. - if (!aliasInfo.isInPlace(result)) { + if (!aliasInfo.isInPlace(*opOperand)) { // Ops with multiple aliasing operands can currently not bufferize // out-of-place. 
    assert(
@@ -694,8 +686,8 @@
 }
 
 bool mlir::linalg::comprehensive_bufferize::BufferizationState::isInPlace(
-    OpResult opResult) const {
-  return aliasInfo.isInPlace(opResult);
+    OpOperand &opOperand) const {
+  return aliasInfo.isInPlace(opOperand);
 }
 
 void mlir::linalg::comprehensive_bufferize::BufferizationState::markOpObsolete(
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizationInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizationInterfaceImpl.cpp
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizationInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizationInterfaceImpl.cpp
@@ -42,6 +42,19 @@
     return true;
   }
 
+  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
+                               BufferizationState &state) const {
+    // It is unknown whether the resulting MemRef will be written or not.
+    return true;
+  }
+
+  bool mustBufferizeInPlace(Operation *op, OpOperand &opOperand,
+                            BufferizationState &state) const {
+    // ToMemrefOps always bufferize inplace.
+    // TODO: Remove ToMemrefOps from the analysis.
+    return true;
+  }
+
   OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand,
                                BufferizationState &state) const {
     return OpResult();
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp
@@ -147,26 +147,27 @@
 //===----------------------------------------------------------------------===//
 
 /// Attribute marker to specify op results that can be bufferized inPlace.
-constexpr StringLiteral kInPlaceResultsAttrName = "__inplace_results_attr__";
-
-/// Mark whether OpResult can actually be bufferized inplace.
-/// If `inPlace` is `true`, the use-def chain analysis has guaranteed that no
-/// subsequent write would occur to the bufferized tensor value (i.e. the result
-/// can be bufferized inplace).
-static void setInPlaceOpResult(OpResult opResult, bool inPlace) {
-  if (!opResult)
-    return;
+constexpr StringLiteral kInPlaceResultsAttrName = "__inplace_operands_attr__";
 
-  Operation *op = opResult.getOwner();
+/// Mark whether OpOperand will be bufferized inplace.
+static void setInPlaceOpOperand(OpOperand &opOperand, bool inPlace) {
+  Operation *op = opOperand.getOwner();
   auto attr = op->getAttr(kInPlaceResultsAttrName).dyn_cast_or_null<ArrayAttr>();
-  SmallVector<StringRef> inPlaceVector =
-      attr ? SmallVector<StringRef>(
-                 llvm::to_vector<4>(attr.getAsValueRange<StringAttr>()))
-           : SmallVector<StringRef>(op->getNumResults(), "false");
-  LDBG("->set inPlace=" << inPlace << " <- #" << opResult.getResultNumber()
+  SmallVector<StringRef> inPlaceVector;
+  if (attr) {
+    inPlaceVector = SmallVector<StringRef>(
+        llvm::to_vector<4>(attr.getAsValueRange<StringAttr>()));
+  } else {
+    inPlaceVector = SmallVector<StringRef>(op->getNumOperands(), "none");
+    for (OpOperand &opOperand : op->getOpOperands())
+      if (opOperand.get().getType().isa<TensorType>())
+        inPlaceVector[opOperand.getOperandNumber()] = "false";
+  }
+
+  LDBG("->set inPlace=" << inPlace << " <- #" << opOperand.getOperandNumber()
                         << ": " << printOperationInfo(op) << "\n");
-  inPlaceVector[opResult.getResultNumber()] = inPlace ? "true" : "false";
+  inPlaceVector[opOperand.getOperandNumber()] = inPlace ?
"true" : "false"; op->setAttr(kInPlaceResultsAttrName, OpBuilder(op).getStrArrayAttr(inPlaceVector)); } @@ -230,21 +231,11 @@ static bool isInplaceMemoryWrite(OpOperand &opOperand, const BufferizationAliasInfo &aliasInfo, BufferizationState &state) { - // The analysis does not know what happens to the result of a ToMemrefOp, so - // we assume that it is written to. - // TODO: This is a conservative implementation. This rule will have to be - // relaxed for partial bufferization. - if (isa(opOperand.getOwner())) - return true; - // OpOperands without an aliasing OpResult do not write. - OpResult opResult = state.getAliasingOpResult(opOperand); - if (!opResult) - return false; // OpOperands that do not bufferize to a memory write do not write in-place. if (!state.bufferizesToMemoryWrite(opOperand)) return false; // Check current bufferization decisions. - return aliasInfo.isInPlace(opResult); + return aliasInfo.isInPlace(opOperand); } /// Return true if, under current bufferization decisions, the buffer of `value` @@ -255,8 +246,8 @@ LDBG("WRITABILITY ANALYSIS FOR " << printValueInfo(value) << "\n"); bool foundNonWritableBuffer = false; aliasInfo.applyOnAliases(value, [&](Value v) { - // Query BufferizableOpInterface to see if the OpResult is writable. - // TODO: Out-of-place bufferized OpResult could be considered writable. + // Query BufferizableOpInterface to see if the value is writable. + // TODO: Out-of-place bufferized value could be considered writable. if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(v)) if (bufferizableOp && bufferizableOp.isWritable(v, state)) return; @@ -424,8 +415,8 @@ return false; } -/// Return true if bufferizing result inplace would create a conflict. A read R -/// and a write W of the same alias set is a conflict if inplace bufferization +/// Return true if bufferizing `operand` inplace would create a conflict. A read +/// R and a write W of the same alias set is a conflict if inplace bufferization /// of W changes the value read by R to a value different from the one that /// would be expected by tracing back R's origin through SSA use-def chains. /// A conflict can only be introduced by a new alias and/or an inplace @@ -453,21 +444,10 @@ /// Note: If `checkConsistencyOnly`, this function may be called with a null /// OpResult. In that case, only the consistency of bufferization decisions /// involving aliases of the given OpOperand are checked. -bool wouldCreateReadAfterWriteInterference( - OpOperand &operand, OpResult result, const DominanceInfo &domInfo, - BufferizationState &state, const BufferizationAliasInfo &aliasInfo, +static bool wouldCreateReadAfterWriteInterference( + OpOperand &operand, const DominanceInfo &domInfo, BufferizationState &state, + const BufferizationAliasInfo &aliasInfo, bool checkConsistencyOnly = false) { -#ifndef NDEBUG - if (result) { - SmallVector opOperands = state.getAliasingOpOperand(result); - assert(llvm::find(opOperands, &operand) != opOperands.end() && - "operand and result do not match"); - } else { - assert(checkConsistencyOnly && - "result not provided, can only check consistency"); - } -#endif // NDEBUG - // Helper function to iterate on aliases of `root` and capture the reads. auto getAliasingReads = [&](DenseSet &res, Value root) { aliasInfo.applyOnAliases(root, [&](Value alias) { @@ -491,11 +471,11 @@ // Collect reads and writes of all aliases of OpOperand and OpResult. 
DenseSet usesRead, usesWrite; getAliasingReads(usesRead, operand.get()); - if (result) - getAliasingReads(usesRead, result); getAliasingInplaceWrites(usesWrite, operand.get()); - if (result) + if (OpResult result = state.getAliasingOpResult(operand)) { + getAliasingReads(usesRead, result); getAliasingInplaceWrites(usesWrite, result); + } if (!checkConsistencyOnly && state.bufferizesToMemoryWrite(operand)) usesWrite.insert(&operand); @@ -503,32 +483,25 @@ aliasInfo); } -/// Return true if bufferizing `opOperand` inplace with `opResult` would create -/// a write to a non-writable buffer. +/// Return true if bufferizing `opOperand` inplace would create a write to a +/// non-writable buffer. static bool -wouldCreateWriteToNonWritableBuffer(OpOperand &opOperand, OpResult opResult, +wouldCreateWriteToNonWritableBuffer(OpOperand &opOperand, const BufferizationAliasInfo &aliasInfo, BufferizationState &state) { -#ifndef NDEBUG - SmallVector opOperands = state.getAliasingOpOperand(opResult); - assert(llvm::find(opOperands, &opOperand) != opOperands.end() && - "operand and result do not match"); -#endif // NDEBUG - // Certain buffers are not writeable: // 1. A function bbArg that is not inplaceable or // 2. A constant op. - assert(!aliasesNonWritableBuffer(opResult, aliasInfo, state) && - "expected that opResult does not alias non-writable buffer"); bool nonWritable = aliasesNonWritableBuffer(opOperand.get(), aliasInfo, state); if (!nonWritable) return false; // This is a problem only if the buffer is written to via some alias. - bool hasWrite = aliasesInPlaceWrite(opResult, aliasInfo, state) || - aliasesInPlaceWrite(opOperand.get(), aliasInfo, state) || + bool hasWrite = aliasesInPlaceWrite(opOperand.get(), aliasInfo, state) || state.bufferizesToMemoryWrite(opOperand); + if (OpResult opResult = state.getAliasingOpResult(opOperand)) + hasWrite |= aliasesInPlaceWrite(opResult, aliasInfo, state); if (!hasWrite) return false; @@ -540,39 +513,23 @@ // Bufferization analyses. //===----------------------------------------------------------------------===// -/// Determine if `operand` can be bufferized in-place with `result`. +/// Determine if `operand` can be bufferized in-place. 
static LogicalResult bufferizableInPlaceAnalysisImpl( - OpOperand &operand, OpResult result, BufferizationAliasInfo &aliasInfo, + OpOperand &operand, BufferizationAliasInfo &aliasInfo, BufferizationState &state, const DominanceInfo &domInfo) { -#ifndef NDEBUG - SmallVector opOperands = state.getAliasingOpOperand(result); - assert(llvm::find(opOperands, &operand) != opOperands.end() && - "operand and result do not match"); -#endif // NDEBUG - - int64_t resultNumber = result.getResultNumber(); - (void)resultNumber; - LDBG('\n'); - LDBG("Inplace analysis for <- #" << resultNumber << " -> #" - << operand.getOperandNumber() << " in " - << printValueInfo(result) << '\n'); - bool foundInterference = - wouldCreateWriteToNonWritableBuffer(operand, result, aliasInfo, state) || - wouldCreateReadAfterWriteInterference(operand, result, domInfo, state, - aliasInfo); + wouldCreateWriteToNonWritableBuffer(operand, aliasInfo, state) || + wouldCreateReadAfterWriteInterference(operand, domInfo, state, aliasInfo); if (foundInterference) - aliasInfo.bufferizeOutOfPlace(result); + aliasInfo.bufferizeOutOfPlace(operand); else - aliasInfo.bufferizeInPlace(result, operand); - - LDBG("Done inplace analysis for result #" << resultNumber << '\n'); + aliasInfo.bufferizeInPlace(operand, state); return success(); } -/// Analyze the `ops` to determine which OpResults are inplaceable. Walk ops in +/// Analyze the `ops` to determine which OpOperands are inplaceable. Walk ops in /// reverse and bufferize ops greedily. This is a good starter heuristic. /// /// Even if an op does not read or write, it may still create an alias when @@ -608,11 +565,9 @@ for (OpOperand &opOperand : op->getOpOperands()) if (opOperand.get().getType().isa()) if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(op)) - if (OpResult opResult = - bufferizableOp.getAliasingOpResult(opOperand, state)) - if (failed(bufferizableInPlaceAnalysisImpl( - opOperand, opResult, aliasInfo, state, domInfo))) - return failure(); + if (failed(bufferizableInPlaceAnalysisImpl(opOperand, aliasInfo, + state, domInfo))) + return failure(); return success(); } @@ -644,15 +599,12 @@ if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(op)) for (OpResult opResult : op->getOpResults()) if (opResult.getType().isa()) - if (aliasInfo.isInPlace(opResult)) { - SmallVector opOperands = - bufferizableOp.getAliasingOpOperand(opResult, state); - if (!opOperands.empty()) + for (OpOperand *opOperand : + bufferizableOp.getAliasingOpOperand(opResult, state)) + if (state.isInPlace(*opOperand)) if (bufferizableOp.bufferRelation(opResult, aliasInfo, state) == BufferRelation::Equivalent) - for (OpOperand *opOperand : opOperands) - aliasInfo.unionEquivalenceClasses(opResult, opOperand->get()); - } + aliasInfo.unionEquivalenceClasses(opResult, opOperand->get()); } /// Analyze equivalence of tied OpResult/OpOperand pairs of all ops contained @@ -683,15 +635,12 @@ if (auto bufferizableOp = options.dynCastBufferizableOp(op)) for (OpOperand &opOperand : op->getOpOperands()) if (opOperand.get().getType().isa()) { - OpResult opResult = - bufferizableOp.getAliasingOpResult(opOperand, state); if (wouldCreateReadAfterWriteInterference( - opOperand, opResult, domInfo, state, aliasInfo, + opOperand, domInfo, state, aliasInfo, /*checkConsistencyOnly=*/true)) { - // This error can happen for two reasons. Either the input IR - // already has a read-after-write conflict. Or certain - // "mustBufferizeInPlace" interface methods are implemented - // incorrectly. 
+ // This error can happen if certain "mustBufferizeInPlace" interface + // methods are implemented incorrectly, such that the IR already has + // a RaW conflict before making any bufferization decisions. inconsistentOp = op; return WalkResult::interrupt(); } @@ -700,10 +649,6 @@ }); if (walkResult.wasInterrupted()) - // This can currently happen in one situation: When a tensor is passed into - // a ToMemrefOp and read by another op consecutively. ToMemrefOps are - // currently handled conservatively. Once a tensor is passed into a - // ToMemrefOp, it may longer be read. return inconsistentOp->emitError("input IR has RaW conflict"); return success(); } @@ -711,11 +656,13 @@ /// Annotate the IR with the result of the analysis. For testing/debugging only. static void annotateOpsWithBufferizationMarkers(Operation *op, - const BufferizationAliasInfo &aliasInfo) { + const BufferizationAliasInfo &aliasInfo, + BufferizationState &state) { op->walk([&](Operation *op) { - for (OpResult opResult : op->getResults()) - if (opResult.getType().isa()) - setInPlaceOpResult(opResult, aliasInfo.isInPlace(opResult)); + if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(op)) + for (OpOperand &opOperand : op->getOpOperands()) + if (opOperand.get().getType().isa()) + setInPlaceOpOperand(opOperand, aliasInfo.isInPlace(opOperand)); }); } @@ -762,7 +709,7 @@ // Annotate operations if we only want to report the analysis. if (options.testAnalysisOnly) { - annotateOpsWithBufferizationMarkers(op, aliasInfo); + annotateOpsWithBufferizationMarkers(op, aliasInfo, state); return success(); } diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.cpp --- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.cpp @@ -383,6 +383,14 @@ return OpResult(); } + bool mustBufferizeInPlace(Operation *op, OpOperand &opOperand, + BufferizationState &state) const { + // Yield operands always bufferize inplace. Otherwise, an alloc + copy + // may be generated inside the block. We should not return/yield allocations + // when possible. + return true; + } + LogicalResult bufferize(Operation *op, OpBuilder &b, BufferizationState &state) const { auto yieldOp = cast(op); @@ -444,15 +452,16 @@ OpResult opResult = val.dyn_cast(); if (!opResult) return true; - if (!aliasInfo.isInPlace(opResult)) - return true; + // if (!aliasInfo.isInPlace(opResult)) + // return true; // Only equivalent tensors are supported at the moment. // TODO: Support cases such as extract_slice(init_tensor). SmallVector opOperands = state.getAliasingOpOperand(opResult); if (!llvm::all_of(opOperands, [&](OpOperand *operand) { return aliasInfo.areEquivalentBufferizedValues(operand->get(), - opResult); + opResult) && + aliasInfo.isInPlace(*operand); })) return true; return false; @@ -530,7 +539,7 @@ if (!insertSliceOp) return false; // Only inplace bufferized InsertSliceOps are eligible. 
- if (!aliasInfo.isInPlace(insertSliceOp->getOpResult(0))) + if (!aliasInfo.isInPlace(operand)) return false; return &operand == &insertSliceOp->getOpOperand(0) /*source*/; }, diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp --- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp +++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp @@ -510,6 +510,11 @@ return true; } + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + BufferizationState &state) const { + return false; + } + OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand, BufferizationState &state) const { // CallOpInterface is special, it needs to wait for the callee to be diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp --- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp @@ -38,14 +38,6 @@ return {&yieldOp->getOpOperand(resultNum)}; } - bool mustBufferizeInPlace(Operation *op, OpResult opResult, - BufferizationState &state) const { - // ExecuteRegionOp results always bufferize in-place. Since they have no - // OpOperands, they are mostly ignored by the analysis once alias sets are - // set up. - return true; - } - // TODO: For better bufferization results, this could return `true` only if // there is a memory write in the region. bool isMemoryWrite(Operation *op, OpResult opResult, @@ -125,13 +117,6 @@ return true; } - bool mustBufferizeInPlace(Operation *op, OpResult opResult, - BufferizationState &state) const { - // IfOp results always bufferize in-place. Since they have no OpOperands, - // they are mostly ignored by the analysis once alias sets are set up. - return true; - } - LogicalResult bufferize(Operation *op, OpBuilder &b, BufferizationState &state) const { auto ifOp = cast(op); @@ -325,9 +310,21 @@ OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand, BufferizationState &state) const { + if (isa(op->getParentOp())) + return op->getParentOp()->getResult(opOperand.getOperandNumber()); + if (isa(op->getParentOp())) + return op->getParentOp()->getResult(opOperand.getOperandNumber()); return OpResult(); } + bool mustBufferizeInPlace(Operation *op, OpOperand &opOperand, + BufferizationState &state) const { + // Yield operands always bufferize inplace. Otherwise, an alloc + copy + // may be generated inside the block. We should not return/yield allocations + // when possible. + return true; + } + LogicalResult bufferize(Operation *op, OpBuilder &b, BufferizationState &state) const { auto yieldOp = cast(op); diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp --- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp @@ -155,7 +155,7 @@ extractSliceOp.result().getType().cast(); // If not inplaceable, alloc. 
- bool inplace = state.isInPlace(extractSliceOp->getResult(0)); + bool inplace = state.isInPlace(extractSliceOp->getOpOperand(0)); Value alloc; if (!inplace) alloc = state.createAllocDeallocPair(b, loc, extractSliceOp.result()); @@ -285,7 +285,7 @@ if (extractSliceOp && areEquivalentExtractSliceOps(aliasInfo, extractSliceOp, insertSliceOp) && - aliasInfo.isInPlace(extractSliceOp->getResult(0))) { + aliasInfo.isInPlace(extractSliceOp->getOpOperand(0))) { foundOp = true; } }); @@ -469,7 +469,7 @@ // cloned and the clone needs to be updated. if (isSourceEquivalentToAMatchingInplaceExtractSliceOp(aliasInfo, insertSliceOp) && - state.isInPlace(insertSliceOp->getResult(0))) + state.isInPlace(insertSliceOp->getOpOperand(0))) tensorState.insertSliceOpsWithoutCopy.insert(insertSliceOp); }); return success(); diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir --- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir +++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir @@ -20,11 +20,11 @@ // aliasing subviews at all call sites or whether they allocate. // This is true irrespective of whether the function argument is inplaceable. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} %r0 = tensor.extract_slice %A[0][4][1] : tensor to tensor<4xf32> // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} %r1 = tensor.extract_slice %B[0][8][1] : tensor to tensor<8xf32> return %r0, %r1: tensor<4xf32>, tensor<8xf32> @@ -41,16 +41,16 @@ { // must bufferize out of place. // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false"]} %r0 = tensor.insert_slice %C into %A[0][4][1] : tensor<4xf32> into tensor // bufferizes inplace. // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]} %r1 = tensor.insert_slice %C into %B[0][4][1] : tensor<4xf32> into tensor // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [-1, 1]} + // CHECK-SAME: __equivalent_func_args__ = [-1, 1] return %r0, %r1: tensor, tensor } @@ -64,27 +64,27 @@ { // matmul output operand interferes with input operand. // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]} %C = linalg.matmul ins(%A, %B: tensor<4x4xf32>, tensor<4x4xf32>) outs(%B: tensor<4x4xf32>) -> tensor<4x4xf32> // matmul output operand interferes with input operand. // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]} %D = linalg.matmul ins(%B, %A: tensor<4x4xf32>, tensor<4x4xf32>) outs(%B: tensor<4x4xf32>) -> tensor<4x4xf32> // matmul output operand does not interferes with input operand. 
// CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>) outs(%B: tensor<4x4xf32>) -> tensor<4x4xf32> // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [-1, -1, 1]} + // CHECK-SAME: __equivalent_func_args__ = [-1, -1, 1] return %C, %D, %E: tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32> } @@ -103,16 +103,16 @@ // bufferize out of place. Let callers decide whether they want to create // aliasing subviews at all call sites or whether they allocate. // This is true irrespective of whether the function argument is inplaceable. - // CHECK: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["true"]} %r0 = tensor.extract_slice %A[0][4][1] : tensor to tensor<4xf32> - // CHECK: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["true"]} %r1 = tensor.extract_slice %r0[0][2][1] : tensor<4xf32> to tensor<2xf32> - // CHECK: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["true"]} %r2 = tensor.extract_slice %B[0][4][1] : tensor to tensor<4xf32> - // CHECK: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["true"]} %r3 = tensor.extract_slice %r2[0][2][1] : tensor<4xf32> to tensor<2xf32> return %r1, %r3: tensor<2xf32>, tensor<2xf32> @@ -128,20 +128,20 @@ %B : tensor, %B2 : tensor<4xf32>, %B3 : tensor<2xf32>) -> (tensor, tensor) { - // CHECK: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["true", "true"]} %r0 = tensor.insert_slice %A3 into %A2[0][2][1] : tensor<2xf32> into tensor<4xf32> - // CHECK: {__inplace_results_attr__ = ["true"]} + // CHECK: {__inplace_operands_attr__ = ["true", "true"]} %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor - // CHECK: {__inplace_results_attr__ = ["false"]} + // CHECK: {__inplace_operands_attr__ = ["true", "false"]} %r2 = tensor.insert_slice %B3 into %B2[0][2][1] : tensor<2xf32> into tensor<4xf32> - // CHECK: {__inplace_results_attr__ = ["false"]} + // CHECK: {__inplace_operands_attr__ = ["true", "false"]} %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [0, -1]} + // CHECK-SAME: __equivalent_func_args__ = [0, -1] return %r1, %r3: tensor, tensor } @@ -157,12 +157,12 @@ // %r0 is an overlapping tensor.extract_slice that does not match, it must be // out of place. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["false"]} %r0 = tensor.extract_slice %A[0][4][1] : tensor to tensor<4xf32> // %r1 can bufferize inplace fine. // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]} %r1 = tensor.insert_slice %r0 into %A[%idx][4][1] : tensor<4xf32> into tensor // %r3 does bufferizes inplace because %B is not inplaceable. @@ -170,16 +170,16 @@ // not alias with the buffer coming from %r3 so it can actually bufferize // inplace. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} %r2 = tensor.extract_slice %B[0][4][1] : tensor to tensor<4xf32> // %r3 cannot bufferize inplace since %B is not inplaceable. 
// CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none"]} %r3 = tensor.insert_slice %r2 into %B[%idx][4][1] : tensor<4xf32> into tensor // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [0, -1]} + // CHECK-SAME: __equivalent_func_args__ = [0, -1] return %r1, %r3: tensor, tensor } @@ -195,17 +195,17 @@ // %r0 is a tensor.extract_slice that matches, it can also be bufferized // inplace. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} %r0 = tensor.extract_slice %A[0][4][1] : tensor to tensor<4xf32> // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]} %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor // %r2 is a tensor.extract_slice that matches %r3, it can be bufferized // inplace. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} %r2 = tensor.extract_slice %B[0][4][1] : tensor to tensor<4xf32> // tensor.insert_slice cannot bufferize inplace. @@ -213,11 +213,11 @@ // be unproductive to have special logic in bufferization to encode matching // insert_slice(extract_slice(A), A). // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false"]} %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [0, -1]} + // CHECK-SAME: __equivalent_func_args__ = [0, -1] return %r1, %r3: tensor, tensor } @@ -232,21 +232,21 @@ %cst2 = arith.constant 1.0 : f32 // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]} %0 = tensor.extract_slice %A[%idx][%idx][1] : tensor to tensor // CHECK: linalg.fill - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} %1 = linalg.fill(%cst, %0) : f32, tensor -> tensor // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]} %2 = tensor.insert_slice %1 into %A[%idx][%idx][1] : tensor into tensor %3 = vector.transfer_read %1[%idx2], %cst2 : tensor, vector<5xf32> // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [0, -1]} + // CHECK-SAME: __equivalent_func_args__ = [0, -1] return %2, %3 : tensor, vector<5xf32> } @@ -262,33 +262,33 @@ %cst2 = arith.constant 1.0 : f32 // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]} %0 = tensor.extract_slice %A[%idx][%idx][1] : tensor to tensor // CHECK: linalg.fill - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} %1 = linalg.fill(%cst, %0) : f32, tensor -> tensor // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]} %2 = tensor.insert_slice %1 into %A[%idx][%idx][1] : tensor into tensor // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: 
{__inplace_operands_attr__ = ["true", "none", "none"]} %4 = tensor.extract_slice %2[%idx3][%idx3][1] : tensor to tensor // CHECK: linalg.fill - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} %5 = linalg.fill(%cst, %4) : f32, tensor -> tensor %3 = vector.transfer_read %1[%idx2], %cst2 : tensor, vector<5xf32> // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]} %6 = tensor.insert_slice %5 into %2[%idx3][%idx3][1] : tensor into tensor // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [0, -1]} + // CHECK-SAME: __equivalent_func_args__ = [0, -1] return %6, %3 : tensor, vector<5xf32> } @@ -304,25 +304,25 @@ // tensor.extract_slice is only used as a read, no interference irrespective // of user's inplace status. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} %sA = tensor.extract_slice %A[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> // matmul output operand is not inplaceable at the function boundary. // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]} %D = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>) outs(%B: tensor<4x4xf32>) -> tensor<4x4xf32> // matmul output operand is inplaceable at the function boundary. // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %E = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>) outs(%C: tensor<4x4xf32>) -> tensor<4x4xf32> // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [-1, 2]} + // CHECK-SAME: __equivalent_func_args__ = [-1, 2] return %D, %E: tensor<4x4xf32>, tensor<4x4xf32> } @@ -338,12 +338,12 @@ // Step 4. %sB forward propagates to a write in %D but it is not inplace. // So this is only ever read and can bufferize inplace. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> // Step 3. %sB has a read interference in %E, it does not bufferize inplace. // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]} %D = linalg.matmul ins(%B, %C: tensor, tensor) outs(%sB: tensor<4x4xf32>) -> tensor<4x4xf32> @@ -352,13 +352,13 @@ // %sC backward propagates to %C which is inplaceable. // As a consequence this is bufferized inplace. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> // Step 1. %sC backprops to the tensor.extract_slice producer which is not // considered an interference. This bufferizes inplace. 
// CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %E = linalg.matmul ins(%A, %sB: tensor<4x4xf32>, tensor<4x4xf32>) outs(%sC: tensor<4x4xf32>) -> tensor<4x4xf32> @@ -374,23 +374,23 @@ %B: tensor<6x6xf32>, %C: tensor<30x20xf32> {linalg.inplaceable = true}) -> tensor<30x20xf32> { // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none", "none", "none"]} %15 = tensor.extract_slice %C[%s3, %s4] [%s1, %s2] [1, 1] : tensor<30x20xf32> to tensor // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %18 = linalg.matmul ins(%A, %B : tensor<8x6xf32>, tensor<6x6xf32>) outs(%15 : tensor) -> tensor // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]} %19 = tensor.extract_slice %18[0, 0] [%s1, %s2] [1, 1] : tensor to tensor // CHECK: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none", "none", "none"]} %20 = tensor.insert_slice %19 into %C[%s3, %s4] [%s1, %s2] [1, 1] : tensor into tensor<30x20xf32> // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [6]} + // CHECK-SAME: __equivalent_func_args__ = [6] return %20 : tensor<30x20xf32> } @@ -411,13 +411,13 @@ // %sB backward propagates to %B which is not inplaceable. // As a consequence this is bufferized out of place. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["false"]} %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> // Step 3. %sB backprops to the tensor.extract_slice producer which is not // considered an interference. This bufferizes inplace. // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %D = linalg.matmul ins(%B, %C: tensor, tensor) outs(%sB: tensor<4x4xf32>) -> tensor<4x4xf32> @@ -426,13 +426,13 @@ // %sC backward propagates to %C which is inplaceable. // As a consequence this is bufferized inplace. // CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> // Step 1. %sC backprops to the tensor.extract_slice producer which is not // considered an interference. This bufferizes inplace. // CHECK: linalg.matmul - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>) outs(%sC: tensor<4x4xf32>) -> tensor<4x4xf32> @@ -462,15 +462,15 @@ // - %sA would then bufferize to an inplace write (i.e. %FA) but %A is not // inplaceable and so %sA is not inplaceable. 
// CHECK: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]} // CHECK-NEXT: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} // CHECK-NEXT: fill - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]} // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none", "none"]} %sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor to tensor %ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> %FA = linalg.fill(%f0, %ssA) : f32, tensor<4x4xf32> -> tensor<4x4xf32> @@ -480,19 +480,19 @@ // 3-level matching tensor.extract_slice / tensor.insert_slice into // inplaceable %B. // CHECK-NEXT: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]} // CHECK-NEXT: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]} // CHECK-NEXT: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} // CHECK-NEXT: fill - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]} // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]} // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]} %sB = tensor.extract_slice %B[0, 0][%idx, %idx][1, 1] : tensor to tensor %ssB = tensor.extract_slice %sB[0, 0][4, %idx][1, 1] : tensor to tensor<4x?xf32> %sssB = tensor.extract_slice %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32> @@ -505,18 +505,18 @@ // inplaceable %C with a twist. // Throw a wrench in the system: %rsC production sizes do not match %ssC. // CHECK-NEXT: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]} // The tensor.insert_slice that would be candidate for matching does not actually // match. That tensor.insert_slice can still be bufferized inplace nonetheless // but this tensor.extract_slice, which bufferizes to an inplace write, cannot. 
// CHECK-NEXT: tensor.extract_slice - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none"]} // CHECK-NEXT: fill - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]} // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]} %sC = tensor.extract_slice %C[0, 0][%idx, %idx][1, 1] : tensor to tensor %ssC = tensor.extract_slice %sC[0, 0][%sz1, 4][1, 1] : tensor to tensor %FC = linalg.fill(%f0, %ssC) : f32, tensor -> tensor @@ -524,7 +524,7 @@ %rC = tensor.insert_slice %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor into tensor // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [-1, 1, 2]} + // CHECK-SAME: __equivalent_func_args__ = [-1, 1, 2] return %rA, %rB, %rC: tensor, tensor, tensor } @@ -542,20 +542,22 @@ { // CHECK: scf.for // CHECK-NEXT: scf.yield - // CHECK-NEXT: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "false"]} %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { scf.yield %t : tensor } // CHECK: scf.for // CHECK-NEXT: scf.yield - // CHECK-NEXT: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]} %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %B) -> (tensor) { scf.yield %t : tensor } // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [-1, 1]} + // CHECK-SAME: __equivalent_func_args__ = [-1, 1] return %r0, %r1: tensor, tensor } @@ -574,11 +576,11 @@ // which makes bbArg inplaceable. // 2. Or it is already inplaceable and so is bbArg. // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]} // CHECK-NEXT: tensor.insert_slice - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} - // CHECK-NEXT: scf.yield - // CHECK-NEXT: {__inplace_results_attr__ = ["false", "true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]} + // CHECK-NEXT: scf.yield {__inplace_operands_attr__ = ["true", "true"]} + // CHECK-NEXT: } {__inplace_operands_attr__ = ["none", "none", "none", "false", "true"]} %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B) -> (tensor, tensor) { @@ -588,7 +590,7 @@ } // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [-1, 1]} + // CHECK-SAME: __equivalent_func_args__ = [-1, 1] return %r0#0, %r0#1: tensor, tensor } @@ -606,8 +608,10 @@ // of %r1 is read. // CHECK: scf.for // CHECK-NEXT: call + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} // CHECK-NEXT: scf.yield - // CHECK-NEXT: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "false"]} %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { call @some_use(%t) : (tensor) -> () scf.yield %t : tensor @@ -616,8 +620,10 @@ // %r1 bufferizes inplace fine. 
// CHECK: scf.for // CHECK-NEXT: call + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} // CHECK-NEXT: scf.yield - // CHECK-NEXT: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]} %r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor) { call @some_use(%t) : (tensor) -> () scf.yield %t : tensor @@ -627,8 +633,10 @@ // of %r3 is read. // CHECK: linalg.tiled_loop // CHECK-NEXT: call + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} // CHECK-NEXT: linalg.yield - // CHECK-NEXT: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "false"]} %r2 = linalg.tiled_loop (%i) = (%lb) to (%ub) step (%step) ins() outs(%t = %B: tensor) { @@ -639,8 +647,10 @@ // %r3 bufferizes inplace fine. // CHECK: linalg.tiled_loop // CHECK-NEXT: call + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} // CHECK-NEXT: linalg.yield - // CHECK-NEXT: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} + // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]} %r3 = linalg.tiled_loop (%i) = (%lb) to (%ub) step (%step) ins() outs(%t = %B: tensor) { @@ -649,7 +659,7 @@ } // CHECK: return - // CHECK-SAME: {__equivalent_func_args__ = [0, 1]} + // CHECK-SAME: __equivalent_func_args__ = [0, 1] return %r1, %r3: tensor, tensor } @@ -670,12 +680,12 @@ // value. The calls to `foo` are determined to read conservatively, so %A // cannot bufferize inplace. // CHECK: fill - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} %A = linalg.fill(%f1, %I) : f32, tensor<64xf32> -> tensor<64xf32> // 1. Bufferizes inplace: no alias to %A is yet possible. // CHECK: fill - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} %B = linalg.fill(%f2, %I) : f32, tensor<64xf32> -> tensor<64xf32> call @foo(%A) : (tensor<64xf32>) -> () @@ -706,17 +716,19 @@ // The calls to `foo` are determined to read conservatively, so %A cannot // bufferize inplace. // CHECK: fill - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} %A = linalg.fill(%f1, %I) : f32, tensor<64xf32> -> tensor<64xf32> // 4. Bufferizes inplace: no alias to %A is yet possible. // CHECK: fill - // CHECK-SAME: {__inplace_results_attr__ = ["true"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} %B = linalg.fill(%f2, %I) : f32, tensor<64xf32> -> tensor<64xf32> // 3. Does not read or write, bufferizes inplace. - // CHECK: scf.for - // CHECK: {__inplace_results_attr__ = ["true", "true"]} + // CHECK: scf.for + // CHECK-NEXT: scf.yield + // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]} + // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true", "true"]} %r:2 = scf.for %i = %c0 to %c10 step %c1 iter_args(%0 = %A, %1 = %B) -> (tensor<64xf32>, tensor<64xf32>) { @@ -729,12 +741,12 @@ // value. The calls to `foo` are determined to read conservatively, so %A2 // cannot bufferize inplace. // CHECK: fill - // CHECK-SAME: {__inplace_results_attr__ = ["false"]} + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} %A2 = linalg.fill(%f1, %I2) : f32, tensor<64xf32> -> tensor<64xf32> // 1. Bufferizes inplace: no alias to %A2 is yet possible. 
   // CHECK: fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
   %B2 = linalg.fill(%f2, %I2) : f32, tensor<64xf32> -> tensor<64xf32>
   call @bar(%A2) : (tensor<64xf32>) -> ()
@@ -754,10 +766,10 @@
                         %s3 : index) -> tensor<?xi32> {
   %A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]}
   %b = tensor.extract_slice %A[%s1][%s2][1] : tensor<4xi32> to tensor<?xi32>
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]}
   %r = vector.transfer_write %v, %b[%s3] : vector<5xi32>, tensor<?xi32>
   return %r : tensor<?xi32>
 }
@@ -777,18 +789,18 @@
   %7 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
   %8 = linalg.fill(%cst_0, %7) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
   %11 = linalg.fill(%cst_1, %7) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   // CHECK: linalg.matmul
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
   %sA = tensor.extract_slice %8[0, 0][256, 16][1, 1]: tensor<256x256xf32> to tensor<256x16xf32>
   %sB = tensor.extract_slice %11[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32>
   %r = linalg.matmul
@@ -796,7 +808,7 @@
                      outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [2]}
+  // CHECK-SAME: __equivalent_func_args__ = [2]
   return %r : tensor<256x256xf32>
 }
@@ -815,27 +827,27 @@
   %7 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
   %8 = linalg.fill(%cst_0, %7) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
   %9 = vector.transfer_read %arg0[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
   %10 = vector.transfer_write %9, %8[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>
   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
   %11 = linalg.fill(%cst_1, %7) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
   %12 = vector.transfer_read %arg1[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
   %13 = vector.transfer_write %12, %11[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
   // CHECK: linalg.matmul
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]}
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
   %sA = tensor.extract_slice %10[0, 0][256, 16][1, 1]: tensor<256x256xf32> to tensor<256x16xf32>
   %sB = tensor.extract_slice %13[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32>
   %r = linalg.matmul
@@ -843,7 +855,7 @@
                      outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [2]}
+  // CHECK-SAME: __equivalent_func_args__ = [2]
   return %r : tensor<256x256xf32>
 }
@@ -866,31 +878,31 @@
   %cst = arith.constant 0.000000e+00 : f32
   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]
   %0 = linalg.fill(%cst, %arg2) : f32, tensor<62x90xf32> -> tensor<62x90xf32>
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
   %2 = tensor.extract_slice %0[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
   %7 = vector.transfer_write %v1, %2[%c0, %c0] {in_bounds = [true, true]} : vector<32x90xf32>, tensor<32x90xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %8 = tensor.insert_slice %7 into %0[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
   %10 = tensor.extract_slice %8[32, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
   %14 = vector.transfer_write %v2, %10[%c0, %c0] {in_bounds = [true, true]} : vector<30x90xf32>, tensor<30x90xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %15 = tensor.insert_slice %14 into %8[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [4]}
+  // CHECK-SAME: __equivalent_func_args__ = [4]
   return %15 : tensor<62x90xf32>
 }
@@ -918,7 +930,7 @@
   }
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %r : tensor<10x20xf32>
 }
@@ -940,7 +952,7 @@
                     %t2: tensor<?xf32> {linalg.inplaceable = true}) -> (tensor<?xf32>, tensor<?xf32>){
   // CHECK: linalg.generic
-  // CHECK-SAME: {__inplace_results_attr__ = ["true", "true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]
   %o:2 = linalg.generic #trait ins(%t1 : tensor<?xf32>) outs (%t2, %t2 : tensor<?xf32>, tensor<?xf32>) {
       ^bb(%0: f32, %1: f32, %2 : f32) :
@@ -948,7 +960,7 @@
     } -> (tensor<?xf32>, tensor<?xf32>)
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0, 1]}
+  // CHECK-SAME: __equivalent_func_args__ = [0, 1]
   return %o#0, %o#1 : tensor<?xf32>, tensor<?xf32>
 }
@@ -972,7 +984,7 @@
   -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>){
   // CHECK: linalg.generic
-  // CHECK-SAME: {__inplace_results_attr__ = ["true", "true", "false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true", "false"]
   %o:3 = linalg.generic #trait ins(%t1 : tensor<?xf32>) outs (%t2, %t2, %t2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) {
@@ -981,7 +993,7 @@
     } -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0, 1, -1]}
+  // CHECK-SAME: __equivalent_func_args__ = [0, 1, -1]
   return %o#0, %o#1, %o#2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>
 }
@@ -1000,31 +1012,31 @@
   // Cannot bufferize inplace this extract_slice because both operand and result
   // are modified and returned separately.
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none", "none", "none"]
   %e = tensor.extract_slice %arg2[%s1, %s2][%s3, %s4][1, 1] : tensor<62x90xf32> to tensor<?x?xf32>
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
   %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
   %7 = vector.transfer_write %v1, %2[%c0, %c0] {in_bounds = [true, true]} : vector<32x90xf32>, tensor<32x90xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %8 = tensor.insert_slice %7 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
   %10 = tensor.extract_slice %e[32, 0] [30, 90] [1, 1] : tensor<?x?xf32> to tensor<30x90xf32>
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
   %14 = vector.transfer_write %v2, %10[%c0, %c0] {in_bounds = [true, true]} : vector<30x90xf32>, tensor<30x90xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %15 = tensor.insert_slice %14 into %e[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<?x?xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [2, -1]}
+  // CHECK-SAME: __equivalent_func_args__ = [2, -1]
   return %8, %15 : tensor<62x90xf32>, tensor<?x?xf32>
 }
@@ -1036,26 +1048,26 @@
   -> (tensor<62x90xf32>) {
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
   %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
   // TODO: This should bufferize inplace once we have a proper range analysis.
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]
   %10 = tensor.extract_slice %arg2[32, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %15 = tensor.insert_slice %10 into %8[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %15 : tensor<62x90xf32>
 }
@@ -1067,26 +1079,26 @@
   -> (tensor<62x90xf32>) {
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
   %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
   // The slices are overlapping, so this can never bufferize inplace.
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]
   %10 = tensor.extract_slice %arg2[31, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %15 = tensor.insert_slice %10 into %8[31, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %15 : tensor<62x90xf32>
 }
@@ -1098,19 +1110,19 @@
   -> (tensor<62x90xf32>) {
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]
   %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
   %15 = tensor.insert_slice %2 into %8[15, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %15 : tensor<62x90xf32>
 }
@@ -1134,25 +1146,25 @@
   // Write to %t1.
   // CHECK: vector.transfer_write
-  // CHECK-SAME: __inplace_results_attr__ = ["false"]
+  // CHECK-SAME: __inplace_operands_attr__ = ["none", "false", "none"]
   %t3 = vector.transfer_write %v, %t1[%s] : vector<5xf32>, tensor<?xf32>
   // Read the old value of %t1 inside the loop via an alias.
-  // CHECK: scf.for
+  // CHECK: scf.for {{.*}} {
   %r, %v3 = scf.for %i = %c0 to %s step %c1 iter_args(%t2 = %t1, %v0 = %v) -> (tensor<?xf32>, vector<5xf32>) {
     // CHECK: tensor.extract_slice
-    // CHECK-SAME: __inplace_results_attr__ = ["true"]
+    // CHECK-SAME: __inplace_operands_attr__ = ["true", "none", "none"]
     %e = tensor.extract_slice %t2[%s][%s][1] : tensor<?xf32> to tensor<?xf32>
     // Read from %t1 via alias %e.
     %v2 = vector.transfer_read %e[%s], %cst : tensor<?xf32>, vector<5xf32>
     scf.yield %t2, %v2 : tensor<?xf32>, vector<5xf32>
   }
-  // CHECK: __inplace_results_attr__ = ["true", "false"]
+  // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true", "none"]}
   // Use %t3 in some way without reading it, so that it does not get DCE'd.
   // CHECK: linalg.generic
-  // CHECK-SAME: __inplace_results_attr__ = ["true"]
+  // CHECK-SAME: __inplace_operands_attr__ = ["true"]
   %o = linalg.generic #trait outs (%t3 : tensor<?xf32>) {
       ^bb(%0: f32) :
         linalg.yield %cst : f32
@@ -1181,7 +1193,7 @@
   // Write to %t1.
   // CHECK: vector.transfer_write
-  // CHECK-SAME: __inplace_results_attr__ = ["true"]
+  // CHECK-SAME: __inplace_operands_attr__ = ["none", "true", "none"]
   %t3 = vector.transfer_write %v, %t1[%s] : vector<5xf32>, tensor<?xf32>
   // This loop does not read from %t1. It only writes to it.
@@ -1189,7 +1201,7 @@
   %r, %v3 = scf.for %i = %c0 to %s step %c1 iter_args(%t2 = %t1, %v0 = %v) -> (tensor<?xf32>, vector<5xf32>) {
     // Write to %t1 via %t2. (Overwrite %t3.)
     // CHECK: linalg.generic
-    // CHECK-SAME: __inplace_results_attr__ = ["true"]
+    // CHECK-SAME: __inplace_operands_attr__ = ["true"]
     %o2 = linalg.generic #trait outs (%t2 : tensor<?xf32>) {
         ^bb(%0: f32) :
           linalg.yield %cst : f32
@@ -1202,14 +1214,14 @@
   // Use %t3 in some way without reading it, so that it does not get DCE'd.
   // CHECK: linalg.generic
-  // CHECK-SAME: __inplace_results_attr__ = ["true"]
+  // CHECK-SAME: __inplace_operands_attr__ = ["true"]
   %o = linalg.generic #trait outs (%t3 : tensor<?xf32>) {
      ^bb(%0: f32) :
        linalg.yield %cst : f32
    } -> (tensor<?xf32>)
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0, -1]}
+  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
   return %o, %v3 : tensor<?xf32>, vector<5xf32>
 }
@@ -1223,24 +1235,24 @@
 func @buffer_forwarding_conflict(%arg0: tensor<?xf32> {linalg.inplaceable = true}, %arg1: index) -> (tensor<?xf32>, tensor<?xf32>) {
   %cst = arith.constant 0.000000e+00 : f32
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none"]
   // Instead of allocating, share buffer with some inplace bufferization?
   %0 = linalg.init_tensor [%arg1] : tensor<?xf32>
   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]
   %1 = linalg.fill(%cst, %0) : f32, tensor<?xf32> -> tensor<?xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none"]
   %2 = tensor.insert_slice %1 into %arg0[0] [%arg1] [1] : tensor<?xf32> into tensor<?xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]
   %3 = tensor.insert_slice %1 into %arg0[42] [%arg1] [1] : tensor<?xf32> into tensor<?xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [-1, 0]}
+  // CHECK-SAME: __equivalent_func_args__ = [-1, 0]
   return %2, %3 : tensor<?xf32>, tensor<?xf32>
 }
@@ -1250,20 +1262,20 @@
 func @buffer_forwarding_no_conflict(%arg0: tensor<?xf32> {linalg.inplaceable = true}, %arg1: index) -> (tensor<?xf32>, tensor<?xf32>) {
   %cst = arith.constant 0.000000e+00 : f32
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]
   // Instead of allocating, share buffer with some inplace bufferization?
   %0 = linalg.init_tensor [%arg1] : tensor<?xf32>
   // CHECK: linalg.fill
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]
   %1 = linalg.fill(%cst, %0) : f32, tensor<?xf32> -> tensor<?xf32>
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]
   %2 = tensor.insert_slice %1 into %arg0[42] [%arg1] [1] : tensor<?xf32> into tensor<?xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0, 0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0, 0]
   return %2, %2 : tensor<?xf32>, tensor<?xf32>
 }
@@ -1279,27 +1291,35 @@
                      %t2: tensor<?xf32> {linalg.inplaceable = true}, %cond: i1) -> tensor<?xf32> {
   %r = scf.if %cond -> (tensor<?xf32>) {
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t1 : tensor<?xf32>
   } else {
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t2 : tensor<?xf32>
   }
   return %r : tensor<?xf32>
 }
+// -----
+
 // CHECK-LABEL: func @scf_if_inplace2
 func @scf_if_inplace2(%t1: tensor<?xf32> {linalg.inplaceable = true}, %v: vector<5xf32>, %idx: index, %cond: i1) -> tensor<?xf32> {
   %r = scf.if %cond -> (tensor<?xf32>) {
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t1 : tensor<?xf32>
   } else {
     // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
     %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
     scf.yield %t2 : tensor<?xf32>
   }
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %r : tensor<?xf32>
 }
@@ -1310,18 +1330,22 @@
                      %v1: vector<5xf32>, %v2: vector<5xf32>, %idx: index, %cond: i1) -> tensor<?xf32> {
   // CHECK: tensor.extract_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
   %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
   %r = scf.if %cond -> (tensor<?xf32>) {
     // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
     %t2 = vector.transfer_write %v1, %e[%idx] : vector<5xf32>, tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t2 : tensor<?xf32>
   } else {
     // Writing the same tensor through an alias. This is OK.
    // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
    %t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t3 : tensor<?xf32>
   }
   return %r : tensor<?xf32>
@@ -1335,23 +1359,31 @@
                      %cond: i1, %cond2: i1) -> (tensor<?xf32>, vector<10xf32>) {
   %cst = arith.constant 0.0 : f32
   %r = scf.if %cond -> (tensor<?xf32>) {
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t1 : tensor<?xf32>
   } else {
    // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
    %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t2 : tensor<?xf32>
   }
   %r_alias = scf.if %cond2 -> (tensor<?xf32>) {
    // Reading %r is OK. No conflict.
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %r : tensor<?xf32>
   } else {
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %r : tensor<?xf32>
   }
   %v2 = vector.transfer_read %r_alias[%idx], %cst : tensor<?xf32>, vector<10xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0, -1]}
+  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
   return %r_alias, %v2 : tensor<?xf32>, vector<10xf32>
 }
@@ -1362,24 +1394,28 @@
                      %idx: index, %cond: i1) -> tensor<?xf32> {
   %r = scf.if %cond -> (tensor<?xf32>) {
    // CHECK: tensor.extract_slice
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
    %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %e : tensor<?xf32>
   } else {
    // CHECK: tensor.extract_slice
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
    %f = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %f : tensor<?xf32>
   }
   // Inserting into an equivalent tensor at the same offset. This bufferizes
   // inplace.
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]
   %r2 = tensor.insert_slice %r into %t1[%idx][%idx][1] : tensor<?xf32> into tensor<?xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %r2 : tensor<?xf32>
 }
@@ -1394,25 +1430,33 @@
   %r = scf.if %cond -> (tensor<?xf32>) {
     %t2 = scf.if %cond2 -> (tensor<?xf32>) {
      // CHECK: vector.transfer_write
-      // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+      // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
      %t3 = vector.transfer_write %v1, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+      // CHECK: scf.yield
+      // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
       scf.yield %t3 : tensor<?xf32>
     } else {
      // CHECK: vector.transfer_write
-      // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+      // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
      %t4 = vector.transfer_write %v3, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+      // CHECK: scf.yield
+      // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
       scf.yield %t4 : tensor<?xf32>
     }
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t2 : tensor<?xf32>
   } else {
    // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
    %t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t3 : tensor<?xf32>
   }
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %r : tensor<?xf32>
 }
@@ -1425,18 +1469,22 @@
   %cst = arith.constant 0.0 : f32
   %r, %v_r2 = scf.if %cond -> (tensor<?xf32>, vector<5xf32>) {
    // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
    %t2 = vector.transfer_write %v1, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
     scf.yield %t2, %v1 : tensor<?xf32>, vector<5xf32>
  } else {
    // Writing the same tensor through an alias.
    // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
    %t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor<?xf32>
    // Read the original value of %t1. This requires the write in this branch
    // to be out-of-place. But the write in the other branch can still be
    // inplace.
    %v_r = vector.transfer_read %t1[%idx2], %cst : tensor<?xf32>, vector<5xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
     scf.yield %t3, %v_r : tensor<?xf32>, vector<5xf32>
   }
   return %r, %v_r2 : tensor<?xf32>, vector<5xf32>
@@ -1450,17 +1498,21 @@
                      %cond: i1) -> tensor<?xf32> {
   %r = scf.if %cond -> (tensor<?xf32>) {
    // CHECK: tensor.extract_slice
-    // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
    %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %e : tensor<?xf32>
   } else {
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t1 : tensor<?xf32>
   }
   // Reading from and writing to the same tensor via different args. This is a
   // conflict.
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none", "none"]
   %r2 = tensor.insert_slice %r into %t1[%idx2][%idx2][1] : tensor<?xf32> into tensor<?xf32>
   return %r2 : tensor<?xf32>
 }
@@ -1473,13 +1525,17 @@
                      %cond: i1) -> tensor<?xf32> {
   %r = scf.if %cond -> (tensor<?xf32>) {
    // CHECK: tensor.extract_slice
-    // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
    %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %e : tensor<?xf32>
   } else {
    // CHECK: tensor.extract_slice
-    // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
    %f = tensor.extract_slice %t1[%idx2][%idx2][1] : tensor<?xf32> to tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %f : tensor<?xf32>
   }
@@ -1489,11 +1545,11 @@
   // why the tensor.insert_slice is inplace and the two extract_slices are
   // out-of-place.
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]
   %r2 = tensor.insert_slice %r into %t1[%idx3][%idx3][1] : tensor<?xf32> into tensor<?xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %r2 : tensor<?xf32>
 }
@@ -1504,23 +1560,27 @@
                      %idx: index, %idx2: index, %cond: i1) -> tensor<?xf32> {
   %r = scf.if %cond -> (tensor<?xf32>) {
    // CHECK: tensor.extract_slice
-    // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
    %e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %e : tensor<?xf32>
   } else {
    // TODO: This one could bufferize inplace, but the analysis is too restrictive.
    // CHECK: tensor.extract_slice
-    // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
    %f = tensor.extract_slice %t1[%idx2][%idx2][1] : tensor<?xf32> to tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %f : tensor<?xf32>
   }
   // CHECK: tensor.insert_slice
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]
   %r2 = tensor.insert_slice %r into %t1[%idx2][%idx2][1] : tensor<?xf32> into tensor<?xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %r2 : tensor<?xf32>
 }
@@ -1535,8 +1595,10 @@
     scf.yield %t1 : tensor<?xf32>
   } else {
    // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
    %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t2 : tensor<?xf32>
   }
@@ -1557,14 +1619,20 @@
     scf.yield %t1 : tensor<?xf32>
   } else {
    // CHECK: vector.transfer_write
-    // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
    %t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t2 : tensor<?xf32>
   }
   %t1_alias = scf.if %cond2 -> (tensor<?xf32>) {
    // scf.yield bufferizes to a read. That is a conflict in this example.
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t1 : tensor<?xf32>
   } else {
+    // CHECK: scf.yield
+    // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
     scf.yield %t1 : tensor<?xf32>
   }
   %v2 = vector.transfer_read %t1_alias[%idx], %cst : tensor<?xf32>, vector<10xf32>
@@ -1578,7 +1646,7 @@
                %v : vector<5xf32>) -> (tensor<?xf32>) {
   %idx = arith.constant 0 : index
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
   %0 = vector.transfer_write %v, %A[%idx] : vector<5xf32>, tensor<?xf32>
   return %0 : tensor<?xf32>
 }
@@ -1587,9 +1655,8 @@
 // CHECK-LABEL: func @main_func
 func @main_func(%A : tensor<?xf32> {linalg.inplaceable = true}, %v : vector<5xf32>) -> (tensor<?xf32>) {
-  // Function calls always bufferize out-of-place at the moment.
   // CHECK: call
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]
   %0 = call @some_use(%A, %v) : (tensor<?xf32>, vector<5xf32>) -> (tensor<?xf32>)
   return %0 : tensor<?xf32>
 }
@@ -1604,7 +1671,7 @@
   // Write to the tensor. Cannot be inplace due to tensor_load.
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
   %w = vector.transfer_write %v, %0[%idx1] : vector<5xf32>, tensor<?xf32>
   // Read from the tensor and return result.
@@ -1622,7 +1689,7 @@
     -> (vector<5xf32>, vector<5xf32>) {
   // Write + read to/from tensor.
   // CHECK: vector.transfer_write
-  // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
   %1 = vector.transfer_write %v1, %t1[%idx2] : vector<5xf32>, tensor<?xf32>
   %cst = arith.constant 0.0 : f32
   %r1 = vector.transfer_read %1[%idx3], %cst : tensor<?xf32>, vector<5xf32>
@@ -1640,7 +1707,7 @@
 // CHECK-LABEL: func @inner_func
 func @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> {
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %t : tensor<?xf32>
 }
@@ -1662,7 +1729,7 @@
   %c0 = arith.constant 0 : index
   %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
   // CHECK: return
-  // CHECK-SAME: {__equivalent_func_args__ = [0]}
+  // CHECK-SAME: __equivalent_func_args__ = [0]
   return %0 : tensor<?xf32>
 }