diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td
@@ -415,6 +415,29 @@
         return mlir::bufferization::detail::defaultIsRepetitiveRegion(
             cast<BufferizableOpInterface>($_op.getOperation()), index);
       }]
+    >,
+    InterfaceMethod<
+      /*desc=*/[{
+        Return `true` if allocations are allowed inside the given region of
+        this op. By default, allocations are allowed.
+
+        This method is queried during TensorCopyInsertion. If an allocation
+        is attempted to be inserted in a region that does not allow
+        allocations, it is instead inserted in the parent region.
+
+        Note: This method should be overridden only if setting the insertion
+        point to the parent region is generally safe. In particular, changing
+        the insertion point is not safe if the dynamic extents of an
+        allocation depend on an SSA value defined in the region that disallows
+        allocations.
+      }],
+      /*retType=*/"bool",
+      /*methodName=*/"areAllocationsAllowedInRegion",
+      /*args=*/(ins "unsigned":$index),
+      /*methodBody=*/"",
+      /*defaultImplementation=*/[{
+        return true;
+      }]
     >
   ];
 
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -96,12 +96,80 @@
   return !attr[opResult.getResultNumber()].cast<BoolAttr>().getValue();
 }
 
+/// Compute and set an allocation point for a tensor copy of the given shaped
+/// value. This function queries the BufferizableOpInterface to detect regions
+/// in which allocations are forbidden. In such a case, an allocation is placed
+/// in a parent region. Example:
+///
+/// vector.mask ... {
+///   vector.transfer_write %v[%c0], %t : vector<5xf32>, tensor<?xf32>
+/// } : ... -> tensor<?xf32>
+///
+/// In case %t bufferizes out-of-place, the allocation must be placed outside
+/// of vector.mask as per the op's BufferizableOpInterface implementation.
+///
+/// Note: Allocations may not jump repetitive regions. If an allocation would be
+/// placed in a different repetitive region, return failure. This indicates an
+/// incorrect implementation of the BufferizableOpInterface.
+///
+/// Note: If the new insertion point violates op dominance, return failure. This
+/// also indicates an incorrect implementation of the BufferizableOpInterface.
+///
+static LogicalResult
+setAllocationInsertionPoint(OpBuilder &b, Value shapedValue,
+                            const BufferizationOptions &options) {
+  Region *r = b.getInsertionBlock()->getParent();
+  Region *repetitiveRegion =
+      getEnclosingRepetitiveRegion(b.getInsertionBlock(), options);
+  Operation *ip = nullptr;
+  do {
+    Operation *op = r->getParentOp();
+    if (!op)
+      return success();
+    auto bufferizableOp = options.dynCastBufferizableOp(op);
+    if (bufferizableOp &&
+        !bufferizableOp.areAllocationsAllowedInRegion(r->getRegionNumber()))
+      ip = op;
+  } while ((r = r->getParentRegion()));
+
+  if (ip) {
+    // A custom insertion point is necessary.
+    if (getEnclosingRepetitiveRegion(ip, options) != repetitiveRegion)
+      // It is incorrect to set the buffer allocation point into a different
+      // repetitive region. This would effectively de-privatize a buffer.
+      return getOwnerOfValue(shapedValue)
+          ->emitError(
+              "unable to move tensor copy ip to different repetitive region");
+
+    // Check for op dominance errors.
+    if (auto bbArg = shapedValue.dyn_cast<BlockArgument>()) {
+      if (!bbArg.getParentBlock()->findAncestorOpInBlock(*ip))
+        // The computed insertion point violates op dominance.
+        return getOwnerOfValue(shapedValue)
+            ->emitError(
+                "unable to find suitable insertion point for tensor copy");
+    } else {
+      Operation *shapedOp = shapedValue.dyn_cast<OpResult>().getDefiningOp();
+      Operation *ipInBlock = shapedOp->getBlock()->findAncestorOpInBlock(*ip);
+      if (!ipInBlock || shapedOp == ipInBlock ||
+          ipInBlock->isBeforeInBlock(shapedOp))
+        // The computed insertion point violates op dominance.
+        return getOwnerOfValue(shapedValue)
+            ->emitError(
+                "unable to find suitable insertion point for tensor copy");
+    }
+    b.setInsertionPoint(ip);
+  }
+  return success();
+}
+
 /// Create an AllocTensorOp for the given shaped value. If `copy` is set, the
 /// shaped value is copied. Otherwise, a tensor with undefined contents is
 /// allocated.
 FailureOr<Value> bufferization::allocateTensorForShapedValue(
     OpBuilder &b, Location loc, Value shapedValue, bool escape,
     const BufferizationOptions &options, bool copy) {
+  OpBuilder::InsertionGuard g(b);
   Value tensor;
   if (shapedValue.getType().isa<RankedTensorType>()) {
     tensor = shapedValue;
@@ -137,6 +205,10 @@
     populateDynamicDimSizes(b, loc, tensor, dynamicSizes);
   }
 
+  // Compute insertion point for allocation.
+  if (failed(setAllocationInsertionPoint(b, shapedValue, options)))
+    return failure();
+
   // Create AllocTensorOp.
   auto allocTensorOp = b.create<AllocTensorOp>(loc, tensorType, dynamicSizes,
                                                copy ? tensor : Value());
diff --git a/mlir/lib/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.cpp
--- a/mlir/lib/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -201,6 +201,10 @@
                                 const AnalysisState &state) const {
     return BufferRelation::Equivalent;
   }
+
+  bool areAllocationsAllowedInRegion(Operation *op, unsigned index) const {
+    return false;
+  }
 };
 
 /// Bufferization of vector.yield. Replaced with a new vector.yield that
diff --git a/mlir/test/Dialect/Vector/bufferize.mlir b/mlir/test/Dialect/Vector/bufferize.mlir
--- a/mlir/test/Dialect/Vector/bufferize.mlir
+++ b/mlir/test/Dialect/Vector/bufferize.mlir
@@ -44,5 +44,20 @@
   return %0 : vector<16xf32>
 }
 
-// TODO: Add test case for vector.mask. The masked op can currently not
-// bufferize out-of-place, so the only test case is in one-shot-bufferize.mlir.
+// -----
+
+// CHECK-LABEL: func @mask(
+// CHECK-SAME: %[[t0:.*]]: tensor<?xf32>, %[[val:.*]]: vector<16xf32>
+// CHECK-SAME: %[[idx:.*]]: index, %[[mask:.*]]: vector<16xi1>)
+// CHECK-DAG: %[[m:.*]] = bufferization.to_memref %[[t0]]
+// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
+// CHECK: %[[dim:.*]] = memref.dim %[[m]], %[[c0]]
+// CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]])
+// CHECK: memref.copy %[[m]], %[[alloc]]
+// CHECK: vector.mask %[[mask]] { vector.transfer_write %[[val]], %[[alloc]][%[[idx]]] : vector<16xf32>, memref<?xf32> } : vector<16xi1>
+// CHECK: %[[r:.*]] = bufferization.to_tensor %[[alloc]] : memref<?xf32>
+// CHECK: return %[[r]]
+func.func @mask(%t0: tensor<?xf32>, %val: vector<16xf32>, %idx: index, %mask: vector<16xi1>) -> tensor<?xf32> {
+  %0 = vector.mask %mask { vector.transfer_write %val, %t0[%idx] : vector<16xf32>, tensor<?xf32> } : vector<16xi1> -> tensor<?xf32>
+  return %0 : tensor<?xf32>
+}