diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -118,6 +118,16 @@ An optional memory space attribute can be specified for the materialized buffer allocation. + If a memory copy is needed, a "memref.tensor_store" is used when possible. + This is an op with tensor semantics that will bufferize to a memory copy + later. Which concrete op will be used for the memory copy is up to the + bufferization framework. Alternatively, a custom memcpy op can be specified + via `memcpy_op`. Currently supported are "memref.copy" and "linalg.copy". + In that case, the source of each memcpy must not have a custom memory space. + Furthermore, because the future buffer layout is unknown for a given tensor, + a fully dynamic layout is assumed for best compatibility. Users should use + "memref.tensor_store" when possible. + #### Return modes This operation consumes the `target` handle and produces the @@ -125,7 +135,10 @@ }]; let arguments = (ins TransformHandleTypeInterface:$target, - OptionalAttr:$memory_space); + OptionalAttr:$memory_space, + DefaultValuedAttr: + $memcpy_op); + let hasVerifier = 1; let results = (outs Transform_AnyValue:$allocated_buffer, Transform_AnyOpType:$new_ops); let assemblyFormat = "$target attr-dict `:` type($target)"; diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -46,6 +46,12 @@ // Bufferization-related transforms. 
//===----------------------------------------------------------------------===// +struct BufferizeToAllocationOptions { + enum class MemcpyOp { MemrefTensorStore = 0, MemrefCopy = 1, LinalgCopy = 2 }; + + MemcpyOp memcpyOp = MemcpyOp::MemrefTensorStore; +}; + /// Materialize a buffer allocation for the given tensor.pad op and lower the /// op to linalg.fill/linalg.generic + memref.tensor_store. E.g.: /// @@ -62,8 +68,9 @@ /// In addition to rewriting the IR as shown above, this function returns the /// newly allocated buffer. The `insertionPoint` parameter can be used to /// specify a custom insertion point for the buffer allocation. -Value bufferizeToAllocation(RewriterBase &rewriter, tensor::PadOp padOp, - Attribute memorySpace = {}, +Value bufferizeToAllocation(RewriterBase &rewriter, + const BufferizeToAllocationOptions &options, + tensor::PadOp padOp, Attribute memorySpace = {}, Operation *insertionPoint = nullptr); /// Materialize a buffer allocation for the given vector.mask op and bufferize @@ -85,8 +92,9 @@ /// In addition to rewriting the IR as shown above, this function returns the /// newly allocated buffer. The `insertionPoint` parameter can be used to /// specify a custom insertion point for the buffer allocation. -Value bufferizeToAllocation(RewriterBase &rewriter, vector::MaskOp maskOp, - Attribute memorySpace = {}, +Value bufferizeToAllocation(RewriterBase &rewriter, + const BufferizeToAllocationOptions &options, + vector::MaskOp maskOp, Attribute memorySpace = {}, Operation *insertionPoint = nullptr); /// Bufferize the given op with tensor semantics and materialize the result in @@ -105,8 +113,9 @@ /// This function returns the newly allocated buffer. The `insertionPoint` /// parameter can be used to specify a custom insertion point for the buffer /// allocation. 
-Value bufferizeToAllocation(RewriterBase &rewriter, Operation *op, - Attribute memorySpace = {}, +Value bufferizeToAllocation(RewriterBase &rewriter, + const BufferizeToAllocationOptions &options, + Operation *op, Attribute memorySpace = {}, Operation *insertionPoint = nullptr); /// Try to eliminate tensor::EmptyOps inside `op` that are anchored on a diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -207,12 +207,27 @@ NewOpsListener newOpsListener(previousListener); rewriter.setListener(&newOpsListener); + linalg::BufferizeToAllocationOptions options; + if (getMemcpyOp() == "memref.tensor_store") { + options.memcpyOp = + linalg::BufferizeToAllocationOptions::MemcpyOp::MemrefTensorStore; + } else if (getMemcpyOp() == "memref.copy") { + options.memcpyOp = + linalg::BufferizeToAllocationOptions::MemcpyOp::MemrefCopy; + } else if (getMemcpyOp() == "linalg.copy") { + options.memcpyOp = + linalg::BufferizeToAllocationOptions::MemcpyOp::LinalgCopy; + } else { + llvm_unreachable("invalid memcpy op"); + } + // Bufferize ops. Attribute memorySpace = getMemorySpace().has_value() ? 
getMemorySpace().value() : Attribute(); SmallVector allocatedBuffers; for (Operation *op : state.getPayloadOps(getTarget())) { - Value buffer = linalg::bufferizeToAllocation(rewriter, op, memorySpace); + Value buffer = + linalg::bufferizeToAllocation(rewriter, options, op, memorySpace); if (!buffer) { DiagnosedSilenceableFailure diag = emitSilenceableError() << "failed to bufferize operation"; @@ -236,6 +251,13 @@ modifiesPayload(effects); } +LogicalResult transform::BufferizeToAllocationOp::verify() { + if (getMemcpyOp() != "memref.tensor_store" && + getMemcpyOp() != "memref.copy" && getMemcpyOp() != "linalg.copy") + return emitOpError() << "unsupported memcpy op"; + return success(); +} + //===----------------------------------------------------------------------===// // DecomposeOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp b/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp @@ -53,6 +53,42 @@ return destination; } +/// Create a memcpy from the given source tensor to the given destination +/// memref. The copy op type can be specified in the `options`. +static void createMemcpy(OpBuilder &b, Location loc, Value tensorSource, + Value memrefDest, + const linalg::BufferizeToAllocationOptions &options) { + auto tensorType = dyn_cast(tensorSource.getType()); + assert(tensorType && "expected ranked tensor"); + assert(memrefDest.getType().isa() && "expected ranked memref"); + + switch (options.memcpyOp) { + case linalg::BufferizeToAllocationOptions::MemcpyOp::MemrefTensorStore: + // Note: This is the preferred way of memcpy'ing because no layout map + // and/or memory space must be specified for the source. 
+ b.create(loc, tensorSource, memrefDest); + break; + case linalg::BufferizeToAllocationOptions::MemcpyOp::MemrefCopy: { + // TODO: Support custom memory space on source. + // We do not know the layout map of the source yet, so use a fully dynamic + // layout for best compatibility. + Value toMemref = b.create( + loc, bufferization::getMemRefTypeWithFullyDynamicLayout(tensorType), + tensorSource, /*readOnly=*/true); + b.create(loc, toMemref, memrefDest); + } break; + case linalg::BufferizeToAllocationOptions::MemcpyOp::LinalgCopy: { + // TODO: Support custom memory space on source. + // We do not know the layout map of the source yet, so use a fully dynamic + // layout for best compatibility. + Value toMemref = b.create( + loc, bufferization::getMemRefTypeWithFullyDynamicLayout(tensorType), + tensorSource, /*readOnly=*/true); + b.create(loc, toMemref, memrefDest); + } break; + }; +} + static Operation *movePaddingToFillOrGenericOp(RewriterBase &rewriter, Location loc, PadOp padOp, Value dest) { @@ -169,9 +205,9 @@ return alloc; } -Value linalg::bufferizeToAllocation(RewriterBase &rewriter, PadOp padOp, - Attribute memorySpace, - Operation *insertionPoint) { +Value linalg::bufferizeToAllocation( + RewriterBase &rewriter, const linalg::BufferizeToAllocationOptions &options, + PadOp padOp, Attribute memorySpace, Operation *insertionPoint) { OpBuilder::InsertionGuard g(rewriter); rewriter.setInsertionPoint(insertionPoint ? insertionPoint : padOp); Location loc = padOp.getLoc(); @@ -195,7 +231,7 @@ rewriter.getIndexAttr(1)); Value subview = rewriter.create( loc, alloc, /*offsets=*/padOp.getMixedLowPad(), sizes, strides); - rewriter.create(loc, padOp.getSource(), subview); + createMemcpy(rewriter, loc, padOp.getSource(), subview, options); // Create bufferization.to_tensor with "restrict" and "writable". The returned // tensor is a new buffer allocation, so it does not alias with any buffer. 
@@ -205,27 +241,26 @@ return alloc; } -Value linalg::bufferizeToAllocation(RewriterBase &rewriter, - vector::MaskOp maskOp, - Attribute memorySpace, - Operation *insertionPoint) { +Value linalg::bufferizeToAllocation( + RewriterBase &rewriter, const linalg::BufferizeToAllocationOptions &options, + vector::MaskOp maskOp, Attribute memorySpace, Operation *insertionPoint) { assert(llvm::range_size(maskOp.getMaskBlock()->without_terminator()) == 1 && "expected single masked op"); OpBuilder::InsertionGuard g(rewriter); - bufferization::BufferizationOptions options; + bufferization::BufferizationOptions bufferizationOptions; Operation *yieldOp = maskOp.getMaskRegion().front().getTerminator(); assert(isa(yieldOp) && "expected yield op terminator"); // Bufferize maskable op. By default, place the buffer allocation right before // the mask op. Value alloc = bufferizeToAllocation( - rewriter, maskOp.getMaskableOp(), memorySpace, + rewriter, options, maskOp.getMaskableOp(), memorySpace, /*insertionPoint=*/insertionPoint ? insertionPoint : maskOp); // Bufferize terminator. rewriter.setInsertionPoint(yieldOp); if (failed(cast(yieldOp).bufferize( - rewriter, options))) + rewriter, bufferizationOptions))) return nullptr; // Erase dead to_tensor ops inside of the mask op. 
This is necessary because @@ -247,7 +282,7 @@ resultUses.push_back(&use); rewriter.setInsertionPoint(maskOp); if (failed(cast(maskOp.getOperation()) - .bufferize(rewriter, options))) + .bufferize(rewriter, bufferizationOptions))) return nullptr; // Set "restrict" attribute, indicating that no other tensor aliases with @@ -392,23 +427,23 @@ return insertSliceOp.getOperation(); } -Value linalg::bufferizeToAllocation(RewriterBase &rewriter, Operation *op, - Attribute memorySpace, - Operation *insertionPoint) { +Value linalg::bufferizeToAllocation( + RewriterBase &rewriter, const linalg::BufferizeToAllocationOptions &options, + Operation *op, Attribute memorySpace, Operation *insertionPoint) { using namespace bufferization; // Call specialized overload for certain ops. if (auto padOp = dyn_cast(op)) - return bufferizeToAllocation(rewriter, padOp, memorySpace); + return bufferizeToAllocation(rewriter, options, padOp, memorySpace); if (auto maskOp = dyn_cast(op)) - return bufferizeToAllocation(rewriter, maskOp, memorySpace); + return bufferizeToAllocation(rewriter, options, maskOp, memorySpace); // Only bufferizable ops are supported. auto bufferizableOp = dyn_cast(op); if (!bufferizableOp) return nullptr; - BufferizationOptions options; - AnalysisState state(options); + BufferizationOptions bufferizationOptions; + AnalysisState state(bufferizationOptions); // Gather tensor results. SmallVector tensorResults; @@ -462,8 +497,7 @@ if (!state.findDefinitions(operand->get()).empty()) { // Initialize buffer with a copy of the operand data. Not needed if the // tensor is uninitialized. - rewriter.create(op->getLoc(), operand->get(), - alloc); + createMemcpy(rewriter, op->getLoc(), operand->get(), alloc, options); } rewriter.updateRootInPlace(op, [&]() { operand->set(rewriter.create(op->getLoc(), alloc)); @@ -472,7 +506,7 @@ // Bufferize the op. 
rewriter.setInsertionPoint(op); - if (failed(bufferizableOp.bufferize(rewriter, options))) + if (failed(bufferizableOp.bufferize(rewriter, bufferizationOptions))) return nullptr; // Set "restrict" attribute, indicating that no other tensor aliases with diff --git a/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir b/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir --- a/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir @@ -39,7 +39,7 @@ // expected-remark @below{{1}} test_print_number_of_associated_payload_ir_ops %fill_op : !transform.any_op - // Ensure that one memref.tensor_store was generated. + // Ensure that one memref.tensor_store was generated. %tensor_store = transform.select "memref.tensor_store" in %new : (!transform.any_op) -> !transform.any_op // expected-remark @below{{1}} test_print_number_of_associated_payload_ir_ops %tensor_store : !transform.any_op @@ -47,6 +47,43 @@ // ----- +// CHECK-LABEL: func @tensor_pad_constant_with_custom_copy( +// CHECK-NOT: memref.tensor_store +// CHECK-NOT: memref.copy +// CHECK: linalg.copy +func.func @tensor_pad_constant_with_custom_copy( + %t: tensor, %l2: index, %h1: index, %h2: index) + -> tensor +{ + %0 = tensor.pad %t low[5, %l2] high[%h1, %h2] { + ^bb0(%arg0: index, %arg1: index): + %c = arith.constant 50 : index + tensor.yield %c : index + } : tensor to tensor + return %0 : tensor +} + +transform.sequence failures(propagate) { +^bb1(%arg1: !transform.any_op): + %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 3, memcpy_op = "linalg.copy"}: !transform.any_op + + // Ensure that one linalg.fill was generated. 
+ %fill_op = transform.select "linalg.fill" in %new : (!transform.any_op) -> !transform.any_op + // expected-remark @below{{1}} + test_print_number_of_associated_payload_ir_ops %fill_op : !transform.any_op + + // Ensure that one linalg.copy was generated. + %linalg_copy = transform.select "linalg.copy" in %new : (!transform.any_op) -> !transform.any_op + // expected-remark @below{{1}} + test_print_number_of_associated_payload_ir_ops %linalg_copy : !transform.any_op + + // Make sure that One-Shot Bufferize can bufferize the rest. + %4 = transform.bufferization.one_shot_bufferize %arg1 : (!transform.any_op) -> !transform.any_op +} + +// ----- + // CHECK-LABEL: func @tensor_pad_constant( // CHECK-SAME: %[[t:.*]]: tensor // CHECK: %[[src:.*]] = bufferization.to_memref %[[t]]