diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -83,49 +83,30 @@
      DeclareOpInterfaceMethods<TransformOpInterface>,
      ReportTrackingListenerFailuresOpTrait]> {
   let description = [{
-    This transform materializes an allocation for the targeted tensor value. It
-    replaces all original uses of the target with the newly allocated buffer,
-    wrapped in a `bufferization.to_tensor` op. It returns a handle to the newly
-    allocated buffer. Furthermore, it returns a handle to the result of the
-    `to_tensor` op.
+    This transform bufferizes the targeted operation and materializes the
+    result in a new allocation. It replaces all original uses of the target
+    result with the newly allocated buffer, wrapped in a
+    `bufferization.to_tensor` op. It returns a handle to the newly allocated
+    buffer. Furthermore, it returns a handle to the result of the `to_tensor`
+    op.
 
-    Example:
-    ```
-    %0 = "some_op"() : () -> (tensor<10xf32>)
-    "some_use"(%0) : (tensor<10xf32>) -> ()
-    ```
-
-    Is rewritten to:
-    ```
-    %0 = "some_op"() : () -> (tensor<10xf32>)
-    %1 = memref.alloc() : memref<10xf32>
-    memref.tensor_store %0, %1 : memref<10xf32>
-    %2 = bufferization.to_tensor %1 restrict writable : memref<10xf32>
-    "some_use"(%2) : (tensor<10xf32>) -> ()
-    ```
-
-    This transform has optimized lowerings for certain targets that are results
-    of non-DPS ops. For such targets, not only a buffer allocation is emitted
-    but also the defining op is bufferized. This is to avoid a second
-    allocation for the missing destination of the non-DPS op (when subsequently
-    running a bufferization pass/transform). Currently supported ops with
-    optimized lowerings:
-    - tensor.pad
+    Only `tensor.pad` targets are supported at the moment.
+    - `tensor.pad` is lowered to an allocation, followed by a `linalg.fill`
+      and a buffer copy (all on memrefs).
 
     An optional memory space attribute can be specified for the materialized
     buffer allocation.
 
     #### Return modes
 
-    This operation consumes the `target` handle and produces the `replacement`
-    and `allocated_buffer` handles. It always succeeds.
+    This operation consumes the `target` handle and produces the
+    `allocated_buffer` handle. It always succeeds.
   }];
 
-  let arguments = (ins Transform_AnyValue:$target,
+  let arguments = (ins TransformHandleTypeInterface:$target,
                        OptionalAttr<AnyAttr>:$memory_space);
-  let results = (outs Transform_AnyValue:$allocated_buffer,
-                      Transform_AnyValue:$replacement);
-  let assemblyFormat = "$target attr-dict";
+  let results = (outs Transform_AnyValue:$allocated_buffer);
+  let assemblyFormat = "$target attr-dict `:` type($target)";
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -60,27 +60,18 @@
 /// %0 = bufferization.to_tensor %alloc restrict writable
 ///
 /// In addition to rewriting the IR as shown above, this function returns the
-/// newly allocated buffer. Furthermore, the result of the
-/// bufferization.to_tensor op is optionally returned via `replacement`.
+/// newly allocated buffer.
 Value bufferizeToAllocation(RewriterBase &rewriter, tensor::PadOp padOp,
-                            Attribute memorySpace = {},
-                            Value *replacement = nullptr);
+                            Attribute memorySpace = {});
 
-/// Materialize a buffer allocation for the given tensor value. E.g.:
+/// Bufferize the given op with tensor semantics and materialize the result in
+/// a newly allocated buffer.
 ///
-/// %alloc = memref.alloc
-/// memref.tensor_store %value, %alloc
-/// %0 = bufferization.to_tensor %alloc restrict writable
+/// Only tensor.pad is supported at the moment.
 ///
-/// In case `value` is a tensor.pad result, the corresponding overload is used
-/// internally to produce a better bufferization.
-///
-/// In addition to rewriting the IR as shown above, this function returns the
-/// newly allocated buffer. Furthermore, the result of the
-/// bufferization.to_tensor op is optionally returned via `replacement`.
-Value bufferizeToAllocation(RewriterBase &rewriter, Value value,
-                            Attribute memorySpace = {},
-                            Value *replacement = nullptr);
+/// This function returns the newly allocated buffer.
+Value bufferizeToAllocation(RewriterBase &rewriter, Operation *op,
+                            Attribute memorySpace = {});
 
 /// Try to eliminate tensor::EmptyOps inside `op` that are anchored on a
 /// LinalgOp. This transforms looks for LinalgOps that have an unused output
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -176,16 +176,17 @@
     transform::TransformResults &results, transform::TransformState &state) {
   Attribute memorySpace =
       getMemorySpace().has_value() ? getMemorySpace().value() : Attribute();
-  SmallVector<Value> replacements;
   SmallVector<Value> allocatedBuffers;
-  for (Value value : state.getPayloadValues(getTarget())) {
-    Value replacement;
-    Value buffer = linalg::bufferizeToAllocation(rewriter, value, memorySpace,
-                                                 &replacement);
-    replacements.push_back(replacement);
+  for (Operation *op : state.getPayloadOps(getTarget())) {
+    Value buffer = linalg::bufferizeToAllocation(rewriter, op, memorySpace);
+    if (!buffer) {
+      DiagnosedSilenceableFailure diag = emitSilenceableError()
+                                         << "failed to bufferize operation";
+      diag.attachNote(op->getLoc()) << "target payload op";
+      return diag;
+    }
     allocatedBuffers.push_back(buffer);
   }
-  results.setValues(cast<OpResult>(getReplacement()), replacements);
   results.setValues(cast<OpResult>(getAllocatedBuffer()), allocatedBuffers);
   return DiagnosedSilenceableFailure::success();
 }
@@ -193,7 +194,6 @@
 void transform::BufferizeToAllocationOp::getEffects(
     SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
   consumesHandle(getTarget(), effects);
-  producesHandle(getReplacement(), effects);
   producesHandle(getAllocatedBuffer(), effects);
   modifiesPayload(effects);
 }
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp b/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp
--- a/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp
@@ -170,7 +170,7 @@
 }
 
 Value linalg::bufferizeToAllocation(RewriterBase &rewriter, PadOp padOp,
-                                    Attribute memorySpace, Value *replacement) {
+                                    Attribute memorySpace) {
   OpBuilder::InsertionGuard g(rewriter);
   rewriter.setInsertionPoint(padOp);
   Location loc = padOp.getLoc();
@@ -198,9 +198,6 @@
   Value toTensorOp = rewriter.create<bufferization::ToTensorOp>(
       loc, alloc, /*restrict=*/true, /*writable=*/true);
   rewriter.replaceOp(padOp, toTensorOp);
-
-  if (replacement)
-    *replacement = toTensorOp;
   return alloc;
 }
 
@@ -331,43 +328,14 @@
   return insertSliceOp.getOperation();
 }
 
-Value linalg::bufferizeToAllocation(RewriterBase &rewriter, Value value,
-                                    Attribute memorySpace, Value *replacement) {
+Value linalg::bufferizeToAllocation(RewriterBase &rewriter, Operation *op,
+                                    Attribute memorySpace) {
   // Call specialized overload for certain ops.
-  if (auto padOp = value.getDefiningOp<PadOp>())
-    return bufferizeToAllocation(rewriter, padOp, memorySpace, replacement);
-
-  // Collect all uses.
-  SmallVector<OpOperand *> uses = llvm::to_vector(
-      llvm::map_range(value.getUses(), [](OpOperand &use) { return &use; }));
-
-  OpBuilder::InsertionGuard g(rewriter);
-  if (auto bbArg = dyn_cast<BlockArgument>(value)) {
-    rewriter.setInsertionPointToStart(bbArg.getOwner());
-  } else {
-    rewriter.setInsertionPointAfter(value.getDefiningOp());
-  }
-  Location loc = value.getLoc();
+  if (auto padOp = dyn_cast<PadOp>(op))
+    return bufferizeToAllocation(rewriter, padOp, memorySpace);
 
-  // Create buffer allocation.
-  Value alloc = createAllocationForTensor(rewriter, loc, value, memorySpace);
-
-  // Create memref.tensor_store.
-  rewriter.setInsertionPointAfter(alloc.getDefiningOp());
-  rewriter.create<memref::TensorStoreOp>(loc, value, alloc);
-
-  // Create bufferization.to_tensor with "restrict" and "writable". The returned
-  // tensor is a new buffer allocation, so it does not alias with any buffer.
-  Value toTensorOp = rewriter.create<bufferization::ToTensorOp>(
-      loc, alloc, /*restrict=*/true, /*writable=*/true);
-  for (OpOperand *use : uses) {
-    rewriter.updateRootInPlace(use->getOwner(),
-                               [&]() { use->set(toTensorOp); });
-  }
-
-  if (replacement)
-    *replacement = toTensorOp;
-  return alloc;
+  // TODO: Support other ops.
+  return nullptr;
 }
 
 namespace {
diff --git a/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir b/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir
--- a/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir
+++ b/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir
@@ -54,8 +54,7 @@
       padding_dimensions=[0, 1, 2],
       pack_paddings=[1, 1, 1]
   } : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
-  %pad_result = transform.get_result %pad[0] : (!transform.any_op) -> !transform.any_value
-  %buffer, %replacement = transform.structured.bufferize_to_allocation %pad_result {memory_space = 3}
+  %buffer = transform.structured.bufferize_to_allocation %pad {memory_space = 3} : !transform.any_op
   %2 = transform.bufferization.one_shot_bufferize %arg1 {bufferize_function_boundaries=true} : (!transform.any_op) -> !transform.any_op
 }
 
diff --git a/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir b/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir
@@ -32,8 +32,7 @@
 transform.sequence failures(propagate) {
 ^bb1(%arg1: !transform.any_op):
   %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-  %1 = transform.get_result %0[0] : (!transform.any_op) -> !transform.any_value
-  %2, %3 = transform.structured.bufferize_to_allocation %1
+  %2 = transform.structured.bufferize_to_allocation %0 : !transform.any_op
 }
 
 // -----
@@ -58,77 +57,8 @@
 transform.sequence failures(propagate) {
 ^bb1(%arg1: !transform.any_op):
   %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-  %1 = transform.get_result %0[0] : (!transform.any_op) -> !transform.any_value
-  %2, %3 = transform.structured.bufferize_to_allocation %1
+  %2 = transform.structured.bufferize_to_allocation %0 : !transform.any_op
   // Make sure that One-Shot Bufferize can bufferize the rest.
   %4 = transform.bufferization.one_shot_bufferize %arg1 : (!transform.any_op) -> !transform.any_op
 }
 
-// -----
-
-// CHECK-LABEL: func @materialization_of_bbarg(
-// CHECK-SAME: %[[t:.*]]: tensor
-// CHECK: %[[c0:.*]] = arith.constant 0 : index
-// CHECK: %[[dim:.*]] = tensor.dim %[[t]], %[[c0]]
-// CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) : memref
-// CHECK: memref.tensor_store %[[t]], %[[alloc]]
-// CHECK: %[[alloc_t:.*]] = bufferization.to_tensor %[[alloc]] restrict writable
-// CHECK: %[[r:.*]] = tensor.extract %[[alloc_t]]
-// CHECK: memref.dealloc %[[alloc]]
-// CHECK: return %[[r]]
-func.func @materialization_of_bbarg(%t: tensor, %idx: index) -> index {
-  %r = tensor.extract %t[%idx, %idx] : tensor
-  return %r : index
-}
-
-transform.sequence failures(propagate) {
-^bb1(%arg1: !transform.any_op):
-  %0 = transform.structured.match ops{["tensor.extract"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-  %1 = test_produce_value_handle_to_argument_of_parent_block %0, 0 : (!transform.any_op) -> !transform.any_value
-  %2, %3 = transform.structured.bufferize_to_allocation %1 {memory_space = 4}
-}
-
-// -----
-
-// CHECK-LABEL: func @materialization_of_bbarg(
-// CHECK-SAME: %[[t:.*]]: tensor
-// CHECK: %[[m:.*]] = bufferization.to_memref %[[t]]
-// CHECK: %[[alloc:.*]] = memref.alloc(%{{.*}}) : memref
-// CHECK: memref.copy %[[m]], %[[alloc]]
-// CHECK: %[[r:.*]] = memref.load %[[alloc]]
-// CHECK: return %[[r]]
-func.func @materialization_of_bbarg(%t: tensor, %idx: index) -> index {
-  %r = tensor.extract %t[%idx, %idx] : tensor
-  return %r : index
-}
-
-transform.sequence failures(propagate) {
-^bb1(%arg1: !transform.any_op):
-  %0 = transform.structured.match ops{["tensor.extract"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-  %1 = test_produce_value_handle_to_argument_of_parent_block %0, 0 : (!transform.any_op) -> !transform.any_value
-  %2, %3 = transform.structured.bufferize_to_allocation %1 {memory_space = 4}
-  // Make sure that One-Shot Bufferize can bufferize the rest.
-  %4 = transform.bufferization.one_shot_bufferize %arg1 : (!transform.any_op) -> !transform.any_op
-}
-
-// -----
-
-// CHECK-LABEL: func @materialization_of_opresult(
-// CHECK: %[[t:.*]] = "dummy.some_op"
-// CHECK: %[[alloc:.*]] = memref.alloc(%{{.*}}) : memref
-// CHECK: memref.tensor_store %[[t]], %[[alloc]]
-// CHECK: %[[r:.*]] = bufferization.to_tensor %[[alloc]]
-// CHECK: return %[[r]]
-func.func @materialization_of_opresult(%idx: index) -> tensor {
-  %t = "dummy.some_op"() : () -> (tensor)
-  return %t : tensor
-}
-
-transform.sequence failures(propagate) {
-^bb1(%arg1: !transform.any_op):
-  %0 = transform.structured.match ops{["dummy.some_op"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-  %1 = transform.get_result %0[0] : (!transform.any_op) -> !transform.any_value
-  %2, %3 = transform.structured.bufferize_to_allocation %1 {memory_space = 4}
-}
-
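With this change, `transform.structured.bufferize_to_allocation` consumes an op handle rather than a value handle and returns only the allocated buffer. A minimal usage sketch of the new syntax, mirroring the updated tests above (the match pattern and the `{memory_space = 3}` attribute are illustrative, not required):

```mlir
transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
  // Match the tensor.pad op directly; the transform.get_result indirection
  // used with the old value-based API is no longer needed.
  %pad = transform.structured.match ops{["tensor.pad"]} in %arg1
      : (!transform.any_op) -> !transform.any_op
  // Consumes %pad and yields a single handle to the new allocation.
  %buffer = transform.structured.bufferize_to_allocation %pad {memory_space = 3}
      : !transform.any_op
}
```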