diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -961,7 +961,8 @@ "{}">:$transpose_paddings, DefaultValuedAttr<StrAttr, "::mlir::bufferization::CopyTensorOp::getOperationName()">:$copy_back_op); let results = (outs TransformHandleTypeInterface:$padded, - TransformHandleTypeInterface:$pad); + TransformHandleTypeInterface:$pad, + TransformHandleTypeInterface:$copy); let assemblyFormat = "$target attr-dict `:` functional-type(operands, results)"; diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -1612,7 +1612,7 @@ transform::PadOp::apply(transform::TransformRewriter &rewriter, transform::TransformResults &results, transform::TransformState &state) { - SmallVector<Operation *> paddedOps, padOps; + SmallVector<Operation *> paddedOps, padOps, copyBackOps; for (Operation *target : state.getPayloadOps(getTarget())) { auto linalgTarget = dyn_cast<LinalgOp>(target); @@ -1707,10 +1707,18 @@ rewriter.replaceOp(linalgTarget, replacements); paddedOps.push_back(paddedOp); padOps.append(newPadOps.begin(), newPadOps.end()); + if (options.copyBackOp != LinalgPaddingOptions::CopyBackOp::None) { + for (Value v : replacements) { + Operation *copyBackOp = v.getDefiningOp(); + if (llvm::find(copyBackOps, copyBackOp) == copyBackOps.end()) + copyBackOps.push_back(copyBackOp); + } + } } results.set(cast<OpResult>(getPadded()), paddedOps); results.set(cast<OpResult>(getPad()), padOps); + results.set(cast<OpResult>(getCopy()), copyBackOps); return DiagnosedSilenceableFailure::success(); } diff --git a/mlir/python/mlir/dialects/_structured_transform_ops_ext.py b/mlir/python/mlir/dialects/_structured_transform_ops_ext.py --- 
a/mlir/python/mlir/dialects/_structured_transform_ops_ext.py +++ b/mlir/python/mlir/dialects/_structured_transform_ops_ext.py @@ -524,6 +524,7 @@ pdl_operation_type = pdl.OperationType.get() super().__init__( + pdl_operation_type, pdl_operation_type, pdl_operation_type, target, diff --git a/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir b/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir --- a/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir +++ b/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir @@ -49,11 +49,11 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !transform.any_op): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %padded, %pad = transform.structured.pad %0 { + %padded, %pad, %copy_back = transform.structured.pad %0 { padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2], pack_paddings=[1, 1, 1] - } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) %buffer, %new_ops = transform.structured.bufferize_to_allocation %pad {memory_space = 3} : !transform.any_op %2 = transform.bufferization.one_shot_bufferize %arg1 {bufferize_function_boundaries=true} : (!transform.any_op) -> !transform.any_op @@ -106,11 +106,11 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !transform.any_op): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %padded, %pad = transform.structured.pad %0 { + %padded, %pad, %copy_back = transform.structured.pad %0 { padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2], pack_paddings=[1, 1, 1] - } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) transform.structured.masked_vectorize %pad vector_sizes [10, 12] : 
!transform.any_op %vector_write = transform.structured.match ops{["vector.transfer_write"]} in %arg1 : (!transform.any_op) -> !transform.any_op %mask_op = transform.get_parent_op %vector_write {op_name = "vector.mask"} : (!transform.any_op) -> !transform.any_op diff --git a/mlir/test/Dialect/Linalg/transform-op-hoist-pad-build-packing-loop-nest.mlir b/mlir/test/Dialect/Linalg/transform-op-hoist-pad-build-packing-loop-nest.mlir --- a/mlir/test/Dialect/Linalg/transform-op-hoist-pad-build-packing-loop-nest.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-hoist-pad-build-packing-loop-nest.mlir @@ -17,10 +17,10 @@ %matmul_l1, %loops_l1 = transform.structured.tile_to_scf_for %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) - %matmul_padded, %0 = transform.structured.pad %matmul_l1 { + %matmul_padded, %0, %copy_back = transform.structured.pad %matmul_l1 { padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2] - } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) // In this case, the pad op is actually empty: we only tile the first dimension // and it does not have an impact on the RHS operand. 
@@ -49,10 +49,10 @@ %matmul_l1, %loops_l1 = transform.structured.tile_to_scf_for %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) - %matmul_padded, %0 = transform.structured.pad %matmul_l1 { + %matmul_padded, %0, %copy_back = transform.structured.pad %matmul_l1 { padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2] - } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) %pad = transform.get_producer_of_operand %matmul_padded[2] : (!transform.any_op) -> !transform.any_op @@ -87,10 +87,10 @@ %matmul_l1, %loops_l1 = transform.structured.tile_to_scf_for %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) - %matmul_padded, %0 = transform.structured.pad %matmul_l1 { + %matmul_padded, %0, %copy_back = transform.structured.pad %matmul_l1 { padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2] - } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) %pad = transform.get_producer_of_operand %matmul_padded[0] : (!transform.any_op) -> !transform.any_op @@ -125,10 +125,10 @@ %matmul_l1, %loops_l1 = transform.structured.tile_to_scf_for %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) - %matmul_padded, %0 = transform.structured.pad %matmul_l1 { + %matmul_padded, %0, %copy_back = transform.structured.pad %matmul_l1 { padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2] - } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) %pad = transform.get_producer_of_operand %matmul_padded[0] : (!transform.any_op) -> !transform.any_op @@ -161,10 +161,10 @@ %matmul_l1, %loops_l1:2 = transform.structured.tile_to_scf_for %matmul [5, 0, 7] : 
(!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - %matmul_padded, %0 = transform.structured.pad %matmul_l1 { + %matmul_padded, %0, %copy_back = transform.structured.pad %matmul_l1 { padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2] - } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) %pad = transform.get_producer_of_operand %matmul_padded[2] : (!transform.any_op) -> !transform.any_op diff --git a/mlir/test/Dialect/Linalg/transform-op-hoist-pad.mlir b/mlir/test/Dialect/Linalg/transform-op-hoist-pad.mlir --- a/mlir/test/Dialect/Linalg/transform-op-hoist-pad.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-hoist-pad.mlir @@ -17,11 +17,11 @@ %matmul_l1, %loops_l1 = transform.structured.tile_to_scf_for %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) - %matmul_padded, %0 = transform.structured.pad %matmul_l1 { + %matmul_padded, %0, %copy_back = transform.structured.pad %matmul_l1 { padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2], copy_back_op = "none" - } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) // In this case, the pad op is actually empty: we only tile the first dimension // and it does not have an impact on the RHS operand. 
@@ -53,11 +53,11 @@ %matmul_l1, %loops_l1 = transform.structured.tile_to_scf_for %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) - %matmul_padded, %0 = transform.structured.pad %matmul_l1 { + %matmul_padded, %0, %copy_back = transform.structured.pad %matmul_l1 { padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2], copy_back_op = "none" - } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) %pad = transform.get_producer_of_operand %matmul_padded[2] : (!transform.any_op) -> !transform.op<"tensor.pad"> @@ -96,11 +96,11 @@ %matmul_l1, %loops_l1 = transform.structured.tile_to_scf_for %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) - %matmul_padded, %0 = transform.structured.pad %matmul_l1 { + %matmul_padded, %0, %copy_back = transform.structured.pad %matmul_l1 { padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2], copy_back_op = "none" - } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) %pad = transform.get_producer_of_operand %matmul_padded[0] : (!transform.any_op) -> !transform.any_op @@ -141,11 +141,11 @@ %matmul_l1, %loops_l1 = transform.structured.tile_to_scf_for %matmul [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) - %matmul_padded, %0 = transform.structured.pad %matmul_l1 { + %matmul_padded, %0, %copy_back = transform.structured.pad %matmul_l1 { padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2], copy_back_op = "none" - } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) %pad = transform.get_producer_of_operand %matmul_padded[0] : (!transform.any_op) -> !transform.any_op @@ -185,11 +185,11 @@ %matmul_l1, 
%loops_l1:2 = transform.structured.tile_to_scf_for %matmul [5, 0, 7] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - %matmul_padded, %0 = transform.structured.pad %matmul_l1 { + %matmul_padded, %0, %copy_back = transform.structured.pad %matmul_l1 { padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2], copy_back_op = "none" - } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) %pad = transform.get_producer_of_operand %matmul_padded[2] : (!transform.any_op) -> !transform.op<"tensor.pad"> diff --git a/mlir/test/Dialect/Linalg/transform-op-pad.mlir b/mlir/test/Dialect/Linalg/transform-op-pad.mlir --- a/mlir/test/Dialect/Linalg/transform-op-pad.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-pad.mlir @@ -25,6 +25,9 @@ // CHECK: %[[T5:.*]] = linalg.matmul // CHECK-SAME: ins(%[[T3]], %[[T4]] : tensor<4x7xf32>, tensor<7x5xf32>) // CHECK-SAME: outs(%[[T2]] : tensor<4x5xf32>) + + // CHECK: %[[T6:.*]] = tensor.extract_slice %[[T5]] + // CHECK: %[[T7:.*]] = bufferization.copy_tensor %[[T6]], %[[T2]] %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> func.return %5 : tensor<24x25xf32> @@ -33,11 +36,13 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !transform.any_op): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %padded, %pad = transform.structured.pad %0 { + %padded, %pad, %copy_back = transform.structured.pad %0 { padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2], pack_paddings=[1, 1, 0] - } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, 
!transform.op<"bufferization.copy_tensor">) + // expected-remark @below {{1}} + test_print_number_of_associated_payload_ir_ops %copy_back : !transform.op<"bufferization.copy_tensor"> } // ----- @@ -65,12 +70,12 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !transform.any_op): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %padded, %pad = transform.structured.pad %0 { + %padded, %pad, %copy_back = transform.structured.pad %0 { padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2], pad_to_multiple_of=[2, 2, 1], pack_paddings=[1, 1, 0] - } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) } // ----- @@ -107,11 +112,11 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !transform.any_op): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %padded, %pad = transform.structured.pad %0 { + %padded, %pad, %copy_back = transform.structured.pad %0 { padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2], pack_paddings=[1, 1, 0] - } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) } // ----- @@ -128,11 +133,11 @@ ^bb1(%arg1: !transform.any_op): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @below {{op expects a padding value of type 'f32', got 0 : i32}} - %padded, %pad = transform.structured.pad %0 { + %padded, %pad, %copy_back = transform.structured.pad %0 { padding_values=[0: i32, 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2], pack_paddings=[1, 1, 0] - } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) } // 
----- @@ -149,11 +154,11 @@ ^bb1(%arg1: !transform.any_op): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @below {{expects a padding that parses to 'f32', got "{foo}"}} - %padded, %pad = transform.structured.pad %0 { + %padded, %pad, %copy_back = transform.structured.pad %0 { padding_values=["{foo}", 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2], pack_paddings=[1, 1, 0] - } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) } // ----- @@ -173,11 +178,11 @@ %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op // This error is silenceable and is not reported by this transform // {{transform.structured.pad failed to apply}} - %padded, %pad = transform.structured.pad %0 { + %padded, %pad, %copy_back = transform.structured.pad %0 { padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2], pack_paddings=[1, 1, 0] - } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) } // ----- @@ -228,11 +233,11 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !transform.any_op): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %padded, %pad = transform.structured.pad %0 { + %padded, %pad, %copy_back = transform.structured.pad %0 { padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2], pack_paddings=[1, 1, 1] - } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) } // ----- @@ -278,9 +283,9 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !transform.any_op): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : 
(!transform.any_op) -> !transform.any_op - %padded, %pad = transform.structured.pad %0 { + %padded, %pad, %copy_back = transform.structured.pad %0 { padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32], padding_dimensions=[0, 1, 2], pack_paddings=[1, 1, 1] - } : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) } diff --git a/mlir/test/Dialect/Linalg/transform-ops-invalid.mlir b/mlir/test/Dialect/Linalg/transform-ops-invalid.mlir --- a/mlir/test/Dialect/Linalg/transform-ops-invalid.mlir +++ b/mlir/test/Dialect/Linalg/transform-ops-invalid.mlir @@ -11,7 +11,7 @@ transform.sequence failures(propagate) { ^bb0(%arg0: !transform.any_op): // expected-error@below {{expects padding_dimensions to contain positive integers, found [1, -7]}} - transform.structured.pad %arg0 {padding_dimensions=[1, -7]} : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + transform.structured.pad %arg0 {padding_dimensions=[1, -7]} : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) } // ----- @@ -19,7 +19,7 @@ transform.sequence failures(propagate) { ^bb0(%arg0: !transform.any_op): // expected-error@below {{expects pack_paddings to contain booleans (0/1), found [1, 7]}} - transform.structured.pad %arg0 {pack_paddings=[1, 7]} : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + transform.structured.pad %arg0 {pack_paddings=[1, 7]} : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) } // ----- @@ -27,7 +27,7 @@ transform.sequence failures(propagate) { ^bb0(%arg0: !transform.any_op): // expected-error@below {{expects transpose_paddings to be a permutation, found [1, 1]}} - transform.structured.pad %arg0 {transpose_paddings=[[1, 1]]} : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + transform.structured.pad %arg0 {transpose_paddings=[[1, 1]]} : (!transform.any_op) -> (!transform.any_op, 
!transform.any_op, !transform.any_op) } // ----- diff --git a/mlir/test/Dialect/Linalg/transform-ops.mlir b/mlir/test/Dialect/Linalg/transform-ops.mlir --- a/mlir/test/Dialect/Linalg/transform-ops.mlir +++ b/mlir/test/Dialect/Linalg/transform-ops.mlir @@ -21,7 +21,7 @@ transform.sequence failures(propagate) { ^bb1(%arg0: !transform.any_op): // CHECK: transform.structured.pad - %0, %1 = transform.structured.pad %arg0 : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + %0, %1, %2 = transform.structured.pad %arg0 : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) } transform.sequence failures(propagate) {