diff --git a/mlir/docs/Tutorials/transform/Ch1.md b/mlir/docs/Tutorials/transform/Ch1.md --- a/mlir/docs/Tutorials/transform/Ch1.md +++ b/mlir/docs/Tutorials/transform/Ch1.md @@ -69,9 +69,9 @@ ^bb0(%arg0: !transform.any_op, %arg1: !transform.op<"linalg.matmul">, %arg2: !transform.op<"linalg.elemwise_binary">): - transform.test_print_remark_at_operand %matmul, "matmul" + transform.test_print_remark_at_operand %arg1, "matmul" : !transform.op<"linalg.matmul"> - transform.test_print_remark_at_operand %elemwise, "elemwise_binaries" + transform.test_print_remark_at_operand %arg2, "elemwise_binaries" : !transform.op<"linalg.elemwise_binary"> transform.yield } @@ -93,7 +93,7 @@ ```sh matmul.mlir:7:13: remark: matmul - %matmul = linalg.matmul + %matmul = linalg.matmul ins(%lhs, %rhs: tensor<512x512xf32>, tensor<512x512xf32>) ^ matmul.mlir:7:13: note: see current operation: %0 = linalg.matmul ins(%arg0, %arg1 : tensor<512x512xf32>, tensor<512x512xf32>) outs(%arg3 : tensor<512x512xf32>) -> tensor<512x512xf32> matmul.mlir:10:13: remark: elemwise_binaries @@ -120,7 +120,7 @@ %arg2: !transform.op<"linalg.elemwise_binary">): // The actual tiling transformation takes tile sizes as attributes. %loop, %tiled = transform.structured.tile_to_forall_op %arg1 tile_sizes [4, 32] - : (!transform.op<"linalg.matmul") -> (!transform.any_op, !transform.any_op) + : (!transform.op<"linalg.matmul">) -> (!transform.any_op, !transform.any_op) transform.yield } ``` @@ -191,7 +191,7 @@ $ mlir-opt matmul.mlir --pass-pipeline=" builtin.module(test-transform-dialect-interpreter{ bind-first-extra-to-ops=linalg.matmul - bind-second-extra-to-ops=linalg.elemwise_binary, + bind-second-extra-to-ops=linalg.elemwise_binary enable-expensive-checks})" ``` @@ -259,25 +259,29 @@ : (!transform.op<"linalg.elemwise_binary">) -> (!transform.any_op, !transform.any_op) - // The actual tiling transformation takes tile sizes as attributes. It produces a - // handle to the loop generated during tiling. - %loop, %tiled = transform.structured.tile_to_forall_op %max tile_sizes [8, 32] - : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + // The actual tiling transformation takes tile sizes as attributes. It + // produces a handle to the loop generated during tiling. + %loop, %tiled_max = + transform.structured.tile_to_forall_op %max tile_sizes [8, 32] + : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // We can now fuse the other operations into the loop. Here, we fuse - // operations one-by-one. This requires the operation that is being fused - // to define the value used within the loop, so the order of such fusions - // is important. We could also use "transform.merge_handles" to obtain - // a single handle to all operations and give it to `fuse_into_containing_op` - // that would take care of the ordering in this case. - %add_fused = transform.structured.fuse_into_containing_op %add into %loop - : (!transform.any_op, !transform.any_op) -> !transform.any_op - %matmul_fused = transform.structured.fuse_into_containing_op %arg1 into %loop - : (!transform.op<"linalg.matmul">, !transform.any_op) -> !transform.any_op + // operations one by one. This requires the operation that is being fused to + // define the value used within the loop, so the order of such fusions is + // important. We could also use "transform.merge_handles" to obtain a single + // handle to all operations and give it to `fuse_into_containing_op` that + // would take care of the ordering in this case. + %loop_0, %tiled_add = + transform.structured.fuse_into_containing_op %add into %loop + : (!transform.any_op, !transform.any_op) + -> (!transform.any_op, !transform.any_op) + %loop_1, %tiled_matmul = + transform.structured.fuse_into_containing_op %arg1 into %loop + : (!transform.op<"linalg.matmul">, !transform.any_op) + -> (!transform.any_op, !transform.any_op) transform.yield } - ``` This achieves the desired tiling and fusion. @@ -294,48 +298,58 @@ // Since the %arg2 handle is associated with both elementwise operations, // we need to split it into two handles so we can target only the second // elementwise operation. - %add, %max = transform.split_handle %arg2 : (!transform.op<"linalg.elemwise_binary">) - -> (!transform.any_op, !transform.any_op) + %add, %max = transform.split_handle %arg2 + : (!transform.op<"linalg.elemwise_binary">) + -> (!transform.any_op, !transform.any_op) - // The actual tiling transformation takes tile sizes as attributes. It produces a - // handle to the loop generated during tiling. + // The actual tiling transformation takes tile sizes as attributes. It + // produces a handle to the loop generated during tiling. %loop, %tiled = transform.structured.tile_to_forall_op %max tile_sizes [8, 32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op) // We can now fuse the other operations into the loop. Here, we fuse - // operations one-by-one. This requires the operation that is being fused - // to define the value used within the loop, so the order of such fusions - // is important. We could also use "transform.merge_handles" to obtain - // a single handle to all operations and give it to `fuse_into_containing_op` - // that would take care of the ordering in this case. - %add_fused = transform.structured.fuse_into_containing_op %add into %loop - : (!transform.any_op, !transform.any_op) -> !transform.any_op - %matmul_fused = transform.structured.fuse_into_containing_op %arg1 into %loop - : (!transform.op<"linalg.matmul">, !transform.any_op) -> !transform.any_op + // operations one by one. This requires the operation that is being fused to + // define the value used within the loop, so the order of such fusions is + // important. We could also use "transform.merge_handles" to obtain a single + // handle to all operations and give it to `fuse_into_containing_op` that + // would take care of the ordering in this case. + %add_fused, %loop_0 = + transform.structured.fuse_into_containing_op %add into %loop + : (!transform.any_op, !transform.any_op) + -> (!transform.any_op, !transform.any_op) + %matmul_fused, %loop_1 = + transform.structured.fuse_into_containing_op %arg1 into %loop + : (!transform.op<"linalg.matmul">, !transform.any_op) + -> (!transform.any_op, !transform.any_op) // Tile again to get the desired size. Note that this time this tiles the // "add" operation and fuses matmul into the loop, but doesn't affect the // "max" operation. This illustrates the precise targeting with the transform // dialect. Otherwise, it is difficult to differentiate "add" and "max", both // of which having the same kind. - %loop_2, %tiled_2 = transform.structured.tile_to_forall_op %add_fused tile_sizes [4, 4] - : (!transform.any_op) -> (!transform.any_op, !transform.any_op) - %matmul_fused_2 = transform.structured.fuse_into_containing_op %matmul_fused into %loop_2 - : (!transform.any_op, !transform.any_op) -> !transform.any_op + %loop_2, %tiled_2 = + transform.structured.tile_to_forall_op %add_fused tile_sizes [4, 4] + : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + %matmul_fused_2, %loop_3 = + transform.structured.fuse_into_containing_op %matmul_fused into %loop_2 + : (!transform.any_op, !transform.any_op) + -> (!transform.any_op, !transform.any_op) // Since outlining is currently only implemented for region-holding operations // such as loops, use tiling to size 1 to materialize the outer loop that is // going to be outlined. - %outline_target, %_ = transform.structured.tile_to_forall_op %tiled_2 tile_sizes [1] - : (!transform.any_op) -> (!transform.any_op, !transform.any_op) - transform.structured.fuse_into_containing_op %matmul_fused_2 into %outline_target - : (!transform.any_op, !transform.any_op) -> !transform.any_op + %outline_target, %_ = + transform.structured.tile_to_forall_op %tiled_2 tile_sizes [1] + : (!transform.any_op) -> (!transform.any_op, !transform.any_op) + transform.structured.fuse_into_containing_op %matmul_fused_2 + into %outline_target + : (!transform.any_op, !transform.any_op) + -> (!transform.any_op, !transform.any_op) %func, %call = transform.loop.outline %outline_target {func_name = "outlined"} : (!transform.any_op) -> (!transform.any_op, !transform.op<"func.call">) transform.yield } - ``` This additional transformation also illustrates handle invalidation for nested operations. The `transform.loop.outline` operation consumes the handle to the loop, which invalidates it and all handles to any operations nested in it, such as `%2`. Attempting to use this handle will cause undefined behavior. (Note that it isn’t strictly necessary for this specific form of the outlining to consume the operand as the implementation only _moves_ the region without recreating the operations, but the author of the transformation chose to invalidate the handle anyway.)