diff --git a/mlir/include/mlir/Dialect/GPU/Passes.h b/mlir/include/mlir/Dialect/GPU/Passes.h --- a/mlir/include/mlir/Dialect/GPU/Passes.h +++ b/mlir/include/mlir/Dialect/GPU/Passes.h @@ -25,7 +25,8 @@ namespace mlir { /// Replaces `gpu.launch` with `gpu.launch_func` by moving the region into /// a separate kernel function. -std::unique_ptr> createGpuKernelOutliningPass(); +std::unique_ptr> +createGpuKernelOutliningPass(StringRef dataLayoutStr = StringRef()); /// Rewrites a function region so that GPU ops execute asynchronously. std::unique_ptr> createGpuAsyncRegionPass(); diff --git a/mlir/include/mlir/Dialect/GPU/Passes.td b/mlir/include/mlir/Dialect/GPU/Passes.td --- a/mlir/include/mlir/Dialect/GPU/Passes.td +++ b/mlir/include/mlir/Dialect/GPU/Passes.td @@ -14,6 +14,7 @@ def GpuKernelOutlining : Pass<"gpu-kernel-outlining", "ModuleOp"> { let summary = "Outline gpu.launch bodies to kernel functions"; let constructor = "mlir::createGpuKernelOutliningPass()"; + let dependentDialects = ["mlir::DLTIDialect"]; } def GpuAsyncRegionPass : FunctionPass<"gpu-async-region"> { diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp --- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp @@ -12,6 +12,7 @@ #include "PassDetail.h" #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" +#include "mlir/Dialect/DLTI/DLTI.h" #include "mlir/Dialect/GPU/GPUDialect.h" #include "mlir/Dialect/GPU/Passes.h" #include "mlir/Dialect/GPU/Utils.h" @@ -20,6 +21,7 @@ #include "mlir/IR/BlockAndValueMapping.h" #include "mlir/IR/Builders.h" #include "mlir/IR/SymbolTable.h" +#include "mlir/Parser.h" #include "mlir/Support/LLVM.h" #include "mlir/Transforms/RegionUtils.h" @@ -239,6 +241,31 @@ class GpuKernelOutliningPass : public GpuKernelOutliningBase { public: + GpuKernelOutliningPass(StringRef dlStr) { + if (!dlStr.empty() && !dataLayoutStr.hasValue()) + dataLayoutStr = dlStr.str(); + } + + GpuKernelOutliningPass(const GpuKernelOutliningPass &other) + : dataLayoutSpec(other.dataLayoutSpec) { + dataLayoutStr = other.dataLayoutStr; + } + + LogicalResult initialize(MLIRContext *context) override { + // Initialize the data layout specification from the data layout string. + if (!dataLayoutStr.empty()) { + Attribute resultAttr = mlir::parseAttribute(dataLayoutStr, context); + if (!resultAttr) + return failure(); + + dataLayoutSpec = resultAttr.dyn_cast(); + if (!dataLayoutSpec) + return failure(); + } + + return success(); + } + void runOnOperation() override { SymbolTable symbolTable(getOperation()); bool modified = false; @@ -290,6 +317,12 @@ OpBuilder builder(context); auto kernelModule = builder.create(kernelFunc.getLoc(), kernelFunc.getName()); + + // If a valid data layout spec was provided, attach it to the kernel module. + // Otherwise, the default data layout will be used. + if (dataLayoutSpec) + kernelModule->setAttr("dlspec", dataLayoutSpec); + SymbolTable symbolTable(kernelModule); symbolTable.insert(kernelFunc); @@ -313,10 +346,18 @@ return kernelModule; } + + Option dataLayoutStr{ + *this, "data-layout-str", + llvm::cl::desc("String containing the data layout specification to be " + "attached to the GPU kernel module")}; + + DataLayoutSpecInterface dataLayoutSpec; }; } // namespace -std::unique_ptr> mlir::createGpuKernelOutliningPass() { - return std::make_unique(); +std::unique_ptr> +mlir::createGpuKernelOutliningPass(StringRef dataLayoutStr) { + return std::make_unique(dataLayoutStr); } diff --git a/mlir/lib/Dialect/GPU/Transforms/PassDetail.h b/mlir/lib/Dialect/GPU/Transforms/PassDetail.h --- a/mlir/lib/Dialect/GPU/Transforms/PassDetail.h +++ b/mlir/lib/Dialect/GPU/Transforms/PassDetail.h @@ -10,6 +10,7 @@ #define DIALECT_GPU_TRANSFORMS_PASSDETAIL_H_ #include "mlir/Dialect/Async/IR/Async.h" +#include "mlir/Dialect/DLTI/DLTI.h" #include "mlir/Pass/Pass.h" namespace mlir { diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir --- a/mlir/test/Dialect/GPU/outlining.mlir +++ b/mlir/test/Dialect/GPU/outlining.mlir @@ -1,4 +1,5 @@ // RUN: mlir-opt -allow-unregistered-dialect -gpu-kernel-outlining -split-input-file -verify-diagnostics %s | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect -gpu-kernel-outlining=data-layout-str='#dlti.dl_spec<#dlti.dl_entry>' -split-input-file %s | FileCheck --check-prefix CHECK-DL %s // CHECK: module attributes {gpu.container_module} @@ -35,8 +36,9 @@ return } +// CHECK-DL-LABEL: gpu.module @launch_kernel attributes {dlspec = #dlti.dl_spec<#dlti.dl_entry>} -// CHECK-LABEL: module @launch_kernel +// CHECK-LABEL: gpu.module @launch_kernel // CHECK-NEXT: gpu.func @launch_kernel // CHECK-SAME: (%[[KERNEL_ARG0:.*]]: f32, %[[KERNEL_ARG1:.*]]: memref) // CHECK-NEXT: %[[BID:.*]] = "gpu.block_id"() {dimension = "x"} : () -> index @@ -81,7 +83,10 @@ return } -// CHECK: module @multiple_launches_kernel +// CHECK-DL-LABEL: gpu.module @multiple_launches_kernel attributes {dlspec = #dlti.dl_spec<#dlti.dl_entry>} +// CHECK-DL-LABEL: gpu.module @multiple_launches_kernel_0 attributes {dlspec = #dlti.dl_spec<#dlti.dl_entry>} + +// CHECK: gpu.module @multiple_launches_kernel // CHECK: func @multiple_launches_kernel // CHECK: module @multiple_launches_kernel_0 // CHECK: func @multiple_launches_kernel @@ -106,6 +111,8 @@ return } +// CHECK-DL-LABEL: gpu.module @extra_constants_not_inlined_kernel attributes {dlspec = #dlti.dl_spec<#dlti.dl_entry>} + // CHECK-LABEL: func @extra_constants_not_inlined_kernel(%{{.*}}: memref, %{{.*}}: index) // CHECK: arith.constant 2 @@ -130,6 +137,8 @@ return } +// CHECK-DL-LABEL: gpu.module @extra_constants_kernel attributes {dlspec = #dlti.dl_spec<#dlti.dl_entry>} + // CHECK-LABEL: func @extra_constants_kernel( // CHECK-SAME: %[[KARG0:.*]]: memref // CHECK: arith.constant 2 @@ -158,6 +167,8 @@ return } +// CHECK-DL-LABEL: gpu.module @extra_constants_noarg_kernel attributes {dlspec = #dlti.dl_spec<#dlti.dl_entry>} + // CHECK-LABEL: func @extra_constants_noarg_kernel( // CHECK-SAME: %[[KARG0:.*]]: memref, %[[KARG1:.*]]: index // CHECK: %[[KCST:.*]] = arith.constant 2 @@ -186,6 +197,8 @@ return } +// CHECK-DL-LABEL: gpu.module @multiple_uses_kernel attributes {dlspec = #dlti.dl_spec<#dlti.dl_entry>} + // ----- // CHECK-LABEL: @multiple_uses2 @@ -213,6 +226,8 @@ return } +// CHECK-DL-LABEL: gpu.module @multiple_uses2_kernel attributes {dlspec = #dlti.dl_spec<#dlti.dl_entry>} + // ----- llvm.mlir.global internal @global(42 : i64) : i64 @@ -242,6 +257,8 @@ return } +// CHECK-DL-LABEL: gpu.module @function_call_kernel attributes {dlspec = #dlti.dl_spec<#dlti.dl_entry>} + // CHECK: gpu.module @function_call_kernel { // CHECK: gpu.func @function_call_kernel() // CHECK: call @device_function() : () -> () diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -3000,6 +3000,7 @@ deps = [ ":ArithmeticDialect", ":Async", + ":DLTIDialect", ":GPUDialect", ":GPUPassIncGen", ":MemRefDialect",