// Patch summary (commentary only; everything from the first `diff --git`
// header onward is the unmodified patch text).
//
// Purpose: make the `gpu-binary-annotation` attribute name a per-pass command
// line option with a shared default.
//
// Visible changes, per file:
//   * GPUCommonPass.h: declares gpu::getDefaultGpuBinaryAnnotation() and gives
//     the gpuBinaryAnnotation parameter of createGpuToLLVMConversionPass,
//     populateGpuToLLVMConversionPatterns and createConvertGPUKernelToBlobPass
//     a `= {}` default. The added doc comments state that a non-empty argument
//     overrides the pass' command line option.
//   * Passes.td: removes the `gpu-binary-annotation` option from the entry
//     whose constructor is mlir::createGpuToLLVMConversionPass().
//   * ConvertKernelFuncToBlob.cpp: replaces the `std::string blobAnnotation`
//     member of GpuKernelToBlobPass with an Option member named
//     gpuBinaryAnnotation (initialized from the shared default), assigns the
//     constructor argument to it only when that argument is non-empty, adds a
//     copy constructor, and defines gpu::getDefaultGpuBinaryAnnotation() to
//     return "gpu.binary".
//   * ConvertLaunchFuncToRuntimeCalls.cpp: adds a copy constructor and the same
//     Option member to GpuToLLVMConversionPass.
//   * mlir-cuda-runner.cpp: initializes the pipeline's gpuBinaryAnnotation
//     option with gpu::getDefaultGpuBinaryAnnotation().
//
// NOTE(review): the copy constructor added to GpuKernelToBlobPass lists
// loweringCallback, blobGenerator, triple, targetChip and features in its
// initializer list but not gpuBinaryAnnotation -- confirm the option value is
// propagated some other way when the pass is copied.
// NOTE(review): this copy of the patch has its line breaks collapsed, and
// angle-bracketed template arguments appear to be missing (e.g.
// `std::unique_ptr>`, `Option gpuBinaryAnnotation{`,
// `mlir::PassPipelineOptions {`) -- presumably an extraction artifact; verify
// against the original patch before applying.
diff --git a/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h b/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h --- a/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h +++ b/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h @@ -26,6 +26,9 @@ namespace gpu { class GPUModuleOp; + +/// Returns the default annotation name for GPU binary blobs. +std::string getDefaultGpuBinaryAnnotation(); } // namespace gpu namespace LLVM { @@ -44,13 +47,17 @@ /// This pass does not generate code to call GPU runtime APIs directly but /// instead uses a small wrapper library that exports a stable and conveniently /// typed ABI on top of GPU runtimes such as CUDA or ROCm (HIP). +/// +/// A non-empty gpuBinaryAnnotation overrides the pass' command line option. std::unique_ptr> -createGpuToLLVMConversionPass(StringRef gpuBinaryAnnotation = ""); +createGpuToLLVMConversionPass(StringRef gpuBinaryAnnotation = {}); /// Collect a set of patterns to convert from the GPU dialect to LLVM. +/// +/// A non-empty gpuBinaryAnnotation overrides the pass' command line option. void populateGpuToLLVMConversionPatterns(LLVMTypeConverter &converter, OwningRewritePatternList &patterns, - StringRef gpuBinaryAnnotation); + StringRef gpuBinaryAnnotation = {}); /// Creates a pass to convert kernel functions into GPU target object blobs. /// @@ -74,11 +81,13 @@ /// /// After the transformation, the body of the kernel function is removed (i.e., /// it is turned into a declaration). +/// +/// A non-empty gpuBinaryAnnotation overrides the pass' command line option. 
std::unique_ptr> createConvertGPUKernelToBlobPass(LoweringCallback loweringCallback, BlobGenerator blobGenerator, StringRef triple, StringRef targetChip, StringRef features, - StringRef gpuBinaryAnnotation); + StringRef gpuBinaryAnnotation = {}); } // namespace mlir diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -106,10 +106,6 @@ let summary = "Convert GPU dialect to LLVM dialect with GPU runtime calls"; let constructor = "mlir::createGpuToLLVMConversionPass()"; let dependentDialects = ["LLVM::LLVMDialect"]; - let options = [ - Option<"gpuBinaryAnnotation", "gpu-binary-annotation", "std::string", - "", "Annotation attribute string for GPU binary">, - ]; } def LowerHostCodeToLLVM : Pass<"lower-host-to-llvm", "ModuleOp"> { diff --git a/mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp b/mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp --- a/mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp +++ b/mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp @@ -52,8 +52,15 @@ StringRef targetChip, StringRef features, StringRef gpuBinaryAnnotation) : loweringCallback(loweringCallback), blobGenerator(blobGenerator), - triple(triple), targetChip(targetChip), features(features), - blobAnnotation(gpuBinaryAnnotation) {} + triple(triple), targetChip(targetChip), features(features) { + if (!gpuBinaryAnnotation.empty()) + this->gpuBinaryAnnotation = gpuBinaryAnnotation.str(); + } + + GpuKernelToBlobPass(const GpuKernelToBlobPass &other) + : loweringCallback(other.loweringCallback), + blobGenerator(other.blobGenerator), triple(other.triple), + targetChip(other.targetChip), features(other.features) {} void runOnOperation() override { gpu::GPUModuleOp module = getOperation(); @@ -70,7 +77,7 @@ // attribute to the module. 
if (auto blobAttr = translateGPUModuleToBinaryAnnotation( *llvmModule, module.getLoc(), module.getName())) - module->setAttr(blobAnnotation, blobAttr); + module->setAttr(gpuBinaryAnnotation, blobAttr); else signalPassFailure(); } @@ -92,14 +99,21 @@ LoweringCallback loweringCallback; BlobGenerator blobGenerator; + llvm::Triple triple; std::string targetChip; std::string features; - std::string blobAnnotation; + + Option gpuBinaryAnnotation{ + *this, "gpu-binary-annotation", + llvm::cl::desc("Annotation attribute string for GPU binary"), + llvm::cl::init(gpu::getDefaultGpuBinaryAnnotation())}; }; } // anonymous namespace +std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary"; } + std::string GpuKernelToBlobPass::translateModuleToISA(llvm::Module &module, llvm::TargetMachine &targetMachine) { diff --git a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp --- a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp +++ b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp @@ -48,8 +48,17 @@ this->gpuBinaryAnnotation = gpuBinaryAnnotation.str(); } + GpuToLLVMConversionPass(const GpuToLLVMConversionPass &other) + : GpuToLLVMConversionPassBase(other) {} + // Run the dialect converter on the module. 
void runOnOperation() override; + +private: + Option gpuBinaryAnnotation{ + *this, "gpu-binary-annotation", + llvm::cl::desc("Annotation attribute string for GPU binary"), + llvm::cl::init(gpu::getDefaultGpuBinaryAnnotation())}; }; struct FunctionCallBuilder { diff --git a/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp b/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp --- a/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp +++ b/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp @@ -114,7 +114,8 @@ : public mlir::PassPipelineOptions { Option gpuBinaryAnnotation{ *this, "gpu-binary-annotation", - llvm::cl::desc("Annotation attribute string for GPU binary")}; + llvm::cl::desc("Annotation attribute string for GPU binary"), + llvm::cl::init(gpu::getDefaultGpuBinaryAnnotation())}; }; // Register cuda-runner specific passes.