diff --git a/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir b/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir --- a/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir +++ b/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-opt %s --test-kernel-to-cubin -split-input-file | FileCheck %s +// RUN: mlir-opt %s --test-gpu-to-cubin | FileCheck %s -// CHECK: attributes {nvvm.cubin = "CUBIN"} +// CHECK: gpu.module @foo attributes {gpu.binary = "CUBIN"} gpu.module @foo { llvm.func @kernel(%arg0 : f32, %arg1 : !llvm.ptr) // CHECK: attributes {gpu.kernel} @@ -9,8 +9,7 @@ } } -// ----- - +// CHECK: gpu.module @bar attributes {gpu.binary = "CUBIN"} gpu.module @bar { // CHECK: func @kernel_a llvm.func @kernel_a() diff --git a/mlir/test/lib/Transforms/TestConvertGPUKernelToCubin.cpp b/mlir/test/lib/Transforms/TestConvertGPUKernelToCubin.cpp --- a/mlir/test/lib/Transforms/TestConvertGPUKernelToCubin.cpp +++ b/mlir/test/lib/Transforms/TestConvertGPUKernelToCubin.cpp @@ -6,11 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Conversion/GPUCommon/GPUCommonPass.h" -#include "mlir/Dialect/LLVMIR/NVVMDialect.h" +#include "mlir/Dialect/GPU/Passes.h" + #include "mlir/Pass/Pass.h" -#include "mlir/Pass/PassManager.h" -#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Export.h" #include "llvm/Support/TargetSelect.h" @@ -18,36 +16,53 @@ using namespace mlir; #if MLIR_CUDA_CONVERSIONS_ENABLED -static OwnedBlob compilePtxToCubinForTesting(const std::string &, Location, - StringRef) { - const char data[] = "CUBIN"; - return std::make_unique<std::vector<char>>(data, data + sizeof(data) - 1); +namespace { +class TestSerializeToCubinPass + : public PassWrapper<TestSerializeToCubinPass, gpu::SerializeToBlobPass> { +public: + TestSerializeToCubinPass(); + +private: + void getDependentDialects(DialectRegistry 
&registry) const override; + + // Serializes PTX to CUBIN. + std::unique_ptr<std::vector<char>> + serializeISA(const std::string &isa) override; +}; +} // namespace + +TestSerializeToCubinPass::TestSerializeToCubinPass() { + this->triple = "nvptx64-nvidia-cuda"; + this->chip = "sm_35"; + this->features = "+ptx60"; +} + +void TestSerializeToCubinPass::getDependentDialects( + DialectRegistry &registry) const { + registerNVVMDialectTranslation(registry); + gpu::SerializeToBlobPass::getDependentDialects(registry); } -static std::unique_ptr<llvm::Module> -translateModuleToNVVMIR(Operation *m, llvm::LLVMContext &llvmContext, - StringRef moduleName) { - registerLLVMDialectTranslation(*m->getContext()); - registerNVVMDialectTranslation(*m->getContext()); - return translateModuleToLLVMIR(m, llvmContext, moduleName); +std::unique_ptr<std::vector<char>> +TestSerializeToCubinPass::serializeISA(const std::string &) { + std::string data = "CUBIN"; + return std::make_unique<std::vector<char>>(data.begin(), data.end()); } namespace mlir { namespace test { -void registerTestConvertGPUKernelToCubinPass() { - PassPipelineRegistration<>( - "test-kernel-to-cubin", - "Convert all kernel functions to CUDA cubin blobs", - [](OpPassManager &pm) { +// Register test pass to serialize GPU module to a CUBIN binary annotation. +void registerTestGpuSerializeToCubinPass() { + PassRegistration<TestSerializeToCubinPass> registerSerializeToCubin( + "test-gpu-to-cubin", + "Lower GPU kernel function to CUBIN binary annotations", [] { // Initialize LLVM NVPTX backend. 
LLVMInitializeNVPTXTarget(); LLVMInitializeNVPTXTargetInfo(); LLVMInitializeNVPTXTargetMC(); LLVMInitializeNVPTXAsmPrinter(); - pm.addPass(createConvertGPUKernelToBlobPass( - translateModuleToNVVMIR, compilePtxToCubinForTesting, - "nvptx64-nvidia-cuda", "sm_35", "+ptx60", "nvvm.cubin")); + return std::make_unique<TestSerializeToCubinPass>(); }); } } // namespace test diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -64,7 +64,7 @@ void registerTestCallGraphPass(); void registerTestConstantFold(); void registerTestConvVectorization(); -void registerTestConvertGPUKernelToCubinPass(); +void registerTestGpuSerializeToCubinPass(); void registerTestConvertGPUKernelToHsacoPass(); void registerTestDecomposeCallGraphTypes(); void registerTestDialect(DialectRegistry &); @@ -136,7 +136,7 @@ test::registerTestCallGraphPass(); test::registerTestConstantFold(); #if MLIR_CUDA_CONVERSIONS_ENABLED - test::registerTestConvertGPUKernelToCubinPass(); + test::registerTestGpuSerializeToCubinPass(); #endif #if MLIR_ROCM_CONVERSIONS_ENABLED test::registerTestConvertGPUKernelToHsacoPass();