diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt
--- a/mlir/test/CMakeLists.txt
+++ b/mlir/test/CMakeLists.txt
@@ -88,12 +88,6 @@
   )
 endif()
 
-if(MLIR_CUDA_RUNNER_ENABLED)
-  list(APPEND MLIR_TEST_DEPENDS
-    mlir-cuda-runner
-  )
-endif()
-
 if(MLIR_ROCM_RUNNER_ENABLED)
   list(APPEND MLIR_TEST_DEPENDS
     mlir-rocm-runner
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir
@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN:   -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN:   -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN:   -gpu-kernel-outlining \
+// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN:   -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
 // RUN:   --entry-point-result=void \
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir
@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN:   -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN:   -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN:   -gpu-kernel-outlining \
+// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN:   -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
 // RUN:   --entry-point-result=void \
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir
@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN:   -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN:   -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN:   -gpu-kernel-outlining \
+// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN:   -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
 // RUN:   --entry-point-result=void \
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir
@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN:   -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN:   -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN:   -gpu-kernel-outlining \
+// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN:   -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
 // RUN:   --entry-point-result=void \
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir
@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN:   -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN:   -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN:   -gpu-kernel-outlining \
+// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN:   -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
 // RUN:   --entry-point-result=void \
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir
@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN:   -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN:   -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN:   -gpu-kernel-outlining \
+// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN:   -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
 // RUN:   --entry-point-result=void \
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir
@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN:   -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN:   -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN:   -gpu-kernel-outlining \
+// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN:   -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
 // RUN:   --entry-point-result=void \
diff --git a/mlir/test/Integration/GPU/CUDA/async.mlir b/mlir/test/Integration/GPU/CUDA/async.mlir
--- a/mlir/test/Integration/GPU/CUDA/async.mlir
+++ b/mlir/test/Integration/GPU/CUDA/async.mlir
@@ -1,8 +1,9 @@
-// RUN: mlir-cuda-runner %s \
-// RUN:   -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN:   -gpu-async-region -async-ref-counting \
-// RUN:   -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN:   -gpu-kernel-outlining \
+// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN:   -gpu-async-region -async-ref-counting -gpu-to-llvm \
 // RUN:   -async-to-async-runtime -convert-async-to-llvm -convert-std-to-llvm \
+// RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_async_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
diff --git a/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir b/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir
--- a/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir
+++ b/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir
@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \ -// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \ +// RUN: mlir-opt %s \ +// RUN: -gpu-kernel-outlining \ +// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: -gpu-to-llvm \ +// RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ // RUN: --entry-point-result=void \ diff --git a/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir b/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir --- a/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir +++ b/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir @@ -1,6 +1,8 @@ -// RUN: mlir-cuda-runner %s \ -// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \ -// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \ +// RUN: mlir-opt %s \ +// RUN: -gpu-kernel-outlining \ +// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: -gpu-to-llvm \ +// RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ // RUN: --entry-point-result=void \ diff --git a/mlir/test/Integration/GPU/CUDA/shuffle.mlir b/mlir/test/Integration/GPU/CUDA/shuffle.mlir --- a/mlir/test/Integration/GPU/CUDA/shuffle.mlir +++ b/mlir/test/Integration/GPU/CUDA/shuffle.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ // RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{gpu-binary-annotation=nvvm.cubin})' \ -// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \ +// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/two-modules.mlir b/mlir/test/Integration/GPU/CUDA/two-modules.mlir --- a/mlir/test/Integration/GPU/CUDA/two-modules.mlir +++ b/mlir/test/Integration/GPU/CUDA/two-modules.mlir @@ -1,6 +1,8 @@ -// RUN: mlir-cuda-runner %s \ -// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \ -// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \ +// RUN: mlir-opt %s \ +// RUN: -gpu-kernel-outlining \ +// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ +// RUN: -gpu-to-llvm \ +// RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ // RUN: --entry-point-result=void \ diff --git a/mlir/tools/CMakeLists.txt b/mlir/tools/CMakeLists.txt --- a/mlir/tools/CMakeLists.txt +++ b/mlir/tools/CMakeLists.txt @@ -1,4 +1,3 @@ -add_subdirectory(mlir-cuda-runner) add_subdirectory(mlir-cpu-runner) add_subdirectory(mlir-opt) add_subdirectory(mlir-reduce) diff --git a/mlir/tools/mlir-cuda-runner/CMakeLists.txt b/mlir/tools/mlir-cuda-runner/CMakeLists.txt deleted file mode 100644 --- a/mlir/tools/mlir-cuda-runner/CMakeLists.txt +++ /dev/null @@ -1,75 +0,0 @@ -set(LLVM_OPTIONAL_SOURCES - mlir-cuda-runner.cpp - ) -set(LLVM_LINK_COMPONENTS - Core - Support -) - -if(MLIR_CUDA_RUNNER_ENABLED) - if (NOT ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)) - message(SEND_ERROR - "Building the mlir cuda runner requires the NVPTX 
backend") - endif() - - # Configure CUDA runner support. Using check_language first allows us to give - # a custom error message. - include(CheckLanguage) - check_language(CUDA) - if (CMAKE_CUDA_COMPILER) - enable_language(CUDA) - else() - message(SEND_ERROR - "Building the mlir cuda runner requires a working CUDA install") - endif() - - # We need the libcuda.so library. - find_library(CUDA_RUNTIME_LIBRARY cuda) - - get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS) - set(LIBS - ${conversion_libs} - MLIRJitRunner - MLIRAnalysis - MLIRAsync - MLIREDSC - MLIRExecutionEngine - MLIRGPU - MLIRIR - MLIRLLVMIR - MLIRLLVMToLLVMIRTranslation - MLIRNVVMIR - MLIRParser - MLIRStandard - MLIRSupport - MLIRTargetLLVMIRExport - MLIRNVVMToLLVMIRTranslation - MLIRTransforms - MLIRTranslation - ${CUDA_RUNTIME_LIBRARY} - ) - - # Manually expand the target library, since our MLIR libraries - # aren't plugged into the LLVM dependency tracking. If we don't - # do this then we can't insert the CodeGen library after ourselves - llvm_expand_pseudo_components(TARGET_LIBS AllTargetsCodeGens) - # Prepend LLVM in front of every target, this is how the library - # are named with CMake - SET(targets_to_link) - FOREACH(t ${TARGET_LIBS}) - LIST(APPEND targets_to_link "LLVM${t}") - ENDFOREACH(t) - - add_llvm_tool(mlir-cuda-runner - mlir-cuda-runner.cpp - - DEPENDS - mlir_cuda_runtime - ) - target_include_directories(mlir-cuda-runner - PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} - ) - llvm_update_compile_flags(mlir-cuda-runner) - target_link_libraries(mlir-cuda-runner PRIVATE ${LIBS} ${targets_to_link}) - -endif() diff --git a/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp b/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp deleted file mode 100644 --- a/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp +++ /dev/null @@ -1,191 +0,0 @@ -//===- mlir-cuda-runner.cpp - MLIR CUDA Execution Driver-------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This is a command line utility that executes an MLIR file on the GPU by -// translating MLIR to NVVM/LVVM IR before JIT-compiling and executing the -// latter. 
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/STLExtras.h"
-
-#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
-#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
-#include "mlir/Conversion/Passes.h"
-#include "mlir/Dialect/Async/IR/Async.h"
-#include "mlir/Dialect/Async/Passes.h"
-#include "mlir/Dialect/GPU/GPUDialect.h"
-#include "mlir/Dialect/GPU/Passes.h"
-#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
-#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/ExecutionEngine/JitRunner.h"
-#include "mlir/ExecutionEngine/OptUtils.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Pass/PassManager.h"
-#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Export.h"
-#include "mlir/Transforms/DialectConversion.h"
-#include "mlir/Transforms/Passes.h"
-
-#include "llvm/Support/InitLLVM.h"
-#include "llvm/Support/TargetSelect.h"
-
-#include "cuda.h"
-
-using namespace mlir;
-
-static void emitCudaError(const llvm::Twine &expr, const char *buffer,
-                          CUresult result, Location loc) {
-  const char *error;
-  cuGetErrorString(result, &error);
-  emitError(loc, expr.concat(" failed with error code ")
-                     .concat(llvm::Twine{error})
-                     .concat("[")
-                     .concat(buffer)
-                     .concat("]"));
-}
-
-#define RETURN_ON_CUDA_ERROR(expr)                                            \
-  do {                                                                        \
-    if (auto status = (expr)) {                                               \
-      emitCudaError(#expr, jitErrorBuffer, status, loc);                      \
-      return {};                                                              \
-    }                                                                         \
-  } while (false)
-
-OwnedBlob compilePtxToCubin(const std::string ptx, Location loc,
-                            StringRef name) {
-  char jitErrorBuffer[4096] = {0};
-
-  // Initialize CUDA once in a thread-safe manner.
-  static CUresult cuInitResult = [] { return cuInit(/*flags=*/0); }();
-  RETURN_ON_CUDA_ERROR(cuInitResult);
-
-  // Linking requires a device context.
-  CUdevice device;
-  RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0));
-  CUcontext context;
-  RETURN_ON_CUDA_ERROR(cuCtxCreate(&context, 0, device));
-  CUlinkState linkState;
-
-  CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER,
-                               CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES};
-  void *jitOptionsVals[] = {jitErrorBuffer,
-                            reinterpret_cast<void *>(sizeof(jitErrorBuffer))};
-
-  RETURN_ON_CUDA_ERROR(cuLinkCreate(2,              /* number of jit options */
-                                    jitOptions,     /* jit options */
-                                    jitOptionsVals, /* jit option values */
-                                    &linkState));
-
-  RETURN_ON_CUDA_ERROR(
-      cuLinkAddData(linkState, CUjitInputType::CU_JIT_INPUT_PTX,
-                    const_cast<void *>(static_cast<const void *>(ptx.c_str())),
-                    ptx.length(), name.str().data(), /* kernel name */
-                    0,       /* number of jit options */
-                    nullptr, /* jit options */
-                    nullptr  /* jit option values */
-                    ));
-
-  void *cubinData;
-  size_t cubinSize;
-  RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize));
-
-  char *cubinAsChar = static_cast<char *>(cubinData);
-  OwnedBlob result =
-      std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize);
-
-  // This will also destroy the cubin data.
-  RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState));
-  RETURN_ON_CUDA_ERROR(cuCtxDestroy(context));
-
-  return result;
-}
-
-struct GpuToCubinPipelineOptions
-    : public mlir::PassPipelineOptions<GpuToCubinPipelineOptions> {
-  Option<std::string> gpuBinaryAnnotation{
-      *this, "gpu-binary-annotation",
-      llvm::cl::desc("Annotation attribute string for GPU binary"),
-      llvm::cl::init(gpu::getDefaultGpuBinaryAnnotation())};
-};
-
-// Register cuda-runner specific passes.
-static void registerCudaRunnerPasses() {
-  PassPipelineRegistration<GpuToCubinPipelineOptions> registerGpuToCubin(
-      "gpu-to-cubin", "Generate CUBIN from gpu.launch regions",
-      [&](OpPassManager &pm, const GpuToCubinPipelineOptions &options) {
-        pm.addPass(createGpuKernelOutliningPass());
-        auto &kernelPm = pm.nest<gpu::GPUModuleOp>();
-        kernelPm.addPass(createStripDebugInfoPass());
-        kernelPm.addPass(createLowerGpuOpsToNVVMOpsPass());
-        kernelPm.addPass(createConvertGPUKernelToBlobPass(
-            translateModuleToLLVMIR, compilePtxToCubin, "nvptx64-nvidia-cuda",
-            "sm_35", "+ptx60", options.gpuBinaryAnnotation));
-      });
-  registerGPUPasses();
-  registerGpuToLLVMConversionPassPass();
-  registerAsyncPasses();
-  registerConvertAsyncToLLVMPass();
-  registerConvertStandardToLLVMPass();
-}
-
-static LogicalResult runMLIRPasses(ModuleOp module,
-                                   PassPipelineCLParser &passPipeline) {
-  PassManager pm(module.getContext(), PassManager::Nesting::Implicit);
-  applyPassManagerCLOptions(pm);
-
-  auto errorHandler = [&](const Twine &msg) {
-    emitError(UnknownLoc::get(module.getContext())) << msg;
-    return failure();
-  };
-
-  // Build the provided pipeline.
-  if (failed(passPipeline.addToPipeline(pm, errorHandler)))
-    return failure();
-
-  // Run the pipeline.
-  return pm.run(module);
-}
-
-int main(int argc, char **argv) {
-  llvm::InitLLVM y(argc, argv);
-  llvm::InitializeNativeTarget();
-  llvm::InitializeNativeTargetAsmPrinter();
-
-  // Initialize LLVM NVPTX backend.
-  LLVMInitializeNVPTXTarget();
-  LLVMInitializeNVPTXTargetInfo();
-  LLVMInitializeNVPTXTargetMC();
-  LLVMInitializeNVPTXAsmPrinter();
-
-  mlir::initializeLLVMPasses();
-
-  registerCudaRunnerPasses();
-  PassPipelineCLParser passPipeline("", "Compiler passes to run");
-  registerPassManagerCLOptions();
-
-  auto mlirTransformer = [&](ModuleOp module) {
-    return runMLIRPasses(module, passPipeline);
-  };
-
-  mlir::JitRunnerConfig jitRunnerConfig;
-  jitRunnerConfig.mlirTransformer = mlirTransformer;
-
-  mlir::DialectRegistry registry;
-  registry.insert<mlir::async::AsyncDialect, mlir::gpu::GPUDialect,
-                  mlir::LLVM::LLVMDialect, mlir::NVVM::NVVMDialect,
-                  mlir::StandardOpsDialect>();
-  mlir::registerLLVMDialectTranslation(registry);
-  mlir::registerNVVMDialectTranslation(registry);
-
-  return mlir::JitRunnerMain(argc, argv, registry, jitRunnerConfig);
-}
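
For reference, the migrated RUN lines above correspond to the following standalone shell invocation. This is an illustrative sketch, not part of the patch: $BUILD_DIR, input.mlir, and the .so suffix are placeholders (in the tests, the %linalg_test_lib_dir and %shlibext lit substitutions resolve to build-specific values), and it assumes a CUDA-enabled MLIR build that provides the gpu-to-cubin pass and libmlir_cuda_runtime.

# Outline gpu.launch bodies into kernels, lower each gpu.module to NVVM and
# serialize it to a cubin blob, lower the host code to LLVM, then JIT-execute
# the result with mlir-cpu-runner against the CUDA runtime wrappers.
$BUILD_DIR/bin/mlir-opt input.mlir \
  -gpu-kernel-outlining \
  -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
  -gpu-to-llvm \
| $BUILD_DIR/bin/mlir-cpu-runner \
  --shared-libs=$BUILD_DIR/lib/libmlir_cuda_runtime.so \
  --shared-libs=$BUILD_DIR/lib/libmlir_runner_utils.so \
  --entry-point-result=void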