diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt
--- a/mlir/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/CMakeLists.txt
@@ -91,7 +91,8 @@
     message(SEND_ERROR
       "Building mlir with cuda support requires the NVPTX backend")
   endif()
-
+
+  find_package(CUDAToolkit)
   # Configure CUDA language support. Using check_language first allows us to
   # give a custom error message.
   include(CheckLanguage)
@@ -115,12 +116,10 @@
     ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
   )
 
-  find_library(CUDA_DRIVER_LIBRARY cuda)
-
   target_link_libraries(MLIRGPUTransforms
     PRIVATE
     MLIRNVVMToLLVMIRTranslation
-    ${CUDA_DRIVER_LIBRARY}
+    CUDA::nvptxcompiler_static
   )
 endif()
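Note: the CUDA::nvptxcompiler_static imported target is provided by find_package(CUDAToolkit) in newer CMake releases; the static nvPTXCompiler library ships with the CUDA toolkit itself, so this build step no longer depends on libcuda.so from a driver install. The following is a minimal, hypothetical smoke test for the new dependency; it is not part of the patch, and the program itself is illustrative:

#include <cstdio>

#include <nvPTXCompiler.h>

int main() {
  unsigned int major = 0, minor = 0;
  // Succeeds only if nvPTXCompiler.h was found and nvptxcompiler_static
  // was linked in correctly.
  if (nvPTXCompilerGetVersion(&major, &minor) != NVPTXCOMPILE_SUCCESS) {
    std::fprintf(stderr, "nvPTXCompiler unavailable\n");
    return 1;
  }
  std::printf("nvPTXCompiler API version %u.%u\n", major, minor);
  return 0;
}

If this links and runs, the MLIRGPUTransforms change above should resolve against the same library.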
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
--- a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
@@ -20,24 +20,56 @@
 #include "llvm/Support/TargetSelect.h"
 
 #include <cuda.h>
+#include <nvPTXCompiler.h>
+#include <string>
 
 using namespace mlir;
 
-static void emitCudaError(const llvm::Twine &expr, const char *buffer,
-                          CUresult result, Location loc) {
-  const char *error;
-  cuGetErrorString(result, &error);
+static void emitNvptxError(const llvm::Twine &expr,
+                           nvPTXCompilerHandle compiler,
+                           nvPTXCompileResult result, Location loc) {
+  auto getErrMsg = [](nvPTXCompileResult result) -> const char * {
+    switch (result) {
+    case NVPTXCOMPILE_SUCCESS:
+      return "Success";
+    case NVPTXCOMPILE_ERROR_INVALID_COMPILER_HANDLE:
+      return "Invalid compiler handle";
+    case NVPTXCOMPILE_ERROR_INVALID_INPUT:
+      return "Invalid input";
+    case NVPTXCOMPILE_ERROR_COMPILATION_FAILURE:
+      return "Compilation failure";
+    case NVPTXCOMPILE_ERROR_INTERNAL:
+      return "Internal error";
+    case NVPTXCOMPILE_ERROR_OUT_OF_MEMORY:
+      return "Out of memory";
+    case NVPTXCOMPILE_ERROR_COMPILER_INVOCATION_INCOMPLETE:
+      return "Invocation incomplete";
+    case NVPTXCOMPILE_ERROR_UNSUPPORTED_PTX_VERSION:
+      return "Unsupported PTX version";
+    }
+    return "Unknown error";
+  };
+  size_t errorSize;
+  auto status = nvPTXCompilerGetErrorLogSize(compiler, &errorSize);
+  std::string errorLog;
+  if (status == NVPTXCOMPILE_SUCCESS) {
+    errorLog.resize(errorSize);
+    status = nvPTXCompilerGetErrorLog(compiler, errorLog.data());
+    if (status != NVPTXCOMPILE_SUCCESS)
+      errorLog.clear();
+  }
   emitError(loc, expr.concat(" failed with error code ")
-                     .concat(llvm::Twine{error})
+                     .concat(llvm::Twine{getErrMsg(result)})
                      .concat("[")
-                     .concat(buffer)
+                     .concat(errorLog)
                      .concat("]"));
 }
 
-#define RETURN_ON_CUDA_ERROR(expr)                                            \
-  do {                                                                        \
-    if (auto status = (expr)) {                                               \
-      emitCudaError(#expr, jitErrorBuffer, status, loc);                      \
-      return {};                                                              \
-    }                                                                         \
-  } while (false)
+#define RETURN_ON_NVPTX_ERROR(expr)                                           \
+  do {                                                                        \
+    nvPTXCompileResult result = (expr);                                       \
+    if (result != NVPTXCOMPILE_SUCCESS) {                                     \
+      emitNvptxError(#expr, compiler, result, loc);                           \
+      return {};                                                              \
+    }                                                                         \
+  } while (false)
@@ -88,46 +120,20 @@
 SerializeToCubinPass::serializeISA(const std::string &isa) {
   Location loc = getOperation().getLoc();
-  char jitErrorBuffer[4096] = {0};
+  nvPTXCompilerHandle compiler = nullptr;
+  RETURN_ON_NVPTX_ERROR(
+      nvPTXCompilerCreate(&compiler, isa.length(), isa.c_str()));
 
-  RETURN_ON_CUDA_ERROR(cuInit(0));
-
-  // Linking requires a device context.
-  CUdevice device;
-  RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0));
-  CUcontext context;
-  RETURN_ON_CUDA_ERROR(cuCtxCreate(&context, 0, device));
-  CUlinkState linkState;
-
-  CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER,
-                               CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES};
-  void *jitOptionsVals[] = {jitErrorBuffer,
-                            reinterpret_cast<void *>(sizeof(jitErrorBuffer))};
-
-  RETURN_ON_CUDA_ERROR(cuLinkCreate(2,              /* number of jit options */
-                                    jitOptions,     /* jit options */
-                                    jitOptionsVals, /* jit option values */
-                                    &linkState));
-
-  auto kernelName = getOperation().getName().str();
-  RETURN_ON_CUDA_ERROR(cuLinkAddData(
-      linkState, CUjitInputType::CU_JIT_INPUT_PTX,
-      const_cast<void *>(static_cast<const void *>(isa.c_str())), isa.length(),
-      kernelName.c_str(), 0, /* number of jit options */
-      nullptr,               /* jit options */
-      nullptr                /* jit option values */
-      ));
-
-  void *cubinData;
-  size_t cubinSize;
-  RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize));
+  // Compile with no extra options; ptxas defaults choose the target.
+  RETURN_ON_NVPTX_ERROR(nvPTXCompilerCompile(compiler, 0, nullptr));
 
-  char *cubinAsChar = static_cast<char *>(cubinData);
-  auto result =
-      std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize);
+  size_t cubinSize;
+  RETURN_ON_NVPTX_ERROR(
+      nvPTXCompilerGetCompiledProgramSize(compiler, &cubinSize));
 
-  // This will also destroy the cubin data.
-  RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState));
-  RETURN_ON_CUDA_ERROR(cuCtxDestroy(context));
+  auto result = std::make_unique<std::vector<char>>(cubinSize);
+  RETURN_ON_NVPTX_ERROR(
+      nvPTXCompilerGetCompiledProgram(compiler, result->data()));
+  nvPTXCompilerDestroy(&compiler);
 
   return result;
 }
diff --git a/mlir/lib/ExecutionEngine/CMakeLists.txt b/mlir/lib/ExecutionEngine/CMakeLists.txt
--- a/mlir/lib/ExecutionEngine/CMakeLists.txt
+++ b/mlir/lib/ExecutionEngine/CMakeLists.txt
@@ -169,6 +169,7 @@
 target_compile_definitions(mlir_async_runtime PRIVATE mlir_async_runtime_EXPORTS)
 
 if(MLIR_ENABLE_CUDA_RUNNER)
+  find_package(CUDAToolkit)
   # Configure CUDA support. Using check_language first allows us to give a
   # custom error message.
   include(CheckLanguage)
@@ -180,9 +181,6 @@
       "Building the mlir cuda runner requires a working CUDA install")
   endif()
 
-  # We need the libcuda.so library.
-  find_library(CUDA_RUNTIME_LIBRARY cuda)
-
   add_mlir_library(mlir_cuda_runtime
     SHARED
     CudaRuntimeWrappers.cpp
@@ -196,7 +194,7 @@
   )
   target_link_libraries(mlir_cuda_runtime
     PRIVATE
-    ${CUDA_RUNTIME_LIBRARY}
+    CUDA::cuda_driver
   )
 endif()
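For reference, a self-contained sketch of the nvPTXCompiler sequence the pass now follows (create, compile, fetch log or binary, destroy). The compilePtx wrapper and the --gpu-name=sm_70 option are illustrative only; the in-tree change passes zero options, so ptxas defaults pick the target architecture. Unlike the cuLink path it replaces, this flow needs no CUDA driver, device, or context:

#include <cstdio>
#include <string>
#include <vector>

#include <nvPTXCompiler.h>

// Compile a PTX string to a cubin, mirroring the sequence used in
// SerializeToCubinPass::serializeISA. Returns an empty vector on failure.
static std::vector<char> compilePtx(const std::string &ptx) {
  nvPTXCompilerHandle compiler = nullptr;
  if (nvPTXCompilerCreate(&compiler, ptx.length(), ptx.c_str()) !=
      NVPTXCOMPILE_SUCCESS)
    return {};

  // Unlike the patch above, pass an explicit target; sm_70 is a placeholder.
  const char *options[] = {"--gpu-name=sm_70"};
  if (nvPTXCompilerCompile(compiler, 1, options) != NVPTXCOMPILE_SUCCESS) {
    // On failure, the error log carries the ptxas diagnostics.
    size_t logSize = 0;
    if (nvPTXCompilerGetErrorLogSize(compiler, &logSize) ==
            NVPTXCOMPILE_SUCCESS &&
        logSize != 0) {
      std::string log(logSize, '\0');
      nvPTXCompilerGetErrorLog(compiler, log.data());
      std::fprintf(stderr, "PTX compilation failed: %s\n", log.c_str());
    }
    nvPTXCompilerDestroy(&compiler);
    return {};
  }

  // Copy the compiled binary out before destroying the handle.
  size_t cubinSize = 0;
  nvPTXCompilerGetCompiledProgramSize(compiler, &cubinSize);
  std::vector<char> cubin(cubinSize);
  nvPTXCompilerGetCompiledProgram(compiler, cubin.data());
  nvPTXCompilerDestroy(&compiler);
  return cubin;
}

The design point of the patch is visible here: because nvptxcompiler_static performs the PTX-to-cubin step entirely in-process, MLIR can be built (and gpu-to-cubin exercised) on machines without an NVIDIA driver installed.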