diff --git a/mlir/lib/ExecutionEngine/CMakeLists.txt b/mlir/lib/ExecutionEngine/CMakeLists.txt --- a/mlir/lib/ExecutionEngine/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/CMakeLists.txt @@ -4,6 +4,7 @@ set(LLVM_OPTIONAL_SOURCES AsyncRuntime.cpp CRunnerUtils.cpp + CudaRuntimeWrappers.cpp SparseUtils.cpp ExecutionEngine.cpp RunnerUtils.cpp @@ -102,3 +103,34 @@ set_property(TARGET mlir_async_runtime PROPERTY CXX_VISIBILITY_PRESET hidden) target_compile_definitions(mlir_async_runtime PRIVATE mlir_async_runtime_EXPORTS) +if(MLIR_CUDA_RUNNER_ENABLED) + # Configure CUDA support. Using check_language first allows us to give a + # custom error message. + include(CheckLanguage) + check_language(CUDA) + if (CMAKE_CUDA_COMPILER) + enable_language(CUDA) + else() + message(SEND_ERROR + "Building the mlir cuda runner requires a working CUDA install") + endif() + + # We need the libcuda.so library. + find_library(CUDA_RUNTIME_LIBRARY cuda) + + add_mlir_library(mlir_cuda_runtime + SHARED + CudaRuntimeWrappers.cpp + + EXCLUDE_FROM_LIBMLIR + ) + set_property(TARGET mlir_cuda_runtime PROPERTY CXX_STANDARD 11) + target_include_directories(mlir_cuda_runtime + PRIVATE + ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} + ) + target_link_libraries(mlir_cuda_runtime + PRIVATE + ${CUDA_RUNTIME_LIBRARY} + ) +endif() diff --git a/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp rename from mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp rename to mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp --- a/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp +++ b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp @@ -1,4 +1,4 @@ -//===- cuda-runtime-wrappers.cpp - MLIR CUDA runner wrapper library -------===// +//===- CudaRuntimeWrappers.cpp - MLIR CUDA API wrapper library ------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt --- a/mlir/test/CMakeLists.txt +++ b/mlir/test/CMakeLists.txt @@ -21,8 +21,7 @@ set(MLIR_RUNNER_UTILS_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) # Passed to lit.site.cfg.py.in to set up the path where to find the libraries -# for the mlir cuda / rocm / spirv / vulkan runner tests. -set(MLIR_CUDA_WRAPPER_LIBRARY_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) +# for the mlir rocm / spirv / vulkan runner tests. set(MLIR_ROCM_WRAPPER_LIBRARY_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) set(MLIR_SPIRV_WRAPPER_LIBRARY_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) set(MLIR_VULKAN_WRAPPER_LIBRARY_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) @@ -70,6 +69,10 @@ mlir_async_runtime ) +if(MLIR_CUDA_RUNNER_ENABLED) + list(APPEND MLIR_TEST_DEPENDS mlir_cuda_runtime) +endif() + list(APPEND MLIR_TEST_DEPENDS MLIRUnitTests) if(LLVM_BUILD_EXAMPLES) diff --git a/mlir/test/mlir-cuda-runner/all-reduce-and.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir rename from mlir/test/mlir-cuda-runner/all-reduce-and.mlir rename to mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir --- a/mlir/test/mlir-cuda-runner/all-reduce-and.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir @@ -1,7 +1,7 @@ // RUN: mlir-cuda-runner %s \ // RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \ // RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \ -// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \ +// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ // RUN: --entry-point-result=void \ // RUN: | FileCheck %s diff --git a/mlir/test/mlir-cuda-runner/all-reduce-max.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir rename from mlir/test/mlir-cuda-runner/all-reduce-max.mlir rename to mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir --- a/mlir/test/mlir-cuda-runner/all-reduce-max.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir @@ -1,7 +1,7 @@ // RUN: mlir-cuda-runner %s \ // RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \ // RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \ -// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \ +// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ // RUN: --entry-point-result=void \ // RUN: | FileCheck %s diff --git a/mlir/test/mlir-cuda-runner/all-reduce-min.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir rename from mlir/test/mlir-cuda-runner/all-reduce-min.mlir rename to mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir --- a/mlir/test/mlir-cuda-runner/all-reduce-min.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir @@ -1,7 +1,7 @@ // RUN: mlir-cuda-runner %s \ // RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \ // RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \ -// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \ +// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ // RUN: --entry-point-result=void \ // RUN: | FileCheck %s diff --git a/mlir/test/mlir-cuda-runner/all-reduce-op.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir rename from mlir/test/mlir-cuda-runner/all-reduce-op.mlir rename to mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir --- a/mlir/test/mlir-cuda-runner/all-reduce-op.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir @@ -1,7 +1,7 @@ // RUN: mlir-cuda-runner %s \ // RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \ // RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \ -// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \ +// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ // RUN: --entry-point-result=void \ // RUN: | FileCheck %s diff --git a/mlir/test/mlir-cuda-runner/all-reduce-or.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir rename from mlir/test/mlir-cuda-runner/all-reduce-or.mlir rename to mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir --- a/mlir/test/mlir-cuda-runner/all-reduce-or.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir @@ -1,7 +1,7 @@ // RUN: mlir-cuda-runner %s \ // RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \ // RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \ -// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \ +// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ // RUN: --entry-point-result=void \ // RUN: | FileCheck %s diff --git a/mlir/test/mlir-cuda-runner/all-reduce-region.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir rename from mlir/test/mlir-cuda-runner/all-reduce-region.mlir rename to mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir --- a/mlir/test/mlir-cuda-runner/all-reduce-region.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir @@ -1,7 +1,7 @@ // RUN: mlir-cuda-runner %s \ // RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \ // RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \ -// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \ +// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ // RUN: --entry-point-result=void \ // RUN: | FileCheck %s diff --git a/mlir/test/mlir-cuda-runner/all-reduce-xor.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir rename from mlir/test/mlir-cuda-runner/all-reduce-xor.mlir rename to mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir --- a/mlir/test/mlir-cuda-runner/all-reduce-xor.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir @@ -1,7 +1,7 @@ // RUN: mlir-cuda-runner %s \ // RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \ // RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \ -// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \ +// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ // RUN: --entry-point-result=void \ // RUN: | FileCheck %s diff --git a/mlir/test/mlir-cuda-runner/async.mlir b/mlir/test/Integration/GPU/CUDA/async.mlir rename from mlir/test/mlir-cuda-runner/async.mlir rename to mlir/test/Integration/GPU/CUDA/async.mlir --- a/mlir/test/mlir-cuda-runner/async.mlir +++ b/mlir/test/Integration/GPU/CUDA/async.mlir @@ -3,7 +3,7 @@ // RUN: -gpu-async-region -async-ref-counting \ // RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \ // RUN: -async-to-async-runtime -convert-async-to-llvm -convert-std-to-llvm \ -// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \ +// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_async_runtime%shlibext \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ // RUN: --entry-point-result=void -O0 \ diff --git a/mlir/test/mlir-cuda-runner/gpu-to-cubin.mlir b/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir rename from mlir/test/mlir-cuda-runner/gpu-to-cubin.mlir rename to mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir --- a/mlir/test/mlir-cuda-runner/gpu-to-cubin.mlir +++ b/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir @@ -1,7 +1,7 @@ // RUN: mlir-cuda-runner %s \ // RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \ // RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \ -// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \ +// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ // RUN: --entry-point-result=void \ // RUN: | FileCheck %s diff --git a/mlir/test/mlir-cuda-runner/lit.local.cfg b/mlir/test/Integration/GPU/CUDA/lit.local.cfg rename from mlir/test/mlir-cuda-runner/lit.local.cfg rename to mlir/test/Integration/GPU/CUDA/lit.local.cfg diff --git a/mlir/test/mlir-cuda-runner/multiple-all-reduce.mlir b/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir rename from mlir/test/mlir-cuda-runner/multiple-all-reduce.mlir rename to mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir --- a/mlir/test/mlir-cuda-runner/multiple-all-reduce.mlir +++ b/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir @@ -1,7 +1,7 @@ // RUN: mlir-cuda-runner %s \ // RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \ // RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \ -// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \ +// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ // RUN: --entry-point-result=void \ // RUN: | FileCheck %s diff --git a/mlir/test/mlir-cuda-runner/shuffle.mlir b/mlir/test/Integration/GPU/CUDA/shuffle.mlir rename from mlir/test/mlir-cuda-runner/shuffle.mlir rename to mlir/test/Integration/GPU/CUDA/shuffle.mlir --- a/mlir/test/mlir-cuda-runner/shuffle.mlir +++ b/mlir/test/Integration/GPU/CUDA/shuffle.mlir @@ -1,7 +1,7 @@ // RUN: mlir-cuda-runner %s \ // RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \ // RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \ -// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \ +// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ // RUN: --entry-point-result=void \ // RUN: | FileCheck %s diff --git a/mlir/test/mlir-cuda-runner/two-modules.mlir b/mlir/test/Integration/GPU/CUDA/two-modules.mlir rename from mlir/test/mlir-cuda-runner/two-modules.mlir rename to mlir/test/Integration/GPU/CUDA/two-modules.mlir --- a/mlir/test/mlir-cuda-runner/two-modules.mlir +++ b/mlir/test/Integration/GPU/CUDA/two-modules.mlir @@ -1,7 +1,7 @@ // RUN: mlir-cuda-runner %s \ // RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \ // RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \ -// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \ +// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \ // RUN: --entry-point-result=void \ // RUN: | FileCheck %s diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py --- a/mlir/test/lit.cfg.py +++ b/mlir/test/lit.cfg.py @@ -75,7 +75,6 @@ ToolSubst('toy-ch3', unresolved='ignore'), ToolSubst('toy-ch4', unresolved='ignore'), ToolSubst('toy-ch5', unresolved='ignore'), - ToolSubst('%cuda_wrapper_library_dir', config.cuda_wrapper_library_dir, unresolved='ignore'), ToolSubst('%linalg_test_lib_dir', config.linalg_test_lib_dir, unresolved='ignore'), ToolSubst('%mlir_runner_utils_dir', config.mlir_runner_utils_dir, unresolved='ignore'), ToolSubst('%rocm_wrapper_library_dir', config.rocm_wrapper_library_dir, unresolved='ignore'), diff --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in --- a/mlir/test/lit.site.cfg.py.in +++ b/mlir/test/lit.site.cfg.py.in @@ -36,7 +36,6 @@ config.linalg_test_lib_dir = "@MLIR_DIALECT_LINALG_INTEGRATION_TEST_LIB_DIR@" config.build_examples = @LLVM_BUILD_EXAMPLES@ config.run_cuda_tests = @MLIR_CUDA_CONVERSIONS_ENABLED@ -config.cuda_wrapper_library_dir = "@MLIR_CUDA_WRAPPER_LIBRARY_DIR@" config.enable_cuda_runner = @MLIR_CUDA_RUNNER_ENABLED@ config.run_rocm_tests = @MLIR_ROCM_CONVERSIONS_ENABLED@ config.rocm_wrapper_library_dir = "@MLIR_ROCM_WRAPPER_LIBRARY_DIR@" diff --git a/mlir/tools/mlir-cuda-runner/CMakeLists.txt b/mlir/tools/mlir-cuda-runner/CMakeLists.txt --- a/mlir/tools/mlir-cuda-runner/CMakeLists.txt +++ b/mlir/tools/mlir-cuda-runner/CMakeLists.txt @@ -1,5 +1,4 @@ set(LLVM_OPTIONAL_SOURCES - cuda-runtime-wrappers.cpp mlir-cuda-runner.cpp ) set(LLVM_LINK_COMPONENTS @@ -27,21 +26,6 @@ # We need the libcuda.so library. find_library(CUDA_RUNTIME_LIBRARY cuda) - add_mlir_library(cuda-runtime-wrappers - SHARED - cuda-runtime-wrappers.cpp - - EXCLUDE_FROM_LIBMLIR - ) - target_include_directories(cuda-runtime-wrappers - PRIVATE - ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} - ) - target_link_libraries(cuda-runtime-wrappers - PRIVATE - ${CUDA_RUNTIME_LIBRARY} - ) - get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS) set(LIBS ${conversion_libs} @@ -79,7 +63,7 @@ mlir-cuda-runner.cpp DEPENDS - cuda-runtime-wrappers + mlir_cuda_runtime ) target_include_directories(mlir-cuda-runner PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}