diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -305,13 +305,30 @@
 # OpenMP offloading requires at least sm_35 because we use shuffle instructions
 # to generate efficient code for reductions and the atomicMax instruction on
 # 64-bit integers in the implementation of conditional lastprivate.
-set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
-  "Default architecture for OpenMP offloading to Nvidia GPUs.")
-string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
-if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
-  message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_35")
+set(CUDA_ARCH_FLAGS "sm_35")
+
+# Derive the default from the nvcc architecture flags selected by FindCUDA's
+# cuda_select_nvcc_arch_flags(), unless CLANG_OPENMP_NVPTX_DEFAULT_ARCH was
+# already provided. Only the first sm_XX entry in those flags is considered.
+if (NOT DEFINED CLANG_OPENMP_NVPTX_DEFAULT_ARCH)
+  find_package(CUDA QUIET)
+  if (CUDA_FOUND)
+    cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
+  endif()
+else()
+  set(CUDA_ARCH_FLAGS "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
+endif()
+
+# string(REGEX MATCH) always sets its output variable (to an empty string when
+# nothing matches), so check the match for emptiness rather than DEFINED.
+string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH_MATCH "${CUDA_ARCH_FLAGS}")
+if ("${CUDA_ARCH_MATCH}" STREQUAL "" OR "${CMAKE_MATCH_1}" LESS 35)
   set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
     "Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
+  message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_35")
+else()
+  set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "${CUDA_ARCH_MATCH}" CACHE STRING
+    "Default architecture for OpenMP offloading to Nvidia GPUs.")
 endif()
 
 set(CLANG_SYSTEMZ_DEFAULT_ARCH "z10" CACHE STRING "SystemZ Default Arch")
diff --git a/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake b/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
--- a/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
+++ b/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
@@ -117,6 +117,20 @@
 endif()
 
 find_package(CUDA QUIET)
+# Pick the Nvidia GPU architecture to build for from the nvcc flags selected by
+# FindCUDA; only the first sm_XX entry is used, with sm_35 as the minimum.
+if (CUDA_FOUND)
+  cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
+  string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH_MATCH_OUTPUT "${CUDA_ARCH_FLAGS}")
+  # REGEX MATCH leaves the output empty (but defined) when nothing matches.
+  if ("${CUDA_ARCH_MATCH_OUTPUT}" STREQUAL "" OR "${CMAKE_MATCH_1}" LESS 35)
+    libomptarget_warning_say("Setting Nvidia GPU architecture support for OpenMP target runtime library to sm_35 by default")
+    set(LIBOMPTARGET_DEP_CUDA_ARCH "35")
+  else()
+    set(LIBOMPTARGET_DEP_CUDA_ARCH "${CMAKE_MATCH_1}")
+  endif()
+endif()
+
 set(LIBOMPTARGET_DEP_CUDA_FOUND ${CUDA_FOUND})
 set(LIBOMPTARGET_DEP_CUDA_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS})
 
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -67,9 +67,16 @@
 
   set(omp_data_objects ${devicertl_common_directory}/src/omp_data.cu)
 
-  # Get the compute capability the user requested or use SM_35 by default.
-  # SM_35 is what clang uses by default.
-  set(default_capabilities 35)
+  # Always build the library for sm_35 and, when a different architecture was
+  # detected (LIBOMPTARGET_DEP_CUDA_ARCH), for that one as well. The variable is
+  # quoted and checked for emptiness so an unset value falls back to 35 only.
+  if ("${LIBOMPTARGET_DEP_CUDA_ARCH}" STREQUAL "" OR
+      "${LIBOMPTARGET_DEP_CUDA_ARCH}" EQUAL 35)
+    set(default_capabilities 35)
+  else()
+    set(default_capabilities "35,${LIBOMPTARGET_DEP_CUDA_ARCH}")
+  endif()
+
   if (DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY)
     set(default_capabilities ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
     libomptarget_warning_say("LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY is deprecated, please use LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES")