diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -305,13 +305,30 @@
 # OpenMP offloading requires at least sm_35 because we use shuffle instructions
 # to generate efficient code for reductions and the atomicMax instruction on
 # 64-bit integers in the implementation of conditional lastprivate.
-set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
-  "Default architecture for OpenMP offloading to Nvidia GPUs.")
-string(REGEX MATCH "^sm_([0-9]+)$" MATCHED_ARCH "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
-if (NOT DEFINED MATCHED_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
-  message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_35")
+set(CUDA_ARCH_FLAGS "sm_35")
+
+# Unless the user already chose an architecture, ask FindCUDA for the nvcc
+# architecture flags it selects for this host; keep sm_35 if CUDA is not found.
+if (NOT DEFINED CLANG_OPENMP_NVPTX_DEFAULT_ARCH)
+  find_package(CUDA QUIET)
+  if (CUDA_FOUND)
+    cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
+  endif()
+else()
+  set(CUDA_ARCH_FLAGS "${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}")
+endif()
+
+# string(REGEX MATCH) always defines its output variable (empty when nothing
+# matched), so test CUDA_ARCH for emptiness rather than with DEFINED. Only the
+# first sm_NN found in the flags is used.
+string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH "${CUDA_ARCH_FLAGS}")
+if (NOT CUDA_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
   set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING
     "Default architecture for OpenMP offloading to Nvidia GPUs." FORCE)
+  message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_35")
+else()
+  set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH ${CUDA_ARCH} CACHE STRING
+    "Default architecture for OpenMP offloading to Nvidia GPUs.")
 endif()
 
 set(CLANG_SYSTEMZ_DEFAULT_ARCH "z10" CACHE STRING "SystemZ Default Arch")
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -68,12 +68,29 @@
   set(omp_data_objects ${devicertl_common_directory}/src/omp_data.cu)
 
   # Get the compute capability the user requested or use SM_35 by default.
-  # SM_35 is what clang uses by default.
-  set(default_capabilities 35)
+  # When the user requested nothing, also add the first sm_NN (>= 35) found in
+  # the nvcc architecture flags that FindCUDA selects for this host.
+  set(compute_capabilities 35)
+  if (NOT DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES)
+    find_package(CUDA QUIET)
+    if (CUDA_FOUND)
+      cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
+      # REGEX MATCH leaves CUDA_ARCH empty (but defined) when nothing matched.
+      string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH "${CUDA_ARCH_FLAGS}")
+      if (NOT CUDA_ARCH OR "${CMAKE_MATCH_1}" LESS 35)
+        message(WARNING "Setting default architecture for OpenMP target library to sm_35")
+      else()
+        list(APPEND compute_capabilities ${CMAKE_MATCH_1})
+        # A detected sm_35 would otherwise be listed twice.
+        list(REMOVE_DUPLICATES compute_capabilities)
+      endif()
+    endif()
+  endif()
+
   if (DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY)
-    set(default_capabilities ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
+    set(compute_capabilities ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
     libomptarget_warning_say("LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY is deprecated, please use LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES")
   endif()
-  set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES ${default_capabilities} CACHE STRING
+  set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES ${compute_capabilities} CACHE STRING
     "List of CUDA Compute Capabilities to be used to compile the NVPTX device RTL.")
   string(REPLACE "," ";" nvptx_sm_list ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES})