Index: README.rst =================================================================== --- README.rst +++ README.rst @@ -280,10 +280,10 @@ compatible with NVCC, this option can be use to pass to NVCC a valid compiler to avoid the error. - **LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY** = ``35`` - CUDA compute capability that should be supported by the NVPTX device RTL. E.g. - for compute capability 6.0, the option "60" should be used. Compute capability - 3.5 is the minimum required. + **LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES** = ``35`` + List of CUDA compute capabilities that should be supported by the NVPTX + device RTL. E.g. for compute capabilities 6.0 and 7.0, the option "60,70" + should be used. Compute capability 3.5 is the minimum required. **LIBOMPTARGET_NVPTX_DEBUG** = ``OFF|ON`` Enable printing of debug messages from the NVPTX device RTL. Index: libomptarget/deviceRTLs/nvptx/CMakeLists.txt =================================================================== --- libomptarget/deviceRTLs/nvptx/CMakeLists.txt +++ libomptarget/deviceRTLs/nvptx/CMakeLists.txt @@ -60,9 +60,18 @@ # Get the compute capability the user requested or use SM_35 by default. # SM_35 is what clang uses by default. - set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY 35 CACHE STRING - "CUDA Compute Capability to be used to compile the NVPTX device RTL.") - set(CUDA_ARCH -arch sm_${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY}) + set(default_capabilities 35) + if (DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY) + set(default_capabilities ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY}) + libomptarget_warning_say("LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY is deprecated, please use LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES") + endif() + set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES ${default_capabilities} CACHE STRING + "List of CUDA Compute Capabilities to be used to compile the NVPTX device RTL.") + string(REPLACE "," ";" nvptx_sm_list ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES}) + + foreach(sm ${nvptx_sm_list}) + set(CUDA_ARCH ${CUDA_ARCH} -gencode arch=compute_${sm},code=sm_${sm}) + endforeach() # Activate RTL message dumps if requested by the user. set(LIBOMPTARGET_NVPTX_DEBUG FALSE CACHE BOOL @@ -152,46 +161,47 @@ # Get the compute capability the user requested or use SM_35 by default. set(CUDA_ARCH "") - set(CUDA_ARCH --cuda-gpu-arch=sm_${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY}) + foreach(sm ${nvptx_sm_list}) + set(CUDA_ARCH --cuda-gpu-arch=sm_${sm}) - # Compile cuda files to bitcode. - set(bc_files "") - foreach(src ${cuda_src_files}) - get_filename_component(infile ${src} ABSOLUTE) - get_filename_component(outfile ${src} NAME) - - add_custom_command(OUTPUT ${outfile}.bc - COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} ${CUDA_FLAGS} ${CUDA_ARCH} ${CUDA_INCLUDES} - -c ${infile} -o ${outfile}.bc - DEPENDS ${infile} - IMPLICIT_DEPENDS CXX ${infile} - COMMENT "Building LLVM bitcode ${outfile}.bc" - VERBATIM + # Compile cuda files to bitcode. + set(bc_files "") + foreach(src ${cuda_src_files}) + get_filename_component(infile ${src} ABSOLUTE) + get_filename_component(outfile ${src} NAME) + + add_custom_command(OUTPUT ${outfile}-sm_${sm}.bc + COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} ${CUDA_FLAGS} ${CUDA_ARCH} ${CUDA_INCLUDES} + -c ${infile} -o ${outfile}-sm_${sm}.bc + DEPENDS ${infile} + IMPLICIT_DEPENDS CXX ${infile} + COMMENT "Building LLVM bitcode ${outfile}-sm_${sm}.bc" + VERBATIM + ) + set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile}-sm_${sm}.bc) + + list(APPEND bc_files ${outfile}-sm_${sm}.bc) + endforeach() + + # Link to a bitcode library. + add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc + COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER} + -o ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc ${bc_files} + DEPENDS ${bc_files} + COMMENT "Linking LLVM bitcode libomptarget-nvptx-sm_${sm}.bc" ) - set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile}.bc) - - list(APPEND bc_files ${outfile}.bc) - endforeach() + set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES libomptarget-nvptx-sm_${sm}.bc) - # Link to a bitcode library. - add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx.bc - COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER} - -o ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx.bc ${bc_files} - DEPENDS ${bc_files} - COMMENT "Linking LLVM bitcode libomptarget-nvptx.bc" - ) - set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES libomptarget-nvptx.bc) + add_custom_target(omptarget-nvptx-${sm}-bc ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc) - add_custom_target(omptarget-nvptx-bc ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx.bc) - - # Copy library to destination. - add_custom_command(TARGET omptarget-nvptx-bc POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx.bc - $) - - # Install device RTL under the lib destination folder. - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx.bc DESTINATION "lib") + # Copy library to destination. + add_custom_command(TARGET omptarget-nvptx-${sm}-bc POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc + $) + # Install device RTL under the lib destination folder. + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libomptarget-nvptx-sm_${sm}.bc DESTINATION "lib") + endforeach() endif() endif()