diff --git a/openmp/README.rst b/openmp/README.rst --- a/openmp/README.rst +++ b/openmp/README.rst @@ -281,7 +281,7 @@ **LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES** = ``35`` List of CUDA compute capabilities that should be supported by the NVPTX - device RTL. E.g. for compute capabilities 6.0 and 7.0, the option "60,70" + device RTL. E.g. for compute capabilities 6.0 and 7.0, the option "60;70" should be used. Compute capability 3.5 is the minimum required. **LIBOMPTARGET_NVPTX_DEBUG** = ``OFF|ON`` diff --git a/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake b/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake --- a/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake +++ b/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake @@ -12,7 +12,7 @@ # components of libomptarget. These are the dependencies we have: # # libelf : required by some targets to handle the ELF files at runtime. -# libffi : required to launch target kernels given function and argument +# libffi : required to launch target kernels given function and argument # pointers. # CUDA : required to control offloading to NVIDIA GPUs. # VEOS : required to control offloading to NEC Aurora. @@ -47,18 +47,18 @@ /sw/lib ENV LIBRARY_PATH ENV LD_LIBRARY_PATH) - + set(LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIRS ${LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIR}) find_package_handle_standard_args( - LIBOMPTARGET_DEP_LIBELF + LIBOMPTARGET_DEP_LIBELF DEFAULT_MSG LIBOMPTARGET_DEP_LIBELF_LIBRARIES LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIRS) mark_as_advanced( - LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIRS + LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIRS LIBOMPTARGET_DEP_LIBELF_LIBRARIES) - + ################################################################################ # Looking for libffi... 
################################################################################ @@ -100,15 +100,15 @@ set(LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIRS ${LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIR}) find_package_handle_standard_args( - LIBOMPTARGET_DEP_LIBFFI + LIBOMPTARGET_DEP_LIBFFI DEFAULT_MSG LIBOMPTARGET_DEP_LIBFFI_LIBRARIES LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIRS) mark_as_advanced( - LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIRS + LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIRS LIBOMPTARGET_DEP_LIBFFI_LIBRARIES) - + ################################################################################ # Looking for CUDA... ################################################################################ @@ -118,7 +118,9 @@ find_package(CUDA QUIET) # Try to get the highest Nvidia GPU architecture the system supports -if (CUDA_FOUND) +set(LIBOMPTARGET_NVPTX_AUTODETECT_COMPUTE_CAPABILITY TRUE CACHE BOOL + "Auto detect CUDA Compute Capability if CUDA is detected.") +if (CUDA_FOUND AND LIBOMPTARGET_NVPTX_AUTODETECT_COMPUTE_CAPABILITY) cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS) string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH_MATCH_OUTPUT ${CUDA_ARCH_FLAGS}) if (NOT DEFINED CUDA_ARCH_MATCH_OUTPUT OR "${CMAKE_MATCH_1}" LESS 35) @@ -133,7 +135,7 @@ set(LIBOMPTARGET_DEP_CUDA_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS}) mark_as_advanced( - LIBOMPTARGET_DEP_CUDA_FOUND + LIBOMPTARGET_DEP_CUDA_FOUND LIBOMPTARGET_DEP_CUDA_INCLUDE_DIRS) ################################################################################ diff --git a/openmp/libomptarget/cmake/Modules/LibomptargetNVPTXBitcodeLibrary.cmake b/openmp/libomptarget/cmake/Modules/LibomptargetNVPTXBitcodeLibrary.cmake deleted file mode 100644 --- a/openmp/libomptarget/cmake/Modules/LibomptargetNVPTXBitcodeLibrary.cmake +++ /dev/null @@ -1,111 +0,0 @@ -# -#//===----------------------------------------------------------------------===// -#// -#// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-#// See https://llvm.org/LICENSE.txt for license information. -#// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#// -#//===----------------------------------------------------------------------===// -# - -# We use the compiler and linker provided by the user, attempt to use the one -# used to build libomptarget or just fail. -set(LIBOMPTARGET_NVPTX_BCLIB_SUPPORTED FALSE) - -if (NOT LIBOMPTARGET_NVPTX_CUDA_COMPILER STREQUAL "") - set(LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER ${LIBOMPTARGET_NVPTX_CUDA_COMPILER}) -elseif(${CMAKE_C_COMPILER_ID} STREQUAL "Clang") - set(LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER ${CMAKE_C_COMPILER}) -else() - return() -endif() - -# Get compiler directory to try to locate a suitable linker. -get_filename_component(compiler_dir ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} DIRECTORY) -set(llvm_link "${compiler_dir}/llvm-link") - -if (NOT LIBOMPTARGET_NVPTX_BC_LINKER STREQUAL "") - set(LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER ${LIBOMPTARGET_NVPTX_BC_LINKER}) -elseif (EXISTS "${llvm_link}") - # Use llvm-link from the compiler directory. - set(LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER "${llvm_link}") -else() - return() -endif() - -function(try_compile_bitcode output source) - set(srcfile ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/src.cu) - file(WRITE ${srcfile} "${source}\n") - set(bcfile ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/out.bc) - - # The remaining arguments are the flags to be tested. - # FIXME: Don't hardcode GPU version. This is currently required because - # Clang refuses to compile its default of sm_20 with CUDA 9. 
- execute_process( - COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} ${ARGN} - --cuda-gpu-arch=sm_35 -c ${srcfile} -o ${bcfile} - RESULT_VARIABLE result - OUTPUT_QUIET ERROR_QUIET) - if (result EQUAL 0) - set(${output} TRUE PARENT_SCOPE) - else() - set(${output} FALSE PARENT_SCOPE) - endif() -endfunction() - -# Save for which compiler we are going to do the following checks so that we -# can discard cached values if the user specifies a different value. -set(discard_cached FALSE) -if (DEFINED LIBOMPTARGET_NVPTX_CHECKED_CUDA_COMPILER AND - NOT("${LIBOMPTARGET_NVPTX_CHECKED_CUDA_COMPILER}" STREQUAL "${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER}")) - set(discard_cached TRUE) -endif() -set(LIBOMPTARGET_NVPTX_CHECKED_CUDA_COMPILER "${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER}" CACHE INTERNAL "" FORCE) - -function(check_bitcode_compilation output source) - if (${discard_cached} OR NOT DEFINED ${output}) - message(STATUS "Performing Test ${output}") - # Forward additional arguments which contain the flags. - try_compile_bitcode(result "${source}" ${ARGN}) - set(${output} ${result} CACHE INTERNAL "" FORCE) - if(${result}) - message(STATUS "Performing Test ${output} - Success") - else() - message(STATUS "Performing Test ${output} - Failed") - endif() - endif() -endfunction() - -# These flags are required to emit LLVM Bitcode. We check them together because -# if any of them are not supported, there is no point in finding out which are. 
-set(compiler_flags_required -emit-llvm -O1 --cuda-device-only -std=c++14 --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}) -set(compiler_flags_required_src "extern \"C\" __device__ int thread() { return threadIdx.x; }") -check_bitcode_compilation(LIBOMPTARGET_NVPTX_CUDA_COMPILER_SUPPORTS_FLAGS_REQUIRED "${compiler_flags_required_src}" ${compiler_flags_required}) - -# It makes no sense to continue given that the compiler doesn't support -# emitting basic LLVM Bitcode -if (NOT LIBOMPTARGET_NVPTX_CUDA_COMPILER_SUPPORTS_FLAGS_REQUIRED) - return() -endif() - -set(LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER_FLAGS ${compiler_flags_required}) - -# Declaring external shared device variables might need an additional flag -# since Clang 7.0 and was entirely unsupported since version 4.0. -set(extern_device_shared_src "extern __device__ __shared__ int test;") - -check_bitcode_compilation(LIBOMPTARGET_NVPTX_CUDA_COMPILER_SUPPORTS_EXTERN_SHARED "${extern_device_shared_src}" ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER_FLAGS}) -if (NOT LIBOMPTARGET_NVPTX_CUDA_COMPILER_SUPPORTS_EXTERN_SHARED) - set(compiler_flag_fcuda_rdc -fcuda-rdc) - set(compiler_flag_fcuda_rdc_full ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER_FLAGS} ${compiler_flag_fcuda_rdc}) - check_bitcode_compilation(LIBOMPTARGET_NVPTX_CUDA_COMPILER_SUPPORTS_FCUDA_RDC "${extern_device_shared_src}" ${compiler_flag_fcuda_rdc_full}) - - if (NOT LIBOMPTARGET_NVPTX_CUDA_COMPILER_SUPPORTS_FCUDA_RDC) - return() - endif() - - set(LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER_FLAGS "${compiler_flag_fcuda_rdc_full}") -endif() - -# We can compile LLVM Bitcode from CUDA source code! 
-set(LIBOMPTARGET_NVPTX_BCLIB_SUPPORTED TRUE)
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -10,6 +10,37 @@
 #
 ##===----------------------------------------------------------------------===##
 
+# Check if we can create an LLVM bitcode implementation of the runtime library
+# that could be inlined in the user application. For that we need to find
+# a Clang compiler capable of compiling our CUDA files to LLVM bitcode and
+# an LLVM linker.
+set(LIBOMPTARGET_NVPTX_CUDA_COMPILER "" CACHE STRING
+  "Location of a CUDA compiler capable of emitting LLVM bitcode.")
+set(LIBOMPTARGET_NVPTX_BC_LINKER "" CACHE STRING
+  "Location of a linker capable of linking LLVM bitcode objects.")
+
+if (NOT LIBOMPTARGET_NVPTX_CUDA_COMPILER STREQUAL "")
+  set(cuda_compiler "${LIBOMPTARGET_NVPTX_CUDA_COMPILER}")
+elseif("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
+  set(cuda_compiler "${CMAKE_C_COMPILER}")
+else()
+  libomptarget_say("Not building NVPTX deviceRTL: clang not found")
+  return()
+endif()
+
+# Look for llvm-link next to the selected compiler unless the user named one.
+get_filename_component(compiler_dir "${cuda_compiler}" DIRECTORY)
+set(llvm_link "${compiler_dir}/llvm-link")
+
+if (NOT LIBOMPTARGET_NVPTX_BC_LINKER STREQUAL "")
+  set(bc_linker "${LIBOMPTARGET_NVPTX_BC_LINKER}")
+elseif (EXISTS "${llvm_link}")
+  set(bc_linker "${llvm_link}")
+else()
+  libomptarget_say("Not building NVPTX deviceRTL: llvm-link not found")
+  return()
+endif()
+
 # TODO: This part needs to be refined when libomptarget is going to support
 # Windows!
# TODO: This part can also be removed if we can change the clang driver to make
@@ -33,156 +64,154 @@
 
 set(devicertl_nvptx_directory ${devicertl_base_directory}/nvptx)
 
-if(LIBOMPTARGET_DEP_CUDA_FOUND)
-  # Build library support for the highest compute capability the system supports
-  # and always build support for sm_35 by default
-  if (${LIBOMPTARGET_DEP_CUDA_ARCH} EQUAL 35)
-    set(default_capabilities 35)
-  else()
-    set(default_capabilities "35,${LIBOMPTARGET_DEP_CUDA_ARCH}")
-  endif()
+# Compute capabilities the NVPTX device RTL can be built for. A capability
+# auto-detected from the local CUDA installation is accepted in addition and,
+# when available, is used as the default instead of the full list.
+set(supported_capabilities 35 37 50 52 53 60 61 62 70 72 75 80)
+if (DEFINED LIBOMPTARGET_DEP_CUDA_ARCH)
+  set(default_capabilities ${LIBOMPTARGET_DEP_CUDA_ARCH})
+  list(APPEND supported_capabilities ${LIBOMPTARGET_DEP_CUDA_ARCH})
+else()
+  set(default_capabilities ${supported_capabilities})
+endif()
 
-  if (DEFINED LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY)
-    set(default_capabilities ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY})
-    libomptarget_warning_say("LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITY is deprecated, please use LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES")
-  endif()
-  set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES ${default_capabilities} CACHE STRING
-    "List of CUDA Compute Capabilities to be used to compile the NVPTX device RTL.")
-  string(REPLACE "," ";" nvptx_sm_list ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES})
+set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES "${default_capabilities}" CACHE STRING
+  "List of CUDA Compute Capabilities to be used to compile the NVPTX device RTL.")
 
-  foreach(sm ${nvptx_sm_list})
-    set(CUDA_ARCH ${CUDA_ARCH} -gencode arch=compute_${sm},code=sm_${sm})
-  endforeach()
+# Accept both the CMake list form ("60;70") and the comma-separated form
+# ("60,70") that was documented, and possibly cached, before this change.
+string(REPLACE "," ";" nvptx_sm_list "${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES}")
+
+# If user set LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES to empty, we disable the
+# build.
+if (NOT nvptx_sm_list)
+  libomptarget_say("Not building CUDA offloading device RTL: empty compute capability list")
+  return()
+endif()
 
-  # Override default MAX_SM in src/target_impl.h if requested
-  if (DEFINED LIBOMPTARGET_NVPTX_MAX_SM)
-    set(MAX_SM_DEFINITION "-DMAX_SM=${LIBOMPTARGET_NVPTX_MAX_SM}")
+# Reject any requested SM value that is not in the supported set.
+foreach(sm ${nvptx_sm_list})
+  if (NOT "${sm}" IN_LIST supported_capabilities)
+    message(FATAL_ERROR "LIBOMPTARGET-NVPTX: compute capability ${sm} is not supported. Supported values: ${supported_capabilities}")
   endif()
+endforeach()
 
-  # Activate RTL message dumps if requested by the user.
-  set(LIBOMPTARGET_NVPTX_DEBUG FALSE CACHE BOOL
-    "Activate NVPTX device RTL debug messages.")
-
-  # Check if we can create an LLVM bitcode implementation of the runtime library
-  # that could be inlined in the user application. For that we need to find
-  # a Clang compiler capable of compiling our CUDA files to LLVM bitcode and
-  # an LLVM linker.
-  set(LIBOMPTARGET_NVPTX_CUDA_COMPILER "" CACHE STRING
-    "Location of a CUDA compiler capable of emitting LLVM bitcode.")
-  set(LIBOMPTARGET_NVPTX_BC_LINKER "" CACHE STRING
-    "Location of a linker capable of linking LLVM bitcode objects.")
-
-  include(LibomptargetNVPTXBitcodeLibrary)
-
-  if (LIBOMPTARGET_NVPTX_BCLIB_SUPPORTED)
-    libomptarget_say("Building CUDA LLVM bitcode offloading device RTL.")
-
-    set(cuda_src_files
-      ${devicertl_common_directory}/src/cancel.cu
-      ${devicertl_common_directory}/src/critical.cu
-      ${devicertl_common_directory}/src/data_sharing.cu
-      ${devicertl_common_directory}/src/libcall.cu
-      ${devicertl_common_directory}/src/loop.cu
-      ${devicertl_common_directory}/src/omp_data.cu
-      ${devicertl_common_directory}/src/omptarget.cu
-      ${devicertl_common_directory}/src/parallel.cu
-      ${devicertl_common_directory}/src/reduction.cu
-      ${devicertl_common_directory}/src/support.cu
-      ${devicertl_common_directory}/src/sync.cu
-      ${devicertl_common_directory}/src/task.cu
-      src/target_impl.cu
-    )
+# Override default
MAX_SM in src/target_impl.h if requested +if (DEFINED LIBOMPTARGET_NVPTX_MAX_SM) + set(MAX_SM_DEFINITION "-DMAX_SM=${LIBOMPTARGET_NVPTX_MAX_SM}") +endif() + +# Activate RTL message dumps if requested by the user. +set(LIBOMPTARGET_NVPTX_DEBUG FALSE CACHE BOOL + "Activate NVPTX device RTL debug messages.") + +libomptarget_say("Building CUDA LLVM bitcode offloading device RTL.") + +set(cuda_src_files + ${devicertl_common_directory}/src/cancel.cu + ${devicertl_common_directory}/src/critical.cu + ${devicertl_common_directory}/src/data_sharing.cu + ${devicertl_common_directory}/src/libcall.cu + ${devicertl_common_directory}/src/loop.cu + ${devicertl_common_directory}/src/omp_data.cu + ${devicertl_common_directory}/src/omptarget.cu + ${devicertl_common_directory}/src/parallel.cu + ${devicertl_common_directory}/src/reduction.cu + ${devicertl_common_directory}/src/support.cu + ${devicertl_common_directory}/src/sync.cu + ${devicertl_common_directory}/src/task.cu + src/target_impl.cu +) + +# Set flags for LLVM Bitcode compilation. +set(bc_flags -S -x c++ + -target nvptx64 + -Xclang -emit-llvm-bc + -Xclang -aux-triple -Xclang ${aux_triple} + -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device + -D__CUDACC__ + -I${devicertl_base_directory} + -I${devicertl_nvptx_directory}/src) + +if(${LIBOMPTARGET_NVPTX_DEBUG}) + list(APPEND bc_flags -DOMPTARGET_NVPTX_DEBUG=-1) +else() + list(APPEND bc_flags -DOMPTARGET_NVPTX_DEBUG=0) +endif() - # Set flags for LLVM Bitcode compilation. - set(bc_flags -S -x c++ - -target nvptx64 - -Xclang -emit-llvm-bc - -Xclang -aux-triple -Xclang ${aux_triple} - -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device - -D__CUDACC__ - -I${devicertl_base_directory} - -I${devicertl_nvptx_directory}/src) - - if(${LIBOMPTARGET_NVPTX_DEBUG}) - list(APPEND bc_flags -DOMPTARGET_NVPTX_DEBUG=-1) - else() - list(APPEND bc_flags -DOMPTARGET_NVPTX_DEBUG=0) - endif() - - # Create target to build all Bitcode libraries. 
- add_custom_target(omptarget-nvptx-bc) - - # This map is from clang/lib/Driver/ToolChains/Cuda.cpp. - # The last element is the default case. - set(cuda_version_list 110 102 101 100 92 91 90 80) - set(ptx_feature_list 70 65 64 63 61 61 60 42) - # The following two lines of ugly code is not needed when the minimal CMake - # version requirement is 3.17+. - list(LENGTH cuda_version_list num_version_supported) - math(EXPR loop_range "${num_version_supported} - 1") - - # Generate a Bitcode library for all the compute capabilities the user - # requested and all PTX version we know for now. - foreach(sm ${nvptx_sm_list}) - set(sm_flags -Xclang -target-cpu -Xclang sm_${sm} "-D__CUDA_ARCH__=${sm}0") - - # Uncomment the following code and remove those ugly part if the feature - # is available. - # foreach(cuda_version ptx_num IN ZIP_LISTS cuda_version_list ptx_feature_list) - foreach(itr RANGE ${loop_range}) - list(GET cuda_version_list ${itr} cuda_version) - list(GET ptx_feature_list ${itr} ptx_num) - set(cuda_flags ${sm_flags}) - list(APPEND cuda_flags -Xclang -target-feature -Xclang +ptx${ptx_num}) - list(APPEND cuda_flags "-DCUDA_VERSION=${cuda_version}00") - - set(bc_files "") - foreach(src ${cuda_src_files}) - get_filename_component(infile ${src} ABSOLUTE) - get_filename_component(outfile ${src} NAME) - set(outfile "${outfile}-cuda_${cuda_version}-sm_${sm}.bc") - - add_custom_command(OUTPUT ${outfile} - COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER} ${bc_flags} - ${cuda_flags} ${MAX_SM_DEFINITION} ${infile} -o ${outfile} - DEPENDS ${infile} - IMPLICIT_DEPENDS CXX ${infile} - COMMENT "Building LLVM bitcode ${outfile}" - VERBATIM - ) - set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile}) - - list(APPEND bc_files ${outfile}) - endforeach() - - set(bclib_name "libomptarget-nvptx-cuda_${cuda_version}-sm_${sm}.bc") - - # Link to a bitcode library. 
- add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} - COMMAND ${LIBOMPTARGET_NVPTX_SELECTED_BC_LINKER} - -o ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} ${bc_files} - DEPENDS ${bc_files} - COMMENT "Linking LLVM bitcode ${bclib_name}" - ) - set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${bclib_name}) - - set(bclib_target_name "omptarget-nvptx-cuda_${cuda_version}-sm_${sm}-bc") - - add_custom_target(${bclib_target_name} ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}) - add_dependencies(omptarget-nvptx-bc ${bclib_target_name}) - - # Copy library to destination. - add_custom_command(TARGET ${bclib_target_name} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} - ${LIBOMPTARGET_LIBRARY_DIR}) - - # Install bitcode library under the lib destination folder. - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} DESTINATION "${OPENMP_INSTALL_LIBDIR}") - endforeach() +# Create target to build all Bitcode libraries. +add_custom_target(omptarget-nvptx-bc) + +# This map is from clang/lib/Driver/ToolChains/Cuda.cpp. +# The last element is the default case. +set(cuda_version_list 110 102 101 100 92 91 90 80) +set(ptx_feature_list 70 65 64 63 61 61 60 42) +# The following two lines of ugly code is not needed when the minimal CMake +# version requirement is 3.17+. +list(LENGTH cuda_version_list num_version_supported) +math(EXPR loop_range "${num_version_supported} - 1") + +# Generate a Bitcode library for all the compute capabilities the user +# requested and all PTX version we know for now. +foreach(sm ${nvptx_sm_list}) + set(sm_flags -Xclang -target-cpu -Xclang sm_${sm} "-D__CUDA_ARCH__=${sm}0") + + # Uncomment the following code and remove those ugly part if the feature + # is available. 
+  # foreach(cuda_version ptx_num IN ZIP_LISTS cuda_version_list ptx_feature_list)
+  foreach(itr RANGE ${loop_range})
+    list(GET cuda_version_list ${itr} cuda_version)
+    list(GET ptx_feature_list ${itr} ptx_num)
+    set(cuda_flags ${sm_flags})
+    list(APPEND cuda_flags -Xclang -target-feature -Xclang +ptx${ptx_num})
+    list(APPEND cuda_flags "-DCUDA_VERSION=${cuda_version}00")
+
+    set(bc_files "")
+    foreach(src ${cuda_src_files})
+      get_filename_component(infile ${src} ABSOLUTE)
+      get_filename_component(outfile ${src} NAME)
+      set(outfile "${outfile}-cuda_${cuda_version}-sm_${sm}.bc")
+
+      add_custom_command(OUTPUT ${outfile}
+        COMMAND ${cuda_compiler} ${bc_flags}
+          ${cuda_flags} ${MAX_SM_DEFINITION} ${infile} -o ${outfile}
+        DEPENDS ${infile}
+        IMPLICIT_DEPENDS CXX ${infile}
+        COMMENT "Building LLVM bitcode ${outfile}"
+        VERBATIM
+      )
+      set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile})
+
+      list(APPEND bc_files ${outfile})
     endforeach()
-  endif()
 
+    set(bclib_name "libomptarget-nvptx-cuda_${cuda_version}-sm_${sm}.bc")
+
+    # Link the per-source bitcode files of this CUDA/SM pair into one library.
+    add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
+      COMMAND ${bc_linker}
+        -o ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} ${bc_files}
+      DEPENDS ${bc_files}
+      COMMENT "Linking LLVM bitcode ${bclib_name}"
+      VERBATIM)
+    set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${bclib_name})
+
+    set(bclib_target_name "omptarget-nvptx-cuda_${cuda_version}-sm_${sm}-bc")
+
+    add_custom_target(${bclib_target_name} ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name})
+    add_dependencies(omptarget-nvptx-bc ${bclib_target_name})
+
+    # Copy the linked library into LIBOMPTARGET_LIBRARY_DIR after each build.
+    add_custom_command(TARGET ${bclib_target_name} POST_BUILD
+      COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
+        ${LIBOMPTARGET_LIBRARY_DIR} VERBATIM)
+
+    # Install bitcode library under the lib destination folder.
+    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} DESTINATION "${OPENMP_INSTALL_LIBDIR}")
+  endforeach()
+endforeach()
+
+# Test will be enabled if the building machine supports CUDA
+if (LIBOMPTARGET_DEP_CUDA_FOUND)
   add_subdirectory(test)
-else()
-  libomptarget_say("Not building CUDA offloading device RTL: tools to build bc lib not found in the system.")
 endif()