diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -6,75 +6,38 @@ # ##===----------------------------------------------------------------------===## # -# Build the Device RTL for all toolchains that are available +# Build the DeviceRTL for all toolchains that are available # ##===----------------------------------------------------------------------===## -# TODO: copied from NVPTX, need to be generalized. +set(LIBOMPTARGET_BUILD_DEVICERTL_BCLIB TRUE CACHE BOOL + "Can be set to false to disable building this library.") -# By default we will not build NVPTX deviceRTL on a CUDA free system -set(LIBOMPTARGET_BUILD_NVPTX_BCLIB FALSE CACHE BOOL - "Whether build NVPTX deviceRTL on CUDA free system.") - -if (NOT (LIBOMPTARGET_DEP_CUDA_FOUND OR LIBOMPTARGET_BUILD_NVPTX_BCLIB)) - libomptarget_say("Not building NVPTX deviceRTL by default on CUDA free system.") +if (NOT LIBOMPTARGET_BUILD_DEVICERTL_BCLIB) + libomptarget_say("Not building DeviceRTL: Disabled by LIBOMPTARGET_BUILD_DEVICERTL_BCLIB") return() endif() if (NOT LIBOMPTARGET_LLVM_INCLUDE_DIRS) - libomptarget_say("Not building device RTL: Missing definition for LIBOMPTARGET_LLVM_INCLUDE_DIRS") - return() -endif() - - -# Check if we can create an LLVM bitcode implementation of the runtime library -# that could be inlined in the user application. For that we need to find -# a Clang compiler capable of compiling our CUDA files to LLVM bitcode and -# an LLVM linker. 
-set(LIBOMPTARGET_NVPTX_CUDA_COMPILER "" CACHE STRING - "Location of a CUDA compiler capable of emitting LLVM bitcode.") -set(LIBOMPTARGET_NVPTX_BC_LINKER "" CACHE STRING - "Location of a linker capable of linking LLVM bitcode objects.") - -if (NOT LIBOMPTARGET_NVPTX_CUDA_COMPILER STREQUAL "") - set(cuda_compiler ${LIBOMPTARGET_NVPTX_CUDA_COMPILER}) -elseif (LLVM_TOOL_CLANG_BUILD AND NOT CMAKE_CROSSCOMPILING) - # Compile the deviceRTL with the clang that is built in the project. - set(cuda_compiler "$<TARGET_FILE:clang>") -elseif(${CMAKE_C_COMPILER_ID} STREQUAL "Clang") - set(cuda_compiler ${CMAKE_C_COMPILER}) -else() - libomptarget_say("Not building deviceRTL: clang not found") - return() -endif() - -# Get compiler directory to try to locate a suitable linker. -get_filename_component(compiler_dir ${cuda_compiler} DIRECTORY) - -set(bc_linker_candidate "${compiler_dir}/llvm-link") -if (NOT LIBOMPTARGET_NVPTX_BC_LINKER STREQUAL "") - set(bc_linker ${LIBOMPTARGET_NVPTX_BC_LINKER}) -elseif (EXISTS "${bc_linker_candidate}" AND NOT IS_DIRECTORY "${bc_linker_candidate}") - # Try to use the linker consistent with the CUDA compiler unless explicitly - # set to a different linker. - set(bc_linker "${bc_linker_candidate}") -elseif (NOT OPENMP_STANDALONE_BUILD AND NOT CMAKE_CROSSCOMPILING) - # Use the linker also built in the same project. - set(bc_linker "$<TARGET_FILE:llvm-link>") -else() - libomptarget_say("Not building deviceRTL: llvm-link not found") + libomptarget_say("Not building DeviceRTL: Missing definition for LIBOMPTARGET_LLVM_INCLUDE_DIRS") return() endif() -set(opt_candidate "${compiler_dir}/opt") -if (EXISTS "${opt_candidate}" AND NOT IS_DIRECTORY "${opt_candidate}") - # Try to use the opt consistent with the CUDA compiler. - set(opt "${opt_candidate}") -elseif (NOT OPENMP_STANDALONE_BUILD AND NOT CMAKE_CROSSCOMPILING) - # Use opt that is also built in the same project. - set(opt "$<TARGET_FILE:opt>") +if (LLVM_DIR) + # Builds that use pre-installed LLVM have LLVM_DIR set. 
+ find_program(CLANG_TOOL clang PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH) + find_program(LINK_TOOL llvm-link PATHS ${LLVM_TOOLS_BINARY_DIR} + NO_DEFAULT_PATH) + find_program(OPT_TOOL opt PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH) + libomptarget_say("Building Device RTL. Using clang: ${CLANG_TOOL}") +elseif (LLVM_TOOL_CLANG_BUILD AND NOT CMAKE_CROSSCOMPILING AND NOT OPENMP_STANDALONE_BUILD) + # LLVM in-tree builds may use CMake target names to discover the tools. + set(CLANG_TOOL $<TARGET_FILE:clang>) + set(LINK_TOOL $<TARGET_FILE:llvm-link>) + set(OPT_TOOL $<TARGET_FILE:opt>) + libomptarget_say("Building DeviceRTL. Using clang from in-tree build") else() - libomptarget_say("Not building deviceRTL: opt not found") + libomptarget_say("Not building DeviceRTL. No appropriate clang found") return() endif() @@ -89,7 +52,7 @@ elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") set(aux_triple aarch64-unknown-linux-gnu) else() - libomptarget_say("Not building CUDA offloading device RTL: unknown host arch: ${CMAKE_HOST_SYSTEM_PROCESSOR}") + libomptarget_say("Not building DeviceRTL: unknown host arch: ${CMAKE_HOST_SYSTEM_PROCESSOR}") return() endif() @@ -128,20 +91,16 @@ endif() endforeach() -# Override default MAX_SM in src/target_impl.h if requested -if (DEFINED LIBOMPTARGET_NVPTX_MAX_SM) - set(MAX_SM_DEFINITION "-DMAX_SM=${LIBOMPTARGET_NVPTX_MAX_SM}") +set(amdgpu_mcpus gfx700 gfx701 gfx801 gfx803 gfx900 gfx902 gfx906 gfx908 gfx90a gfx1010 gfx1030 gfx1031) +if (DEFINED LIBOMPTARGET_AMDGCN_GFXLIST) + set(amdgpu_mcpus ${LIBOMPTARGET_AMDGCN_GFXLIST}) endif() + # Activate RTL message dumps if requested by the user. 
set(LIBOMPTARGET_DEVICE_DEBUG FALSE CACHE BOOL - "Activate NVPTX device RTL debug messages.") + "Activate DeviceRTL debug messages.") -if ("${cuda_compiler}" STREQUAL "$<TARGET_FILE:clang>") - libomptarget_say("Building LLVM bitcode offloading device RTL using in-tree clang.") -else () - libomptarget_say("Building LLVM bitcode offloading device RTL using ${cuda_compiler}") -endif () set(src_files ${source_directory}/Configuration.cpp @@ -168,11 +127,9 @@ # Set flags for LLVM Bitcode compilation. set(bc_flags -S -x c++ -std=c++17 ${clang_opt_flags} - -target nvptx64 -Xclang -emit-llvm-bc -Xclang -aux-triple -Xclang ${aux_triple} -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device - -Xclang -target-feature -Xclang +ptx61 -I${include_directory} -I${devicertl_base_directory}/../include ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL} @@ -184,29 +141,28 @@ list(APPEND bc_flags -DOMPTARGET_DEBUG=0) endif() -# Create target to build all Bitcode libraries. -add_custom_target(omptarget-new-nvptx-bc) -add_dependencies(omptarget-new-nvptx-bc opt llvm-link) -# Generate a Bitcode library for all the compute capabilities the user requested -foreach(sm ${nvptx_sm_list}) - # TODO: replace this with declare variant and isa selector. 
- set(cuda_flags -Xclang -target-cpu -Xclang sm_${sm} "-D__CUDA_ARCH__=${sm}0") +macro(instantiate_DeviceRTL) + # parameters target_cpu, target_name, target_bc_flags + set(bc_files "") foreach(src ${src_files}) get_filename_component(infile ${src} ABSOLUTE) get_filename_component(outfile ${src} NAME) - set(outfile "${outfile}-sm_${sm}.bc") + set(outfile "${outfile}-${target_cpu}.bc") add_custom_command(OUTPUT ${outfile} - COMMAND ${cuda_compiler} ${bc_flags} - ${cuda_flags} ${MAX_SM_DEFINITION} ${infile} -o ${outfile} + COMMAND ${CLANG_TOOL} + ${bc_flags} + -Xclang -target-cpu -Xclang ${target_cpu} + ${target_bc_flags} + ${infile} -o ${outfile} DEPENDS ${infile} IMPLICIT_DEPENDS CXX ${infile} COMMENT "Building LLVM bitcode ${outfile}" VERBATIM ) - if("${cuda_compiler}" STREQUAL "$<TARGET_FILE:clang>") + if("${CLANG_TOOL}" STREQUAL "$<TARGET_FILE:clang>") # Add a file-level dependency to ensure that clang is up-to-date. # By default, add_custom_command only builds clang if the # executable is missing. @@ -220,53 +176,79 @@ list(APPEND bc_files ${outfile}) endforeach() - set(bclib_name "libomptarget-new-nvptx-sm_${sm}.bc") + set(bclib_name "libomptarget-new-${target_name}-${target_cpu}.bc") + # Link to a bitcode library. add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} - COMMAND ${bc_linker} + COMMAND ${LINK_TOOL} -o ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} ${bc_files} DEPENDS ${bc_files} COMMENT "Linking LLVM bitcode ${bclib_name}" ) - if("${bc_linker}" STREQUAL "$<TARGET_FILE:llvm-link>") - # Add a file-level dependency to ensure that llvm-link is up-to-date. - # By default, add_custom_command only builds llvm-link if the - # executable is missing. 
- add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} - DEPENDS llvm-link - APPEND - ) - endif() add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}_opt - COMMAND ${opt} ${link_opt_flags} ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} + COMMAND ${OPT_TOOL} ${link_opt_flags} ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} -o ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} COMMENT "Optimizing LLVM bitcode ${bclib_name}" ) - if("${opt}" STREQUAL "$<TARGET_FILE:opt>") - # Add a file-level dependency to ensure that opt is up-to-date. - # By default, add_custom_command only builds opt if the - # executable is missing. + + # Add a file-level dependency to ensure that llvm-link and opt are up-to-date. + # By default, add_custom_command only builds the tool if the executable is missing + if("${LINK_TOOL}" STREQUAL "$<TARGET_FILE:llvm-link>") + add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} + DEPENDS llvm-link + APPEND) + endif() + if("${OPT_TOOL}" STREQUAL "$<TARGET_FILE:opt>") add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}_opt DEPENDS opt - APPEND - ) + APPEND) endif() + set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${bclib_name}) - set(bclib_target_name "omptarget-new-nvptx-sm_${sm}-bc") + set(bclib_target_name "omptarget-new-${target_name}-${target_cpu}-bc") add_custom_target(${bclib_target_name} ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}_opt) - add_dependencies(omptarget-new-nvptx-bc ${bclib_target_name}) - add_dependencies(${bclib_target_name} opt llvm-link) # Copy library to destination. + # Note: This is acting on the llvm-link'ed library, not the opt'ed one add_custom_command(TARGET ${bclib_target_name} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} ${LIBOMPTARGET_LIBRARY_DIR}) # Install bitcode library under the lib destination folder. 
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} DESTINATION "${OPENMP_INSTALL_LIBDIR}") +endmacro() + +# Generate a Bitcode library for all the compute capabilities the user requested +foreach(sm ${nvptx_sm_list}) + set(target_cpu sm_${sm}) + set(target_name nvptx) + + # TODO: replace CUDA_ARCH with declare variant and isa selector. + set(target_bc_flags + -target nvptx64 + -Xclang -target-feature + -Xclang +ptx61 + "-D__CUDA_ARCH__=${sm}0") + + instantiate_DeviceRTL() +endforeach() + +foreach(mcpu ${amdgpu_mcpus}) + set(target_cpu ${mcpu}) + set(target_name amdgpu) + + set(target_bc_flags + -target amdgcn-amd-amdhsa + "-D__AMDGCN__" + -fvisibility=default + -nogpulib) + + # Disabled on this branch, amdgpu specific part requires code changes + # before it can build. + # instantiate_DeviceRTL() endforeach()