Build the new DeviceRTL bitcode library for AMDGPU as well as NVPTX.

The per-architecture build steps move out of the NVPTX-only foreach loop into
instantiate_DeviceRTL(target_cpu target_name [flags...]), which is called once
per entry in nvptx_sm_list and once per entry in amdgpu_mcpus. amdgpu_mcpus
defaults to a fixed gfx list and is replaced by LIBOMPTARGET_AMDGCN_GFXLIST
when that variable is defined. The nvptx-specific -target and +ptx61 flags
leave the shared bc_flags and are passed per target instead.

NOTE(review): leading whitespace and blank context lines in the hunks below
were restored by hand; if a hunk fails to match, retry with loose whitespace
matching (e.g. `patch -l`).
NOTE(review): this removes the aggregate target omptarget-new-nvptx-bc;
confirm nothing outside this file still depends on it.

diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
--- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt
+++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
@@ -91,6 +91,13 @@
   endif()
 endforeach()
 
+
+set(amdgpu_mcpus gfx700 gfx701 gfx801 gfx803 gfx900 gfx902 gfx906 gfx908 gfx90a gfx1010 gfx1030 gfx1031)
+if (DEFINED LIBOMPTARGET_AMDGCN_GFXLIST)
+  set(amdgpu_mcpus ${LIBOMPTARGET_AMDGCN_GFXLIST})
+endif()
+
+
 # Activate RTL message dumps if requested by the user.
 set(LIBOMPTARGET_DEVICE_DEBUG FALSE CACHE BOOL
   "Activate DeviceRTL debug messages.")
@@ -121,11 +128,9 @@
 
 # Set flags for LLVM Bitcode compilation.
 set(bc_flags -S -x c++ -std=c++17 ${clang_opt_flags}
-              -target nvptx64
               -Xclang -emit-llvm-bc
               -Xclang -aux-triple -Xclang ${aux_triple}
               -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device
-              -Xclang -target-feature -Xclang +ptx61
               -I${include_directory}
               -I${devicertl_base_directory}/../include
               ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL}
@@ -137,23 +142,25 @@
   list(APPEND bc_flags -DOMPTARGET_DEBUG=0)
 endif()
 
-# Create target to build all Bitcode libraries.
-add_custom_target(omptarget-new-nvptx-bc)
-add_dependencies(omptarget-new-nvptx-bc opt llvm-link)
 
-# Generate a Bitcode library for all the compute capabilities the user requested
-foreach(sm ${nvptx_sm_list})
-  # TODO: replace this with declare variant and isa selector.
-  set(cuda_flags -Xclang -target-cpu -Xclang sm_${sm} "-D__CUDA_ARCH__=${sm}0")
+# Build the DeviceRTL bitcode library for one target architecture.
+#   target_cpu  - passed to clang as -target-cpu; also part of file/target names
+#   target_name - target family used in library/target names (nvptx, amdgpu)
+#   ARGN        - additional clang flags specific to this target
+function(instantiate_DeviceRTL target_cpu target_name)
+  set(target_bc_flags ${ARGN})
   set(bc_files "")
   foreach(src ${src_files})
     get_filename_component(infile ${src} ABSOLUTE)
     get_filename_component(outfile ${src} NAME)
-    set(outfile "${outfile}-sm_${sm}.bc")
+    set(outfile "${outfile}-${target_cpu}.bc")
 
     add_custom_command(OUTPUT ${outfile}
-      COMMAND ${CLANG_TOOL} ${bc_flags}
-      ${cuda_flags} ${infile} -o ${outfile}
+      COMMAND ${CLANG_TOOL}
+      ${bc_flags}
+      -Xclang -target-cpu -Xclang ${target_cpu}
+      ${target_bc_flags}
+      ${infile} -o ${outfile}
       DEPENDS ${infile}
       IMPLICIT_DEPENDS CXX ${infile}
       COMMENT "Building LLVM bitcode ${outfile}"
@@ -173,7 +180,7 @@
     list(APPEND bc_files ${outfile})
   endforeach()
 
-  set(bclib_name "libomptarget-new-nvptx-sm_${sm}.bc")
+  set(bclib_name "libomptarget-new-${target_name}-${target_cpu}.bc")
 
   # Add a file-level dependency to ensure that llvm-link and opt are up-to-date.
   # By default, add_custom_command only builds the tool if the executable is missing
@@ -205,17 +212,44 @@
 
   set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${bclib_name})
 
-  set(bclib_target_name "omptarget-new-nvptx-sm_${sm}-bc")
+  set(bclib_target_name "omptarget-new-${target_name}-${target_cpu}-bc")
 
   add_custom_target(${bclib_target_name} ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}_opt)
-  add_dependencies(omptarget-new-nvptx-bc ${bclib_target_name})
-  add_dependencies(${bclib_target_name} opt llvm-link)
 
   # Copy library to destination.
+  # Note: This is acting on the llvm-link'ed library, not the opt'ed one
   add_custom_command(TARGET ${bclib_target_name} POST_BUILD
                     COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
                     ${LIBOMPTARGET_LIBRARY_DIR})
 
   # Install bitcode library under the lib destination folder.
   install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} DESTINATION "${OPENMP_INSTALL_LIBDIR}")
+endfunction()
+
+# Generate a Bitcode library for all the compute capabilities the user requested
+foreach(sm ${nvptx_sm_list})
+  set(target_cpu sm_${sm})
+  set(target_name nvptx)
+
+  # TODO: replace CUDA_ARCH with declare variant and isa selector.
+  set(target_bc_flags
+    -target nvptx64
+    -Xclang -target-feature
+    -Xclang +ptx61
+    "-D__CUDA_ARCH__=${sm}0")
+
+  instantiate_DeviceRTL(${target_cpu} ${target_name} ${target_bc_flags})
+endforeach()
+
+foreach(mcpu ${amdgpu_mcpus})
+  set(target_cpu ${mcpu})
+  set(target_name amdgpu)
+
+  set(target_bc_flags
+    -target amdgcn-amd-amdhsa
+    "-D__AMDGCN__"
+    -fvisibility=default
+    -nogpulib)
+
+  instantiate_DeviceRTL(${target_cpu} ${target_name} ${target_bc_flags})
 endforeach()