# Review notes (text before the first "diff --git" header is ignored by patch tools).
#
# What this patch does:
#   * Locates clang-offload-packager as PACKAGER_TOOL next to clang, llvm-link and opt,
#     both for pre-installed LLVM (find_program) and for in-tree builds (target file).
#   * compileDeviceRTLLibrary() takes a new third positional argument, target_triple.
#     For every (target_name, target_cpu) pair it packages the optimized bitcode with
#     PACKAGER_TOOL and embeds the result into devicertl-<target_name>-<target_cpu>.o
#     by compiling src/Stub.cpp with -fembed-offload-object.
#   * Each embedded object is appended to IMPORTED_OBJECTS of omptarget.devicertl.all_objs,
#     which omptarget.devicertl (STATIC) links; the former per-source compile loop that
#     used clang_lib_flags is removed.
#
# Changes made during review:
#   * The "missing tool" guard now also checks PACKAGER_TOOL, so a standalone build
#     without clang-offload-packager is skipped at configure time instead of carrying
#     PACKAGER_TOOL-NOTFOUND into a build rule.
#   * The packaging rule's COMMENT names the file it really writes (packaged_<bclib>),
#     and the rule is marked VERBATIM like the embedding rule next to it.
#   * Stub.cpp is listed in DEPENDS of the embedding rule.
#   * $<TARGET_FILE:...> generator expressions, which had been truncated to "$", are
#     restored from the tool/target names used in the matching DEPENDS lines.
#
# NOTE(review): indentation and the position of blank context lines were reconstructed
# from the hunk line counts -- regenerate the patch against the tree before applying.
diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
--- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt
+++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
@@ -28,8 +28,8 @@
   # Builds that use pre-installed LLVM have LLVM_DIR set.
   # A standalone or LLVM_ENABLE_RUNTIMES=openmp build takes this route
   find_program(CLANG_TOOL clang PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
-  find_program(LINK_TOOL llvm-link PATHS ${LLVM_TOOLS_BINARY_DIR}
-    NO_DEFAULT_PATH)
+  find_program(PACKAGER_TOOL clang-offload-packager PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
+  find_program(LINK_TOOL llvm-link PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
   find_program(OPT_TOOL opt PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
-  if ((NOT CLANG_TOOL) OR (NOT LINK_TOOL) OR (NOT OPT_TOOL))
-    libomptarget_say("Not building DeviceRTL. Missing clang: ${CLANG_TOOL}, llvm-link: ${LINK_TOOL} or opt: ${OPT_TOOL}")
+  if ((NOT CLANG_TOOL) OR (NOT LINK_TOOL) OR (NOT OPT_TOOL) OR (NOT PACKAGER_TOOL))
+    libomptarget_say("Not building DeviceRTL. Missing clang: ${CLANG_TOOL}, llvm-link: ${LINK_TOOL}, opt: ${OPT_TOOL} or clang-offload-packager: ${PACKAGER_TOOL}")
@@ -41,6 +41,7 @@
   # LLVM in-tree builds may use CMake target names to discover the tools.
   # A LLVM_ENABLE_PROJECTS=openmp build takes this route
   set(CLANG_TOOL $<TARGET_FILE:clang>)
+  set(PACKAGER_TOOL $<TARGET_FILE:clang-offload-packager>)
   set(LINK_TOOL $<TARGET_FILE:llvm-link>)
   set(OPT_TOOL $<TARGET_FILE:opt>)
   libomptarget_say("Building DeviceRTL. Using clang from in-tree build")
@@ -138,7 +139,9 @@
   list(APPEND bc_flags -DOMPTARGET_DEBUG=0)
 endif()
 
-function(compileDeviceRTLLibrary target_cpu target_name)
+# Object target that collects the embedded device objects created per architecture below
+add_library(omptarget.devicertl.all_objs OBJECT IMPORTED)
+function(compileDeviceRTLLibrary target_cpu target_name target_triple)
   set(target_bc_flags ${ARGN})
 
   set(bc_files "")
@@ -196,6 +199,28 @@
     COMMENT "Optimizing LLVM bitcode ${bclib_name}"
   )
 
+  # Package the optimized bitcode and embed it in an ELF object for the static library
+  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
+    COMMAND ${PACKAGER_TOOL} -o ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
+      "--image=file=${CMAKE_CURRENT_BINARY_DIR}/${bclib_name},triple=${target_triple},arch=${target_cpu},kind=openmp"
+    DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
+    COMMENT "Packaging LLVM offloading binary packaged_${bclib_name}"
+    VERBATIM
+  )
+
+  set(output_name "${CMAKE_CURRENT_BINARY_DIR}/devicertl-${target_name}-${target_cpu}.o")
+  add_custom_command(OUTPUT ${output_name}
+    COMMAND ${CLANG_TOOL} --std=c++17 -c -nostdlib
+            -Xclang -fembed-offload-object=${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
+            -o ${output_name}
+            ${source_directory}/Stub.cpp
+    DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name} ${source_directory}/Stub.cpp
+    COMMENT "Embedding LLVM offloading binary in ${output_name}"
+    VERBATIM
+  )
+  set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${output_name})
+  set_property(TARGET omptarget.devicertl.all_objs APPEND PROPERTY IMPORTED_OBJECTS ${output_name})
+
   # Add a file-level dependency to ensure that llvm-link and opt are up-to-date.
   # By default, add_custom_command only builds the tool if the executable is missing
   if("${LINK_TOOL}" STREQUAL "$<TARGET_FILE:llvm-link>")
@@ -208,6 +233,16 @@
       DEPENDS opt
       APPEND)
   endif()
+  if("${PACKAGER_TOOL}" STREQUAL "$<TARGET_FILE:clang-offload-packager>")
+    add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
+      DEPENDS clang-offload-packager
+      APPEND)
+  endif()
+  if("${CLANG_TOOL}" STREQUAL "$<TARGET_FILE:clang>")
+    add_custom_command(OUTPUT ${output_name}
+      DEPENDS clang
+      APPEND)
+  endif()
 
   set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${bclib_name})
 
@@ -228,59 +263,15 @@
 # Generate a Bitcode library for all the compute capabilities the user requested
 add_custom_target(omptarget.devicertl.nvptx)
 foreach(sm ${nvptx_sm_list})
-  compileDeviceRTLLibrary(sm_${sm} nvptx -fopenmp-targets=nvptx64-nvidia-cuda -DLIBOMPTARGET_BC_TARGET --cuda-feature=+ptx61)
+  compileDeviceRTLLibrary(sm_${sm} nvptx nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -DLIBOMPTARGET_BC_TARGET --cuda-feature=+ptx61)
 endforeach()
 
 add_custom_target(omptarget.devicertl.amdgpu)
 foreach(mcpu ${amdgpu_mcpus})
-  compileDeviceRTLLibrary(${mcpu} amdgpu -fopenmp-targets=amdgcn-amd-amdhsa -DLIBOMPTARGET_BC_TARGET -D__AMDGCN__ -nogpulib)
-endforeach()
-
-# Set the flags to build the device runtime from clang.
-set(clang_lib_flags -fopenmp -fopenmp-cuda-mode -foffload-lto -fvisibility=hidden -Xopenmp-target=nvptx64-nvidia-cuda --cuda-feature=+ptx61 -nocudalib -nogpulib -nostdinc ${clang_opt_flags})
-foreach(arch ${nvptx_sm_list})
-  set(clang_lib_flags ${clang_lib_flags} --offload-arch=sm_${arch})
-endforeach()
-foreach(arch ${amdgpu_mcpus})
-  set(clang_lib_flags ${clang_lib_flags} --offload-arch=${arch})
-endforeach()
-
-# Build the static library version of the device runtime.
-# first create an object target
-add_library(omptarget.devicertl.all_objs OBJECT IMPORTED)
-foreach(src ${src_files})
-  get_filename_component(infile ${src} ABSOLUTE)
-  get_filename_component(outfile ${src} NAME)
-  set(outfile "${outfile}.o")
-  set(outfile_full_path "${CMAKE_CURRENT_BINARY_DIR}/${outfile}")
-
-  add_custom_command(OUTPUT ${outfile_full_path}
-    COMMAND ${CLANG_TOOL} ${clang_lib_flags} --std=c++17 -c
-            -o ${outfile_full_path}
-            -I${include_directory}
-            -I${devicertl_base_directory}/../include
-            ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL}
-            ${infile}
-    DEPENDS ${infile} ${include_files}
-    IMPLICIT_DEPENDS CXX ${infile}
-    COMMENT "Building device code ${outfile}"
-    VERBATIM
-  )
-  if("${CLANG_TOOL}" STREQUAL "$<TARGET_FILE:clang>")
-    # Add a file-level dependency to ensure that clang is up-to-date.
-    # By default, add_custom_command only builds clang if the
-    # executable is missing.
-    add_custom_command(OUTPUT ${outfile_full_path}
-      DEPENDS clang
-      APPEND
-    )
-  endif()
-  set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile_full_path})
-
-  set_property(TARGET omptarget.devicertl.all_objs APPEND PROPERTY IMPORTED_OBJECTS ${outfile_full_path})
+  compileDeviceRTLLibrary(${mcpu} amdgpu amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -DLIBOMPTARGET_BC_TARGET -D__AMDGCN__ -nogpulib)
 endforeach()
 
-# second archive all the object files into a static library
+# Archive all the object files generated above into a static library
 add_library(omptarget.devicertl STATIC)
 set_target_properties(omptarget.devicertl PROPERTIES LINKER_LANGUAGE CXX)
 target_link_libraries(omptarget.devicertl PRIVATE omptarget.devicertl.all_objs)
diff --git a/openmp/libomptarget/DeviceRTL/src/Stub.cpp b/openmp/libomptarget/DeviceRTL/src/Stub.cpp
new file mode 100644
--- /dev/null
+++ b/openmp/libomptarget/DeviceRTL/src/Stub.cpp
@@ -0,0 +1 @@
+// This is an empty file used to create a device fatbinary.