diff --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake --- a/libc/cmake/modules/LLVMLibCObjectRules.cmake +++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake @@ -59,19 +59,23 @@ set(${output_var} ${compile_options} PARENT_SCOPE) endfunction() -# Builds the entrypoint target for the GPU. +# Builds the object target for the GPU. +# This compiles the target for all supported architectures and embeds it into +# host binary for installing. The internal target contains the GPU code directly +# compiled for a single architecture used internally. # Usage: -# _build_gpu_entrypoint_objects( +# _build_gpu_objects( # +# # SRCS # HDRS # DEPENDS # COMPILE_OPTIONS # FLAGS # ) -function(_build_gpu_entrypoint_objects fq_target_name) +function(_build_gpu_objects fq_target_name internal_target_name) cmake_parse_arguments( - "ADD_GPU_ENTRYPOINT_OBJ" + "ADD_GPU_OBJ" "" # No optional arguments "NAME;CXX_STANDARD" # Single value arguments "SRCS;HDRS;DEPENDS;COMPILE_OPTIONS;FLAGS" # Multi value arguments @@ -82,7 +86,7 @@ # this so we can support multiple accelerators on the same machine. foreach(gpu_arch ${all_gpu_architectures}) set(gpu_target_name ${fq_target_name}.${gpu_arch}) - set(compile_options ${ADD_GPU_ENTRYPOINT_OBJ_COMPILE_OPTIONS}) + set(compile_options ${ADD_GPU_OBJ_COMPILE_OPTIONS}) # Derive the triple from the specified architecture. if("${gpu_arch}" IN_LIST all_amdgpu_architectures) set(gpu_target_triple "amdgcn-amd-amdhsa") @@ -101,14 +105,16 @@ add_library(${gpu_target_name} EXCLUDE_FROM_ALL OBJECT - ${ADD_GPU_ENTRYPOINT_OBJ_SRCS} - ${ADD_GPU_ENTRYPOINT_OBJ_HDRS} + ${ADD_GPU_OBJ_SRCS} + ${ADD_GPU_OBJ_HDRS} ) target_compile_options(${gpu_target_name} PRIVATE ${compile_options}) target_include_directories(${gpu_target_name} PRIVATE ${include_dirs}) - add_dependencies(${gpu_target_name} ${ADD_GPU_ENTRYPOINT_OBJ_DEPENDS}) target_compile_definitions(${gpu_target_name} PRIVATE LIBC_COPT_PUBLIC_PACKAGING) + if(ADD_GPU_OBJ_DEPENDS) + add_dependencies(${gpu_target_name} ${ADD_GPU_OBJ_DEPENDS}) + endif() # Append this target to a list of images to package into a single binary. set(input_file $) @@ -135,7 +141,7 @@ # TODO: In the future we will want to combine every architecture for a target # into a single bitcode file and use that. For now we simply build for # every single one and let the offloading linker handle it. - get_filename_component(stub_filename ${ADD_GPU_ENTRYPOINT_OBJ_SRCS} NAME) + get_filename_component(stub_filename ${ADD_GPU_OBJ_SRCS} NAME) file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/stubs) file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/stubs/${stub_filename} "// Empty file.\n") add_library( @@ -151,19 +157,7 @@ target_include_directories(${fq_target_name} PRIVATE ${include_dirs}) add_dependencies(${fq_target_name} ${full_deps_list} ${packaged_target_name}) - set_target_properties( - ${fq_target_name} - PROPERTIES - ENTRYPOINT_NAME ${ADD_ENTRYPOINT_OBJ_NAME} - TARGET_TYPE ${ENTRYPOINT_OBJ_TARGET_TYPE} - OBJECT_FILE "$" - CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD} - DEPS "${fq_deps_list}" - FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}" - ) - # We only build the internal target for a single supported architecture. - set(internal_target_name ${fq_target_name}.__internal__) set(include_dirs ${LIBC_BUILD_DIR}/include ${LIBC_SOURCE_DIR} ${LIBC_BUILD_DIR}) if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU OR LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX) @@ -171,8 +165,8 @@ ${internal_target_name} EXCLUDE_FROM_ALL OBJECT - ${ADD_ENTRYPOINT_OBJ_SRCS} - ${ADD_ENTRYPOINT_OBJ_HDRS} + ${ADD_GPU_OBJ_SRCS} + ${ADD_GPU_OBJ_HDRS} ) target_compile_options(${internal_target_name} BEFORE PRIVATE ${common_compile_options} --target=${LIBC_GPU_TARGET_TRIPLE}) @@ -182,17 +176,9 @@ target_compile_options(${internal_target_name} PRIVATE -march=${LIBC_GPU_TARGET_ARCHITECTURE}) endif() target_include_directories(${internal_target_name} PRIVATE ${include_dirs}) - add_dependencies(${internal_target_name} ${full_deps_list}) - set_target_properties( - ${internal_target_name} - PROPERTIES - CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD} - FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}" - ) - set_target_properties( - ${fq_target_name} - PROPERTIES OBJECT_FILE_RAW "$" - ) + if(full_deps_list) + add_dependencies(${internal_target_name} ${full_deps_list}) + endif() endif() endfunction() @@ -209,7 +195,7 @@ function(create_object_library fq_target_name) cmake_parse_arguments( "ADD_OBJECT" - "" # No optional arguments + "NO_GPU_BUNDLE" # No optional arguments "CXX_STANDARD" # Single value arguments "SRCS;HDRS;COMPILE_OPTIONS;DEPENDS;FLAGS" # Multivalue arguments ${ARGN} @@ -219,28 +205,49 @@ message(FATAL_ERROR "'add_object_library' rule requires SRCS to be specified.") endif() - add_library( - ${fq_target_name} - EXCLUDE_FROM_ALL - OBJECT - ${ADD_OBJECT_SRCS} - ${ADD_OBJECT_HDRS} - ) - target_include_directories( - ${fq_target_name} - PRIVATE - ${LIBC_BUILD_DIR}/include - ${LIBC_SOURCE_DIR} - ${LIBC_BUILD_DIR} - ) + # The GPU build uses a separate internal file. + if(LIBC_TARGET_ARCHITECTURE_IS_GPU AND NOT ${ADD_OBJECT_NO_GPU_BUNDLE}) + set(internal_target_name ${fq_target_name}.__internal__) + else() + set(internal_target_name ${fq_target_name}) + endif() + + get_fq_deps_list(fq_deps_list ${ADD_OBJECT_DEPENDS}) _get_common_compile_options( compile_options "${ADD_OBJECT_FLAGS}" ${ADD_OBJECT_COMPILE_OPTIONS} ) - target_compile_options(${fq_target_name} PRIVATE ${compile_options}) - get_fq_deps_list(fq_deps_list ${ADD_OBJECT_DEPENDS}) + # GPU builds require special handling for the objects because we want to + # export several different targets at once, e.g. for both Nvidia and AMD. + if(LIBC_TARGET_ARCHITECTURE_IS_GPU AND NOT ${ADD_OBJECT_NO_GPU_BUNDLE}) + _build_gpu_objects( + ${fq_target_name} + ${internal_target_name} + SRCS ${ADD_OBJECT_SRCS} + HDRS ${ADD_OBJECT_HDRS} + DEPENDS ${fq_deps_list} + COMPILE_OPTIONS ${common_compile_options} + FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}" + ) + else() + add_library( + ${fq_target_name} + EXCLUDE_FROM_ALL + OBJECT + ${ADD_OBJECT_SRCS} + ${ADD_OBJECT_HDRS} + ) + target_include_directories( + ${fq_target_name} + PRIVATE + ${LIBC_BUILD_DIR}/include + ${LIBC_SOURCE_DIR} + ${LIBC_BUILD_DIR} + ) + target_compile_options(${fq_target_name} PRIVATE ${compile_options}) + endif() if(SHOW_INTERMEDIATE_OBJECTS) message(STATUS "Adding object library ${fq_target_name}") @@ -262,11 +269,18 @@ ${fq_target_name} PROPERTIES TARGET_TYPE ${OBJECT_LIBRARY_TARGET_TYPE} - OBJECT_FILES "$" CXX_STANDARD ${ADD_OBJECT_CXX_STANDARD} DEPS "${fq_deps_list}" FLAGS "${ADD_OBJECT_FLAGS}" ) + + if(TARGET ${internal_target_name}) + set_target_properties( + ${fq_target_name} + PROPERTIES + OBJECT_FILES "$" + ) + endif() endfunction(create_object_library) # Internal function, used by `add_object_library`. @@ -483,13 +497,13 @@ # GPU builds require special handling for the objects because we want to # export several different targets at once, e.g. for both Nvidia and AMD. if(LIBC_TARGET_ARCHITECTURE_IS_GPU) - _build_gpu_entrypoint_objects( + _build_gpu_objects( ${fq_target_name} + ${internal_target_name} SRCS ${ADD_ENTRYPOINT_OBJ_SRCS} HDRS ${ADD_ENTRYPOINT_OBJ_HDRS} COMPILE_OPTIONS ${common_compile_options} DEPENDS ${full_deps_list} - CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD} FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}" ) else() @@ -505,12 +519,6 @@ target_compile_options(${internal_target_name} BEFORE PRIVATE ${common_compile_options}) target_include_directories(${internal_target_name} PRIVATE ${include_dirs}) add_dependencies(${internal_target_name} ${full_deps_list}) - set_target_properties( - ${internal_target_name} - PROPERTIES - CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD} - FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}" - ) add_library( ${fq_target_name} @@ -524,22 +532,36 @@ target_compile_options(${fq_target_name} BEFORE PRIVATE ${common_compile_options} -DLIBC_COPT_PUBLIC_PACKAGING) target_include_directories(${fq_target_name} PRIVATE ${include_dirs}) add_dependencies(${fq_target_name} ${full_deps_list}) + endif() + + set_target_properties( + ${fq_target_name} + PROPERTIES + ENTRYPOINT_NAME ${ADD_ENTRYPOINT_OBJ_NAME} + TARGET_TYPE ${ENTRYPOINT_OBJ_TARGET_TYPE} + OBJECT_FILE "$" + CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD} + DEPS "${fq_deps_list}" + FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}" + ) + if(TARGET ${internal_target_name}) + set_target_properties( + ${internal_target_name} + PROPERTIES + CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD} + FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}" + ) set_target_properties( ${fq_target_name} PROPERTIES - ENTRYPOINT_NAME ${ADD_ENTRYPOINT_OBJ_NAME} - TARGET_TYPE ${ENTRYPOINT_OBJ_TARGET_TYPE} - OBJECT_FILE "$" # TODO: We don't need to list internal object files if the internal # target is a normal static library. OBJECT_FILE_RAW "$" - CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD} - DEPS "${fq_deps_list}" - FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}" ) endif() + if(LLVM_LIBC_ENABLE_LINTING AND TARGET ${internal_target_name}) if(NOT LLVM_LIBC_CLANG_TIDY) message(FATAL_ERROR "Something is wrong! LLVM_LIBC_ENABLE_LINTING is " diff --git a/libc/startup/gpu/amdgpu/CMakeLists.txt b/libc/startup/gpu/amdgpu/CMakeLists.txt --- a/libc/startup/gpu/amdgpu/CMakeLists.txt +++ b/libc/startup/gpu/amdgpu/CMakeLists.txt @@ -10,6 +10,7 @@ -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -emit-llvm # AMDGPU's intermediate object file format is bitcode. --target=${LIBC_GPU_TARGET_TRIPLE} + NO_GPU_BUNDLE # Compile this file directly without special GPU handling. ) get_fq_target_name(crt1 fq_name) diff --git a/libc/startup/gpu/nvptx/CMakeLists.txt b/libc/startup/gpu/nvptx/CMakeLists.txt --- a/libc/startup/gpu/nvptx/CMakeLists.txt +++ b/libc/startup/gpu/nvptx/CMakeLists.txt @@ -10,6 +10,7 @@ -x cuda # Use the CUDA toolchain to emit the `_start` kernel. --offload-device-only --offload-arch=${LIBC_GPU_TARGET_ARCHITECTURE} + NO_GPU_BUNDLE # Compile this file directly without special GPU handling. ) get_fq_target_name(crt1 fq_name)