diff --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake
--- a/libc/cmake/modules/LLVMLibCObjectRules.cmake
+++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake
@@ -64,6 +64,62 @@
   set(${output_var} ${compile_options} PARENT_SCOPE)
 endfunction()
 
+# Obtains NVPTX specific arguments for compilation.
+# The PTX feature is primarily based on the CUDA toolchain version. We want to
+# be able to target NVPTX without an existing architecture, so we need to set
+# this manually. This simply sets the PTX feature to the minimum required for
+# the features we wish to use on that target.
+# Adjust as needed for desired PTX features.
+#
+# Arguments:
+#   output_var - Name of the caller-scope variable that receives the list of
+#                options.
+#   gpu_arch   - NVPTX architecture name such as "sm_70"; any name not listed
+#                below is a fatal configuration error.
+function(get_nvptx_compile_options output_var gpu_arch)
+  # Start from a fresh list: the caller's variables are visible inside a
+  # function, so appending to a same-named caller variable would accumulate
+  # options across calls.
+  set(nvptx_options "-march=${gpu_arch}")
+  # Quote the expansion so if() compares the literal architecture string
+  # rather than dereferencing it or failing to parse when it is empty.
+  if("${gpu_arch}" STREQUAL "sm_35")
+    list(APPEND nvptx_options "--cuda-feature=+ptx42")
+  elseif("${gpu_arch}" STREQUAL "sm_37")
+    list(APPEND nvptx_options "--cuda-feature=+ptx43")
+  elseif("${gpu_arch}" STREQUAL "sm_50")
+    list(APPEND nvptx_options "--cuda-feature=+ptx43")
+  elseif("${gpu_arch}" STREQUAL "sm_52")
+    list(APPEND nvptx_options "--cuda-feature=+ptx43")
+  elseif("${gpu_arch}" STREQUAL "sm_53")
+    list(APPEND nvptx_options "--cuda-feature=+ptx43")
+  elseif("${gpu_arch}" STREQUAL "sm_60")
+    list(APPEND nvptx_options "--cuda-feature=+ptx50")
+  elseif("${gpu_arch}" STREQUAL "sm_61")
+    list(APPEND nvptx_options "--cuda-feature=+ptx50")
+  elseif("${gpu_arch}" STREQUAL "sm_62")
+    list(APPEND nvptx_options "--cuda-feature=+ptx50")
+  elseif("${gpu_arch}" STREQUAL "sm_70")
+    list(APPEND nvptx_options "--cuda-feature=+ptx63")
+  elseif("${gpu_arch}" STREQUAL "sm_72")
+    list(APPEND nvptx_options "--cuda-feature=+ptx63")
+  elseif("${gpu_arch}" STREQUAL "sm_75")
+    list(APPEND nvptx_options "--cuda-feature=+ptx63")
+  elseif("${gpu_arch}" STREQUAL "sm_80")
+    list(APPEND nvptx_options "--cuda-feature=+ptx72")
+  elseif("${gpu_arch}" STREQUAL "sm_86")
+    list(APPEND nvptx_options "--cuda-feature=+ptx72")
+  else()
+    message(FATAL_ERROR "Unknown Nvidia GPU architecture '${gpu_arch}'")
+  endif()
+
+  # Pass the CUDA installation path only when one was configured.
+  if(LIBC_CUDA_ROOT)
+    list(APPEND nvptx_options "--cuda-path=${LIBC_CUDA_ROOT}")
+  endif()
+  set(${output_var} ${nvptx_options} PARENT_SCOPE)
+endfunction()
+
 # Builds the object target for the GPU.
 # This compiles the target for all supported architectures and embeds it into
 # host binary for installing. The internal target contains the GPU code directly
@@ -103,7 +159,8 @@
         list(APPEND compile_options "-mcpu=${gpu_arch}")
       elseif("${gpu_arch}" IN_LIST all_nvptx_architectures)
         set(gpu_target_triple "nvptx64-nvidia-cuda")
-        list(APPEND compile_options "-march=${gpu_arch}")
+        get_nvptx_compile_options(nvptx_options ${gpu_arch})
+        list(APPEND compile_options "${nvptx_options}")
       else()
         message(FATAL_ERROR "Unknown GPU architecture '${gpu_arch}'")
       endif()
@@ -200,9 +257,8 @@
     if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU)
       target_compile_options(${internal_target_name} PRIVATE -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto)
     elseif(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
-      target_compile_options(${internal_target_name} PRIVATE
-                             -march=${LIBC_GPU_TARGET_ARCHITECTURE}
-                             --cuda-path=${LIBC_CUDA_ROOT})
+      get_nvptx_compile_options(nvptx_options ${LIBC_GPU_TARGET_ARCHITECTURE})
+      target_compile_options(${internal_target_name} PRIVATE ${nvptx_options})
     endif()
     target_include_directories(${internal_target_name} PRIVATE ${include_dirs})
     if(full_deps_list)
diff --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake
--- a/libc/cmake/modules/LLVMLibCTestRules.cmake
+++ b/libc/cmake/modules/LLVMLibCTestRules.cmake
@@ -506,9 +506,9 @@
       -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto
       --target=${LIBC_GPU_TARGET_TRIPLE})
   elseif(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
+    get_nvptx_compile_options(nvptx_options ${LIBC_GPU_TARGET_ARCHITECTURE})
     target_compile_options(${fq_build_target_name} PRIVATE
-      --cuda-path=${LIBC_CUDA_ROOT}
-      -march=${LIBC_GPU_TARGET_ARCHITECTURE}
+      ${nvptx_options}
       --target=${LIBC_GPU_TARGET_TRIPLE})
   endif()
 
diff --git a/libc/startup/gpu/nvptx/CMakeLists.txt b/libc/startup/gpu/nvptx/CMakeLists.txt
--- a/libc/startup/gpu/nvptx/CMakeLists.txt
+++ b/libc/startup/gpu/nvptx/CMakeLists.txt
@@ -1,3 +1,4 @@
+get_nvptx_compile_options(nvptx_options ${LIBC_GPU_TARGET_ARCHITECTURE})
 add_startup_object(
   crt1
   SRC
@@ -8,9 +9,8 @@
     -ffreestanding # To avoid compiler warnings about calling the main function.
     -fno-builtin
     -nogpulib # Do not include any GPU vendor libraries.
-    -march=${LIBC_GPU_TARGET_ARCHITECTURE}
     --target=${LIBC_GPU_TARGET_TRIPLE}
-    --cuda-path=${LIBC_CUDA_ROOT}
+    ${nvptx_options}
   NO_GPU_BUNDLE # Compile this file directly without special GPU handling.
 )
 get_fq_target_name(crt1 fq_name)