This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
libc/
-
cmake/modules/
-
modules/
-
LLVMLibCObjectRules.cmake
-
LLVMLibCTestRules.cmake
-
startup/gpu/nvptx/
-
gpu/
-
nvptx/
-
CMakeLists.txt

Differential D148532

[libc] Add special handling for CUDA PTX features
ClosedPublic

Authored by jhuber6 on Apr 17 2023, 7:22 AM.

Download Raw Diff

Details

Reviewers

jdoerfert
tianshilei1992
JonChesterfield
lntue
sivachandra
tra

Commits

rGe2356fb07e57: [libc] Add special handling for CUDA PTX features

Summary

The NVIDIA compilation path requires some special options. This is
mostly because compilation is dependent on having a valid CUDA
toolchain. We don't actually need the CUDA toolchain to create the
exported libcgpu.a library because it's pure LLVM-IR. However, for
some language features we need the PTX version to be set. This is
normally set by checking the CUDA version, but without one installed it
will fail to build. We instead choose a minimum set of features on the
desired target, inferred from
https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes
and the PTX reference for functions like nanosleep.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

jhuber6 created this revision.Apr 17 2023, 7:22 AM

Herald added projects: Restricted Project, Restricted Project. · View Herald TranscriptApr 17 2023, 7:22 AM

Herald added subscribers: libc-commits, mattd, gchakrabarti and 4 others. · View Herald Transcript

jhuber6 requested review of this revision.Apr 17 2023, 7:22 AM

Nice, thank you! It's great seeing the list of misc flags needed to get freestanding nvptx to run shrink. This works for me.

Looks reasonable

This revision is now accepted and ready to land.Apr 17 2023, 7:31 AM

Harbormaster completed remote builds in B226096: Diff 514224.Apr 17 2023, 8:19 AM

Closed by commit rGe2356fb07e57: [libc] Add special handling for CUDA PTX features (authored by jhuber6). · Explain WhyApr 17 2023, 9:51 AM

This revision was automatically updated to reflect the committed changes.

jhuber6 added a commit: rGe2356fb07e57: [libc] Add special handling for CUDA PTX features.

Revision Contents

Path

Size

libc/

cmake/

modules/

LLVMLibCObjectRules.cmake

52 lines

LLVMLibCTestRules.cmake

4 lines

startup/

gpu/

nvptx/

CMakeLists.txt

4 lines

Diff 514282

libc/cmake/modules/LLVMLibCObjectRules.cmake

Show First 20 Lines • Show All 58 Lines • ▼ Show 20 Lines	function(_get_common_compile_options output_var flags)
endif()		endif()
if (LIBC_TARGET_ARCHITECTURE_IS_GPU)		if (LIBC_TARGET_ARCHITECTURE_IS_GPU)
list(APPEND compile_options "-nogpulib")		list(APPEND compile_options "-nogpulib")
list(APPEND compile_options "-fvisibility=hidden")		list(APPEND compile_options "-fvisibility=hidden")
endif()		endif()
set(${output_var} ${compile_options} PARENT_SCOPE)		set(${output_var} ${compile_options} PARENT_SCOPE)
endfunction()		endfunction()

		# Obtains NVPTX specific arguments for compilation.
		# The PTX feature is normally derived from the CUDA toolchain version. We want
		# to be able to target NVPTX without an existing CUDA installation, so we need
		# to set this manually. This simply sets the PTX feature to the minimum
		# required for the features we wish to use on that target.
		# Adjust as needed for desired PTX features.
		#
		# Usage:
		#   get_nvptx_compile_options(<output_var> <gpu_arch>)
		#
		#   <output_var>  Name of the variable, set in the caller's scope, that
		#                 receives the list of compile options.
		#   <gpu_arch>    NVPTX architecture name, e.g. "sm_70". An architecture that
		#                 is not listed below is a fatal configuration error.
		#
		# The resulting list always holds "-march=<gpu_arch>" followed by the matching
		# "--cuda-feature=+ptxNN" flag, plus "--cuda-path=${LIBC_CUDA_ROOT}" when
		# LIBC_CUDA_ROOT is set to a non-false value.
		function(get_nvptx_compile_options output_var gpu_arch)
		  # A function starts with a copy of the caller's variables. Callers commonly
		  # pass 'nvptx_options' as the output name (sometimes inside a loop), so the
		  # local list must be reset or flags from a previous call would leak into
		  # this one.
		  set(nvptx_options "")
		  list(APPEND nvptx_options "-march=${gpu_arch}")

		  # The architecture is quoted so that an empty value reaches the diagnostic
		  # below instead of producing a malformed if() expression.
		  if("${gpu_arch}" STREQUAL "sm_35")
		    list(APPEND nvptx_options "--cuda-feature=+ptx42")
		  elseif("${gpu_arch}" MATCHES "^sm_(37|50|52|53)$")
		    list(APPEND nvptx_options "--cuda-feature=+ptx43")
		  elseif("${gpu_arch}" MATCHES "^sm_(60|61|62)$")
		    list(APPEND nvptx_options "--cuda-feature=+ptx50")
		  elseif("${gpu_arch}" MATCHES "^sm_(70|72|75)$")
		    list(APPEND nvptx_options "--cuda-feature=+ptx63")
		  elseif("${gpu_arch}" MATCHES "^sm_(80|86)$")
		    list(APPEND nvptx_options "--cuda-feature=+ptx72")
		  else()
		    message(FATAL_ERROR "Unknown Nvidia GPU architecture '${gpu_arch}'")
		  endif()

		  # Only forward a CUDA path when one was actually provided.
		  if(LIBC_CUDA_ROOT)
		    list(APPEND nvptx_options "--cuda-path=${LIBC_CUDA_ROOT}")
		  endif()
		  set(${output_var} ${nvptx_options} PARENT_SCOPE)
		endfunction()

# Builds the object target for the GPU.		# Builds the object target for the GPU.
# This compiles the target for all supported architectures and embeds it into		# This compiles the target for all supported architectures and embeds it into
# host binary for installing. The internal target contains the GPU code directly		# host binary for installing. The internal target contains the GPU code directly
# compiled for a single architecture used internally.		# compiled for a single architecture used internally.
# Usage:		# Usage:
# _build_gpu_objects(		# _build_gpu_objects(
# <target_name>		# <target_name>
# <internal_target_name>		# <internal_target_name>
Show All 23 Lines	foreach(gpu_arch ${LIBC_GPU_ARCHITECTURES})
set(gpu_target_name ${fq_target_name}.${src_name}.${gpu_arch})		set(gpu_target_name ${fq_target_name}.${src_name}.${gpu_arch})
set(compile_options ${ADD_GPU_OBJ_COMPILE_OPTIONS})		set(compile_options ${ADD_GPU_OBJ_COMPILE_OPTIONS})
# Derive the triple from the specified architecture.		# Derive the triple from the specified architecture.
if("${gpu_arch}" IN_LIST all_amdgpu_architectures)		if("${gpu_arch}" IN_LIST all_amdgpu_architectures)
set(gpu_target_triple "amdgcn-amd-amdhsa")		set(gpu_target_triple "amdgcn-amd-amdhsa")
list(APPEND compile_options "-mcpu=${gpu_arch}")		list(APPEND compile_options "-mcpu=${gpu_arch}")
elseif("${gpu_arch}" IN_LIST all_nvptx_architectures)		elseif("${gpu_arch}" IN_LIST all_nvptx_architectures)
set(gpu_target_triple "nvptx64-nvidia-cuda")		set(gpu_target_triple "nvptx64-nvidia-cuda")
list(APPEND compile_options "-march=${gpu_arch}")		get_nvptx_compile_options(nvptx_options ${gpu_arch})
		list(APPEND compile_options "${nvptx_options}")
else()		else()
message(FATAL_ERROR "Unknown GPU architecture '${gpu_arch}'")		message(FATAL_ERROR "Unknown GPU architecture '${gpu_arch}'")
endif()		endif()
list(APPEND compile_options "--target=${gpu_target_triple}")		list(APPEND compile_options "--target=${gpu_target_triple}")
list(APPEND compile_options "-emit-llvm")		list(APPEND compile_options "-emit-llvm")

# Build the library for this target architecture. We always emit LLVM-IR for		# Build the library for this target architecture. We always emit LLVM-IR for
# packaged GPU binaries.		# packaged GPU binaries.
▲ Show 20 Lines • Show All 80 Lines • ▼ Show 20 Lines	add_library(
${ADD_GPU_OBJ_SRCS}		${ADD_GPU_OBJ_SRCS}
${ADD_GPU_OBJ_HDRS}		${ADD_GPU_OBJ_HDRS}
)		)
target_compile_options(${internal_target_name} BEFORE PRIVATE		target_compile_options(${internal_target_name} BEFORE PRIVATE
${common_compile_options} --target=${LIBC_GPU_TARGET_TRIPLE})		${common_compile_options} --target=${LIBC_GPU_TARGET_TRIPLE})
if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU)		if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU)
target_compile_options(${internal_target_name} PRIVATE -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto)		target_compile_options(${internal_target_name} PRIVATE -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto)
elseif(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)		elseif(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
target_compile_options(${internal_target_name} PRIVATE		get_nvptx_compile_options(nvptx_options ${LIBC_GPU_TARGET_ARCHITECTURE})
-march=${LIBC_GPU_TARGET_ARCHITECTURE}		target_compile_options(${internal_target_name} PRIVATE ${nvptx_options})
--cuda-path=${LIBC_CUDA_ROOT})
endif()		endif()
target_include_directories(${internal_target_name} PRIVATE ${include_dirs})		target_include_directories(${internal_target_name} PRIVATE ${include_dirs})
if(full_deps_list)		if(full_deps_list)
add_dependencies(${internal_target_name} ${full_deps_list})		add_dependencies(${internal_target_name} ${full_deps_list})
endif()		endif()
endif()		endif()
endfunction()		endfunction()

▲ Show 20 Lines • Show All 612 Lines • Show Last 20 Lines

libc/cmake/modules/LLVMLibCTestRules.cmake

Show First 20 Lines • Show All 500 Lines • ▼ Show 20 Lines	function(add_integration_test test_name)
target_compile_options(${fq_build_target_name}		target_compile_options(${fq_build_target_name}
PRIVATE -fpie -ffreestanding ${INTEGRATION_TEST_COMPILE_OPTIONS})		PRIVATE -fpie -ffreestanding ${INTEGRATION_TEST_COMPILE_OPTIONS})
# The GPU build requires overriding the default CMake triple and architecture.		# The GPU build requires overriding the default CMake triple and architecture.
if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU)		if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU)
target_compile_options(${fq_build_target_name} PRIVATE		target_compile_options(${fq_build_target_name} PRIVATE
-mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto		-mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto
--target=${LIBC_GPU_TARGET_TRIPLE})		--target=${LIBC_GPU_TARGET_TRIPLE})
elseif(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)		elseif(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
		get_nvptx_compile_options(nvptx_options ${LIBC_GPU_TARGET_ARCHITECTURE})
target_compile_options(${fq_build_target_name} PRIVATE		target_compile_options(${fq_build_target_name} PRIVATE
--cuda-path=${LIBC_CUDA_ROOT}		${nvptx_options}
-march=${LIBC_GPU_TARGET_ARCHITECTURE}
--target=${LIBC_GPU_TARGET_TRIPLE})		--target=${LIBC_GPU_TARGET_TRIPLE})
endif()		endif()

target_link_options(${fq_build_target_name} PRIVATE -nostdlib -static)		target_link_options(${fq_build_target_name} PRIVATE -nostdlib -static)
target_link_libraries(		target_link_libraries(
${fq_build_target_name}		${fq_build_target_name}
# The NVIDIA 'nvlink' linker does not currently support static libraries.		# The NVIDIA 'nvlink' linker does not currently support static libraries.
$<$<NOT:$<BOOL:${LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX}>>:${fq_target_name}.__libc__>		$<$<NOT:$<BOOL:${LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX}>>:${fq_target_name}.__libc__>
Show All 21 Lines

libc/startup/gpu/nvptx/CMakeLists.txt

				get_nvptx_compile_options(nvptx_options ${LIBC_GPU_TARGET_ARCHITECTURE})
	add_startup_object(			add_startup_object(
	crt1			crt1
	SRC			SRC
	start.cpp			start.cpp
	DEPENDS			DEPENDS
	libc.src.__support.RPC.rpc_client			libc.src.__support.RPC.rpc_client
	COMPILE_OPTIONS			COMPILE_OPTIONS
	-ffreestanding # To avoid compiler warnings about calling the main function.			-ffreestanding # To avoid compiler warnings about calling the main function.
	-fno-builtin			-fno-builtin
	-nogpulib # Do not include any GPU vendor libraries.			-nogpulib # Do not include any GPU vendor libraries.
	-march=${LIBC_GPU_TARGET_ARCHITECTURE}
	--target=${LIBC_GPU_TARGET_TRIPLE}			--target=${LIBC_GPU_TARGET_TRIPLE}
	--cuda-path=${LIBC_CUDA_ROOT}			${nvptx_options}
	NO_GPU_BUNDLE # Compile this file directly without special GPU handling.			NO_GPU_BUNDLE # Compile this file directly without special GPU handling.
	)			)
	get_fq_target_name(crt1 fq_name)			get_fq_target_name(crt1 fq_name)

	# Ensure that clang uses the correct linker for this object type.			# Ensure that clang uses the correct linker for this object type.
	target_link_libraries(${fq_name}			target_link_libraries(${fq_name}
	PUBLIC			PUBLIC
	"-march=${LIBC_GPU_TARGET_ARCHITECTURE}"			"-march=${LIBC_GPU_TARGET_ARCHITECTURE}"
	"--target=${LIBC_GPU_TARGET_TRIPLE}"			"--target=${LIBC_GPU_TARGET_TRIPLE}"
	"--cuda-path=${LIBC_CUDA_ROOT}"			"--cuda-path=${LIBC_CUDA_ROOT}"
	)			)