Diff 516523

libc/src/__support/GPU/amdgpu/utils.h

	//===-------------- AMDGPU implementation of GPU utils ----------- C++ --===//			//===-------------- AMDGPU implementation of GPU utils ----------- C++ --===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#ifndef LLVM_LIBC_SRC_SUPPORT_GPU_AMDGPU_IO_H			#ifndef LLVM_LIBC_SRC_SUPPORT_GPU_AMDGPU_IO_H
	#define LLVM_LIBC_SRC_SUPPORT_GPU_AMDGPU_IO_H			#define LLVM_LIBC_SRC_SUPPORT_GPU_AMDGPU_IO_H

	#include "src/__support/common.h"			#include "src/__support/common.h"

	#include <stdint.h>			#include <stdint.h>

	namespace __llvm_libc {			namespace __llvm_libc {
				namespace gpu {

				/// The number of threads that execute in lock-step in a wavefront.
				constexpr const uint64_t LANE_SIZE = __AMDGCN_WAVEFRONT_SIZE;

				/// Returns the number of workgroups in the 'x' dimension of the grid.
				LIBC_INLINE uint32_t get_num_blocks_x() {
				  // Workgroup count along x: the grid extent divided by the workgroup extent.
				  const uint32_t grid_extent = __builtin_amdgcn_grid_size_x();
				  const uint32_t group_extent = __builtin_amdgcn_workgroup_size_x();
				  return grid_extent / group_extent;
				}

				/// Returns the number of workgroups in the 'y' dimension of the grid.
				LIBC_INLINE uint32_t get_num_blocks_y() {
				  // Workgroup count along y: the grid extent divided by the workgroup extent.
				  const uint32_t grid_extent = __builtin_amdgcn_grid_size_y();
				  const uint32_t group_extent = __builtin_amdgcn_workgroup_size_y();
				  return grid_extent / group_extent;
				}

				/// Returns the number of workgroups in the 'z' dimension of the grid.
				LIBC_INLINE uint32_t get_num_blocks_z() {
				  // Workgroup count along z: the grid extent divided by the workgroup extent.
				  const uint32_t grid_extent = __builtin_amdgcn_grid_size_z();
				  const uint32_t group_extent = __builtin_amdgcn_workgroup_size_z();
				  return grid_extent / group_extent;
				}

				/// Returns the 'x' dimension of the current AMD workgroup's id.
	LIBC_INLINE uint32_t get_block_id_x() {			LIBC_INLINE uint32_t get_block_id_x() {
	return __builtin_amdgcn_workgroup_id_x();			return __builtin_amdgcn_workgroup_id_x();
	}			}

				/// Returns the 'y' dimension of the current AMD workgroup's id.
				LIBC_INLINE uint32_t get_block_id_y() {
				return __builtin_amdgcn_workgroup_id_y();
				}

				/// Returns the 'z' dimension of the current AMD workgroup's id.
				LIBC_INLINE uint32_t get_block_id_z() {
				return __builtin_amdgcn_workgroup_id_z();
				}

				/// Returns the absolute id of the AMD workgroup.
				LIBC_INLINE uint64_t get_block_id() {
				  // Widen to 64 bits before multiplying: the per-dimension ids and counts
				  // are 32-bit, so their products would otherwise wrap before the result
				  // is converted to the uint64_t return type.
				  const uint64_t num_x = get_num_blocks_x();
				  const uint64_t num_y = get_num_blocks_y();
				  return get_block_id_x() + num_x * get_block_id_y() +
				         num_x * num_y * get_block_id_z();
				}

				/// Returns the number of workitems in the 'x' dimension.
				LIBC_INLINE uint32_t get_num_threads_x() {
				return __builtin_amdgcn_workgroup_size_x();
				}

				/// Returns the number of workitems in the 'y' dimension.
				LIBC_INLINE uint32_t get_num_threads_y() {
				return __builtin_amdgcn_workgroup_size_y();
				}

				/// Returns the number of workitems in the 'z' dimension.
				LIBC_INLINE uint32_t get_num_threads_z() {
				return __builtin_amdgcn_workgroup_size_z();
				}

				/// Returns the 'x' dimension id of the workitem in the current AMD workgroup.
				tra [Not Done]: wrong dimension label. Here and below.
				LIBC_INLINE uint32_t get_thread_id_x() {
				return __builtin_amdgcn_workitem_id_x();
				}

				/// Returns the 'y' dimension id of the workitem in the current AMD workgroup.
				LIBC_INLINE uint32_t get_thread_id_y() {
				return __builtin_amdgcn_workitem_id_y();
				}

				/// Returns the 'z' dimension id of the workitem in the current AMD workgroup.
				LIBC_INLINE uint32_t get_thread_id_z() {
				return __builtin_amdgcn_workitem_id_z();
				}

				/// Returns the absolute id of the thread in the current AMD workgroup.
				LIBC_INLINE uint64_t get_thread_id() {
				  // Linearise (x, y, z) with x varying fastest.
				  const uint32_t row_stride = get_num_threads_x();
				  const uint32_t plane_stride = row_stride * get_num_threads_y();
				  return get_thread_id_x() + row_stride * get_thread_id_y() +
				         plane_stride * get_thread_id_z();
				}

				/// Returns the size of an AMD wavefront. Either 32 or 64 depending on hardware.
				LIBC_INLINE uint32_t get_lane_size() { return LANE_SIZE; }

				/// Returns the id of the thread inside of an AMD wavefront executing together.
				[[clang::convergent]] LIBC_INLINE uint32_t get_lane_id() {
				  // Count over the low half of the mask; this is the full answer unless
				  // LANE_SIZE is 64.
				  const uint32_t low_count = __builtin_amdgcn_mbcnt_lo(~0u, 0u);
				  if (LANE_SIZE != 64)
				    return low_count;
				  // With LANE_SIZE of 64, continue the count over the high half.
				  return __builtin_amdgcn_mbcnt_hi(~0u, low_count);
				}

				/// Returns the bit-mask of active threads in the current wavefront.
				[[clang::convergent]] LIBC_INLINE uint64_t get_lane_mask() {
				return __builtin_amdgcn_read_exec();
				}

				/// Copies the value from the first active thread in the wavefront to the rest.
				[[clang::convergent]] LIBC_INLINE uint32_t broadcast_value(uint32_t x) {
				return __builtin_amdgcn_readfirstlane(x);
				}

				/// Waits for all the threads in the block to converge and issues a fence.
				[[clang::convergent]] LIBC_INLINE void sync_threads() {
				// Execute the barrier first, then the workgroup-scope acquire fence.
				__builtin_amdgcn_s_barrier();
				__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
				}

				/// Wait for all threads in the wavefront to converge, this is a noop on AMDGPU.
				[[clang::convergent]] LIBC_INLINE void sync_lane(uint64_t) {}

				} // namespace gpu
	} // namespace __llvm_libc			} // namespace __llvm_libc

	#endif			#endif

libc/src/__support/GPU/generic/utils.h

	//===-------------- Generic implementation of GPU utils ---------- C++ --===//			//===-------------- Generic implementation of GPU utils ---------- C++ --===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#ifndef LLVM_LIBC_SRC_SUPPORT_GPU_GENERIC_IO_H			#ifndef LLVM_LIBC_SRC_SUPPORT_GPU_GENERIC_IO_H
	#define LLVM_LIBC_SRC_SUPPORT_GPU_GENERIC_IO_H			#define LLVM_LIBC_SRC_SUPPORT_GPU_GENERIC_IO_H

	#include "src/__support/common.h"			#include "src/__support/common.h"

	#include <stdint.h>			#include <stdint.h>

	namespace __llvm_libc {			namespace __llvm_libc {
				namespace gpu {

				constexpr const uint64_t LANE_SIZE = 1;

				LIBC_INLINE uint32_t get_num_blocks_x() { return 1; }

				/// Number of blocks in the 'y' dimension. A dimension cannot have zero
				/// extent, so this is 1 like get_num_blocks_x().
				LIBC_INLINE uint32_t get_num_blocks_y() { return 1; }

				/// Number of blocks in the 'z' dimension. A dimension cannot have zero
				/// extent, so this is 1 like get_num_blocks_x().
				LIBC_INLINE uint32_t get_num_blocks_z() { return 1; }

	LIBC_INLINE uint32_t get_block_id_x() { return 0; }			LIBC_INLINE uint32_t get_block_id_x() { return 0; }

				LIBC_INLINE uint32_t get_block_id_y() { return 0; }

				LIBC_INLINE uint32_t get_block_id_z() { return 0; }

				LIBC_INLINE uint64_t get_block_id() { return 0; }

				LIBC_INLINE uint32_t get_num_threads_x() { return 1; }

				/// Number of threads in the 'y' dimension. A dimension cannot have zero
				/// extent, so this is 1 like get_num_threads_x().
				LIBC_INLINE uint32_t get_num_threads_y() { return 1; }

				/// Number of threads in the 'z' dimension. A dimension cannot have zero
				/// extent, so this is 1 like get_num_threads_x().
				LIBC_INLINE uint32_t get_num_threads_z() { return 1; }

				LIBC_INLINE uint32_t get_thread_id_x() { return 0; }

				LIBC_INLINE uint32_t get_thread_id_y() { return 0; }

				LIBC_INLINE uint32_t get_thread_id_z() { return 0; }

				LIBC_INLINE uint64_t get_thread_id() { return 0; }

				LIBC_INLINE uint32_t get_lane_size() { return LANE_SIZE; }

				LIBC_INLINE uint32_t get_lane_id() { return 0; }

				LIBC_INLINE uint64_t get_lane_mask() { return 1; }

				LIBC_INLINE uint32_t broadcast_value(uint32_t x) { return x; }

				LIBC_INLINE void sync_threads() {}

				LIBC_INLINE void sync_lane(uint64_t) {}

				} // namespace gpu
	} // namespace __llvm_libc			} // namespace __llvm_libc

	#endif			#endif

libc/src/__support/GPU/nvptx/utils.h

	//===-------------- NVPTX implementation of GPU utils ------------ C++ --===//			//===-------------- NVPTX implementation of GPU utils ------------ C++ --===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#ifndef LLVM_LIBC_SRC_SUPPORT_GPU_NVPTX_IO_H			#ifndef LLVM_LIBC_SRC_SUPPORT_GPU_NVPTX_IO_H
	#define LLVM_LIBC_SRC_SUPPORT_GPU_NVPTX_IO_H			#define LLVM_LIBC_SRC_SUPPORT_GPU_NVPTX_IO_H

	#include "src/__support/common.h"			#include "src/__support/common.h"

	#include <stdint.h>			#include <stdint.h>

	namespace __llvm_libc {			namespace __llvm_libc {
				namespace gpu {

				/// The number of threads that execute in lock-step in a warp.
				constexpr const uint64_t LANE_SIZE = 32;

				/// Returns the number of CUDA blocks in the 'x' dimension.
				LIBC_INLINE uint32_t get_num_blocks_x() {
				return __nvvm_read_ptx_sreg_nctaid_x();
				}

				/// Returns the number of CUDA blocks in the 'y' dimension.
				LIBC_INLINE uint32_t get_num_blocks_y() {
				return __nvvm_read_ptx_sreg_nctaid_y();
				}

				/// Returns the number of CUDA blocks in the 'z' dimension.
				LIBC_INLINE uint32_t get_num_blocks_z() {
				return __nvvm_read_ptx_sreg_nctaid_z();
				}

				/// Returns the 'x' dimension of the current CUDA block's id.
	LIBC_INLINE uint32_t get_block_id_x() { return __nvvm_read_ptx_sreg_ctaid_x(); }			LIBC_INLINE uint32_t get_block_id_x() { return __nvvm_read_ptx_sreg_ctaid_x(); }

				/// Returns the 'y' dimension of the current CUDA block's id.
				LIBC_INLINE uint32_t get_block_id_y() { return __nvvm_read_ptx_sreg_ctaid_y(); }

				/// Returns the 'z' dimension of the current CUDA block's id.
				LIBC_INLINE uint32_t get_block_id_z() { return __nvvm_read_ptx_sreg_ctaid_z(); }

				/// Returns the absolute id of the CUDA block.
				LIBC_INLINE uint64_t get_block_id() {
				  // Widen to 64 bits before multiplying: the per-dimension ids and counts
				  // are 32-bit, so their products would otherwise wrap before the result
				  // is converted to the uint64_t return type.
				  const uint64_t num_x = get_num_blocks_x();
				  const uint64_t num_y = get_num_blocks_y();
				  return get_block_id_x() + num_x * get_block_id_y() +
				         num_x * num_y * get_block_id_z();
				}

				/// Returns the number of CUDA threads in the 'x' dimension.
				LIBC_INLINE uint32_t get_num_threads_x() {
				return __nvvm_read_ptx_sreg_ntid_x();
				}

				/// Returns the number of CUDA threads in the 'y' dimension.
				LIBC_INLINE uint32_t get_num_threads_y() {
				return __nvvm_read_ptx_sreg_ntid_y();
				}

				/// Returns the number of CUDA threads in the 'z' dimension.
				LIBC_INLINE uint32_t get_num_threads_z() {
				return __nvvm_read_ptx_sreg_ntid_z();
				}

				/// Returns the 'x' dimension id of the thread in the current CUDA block.
				LIBC_INLINE uint32_t get_thread_id_x() { return __nvvm_read_ptx_sreg_tid_x(); }

				/// Returns the 'y' dimension id of the thread in the current CUDA block.
				LIBC_INLINE uint32_t get_thread_id_y() { return __nvvm_read_ptx_sreg_tid_y(); }

				/// Returns the 'z' dimension id of the thread in the current CUDA block.
				LIBC_INLINE uint32_t get_thread_id_z() { return __nvvm_read_ptx_sreg_tid_z(); }

				/// Returns the absolute id of the thread in the current CUDA block.
				LIBC_INLINE uint64_t get_thread_id() {
				tra [Not Done]: 32 bits are not sufficient to represent all threads, as block dimensions alone can take up to 63 bits. (31 bits for x, 16 for y, z).
				jhuber6 (author) [Done]: So, the individual thread_id_x() functions can be kept 32-bit but this one global one should be 64-bit?
				tra [Not Done]: Yup. https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications-technical-specifications-per-compute-capability
				return get_thread_id_x() + get_num_threads_x() * get_thread_id_y() +
				get_num_threads_x() * get_num_threads_y() * get_thread_id_z();
				}

				/// Returns the size of a CUDA warp.
				LIBC_INLINE uint32_t get_lane_size() { return LANE_SIZE; }

				/// Returns the id of the thread inside of a CUDA warp executing together.
				[[clang::convergent]] LIBC_INLINE uint32_t get_lane_id() {
				  // Mask the block-local thread id with the lane size minus one.
				  const uint32_t lane_bits = get_lane_size() - 1;
				  return get_thread_id() & lane_bits;
				}

				/// Returns the bit-mask of active threads in the current warp.
				[[clang::convergent]] LIBC_INLINE uint64_t get_lane_mask() {
				// The instruction writes a 32-bit mask, which is zero-extended to the
				// 64-bit return type.
				uint32_t mask;
				asm volatile("activemask.b32 %0;" : "=r"(mask));
				return mask;
				}

				/// Copies the value from the first active thread in the warp to the rest.
				[[clang::convergent]] LIBC_INLINE uint32_t broadcast_value(uint32_t x) {
				// NOTE: This is not sufficient in all cases on Volta hardware or later. The
				// lane mask returned here is not always the true lane mask used by the
				// intrinsics in cases of incidental or enforced divergence by the user.
				uint64_t lane_mask = get_lane_mask();
				// __builtin_ffsl is one-based, so subtract one to get the index of the
				// lowest set bit in the mask.
				uint64_t id = __builtin_ffsl(lane_mask) - 1;
				// The mask-taking shuffle is used when __CUDA_ARCH__ is at least 600.
				#if __CUDA_ARCH__ >= 600
				return __nvvm_shfl_sync_idx_i32(lane_mask, x, id, get_lane_size() - 1);
				#else
				return __nvvm_shfl_idx_i32(x, id, get_lane_size() - 1);
				#endif
				}

				/// Waits for all the threads in the block to converge and issues a fence.
				[[clang::convergent]] LIBC_INLINE void sync_threads() { __syncthreads(); }

				/// Waits for all threads in the warp to reconverge for independent scheduling.
				[[clang::convergent]] LIBC_INLINE void sync_lane(uint64_t mask) {
				// The warp barrier is only emitted when __CUDA_ARCH__ is at least 700.
				#if __CUDA_ARCH__ >= 700
				__nvvm_bar_warp_sync(mask);
				#else
				// Otherwise nothing is emitted; the cast only marks the parameter as used.
				(void)mask;
				#endif
				}

				} // namespace gpu
	} // namespace __llvm_libc			} // namespace __llvm_libc

	#endif			#endif

libc/src/__support/RPC/CMakeLists.txt

	Show All 14 Lines

	add_object_library(			add_object_library(
	rpc_client			rpc_client
	SRCS			SRCS
	rpc_client.cpp			rpc_client.cpp
	HDRS			HDRS
	rpc_client.h			rpc_client.h
	DEPENDS			DEPENDS
				libc.src.__support.GPU.utils
	.rpc			.rpc
	)			)

libc/startup/gpu/amdgpu/CMakeLists.txt

	add_startup_object(			add_startup_object(
	crt1			crt1
	SRC			SRC
	start.cpp			start.cpp
	DEPENDS			DEPENDS
	libc.src.__support.RPC.rpc_client			libc.src.__support.RPC.rpc_client
				libc.src.__support.GPU.utils
	COMPILE_OPTIONS			COMPILE_OPTIONS
	-ffreestanding # To avoid compiler warnings about calling the main function.			-ffreestanding # To avoid compiler warnings about calling the main function.
	-fno-builtin			-fno-builtin
	-nogpulib # Do not include any GPU vendor libraries.			-nogpulib # Do not include any GPU vendor libraries.
	-mcpu=${LIBC_GPU_TARGET_ARCHITECTURE}			-mcpu=${LIBC_GPU_TARGET_ARCHITECTURE}
	-emit-llvm # AMDGPU's intermediate object file format is bitcode.			-emit-llvm # AMDGPU's intermediate object file format is bitcode.
	--target=${LIBC_GPU_TARGET_TRIPLE}			--target=${LIBC_GPU_TARGET_TRIPLE}
	NO_GPU_BUNDLE # Compile this file directly without special GPU handling.			NO_GPU_BUNDLE # Compile this file directly without special GPU handling.
	Show All 11 Lines

libc/startup/gpu/amdgpu/start.cpp

	//===-- Implementation of crt for amdgpu ----------------------------------===//			//===-- Implementation of crt for amdgpu ----------------------------------===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

				#include "src/__support/GPU/utils.h"
	#include "src/__support/RPC/rpc_client.h"			#include "src/__support/RPC/rpc_client.h"

	static __llvm_libc::cpp::Atomic<uint32_t> lock;

	extern "C" int main(int argc, char **argv, char **envp);			extern "C" int main(int argc, char **argv, char **envp);

				namespace __llvm_libc {

				static cpp::Atomic<uint32_t> lock = 0;

				static cpp::Atomic<uint32_t> init = 0;

				/// Resets the RPC client from a single thread and holds every other thread
				/// until that thread has signalled completion.
				///
				/// \param in, out, buffer Forwarded unchanged to rpc::client.reset().
				void init_rpc(void *in, void *out, void *buffer) {
				  // Only a single thread should update the RPC data.
				  if (gpu::get_thread_id() == 0 && gpu::get_block_id() == 0) {
				    rpc::client.reset(&lock, in, out, buffer);
				    init.store(1, cpp::MemoryOrder::RELAXED);
				  }

				  // Wait until the previous thread signals that the data has been written.
				  // NOTE(review): the flag is stored and loaded with relaxed ordering;
				  // confirm this is enough for threads in other blocks to observe the
				  // client state written above.
				  while (!init.load(cpp::MemoryOrder::RELAXED))
				    rpc::sleep_briefly();

				  // Wait for the threads in the block to converge and fence the write.
				  gpu::sync_threads();
				}

				} // namespace __llvm_libc

	extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel]] void			extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel]] void
	_start(int argc, char **argv, char **envp, int *ret, void *in, void *out,			_start(int argc, char **argv, char **envp, int *ret, void *in, void *out,
	void *buffer) {			void *buffer) {
	__llvm_libc::rpc::client.reset(&lock, in, out, buffer);			__llvm_libc::init_rpc(in, out, buffer);

	__atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED);			__atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED);
	}			}

libc/startup/gpu/nvptx/CMakeLists.txt

	get_nvptx_compile_options(nvptx_options ${LIBC_GPU_TARGET_ARCHITECTURE})			get_nvptx_compile_options(nvptx_options ${LIBC_GPU_TARGET_ARCHITECTURE})
	add_startup_object(			add_startup_object(
	crt1			crt1
	SRC			SRC
	start.cpp			start.cpp
	DEPENDS			DEPENDS
	libc.src.__support.RPC.rpc_client			libc.src.__support.RPC.rpc_client
				libc.src.__support.GPU.utils
	COMPILE_OPTIONS			COMPILE_OPTIONS
	-ffreestanding # To avoid compiler warnings about calling the main function.			-ffreestanding # To avoid compiler warnings about calling the main function.
	-fno-builtin			-fno-builtin
	-nogpulib # Do not include any GPU vendor libraries.			-nogpulib # Do not include any GPU vendor libraries.
	--target=${LIBC_GPU_TARGET_TRIPLE}			--target=${LIBC_GPU_TARGET_TRIPLE}
	${nvptx_options}			${nvptx_options}
	NO_GPU_BUNDLE # Compile this file directly without special GPU handling.			NO_GPU_BUNDLE # Compile this file directly without special GPU handling.
	)			)
	Show All 9 Lines

libc/startup/gpu/nvptx/start.cpp

	//===-- Implementation of crt for nvptx -----------------------------------===//			//===-- Implementation of crt for nvptx -----------------------------------===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

				#include "src/__support/GPU/utils.h"
	#include "src/__support/RPC/rpc_client.h"			#include "src/__support/RPC/rpc_client.h"

	static __llvm_libc::cpp::Atomic<uint32_t> lock;

	extern "C" int main(int argc, char **argv, char **envp);			extern "C" int main(int argc, char **argv, char **envp);

	extern "C" [[gnu::visibility("protected")]] __attribute__((nvptx_kernel)) void			namespace __llvm_libc {

				static cpp::Atomic<uint32_t> lock = 0;

				static cpp::Atomic<uint32_t> init = 0;

				/// Resets the RPC client from a single thread and holds every other thread
				/// until that thread has signalled completion.
				///
				/// \param in, out, buffer Forwarded unchanged to rpc::client.reset().
				void init_rpc(void *in, void *out, void *buffer) {
				  // Only a single thread should update the RPC data.
				  if (gpu::get_thread_id() == 0 && gpu::get_block_id() == 0) {
				    rpc::client.reset(&lock, in, out, buffer);
				    init.store(1, cpp::MemoryOrder::RELAXED);
				  }

				  // Wait until the previous thread signals that the data has been written.
				  // NOTE(review): the flag is stored and loaded with relaxed ordering;
				  // confirm this is enough for threads in other blocks to observe the
				  // client state written above.
				  while (!init.load(cpp::MemoryOrder::RELAXED))
				    rpc::sleep_briefly();

				  // Wait for the threads in the block to converge and fence the write.
				  gpu::sync_threads();
				}

				} // namespace __llvm_libc

				extern "C" [[gnu::visibility("protected"), clang::nvptx_kernel]] void
	_start(int argc, char **argv, char **envp, int *ret, void *in, void *out,			_start(int argc, char **argv, char **envp, int *ret, void *in, void *out,
	void *buffer) {			void *buffer) {
	__llvm_libc::rpc::client.reset(&lock, in, out, buffer);			__llvm_libc::init_rpc(in, out, buffer);

	__atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED);			__atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED);
	}			}

libc/test/integration/startup/gpu/rpc_test.cpp

	//===-- Loader test to check the RPC interface with the loader ------------===//			//===-- Loader test to check the RPC interface with the loader ------------===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#include "src/__support/GPU/utils.h"			#include "src/__support/GPU/utils.h"
	#include "src/__support/RPC/rpc_client.h"			#include "src/__support/RPC/rpc_client.h"
	#include "test/IntegrationTest/test.h"			#include "test/IntegrationTest/test.h"

	using namespace __llvm_libc;			using namespace __llvm_libc;

	static void test_add_simple() {			static void test_add_simple() {
	uint32_t num_additions = 1000 + 10 * get_block_id_x();			uint32_t num_additions = 1000 + 10 * gpu::get_block_id_x();
	uint64_t cnt = 0;			uint64_t cnt = 0;
	for (uint32_t i = 0; i < num_additions; ++i) {			for (uint32_t i = 0; i < num_additions; ++i) {
	rpc::Port port = rpc::client.open(rpc::TEST_INCREMENT);			rpc::Port port = rpc::client.open(rpc::TEST_INCREMENT);
	port.send_and_recv(			port.send_and_recv(
	[=](rpc::Buffer *buffer) {			[=](rpc::Buffer *buffer) {
	reinterpret_cast<uint64_t *>(buffer->data)[0] = cnt;			reinterpret_cast<uint64_t *>(buffer->data)[0] = cnt;
	},			},
	[&](rpc::Buffer *buffer) {			[&](rpc::Buffer *buffer) {
	Show All 12 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[libc] Add more utility functions for the GPU
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 516523

libc/src/__support/GPU/amdgpu/utils.h

libc/src/__support/GPU/generic/utils.h

libc/src/__support/GPU/nvptx/utils.h

libc/src/__support/RPC/CMakeLists.txt

libc/startup/gpu/amdgpu/CMakeLists.txt

libc/startup/gpu/amdgpu/start.cpp

libc/startup/gpu/nvptx/CMakeLists.txt

libc/startup/gpu/nvptx/start.cpp

libc/test/integration/startup/gpu/rpc_test.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[libc] Add more utility functions for the GPU (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 516523

libc/src/__support/GPU/amdgpu/utils.h

libc/src/__support/GPU/generic/utils.h

libc/src/__support/GPU/nvptx/utils.h

libc/src/__support/RPC/CMakeLists.txt

libc/startup/gpu/amdgpu/CMakeLists.txt

libc/startup/gpu/amdgpu/start.cpp

libc/startup/gpu/nvptx/CMakeLists.txt

libc/startup/gpu/nvptx/start.cpp

libc/test/integration/startup/gpu/rpc_test.cpp

[libc] Add more utility functions for the GPU
ClosedPublic