Diff 528613

libc/src/__support/RPC/rpc.h

	Show All 29 Lines
	namespace __llvm_libc {			namespace __llvm_libc {
	namespace rpc {			namespace rpc {

	/// A list of opcodes that we use to invoke certain actions on the server.			/// A list of opcodes that we use to invoke certain actions on the server.
	enum Opcode : uint16_t {			enum Opcode : uint16_t {
	NOOP = 0,			NOOP = 0,
	PRINT_TO_STDERR = 1,			PRINT_TO_STDERR = 1,
	EXIT = 2,			EXIT = 2,
	TEST_INCREMENT = 3,			MALLOC = 3,
	TEST_INTERFACE = 4,			FREE = 4,
	TEST_STREAM = 5,			TEST_INCREMENT = 5,
				TEST_INTERFACE = 6,
				TEST_STREAM = 7,
				jplehrUnsubmitted Not Done Reply Inline Actions Out of curiosity: Why not append these values to the existing ones instead? jplehr: Out of curiosity: Why not append these values to the existing ones instead?
				jhuber6AuthorUnsubmitted Done Reply Inline Actions Tentatively planned, but the goal is to later provide this more as a library interface. Users of the library can then register their own custom opcodes, probably reserving anything that has a MSB of `1` to be a custom opcode, the rest we can reserve for internal `libc` use. So the goal is to move the test ones somewhere else. jhuber6: Tentatively planned, but the goal is to later provide this more as a library interface. Users…
	};			};

	/// A fixed size channel used to communicate between the RPC client and server.			/// A fixed size channel used to communicate between the RPC client and server.
	struct Buffer {			struct Buffer {
	uint64_t data[8];			uint64_t data[8];
	};			};
	static_assert(sizeof(Buffer) == 64, "Buffer size mismatch");			static_assert(sizeof(Buffer) == 64, "Buffer size mismatch");

	▲ Show 20 Lines • Show All 536 Lines • Show Last 20 Lines

libc/src/stdlib/CMakeLists.txt

Show First 20 Lines • Show All 238 Lines • ▼ Show 20 Lines	SRCS
srand.cpp		srand.cpp
HDRS		HDRS
srand.h		srand.h
DEPENDS		DEPENDS
.rand_util		.rand_util
libc.include.stdlib		libc.include.stdlib
)		)

if(LLVM_LIBC_INCLUDE_SCUDO)		if(LLVM_LIBC_INCLUDE_SCUDO)
		sivachandraUnsubmitted Done Reply Inline Actions Instead of spreading out the GPU conditionals, can we do this: if(LIBC_TARGET_ARCHITECTURE_IS_GPU) ... elseif(LLVM_LIBC_INCLUDE_SCUDO) ... else() ... endif() sivachandra: Instead of spreading out the GPU conditionals, can we do this: ``` if…
		jhuber6AuthorUnsubmitted Done Reply Inline Actions I can't move the GPU entrypoints here because they alias to something that is only defined after we include the `gpu/` subdirectory after the FULL_BUILD check. jhuber6: I can't move the GPU entrypoints here because they alias to something that is only defined…
		sivachandraUnsubmitted Done Reply Inline Actions I think the flow here has become more complicated than it should. But, fixing it is definitely not the scope of this change. sivachandra: I think the flow here has become more complicated than it should. But, fixing it is definitely…
set(SCUDO_DEPS "")		set(SCUDO_DEPS "")

include(${LIBC_SOURCE_DIR}/../compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake)		include(${LIBC_SOURCE_DIR}/../compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake)
if(NOT (LIBC_TARGET_ARCHITECTURE IN_LIST ALL_SCUDO_STANDALONE_SUPPORTED_ARCH))		if(NOT (LIBC_TARGET_ARCHITECTURE IN_LIST ALL_SCUDO_STANDALONE_SUPPORTED_ARCH))
message(FATAL_ERROR "Architecture ${LIBC_TARGET_ARCHITECTURE} is not supported by SCUDO.		message(FATAL_ERROR "Architecture ${LIBC_TARGET_ARCHITECTURE} is not supported by SCUDO.
Either disable LLVM_LIBC_INCLUDE_SCUDO or change your target architecture.")		Either disable LLVM_LIBC_INCLUDE_SCUDO or change your target architecture.")
endif()		endif()

Show All 26 Lines	add_entrypoint_external(
DEPENDS		DEPENDS
${SCUDO_DEPS}		${SCUDO_DEPS}
)		)
add_entrypoint_external(		add_entrypoint_external(
free		free
DEPENDS		DEPENDS
${SCUDO_DEPS}		${SCUDO_DEPS}
)		)
		elseif(LIBC_TARGET_ARCHITECTURE_IS_GPU)
		add_entrypoint_external(
		calloc
		)
		add_entrypoint_external(
		realloc
		)
		add_entrypoint_external(
		aligned_alloc
		)
else()		else()
add_entrypoint_external(		add_entrypoint_external(
malloc		malloc
)		)
add_entrypoint_external(		add_entrypoint_external(
		free
		)
		add_entrypoint_external(
calloc		calloc
)		)
add_entrypoint_external(		add_entrypoint_external(
realloc		realloc
)		)
add_entrypoint_external(		add_entrypoint_external(
aligned_alloc		aligned_alloc
)		)
add_entrypoint_external(
free
)
endif()		endif()

if(NOT LLVM_LIBC_FULL_BUILD)		if(NOT LLVM_LIBC_FULL_BUILD)
return()		return()
endif()		endif()

if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS})		if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS})
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS})		add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS})
Show All 37 Lines
)		)

add_entrypoint_object(		add_entrypoint_object(
abort		abort
ALIAS		ALIAS
DEPENDS		DEPENDS
.${LIBC_TARGET_OS}.abort		.${LIBC_TARGET_OS}.abort
)		)

		if(LIBC_TARGET_ARCHITECTURE_IS_GPU)
		add_entrypoint_object(
		malloc
		ALIAS
		DEPENDS
		.${LIBC_TARGET_OS}.malloc
		)

		add_entrypoint_object(
		free
		ALIAS
		DEPENDS
		.${LIBC_TARGET_OS}.free
		)
		endif()

libc/src/stdlib/free.h

This file was added.

				//===-- Implementation header for free --------------------------*- C++ -*-===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//

				#ifndef LLVM_LIBC_SRC_STDLIB_FREE_H
				#define LLVM_LIBC_SRC_STDLIB_FREE_H

				// Included inside the guard so repeated inclusion of this header is a no-op.
				#include <stdlib.h>

				namespace __llvm_libc {

				/// Declaration of the libc-internal `free` entrypoint; takes the pointer to
				/// release and returns nothing.
				void free(void *ptr);

				} // namespace __llvm_libc

				#endif // LLVM_LIBC_SRC_STDLIB_FREE_H

libc/src/stdlib/gpu/CMakeLists.txt

This file was added.

				# GPU `malloc` entrypoint: built from malloc.cpp against the shared
				# ../malloc.h declaration. Depends on the generated stdlib header and on the
				# RPC client target.
				add_entrypoint_object(
				malloc
				SRCS
				malloc.cpp
				HDRS
				../malloc.h
				DEPENDS
				libc.include.stdlib
				libc.src.__support.RPC.rpc_client
				)

				# GPU `free` entrypoint: built from free.cpp against the shared ../free.h
				# declaration, with the same dependencies as `malloc` above.
				add_entrypoint_object(
				free
				SRCS
				free.cpp
				HDRS
				../free.h
				DEPENDS
				libc.include.stdlib
				libc.src.__support.RPC.rpc_client
				)

libc/src/stdlib/gpu/free.cpp

This file was added.

				//===-- GPU Implementation of free ----------------------------------------===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//

				#include "src/stdlib/free.h"
				#include "src/__support/RPC/rpc_client.h"
				#include "src/__support/common.h"

				namespace __llvm_libc {

				LLVM_LIBC_FUNCTION(void, free, (void *ptr)) {
				rpc::Client::Port port = rpc::client.open<rpc::FREE>();
				port.send([=](rpc::Buffer *buffer) {
				buffer->data[0] = reinterpret_cast<uintptr_t>(ptr);
				});
				port.close();
				}

				} // namespace __llvm_libc

libc/src/stdlib/gpu/malloc.cpp

This file was added.

				//===-- GPU Implementation of malloc --------------------------------------===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//

				#include "src/stdlib/malloc.h"
				#include "src/__support/RPC/rpc_client.h"
				#include "src/__support/common.h"

				namespace __llvm_libc {

				LLVM_LIBC_FUNCTION(void *, malloc, (size_t size)) {
				void *ptr = nullptr;
				rpc::Client::Port port = rpc::client.open<rpc::MALLOC>();
				port.send_and_recv([=](rpc::Buffer *buffer) { buffer->data[0] = size; },
				[&](rpc::Buffer *buffer) {
				ptr = reinterpret_cast<void *>(buffer->data[0]);
				});
				port.close();
				return ptr;
				}

				} // namespace __llvm_libc

libc/src/stdlib/malloc.h

This file was added.

				//===-- Implementation header for malloc ------------------------*- C++ -*-===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//

				#ifndef LLVM_LIBC_SRC_STDLIB_MALLOC_H
				#define LLVM_LIBC_SRC_STDLIB_MALLOC_H

				// Included inside the guard so repeated inclusion of this header is a no-op;
				// it also supplies `size_t` for the declaration below.
				#include <stdlib.h>

				namespace __llvm_libc {

				/// Declaration of the libc-internal `malloc` entrypoint; takes the requested
				/// size in bytes and returns a pointer.
				void *malloc(size_t size);

				} // namespace __llvm_libc

				#endif // LLVM_LIBC_SRC_STDLIB_MALLOC_H

libc/test/src/stdlib/CMakeLists.txt

Show First 20 Lines • Show All 314 Lines • ▼ Show 20 Lines	add_libc_test(
DEPENDS		DEPENDS
libc.include.stdlib		libc.include.stdlib
libc.include.signal		libc.include.signal
libc.src.stdlib.abort		libc.src.stdlib.abort
libc.src.stdlib._Exit		libc.src.stdlib._Exit
libc.src.signal.raise		libc.src.signal.raise
)		)

		# Only the GPU has an in-tree 'malloc' implementation.
		if(LIBC_TARGET_ARCHITECTURE_IS_GPU)
		sivachandraUnsubmitted Not Done Reply Inline Actions In the interest of eliminating unnecessary conditionals, will it fail on linux if you remove this conditional? sivachandra: In the interest of eliminating unnecessary conditionals, will it fail on linux if you remove…
		jhuber6AuthorUnsubmitted Done Reply Inline Actions I think so, because the hermetic tests use `malloc` and this uses `__llvm_libc::malloc` which isn't defined anywhere AFAIK. jhuber6: I think so, because the hermetic tests use `malloc` and this uses `__llvm_libc::malloc` which…
		sivachandraUnsubmitted Done Reply Inline Actions Ah, yes. sivachandra: Ah, yes.
		add_libc_test(
		malloc_test
		HERMETIC_TEST_ONLY
		SUITE
		libc-stdlib-tests
		SRCS
		malloc_test.cpp
		DEPENDS
		libc.include.stdlib
		libc.src.stdlib.malloc
		libc.src.stdlib.free
		)
		endif()
endif()		endif()

libc/test/src/stdlib/malloc_test.cpp

This file was added.

				//===-- Unittests for malloc ----------------------------------------------===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//

				#include "src/stdlib/free.h"
				#include "src/stdlib/malloc.h"
				#include "test/UnitTest/Test.h"

				TEST(LlvmLibcMallocTest, Allocate) {
				int *ptr = reinterpret_cast<int *>(__llvm_libc::malloc(sizeof(int)));
				EXPECT_NE(reinterpret_cast<void *>(ptr), static_cast<void *>(nullptr));
				*ptr = 1;
				jplehrUnsubmitted Done Reply Inline Actions Should we first assert that this is not `nullptr`? jplehr: Should we first assert that this is not `nullptr`?
				EXPECT_EQ(*ptr, 1);
				__llvm_libc::free(ptr);
				}

libc/utils/gpu/loader/CMakeLists.txt

	add_library(gpu_loader OBJECT Main.cpp)			add_library(gpu_loader OBJECT Main.cpp)
	target_include_directories(gpu_loader PUBLIC			target_include_directories(gpu_loader PUBLIC
	${CMAKE_CURRENT_SOURCE_DIR}			${CMAKE_CURRENT_SOURCE_DIR}
	${LIBC_SOURCE_DIR}			${LIBC_SOURCE_DIR}
	)			)

	find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)			find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
	if(hsa-runtime64_FOUND)			if(hsa-runtime64_FOUND)
	add_subdirectory(amdgpu)			add_subdirectory(amdgpu)
	else()			else()
	message(STATUS "Skipping HSA loader for gpu target, no HSA was detected")			message(STATUS "Skipping HSA loader for gpu target, no HSA was detected")
	endif()			endif()

	find_package(CUDAToolkit QUIET)			find_package(CUDAToolkit QUIET)
	# The CUDA loader requires LLVM to traverse the ELF image for symbols.			# The CUDA loader requires LLVM to traverse the ELF image for symbols.
	find_package(LLVM QUIET)			find_package(LLVM QUIET)
	if(CUDAToolkit_FOUND AND LLVM_FOUND)			if(CUDAToolkit_FOUND AND LLVM_FOUND AND
				# Quote the version: it is empty when CUDA was not found, and an unquoted
				# empty expansion would leave if() with a dangling comparison operator.
				"${CUDAToolkit_VERSION}" VERSION_GREATER_EQUAL "11.2")
	add_subdirectory(nvptx)			add_subdirectory(nvptx)
	else()			else()
				# Only report a version problem when a toolkit was actually found;
				# otherwise fall through to the "no CUDA was detected" message.
				if(CUDAToolkit_FOUND AND "${CUDAToolkit_VERSION}" VERSION_LESS "11.2")
				message(WARNING
				"Skipping CUDA loader for gpu target, CUDA must be version 11.2 or later.
				Found CUDA Version ${CUDAToolkit_VERSION}")
				else()
	message(STATUS "Skipping CUDA loader for gpu target, no CUDA was detected")			message(STATUS "Skipping CUDA loader for gpu target, no CUDA was detected")
	endif()			endif()
				endif()

	# Add a custom target to be used for testing.			# Add a custom target to be used for testing.
	if(TARGET amdhsa_loader AND LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU)			if(TARGET amdhsa_loader AND LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU)
	add_custom_target(libc.utils.gpu.loader)			add_custom_target(libc.utils.gpu.loader)
	add_dependencies(libc.utils.gpu.loader amdhsa_loader)			add_dependencies(libc.utils.gpu.loader amdhsa_loader)
	set_target_properties(			set_target_properties(
	libc.utils.gpu.loader			libc.utils.gpu.loader
	PROPERTIES			PROPERTIES
	Show All 11 Lines

libc/utils/gpu/loader/Server.h

Show All 15 Lines

#include <stddef.h>

#include "src/__support/RPC/rpc.h"

static __llvm_libc::rpc::Server server;

/// Queries the RPC client at least once and performs server-side work if there

/// are any active requests.

void handle_server() {

template <typename Alloc, typename Dealloc>

void handle_server(Alloc allocator, Dealloc deallocator) {

jplehrUnsubmitted

Done

template <typename Alloc, typename Dealloc>

- void handle_server(Alloc alloctor, Dealloc deallocator) {

+ void handle_server(Alloc allocator, Dealloc deallocator) {

using namespace __llvm_libc;

typo

jplehr: typo

using namespace __llvm_libc;

// Continue servicing the client until there is no work left and we return.

for (;;) {

auto port = server.try_open();

if (!port)

return;

Show All 12 Lines

case rpc::Opcode::PRINT_TO_STDERR: {

break;

}

case rpc::Opcode::EXIT: {

port->recv([](rpc::Buffer *buffer) {

exit(reinterpret_cast<uint32_t *>(buffer->data)[0]);

});

break;

}

case rpc::Opcode::MALLOC: {

port->recv_and_send([&](rpc::Buffer *buffer) {

buffer->data[0] =

reinterpret_cast<uintptr_t>(allocator(buffer->data[0]));

});

break;

}

case rpc::Opcode::FREE: {

port->recv([&](rpc::Buffer *buffer) {

deallocator(reinterpret_cast<void *>(buffer->data[0]));

});

break;

}

case rpc::Opcode::TEST_INCREMENT: {

port->recv_and_send([](rpc::Buffer *buffer) {

reinterpret_cast<uint64_t *>(buffer->data)[0] += 1;

});

break;

}

case rpc::Opcode::TEST_INTERFACE: {

uint64_t cnt = 0;

Show All 36 Lines

libc/utils/gpu/loader/amdgpu/Loader.cpp

Show First 20 Lines • Show All 128 Lines • ▼ Show 20 Lines	auto cb = [&](hsa_amd_memory_pool_t memory_pool) {
return HSA_STATUS_SUCCESS;		return HSA_STATUS_SUCCESS;
};		};
return iterate_agent_memory_pools(agent, cb);		return iterate_agent_memory_pools(agent, cb);
}		}

template <typename args_t>		template <typename args_t>
hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable,		hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable,
hsa_amd_memory_pool_t kernargs_pool,		hsa_amd_memory_pool_t kernargs_pool,
		hsa_amd_memory_pool_t coarsegrained_pool,
hsa_queue_t *queue, const LaunchParameters &params,		hsa_queue_t *queue, const LaunchParameters &params,
const char *kernel_name, args_t kernel_args) {		const char *kernel_name, args_t kernel_args) {
// Look up the '_start' kernel in the loaded executable.		// Look up the '_start' kernel in the loaded executable.
hsa_executable_symbol_t symbol;		hsa_executable_symbol_t symbol;
if (hsa_status_t err = hsa_executable_get_symbol_by_name(		if (hsa_status_t err = hsa_executable_get_symbol_by_name(
executable, kernel_name, &dev_agent, &symbol))		executable, kernel_name, &dev_agent, &symbol))
return err;		return err;

		auto allocator = [&](uint64_t size) -> void * {
		void *dev_ptr = nullptr;
		if (hsa_status_t err =
		hsa_amd_memory_pool_allocate(coarsegrained_pool, size,
		/*flags=*/0, &dev_ptr))
		handle_error(err);
		hsa_amd_agents_allow_access(1, &dev_agent, nullptr, dev_ptr);
		return dev_ptr;
		};

		auto deallocator = [](void *ptr) -> void {
		if (hsa_status_t err = hsa_amd_memory_pool_free(ptr))
		handle_error(err);
		};

// Retrieve different properties of the kernel symbol used for launch.		// Retrieve different properties of the kernel symbol used for launch.
uint64_t kernel;		uint64_t kernel;
uint32_t args_size;		uint32_t args_size;
uint32_t group_size;		uint32_t group_size;
uint32_t private_size;		uint32_t private_size;

std::pair<hsa_executable_symbol_info_t, void *> symbol_infos[] = {		std::pair<hsa_executable_symbol_info_t, void *> symbol_infos[] = {
{HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel},		{HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel},
▲ Show 20 Lines • Show All 61 Lines • ▼ Show 20 Lines	hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable,
__atomic_store_n(&packet->header, header | (setup << 16), __ATOMIC_RELEASE);		__atomic_store_n(&packet->header, header | (setup << 16), __ATOMIC_RELEASE);
hsa_signal_store_relaxed(queue->doorbell_signal, packet_id);		hsa_signal_store_relaxed(queue->doorbell_signal, packet_id);

// Wait until the kernel has completed execution on the device. Periodically		// Wait until the kernel has completed execution on the device. Periodically
// check the RPC client for work to be performed on the server.		// check the RPC client for work to be performed on the server.
while (hsa_signal_wait_scacquire(		while (hsa_signal_wait_scacquire(
packet->completion_signal, HSA_SIGNAL_CONDITION_EQ, 0,		packet->completion_signal, HSA_SIGNAL_CONDITION_EQ, 0,
/*timeout_hint=*/1024, HSA_WAIT_STATE_ACTIVE) != 0)		/*timeout_hint=*/1024, HSA_WAIT_STATE_ACTIVE) != 0)
handle_server();		handle_server(allocator, deallocator);

// Handle the server one more time in case the kernel exited with a pending		// Handle the server one more time in case the kernel exited with a pending
// send still in flight.		// send still in flight.
handle_server();		handle_server(allocator, deallocator);

// Destroy the resources acquired to launch the kernel and return.		// Destroy the resources acquired to launch the kernel and return.
if (hsa_status_t err = hsa_amd_memory_pool_free(args))		if (hsa_status_t err = hsa_amd_memory_pool_free(args))
handle_error(err);		handle_error(err);
if (hsa_status_t err = hsa_signal_destroy(packet->completion_signal))		if (hsa_status_t err = hsa_signal_destroy(packet->completion_signal))
handle_error(err);		handle_error(err);

return HSA_STATUS_SUCCESS;		return HSA_STATUS_SUCCESS;
▲ Show 20 Lines • Show All 126 Lines • ▼ Show 20 Lines	int load(int argc, char argv, char envp, void *image, size_t size,
hsa_queue_t *queue = nullptr;		hsa_queue_t *queue = nullptr;
if (hsa_status_t err =		if (hsa_status_t err =
hsa_queue_create(dev_agent, queue_size, HSA_QUEUE_TYPE_MULTI, nullptr,		hsa_queue_create(dev_agent, queue_size, HSA_QUEUE_TYPE_MULTI, nullptr,
nullptr, UINT32_MAX, UINT32_MAX, &queue))		nullptr, UINT32_MAX, UINT32_MAX, &queue))
handle_error(err);		handle_error(err);

LaunchParameters single_threaded_params = {1, 1, 1, 1, 1, 1};		LaunchParameters single_threaded_params = {1, 1, 1, 1, 1, 1};
begin_args_t init_args = {argc, dev_argv, dev_envp, rpc_shared_buffer};		begin_args_t init_args = {argc, dev_argv, dev_envp, rpc_shared_buffer};
if (hsa_status_t err =		if (hsa_status_t err = launch_kernel(
launch_kernel(dev_agent, executable, kernargs_pool, queue,		dev_agent, executable, kernargs_pool, coarsegrained_pool, queue,
single_threaded_params, "_begin.kd", init_args))		single_threaded_params, "_begin.kd", init_args))
handle_error(err);		handle_error(err);

start_args_t args = {argc, dev_argv, dev_envp, dev_ret};		start_args_t args = {argc, dev_argv, dev_envp, dev_ret};
if (hsa_status_t err = launch_kernel(dev_agent, executable, kernargs_pool,		if (hsa_status_t err =
queue, params, "_start.kd", args))		launch_kernel(dev_agent, executable, kernargs_pool,
		coarsegrained_pool, queue, params, "_start.kd", args))
handle_error(err);		handle_error(err);

// Create a memory signal and copy the return value back from the device into		// Create a memory signal and copy the return value back from the device into
// a new buffer.		// a new buffer.
hsa_signal_t memory_signal;		hsa_signal_t memory_signal;
if (hsa_status_t err = hsa_signal_create(1, 0, nullptr, &memory_signal))		if (hsa_status_t err = hsa_signal_create(1, 0, nullptr, &memory_signal))
handle_error(err);		handle_error(err);

Show All 12 Lines	int load(int argc, char argv, char envp, void *image, size_t size,
while (hsa_signal_wait_scacquire(memory_signal, HSA_SIGNAL_CONDITION_EQ, 0,		while (hsa_signal_wait_scacquire(memory_signal, HSA_SIGNAL_CONDITION_EQ, 0,
UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0)		UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0)
;		;

// Save the return value and perform basic clean-up.		// Save the return value and perform basic clean-up.
int ret = *static_cast<int *>(host_ret);		int ret = *static_cast<int *>(host_ret);

end_args_t fini_args = {ret};		end_args_t fini_args = {ret};
if (hsa_status_t err =		if (hsa_status_t err = launch_kernel(
launch_kernel(dev_agent, executable, kernargs_pool, queue,		dev_agent, executable, kernargs_pool, coarsegrained_pool, queue,
single_threaded_params, "_end.kd", fini_args))		single_threaded_params, "_end.kd", fini_args))
handle_error(err);		handle_error(err);

// Free the memory allocated for the device.		// Free the memory allocated for the device.
if (hsa_status_t err = hsa_amd_memory_pool_free(dev_argv))		if (hsa_status_t err = hsa_amd_memory_pool_free(dev_argv))
handle_error(err);		handle_error(err);
if (hsa_status_t err = hsa_amd_memory_pool_free(dev_ret))		if (hsa_status_t err = hsa_amd_memory_pool_free(dev_ret))
handle_error(err);		handle_error(err);
if (hsa_status_t err = hsa_amd_memory_pool_free(rpc_shared_buffer))		if (hsa_status_t err = hsa_amd_memory_pool_free(rpc_shared_buffer))
Show All 20 Lines

libc/utils/gpu/loader/nvptx/Loader.cpp

Show First 20 Lines • Show All 168 Lines • ▼ Show 20 Lines	if (CUresult err = cuModuleGetFunction(&function, binary, kernel_name))
handle_error(err);		handle_error(err);

// Set up the arguments to the '_start' kernel on the GPU.		// Set up the arguments to the '_start' kernel on the GPU.
uint64_t args_size = sizeof(args_t);		uint64_t args_size = sizeof(args_t);
void *args_config[] = {CU_LAUNCH_PARAM_BUFFER_POINTER, &kernel_args,		void *args_config[] = {CU_LAUNCH_PARAM_BUFFER_POINTER, &kernel_args,
CU_LAUNCH_PARAM_BUFFER_SIZE, &args_size,		CU_LAUNCH_PARAM_BUFFER_SIZE, &args_size,
CU_LAUNCH_PARAM_END};		CU_LAUNCH_PARAM_END};

		// Initialize a non-blocking CUDA stream to allocate memory if needed. This
		// needs to be done on a separate stream or else it will deadlock with the
		// executing kernel.
		CUstream memory_stream;
		if (CUresult err = cuStreamCreate(&memory_stream, CU_STREAM_NON_BLOCKING))
		handle_error(err);

		auto allocator = [&](uint64_t size) -> void * {
		CUdeviceptr dev_ptr;
		if (CUresult err = cuMemAllocAsync(&dev_ptr, size, memory_stream))
		handle_error(err);

		// Wait until the memory allocation is complete.
		while (cuStreamQuery(memory_stream) == CUDA_ERROR_NOT_READY)
		;
		return reinterpret_cast<void *>(dev_ptr);
		};
		auto deallocator = [&](void *ptr) -> void {
		if (CUresult err =
		cuMemFreeAsync(reinterpret_cast<CUdeviceptr>(ptr), memory_stream))
		handle_error(err);
		};

// Call the kernel with the given arguments.		// Call the kernel with the given arguments.
if (CUresult err = cuLaunchKernel(		if (CUresult err = cuLaunchKernel(
function, params.num_blocks_x, params.num_blocks_y,		function, params.num_blocks_x, params.num_blocks_y,
params.num_blocks_z, params.num_threads_x, params.num_threads_y,		params.num_blocks_z, params.num_threads_x, params.num_threads_y,
params.num_threads_z, 0, stream, nullptr, args_config))		params.num_threads_z, 0, stream, nullptr, args_config))
handle_error(err);		handle_error(err);

// Wait until the kernel has completed execution on the device. Periodically		// Wait until the kernel has completed execution on the device. Periodically
// check the RPC client for work to be performed on the server.		// check the RPC client for work to be performed on the server.
while (cuStreamQuery(stream) == CUDA_ERROR_NOT_READY)		while (cuStreamQuery(stream) == CUDA_ERROR_NOT_READY)
handle_server();		handle_server(allocator, deallocator);

// Handle the server one more time in case the kernel exited with a pending		// Handle the server one more time in case the kernel exited with a pending
// send still in flight.		// send still in flight.
handle_server();		handle_server(allocator, deallocator);

return CUDA_SUCCESS;		return CUDA_SUCCESS;
}		}

int load(int argc, char **argv, char **envp, void *image, size_t size,		int load(int argc, char **argv, char **envp, void *image, size_t size,
const LaunchParameters &params) {		const LaunchParameters &params) {

if (CUresult err = cuInit(0))		if (CUresult err = cuInit(0))
▲ Show 20 Lines • Show All 108 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[libc] Implement basic `malloc` and `free` support on the GPU
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 528613

libc/src/__support/RPC/rpc.h

libc/src/stdlib/CMakeLists.txt

libc/src/stdlib/free.h

libc/src/stdlib/gpu/CMakeLists.txt

libc/src/stdlib/gpu/free.cpp

libc/src/stdlib/gpu/malloc.cpp

libc/src/stdlib/malloc.h

libc/test/src/stdlib/CMakeLists.txt

libc/test/src/stdlib/malloc_test.cpp

libc/utils/gpu/loader/CMakeLists.txt

libc/utils/gpu/loader/Server.h

libc/utils/gpu/loader/amdgpu/Loader.cpp

libc/utils/gpu/loader/nvptx/Loader.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[libc] Implement basic `malloc` and `free` support on the GPUClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 528613

libc/src/__support/RPC/rpc.h

libc/src/stdlib/CMakeLists.txt

libc/src/stdlib/free.h

libc/src/stdlib/gpu/CMakeLists.txt

libc/src/stdlib/gpu/free.cpp

libc/src/stdlib/gpu/malloc.cpp

libc/src/stdlib/malloc.h

libc/test/src/stdlib/CMakeLists.txt

libc/test/src/stdlib/malloc_test.cpp

libc/utils/gpu/loader/CMakeLists.txt

libc/utils/gpu/loader/Server.h

libc/utils/gpu/loader/amdgpu/Loader.cpp

libc/utils/gpu/loader/nvptx/Loader.cpp

[libc] Implement basic `malloc` and `free` support on the GPU
ClosedPublic