Diff 552752

libc/src/__support/RPC/rpc_util.h

	Show All 11 Lines
	#include "src/__support/CPP/type_traits.h"			#include "src/__support/CPP/type_traits.h"
	#include "src/__support/GPU/utils.h"			#include "src/__support/GPU/utils.h"
	#include "src/__support/macros/attributes.h" // LIBC_INLINE			#include "src/__support/macros/attributes.h" // LIBC_INLINE
	#include "src/__support/macros/properties/architectures.h"			#include "src/__support/macros/properties/architectures.h"

	namespace __llvm_libc {			namespace __llvm_libc {
	namespace rpc {			namespace rpc {

	/// Maximum amount of data a single lane can use.
	constexpr uint64_t MAX_LANE_SIZE = 64;

	/// Suspend the thread briefly to assist the thread scheduler during busy loops.			/// Suspend the thread briefly to assist the thread scheduler during busy loops.
	LIBC_INLINE void sleep_briefly() {			LIBC_INLINE void sleep_briefly() {
	#if defined(LIBC_TARGET_ARCH_IS_NVPTX) && __CUDA_ARCH__ >= 700			#if defined(LIBC_TARGET_ARCH_IS_NVPTX) && __CUDA_ARCH__ >= 700
	LIBC_INLINE_ASM("nanosleep.u32 64;" ::: "memory");			LIBC_INLINE_ASM("nanosleep.u32 64;" ::: "memory");
	#elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)			#elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
	__builtin_amdgcn_s_sleep(2);			__builtin_amdgcn_s_sleep(2);
	#elif defined(LIBC_TARGET_ARCH_IS_X86)			#elif defined(LIBC_TARGET_ARCH_IS_X86)
	__builtin_ia32_pause();			__builtin_ia32_pause();
	▲ Show 20 Lines • Show All 53 Lines • Show Last 20 Lines

libc/utils/gpu/loader/Loader.h

Show First 20 Lines • Show All 101 Lines • ▼ Show 20 Lines	inline void handle_error(const char *msg) {
fprintf(stderr, "%s\n", msg);		fprintf(stderr, "%s\n", msg);
exit(EXIT_FAILURE);		exit(EXIT_FAILURE);
}		}

// Overload of handle_error for RPC status codes. The status value itself is
// ignored (the parameter is unnamed); a fixed message is forwarded to the
// string overload, which prints it to stderr and exits with EXIT_FAILURE.
// NOTE(review): the message already ends in '\n' and the string overload
// appends another one via "%s\n", so the output ends with a blank line.
inline void handle_error(rpc_status_t) {		inline void handle_error(rpc_status_t) {
handle_error("Failure in the RPC server\n");		handle_error("Failure in the RPC server\n");
}		}

		template <uint32_t lane_size>
inline void register_rpc_callbacks(uint32_t device_id) {		inline void register_rpc_callbacks(uint32_t device_id) {
		static_assert(lane_size == 32 && lane_size == 64, "Invalid Lane size");
		JonChesterfield (Unsubmitted, Not Done): This will never pass.
// Register the ping test for the `libc` tests.		// Register the ping test for the `libc` tests.
rpc_register_callback(		rpc_register_callback(
		JonChesterfield (Unsubmitted, Not Done): Static assert that it's 32 or 64 here, or maybe go so far as a valid-for-target call?
device_id, static_cast<rpc_opcode_t>(RPC_TEST_INCREMENT),		device_id, static_cast<rpc_opcode_t>(RPC_TEST_INCREMENT),
[](rpc_port_t port, void *data) {		[](rpc_port_t port, void *data) {
rpc_recv_and_send(		rpc_recv_and_send(
port,		port,
[](rpc_buffer_t buffer, void data) {		[](rpc_buffer_t buffer, void data) {
reinterpret_cast<uint64_t *>(buffer->data)[0] += 1;		reinterpret_cast<uint64_t *>(buffer->data)[0] += 1;
},		},
data);		data);
▲ Show 20 Lines • Show All 81 Lines • ▼ Show 20 Lines	rpc_register_callback(
&cnt);		&cnt);
},		},
nullptr);		nullptr);

// Register the stream test handler.		// Register the stream test handler.
rpc_register_callback(		rpc_register_callback(
device_id, static_cast<rpc_opcode_t>(RPC_TEST_STREAM),		device_id, static_cast<rpc_opcode_t>(RPC_TEST_STREAM),
[](rpc_port_t port, void *data) {		[](rpc_port_t port, void *data) {
uint64_t sizes[RPC_MAXIMUM_LANE_SIZE] = {0};		uint64_t sizes[lane_size] = {0};
void *dst[RPC_MAXIMUM_LANE_SIZE] = {nullptr};		void *dst[lane_size] = {nullptr};
rpc_recv_n(		rpc_recv_n(
port, dst, sizes,		port, dst, sizes,
[](uint64_t size, void ) -> void { return new char[size]; },		[](uint64_t size, void ) -> void { return new char[size]; },
nullptr);		nullptr);
rpc_send_n(port, dst, sizes);		rpc_send_n(port, dst, sizes);
for (uint64_t i = 0; i < RPC_MAXIMUM_LANE_SIZE; ++i) {		for (uint64_t i = 0; i < lane_size; ++i) {
if (dst[i])		if (dst[i])
delete[] reinterpret_cast<uint8_t *>(dst[i]);		delete[] reinterpret_cast<uint8_t *>(dst[i]);
}		}
},		},
nullptr);		nullptr);
}		}

#endif		#endif

libc/utils/gpu/loader/amdgpu/Loader.cpp

Show First 20 Lines • Show All 144 Lines • ▼ Show 20 Lines	hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable,
// Look up the '_start' kernel in the loaded executable.		// Look up the '_start' kernel in the loaded executable.
hsa_executable_symbol_t symbol;		hsa_executable_symbol_t symbol;
if (hsa_status_t err = hsa_executable_get_symbol_by_name(		if (hsa_status_t err = hsa_executable_get_symbol_by_name(
executable, kernel_name, &dev_agent, &symbol))		executable, kernel_name, &dev_agent, &symbol))
return err;		return err;

// Register RPC callbacks for the malloc and free functions on HSA.		// Register RPC callbacks for the malloc and free functions on HSA.
uint32_t device_id = 0;		uint32_t device_id = 0;
register_rpc_callbacks(device_id);

auto tuple = std::make_tuple(dev_agent, coarsegrained_pool);		auto tuple = std::make_tuple(dev_agent, coarsegrained_pool);
rpc_register_callback(		rpc_register_callback(
device_id, RPC_MALLOC,		device_id, RPC_MALLOC,
[](rpc_port_t port, void *data) {		[](rpc_port_t port, void *data) {
auto malloc_handler = [](rpc_buffer_t buffer, void data) -> void {		auto malloc_handler = [](rpc_buffer_t buffer, void data) -> void {
auto &[dev_agent, pool] = static_cast<decltype(tuple) >(data);		auto &[dev_agent, pool] = static_cast<decltype(tuple) >(data);
uint64_t size = buffer->data[0];		uint64_t size = buffer->data[0];
void *dev_ptr = nullptr;		void *dev_ptr = nullptr;
▲ Show 20 Lines • Show All 256 Lines • ▼ Show 20 Lines	if (hsa_status_t err = hsa_amd_memory_pool_allocate(finegrained_pool, size,
handle_error(err);		handle_error(err);
hsa_amd_agents_allow_access(1, &dev_agent, nullptr, dev_ptr);		hsa_amd_agents_allow_access(1, &dev_agent, nullptr, dev_ptr);
return dev_ptr;		return dev_ptr;
};		};
if (rpc_status_t err = rpc_server_init(device_id, RPC_MAXIMUM_PORT_COUNT,		if (rpc_status_t err = rpc_server_init(device_id, RPC_MAXIMUM_PORT_COUNT,
wavefront_size, rpc_alloc, &tuple))		wavefront_size, rpc_alloc, &tuple))
handle_error(err);		handle_error(err);

		// Register callbacks for the RPC unit tests.
		JonChesterfield (Unsubmitted, Not Done): handle_error if it isn't 32 or 64 please. Maybe a switch on size with handle_error in the default?
		if (wavefront_size == 32)
		register_rpc_callbacks<32>(device_id);
		else if (wavefront_size == 64)
		register_rpc_callbacks<64>(device_id);
		else
		handle_error("Invalid wavefront size");

// Obtain the GPU's fixed-frequency clock rate and copy it to the GPU.		// Obtain the GPU's fixed-frequency clock rate and copy it to the GPU.
// If the clock_freq symbol is missing, no work to do.		// If the clock_freq symbol is missing, no work to do.
hsa_executable_symbol_t freq_sym;		hsa_executable_symbol_t freq_sym;
if (HSA_STATUS_SUCCESS ==		if (HSA_STATUS_SUCCESS ==
hsa_executable_get_symbol_by_name(executable, "__llvm_libc_clock_freq",		hsa_executable_get_symbol_by_name(executable, "__llvm_libc_clock_freq",
&dev_agent, &freq_sym)) {		&dev_agent, &freq_sym)) {

void *host_clock_freq;		void *host_clock_freq;
▲ Show 20 Lines • Show All 98 Lines • Show Last 20 Lines

libc/utils/gpu/loader/nvptx/Loader.cpp

Show First 20 Lines • Show All 171 Lines • ▼ Show 20 Lines	CUresult launch_kernel(CUmodule binary, CUstream stream,
// needs to be done on a separate stream or else it will deadlock with the		// needs to be done on a separate stream or else it will deadlock with the
// executing kernel.		// executing kernel.
CUstream memory_stream;		CUstream memory_stream;
if (CUresult err = cuStreamCreate(&memory_stream, CU_STREAM_NON_BLOCKING))		if (CUresult err = cuStreamCreate(&memory_stream, CU_STREAM_NON_BLOCKING))
handle_error(err);		handle_error(err);

// Register RPC callbacks for the malloc and free functions on CUDA.		// Register RPC callbacks for the malloc and free functions on CUDA.
uint32_t device_id = 0;		uint32_t device_id = 0;
register_rpc_callbacks(device_id);		register_rpc_callbacks<32>(device_id);

rpc_register_callback(		rpc_register_callback(
device_id, RPC_MALLOC,		device_id, RPC_MALLOC,
[](rpc_port_t port, void *data) {		[](rpc_port_t port, void *data) {
auto malloc_handler = [](rpc_buffer_t buffer, void data) -> void {		auto malloc_handler = [](rpc_buffer_t buffer, void data) -> void {
CUstream memory_stream = static_cast<CUstream >(data);		CUstream memory_stream = static_cast<CUstream >(data);
uint64_t size = buffer->data[0];		uint64_t size = buffer->data[0];
CUdeviceptr dev_ptr;		CUdeviceptr dev_ptr;
▲ Show 20 Lines • Show All 169 Lines • Show Last 20 Lines

libc/utils/gpu/server/rpc_server.h

	Show All 14 Lines

	#ifdef __cplusplus			#ifdef __cplusplus
	extern "C" {			extern "C" {
	#endif			#endif

	/// The maximum number of ports that can be opened for any server.			/// The maximum number of ports that can be opened for any server.
	const uint64_t RPC_MAXIMUM_PORT_COUNT = 512;			const uint64_t RPC_MAXIMUM_PORT_COUNT = 512;

	/// The maximum number of parallel lanes that we can support.
	const uint64_t RPC_MAXIMUM_LANE_SIZE = 64;

	/// The symbol name associated with the client for use with the LLVM C library			/// The symbol name associated with the client for use with the LLVM C library
	/// implementation.			/// implementation.
	const char *const rpc_client_symbol_name = "__llvm_libc_rpc_client";			const char *const rpc_client_symbol_name = "__llvm_libc_rpc_client";

	/// status codes.			/// status codes.
	typedef enum {			typedef enum {
	RPC_STATUS_SUCCESS = 0x0,			RPC_STATUS_SUCCESS = 0x0,
	RPC_STATUS_CONTINUE = 0x1,			RPC_STATUS_CONTINUE = 0x1,
	▲ Show 20 Lines • Show All 94 Lines • Show Last 20 Lines

libc/utils/gpu/server/rpc_server.cpp

Show All 20 Lines
using namespace __llvm_libc;		using namespace __llvm_libc;

static_assert(sizeof(rpc_buffer_t) == sizeof(rpc::Buffer),		static_assert(sizeof(rpc_buffer_t) == sizeof(rpc::Buffer),
"Buffer size mismatch");		"Buffer size mismatch");

static_assert(RPC_MAXIMUM_PORT_COUNT == rpc::MAX_PORT_COUNT,		static_assert(RPC_MAXIMUM_PORT_COUNT == rpc::MAX_PORT_COUNT,
"Incorrect maximum port count");		"Incorrect maximum port count");

static_assert(RPC_MAXIMUM_LANE_SIZE == rpc::MAX_LANE_SIZE,
"Incorrect maximum port count");

// The client needs to support different lane sizes for the SIMT model. Because		// The client needs to support different lane sizes for the SIMT model. Because
// of this we need to select between the possible sizes that the client can use.		// of this we need to select between the possible sizes that the client can use.
struct Server {		struct Server {
template <uint32_t lane_size>		template <uint32_t lane_size>
Server(std::unique_ptr<rpc::Server<lane_size>> &&server)		Server(std::unique_ptr<rpc::Server<lane_size>> &&server)
: server(std::move(server)) {}		: server(std::move(server)) {}

void reset(uint64_t port_count, void *buffer) {		void reset(uint64_t port_count, void *buffer) {
Show All 35 Lines	rpc_status_t handle_server(
auto port = server.try_open();		auto port = server.try_open();
if (!port)		if (!port)
return RPC_STATUS_SUCCESS;		return RPC_STATUS_SUCCESS;

switch (port->get_opcode()) {		switch (port->get_opcode()) {
case RPC_WRITE_TO_STREAM:		case RPC_WRITE_TO_STREAM:
case RPC_WRITE_TO_STDERR:		case RPC_WRITE_TO_STDERR:
case RPC_WRITE_TO_STDOUT: {		case RPC_WRITE_TO_STDOUT: {
uint64_t sizes[rpc::MAX_LANE_SIZE] = {0};		uint64_t sizes[lane_size] = {0};
void *strs[rpc::MAX_LANE_SIZE] = {nullptr};		void *strs[lane_size] = {nullptr};
FILE *files[rpc::MAX_LANE_SIZE] = {nullptr};		FILE *files[lane_size] = {nullptr};
if (port->get_opcode() == RPC_WRITE_TO_STREAM)		if (port->get_opcode() == RPC_WRITE_TO_STREAM)
port->recv([&](rpc::Buffer *buffer, uint32_t id) {		port->recv([&](rpc::Buffer *buffer, uint32_t id) {
files[id] = reinterpret_cast<FILE *>(buffer->data[0]);		files[id] = reinterpret_cast<FILE *>(buffer->data[0]);
});		});
port->recv_n(strs, sizes, [&](uint64_t size) { return new char[size]; });		port->recv_n(strs, sizes, [&](uint64_t size) { return new char[size]; });
port->send([&](rpc::Buffer *buffer, uint32_t id) {		port->send([&](rpc::Buffer *buffer, uint32_t id) {
FILE *file =		FILE *file =
port->get_opcode() == RPC_WRITE_TO_STDOUT		port->get_opcode() == RPC_WRITE_TO_STDOUT
? stdout		? stdout
: (port->get_opcode() == RPC_WRITE_TO_STDERR ? stderr		: (port->get_opcode() == RPC_WRITE_TO_STDERR ? stderr
: files[id]);		: files[id]);
uint64_t ret = fwrite(strs[id], 1, sizes[id], file);		uint64_t ret = fwrite(strs[id], 1, sizes[id], file);
std::memcpy(buffer->data, &ret, sizeof(uint64_t));		std::memcpy(buffer->data, &ret, sizeof(uint64_t));
		delete[] reinterpret_cast<uint8_t *>(strs[id]);
		JonChesterfield (Unsubmitted, Not Done): Was a functional change intended here?
		jhuber6 (Author, Unsubmitted, Done): This isn't technically a functional change: since this callback automatically uses the current thread mask, I could put this part in here instead, which serves the purpose of getting rid of the invocation of the lane size.
});		});
for (uint64_t i = 0; i < rpc::MAX_LANE_SIZE; ++i) {
if (strs[i])
delete[] reinterpret_cast<uint8_t *>(strs[i]);
}
break;		break;
}		}
case RPC_READ_FROM_STREAM:		case RPC_READ_FROM_STREAM:
case RPC_READ_FROM_STDIN: {		case RPC_READ_FROM_STDIN: {
uint64_t sizes[rpc::MAX_LANE_SIZE] = {0};		uint64_t sizes[lane_size] = {0};
void *data[rpc::MAX_LANE_SIZE] = {nullptr};		void *data[lane_size] = {nullptr};
uint64_t rets[rpc::MAX_LANE_SIZE] = {0};		uint64_t rets[lane_size] = {0};
port->recv([&](rpc::Buffer *buffer, uint32_t id) {		port->recv([&](rpc::Buffer *buffer, uint32_t id) {
sizes[id] = buffer->data[0];		sizes[id] = buffer->data[0];
data[id] = new char[sizes[id]];		data[id] = new char[sizes[id]];
FILE *file = port->get_opcode() == RPC_READ_FROM_STREAM		FILE *file = port->get_opcode() == RPC_READ_FROM_STREAM
? reinterpret_cast<FILE *>(buffer->data[1])		? reinterpret_cast<FILE *>(buffer->data[1])
: stdin;		: stdin;
rets[id] = fread(data[id], 1, sizes[id], file);		rets[id] = fread(data[id], 1, sizes[id], file);
});		});
port->send_n(data, sizes);		port->send_n(data, sizes);
port->send([&](rpc::Buffer *buffer, uint32_t id) {		port->send([&](rpc::Buffer *buffer, uint32_t id) {
delete[] reinterpret_cast<uint8_t *>(data[id]);		delete[] reinterpret_cast<uint8_t *>(data[id]);
std::memcpy(buffer->data, &rets[id], sizeof(uint64_t));		std::memcpy(buffer->data, &rets[id], sizeof(uint64_t));
});		});
break;		break;
}		}
case RPC_OPEN_FILE: {		case RPC_OPEN_FILE: {
uint64_t sizes[rpc::MAX_LANE_SIZE] = {0};		uint64_t sizes[lane_size] = {0};
void *paths[rpc::MAX_LANE_SIZE] = {nullptr};		void *paths[lane_size] = {nullptr};
port->recv_n(paths, sizes, [&](uint64_t size) { return new char[size]; });		port->recv_n(paths, sizes, [&](uint64_t size) { return new char[size]; });
port->recv_and_send([&](rpc::Buffer *buffer, uint32_t id) {		port->recv_and_send([&](rpc::Buffer *buffer, uint32_t id) {
FILE file = fopen(reinterpret_cast<char >(paths[id]),		FILE file = fopen(reinterpret_cast<char >(paths[id]),
reinterpret_cast<char *>(buffer->data));		reinterpret_cast<char *>(buffer->data));
buffer->data[0] = reinterpret_cast<uintptr_t>(file);		buffer->data[0] = reinterpret_cast<uintptr_t>(file);
});		});
break;		break;
}		}
Show All 10 Lines	case RPC_EXIT: {
port->recv([](rpc::Buffer *buffer) {		port->recv([](rpc::Buffer *buffer) {
int status = 0;		int status = 0;
std::memcpy(&status, buffer->data, sizeof(int));		std::memcpy(&status, buffer->data, sizeof(int));
exit(status);		exit(status);
});		});
break;		break;
}		}
case RPC_HOST_CALL: {		case RPC_HOST_CALL: {
uint64_t sizes[rpc::MAX_LANE_SIZE] = {0};		uint64_t sizes[lane_size] = {0};
void *args[rpc::MAX_LANE_SIZE] = {nullptr};		void *args[lane_size] = {nullptr};
port->recv_n(args, sizes, [&](uint64_t size) { return new char[size]; });		port->recv_n(args, sizes, [&](uint64_t size) { return new char[size]; });
port->recv([&](rpc::Buffer *buffer, uint32_t id) {		port->recv([&](rpc::Buffer *buffer, uint32_t id) {
reinterpret_cast<void ()(void )>(buffer->data[0])(args[id]);		reinterpret_cast<void ()(void )>(buffer->data[0])(args[id]);
});		});
port->send([&](rpc::Buffer *, uint32_t id) {		port->send([&](rpc::Buffer *, uint32_t id) {
delete[] reinterpret_cast<uint8_t *>(args[id]);		delete[] reinterpret_cast<uint8_t *>(args[id]);
});		});
break;		break;
▲ Show 20 Lines • Show All 232 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[libc] Remove `MAX_LANE_SIZE` definition from the RPC server
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 552752

libc/src/__support/RPC/rpc_util.h

libc/utils/gpu/loader/Loader.h

libc/utils/gpu/loader/amdgpu/Loader.cpp

libc/utils/gpu/loader/nvptx/Loader.cpp

libc/utils/gpu/server/rpc_server.h

libc/utils/gpu/server/rpc_server.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[libc] Remove `MAX_LANE_SIZE` definition from the RPC server
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 552752

libc/src/__support/RPC/rpc_util.h

libc/utils/gpu/loader/Loader.h

libc/utils/gpu/loader/amdgpu/Loader.cpp

libc/utils/gpu/loader/nvptx/Loader.cpp

libc/utils/gpu/server/rpc_server.h

libc/utils/gpu/server/rpc_server.cpp

[libc] Remove `MAX_LANE_SIZE` definition from the RPC server
ClosedPublic