diff --git a/libc/docs/gpu/rpc.rst b/libc/docs/gpu/rpc.rst --- a/libc/docs/gpu/rpc.rst +++ b/libc/docs/gpu/rpc.rst @@ -15,3 +15,10 @@ require support from the operating system. We instead implement a remote procedure call (RPC) interface to allow submitting work from the GPU to a host server that forwards it to the host system. + +Extensions +---------- + +We describe which operation the RPC server should take with a 16-bit opcode. We +consider the first 32768 numbers to be reserved while the others are free to +use. diff --git a/libc/include/llvm-libc-types/rpc_opcodes_t.h b/libc/include/llvm-libc-types/rpc_opcodes_t.h --- a/libc/include/llvm-libc-types/rpc_opcodes_t.h +++ b/libc/include/llvm-libc-types/rpc_opcodes_t.h @@ -20,11 +20,6 @@ RPC_MALLOC = 7, RPC_FREE = 8, RPC_HOST_CALL = 9, - // TODO: Move these out of here and handle then with custom handlers in the - // loader. - RPC_TEST_INCREMENT = 1000, - RPC_TEST_INTERFACE = 1001, - RPC_TEST_STREAM = 1002, } rpc_opcode_t; #endif // __LLVM_LIBC_TYPES_RPC_OPCODE_H__ diff --git a/libc/include/llvm-libc-types/test_rpc_opcodes_t.h b/libc/include/llvm-libc-types/test_rpc_opcodes_t.h new file mode 100644 --- /dev/null +++ b/libc/include/llvm-libc-types/test_rpc_opcodes_t.h @@ -0,0 +1,21 @@ +//===-- Definition of RPC opcodes used for internal tests -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __LLVM_LIBC_TYPES_TEST_RPC_OPCODE_H__ +#define __LLVM_LIBC_TYPES_TEST_RPC_OPCODE_H__ + +// We consider the first 32768 opcodes as reserved for libc purposes. We allow +// extensions to use any other number without conflicting with anything else. 
+typedef enum : unsigned short { + RPC_TEST_NOOP = 1 << 15, + RPC_TEST_INCREMENT, + RPC_TEST_INTERFACE, + RPC_TEST_STREAM, +} rpc_test_opcode_t; + +#endif // __LLVM_LIBC_TYPES_TEST_RPC_OPCODE_H__ diff --git a/libc/test/integration/startup/gpu/rpc_interface_test.cpp b/libc/test/integration/startup/gpu/rpc_interface_test.cpp --- a/libc/test/integration/startup/gpu/rpc_interface_test.cpp +++ b/libc/test/integration/startup/gpu/rpc_interface_test.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "include/llvm-libc-types/test_rpc_opcodes_t.h" #include "src/__support/GPU/utils.h" #include "src/__support/RPC/rpc_client.h" #include "test/IntegrationTest/test.h" diff --git a/libc/test/integration/startup/gpu/rpc_stream_test.cpp b/libc/test/integration/startup/gpu/rpc_stream_test.cpp --- a/libc/test/integration/startup/gpu/rpc_stream_test.cpp +++ b/libc/test/integration/startup/gpu/rpc_stream_test.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "include/llvm-libc-types/test_rpc_opcodes_t.h" #include "src/__support/GPU/utils.h" #include "src/__support/RPC/rpc_client.h" #include "src/__support/integer_to_string.h" diff --git a/libc/test/integration/startup/gpu/rpc_test.cpp b/libc/test/integration/startup/gpu/rpc_test.cpp --- a/libc/test/integration/startup/gpu/rpc_test.cpp +++ b/libc/test/integration/startup/gpu/rpc_test.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "include/llvm-libc-types/test_rpc_opcodes_t.h" #include "src/__support/GPU/utils.h" #include "src/__support/RPC/rpc_client.h" #include "test/IntegrationTest/test.h" diff --git a/libc/utils/gpu/loader/Loader.h b/libc/utils/gpu/loader/Loader.h --- a/libc/utils/gpu/loader/Loader.h +++ b/libc/utils/gpu/loader/Loader.h @@ -10,6 +10,9 @@ #define LLVM_LIBC_UTILS_GPU_LOADER_LOADER_H #include "utils/gpu/server/rpc_server.h" + 
+#include "include/llvm-libc-types/test_rpc_opcodes_t.h" + #include #include #include @@ -104,4 +107,119 @@ handle_error("Failure in the RPC server\n"); } +inline void register_rpc_callbacks(uint32_t device_id) { + // Register the ping test for the `libc` tests. + rpc_register_callback( + device_id, static_cast(RPC_TEST_INCREMENT), + [](rpc_port_t port, void *data) { + rpc_recv_and_send( + port, + [](rpc_buffer_t *buffer, void *data) { + reinterpret_cast(buffer->data)[0] += 1; + }, + data); + }, + nullptr); + + // Register the interface test callbacks. + rpc_register_callback( + device_id, static_cast(RPC_TEST_INTERFACE), + [](rpc_port_t port, void *data) { + uint64_t cnt = 0; + bool end_with_recv; + rpc_recv( + port, + [](rpc_buffer_t *buffer, void *data) { + *reinterpret_cast(data) = buffer->data[0]; + }, + &end_with_recv); + rpc_recv( + port, + [](rpc_buffer_t *buffer, void *data) { + *reinterpret_cast(data) = buffer->data[0]; + }, + &cnt); + rpc_send( + port, + [](rpc_buffer_t *buffer, void *data) { + uint64_t &cnt = *reinterpret_cast(data); + buffer->data[0] = cnt = cnt + 1; + }, + &cnt); + rpc_recv( + port, + [](rpc_buffer_t *buffer, void *data) { + *reinterpret_cast(data) = buffer->data[0]; + }, + &cnt); + rpc_send( + port, + [](rpc_buffer_t *buffer, void *data) { + uint64_t &cnt = *reinterpret_cast(data); + buffer->data[0] = cnt = cnt + 1; + }, + &cnt); + rpc_recv( + port, + [](rpc_buffer_t *buffer, void *data) { + *reinterpret_cast(data) = buffer->data[0]; + }, + &cnt); + rpc_recv( + port, + [](rpc_buffer_t *buffer, void *data) { + *reinterpret_cast(data) = buffer->data[0]; + }, + &cnt); + rpc_send( + port, + [](rpc_buffer_t *buffer, void *data) { + uint64_t &cnt = *reinterpret_cast(data); + buffer->data[0] = cnt = cnt + 1; + }, + &cnt); + rpc_send( + port, + [](rpc_buffer_t *buffer, void *data) { + uint64_t &cnt = *reinterpret_cast(data); + buffer->data[0] = cnt = cnt + 1; + }, + &cnt); + if (end_with_recv) + rpc_recv( + port, + [](rpc_buffer_t *buffer, 
void *data) { + *reinterpret_cast(data) = buffer->data[0]; + }, + &cnt); + else + rpc_send( + port, + [](rpc_buffer_t *buffer, void *data) { + uint64_t &cnt = *reinterpret_cast(data); + buffer->data[0] = cnt = cnt + 1; + }, + &cnt); + }, + nullptr); + + // Register the stream test handler. + rpc_register_callback( + device_id, static_cast(RPC_TEST_STREAM), + [](rpc_port_t port, void *data) { + uint64_t sizes[RPC_MAXIMUM_LANE_SIZE] = {0}; + void *dst[RPC_MAXIMUM_LANE_SIZE] = {nullptr}; + rpc_recv_n( + port, dst, sizes, + [](uint64_t size, void *) -> void * { return new char[size]; }, + nullptr); + rpc_send_n(port, dst, sizes); + for (uint64_t i = 0; i < RPC_MAXIMUM_LANE_SIZE; ++i) { + if (dst[i]) + delete[] reinterpret_cast(dst[i]); + } + }, + nullptr); +} + #endif diff --git a/libc/utils/gpu/loader/amdgpu/Loader.cpp b/libc/utils/gpu/loader/amdgpu/Loader.cpp --- a/libc/utils/gpu/loader/amdgpu/Loader.cpp +++ b/libc/utils/gpu/loader/amdgpu/Loader.cpp @@ -150,6 +150,8 @@ // Register RPC callbacks for the malloc and free functions on HSA. uint32_t device_id = 0; + register_rpc_callbacks(device_id); + auto tuple = std::make_tuple(dev_agent, coarsegrained_pool); rpc_register_callback( device_id, RPC_MALLOC, diff --git a/libc/utils/gpu/loader/nvptx/Loader.cpp b/libc/utils/gpu/loader/nvptx/Loader.cpp --- a/libc/utils/gpu/loader/nvptx/Loader.cpp +++ b/libc/utils/gpu/loader/nvptx/Loader.cpp @@ -177,6 +177,8 @@ // Register RPC callbacks for the malloc and free functions on HSA. uint32_t device_id = 0; + register_rpc_callbacks(device_id); + rpc_register_callback( device_id, RPC_MALLOC, [](rpc_port_t port, void *data) { diff --git a/libc/utils/gpu/server/rpc_server.h b/libc/utils/gpu/server/rpc_server.h --- a/libc/utils/gpu/server/rpc_server.h +++ b/libc/utils/gpu/server/rpc_server.h @@ -20,9 +20,12 @@ /// The maxium number of ports that can be opened for any server. const uint64_t RPC_MAXIMUM_PORT_COUNT = 512; +/// The maximum number of parallel lanes that we can support. 
+const uint64_t RPC_MAXIMUM_LANE_SIZE = 64; + /// The symbol name associated with the client for use with the LLVM C library /// implementation. -inline const char *rpc_client_symbol_name = "__llvm_libc_rpc_client"; +const char *const rpc_client_symbol_name = "__llvm_libc_rpc_client"; /// status codes. typedef enum { @@ -100,9 +103,19 @@ /// Use the \p port to send a buffer using the \p callback. void rpc_send(rpc_port_t port, rpc_port_callback_ty callback, void *data); +/// Use the \p port to send the buffers in \p src with sizes \p size. The inputs +/// are arrays of at least the configured lane size. +void rpc_send_n(rpc_port_t port, const void *const *src, uint64_t *size); + /// Use the \p port to recieve a buffer using the \p callback. void rpc_recv(rpc_port_t port, rpc_port_callback_ty callback, void *data); +/// Use the \p port to receive bytes into \p dst with sizes \p size. The inputs +/// are arrays of at least the configured lane size. The \p alloc function +/// allocates memory for the received bytes. +void rpc_recv_n(rpc_port_t port, void **dst, uint64_t *size, rpc_alloc_ty alloc, + void *data); + /// Use the \p port to receive and send a buffer using the \p callback. void rpc_recv_and_send(rpc_port_t port, rpc_port_callback_ty callback, void *data); diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp --- a/libc/utils/gpu/server/rpc_server.cpp +++ b/libc/utils/gpu/server/rpc_server.cpp @@ -26,6 +26,9 @@ static_assert(RPC_MAXIMUM_PORT_COUNT == rpc::MAX_PORT_COUNT, "Incorrect maximum port count"); +static_assert(RPC_MAXIMUM_LANE_SIZE == rpc::MAX_LANE_SIZE, + "Incorrect maximum port count"); + // The client needs to support different lane sizes for the SIMT model. Because // of this we need to select between the possible sizes that the client can use. struct Server { @@ -141,43 +144,6 @@ }); break; } - // TODO: Move handling of these test cases to the loader implementation.
- case RPC_TEST_INCREMENT: { - port->recv_and_send([](rpc::Buffer *buffer) { - reinterpret_cast(buffer->data)[0] += 1; - }); - break; - } - case RPC_TEST_INTERFACE: { - uint64_t cnt = 0; - bool end_with_recv; - port->recv([&](rpc::Buffer *buffer) { end_with_recv = buffer->data[0]; }); - port->recv([&](rpc::Buffer *buffer) { cnt = buffer->data[0]; }); - port->send([&](rpc::Buffer *buffer) { buffer->data[0] = cnt = cnt + 1; }); - port->recv([&](rpc::Buffer *buffer) { cnt = buffer->data[0]; }); - port->send([&](rpc::Buffer *buffer) { buffer->data[0] = cnt = cnt + 1; }); - port->recv([&](rpc::Buffer *buffer) { cnt = buffer->data[0]; }); - port->recv([&](rpc::Buffer *buffer) { cnt = buffer->data[0]; }); - port->send([&](rpc::Buffer *buffer) { buffer->data[0] = cnt = cnt + 1; }); - port->send([&](rpc::Buffer *buffer) { buffer->data[0] = cnt = cnt + 1; }); - if (end_with_recv) - port->recv([&](rpc::Buffer *buffer) { cnt = buffer->data[0]; }); - else - port->send( - [&](rpc::Buffer *buffer) { buffer->data[0] = cnt = cnt + 1; }); - break; - } - case RPC_TEST_STREAM: { - uint64_t sizes[rpc::MAX_LANE_SIZE] = {0}; - void *dst[rpc::MAX_LANE_SIZE] = {nullptr}; - port->recv_n(dst, sizes, [](uint64_t size) { return new char[size]; }); - port->send_n(dst, sizes); - for (uint64_t i = 0; i < rpc::MAX_LANE_SIZE; ++i) { - if (dst[i]) - delete[] reinterpret_cast(dst[i]); - } - break; - } case RPC_NOOP: { port->recv([](rpc::Buffer *) {}); break; @@ -375,6 +341,11 @@ port); } +void rpc_send_n(rpc_port_t ref, const void *const *src, uint64_t *size) { + auto port = get_port(ref); + std::visit([=](auto &port) { port->send_n(src, size); }, port); +} + void rpc_recv(rpc_port_t ref, rpc_port_callback_ty callback, void *data) { auto port = get_port(ref); std::visit( @@ -386,6 +357,13 @@ port); } +void rpc_recv_n(rpc_port_t ref, void **dst, uint64_t *size, rpc_alloc_ty alloc, + void *data) { + auto port = get_port(ref); + auto alloc_fn = [=](uint64_t size) { return alloc(size, data); }; + 
std::visit([=](auto &port) { port->recv_n(dst, size, alloc_fn); }, port); +} + void rpc_recv_and_send(rpc_port_t ref, rpc_port_callback_ty callback, void *data) { auto port = get_port(ref);