diff --git a/libc/startup/gpu/nvptx/CMakeLists.txt b/libc/startup/gpu/nvptx/CMakeLists.txt
--- a/libc/startup/gpu/nvptx/CMakeLists.txt
+++ b/libc/startup/gpu/nvptx/CMakeLists.txt
@@ -2,6 +2,8 @@
   crt1
   SRC
     start.cpp
+  DEPENDS
+    libc.src.__support.RPC.rpc_client
   COMPILE_OPTIONS
     -ffreestanding # To avoid compiler warnings about calling the main function.
     -fno-builtin
diff --git a/libc/startup/gpu/nvptx/start.cpp b/libc/startup/gpu/nvptx/start.cpp
--- a/libc/startup/gpu/nvptx/start.cpp
+++ b/libc/startup/gpu/nvptx/start.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of crt for amdgpu ----------------------------------===//
+//===-- Implementation of crt for nvptx -----------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,10 +6,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/__support/RPC/rpc_client.h"
+
 extern "C" int main(int argc, char **argv, char **envp);
 
 extern "C" [[gnu::visibility("protected")]] __attribute__((nvptx_kernel)) void
 _start(int argc, char **argv, char **envp, int *ret, void *in, void *out,
        void *buffer) {
+  __llvm_libc::rpc::client.reset(in, out, buffer);
+
   __atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED);
 }
diff --git a/libc/utils/gpu/loader/nvptx/Loader.cpp b/libc/utils/gpu/loader/nvptx/Loader.cpp
--- a/libc/utils/gpu/loader/nvptx/Loader.cpp
+++ b/libc/utils/gpu/loader/nvptx/Loader.cpp
@@ -15,6 +15,8 @@
 #include "Loader.h"
 
+#include "src/__support/RPC/rpc.h"
+
 #include "cuda.h"
 #include <cstdio>
 #include <cstdlib>
@@ -32,6 +34,30 @@
   void *buffer;
 };
 
+static __llvm_libc::rpc::Server server;
+
+/// Queries the RPC client at least once and performs server-side work if there
+/// are any active requests.
+void handle_server() {
+  while (server.handle(
+      [&](__llvm_libc::rpc::Buffer *buffer) {
+        switch (static_cast<__llvm_libc::rpc::Opcode>(buffer->data[0])) {
+        case __llvm_libc::rpc::Opcode::PRINT_TO_STDERR: {
+          fputs(reinterpret_cast<const char *>(&buffer->data[1]), stderr);
+          break;
+        }
+        case __llvm_libc::rpc::Opcode::EXIT: {
+          exit(buffer->data[1]);
+          break;
+        }
+        default:
+          return;
+        };
+      },
+      [](__llvm_libc::rpc::Buffer *buffer) {}))
+    ;
+}
+
 static void handle_error(CUresult err) {
   if (err == CUDA_SUCCESS)
     return;
@@ -106,8 +132,13 @@
   if (CUresult err = cuMemsetD32(dev_ret, 0, 1))
     handle_error(err);
 
+  void *server_inbox = allocator(sizeof(__llvm_libc::cpp::Atomic<int>));
+  void *server_outbox = allocator(sizeof(__llvm_libc::cpp::Atomic<int>));
+  void *buffer = allocator(sizeof(__llvm_libc::rpc::Buffer));
+  if (!server_inbox || !server_outbox || !buffer)
+    handle_error("Failed to allocate memory for the RPC client / server.");
+
   // Set up the arguments to the '_start' kernel on the GPU.
-  // TODO: Setup RPC server implementation;
   uint64_t args_size = sizeof(kernel_args_t);
   kernel_args_t args;
   std::memset(&args, 0, args_size);
@@ -115,10 +146,16 @@
   args.argv = dev_argv;
   args.envp = dev_envp;
   args.ret = reinterpret_cast<void *>(dev_ret);
+  args.inbox = server_outbox;
+  args.outbox = server_inbox;
+  args.buffer = buffer;
   void *args_config[] = {CU_LAUNCH_PARAM_BUFFER_POINTER, &args,
                          CU_LAUNCH_PARAM_BUFFER_SIZE, &args_size,
                          CU_LAUNCH_PARAM_END};
 
+  // Initialize the RPC server's buffer for host-device communication.
+  server.reset(server_inbox, server_outbox, buffer);
+
   // Call the kernel with the given arguments.
   if (CUresult err =
           cuLaunchKernel(function, /*gridDimX=*/1, /*gridDimY=*/1,
@@ -126,9 +163,10 @@
                          /*blockDimZ=*/1, 0, stream, nullptr, args_config))
     handle_error(err);
 
-  // TODO: Query the RPC server periodically while the kernel is running.
+  // Wait until the kernel has completed execution on the device. Periodically
+  // check the RPC client for work to be performed on the server.
   while (cuStreamQuery(stream) == CUDA_ERROR_NOT_READY)
-    ;
+    handle_server();
 
   // Copy the return value back from the kernel and wait.
   int host_ret = 0;
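
Note on the mailbox wiring above: the loader deliberately crosses the two flags when building the kernel arguments (args.inbox = server_outbox, args.outbox = server_inbox), so whatever the device-side client writes to its outbox is exactly what the host-side server reads from its inbox, and vice versa, while both endpoints share the same Buffer. The following is a minimal conceptual sketch of that pairing in plain C++; the names (SharedState, Endpoint, client_view, server_view) and the use of std::atomic are illustrative assumptions, not the actual __llvm_libc::rpc types.

#include <atomic>
#include <cstdint>

// Hypothetical sketch: one shared allocation holds two mailbox flags and an
// argument buffer. Each endpoint sees the same memory with the inbox/outbox
// roles swapped, mirroring how the loader hands server_outbox to the client
// as its inbox and server_inbox as its outbox.
struct SharedState {
  std::atomic<uint32_t> client_to_server{0}; // client outbox == server inbox
  std::atomic<uint32_t> server_to_client{0}; // server outbox == client inbox
  uint64_t buffer[8]{};                      // shared argument payload
};

struct Endpoint {
  std::atomic<uint32_t> *inbox;  // flag this side waits on
  std::atomic<uint32_t> *outbox; // flag this side signals
  uint64_t *buffer;              // shared payload, identical for both sides
};

inline Endpoint client_view(SharedState &s) {
  return {&s.server_to_client, &s.client_to_server, s.buffer};
}

inline Endpoint server_view(SharedState &s) {
  return {&s.client_to_server, &s.server_to_client, s.buffer};
}

Crossing the mailboxes this way lets both sides run the same handshake logic: each endpoint only ever waits on its inbox and signals through its outbox, which is why the device-side client.reset() in _start and the host-side server.reset() in the loader take the same three pointers with inbox and outbox swapped.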