diff --git a/libc/utils/gpu/CMakeLists.txt b/libc/utils/gpu/CMakeLists.txt
--- a/libc/utils/gpu/CMakeLists.txt
+++ b/libc/utils/gpu/CMakeLists.txt
@@ -1 +1,2 @@
+add_subdirectory(server)
 add_subdirectory(loader)
diff --git a/libc/utils/gpu/loader/amdgpu/CMakeLists.txt b/libc/utils/gpu/loader/amdgpu/CMakeLists.txt
--- a/libc/utils/gpu/loader/amdgpu/CMakeLists.txt
+++ b/libc/utils/gpu/loader/amdgpu/CMakeLists.txt
@@ -1,9 +1,8 @@
 add_executable(amdhsa_loader Loader.cpp)
-add_dependencies(amdhsa_loader libc.src.__support.RPC.rpc)
 
-target_include_directories(amdhsa_loader PRIVATE ${LIBC_SOURCE_DIR})
 target_link_libraries(amdhsa_loader
   PRIVATE
   hsa-runtime64::hsa-runtime64
+  rpc_server
   gpu_loader
 )
diff --git a/libc/utils/gpu/loader/amdgpu/Loader.cpp b/libc/utils/gpu/loader/amdgpu/Loader.cpp
--- a/libc/utils/gpu/loader/amdgpu/Loader.cpp
+++ b/libc/utils/gpu/loader/amdgpu/Loader.cpp
@@ -15,7 +15,7 @@
 
 #include "Loader.h"
 
-#include "src/__support/RPC/rpc.h"
+#include "rpc_server.h"
 
 #include <hsa/hsa.h>
 #include <hsa/hsa_ext_amd.h>
@@ -23,6 +23,7 @@
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
+#include <tuple>
 #include <utility>
 
 /// The name of the kernel we will launch. All AMDHSA kernels end with '.kd'.
@@ -39,30 +40,6 @@
   void *buffer;
 };
 
-static __llvm_libc::rpc::Server server;
-
-/// Queries the RPC client at least once and performs server-side work if there
-/// are any active requests.
-void handle_server() {
-  while (server.handle(
-      [&](__llvm_libc::rpc::Buffer *buffer) {
-        switch (static_cast<__llvm_libc::rpc::Opcode>(buffer->data[0])) {
-        case __llvm_libc::rpc::Opcode::PRINT_TO_STDERR: {
-          fputs(reinterpret_cast<const char *>(&buffer->data[1]), stderr);
-          break;
-        }
-        case __llvm_libc::rpc::Opcode::EXIT: {
-          exit(buffer->data[1]);
-          break;
-        }
-        default:
-          return;
-        };
-      },
-      [](__llvm_libc::rpc::Buffer *buffer) {}))
-    ;
-}
-
 /// Print the error code and exit if \p code indicates an error.
 static void handle_error(hsa_status_t code) {
   if (code == HSA_STATUS_SUCCESS || code == HSA_STATUS_INFO_BREAK)
@@ -311,24 +288,18 @@
   hsa_amd_memory_fill(dev_ret, 0, sizeof(int));
 
   // Allocate finegrained memory for the RPC server and client to share.
-  void *server_inbox;
-  void *server_outbox;
-  void *buffer;
-  if (hsa_status_t err = hsa_amd_memory_pool_allocate(
-          finegrained_pool, sizeof(__llvm_libc::cpp::Atomic<int>),
-          /*flags=*/0, &server_inbox))
-    handle_error(err);
-  if (hsa_status_t err = hsa_amd_memory_pool_allocate(
-          finegrained_pool, sizeof(__llvm_libc::cpp::Atomic<int>),
-          /*flags=*/0, &server_outbox))
-    handle_error(err);
-  if (hsa_status_t err = hsa_amd_memory_pool_allocate(
-          finegrained_pool, sizeof(__llvm_libc::rpc::Buffer),
-          /*flags=*/0, &buffer))
-    handle_error(err);
-  hsa_amd_agents_allow_access(1, &dev_agent, nullptr, server_inbox);
-  hsa_amd_agents_allow_access(1, &dev_agent, nullptr, server_outbox);
-  hsa_amd_agents_allow_access(1, &dev_agent, nullptr, buffer);
+  auto rpc_data = std::make_tuple(finegrained_pool, dev_agent);
+  auto rpc_allocator = [](uint64_t size, void *data) -> void * {
+    auto &[finegrained_pool, dev_agent] =
+        *reinterpret_cast<decltype(rpc_data) *>(data);
+    void *dev_ptr = nullptr;
+    if (hsa_status_t err = hsa_amd_memory_pool_allocate(finegrained_pool, size,
+                                                        /*flags=*/0, &dev_ptr))
+      handle_error(err);
+    hsa_amd_agents_allow_access(1, &dev_agent, nullptr, dev_ptr);
+    return dev_ptr;
+  };
+  rpc_init(rpc_allocator, &rpc_data);
 
   // Initialie all the arguments (explicit and implicit) to zero, then set the
   // explicit arguments to the values created above.
@@ -338,9 +309,9 @@
   kernel_args->argv = dev_argv;
   kernel_args->envp = dev_envp;
   kernel_args->ret = dev_ret;
-  kernel_args->inbox = server_outbox;
-  kernel_args->outbox = server_inbox;
-  kernel_args->buffer = buffer;
+  kernel_args->inbox = rpc_get_outbox();
+  kernel_args->outbox = rpc_get_inbox();
+  kernel_args->buffer = rpc_get_buffer();
 
   // Obtain a packet from the queue.
   uint64_t packet_id = hsa_queue_add_write_index_relaxed(queue, 1);
@@ -372,9 +343,6 @@
           hsa_signal_create(1, 0, nullptr, &packet->completion_signal))
     handle_error(err);
 
-  // Initialize the RPC server's buffer for host-device communication.
-  server.reset(server_inbox, server_outbox, buffer);
-
   // Initialize the packet header and set the doorbell signal to begin execution
   // by the HSA runtime.
   uint16_t header =
@@ -390,7 +358,7 @@
   while (hsa_signal_wait_scacquire(
              packet->completion_signal, HSA_SIGNAL_CONDITION_EQ, 0,
              /*timeout_hint=*/1024, HSA_WAIT_STATE_ACTIVE) != 0)
-    handle_server();
+    rpc_handle();
 
   // Create a memory signal and copy the return value back from the device into
   // a new buffer.
diff --git a/libc/utils/gpu/loader/nvptx/CMakeLists.txt b/libc/utils/gpu/loader/nvptx/CMakeLists.txt
--- a/libc/utils/gpu/loader/nvptx/CMakeLists.txt
+++ b/libc/utils/gpu/loader/nvptx/CMakeLists.txt
@@ -1,9 +1,8 @@
 add_executable(nvptx_loader Loader.cpp)
-add_dependencies(nvptx_loader libc.src.__support.RPC.rpc)
 
-target_include_directories(nvptx_loader PRIVATE ${LIBC_SOURCE_DIR})
 target_link_libraries(nvptx_loader
   PRIVATE
   gpu_loader
+  rpc_server
   CUDA::cuda_driver
 )
diff --git a/libc/utils/gpu/loader/nvptx/Loader.cpp b/libc/utils/gpu/loader/nvptx/Loader.cpp
--- a/libc/utils/gpu/loader/nvptx/Loader.cpp
+++ b/libc/utils/gpu/loader/nvptx/Loader.cpp
@@ -15,7 +15,7 @@
 
 #include "Loader.h"
 
-#include "src/__support/RPC/rpc.h"
+#include "rpc_server.h"
 
 #include "cuda.h"
 #include <cstddef>
@@ -34,30 +34,6 @@
   void *buffer;
 };
 
-static __llvm_libc::rpc::Server server;
-
-/// Queries the RPC client at least once and performs server-side work if there
-/// are any active requests.
-void handle_server() {
-  while (server.handle(
-      [&](__llvm_libc::rpc::Buffer *buffer) {
-        switch (static_cast<__llvm_libc::rpc::Opcode>(buffer->data[0])) {
-        case __llvm_libc::rpc::Opcode::PRINT_TO_STDERR: {
-          fputs(reinterpret_cast<const char *>(&buffer->data[1]), stderr);
-          break;
-        }
-        case __llvm_libc::rpc::Opcode::EXIT: {
-          exit(buffer->data[1]);
-          break;
-        }
-        default:
-          return;
-        };
-      },
-      [](__llvm_libc::rpc::Buffer *buffer) {}))
-    ;
-}
-
 static void handle_error(CUresult err) {
   if (err == CUDA_SUCCESS)
     return;
@@ -132,11 +108,14 @@
   if (CUresult err = cuMemsetD32(dev_ret, 0, 1))
     handle_error(err);
 
-  void *server_inbox = allocator(sizeof(__llvm_libc::cpp::Atomic<int>));
-  void *server_outbox = allocator(sizeof(__llvm_libc::cpp::Atomic<int>));
-  void *buffer = allocator(sizeof(__llvm_libc::rpc::Buffer));
-  if (!server_inbox || !server_outbox || !buffer)
-    handle_error("Failed to allocate memory the RPC client / server.");
+  // Allocate page-locked host memory for the RPC server and client to share.
+  auto rpc_allocator = [](uint64_t size, void *) -> void * {
+    void *dev_ptr = nullptr;
+    if (CUresult err = cuMemAllocHost(&dev_ptr, size))
+      handle_error(err);
+    return dev_ptr;
+  };
+  rpc_init(rpc_allocator, nullptr);
 
   // Set up the arguments to the '_start' kernel on the GPU.
   uint64_t args_size = sizeof(kernel_args_t);
@@ -146,16 +125,13 @@
   args.argv = dev_argv;
   args.envp = dev_envp;
   args.ret = reinterpret_cast<void *>(dev_ret);
-  args.inbox = server_outbox;
-  args.outbox = server_inbox;
-  args.buffer = buffer;
+  args.inbox = rpc_get_outbox();
+  args.outbox = rpc_get_inbox();
+  args.buffer = rpc_get_buffer();
   void *args_config[] = {CU_LAUNCH_PARAM_BUFFER_POINTER, &args,
                          CU_LAUNCH_PARAM_BUFFER_SIZE, &args_size,
                          CU_LAUNCH_PARAM_END};
 
-  // Initialize the RPC server's buffer for host-device communication.
-  server.reset(server_inbox, server_outbox, buffer);
-
   // Call the kernel with the given arguments.
   if (CUresult err =
           cuLaunchKernel(function, /*gridDimX=*/1, /*gridDimY=*/1,
@@ -166,7 +142,7 @@
   // Wait until the kernel has completed execution on the device. Periodically
   // check the RPC client for work to be performed on the server.
   while (cuStreamQuery(stream) == CUDA_ERROR_NOT_READY)
-    handle_server();
+    rpc_handle();
 
   // Copy the return value back from the kernel and wait.
   int host_ret = 0;
diff --git a/libc/utils/gpu/server/CMakeLists.txt b/libc/utils/gpu/server/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/libc/utils/gpu/server/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_library(rpc_server STATIC rpc_server.h rpc_server.cpp)
+
+# Include the RPC implementation from libc.
+add_dependencies(rpc_server libc.src.__support.RPC.rpc)
+target_include_directories(rpc_server PRIVATE ${LIBC_SOURCE_DIR})
+target_include_directories(rpc_server PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
diff --git a/libc/utils/gpu/server/rpc_server.h b/libc/utils/gpu/server/rpc_server.h
new file mode 100644
--- /dev/null
+++ b/libc/utils/gpu/server/rpc_server.h
@@ -0,0 +1,46 @@
+//===-- Shared memory RPC server instantiation ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_UTILS_GPU_SERVER_RPC_SERVER_H
+#define LLVM_LIBC_UTILS_GPU_SERVER_RPC_SERVER_H
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// Memory callbacks; \p data is the pointer passed to rpc_init / rpc_deinit.
+typedef void *(rpc_alloc_ty)(uint64_t size, void *data);
+typedef void(rpc_dealloc_ty)(void *ptr, void *data);
+
+/// Allocate the inbox, outbox and buffer through \p alloc and bind the server.
+void rpc_init(rpc_alloc_ty alloc, void *data);
+
+/// Release the server's inbox, outbox and buffer through \p dealloc.
+void rpc_deinit(rpc_dealloc_ty dealloc, void *data);
+
+/// Queries the RPC client at least once and performs server-side work if there
+/// are any active requests.
+void rpc_handle(void);
+
+/// Get the pointer to the data inbox.
+/// TODO: We should try to compress this into a single buffer.
+void *rpc_get_inbox(void);
+
+/// Get the pointer to the data outbox.
+void *rpc_get_outbox(void);
+
+/// Get the pointer to the data buffer.
+void *rpc_get_buffer(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp
new file mode 100644
--- /dev/null
+++ b/libc/utils/gpu/server/rpc_server.cpp
@@ -0,0 +1,56 @@
+//===-- Shared memory RPC server instantiation ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "rpc_server.h"
+
+#include "src/__support/RPC/rpc.h"
+
+#include <cstdio>
+#include <cstdlib>
+
+/// File-local server instance that every rpc_* function below operates on.
+static __llvm_libc::rpc::Server server;
+
+void rpc_init(rpc_alloc_ty alloc, void *data) {
+  // Three requests, in order: inbox flag, outbox flag, then the packet buffer.
+  void *in = alloc(sizeof(__llvm_libc::cpp::Atomic<int>), data);
+  void *out = alloc(sizeof(__llvm_libc::cpp::Atomic<int>), data);
+  server.reset(in, out, alloc(sizeof(__llvm_libc::rpc::Buffer), data));
+}
+
+void rpc_deinit(rpc_dealloc_ty dealloc, void *data) {
+  void *regions[] = {server.inbox, server.outbox, server.buffer};
+  for (void *region : regions)
+    dealloc(region, data);
+}
+
+void rpc_handle() {
+  namespace rpc = __llvm_libc::rpc;
+
+  // First callback: act on the opcode found in data[0].
+  auto on_request = [](rpc::Buffer *buffer) {
+    const auto opcode = static_cast<rpc::Opcode>(buffer->data[0]);
+    if (opcode == rpc::Opcode::PRINT_TO_STDERR)
+      fputs(reinterpret_cast<const char *>(&buffer->data[1]), stderr);
+    else if (opcode == rpc::Opcode::EXIT)
+      exit(buffer->data[1]);
+    // Every other opcode is ignored.
+  };
+  // Second callback: intentionally a no-op.
+  auto no_op = [](rpc::Buffer *) {};
+
+  // Call handle() again and again until it yields a false value.
+  while (server.handle(on_request, no_op))
+    ;
+}
+
+/// Untyped views of the server's three shared regions, for handing to the
+/// client (the loaders pass them on as kernel arguments).
+void *rpc_get_inbox() { return server.inbox; }
+void *rpc_get_outbox() { return server.outbox; }
+void *rpc_get_buffer() { return server.buffer; }