diff --git a/libc/src/__support/RPC/rpc.h b/libc/src/__support/RPC/rpc.h --- a/libc/src/__support/RPC/rpc.h +++ b/libc/src/__support/RPC/rpc.h @@ -74,7 +74,7 @@ /// - The client will always start with a 'send' operation. /// - The server will always start with a 'recv' operation. /// - Every 'send' or 'recv' call is mirrored by the other process. -template struct Process { +template struct Process { LIBC_INLINE Process() = default; LIBC_INLINE Process(const Process &) = delete; LIBC_INLINE Process &operator=(const Process &) = delete; @@ -85,7 +85,7 @@ uint64_t port_count; cpp::Atomic *inbox; cpp::Atomic *outbox; - Packet *packet; + Packet *packet; cpp::Atomic lock[DEFAULT_PORT_COUNT] = {0}; @@ -96,8 +96,8 @@ advance(buffer, inbox_offset(port_count))); this->outbox = reinterpret_cast *>( advance(buffer, outbox_offset(port_count))); - this->packet = reinterpret_cast *>( - advance(buffer, buffer_offset(port_count))); + this->packet = + reinterpret_cast(advance(buffer, buffer_offset(port_count))); } /// Returns the beginning of the unified buffer. Intended for initializing the @@ -221,30 +221,6 @@ gpu::sync_lane(lane_mask); } - /// Invokes a function accross every active buffer across the total lane size. - LIBC_INLINE void invoke_rpc(cpp::function fn, - Packet &packet) { - if constexpr (is_process_gpu()) { - fn(&packet.payload.slot[gpu::get_lane_id()]); - } else { - for (uint32_t i = 0; i < lane_size; i += gpu::get_lane_size()) - if (packet.header.mask & 1ul << i) - fn(&packet.payload.slot[i]); - } - } - - /// Alternate version that also provides the index of the current lane. - LIBC_INLINE void invoke_rpc(cpp::function fn, - Packet &packet) { - if constexpr (is_process_gpu()) { - fn(&packet.payload.slot[gpu::get_lane_id()], gpu::get_lane_id()); - } else { - for (uint32_t i = 0; i < lane_size; i += gpu::get_lane_size()) - if (packet.header.mask & 1ul << i) - fn(&packet.payload.slot[i], i); - } - } - /// Number of bytes to allocate for an inbox or outbox. 
LIBC_INLINE static constexpr uint64_t mailbox_bytes(uint64_t port_count) { return port_count * sizeof(cpp::Atomic); @@ -252,7 +228,7 @@ /// Number of bytes to allocate for the buffer containing the packets. LIBC_INLINE static constexpr uint64_t buffer_bytes(uint64_t port_count) { - return port_count * sizeof(Packet); + return port_count * sizeof(Packet); } /// Offset of the inbox in memory. This is the same as the outbox if inverted. @@ -267,14 +243,40 @@ /// Offset of the buffer containing the packets after the inbox and outbox. LIBC_INLINE static constexpr uint64_t buffer_offset(uint64_t port_count) { - return align_up(2 * mailbox_bytes(port_count), alignof(Packet)); + return align_up(2 * mailbox_bytes(port_count), alignof(Packet)); } }; +/// Invokes a function across every active buffer across the total lane size. +template +static LIBC_INLINE void invoke_rpc(cpp::function fn, + Packet &packet) { + if constexpr (is_process_gpu()) { + fn(&packet.payload.slot[gpu::get_lane_id()]); + } else { + for (uint32_t i = 0; i < lane_size; i += gpu::get_lane_size()) + if (packet.header.mask & 1ul << i) + fn(&packet.payload.slot[i]); + } +} + +/// Alternate version that also provides the index of the current lane. +template +static LIBC_INLINE void invoke_rpc(cpp::function fn, + Packet &packet) { + if constexpr (is_process_gpu()) { + fn(&packet.payload.slot[gpu::get_lane_id()], gpu::get_lane_id()); + } else { + for (uint32_t i = 0; i < lane_size; i += gpu::get_lane_size()) + if (packet.header.mask & 1ul << i) + fn(&packet.payload.slot[i], i); + } +} + /// The port provides the interface to communicate between the multiple /// processes. A port is conceptually an index into the memory provided by the /// underlying process that is guarded by a lock bit. 
-template struct Port { +template struct Port { LIBC_INLINE Port(Process &process, uint64_t lane_mask, uint64_t index, uint32_t out) : process(process), lane_mask(lane_mask), index(index), out(out), @@ -330,7 +332,7 @@ LIBC_INLINE Client &operator=(const Client &) = delete; LIBC_INLINE ~Client() = default; - using Port = rpc::Port; + using Port = rpc::Port>; template LIBC_INLINE cpp::optional try_open(); template LIBC_INLINE Port open(); @@ -339,7 +341,7 @@ } private: - Process process; + Process> process; }; /// The RPC server used to respond to the client. @@ -349,7 +351,7 @@ LIBC_INLINE Server &operator=(const Server &) = delete; LIBC_INLINE ~Server() = default; - using Port = rpc::Port; + using Port = rpc::Port>; LIBC_INLINE cpp::optional try_open(); LIBC_INLINE Port open(); @@ -362,15 +364,15 @@ } LIBC_INLINE static uint64_t allocation_size(uint64_t port_count) { - return Process::allocation_size(port_count); + return Process>::allocation_size(port_count); } private: - Process process; + Process> process; }; /// Applies \p fill to the shared buffer and initiates a send operation. -template +template template LIBC_INLINE void Port::send(F fill) { uint32_t in = owns_buffer ? out ^ T : process.load_inbox(index); @@ -379,14 +381,14 @@ process.wait_for_ownership(index, out, in); // Apply the \p fill function to initialize the buffer and release the memory. - process.invoke_rpc(fill, process.packet[index]); + invoke_rpc(fill, process.packet[index]); out = process.invert_outbox(index, out); owns_buffer = false; receive = false; } /// Applies \p use to the shared buffer and acknowledges the send. -template +template template LIBC_INLINE void Port::recv(U use) { // We only exchange ownership of the buffer during a receive if we are waiting @@ -402,13 +404,13 @@ process.wait_for_ownership(index, out, in); // Apply the \p use function to read the memory out of the buffer. 
- process.invoke_rpc(use, process.packet[index]); + invoke_rpc(use, process.packet[index]); receive = true; owns_buffer = true; } /// Combines a send and receive into a single function. -template +template template LIBC_INLINE void Port::send_and_recv(F fill, U use) { send(fill); @@ -418,7 +420,7 @@ /// Combines a receive and send operation into a single function. The \p work /// function modifies the buffer in-place and the send is only used to initiate /// the copy back. -template +template template LIBC_INLINE void Port::recv_and_send(W work) { recv(work); @@ -427,7 +429,7 @@ /// Helper routine to simplify the interface when sending from the GPU using /// thread private pointers to the underlying value. -template +template LIBC_INLINE void Port::send_n(const void *src, uint64_t size) { static_assert(is_process_gpu(), "Only valid when running on the GPU"); const void **src_ptr = &src; @@ -437,7 +439,7 @@ /// Sends an arbitrarily sized data buffer \p src across the shared channel in /// multiples of the packet length. -template +template LIBC_INLINE void Port::send_n(const void *const *src, uint64_t *size) { uint64_t num_sends = 0; send([&](Buffer *buffer, uint32_t id) { @@ -467,7 +469,7 @@ /// Receives an arbitrarily sized data buffer across the shared channel in /// multiples of the packet length. The \p alloc function is called with the /// size of the data so that we can initialize the size of the \p dst buffer. 
-template +template template LIBC_INLINE void Port::recv_n(void **dst, uint64_t *size, A &&alloc) { uint64_t num_recvs = 0; diff --git a/libc/test/src/__support/RPC/rpc_smoke_test.cpp b/libc/test/src/__support/RPC/rpc_smoke_test.cpp --- a/libc/test/src/__support/RPC/rpc_smoke_test.cpp +++ b/libc/test/src/__support/RPC/rpc_smoke_test.cpp @@ -13,8 +13,12 @@ namespace { enum { lane_size = 8, port_count = 4 }; -using ProcAType = __llvm_libc::rpc::Process; -using ProcBType = __llvm_libc::rpc::Process; +struct Packet { + uint64_t unused; +}; + +using ProcAType = __llvm_libc::rpc::Process; +using ProcBType = __llvm_libc::rpc::Process; static_assert(ProcAType::inbox_offset(port_count) == ProcBType::outbox_offset(port_count));