diff --git a/libc/src/__support/RPC/rpc.h b/libc/src/__support/RPC/rpc.h --- a/libc/src/__support/RPC/rpc.h +++ b/libc/src/__support/RPC/rpc.h @@ -106,11 +106,14 @@ uint64_t port_count; uint32_t lane_size; + +private: cpp::Atomic *lock; cpp::Atomic *inbox; cpp::Atomic *outbox; Packet *packet; +public: /// Initialize the communication channels. LIBC_INLINE void reset(uint64_t port_count, uint32_t lane_size, void *lock, void *inbox, void *outbox, void *packet) { @@ -146,6 +149,22 @@ return InvertInbox ? !i : i; } + /// Retrieve the outbox state from memory shared between processes. + /// Never needs to invert the associated read. + LIBC_INLINE uint32_t load_outbox(uint64_t index) { + return outbox[index].load(cpp::MemoryOrder::RELAXED); + } + + /// Signal to the other process that this one is finished with the buffer. + /// Equivalent to loading the outbox followed by a store of the inverted value. + /// The outbox is only written by this warp, so tracking the value locally is + /// cheaper than calling load_outbox to get the value to store. + LIBC_INLINE uint32_t invert_outbox(uint64_t index, uint32_t current_outbox) { + uint32_t inverted_outbox = !current_outbox; + outbox[index].store(inverted_outbox, cpp::MemoryOrder::RELAXED); + return inverted_outbox; + } + /// Determines if this process needs to wait for ownership of the buffer. LIBC_INLINE static bool buffer_unavailable(uint32_t in, uint32_t out) { return in != out; @@ -301,9 +320,8 @@ // Apply the \p fill function to initialize the buffer and release the memory. process.invoke_rpc(fill, process.get_packet(index)); - out = !out; atomic_thread_fence(cpp::MemoryOrder::RELEASE); - process.outbox[index].store(out, cpp::MemoryOrder::RELAXED); + out = process.invert_outbox(index, out); } /// Applies \p use to the shared buffer and acknowledges the send. @@ -319,8 +337,7 @@ // Apply the \p use function to read the memory out of the buffer. 
process.invoke_rpc(use, process.get_packet(index)); - out = !out; - process.outbox[index].store(out, cpp::MemoryOrder::RELAXED); + out = process.invert_outbox(index, out); } /// Combines a send and receive into a single function. @@ -426,7 +443,7 @@ atomic_thread_fence(cpp::MemoryOrder::ACQUIRE); uint32_t in = load_inbox(index); - uint32_t out = outbox[index].load(cpp::MemoryOrder::RELAXED); + uint32_t out = load_outbox(index); // Once we acquire the index we need to check if we are in a valid sending // state. @@ -460,7 +477,7 @@ // Perform a naive linear scan for a port that has a pending request. for (uint64_t index = 0; index < port_count; ++index) { uint32_t in = load_inbox(index); - uint32_t out = outbox[index].load(cpp::MemoryOrder::RELAXED); + uint32_t out = load_outbox(index); // The server is passive, if there is no work pending don't bother // opening a port. @@ -477,7 +494,7 @@ atomic_thread_fence(cpp::MemoryOrder::ACQUIRE); in = load_inbox(index); - out = outbox[index].load(cpp::MemoryOrder::RELAXED); + out = load_outbox(index); if (buffer_unavailable(in, out)) { unlock(lane_mask, index);