diff --git a/libc/src/__support/RPC/rpc.h b/libc/src/__support/RPC/rpc.h
--- a/libc/src/__support/RPC/rpc.h
+++ b/libc/src/__support/RPC/rpc.h
@@ -80,7 +80,7 @@
   LIBC_INLINE Process &operator=(Process &&) = default;
   LIBC_INLINE ~Process() = default;
 
-  uint64_t port_count = 0;
+  uint32_t port_count = 0;
   cpp::Atomic<uint32_t> *inbox = nullptr;
   cpp::Atomic<uint32_t> *outbox = nullptr;
   Packet *packet = nullptr;
@@ -89,7 +89,7 @@
   cpp::Atomic<uint32_t> lock[MAX_PORT_COUNT / NUM_BITS_IN_WORD] = {0};
 
   /// Initialize the communication channels.
-  LIBC_INLINE void reset(uint64_t port_count, void *buffer) {
+  LIBC_INLINE void reset(uint32_t port_count, void *buffer) {
     this->port_count = port_count;
     this->inbox = reinterpret_cast<cpp::Atomic<uint32_t> *>(
        advance(buffer, inbox_offset(port_count)));
@@ -111,17 +111,17 @@
   ///   Atomic<uint32_t> secondary[port_count];
   ///   Packet buffer[port_count];
   /// };
-  LIBC_INLINE static constexpr uint64_t allocation_size(uint64_t port_count) {
+  LIBC_INLINE static constexpr uint64_t allocation_size(uint32_t port_count) {
     return buffer_offset(port_count) + buffer_bytes(port_count);
   }
 
   /// Retrieve the inbox state from memory shared between processes.
-  LIBC_INLINE uint32_t load_inbox(uint64_t index) {
+  LIBC_INLINE uint32_t load_inbox(uint32_t index) {
     return inbox[index].load(cpp::MemoryOrder::RELAXED);
   }
 
   /// Retrieve the outbox state from memory shared between processes.
-  LIBC_INLINE uint32_t load_outbox(uint64_t index) {
+  LIBC_INLINE uint32_t load_outbox(uint32_t index) {
     return outbox[index].load(cpp::MemoryOrder::RELAXED);
   }
 
@@ -129,7 +129,7 @@
   /// Equivalent to loading outbox followed by store of the inverted value
   /// The outbox is write only by this warp and tracking the value locally is
   /// cheaper than calling load_outbox to get the value to store.
-  LIBC_INLINE uint32_t invert_outbox(uint64_t index, uint32_t current_outbox) {
+  LIBC_INLINE uint32_t invert_outbox(uint32_t index, uint32_t current_outbox) {
     uint32_t inverted_outbox = !current_outbox;
     atomic_thread_fence(cpp::MemoryOrder::RELEASE);
     outbox[index].store(inverted_outbox, cpp::MemoryOrder::RELAXED);
@@ -138,7 +138,7 @@
 
   // Given the current outbox and inbox values, wait until the inbox changes
   // to indicate that this thread owns the buffer element.
-  LIBC_INLINE void wait_for_ownership(uint64_t index, uint32_t outbox,
+  LIBC_INLINE void wait_for_ownership(uint32_t index, uint32_t outbox,
                                       uint32_t in) {
     while (buffer_unavailable(in, outbox)) {
       sleep_briefly();
@@ -160,7 +160,7 @@
   /// single lock on success, e.g. the result of gpu::get_lane_mask()
   /// The lock is held when the n-th bit of the lock bitfield is set.
   [[clang::convergent]] LIBC_INLINE bool try_lock(uint64_t lane_mask,
-                                                  uint64_t index) {
+                                                  uint32_t index) {
     // On amdgpu, test and set to the nth lock bit and a sync_lane would suffice
     // On volta, need to handle differences between the threads running and
     // the threads that were detected in the previous call to get_lane_mask()
@@ -201,7 +201,7 @@
   /// Unlock the lock at index. We need a lane sync to keep this function
   /// convergent, otherwise the compiler will sink the store and deadlock.
   [[clang::convergent]] LIBC_INLINE void unlock(uint64_t lane_mask,
-                                                uint64_t index) {
+                                                uint32_t index) {
     // Do not move any writes past the unlock
     atomic_thread_fence(cpp::MemoryOrder::RELEASE);
 
@@ -217,35 +217,35 @@
   }
 
   /// Number of bytes to allocate for an inbox or outbox.
-  LIBC_INLINE static constexpr uint64_t mailbox_bytes(uint64_t port_count) {
+  LIBC_INLINE static constexpr uint64_t mailbox_bytes(uint32_t port_count) {
     return port_count * sizeof(cpp::Atomic<uint32_t>);
   }
 
   /// Number of bytes to allocate for the buffer containing the packets.
-  LIBC_INLINE static constexpr uint64_t buffer_bytes(uint64_t port_count) {
+  LIBC_INLINE static constexpr uint64_t buffer_bytes(uint32_t port_count) {
     return port_count * sizeof(Packet);
   }
 
   /// Offset of the inbox in memory. This is the same as the outbox if inverted.
-  LIBC_INLINE static constexpr uint64_t inbox_offset(uint64_t port_count) {
+  LIBC_INLINE static constexpr uint64_t inbox_offset(uint32_t port_count) {
     return Invert ? mailbox_bytes(port_count) : 0;
   }
 
   /// Offset of the outbox in memory. This is the same as the inbox if inverted.
-  LIBC_INLINE static constexpr uint64_t outbox_offset(uint64_t port_count) {
+  LIBC_INLINE static constexpr uint64_t outbox_offset(uint32_t port_count) {
     return Invert ? 0 : mailbox_bytes(port_count);
   }
 
   /// Offset of the buffer containing the packets after the inbox and outbox.
-  LIBC_INLINE static constexpr uint64_t buffer_offset(uint64_t port_count) {
+  LIBC_INLINE static constexpr uint64_t buffer_offset(uint32_t port_count) {
     return align_up(2 * mailbox_bytes(port_count), alignof(Packet));
   }
 
   /// Conditionally set the n-th bit in the atomic bitfield.
   LIBC_INLINE static constexpr uint32_t set_nth(cpp::Atomic<uint32_t> *bits,
-                                                uint64_t index, bool cond) {
-    uint64_t slot = index / NUM_BITS_IN_WORD;
-    uint64_t bit = index % NUM_BITS_IN_WORD;
+                                                uint32_t index, bool cond) {
+    uint32_t slot = index / NUM_BITS_IN_WORD;
+    uint32_t bit = index % NUM_BITS_IN_WORD;
     return bits[slot].fetch_or(static_cast<uint32_t>(cond) << bit,
                                cpp::MemoryOrder::RELAXED) &
            (1u << bit);
@@ -253,9 +253,9 @@
 
   /// Conditionally clear the n-th bit in the atomic bitfield.
   LIBC_INLINE static constexpr uint32_t clear_nth(cpp::Atomic<uint32_t> *bits,
-                                                  uint64_t index, bool cond) {
-    uint64_t slot = index / NUM_BITS_IN_WORD;
-    uint64_t bit = index % NUM_BITS_IN_WORD;
+                                                  uint32_t index, bool cond) {
+    uint32_t slot = index / NUM_BITS_IN_WORD;
+    uint32_t bit = index % NUM_BITS_IN_WORD;
     return bits[slot].fetch_and(~0u ^ (static_cast<uint32_t>(cond) << bit),
                                 cpp::MemoryOrder::RELAXED) &
            (1u << bit);
@@ -292,7 +292,7 @@
 /// processes. A port is conceptually an index into the memory provided by the
 /// underlying process that is guarded by a lock bit.
 template <bool T, typename S> struct Port {
-  LIBC_INLINE Port(Process<T, S> &process, uint64_t lane_mask, uint64_t index,
+  LIBC_INLINE Port(Process<T, S> &process, uint64_t lane_mask, uint32_t index,
                    uint32_t out)
       : process(process), lane_mask(lane_mask), index(index), out(out),
         receive(false), owns_buffer(true) {}
@@ -334,7 +334,7 @@
 private:
   Process<T, S> &process;
   uint64_t lane_mask;
-  uint64_t index;
+  uint32_t index;
   uint32_t out;
   bool receive;
   bool owns_buffer;
@@ -351,7 +351,7 @@
   template <uint16_t opcode> LIBC_INLINE cpp::optional<Port> try_open();
   template <uint16_t opcode> LIBC_INLINE Port open();
 
-  LIBC_INLINE void reset(uint64_t port_count, void *buffer) {
+  LIBC_INLINE void reset(uint32_t port_count, void *buffer) {
     process.reset(port_count, buffer);
   }
 
@@ -374,7 +374,7 @@
   LIBC_INLINE cpp::optional<Port> try_open();
   LIBC_INLINE Port open();
 
-  LIBC_INLINE void reset(uint64_t port_count, void *buffer) {
+  LIBC_INLINE void reset(uint32_t port_count, void *buffer) {
     process.reset(port_count, buffer);
   }
 
@@ -382,7 +382,7 @@
     return process.get_buffer_start();
   }
 
-  LIBC_INLINE static uint64_t allocation_size(uint64_t port_count) {
+  LIBC_INLINE static uint64_t allocation_size(uint32_t port_count) {
     return Process<true, Packet<lane_size>>::allocation_size(port_count);
   }
 
@@ -525,7 +525,7 @@
 [[clang::convergent]] LIBC_INLINE cpp::optional<Client::Port>
 Client::try_open() {
   // Perform a naive linear scan for a port that can be opened to send data.
-  for (uint64_t index = 0; index < process.port_count; ++index) {
+  for (uint32_t index = 0; index < process.port_count; ++index) {
     // Attempt to acquire the lock on this index.
     uint64_t lane_mask = gpu::get_lane_mask();
     if (!process.try_lock(lane_mask, index))
@@ -566,7 +566,7 @@
 cpp::optional<typename Server<lane_size>::Port>
 Server<lane_size>::try_open() {
   // Perform a naive linear scan for a port that has a pending request.
-  for (uint64_t index = 0; index < process.port_count; ++index) {
+  for (uint32_t index = 0; index < process.port_count; ++index) {
     uint32_t in = process.load_inbox(index);
     uint32_t out = process.load_outbox(index);