diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu --- a/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu @@ -23,7 +23,7 @@ //////////////////////////////////////////////////////////////////////////////// __device__ - omptarget_nvptx_Queue + omptarget_state_queue omptarget_nvptx_device_State[MAX_SM]; __device__ omptarget_nvptx_SimpleMemoryManager diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu --- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu @@ -17,7 +17,7 @@ //////////////////////////////////////////////////////////////////////////////// extern __device__ - omptarget_nvptx_Queue + omptarget_state_queue omptarget_nvptx_device_State[MAX_SM]; //////////////////////////////////////////////////////////////////////////////// diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/state-queue.h b/openmp/libomptarget/deviceRTLs/nvptx/src/state-queue.h --- a/openmp/libomptarget/deviceRTLs/nvptx/src/state-queue.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/state-queue.h @@ -1,4 +1,4 @@ -//===--------- statequeue.h - NVPTX OpenMP GPU State Queue ------- CUDA -*-===// +//===--- state-queue.h --- OpenMP target state queue ------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// // -// This file contains a queue to hand out OpenMP state objects to teams of -// one or more kernels. +// This file provides a queue to hand out OpenMP state objects to teams of one +// or more kernels. // // Reference: // Thomas R.W. Scogland and Wu-chun Feng. 2015. @@ -16,36 +16,100 @@ // //===----------------------------------------------------------------------===// -#ifndef __STATE_QUEUE_H -#define __STATE_QUEUE_H +#ifndef STATE_QUEUE_H +#define STATE_QUEUE_H -#include +#include "target_impl.h" -#include "option.h" // choices we have +#include -template class omptarget_nvptx_Queue { -private: - ElementType elements[SIZE]; - volatile ElementType *elementQueue[SIZE]; - volatile uint32_t head; - volatile uint32_t ids[SIZE]; - volatile uint32_t tail; +template class omptarget_state_queue { + ElementType Elements[SIZE]; + ElementType *ElementQueue[SIZE]; + uint32_t Head; + uint32_t Tail; + uint32_t Ids[SIZE]; static const uint32_t MAX_ID = (1u << 31) / SIZE / 2; - INLINE uint32_t ENQUEUE_TICKET(); - INLINE uint32_t DEQUEUE_TICKET(); - INLINE static uint32_t ID(uint32_t ticket); - INLINE bool IsServing(uint32_t slot, uint32_t id); - INLINE void PushElement(uint32_t slot, ElementType *element); - INLINE ElementType *PopElement(uint32_t slot); - INLINE void DoneServing(uint32_t slot, uint32_t id); + INLINE uint32_t enqueueTicket(); + INLINE uint32_t dequeueTicket(); + INLINE static uint32_t getID(uint32_t Ticket); + INLINE bool isServing(uint32_t Slot, uint32_t ID); + INLINE void pushElement(uint32_t Slot, ElementType *element); + INLINE ElementType *popElement(uint32_t Slot); + INLINE void doneServing(uint32_t Slot, uint32_t ID); public: - INLINE omptarget_nvptx_Queue() {} - INLINE void Enqueue(ElementType *element); - INLINE ElementType *Dequeue(); + INLINE void enqueue(ElementType *element); + INLINE ElementType *dequeue(); }; -#include "state-queuei.h" +template +INLINE uint32_t omptarget_state_queue::enqueueTicket() { + return __kmpc_impl_atomic_add(&Tail, 1); +} + +template +INLINE uint32_t omptarget_state_queue::dequeueTicket() { + return __kmpc_impl_atomic_add(&Head, 1); +} + +template +INLINE uint32_t +omptarget_state_queue::getID(uint32_t Ticket) { + return (Ticket / SIZE) * 2; +} + +template +INLINE bool omptarget_state_queue::isServing(uint32_t Slot, + uint32_t ID) { + return __kmpc_impl_atomic_add(&Ids[Slot], 0) == ID; +} + +template +INLINE void +omptarget_state_queue::pushElement(uint32_t Slot, + ElementType *element) { + __kmpc_impl_atomic_exchange(&ElementQueue[Slot], element); +} + +template +INLINE ElementType * +omptarget_state_queue::popElement(uint32_t Slot) { + return (ElementType *)__kmpc_impl_atomic_add(&ElementQueue[Slot], 0); +} + +template +INLINE void omptarget_state_queue::doneServing(uint32_t Slot, + uint32_t ID) { + __kmpc_impl_atomic_exchange(&Ids[Slot], (ID + 1) % MAX_ID); +} + +template +INLINE void +omptarget_state_queue::enqueue(ElementType *element) { + uint32_t Ticket = enqueueTicket(); + uint32_t Slot = Ticket % SIZE; + uint32_t ID = getID(Ticket) + 1; + while (!isServing(Slot, ID)) + ; + pushElement(Slot, element); + doneServing(Slot, ID); +} + +template +INLINE ElementType *omptarget_state_queue::dequeue() { + uint32_t Ticket = dequeueTicket(); + uint32_t Slot = Ticket % SIZE; + uint32_t ID = getID(Ticket); + while (!isServing(Slot, ID)) + ; + ElementType *element = popElement(Slot); + // This is to populate the queue because of the lack of GPU constructors. + if (element == 0) + element = &Elements[Slot]; + doneServing(Slot, ID); + return element; +} -#endif +#endif // STATE_QUEUE_H diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/state-queuei.h b/openmp/libomptarget/deviceRTLs/nvptx/src/state-queuei.h deleted file mode 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/state-queuei.h +++ /dev/null @@ -1,89 +0,0 @@ -//===------- state-queue.cu - NVPTX OpenMP GPU State Queue ------- CUDA -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains the implementation of a queue to hand out OpenMP state -// objects to teams of one or more kernels. -// -// Reference: -// Thomas R.W. Scogland and Wu-chun Feng. 2015. -// Design and Evaluation of Scalable Concurrent Queues for Many-Core -// Architectures. International Conference on Performance Engineering. -// -//===----------------------------------------------------------------------===// - -#include "state-queue.h" - -template -INLINE uint32_t omptarget_nvptx_Queue::ENQUEUE_TICKET() { - return atomicAdd((unsigned int *)&tail, 1); -} - -template -INLINE uint32_t omptarget_nvptx_Queue::DEQUEUE_TICKET() { - return atomicAdd((unsigned int *)&head, 1); -} - -template -INLINE uint32_t -omptarget_nvptx_Queue::ID(uint32_t ticket) { - return (ticket / SIZE) * 2; -} - -template -INLINE bool omptarget_nvptx_Queue::IsServing(uint32_t slot, - uint32_t id) { - return atomicAdd((unsigned int *)&ids[slot], 0) == id; -} - -template -INLINE void -omptarget_nvptx_Queue::PushElement(uint32_t slot, - ElementType *element) { - atomicExch((unsigned long long *)&elementQueue[slot], - (unsigned long long)element); -} - -template -INLINE ElementType * -omptarget_nvptx_Queue::PopElement(uint32_t slot) { - return (ElementType *)atomicAdd((unsigned long long *)&elementQueue[slot], - (unsigned long long)0); -} - -template -INLINE void omptarget_nvptx_Queue::DoneServing(uint32_t slot, - uint32_t id) { - atomicExch((unsigned int *)&ids[slot], (id + 1) % MAX_ID); -} - -template -INLINE void -omptarget_nvptx_Queue::Enqueue(ElementType *element) { - uint32_t ticket = ENQUEUE_TICKET(); - uint32_t slot = ticket % SIZE; - uint32_t id = ID(ticket) + 1; - while (!IsServing(slot, id)) - ; - PushElement(slot, element); - DoneServing(slot, id); -} - -template -INLINE ElementType *omptarget_nvptx_Queue::Dequeue() { - uint32_t ticket = DEQUEUE_TICKET(); - uint32_t slot = ticket % SIZE; - uint32_t id = ID(ticket); - while (!IsServing(slot, id)) - ; - ElementType *element = PopElement(slot); - // This is to populate the queue because of the lack of GPU constructors. - if (element == 0) - element = &elements[slot]; - DoneServing(slot, id); - return element; -} diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h @@ -0,0 +1,29 @@ +//===--- target_impl.h - OpenMP device RTL target code impl. ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Definitions of target specific functions needed in the generic part of the +// device RTL implementation. +// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_IMPL_H +#define TARGET_IMPL_H + +/// Atomically increment the pointee of \p Ptr by \p Val and return the original +/// value of the pointee. +template T __kmpc_impl_atomic_add(T *Ptr, T Val) { + return atomicAdd(Ptr, Val); +} + +/// Atomically exchange the pointee of \p Ptr with \p Val and return the +/// original value of the pointee. +template T __kmpc_impl_atomic_exchange(T *Ptr, T Val) { + return atomicExch(Ptr, Val); +} + +#endif // TARGET_IMPL_H