Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/counter_group.h =================================================================== --- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/counter_group.h +++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/counter_group.h @@ -1,51 +0,0 @@ -//===------ counter_group.h - NVPTX OpenMP loop scheduling ------- CUDA -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// -// -// Interface to implement OpenMP loop scheduling -// -//===----------------------------------------------------------------------===// - -#ifndef _OMPTARGET_NVPTX_COUNTER_GROUP_H_ -#define _OMPTARGET_NVPTX_COUNTER_GROUP_H_ - -#include "option.h" - -// counter group type for synchronizations -class omptarget_nvptx_CounterGroup { -public: - // getters and setters - INLINE Counter &Event() { return v_event; } - INLINE volatile Counter &Start() { return v_start; } - INLINE Counter &Init() { return v_init; } - - // Synchronization Interface - - INLINE void Clear(); // first time start=event - INLINE void Reset(); // init = first - INLINE void Init(Counter &priv); // priv = init - INLINE Counter Next(); // just counts number of events - - // set priv to n, to be used in later waitOrRelease - INLINE void Complete(Counter &priv, Counter n); - - // check priv and decide if we have to wait or can free the other warps - INLINE void Release(Counter priv, Counter current_event_value); - INLINE void WaitOrRelease(Counter priv, Counter current_event_value); - -private: - Counter v_event; // counter of events (atomic) - - // volatile is needed to force loads to read from global - // memory or L2 cache and see the write by the last master - volatile Counter v_start; // signal when events registered are finished - - Counter v_init; // used to initialize local thread variables -}; - -#endif /* SRC_COUNTER_GROUP_H_ */ Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/counter_groupi.h =================================================================== --- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/counter_groupi.h +++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/counter_groupi.h @@ -1,82 +0,0 @@ -//===----- counter_groupi.h - NVPTX OpenMP loop scheduling ------- CUDA -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// -// -// Interface implementation for OpenMP loop scheduling -// -//===----------------------------------------------------------------------===// - -#include "option.h" - -INLINE void omptarget_nvptx_CounterGroup::Clear() { - PRINT0(LD_SYNCD, "clear counters\n") - v_event = 0; - v_start = 0; - // v_init does not need to be reset (its value is dead) -} - -INLINE void omptarget_nvptx_CounterGroup::Reset() { - // done by master before entering parallel - ASSERT(LT_FUSSY, v_event == v_start, - "error, entry %lld !=start %lld at reset\n", P64(v_event), - P64(v_start)); - v_init = v_start; -} - -INLINE void omptarget_nvptx_CounterGroup::Init(Counter &priv) { - PRINT(LD_SYNCD, "init priv counter 0x%llx with val %lld\n", P64(&priv), - P64(v_start)); - priv = v_start; -} - -// just counts number of events -INLINE Counter omptarget_nvptx_CounterGroup::Next() { - Counter oldVal = atomicAdd(&v_event, (Counter)1); - PRINT(LD_SYNCD, "next event counter 0x%llx with val %lld->%lld\n", - P64(&v_event), P64(oldVal), P64(oldVal + 1)); - - return oldVal; -} - -// set priv to n, to be used in later waitOrRelease -INLINE void omptarget_nvptx_CounterGroup::Complete(Counter &priv, Counter n) { - PRINT(LD_SYNCD, "complete priv counter 0x%llx with val %llu->%llu (+%llu)\n", - P64(&priv), P64(priv), P64(priv + n), n); - priv += n; -} - -INLINE void omptarget_nvptx_CounterGroup::Release(Counter priv, - Counter current_event_value) { - if (priv - 1 == current_event_value) { - PRINT(LD_SYNCD, "Release start counter 0x%llx with val %lld->%lld\n", - P64(&v_start), P64(v_start), P64(priv)); - v_start = priv; - } -} - -// check priv and decide if we have to wait or can free the other warps -INLINE void -omptarget_nvptx_CounterGroup::WaitOrRelease(Counter priv, - Counter current_event_value) { - if (priv - 1 == current_event_value) { - PRINT(LD_SYNCD, "Release start counter 0x%llx with val %lld->%lld\n", - P64(&v_start), P64(v_start), P64(priv)); - v_start = priv; - } else { - PRINT(LD_SYNCD, - "Start waiting while start counter 0x%llx with val %lld < %lld\n", - P64(&v_start), P64(v_start), P64(priv)); - while (priv > v_start) { - // IDLE LOOP - // start is volatile: it will be re-loaded at each while loop - } - PRINT(LD_SYNCD, - "Done waiting as start counter 0x%llx with val %lld >= %lld\n", - P64(&v_start), P64(v_start), P64(priv)); - } -} Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/interface.h =================================================================== --- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/interface.h +++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/interface.h @@ -20,6 +20,8 @@ #ifndef _INTERFACES_H_ #define _INTERFACES_H_ +#include "option.h" + //////////////////////////////////////////////////////////////////////////////// // OpenMP interface //////////////////////////////////////////////////////////////////////////////// Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h =================================================================== --- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h +++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h @@ -26,7 +26,6 @@ #include // local includes -#include "counter_group.h" #include "debug.h" // debug #include "interface.h" // interfaces with omp, compiler, and user #include "option.h" // choices we have @@ -242,15 +241,10 @@ public: // access to data - INLINE omptarget_nvptx_CounterGroup &CounterGroup() { return cg; } INLINE omptarget_nvptx_TaskDescr *WorkTaskDescr() { return &masterTaskICV; } - // init - INLINE void InitWorkDescr(); private: - omptarget_nvptx_CounterGroup cg; // for barrier (no other needed) omptarget_nvptx_TaskDescr masterTaskICV; - bool hasCancel; }; //////////////////////////////////////////////////////////////////////////////// @@ -347,9 +341,6 @@ INLINE uint16_t &SimdLimitForNextSimd(int tid) { return nextRegion.slim[tid]; } - // sync - INLINE Counter &Priv(int tid) { return priv[tid]; } - INLINE void IncrementPriv(int tid, Counter val) { priv[tid] += val; } // schedule (for dispatch) INLINE kmp_sched_t &ScheduleType(int tid) { return schedule[tid]; } INLINE int64_t &Chunk(int tid) { return chunk[tid]; } @@ -377,8 +368,6 @@ // simd limit uint16_t slim[MAX_THREADS_PER_TEAM]; } nextRegion; - // sync - Counter priv[MAX_THREADS_PER_TEAM]; // schedule (for dispatch) kmp_sched_t schedule[MAX_THREADS_PER_TEAM]; // remember schedule type for #for int64_t chunk[MAX_THREADS_PER_TEAM]; @@ -469,7 +458,6 @@ // inlined implementation //////////////////////////////////////////////////////////////////////////////// -#include "counter_groupi.h" #include "omptarget-nvptxi.h" #include "supporti.h" Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu =================================================================== --- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu +++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu @@ -21,25 +21,10 @@ omptarget_nvptx_Queue omptarget_nvptx_device_State[MAX_SM]; -extern __device__ __shared__ - omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext; - extern __device__ omptarget_nvptx_Queue< omptarget_nvptx_SimpleThreadPrivateContext, OMP_STATE_COUNT> omptarget_nvptx_device_simpleState[MAX_SM]; -extern __device__ __shared__ omptarget_nvptx_SimpleThreadPrivateContext - *omptarget_nvptx_simpleThreadPrivateContext; - -// -// The team master sets the outlined function and its arguments in these -// variables to communicate with the workers. Since they are in shared memory, -// there is one copy of these variables for each kernel, instance, and team. -// -extern volatile __device__ __shared__ omptarget_nvptx_WorkFn - omptarget_nvptx_workFn; -extern __device__ __shared__ uint32_t execution_param; - //////////////////////////////////////////////////////////////////////////////// // init entry points //////////////////////////////////////////////////////////////////////////////// @@ -146,8 +131,6 @@ omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor(); // init team context currTeamDescr.InitTeamDescr(); - // init counters (copy start to init) - workDescr.CounterGroup().Reset(); } __syncthreads(); @@ -168,8 +151,6 @@ newTaskDescr); // init thread private from init value - workDescr.CounterGroup().Init( - omptarget_nvptx_threadPrivateContext->Priv(threadId)); PRINT(LD_PAR, "thread will execute parallel region with id %d in a team of " "%d threads\n", Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h =================================================================== --- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h +++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h @@ -168,31 +168,17 @@ topTaskDescr[tid] = NULL; // no num threads value has been pushed nextRegion.tnum[tid] = 0; - // priv counter init to zero - priv[tid] = 0; // the following don't need to be init here; they are init when using dyn // sched // current_Event, events_Number, chunk, num_Iterations, schedule } //////////////////////////////////////////////////////////////////////////////// -// Work Descriptor -//////////////////////////////////////////////////////////////////////////////// - -INLINE void omptarget_nvptx_WorkDescr::InitWorkDescr() { - cg.Clear(); // start and stop to zero too - // threadsInParallelTeam does not need to be init (done in start parallel) - hasCancel = FALSE; -} - -//////////////////////////////////////////////////////////////////////////////// // Team Descriptor //////////////////////////////////////////////////////////////////////////////// INLINE void omptarget_nvptx_TeamDescr::InitTeamDescr() { levelZeroTaskDescr.InitLevelZeroTaskDescr(); - workDescrForActiveParallel.InitWorkDescr(); - // omp_init_lock(criticalLock); } //////////////////////////////////////////////////////////////////////////////// Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/option.h =================================================================== --- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/option.h +++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/option.h @@ -47,13 +47,6 @@ //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// -// data options -//////////////////////////////////////////////////////////////////////////////// - -// decide if counters are 32 or 64 bit -#define Counter unsigned long long - -//////////////////////////////////////////////////////////////////////////////// // misc options (by def everythig here is device) //////////////////////////////////////////////////////////////////////////////// Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu =================================================================== --- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu +++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu @@ -306,8 +306,6 @@ omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor(); workDescr.WorkTaskDescr()->CopyToWorkDescr(currTaskDescr, CudaThreadsForParallel / NumLanes); - // init counters (copy start to init) - workDescr.CounterGroup().Reset(); } // All workers call this function. Deactivate those not needed. @@ -345,8 +343,6 @@ omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(threadId, newTaskDescr); // init private from int value - workDescr.CounterGroup().Init( - omptarget_nvptx_threadPrivateContext->Priv(threadId)); PRINT(LD_PAR, "thread will execute parallel region with id %d in a team of " "%d threads\n",