Index: libomptarget/deviceRTLs/nvptx/src/interface.h
===================================================================
--- libomptarget/deviceRTLs/nvptx/src/interface.h
+++ libomptarget/deviceRTLs/nvptx/src/interface.h
@@ -275,6 +275,31 @@
                                int64_t *pstride, int64_t incr,
                                int64_t chunk);
 EXTERN
+
+void __kmpc_distribute_default_init_4(kmp_Indent *loc, int32_t global_tid,
+                                      int32_t sched, int32_t *plastiter,
+                                      int32_t *plower, int32_t *pupper,
+                                      int32_t *pstride, int32_t incr,
+                                      int32_t chunk);
+EXTERN
+void __kmpc_distribute_default_init_4u(kmp_Indent *loc, int32_t global_tid,
+                                       int32_t sched, int32_t *plastiter,
+                                       uint32_t *plower, uint32_t *pupper,
+                                       int32_t *pstride, int32_t incr,
+                                       int32_t chunk);
+EXTERN
+void __kmpc_distribute_default_init_8(kmp_Indent *loc, int32_t global_tid,
+                                      int32_t sched, int32_t *plastiter,
+                                      int64_t *plower, int64_t *pupper,
+                                      int64_t *pstride, int64_t incr,
+                                      int64_t chunk);
+EXTERN
+void __kmpc_distribute_default_init_8u(kmp_Indent *loc, int32_t global_tid,
+                                       int32_t sched, int32_t *plastiter,
+                                       uint64_t *plower, uint64_t *pupper,
+                                       int64_t *pstride, int64_t incr,
+                                       int64_t chunk);
+EXTERN
 void __kmpc_for_static_init_4_simple_spmd(kmp_Indent *loc, int32_t global_tid,
                                           int32_t sched, int32_t *plastiter,
                                           int32_t *plower, int32_t *pupper,
Index: libomptarget/deviceRTLs/nvptx/src/loop.cu
===================================================================
--- libomptarget/deviceRTLs/nvptx/src/loop.cu
+++ libomptarget/deviceRTLs/nvptx/src/loop.cu
@@ -231,6 +231,34 @@
   }
 
   ////////////////////////////////////////////////////////////////////////////////
+  // Support for distribute default schedule Init
+
+  // Computes the chunk selected by GetOmpTeamId() for the default distribute
+  // schedule, which is static with a chunk size equal to the team size.
+  //   *plower  - in: loop lower bound; out: lower bound of the team's chunk.
+  //   *pupper  - out: inclusive upper bound of that chunk. The incoming value
+  //              is never read, so the result is not clamped against it.
+  //   *pstride - out: number of teams times the chunk size. The incoming
+  //              value is never read.
+  INLINE static void distribute_default_init(T *plower, T *pupper, ST *pstride,
+                                             bool IsRuntimeUninitialized) {
+    int tid = GetLogicalThreadIdInBlock();
+
+    // init - the default schedule is static with chunk size equal to team size.
+    T entityId = GetOmpTeamId();
+    T numberOfEntities = GetNumberOfOmpTeams();
+    T chunkSize = GetNumberOfOmpThreads(tid, true, IsRuntimeUninitialized);
+
+    // Start of this team's chunk.
+    T lb = *plower + entityId * chunkSize;
+
+    // copy back
+    *plower = lb;
+    *pupper = lb + chunkSize - 1; // Clang uses i <= ub
+    *pstride = numberOfEntities * chunkSize;
+  }
+
+  ////////////////////////////////////////////////////////////////////////////////
   // Support for dispatch Init
 
   INLINE static int OrderedSchedule(kmp_sched_t schedule) {
@@ -670,6 +698,58 @@
       isRuntimeUninitialized());
 }
 
+////////////////////////////////////////////////////////////////////////////////
+// Static loops for distribute default schedule
+//
+// Each entry point forwards only plower, pupper and pstride to
+// distribute_default_init; loc, global_tid, schedtype, plastiter, incr and
+// chunk are accepted but not used, so *plastiter is left unmodified.
+////////////////////////////////////////////////////////////////////////////////
+
+EXTERN
+void __kmpc_distribute_default_init_4(kmp_Indent *loc, int32_t global_tid,
+                                      int32_t schedtype, int32_t *plastiter,
+                                      int32_t *plower, int32_t *pupper,
+                                      int32_t *pstride, int32_t incr,
+                                      int32_t chunk) {
+  PRINT0(LD_IO, "call kmpc_distribute_default_init_4\n");
+  omptarget_nvptx_LoopSupport<int32_t, int32_t>::distribute_default_init(
+      plower, pupper, pstride, isRuntimeUninitialized());
+}
+
+EXTERN
+void __kmpc_distribute_default_init_4u(kmp_Indent *loc, int32_t global_tid,
+                                       int32_t schedtype, int32_t *plastiter,
+                                       uint32_t *plower, uint32_t *pupper,
+                                       int32_t *pstride, int32_t incr,
+                                       int32_t chunk) {
+  PRINT0(LD_IO, "call kmpc_distribute_default_init_4u\n");
+  omptarget_nvptx_LoopSupport<uint32_t, int32_t>::distribute_default_init(
+      plower, pupper, pstride, isRuntimeUninitialized());
+}
+
+EXTERN
+void __kmpc_distribute_default_init_8(kmp_Indent *loc, int32_t global_tid,
+                                      int32_t schedtype, int32_t *plastiter,
+                                      int64_t *plower, int64_t *pupper,
+                                      int64_t *pstride, int64_t incr,
+                                      int64_t chunk) {
+  PRINT0(LD_IO, "call kmpc_distribute_default_init_8\n");
+  omptarget_nvptx_LoopSupport<int64_t, int64_t>::distribute_default_init(
+      plower, pupper, pstride, isRuntimeUninitialized());
+}
+
+EXTERN
+void __kmpc_distribute_default_init_8u(kmp_Indent *loc, int32_t global_tid,
+                                       int32_t schedtype, int32_t *plastiter,
+                                       uint64_t *plower, uint64_t *pupper,
+                                       int64_t *pstride, int64_t incr,
+                                       int64_t chunk) {
+  PRINT0(LD_IO, "call kmpc_distribute_default_init_8u\n");
+  omptarget_nvptx_LoopSupport<uint64_t, int64_t>::distribute_default_init(
+      plower, pupper, pstride, isRuntimeUninitialized());
+}
+
 EXTERN
 void __kmpc_for_static_init_4_simple_spmd(kmp_Indent *loc, int32_t global_tid,
                                           int32_t schedtype, int32_t *plastiter,