diff --git a/openmp/libomptarget/DeviceRTL/include/Debug.h b/openmp/libomptarget/DeviceRTL/include/Debug.h --- a/openmp/libomptarget/DeviceRTL/include/Debug.h +++ b/openmp/libomptarget/DeviceRTL/include/Debug.h @@ -49,13 +49,13 @@ /// Enter a debugging scope for performing function traces. Enabled with /// FunctionTracting set in the debug kind. #define FunctionTracingRAII() \ - DebugEntryRAII Entry(__LINE__, __PRETTY_FUNCTION__); + DebugEntryRAII Entry(__FILE__, __LINE__, __PRETTY_FUNCTION__); /// An RAII class for handling entries to debug locations. The current location /// and function will be printed on entry. Nested levels increase the /// indentation shown in the debugging output. struct DebugEntryRAII { - DebugEntryRAII(const unsigned Line, const char *Function); + DebugEntryRAII(const char *File, const unsigned Line, const char *Function); ~DebugEntryRAII(); }; diff --git a/openmp/libomptarget/DeviceRTL/src/Debug.cpp b/openmp/libomptarget/DeviceRTL/src/Debug.cpp --- a/openmp/libomptarget/DeviceRTL/src/Debug.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Debug.cpp @@ -12,6 +12,7 @@ #include "Debug.h" #include "Configuration.h" +#include "Interface.h" #include "Mapping.h" #include "Types.h" @@ -41,14 +42,15 @@ static uint32_t Level = 0; #pragma omp allocate(Level) allocator(omp_pteam_mem_alloc) -DebugEntryRAII::DebugEntryRAII(const unsigned Line, const char *Function) { +DebugEntryRAII::DebugEntryRAII(const char *File, const unsigned Line, + const char *Function) { if (config::isDebugMode(config::DebugKind::FunctionTracing) && - mapping::getThreadIdInBlock() == 0) { + mapping::getThreadIdInBlock() == 0 && mapping::getBlockId() == 0) { for (int I = 0; I < Level; ++I) PRINTF("%s", " "); - PRINTF("Line %u: Thread %u Entering %s:%u\n", Line, + PRINTF("%s:%u: Thread %u Entering %s\n", File, Line, mapping::getThreadIdInBlock(), Function); Level++; } @@ -56,7 +58,7 @@ DebugEntryRAII::~DebugEntryRAII() { if (config::isDebugMode(config::DebugKind::FunctionTracing) && - mapping::getThreadIdInBlock() == 0) + mapping::getThreadIdInBlock() == 0 && mapping::getBlockId() == 0) Level--; } diff --git a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp --- a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp @@ -30,6 +30,7 @@ /// Simple generic state machine for worker threads. static void genericStateMachine(IdentTy *Ident) { + FunctionTracingRAII(); uint32_t TId = mapping::getThreadIdInBlock(); @@ -66,6 +67,7 @@ /// int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode, bool UseGenericStateMachine, bool) { + FunctionTracingRAII(); const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD; if (IsSPMD) { inititializeRuntime(/* IsSPMD */ true); @@ -98,6 +100,7 @@ /// \param Ident Source location identification, can be NULL. /// void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode, bool) { + FunctionTracingRAII(); const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD; state::assumeInitialState(IsSPMD); if (IsSPMD) @@ -107,7 +110,10 @@ state::ParallelRegionFn = nullptr; } -int8_t __kmpc_is_spmd_exec_mode() { return mapping::isSPMDMode(); } +int8_t __kmpc_is_spmd_exec_mode() { + FunctionTracingRAII(); + return mapping::isSPMDMode(); +} } #pragma omp end declare target diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp --- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp @@ -231,10 +231,12 @@ extern "C" { __attribute__((noinline)) uint32_t __kmpc_get_hardware_thread_id_in_block() { + FunctionTracingRAII(); return mapping::getThreadIdInBlock(); } __attribute__((noinline)) uint32_t __kmpc_get_hardware_num_threads_in_block() { + FunctionTracingRAII(); return mapping::getNumberOfProcessorElements(); } } diff --git a/openmp/libomptarget/DeviceRTL/src/Misc.cpp b/openmp/libomptarget/DeviceRTL/src/Misc.cpp --- a/openmp/libomptarget/DeviceRTL/src/Misc.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Misc.cpp @@ -11,6 +11,8 @@ #include "Types.h" +#include "Debug.h" + #pragma omp declare target namespace _OMP { @@ -60,9 +62,15 @@ ///{ extern "C" { -int32_t __kmpc_cancellationpoint(IdentTy *, int32_t, int32_t) { return 0; } +int32_t __kmpc_cancellationpoint(IdentTy *, int32_t, int32_t) { + FunctionTracingRAII(); + return 0; +} -int32_t __kmpc_cancel(IdentTy *, int32_t, int32_t) { return 0; } +int32_t __kmpc_cancel(IdentTy *, int32_t, int32_t) { + FunctionTracingRAII(); + return 0; +} double omp_get_wtick(void) { return _OMP::impl::getWTick(); } diff --git a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp --- a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp @@ -66,6 +66,7 @@ // Invoke an outlined parallel function unwrapping arguments (up to 32). void invokeMicrotask(int32_t global_tid, int32_t bound_tid, void *fn, void **args, int64_t nargs) { + DebugEntryRAII Entry(__FILE__, __LINE__, ""); switch (nargs) { #include "generated_microtask_cases.gen" default: @@ -81,6 +82,7 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr, int32_t num_threads, int proc_bind, void *fn, void *wrapper_fn, void **args, int64_t nargs) { + FunctionTracingRAII(); uint32_t TId = mapping::getThreadIdInBlock(); // Handle the serialized case first, same for SPMD/non-SPMD. @@ -149,6 +151,7 @@ __attribute__((noinline)) bool __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) { + FunctionTracingRAII(); // Work function and arguments for L1 parallel region. *WorkFn = state::ParallelRegionFn; @@ -163,6 +166,7 @@ } __attribute__((noinline)) void __kmpc_kernel_end_parallel() { + FunctionTracingRAII(); // In case we have modified an ICV for this thread before a ThreadState was // created. We drop it now to not contaminate the next parallel region. ASSERT(!mapping::isSPMDMode()); @@ -172,27 +176,40 @@ } void __kmpc_serialized_parallel(IdentTy *, uint32_t TId) { + FunctionTracingRAII(); state::enterDataEnvironment(); ++icv::Level; } void __kmpc_end_serialized_parallel(IdentTy *, uint32_t TId) { + FunctionTracingRAII(); state::exitDataEnvironment(); --icv::Level; } -uint16_t __kmpc_parallel_level(IdentTy *, uint32_t) { return omp_get_level(); } +uint16_t __kmpc_parallel_level(IdentTy *, uint32_t) { + FunctionTracingRAII(); + return omp_get_level(); +} -int32_t __kmpc_global_thread_num(IdentTy *) { return omp_get_thread_num(); } +int32_t __kmpc_global_thread_num(IdentTy *) { + FunctionTracingRAII(); + return omp_get_thread_num(); +} void __kmpc_push_num_threads(IdentTy *, int32_t, int32_t NumThreads) { + FunctionTracingRAII(); icv::NThreads = NumThreads; } void __kmpc_push_num_teams(IdentTy *loc, int32_t tid, int32_t num_teams, - int32_t thread_limit) {} + int32_t thread_limit) { + FunctionTracingRAII(); +} -void __kmpc_push_proc_bind(IdentTy *loc, uint32_t tid, int proc_bind) {} +void __kmpc_push_proc_bind(IdentTy *loc, uint32_t tid, int proc_bind) { + FunctionTracingRAII(); +} } #pragma omp end declare target diff --git a/openmp/libomptarget/DeviceRTL/src/Reduction.cpp b/openmp/libomptarget/DeviceRTL/src/Reduction.cpp --- a/openmp/libomptarget/DeviceRTL/src/Reduction.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Reduction.cpp @@ -176,6 +176,7 @@ int32_t __kmpc_nvptx_parallel_reduce_nowait_v2( IdentTy *Loc, int32_t TId, int32_t num_vars, uint64_t reduce_size, void *reduce_data, ShuffleReductFnTy shflFct, InterWarpCopyFnTy cpyFct) { + FunctionTracingRAII(); return nvptx_parallel_reduce_nowait(TId, num_vars, reduce_size, reduce_data, shflFct, cpyFct, mapping::isSPMDMode(), false); @@ -186,6 +187,7 @@ void *reduce_data, ShuffleReductFnTy shflFct, InterWarpCopyFnTy cpyFct, ListGlobalFnTy lgcpyFct, ListGlobalFnTy lgredFct, ListGlobalFnTy glcpyFct, ListGlobalFnTy glredFct) { + FunctionTracingRAII(); // Terminate all threads in non-SPMD mode except for the master thread. uint32_t ThreadId = mapping::getThreadIdInBlock(); @@ -310,9 +312,9 @@ return 0; } -void __kmpc_nvptx_end_reduce(int32_t TId) {} +void __kmpc_nvptx_end_reduce(int32_t TId) { FunctionTracingRAII(); } -void __kmpc_nvptx_end_reduce_nowait(int32_t TId) {} +void __kmpc_nvptx_end_reduce_nowait(int32_t TId) { FunctionTracingRAII(); } } #pragma omp end declare target diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp --- a/openmp/libomptarget/DeviceRTL/src/State.cpp +++ b/openmp/libomptarget/DeviceRTL/src/State.cpp @@ -496,10 +496,12 @@ extern "C" { __attribute__((noinline)) void *__kmpc_alloc_shared(uint64_t Bytes) { + FunctionTracingRAII(); return memory::allocShared(Bytes, "Frontend alloc shared"); } __attribute__((noinline)) void __kmpc_free_shared(void *Ptr, uint64_t Bytes) { + FunctionTracingRAII(); memory::freeShared(Ptr, Bytes, "Frontend free shared"); } @@ -521,6 +523,7 @@ allocator(omp_pteam_mem_alloc) void __kmpc_begin_sharing_variables(void ***GlobalArgs, uint64_t nArgs) { + FunctionTracingRAII(); if (nArgs <= NUM_SHARED_VARIABLES_IN_SHARED_MEM) { SharedMemVariableSharingSpacePtr = &SharedMemVariableSharingSpace[0]; } else { @@ -531,11 +534,13 @@ } void __kmpc_end_sharing_variables() { + FunctionTracingRAII(); if (SharedMemVariableSharingSpacePtr != &SharedMemVariableSharingSpace[0]) memory::freeGlobal(SharedMemVariableSharingSpacePtr, "new extended args"); } void __kmpc_get_shared_variables(void ***GlobalArgs) { + FunctionTracingRAII(); *GlobalArgs = SharedMemVariableSharingSpacePtr; } } diff --git a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp --- a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp @@ -254,16 +254,18 @@ } extern "C" { -void __kmpc_ordered(IdentTy *Loc, int32_t TId) {} +void __kmpc_ordered(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); } -void __kmpc_end_ordered(IdentTy *Loc, int32_t TId) {} +void __kmpc_end_ordered(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); } int32_t __kmpc_cancel_barrier(IdentTy *Loc, int32_t TId) { + FunctionTracingRAII(); __kmpc_barrier(Loc, TId); return 0; } void __kmpc_barrier(IdentTy *Loc, int32_t TId) { + FunctionTracingRAII(); if (mapping::isMainThreadInGenericMode()) return __kmpc_flush(Loc); @@ -275,34 +277,49 @@ __attribute__((noinline)) void __kmpc_barrier_simple_spmd(IdentTy *Loc, int32_t TId) { + FunctionTracingRAII(); synchronize::threads(); } int32_t __kmpc_master(IdentTy *Loc, int32_t TId) { + FunctionTracingRAII(); return omp_get_team_num() == 0; } -void __kmpc_end_master(IdentTy *Loc, int32_t TId) {} +void __kmpc_end_master(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); } int32_t __kmpc_single(IdentTy *Loc, int32_t TId) { + FunctionTracingRAII(); return __kmpc_master(Loc, TId); } void __kmpc_end_single(IdentTy *Loc, int32_t TId) { + FunctionTracingRAII(); // The barrier is explicitly called. } -void __kmpc_flush(IdentTy *Loc) { fence::kernel(__ATOMIC_SEQ_CST); } +void __kmpc_flush(IdentTy *Loc) { + FunctionTracingRAII(); + fence::kernel(__ATOMIC_SEQ_CST); +} -uint64_t __kmpc_warp_active_thread_mask(void) { return mapping::activemask(); } +uint64_t __kmpc_warp_active_thread_mask(void) { + FunctionTracingRAII(); + return mapping::activemask(); +} -void __kmpc_syncwarp(uint64_t Mask) { synchronize::warp(Mask); } +void __kmpc_syncwarp(uint64_t Mask) { + FunctionTracingRAII(); + synchronize::warp(Mask); +} void __kmpc_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name) { + FunctionTracingRAII(); omp_set_lock(reinterpret_cast(Name)); } void __kmpc_end_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name) { + FunctionTracingRAII(); omp_unset_lock(reinterpret_cast(Name)); } diff --git a/openmp/libomptarget/DeviceRTL/src/Tasking.cpp b/openmp/libomptarget/DeviceRTL/src/Tasking.cpp --- a/openmp/libomptarget/DeviceRTL/src/Tasking.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Tasking.cpp @@ -26,6 +26,7 @@ uint64_t TaskSizeInclPrivateValues, uint64_t SharedValuesSize, TaskFnTy TaskFn) { + FunctionTracingRAII(); auto TaskSizeInclPrivateValuesPadded = utils::roundUp(TaskSizeInclPrivateValues, uint64_t(sizeof(void *))); auto TaskSizeTotal = TaskSizeInclPrivateValuesPadded + SharedValuesSize; @@ -40,12 +41,14 @@ int32_t __kmpc_omp_task(IdentTy *Loc, uint32_t TId, TaskDescriptorTy *TaskDescriptor) { + FunctionTracingRAII(); return __kmpc_omp_task_with_deps(Loc, TId, TaskDescriptor, 0, 0, 0, 0); } int32_t __kmpc_omp_task_with_deps(IdentTy *Loc, uint32_t TId, TaskDescriptorTy *TaskDescriptor, int32_t, void *, int32_t, void *) { + FunctionTracingRAII(); state::DateEnvironmentRAII DERAII; TaskDescriptor->TaskFn(0, TaskDescriptor); @@ -56,31 +59,42 @@ void __kmpc_omp_task_begin_if0(IdentTy *Loc, uint32_t TId, TaskDescriptorTy *TaskDescriptor) { + FunctionTracingRAII(); state::enterDataEnvironment(); } void __kmpc_omp_task_complete_if0(IdentTy *Loc, uint32_t TId, TaskDescriptorTy *TaskDescriptor) { + FunctionTracingRAII(); state::exitDataEnvironment(); memory::freeGlobal(TaskDescriptor, "explicit task descriptor"); } void __kmpc_omp_wait_deps(IdentTy *Loc, uint32_t TId, int32_t, void *, int32_t, - void *) {} + void *) { + FunctionTracingRAII(); +} -void __kmpc_taskgroup(IdentTy *Loc, uint32_t TId) {} +void __kmpc_taskgroup(IdentTy *Loc, uint32_t TId) { FunctionTracingRAII(); } -void __kmpc_end_taskgroup(IdentTy *Loc, uint32_t TId) {} +void __kmpc_end_taskgroup(IdentTy *Loc, uint32_t TId) { FunctionTracingRAII(); } -int32_t __kmpc_omp_taskyield(IdentTy *Loc, uint32_t TId, int) { return 0; } +int32_t __kmpc_omp_taskyield(IdentTy *Loc, uint32_t TId, int) { + FunctionTracingRAII(); + return 0; +} -int32_t __kmpc_omp_taskwait(IdentTy *Loc, uint32_t TId) { return 0; } +int32_t __kmpc_omp_taskwait(IdentTy *Loc, uint32_t TId) { + FunctionTracingRAII(); + return 0; +} void __kmpc_taskloop(IdentTy *Loc, uint32_t TId, TaskDescriptorTy *TaskDescriptor, int, uint64_t *LowerBound, uint64_t *UpperBound, int64_t, int, int32_t, uint64_t, void *) { + FunctionTracingRAII(); // Skip task entirely if empty iteration space. if (*LowerBound > *UpperBound) return; diff --git a/openmp/libomptarget/DeviceRTL/src/Utils.cpp b/openmp/libomptarget/DeviceRTL/src/Utils.cpp --- a/openmp/libomptarget/DeviceRTL/src/Utils.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Utils.cpp @@ -11,6 +11,7 @@ #include "Utils.h" +#include "Debug.h" #include "Interface.h" #include "Mapping.h" @@ -129,10 +130,12 @@ extern "C" { int32_t __kmpc_shuffle_int32(int32_t Val, int16_t Delta, int16_t SrcLane) { + FunctionTracingRAII(); return impl::shuffleDown(lanes::All, Val, Delta, SrcLane); } int64_t __kmpc_shuffle_int64(int64_t Val, int16_t Delta, int16_t Width) { + FunctionTracingRAII(); uint32_t lo, hi; utils::unpack(Val, lo, hi); hi = impl::shuffleDown(lanes::All, hi, Delta, Width); diff --git a/openmp/libomptarget/DeviceRTL/src/Workshare.cpp b/openmp/libomptarget/DeviceRTL/src/Workshare.cpp --- a/openmp/libomptarget/DeviceRTL/src/Workshare.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Workshare.cpp @@ -474,6 +474,7 @@ // init void __kmpc_dispatch_init_4(IdentTy *loc, int32_t tid, int32_t schedule, int32_t lb, int32_t ub, int32_t st, int32_t chunk) { + FunctionTracingRAII(); DynamicScheduleTracker *DST = pushDST(); omptarget_nvptx_LoopSupport::dispatch_init( loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST); @@ -482,6 +483,7 @@ void __kmpc_dispatch_init_4u(IdentTy *loc, int32_t tid, int32_t schedule, uint32_t lb, uint32_t ub, int32_t st, int32_t chunk) { + FunctionTracingRAII(); DynamicScheduleTracker *DST = pushDST(); omptarget_nvptx_LoopSupport::dispatch_init( loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST); @@ -489,6 +491,7 @@ void __kmpc_dispatch_init_8(IdentTy *loc, int32_t tid, int32_t schedule, int64_t lb, int64_t ub, int64_t st, int64_t chunk) { + FunctionTracingRAII(); DynamicScheduleTracker *DST = pushDST(); omptarget_nvptx_LoopSupport::dispatch_init( loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST); @@ -497,6 +500,7 @@ void __kmpc_dispatch_init_8u(IdentTy *loc, int32_t tid, int32_t schedule, uint64_t lb, uint64_t ub, int64_t st, int64_t chunk) { + FunctionTracingRAII(); DynamicScheduleTracker *DST = pushDST(); omptarget_nvptx_LoopSupport::dispatch_init( loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST); @@ -505,6 +509,7 @@ // next int __kmpc_dispatch_next_4(IdentTy *loc, int32_t tid, int32_t *p_last, int32_t *p_lb, int32_t *p_ub, int32_t *p_st) { + FunctionTracingRAII(); DynamicScheduleTracker *DST = peekDST(); return omptarget_nvptx_LoopSupport::dispatch_next( loc, tid, p_last, p_lb, p_ub, p_st, DST); @@ -512,6 +517,7 @@ int __kmpc_dispatch_next_4u(IdentTy *loc, int32_t tid, int32_t *p_last, uint32_t *p_lb, uint32_t *p_ub, int32_t *p_st) { + FunctionTracingRAII(); DynamicScheduleTracker *DST = peekDST(); return omptarget_nvptx_LoopSupport::dispatch_next( loc, tid, p_last, p_lb, p_ub, p_st, DST); @@ -519,6 +525,7 @@ int __kmpc_dispatch_next_8(IdentTy *loc, int32_t tid, int32_t *p_last, int64_t *p_lb, int64_t *p_ub, int64_t *p_st) { + FunctionTracingRAII(); DynamicScheduleTracker *DST = peekDST(); return omptarget_nvptx_LoopSupport::dispatch_next( loc, tid, p_last, p_lb, p_ub, p_st, DST); @@ -526,6 +533,7 @@ int __kmpc_dispatch_next_8u(IdentTy *loc, int32_t tid, int32_t *p_last, uint64_t *p_lb, uint64_t *p_ub, int64_t *p_st) { + FunctionTracingRAII(); DynamicScheduleTracker *DST = peekDST(); return omptarget_nvptx_LoopSupport::dispatch_next( loc, tid, p_last, p_lb, p_ub, p_st, DST); @@ -533,21 +541,25 @@ // fini void __kmpc_dispatch_fini_4(IdentTy *loc, int32_t tid) { + FunctionTracingRAII(); omptarget_nvptx_LoopSupport::dispatch_fini(); popDST(); } void __kmpc_dispatch_fini_4u(IdentTy *loc, int32_t tid) { + FunctionTracingRAII(); omptarget_nvptx_LoopSupport::dispatch_fini(); popDST(); } void __kmpc_dispatch_fini_8(IdentTy *loc, int32_t tid) { + FunctionTracingRAII(); omptarget_nvptx_LoopSupport::dispatch_fini(); popDST(); } void __kmpc_dispatch_fini_8u(IdentTy *loc, int32_t tid) { + FunctionTracingRAII(); omptarget_nvptx_LoopSupport::dispatch_fini(); popDST(); } @@ -560,6 +572,7 @@ int32_t schedtype, int32_t *plastiter, int32_t *plower, int32_t *pupper, int32_t *pstride, int32_t incr, int32_t chunk) { + FunctionTracingRAII(); omptarget_nvptx_LoopSupport::for_static_init( global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, mapping::isSPMDMode()); @@ -569,6 +582,7 @@ int32_t schedtype, int32_t *plastiter, uint32_t *plower, uint32_t *pupper, int32_t *pstride, int32_t incr, int32_t chunk) { + FunctionTracingRAII(); omptarget_nvptx_LoopSupport::for_static_init( global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, mapping::isSPMDMode()); @@ -578,6 +592,7 @@ int32_t schedtype, int32_t *plastiter, int64_t *plower, int64_t *pupper, int64_t *pstride, int64_t incr, int64_t chunk) { + FunctionTracingRAII(); omptarget_nvptx_LoopSupport::for_static_init( global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, mapping::isSPMDMode()); @@ -587,6 +602,7 @@ int32_t schedtype, int32_t *plastiter, uint64_t *plower, uint64_t *pupper, int64_t *pstride, int64_t incr, int64_t chunk) { + FunctionTracingRAII(); omptarget_nvptx_LoopSupport::for_static_init( global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, mapping::isSPMDMode()); @@ -597,6 +613,7 @@ int32_t *plower, int32_t *pupper, int32_t *pstride, int32_t incr, int32_t chunk) { + FunctionTracingRAII(); omptarget_nvptx_LoopSupport::for_static_init( global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, mapping::isSPMDMode()); @@ -607,6 +624,7 @@ uint32_t *plower, uint32_t *pupper, int32_t *pstride, int32_t incr, int32_t chunk) { + FunctionTracingRAII(); omptarget_nvptx_LoopSupport::for_static_init( global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, mapping::isSPMDMode()); @@ -617,6 +635,7 @@ int64_t *plower, int64_t *pupper, int64_t *pstride, int64_t incr, int64_t chunk) { + FunctionTracingRAII(); omptarget_nvptx_LoopSupport::for_static_init( global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, mapping::isSPMDMode()); @@ -627,14 +646,19 @@ uint64_t *plower, uint64_t *pupper, int64_t *pstride, int64_t incr, int64_t chunk) { + FunctionTracingRAII(); omptarget_nvptx_LoopSupport::for_static_init( global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, mapping::isSPMDMode()); } -void __kmpc_for_static_fini(IdentTy *loc, int32_t global_tid) {} +void __kmpc_for_static_fini(IdentTy *loc, int32_t global_tid) { + FunctionTracingRAII(); +} -void __kmpc_distribute_static_fini(IdentTy *loc, int32_t global_tid) {} +void __kmpc_distribute_static_fini(IdentTy *loc, int32_t global_tid) { + FunctionTracingRAII(); +} } #pragma omp end declare target