Index: openmp/trunk/runtime/src/kmp_barrier.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_barrier.cpp +++ openmp/trunk/runtime/src/kmp_barrier.cpp @@ -50,7 +50,7 @@ void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void * itt_sync_obj) ) { - KMP_TIME_DEVELOPER_BLOCK(KMP_linear_gather); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather); register kmp_team_t *team = this_thr->th.th_team; register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb; register kmp_info_t **other_threads = team->t.t_threads; @@ -130,7 +130,7 @@ int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj) ) { - KMP_TIME_DEVELOPER_BLOCK(KMP_linear_release); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release); register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; register kmp_team_t *team; @@ -149,7 +149,7 @@ if (nproc > 1) { #if KMP_BARRIER_ICV_PUSH { - KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy); if (propagate_icvs) { ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs); for (i=1; ith.th_team; register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; register kmp_info_t **other_threads = team->t.t_threads; @@ -323,7 +323,7 @@ int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj) ) { - KMP_TIME_DEVELOPER_BLOCK(KMP_tree_release); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release); register kmp_team_t *team; register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; register kmp_uint32 nproc; @@ -393,7 +393,7 @@ #if KMP_BARRIER_ICV_PUSH { - KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy); if (propagate_icvs) { __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid], team, child_tid, FALSE); @@ -426,7 +426,7 @@ void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj) ) { - KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_gather); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather); register kmp_team_t *team = this_thr->th.th_team; register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; register kmp_info_t **other_threads = team->t.t_threads; @@ -535,7 +535,7 @@ int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj) ) { - KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_release); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release); register kmp_team_t *team; register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb; register kmp_info_t **other_threads; @@ -742,7 +742,7 @@ int gtid, int tid, void (*reduce) (void *, void *) USE_ITT_BUILD_ARG(void * itt_sync_obj) ) { - KMP_TIME_DEVELOPER_BLOCK(KMP_hier_gather); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather); register kmp_team_t *team = this_thr->th.th_team; register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb; register kmp_uint32 nproc = this_thr->th.th_team_nproc; @@ -883,7 +883,7 @@ int propagate_icvs USE_ITT_BUILD_ARG(void * itt_sync_obj) ) { - KMP_TIME_DEVELOPER_BLOCK(KMP_hier_release); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release); register kmp_team_t *team; register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; register kmp_uint32 nproc; @@ -1067,9 +1067,8 @@ __kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size, void *reduce_data, void (*reduce)(void *, void *)) { - KMP_TIME_DEVELOPER_BLOCK(KMP_barrier); - KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER); KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier); + KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER); register int tid = __kmp_tid_from_gtid(gtid); register kmp_info_t *this_thr = __kmp_threads[gtid]; register kmp_team_t *team = this_thr->th.th_team; @@ -1333,7 +1332,8 @@ void __kmp_end_split_barrier(enum barrier_type bt, int gtid) { - KMP_TIME_DEVELOPER_BLOCK(KMP_end_split_barrier); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier); + KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER); int tid = __kmp_tid_from_gtid(gtid); kmp_info_t *this_thr = __kmp_threads[gtid]; kmp_team_t *team = this_thr->th.th_team; @@ -1376,9 +1376,8 @@ void __kmp_join_barrier(int gtid) { - KMP_TIME_PARTITIONED_BLOCK(OMP_fork_join_barrier); + KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier); KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER); - KMP_TIME_DEVELOPER_BLOCK(KMP_join_barrier); register kmp_info_t *this_thr = __kmp_threads[gtid]; register kmp_team_t *team; register kmp_uint nproc; @@ -1592,9 +1591,8 @@ void __kmp_fork_barrier(int gtid, int tid) { - KMP_TIME_PARTITIONED_BLOCK(OMP_fork_join_barrier); + KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier); KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER); - KMP_TIME_DEVELOPER_BLOCK(KMP_fork_barrier); kmp_info_t *this_thr = __kmp_threads[gtid]; kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL; #if USE_ITT_BUILD @@ -1707,7 +1705,7 @@ the fixed ICVs in the master's thread struct, because it is not always the case that the threads arrays have been allocated when __kmp_fork_call() is executed. */ { - KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy); if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs // Copy the initial ICVs from the master's thread struct to the implicit task for this tid. KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid)); @@ -1762,7 +1760,7 @@ void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc ) { - KMP_TIME_DEVELOPER_BLOCK(KMP_setup_icv_copy); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy); KMP_DEBUG_ASSERT(team && new_nproc && new_icvs); KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc); Index: openmp/trunk/runtime/src/kmp_global.c =================================================================== --- openmp/trunk/runtime/src/kmp_global.c +++ openmp/trunk/runtime/src/kmp_global.c @@ -28,10 +28,10 @@ kmp_tas_lock_t __kmp_stats_lock; // global list of per thread stats, the head is a sentinel node which accumulates all stats produced before __kmp_create_worker is called. -kmp_stats_list __kmp_stats_list; +kmp_stats_list* __kmp_stats_list; // thread local pointer to stats node within list -__thread kmp_stats_list* __kmp_stats_thread_ptr = &__kmp_stats_list; +__thread kmp_stats_list* __kmp_stats_thread_ptr = NULL; // gives reference tick for all events (considered the 0 tick) tsc_tick_count __kmp_stats_start_time; Index: openmp/trunk/runtime/src/kmp_runtime.c =================================================================== --- openmp/trunk/runtime/src/kmp_runtime.c +++ openmp/trunk/runtime/src/kmp_runtime.c @@ -1417,7 +1417,7 @@ kmp_hot_team_ptr_t **p_hot_teams; #endif { // KMP_TIME_BLOCK - KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call); KMP_COUNT_VALUE(OMP_PARALLEL_args, argc); KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid )); @@ -2199,7 +2199,6 @@ { KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); - // KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke); if (! team->t.t_invoke( gtid )) { KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" ); } @@ -2258,7 +2257,7 @@ #endif /* OMP_40_ENABLED */ ) { - KMP_TIME_DEVELOPER_BLOCK(KMP_join_call); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call); kmp_team_t *team; kmp_team_t *parent_team; kmp_info_t *master_th; @@ -3681,6 +3680,13 @@ KMP_DEBUG_ASSERT( ! root->r.r_root_team ); } +#if KMP_STATS_ENABLED + // Initialize stats as soon as possible (right after gtid assignment). + __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid); + KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life); + KMP_SET_THREAD_STATE(SERIAL_REGION); + KMP_INIT_PARTITIONED_TIMERS(OMP_serial); +#endif __kmp_initialize_root( root ); /* setup new root thread structure */ @@ -4748,7 +4754,7 @@ kmp_internal_control_t *new_icvs, int argc USE_NESTED_HOT_ARG(kmp_info_t *master) ) { - KMP_TIME_DEVELOPER_BLOCK(KMP_allocate_team); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team); int f; kmp_team_t *team; int use_hot_team = ! root->r.r_active; @@ -5504,14 +5510,11 @@ } #endif - KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop); { - KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke); KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); rc = (*pteam)->t.t_invoke( gtid ); } - KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop); KMP_ASSERT( rc ); #if OMPT_SUPPORT @@ -6332,7 +6335,7 @@ #endif #endif #if KMP_STATS_ENABLED - __kmp_init_tas_lock( & __kmp_stats_lock ); + __kmp_stats_init(); #endif __kmp_init_lock( & __kmp_global_lock ); __kmp_init_queuing_lock( & __kmp_dispatch_lock ); @@ -7293,8 +7296,7 @@ __kmp_i18n_catclose(); #if KMP_STATS_ENABLED - __kmp_accumulate_stats_at_exit(); - __kmp_stats_list.deallocate(); + __kmp_stats_fini(); #endif KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) ); Index: openmp/trunk/runtime/src/kmp_stats.h =================================================================== --- openmp/trunk/runtime/src/kmp_stats.h +++ openmp/trunk/runtime/src/kmp_stats.h @@ -104,8 +104,7 @@ macro (OMP_TASKLOOP, 0, arg) \ macro (TASK_executed, 0, arg) \ macro (TASK_cancelled, 0, arg) \ - macro (TASK_stolen, 0, arg) \ - macro (LAST,0,arg) + macro (TASK_stolen, 0, arg) /*! * \brief Add new timers under KMP_FOREACH_TIMER() macro in kmp_stats.h @@ -123,31 +122,31 @@ * @ingroup STATS_GATHERING2 */ #define KMP_FOREACH_TIMER(macro, arg) \ - macro (OMP_worker_thread_life, 0, arg) \ + macro (OMP_worker_thread_life, stats_flags_e::logEvent, arg) \ macro (FOR_static_scheduling, 0, arg) \ macro (FOR_dynamic_scheduling, 0, arg) \ macro (OMP_critical, 0, arg) \ macro (OMP_critical_wait, 0, arg) \ macro (OMP_single, 0, arg) \ macro (OMP_master, 0, arg) \ - macro (OMP_idle, 0, arg) \ - macro (OMP_plain_barrier, 0, arg) \ - macro (OMP_fork_join_barrier, 0, arg) \ - macro (OMP_parallel, 0, arg) \ + macro (OMP_idle, stats_flags_e::logEvent, arg) \ + macro (OMP_plain_barrier, stats_flags_e::logEvent, arg) \ + macro (OMP_fork_barrier, stats_flags_e::logEvent, arg) \ + macro (OMP_join_barrier, stats_flags_e::logEvent, arg) \ + macro (OMP_parallel, stats_flags_e::logEvent, arg) \ macro (OMP_task_immediate, 0, arg) \ macro (OMP_task_taskwait, 0, arg) \ macro (OMP_task_taskyield, 0, arg) \ macro (OMP_task_taskgroup, 0, arg) \ macro (OMP_task_join_bar, 0, arg) \ macro (OMP_task_plain_bar, 0, arg) \ - macro (OMP_serial, 0, arg) \ + macro (OMP_serial, stats_flags_e::logEvent, arg) \ macro (OMP_taskloop_scheduling, 0, arg) \ macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ macro (FOR_static_iterations, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ macro (FOR_dynamic_iterations,stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ - KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ - macro (LAST,0, arg) + KMP_FOREACH_DEVELOPER_TIMER(macro, arg) // OMP_start_end -- Time from when OpenMP is initialized until the stats are printed at exit @@ -190,28 +189,22 @@ // KMP_tree_release -- time in __kmp_tree_barrier_release // KMP_hyper_gather -- time in __kmp_hyper_barrier_gather // KMP_hyper_release -- time in __kmp_hyper_barrier_release -# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ - macro (KMP_fork_call, 0, arg) \ - macro (KMP_join_call, 0, arg) \ - macro (KMP_fork_barrier, stats_flags_e::logEvent, arg) \ - macro (KMP_join_barrier, stats_flags_e::logEvent, arg) \ - macro (KMP_barrier, 0, arg) \ - macro (KMP_end_split_barrier, 0, arg) \ - macro (KMP_hier_gather, 0, arg) \ - macro (KMP_hier_release, 0, arg) \ - macro (KMP_hyper_gather, stats_flags_e::logEvent, arg) \ - macro (KMP_hyper_release, stats_flags_e::logEvent, arg) \ - macro (KMP_linear_gather, 0, arg) \ - macro (KMP_linear_release, 0, arg) \ - macro (KMP_tree_gather, 0, arg) \ - macro (KMP_tree_release, 0, arg) \ - macro (USER_master_invoke, stats_flags_e::logEvent, arg) \ - macro (USER_worker_invoke, stats_flags_e::logEvent, arg) \ - macro (USER_resume, stats_flags_e::logEvent, arg) \ - macro (USER_suspend, stats_flags_e::logEvent, arg) \ - macro (USER_launch_thread_loop, stats_flags_e::logEvent, arg) \ - macro (KMP_allocate_team, 0, arg) \ - macro (KMP_setup_icv_copy, 0, arg) \ +# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ + macro (KMP_fork_call, 0, arg) \ + macro (KMP_join_call, 0, arg) \ + macro (KMP_end_split_barrier, 0, arg) \ + macro (KMP_hier_gather, 0, arg) \ + macro (KMP_hier_release, 0, arg) \ + macro (KMP_hyper_gather, 0, arg) \ + macro (KMP_hyper_release, 0, arg) \ + macro (KMP_linear_gather, 0, arg) \ + macro (KMP_linear_release, 0, arg) \ + macro (KMP_tree_gather, 0, arg) \ + macro (KMP_tree_release, 0, arg) \ + macro (USER_resume, 0, arg) \ + macro (USER_suspend, 0, arg) \ + macro (KMP_allocate_team, 0, arg) \ + macro (KMP_setup_icv_copy, 0, arg) \ macro (USER_icv_copy, 0, arg) #else # define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) @@ -233,47 +226,23 @@ * * @ingroup STATS_GATHERING */ -#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \ - macro(OMP_worker_thread_life, 0, arg) \ - macro(FOR_static_scheduling, 0, arg) \ - macro(FOR_dynamic_scheduling, 0, arg) \ - macro(OMP_critical, 0, arg) \ - macro(OMP_critical_wait, 0, arg) \ - macro(OMP_single, 0, arg) \ - macro(OMP_master, 0, arg) \ - macro(OMP_idle, 0, arg) \ - macro(OMP_plain_barrier, 0, arg) \ - macro(OMP_fork_join_barrier, 0, arg) \ - macro(OMP_parallel, 0, arg) \ - macro(OMP_task_immediate, 0, arg) \ - macro(OMP_task_taskwait, 0, arg) \ - macro(OMP_task_taskyield, 0, arg) \ - macro(OMP_task_taskgroup, 0, arg) \ - macro(OMP_task_join_bar, 0, arg) \ - macro(OMP_task_plain_bar, 0, arg) \ - macro(OMP_serial, 0, arg) \ - macro(OMP_taskloop_scheduling, 0, arg) \ - KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro,arg) \ - macro(LAST, 0, arg) - -#if (KMP_DEVELOPER_STATS) -# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg) \ - macro(USER_launch_thread_loop, stats_flags_e::logEvent, arg) -#else -# define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg) -#endif +#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \ + KMP_FOREACH_TIMER(macro, arg) #define ENUMERATE(name,ignore,prefix) prefix##name, enum timer_e { KMP_FOREACH_TIMER(ENUMERATE, TIMER_) + TIMER_LAST }; enum explicit_timer_e { KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_) + EXPLICIT_TIMER_LAST }; enum counter_e { KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_) + COUNTER_LAST }; #undef ENUMERATE @@ -370,7 +339,7 @@ void start(timer_e timerEnumValue); void pause() { pauseStartTime = tsc_tick_count::now(); } void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); } - void stop(timer_e timerEnumValue); + void stop(timer_e timerEnumValue, kmp_stats_list* stats_ptr = nullptr); void reset() { startTime = 0; pauseStartTime = 0; totalPauseTime = 0; } }; @@ -716,13 +685,14 @@ extern "C" { #endif void __kmp_stats_init(); +void __kmp_stats_fini(); void __kmp_reset_stats(); void __kmp_output_stats(const char *); void __kmp_accumulate_stats_at_exit(void); // thread local pointer to stats node within list extern __thread kmp_stats_list* __kmp_stats_thread_ptr; // head to stats list. -extern kmp_stats_list __kmp_stats_list; +extern kmp_stats_list* __kmp_stats_list; // lock for __kmp_stats_list extern kmp_tas_lock_t __kmp_stats_lock; // reference start time @@ -866,6 +836,7 @@ # define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n) # define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n) # define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n) +# define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n) #else // Null definitions # define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) @@ -873,6 +844,7 @@ # define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) # define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) # define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) +# define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0) #endif #else // KMP_STATS_ENABLED @@ -894,6 +866,7 @@ #define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) #define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0) #define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0) +#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0) #define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0) #define KMP_POP_PARTITIONED_TIMER() ((void)0) #define KMP_SET_THREAD_STATE(state_name) ((void)0) Index: openmp/trunk/runtime/src/kmp_stats.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_stats.cpp +++ openmp/trunk/runtime/src/kmp_stats.cpp @@ -29,11 +29,11 @@ #define expandName(name,flags,ignore) {STRINGIZE(name),flags}, statInfo timeStat::timerInfo[] = { KMP_FOREACH_TIMER(expandName,0) - {0,0} + {"TIMER_LAST", 0} }; const statInfo counter::counterInfo[] = { KMP_FOREACH_COUNTER(expandName,0) - {0,0} + {"COUNTER_LAST", 0} }; #undef expandName @@ -71,7 +71,7 @@ static uint32_t statsPrinted = 0; // output interface -static kmp_stats_output_module __kmp_stats_global_output; +static kmp_stats_output_module* __kmp_stats_global_output = NULL; /* ****************************************************** */ /* ************* statistic member functions ************* */ @@ -164,7 +164,7 @@ return; } -void explicitTimer::stop(timer_e timerEnumValue) { +void explicitTimer::stop(timer_e timerEnumValue, kmp_stats_list* stats_ptr /* = nullptr */) { if (startTime.getValue() == 0) return; @@ -174,8 +174,10 @@ stat->addSample(((finishTime - startTime) - totalPauseTime).ticks()); if(timeStat::logEvent(timerEnumValue)) { - __kmp_stats_thread_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue); - __kmp_stats_thread_ptr->decrementNestValue(); + if(!stats_ptr) + stats_ptr = __kmp_stats_thread_ptr; + stats_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue); + stats_ptr->decrementNestValue(); } /* We accept the risk that we drop a sample because it really did start at t==0. */ @@ -481,18 +483,18 @@ // and say "it's over". // If the timer wasn't running, this won't record anything anyway. kmp_stats_list::iterator it; - for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) { + for(it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) { kmp_stats_list* ptr = *it; ptr->getPartitionedTimers()->windup(); for (int timer=0; timergetExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer); + ptr->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer, ptr); } } } void kmp_stats_output_module::printPloticusFile() { int i; - int size = __kmp_stats_list.size(); + int size = __kmp_stats_list->size(); FILE* plotOut = fopen(plotFileName, "w+"); fprintf(plotOut, "#proc page\n" @@ -602,7 +604,7 @@ fprintf(statsOut, "%s\n",heading); // Accumulate across threads. kmp_stats_list::iterator it; - for (it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) { + for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) { int t = (*it)->getGtid(); // Output per thread stats if requested. if (printPerThreadFlag) { @@ -666,7 +668,7 @@ void __kmp_reset_stats() { kmp_stats_list::iterator it; - for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) { + for(it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) { timeStat * timers = (*it)->getTimers(); counter * counters = (*it)->getCounters(); explicitTimer * eTimers = (*it)->getExplicitTimers(); @@ -688,7 +690,7 @@ // This function will reset all stats and stop all threads' explicit timers if they haven't been stopped already. void __kmp_output_stats(const char * heading) { - __kmp_stats_global_output.outputStats(heading); + __kmp_stats_global_output->outputStats(heading); __kmp_reset_stats(); } @@ -703,6 +705,18 @@ void __kmp_stats_init(void) { + __kmp_init_tas_lock( & __kmp_stats_lock ); + __kmp_stats_start_time = tsc_tick_count::now(); + __kmp_stats_global_output = new kmp_stats_output_module(); + __kmp_stats_list = new kmp_stats_list(); +} + +void __kmp_stats_fini(void) +{ + __kmp_accumulate_stats_at_exit(); + __kmp_stats_list->deallocate(); + delete __kmp_stats_global_output; + delete __kmp_stats_list; } } // extern "C" Index: openmp/trunk/runtime/src/z_Linux_util.c =================================================================== --- openmp/trunk/runtime/src/z_Linux_util.c +++ openmp/trunk/runtime/src/z_Linux_util.c @@ -866,14 +866,12 @@ // th->th.th_stats is used to transfer thread specific stats-pointer to __kmp_launch_worker // So when thread is created (goes into __kmp_launch_worker) it will // set it's __thread local pointer to th->th.th_stats - th->th.th_stats = __kmp_stats_list.push_back(gtid); - if(KMP_UBER_GTID(gtid)) { - __kmp_stats_start_time = tsc_tick_count::now(); - __kmp_stats_thread_ptr = th->th.th_stats; - __kmp_stats_init(); - KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life); - KMP_SET_THREAD_STATE(SERIAL_REGION); - KMP_INIT_PARTITIONED_TIMERS(OMP_serial); + if(!KMP_UBER_GTID(gtid)) { + th->th.th_stats = __kmp_stats_list->push_back(gtid); + } else { + // For root threads, the __kmp_stats_thread_ptr is set in __kmp_register_root(), so + // set the th->th.th_stats field to it. + th->th.th_stats = __kmp_stats_thread_ptr; } __kmp_release_tas_lock(&__kmp_stats_lock, gtid); @@ -1541,7 +1539,7 @@ template static inline void __kmp_suspend_template( int th_gtid, C *flag ) { - KMP_TIME_DEVELOPER_BLOCK(USER_suspend); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_suspend); kmp_info_t *th = __kmp_threads[th_gtid]; int status; typename C::flag_t old_spin; @@ -1675,7 +1673,7 @@ template static inline void __kmp_resume_template( int target_gtid, C *flag ) { - KMP_TIME_DEVELOPER_BLOCK(USER_resume); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume); kmp_info_t *th = __kmp_threads[target_gtid]; int status; @@ -1750,7 +1748,7 @@ void __kmp_resume_monitor() { - KMP_TIME_DEVELOPER_BLOCK(USER_resume); + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume); int status; #ifdef KMP_DEBUG int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;