Index: openmp/trunk/runtime/src/kmp_stats.h =================================================================== --- openmp/trunk/runtime/src/kmp_stats.h +++ openmp/trunk/runtime/src/kmp_stats.h @@ -47,9 +47,8 @@ public: const static int onlyInMaster = 1<<0; //!< statistic is valid only for master const static int noUnits = 1<<1; //!< statistic doesn't need units printed next to it in output - const static int synthesized = 1<<2; //!< statistic's value is created atexit time in the __kmp_output_stats function - const static int notInMaster = 1<<3; //!< statistic is valid for non-master threads - const static int logEvent = 1<<4; //!< statistic can be logged when KMP_STATS_EVENTS is on (valid only for timers) + const static int notInMaster = 1<<2; //!< statistic is valid for non-master threads + const static int logEvent = 1<<3; //!< statistic can be logged when KMP_STATS_EVENTS is on (valid only for timers) }; /*! @@ -108,34 +107,38 @@ * * @ingroup STATS_GATHERING2 */ -#define KMP_FOREACH_TIMER(macro, arg) \ - macro (OMP_start_end, stats_flags_e::onlyInMaster, arg) \ - macro (OMP_serial, stats_flags_e::onlyInMaster, arg) \ - macro (OMP_work, 0, arg) \ - macro (Total_work, stats_flags_e::synthesized, arg) \ - macro (OMP_barrier, 0, arg) \ - macro (Total_barrier, stats_flags_e::synthesized, arg) \ - macro (FOR_static_iterations, stats_flags_e::noUnits, arg) \ - macro (FOR_static_scheduling, 0, arg) \ - macro (FOR_dynamic_iterations, stats_flags_e::noUnits, arg) \ - macro (FOR_dynamic_scheduling, 0, arg) \ - macro (TASK_execution, 0, arg) \ - macro (OMP_set_numthreads, stats_flags_e::noUnits, arg) \ - macro (OMP_PARALLEL_args, stats_flags_e::noUnits, arg) \ - macro (OMP_single, 0, arg) \ - macro (OMP_master, 0, arg) \ - KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ +#define KMP_FOREACH_TIMER(macro, arg) \ + macro (OMP_start_end, stats_flags_e::onlyInMaster, arg) \ + macro (OMP_serial, stats_flags_e::onlyInMaster, arg) \ + macro (OMP_work, 0, arg) \ + macro (OMP_barrier, 0, arg) \ + macro (FOR_static_scheduling, 0, arg) \ + macro (FOR_dynamic_scheduling, 0, arg) \ + macro (TASK_execution, 0, arg) \ + macro (OMP_single, 0, arg) \ + macro (OMP_master, 0, arg) \ + macro (OMP_set_numthreads, stats_flags_e::noUnits, arg) \ + macro (OMP_PARALLEL_args, stats_flags_e::noUnits, arg) \ + macro (FOR_static_iterations, stats_flags_e::noUnits, arg) \ + macro (FOR_dynamic_iterations, stats_flags_e::noUnits, arg) \ + KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ macro (LAST,0, arg) // OMP_start_end -- time from when OpenMP is initialized until the stats are printed at exit // OMP_serial -- thread zero time executing serial code // OMP_work -- elapsed time in code dispatched by a fork (measured in the thread) -// Total_work -- a synthesized statistic summarizing how much parallel work each thread executed. -// OMP_barrier -- time at "real" barriers -// Total_barrier -- a synthesized statistic summarizing how much time at real barriers in each thread +// OMP_barrier -- time at "real" barriers (includes task time) +// TASK_execution -- time spent executing tasks +// FOR_static_iterations -- Available parallelism in statically scheduled for loops +// FOR_dynamic_iterations -- Available parallelism in dynamically scheduled for loops // FOR_static_scheduling -- time spent doing scheduling for a static "for" // FOR_dynamic_scheduling -- time spent doing scheduling for a dynamic "for" +// OMP_set_numthreads -- Values passed to omp_set_num_threads +// OMP_PARALLEL_args -- Number of arguments passed to a parallel region +// OMP_single + + #if (KMP_DEVELOPER_STATS) // Timers which are of interest tio runtime library developers, not end users. @@ -276,7 +279,6 @@ static bool masterOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::onlyInMaster; } static bool workerOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::notInMaster; } static bool noUnits (timer_e e) { return timerInfo[e].flags & stats_flags_e::noUnits; } - static bool synthesized(timer_e e) { return timerInfo[e].flags & stats_flags_e::synthesized; } static bool logEvent (timer_e e) { return timerInfo[e].flags & stats_flags_e::logEvent; } static void clearEventFlags() { int i; @@ -573,7 +575,8 @@ void init(); static void setupEventColors(); static void printPloticusFile(); - static void printStats(FILE *statsOut, statistic const * theStats, bool areTimers); + static void printTimerStats(FILE *statsOut, statistic const * theStats, statistic const * totalStats); + static void printCounterStats(FILE *statsOut, statistic const * theStats); static void printCounters(FILE * statsOut, counter const * theCounters); static void printEvents(FILE * eventsOut, kmp_stats_event_vector* theEvents, int gtid); static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; } Index: openmp/trunk/runtime/src/kmp_stats.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_stats.cpp +++ openmp/trunk/runtime/src/kmp_stats.cpp @@ -334,43 +334,29 @@ return; } -void kmp_stats_output_module::printStats(FILE *statsOut, statistic const * theStats, bool areTimers) +void kmp_stats_output_module::printTimerStats(FILE *statsOut, statistic const * theStats, statistic const * totalStats) { - if (areTimers) - { - // Check if we have useful timers, since we don't print zero value timers we need to avoid - // printing a header and then no data. - bool haveTimers = false; - for (int s = 0; sformat(tag, true).c_str()); + // Also print the Total_ versions of times. + if (totalStats && tag == 'T' && !timeStat::masterOnly(timer_e(s))) { + fprintf(statsOut, "Total_%-22s, %s\n", timeStat::name(timer_e(s)), totalStats[s].format(tag, true).c_str()); } - if (!haveTimers) - return; } +} - // Print - const char * title = areTimers ? "Timer, SampleCount," : "Counter, ThreadCount,"; - fprintf (statsOut, "%s Min, Mean, Max, Total, SD\n", title); - if (areTimers) { - for (int s = 0; sgetCount() != 0) { - char tag = timeStat::noUnits(timer_e(s)) ? ' ' : 'T'; - fprintf (statsOut, "%-25s, %s\n", timeStat::name(timer_e(s)), stat->format(tag, true).c_str()); - } - } - } else { // Counters - for (int s = 0; sformat(' ', true).c_str()); - } +void kmp_stats_output_module::printCounterStats(FILE *statsOut, statistic const * theStats) +{ + fprintf (statsOut, "Counter, ThreadCount, Min, Mean, Max, Total, SD\n"); + for (int s = 0; sformat(' ', true).c_str()); } -} +} void kmp_stats_output_module::printCounters(FILE * statsOut, counter const * theCounters) { @@ -480,6 +466,7 @@ void kmp_stats_output_module::outputStats(const char* heading) { statistic allStats[TIMER_LAST]; + statistic totalStats[TIMER_LAST]; /* Synthesized, cross threads versions of normal timer stats */ statistic allCounters[COUNTER_LAST]; // stop all the explicit timers for all threads @@ -503,7 +490,7 @@ // Output per thread stats if requested. if (perThreadPrintingEnabled()) { fprintf (statsOut, "Thread %d\n", t); - printStats(statsOut, (*it)->getTimers(), true); + printTimerStats(statsOut, (*it)->getTimers(), 0); printCounters(statsOut, (*it)->getCounters()); fprintf(statsOut,"\n"); } @@ -513,11 +500,11 @@ printEvents(eventsOut, &events, t); } + // Accumulate timers. for (int s = 0; sgetTimer(timer_e(s)); allStats[s] += *threadStat; - } - - // Special handling for synthesized statistics. - // These just have to be coded specially here for now. - // At present we only have a few: - // The total parallel work done in each thread. - // The variance here makes it easy to see load imbalance over the whole program (though, of course, - // it's possible to have a code with awful load balance in every parallel region but perfect load - // balance oever the whole program.) - // The time spent in barriers in each thread. - allStats[TIMER_Total_work].addSample ((*it)->getTimer(TIMER_OMP_work)->getTotal()); - // Time in explicit barriers. - allStats[TIMER_Total_barrier].addSample ((*it)->getTimer(TIMER_OMP_barrier)->getTotal()); + // Add Total stats for all real times (not counts) that are valid in more than one thread + if (!timeStat::noUnits(timer_e(s)) && !timeStat::masterOnly(timer_e(s))) + { + totalStats[s].addSample(threadStat->getTotal()); + } + } + // Accumulate counters. for (int c = 0; c