Skip to content

Commit d2b53ca

Browse files
committed Apr 3, 2019
[OpenMP][Stats] Fix stats gathering for distribute and team clause
The distribute clause needs an explicit push of a timer. The teams clause needs a timer added; in addition, as is already done for parallel, that timer must be exchanged with the serial timer when the clause is encountered, so that serial regions are counted properly. Differential Revision: https://reviews.llvm.org/D59801 llvm-svn: 357621
1 parent 3d90e7e commit d2b53ca

File tree

6 files changed

+107
-32
lines changed

6 files changed

+107
-32
lines changed
 

‎openmp/runtime/src/kmp_csupport.cpp

+15
Original file line number | Diff line number | Diff line change
@@ -382,7 +382,15 @@ void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
382382
va_list ap;
383383
va_start(ap, microtask);
384384

385+
#if KMP_STATS_ENABLED
385386
KMP_COUNT_BLOCK(OMP_TEAMS);
387+
stats_state_e previous_state = KMP_GET_THREAD_STATE();
388+
if (previous_state == stats_state_e::SERIAL_REGION) {
389+
KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
390+
} else {
391+
KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
392+
}
393+
#endif
386394

387395
// remember teams entry point and nesting level
388396
this_thr->th.th_teams_microtask = microtask;
@@ -442,6 +450,13 @@ void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
442450
this_thr->th.th_teams_level = 0;
443451
*(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
444452
va_end(ap);
453+
#if KMP_STATS_ENABLED
454+
if (previous_state == stats_state_e::SERIAL_REGION) {
455+
KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
456+
} else {
457+
KMP_POP_PARTITIONED_TIMER();
458+
}
459+
#endif // KMP_STATS_ENABLED
445460
}
446461
#endif /* OMP_40_ENABLED */
447462

‎openmp/runtime/src/kmp_dispatch.cpp

+6
Original file line number | Diff line number | Diff line change
@@ -283,6 +283,12 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
283283
}
284284
}
285285

286+
#if KMP_STATS_ENABLED
287+
if (KMP_MASTER_GTID(gtid)) {
288+
KMP_COUNT_VALUE(OMP_loop_dynamic_total_iterations, tc);
289+
}
290+
#endif
291+
286292
pr->u.p.lb = lb;
287293
pr->u.p.ub = ub;
288294
pr->u.p.st = st;

‎openmp/runtime/src/kmp_runtime.cpp

+38-10
Original file line number | Diff line number | Diff line change
@@ -2285,9 +2285,25 @@ int __kmp_fork_call(ident_t *loc, int gtid,
22852285
team->t.t_id, team->t.t_pkfn));
22862286
} // END of timer KMP_fork_call block
22872287

2288+
#if KMP_STATS_ENABLED && OMP_40_ENABLED
2289+
// If beginning a teams construct, then change thread state
2290+
stats_state_e previous_state = KMP_GET_THREAD_STATE();
2291+
if (!ap) {
2292+
KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
2293+
}
2294+
#endif
2295+
22882296
if (!team->t.t_invoke(gtid)) {
22892297
KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
22902298
}
2299+
2300+
#if KMP_STATS_ENABLED && OMP_40_ENABLED
2301+
// If was beginning of a teams construct, then reset thread state
2302+
if (!ap) {
2303+
KMP_SET_THREAD_STATE(previous_state);
2304+
}
2305+
#endif
2306+
22912307
KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
22922308
team->t.t_id, team->t.t_pkfn));
22932309
KMP_MB(); /* Flush all pending memory write invalidates. */
@@ -7106,21 +7122,33 @@ int __kmp_invoke_task_func(int gtid) {
71067122
}
71077123
#endif
71087124

7109-
{
7110-
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
7111-
KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
7112-
rc =
7113-
__kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
7114-
tid, (int)team->t.t_argc, (void **)team->t.t_argv
7125+
#if KMP_STATS_ENABLED
7126+
stats_state_e previous_state = KMP_GET_THREAD_STATE();
7127+
if (previous_state == stats_state_e::TEAMS_REGION) {
7128+
KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
7129+
} else {
7130+
KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
7131+
}
7132+
KMP_SET_THREAD_STATE(IMPLICIT_TASK);
7133+
#endif
7134+
7135+
rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
7136+
tid, (int)team->t.t_argc, (void **)team->t.t_argv
71157137
#if OMPT_SUPPORT
7116-
,
7117-
exit_runtime_p
7138+
,
7139+
exit_runtime_p
71187140
#endif
7119-
);
7141+
);
71207142
#if OMPT_SUPPORT
7121-
*exit_runtime_p = NULL;
7143+
*exit_runtime_p = NULL;
71227144
#endif
7145+
7146+
#if KMP_STATS_ENABLED
7147+
if (previous_state == stats_state_e::TEAMS_REGION) {
7148+
KMP_SET_THREAD_STATE(previous_state);
71237149
}
7150+
KMP_POP_PARTITIONED_TIMER();
7151+
#endif
71247152

71257153
#if USE_ITT_BUILD
71267154
if (__itt_stack_caller_create_ptr) {

‎openmp/runtime/src/kmp_sched.cpp

+36-20
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,29 @@ char const *traits_t<long>::spec = "ld";
3838
//-------------------------------------------------------------------------
3939
#endif
4040

41+
// KMP_STATS_LOOP_END(stat)
//
// Records an iteration count under the counter `stat` and then pops the
// current partitioned timer. The macro expands in place and reads `*pupper`,
// `*plower` and `incr`, so those names must be in scope wherever it is used.
//
// The count is computed in kmp_int64 from the bounds and the increment:
//   incr ==  1 : u - l + 1
//   incr == -1 : l - u + 1
//   incr  >  0 : (u - l) / incr + 1
//   otherwise  : (l - u) / (-incr) + 1
// NOTE(review): the "+ 1" suggests the bounds are treated as inclusive --
// confirm against the callers. An `incr` of 0 would fall into the last branch
// and divide by zero; presumably callers rule that out -- TODO confirm.
//
// NOTE(review): the expansion is a bare `{ ... }` block, not
// `do { ... } while (0)`, so `KMP_STATS_LOOP_END(x);` is a block followed by
// an empty statement. That is fine directly before `return;`, but would break
// as the unbraced body of an `if` that has an `else`.
//
// When KMP_STATS_ENABLED is false, the macro expands to nothing.
#if KMP_STATS_ENABLED
42+
#define KMP_STATS_LOOP_END(stat) \
43+
{ \
44+
kmp_int64 t; \
45+
kmp_int64 u = (kmp_int64)(*pupper); \
46+
kmp_int64 l = (kmp_int64)(*plower); \
47+
kmp_int64 i = (kmp_int64)incr; \
48+
if (i == 1) { \
49+
t = u - l + 1; \
50+
} else if (i == -1) { \
51+
t = l - u + 1; \
52+
} else if (i > 0) { \
53+
t = (u - l) / i + 1; \
54+
} else { \
55+
t = (l - u) / (-i) + 1; \
56+
} \
57+
KMP_COUNT_VALUE(stat, t); \
58+
KMP_POP_PARTITIONED_TIMER(); \
59+
}
60+
#else
61+
#define KMP_STATS_LOOP_END(stat) /* Nothing */
62+
#endif
63+
4164
template <typename T>
4265
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
4366
kmp_int32 schedtype, kmp_int32 *plastiter,
@@ -151,6 +174,7 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
151174
&(task_info->task_data), 0, codeptr);
152175
}
153176
#endif
177+
KMP_STATS_LOOP_END(OMP_loop_static_iterations);
154178
return;
155179
}
156180

@@ -202,6 +226,7 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
202226
&(task_info->task_data), *pstride, codeptr);
203227
}
204228
#endif
229+
KMP_STATS_LOOP_END(OMP_loop_static_iterations);
205230
return;
206231
}
207232
nth = team->t.t_nproc;
@@ -231,6 +256,7 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
231256
&(task_info->task_data), *pstride, codeptr);
232257
}
233258
#endif
259+
KMP_STATS_LOOP_END(OMP_loop_static_iterations);
234260
return;
235261
}
236262

@@ -246,6 +272,12 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
246272
trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
247273
}
248274

275+
#if KMP_STATS_ENABLED
276+
if (KMP_MASTER_GTID(gtid)) {
277+
KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
278+
}
279+
#endif
280+
249281
if (__kmp_env_consistency_check) {
250282
/* tripcount overflow? */
251283
if (trip_count == 0 && *pupper != *plower) {
@@ -388,26 +420,7 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
388420
}
389421
#endif
390422

391-
#if KMP_STATS_ENABLED
392-
{
393-
kmp_int64 t;
394-
kmp_int64 u = (kmp_int64)(*pupper);
395-
kmp_int64 l = (kmp_int64)(*plower);
396-
kmp_int64 i = (kmp_int64)incr;
397-
/* compute trip count */
398-
if (i == 1) {
399-
t = u - l + 1;
400-
} else if (i == -1) {
401-
t = l - u + 1;
402-
} else if (i > 0) {
403-
t = (u - l) / i + 1;
404-
} else {
405-
t = (l - u) / (-i) + 1;
406-
}
407-
KMP_COUNT_VALUE(OMP_loop_static_iterations, t);
408-
KMP_POP_PARTITIONED_TIMER();
409-
}
410-
#endif
423+
KMP_STATS_LOOP_END(OMP_loop_static_iterations);
411424
return;
412425
}
413426

@@ -419,6 +432,8 @@ static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
419432
typename traits_t<T>::signed_t incr,
420433
typename traits_t<T>::signed_t chunk) {
421434
KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
435+
KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
436+
KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
422437
typedef typename traits_t<T>::unsigned_t UT;
423438
typedef typename traits_t<T>::signed_t ST;
424439
kmp_uint32 tid;
@@ -648,6 +663,7 @@ end:;
648663
}
649664
#endif
650665
KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
666+
KMP_STATS_LOOP_END(OMP_distribute_iterations);
651667
return;
652668
}
653669

‎openmp/runtime/src/kmp_stats.cpp

-1
Original file line number | Diff line number | Diff line change
@@ -546,7 +546,6 @@ static std::string generateFilename(char const *prototype,
546546
// of __kmp_stats_global_output
547547
void kmp_stats_output_module::init() {
548548

549-
fprintf(stderr, "*** Stats enabled OpenMP* runtime ***\n");
550549
char *statsFileName = getenv("KMP_STATS_FILE");
551550
eventsFileName = getenv("KMP_STATS_EVENTS_FILE");
552551
plotFileName = getenv("KMP_STATS_PLOT_FILE");

‎openmp/runtime/src/kmp_stats.h

+12-1
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,8 @@ enum stats_state_e {
6969
TASKYIELD,
7070
TASKGROUP,
7171
IMPLICIT_TASK,
72-
EXPLICIT_TASK
72+
EXPLICIT_TASK,
73+
TEAMS_REGION
7374
};
7475

7576
/*!
@@ -137,10 +138,14 @@ enum stats_state_e {
137138
macro (OMP_worker_thread_life, stats_flags_e::logEvent, arg) \
138139
macro (OMP_parallel, stats_flags_e::logEvent, arg) \
139140
macro (OMP_parallel_overhead, stats_flags_e::logEvent, arg) \
141+
macro (OMP_teams, stats_flags_e::logEvent, arg) \
142+
macro (OMP_teams_overhead, stats_flags_e::logEvent, arg) \
140143
macro (OMP_loop_static, 0, arg) \
141144
macro (OMP_loop_static_scheduling, 0, arg) \
142145
macro (OMP_loop_dynamic, 0, arg) \
143146
macro (OMP_loop_dynamic_scheduling, 0, arg) \
147+
macro (OMP_distribute, 0, arg) \
148+
macro (OMP_distribute_scheduling, 0, arg) \
144149
macro (OMP_critical, 0, arg) \
145150
macro (OMP_critical_wait, 0, arg) \
146151
macro (OMP_single, 0, arg) \
@@ -163,8 +168,14 @@ enum stats_state_e {
163168
arg) \
164169
macro (OMP_loop_static_iterations, \
165170
stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
171+
macro (OMP_loop_static_total_iterations, \
172+
stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
166173
macro (OMP_loop_dynamic_iterations, \
167174
stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
175+
macro (OMP_loop_dynamic_total_iterations, \
176+
stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
177+
macro (OMP_distribute_iterations, \
178+
stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
168179
KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
169180
// clang-format on
170181

0 commit comments

Comments
 (0)
Please sign in to comment.