diff --git a/openmp/runtime/src/include/omp-tools.h.var b/openmp/runtime/src/include/omp-tools.h.var --- a/openmp/runtime/src/include/omp-tools.h.var +++ b/openmp/runtime/src/include/omp-tools.h.var @@ -266,7 +266,10 @@ typedef enum ompt_dispatch_t { ompt_dispatch_iteration = 1, - ompt_dispatch_section = 2 + ompt_dispatch_section = 2, + ompt_dispatch_ws_loop_chunk = 3, + ompt_dispatch_taskloop_chunk = 4, + ompt_dispatch_distribute_chunk = 5 } ompt_dispatch_t; typedef enum ompt_sync_region_t { @@ -303,7 +306,11 @@ ompt_work_workshare = 5, ompt_work_distribute = 6, ompt_work_taskloop = 7, - ompt_work_scope = 8 + ompt_work_scope = 8, + ompt_work_loop_static = 10, + ompt_work_loop_dynamic = 11, + ompt_work_loop_guided = 12, + ompt_work_loop_other = 13 } ompt_work_t; typedef enum ompt_mutex_t { @@ -554,6 +561,11 @@ ompt_dependence_type_t dependence_type; } ompt_dependence_t; +typedef struct ompt_dispatch_chunk_t { + uint64_t start; + uint64_t iterations; +} ompt_dispatch_chunk_t; + typedef int (*ompt_enumerate_states_t) ( int current_state, int *next_state, @@ -745,7 +757,7 @@ } ompt_record_parallel_end_t; typedef void (*ompt_callback_work_t) ( - ompt_work_t wstype, + ompt_work_t work_type, ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, @@ -754,7 +766,7 @@ ); typedef struct ompt_record_work_t { - ompt_work_t wstype; + ompt_work_t work_type; ompt_scope_endpoint_t endpoint; ompt_id_t parallel_id; ompt_id_t task_id; diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp --- a/openmp/runtime/src/kmp_dispatch.cpp +++ b/openmp/runtime/src/kmp_dispatch.cpp @@ -1964,9 +1964,22 @@ &(task_info->task_data), 0, codeptr); \ } \ } +#define OMPT_LOOP_DISPATCH(lb, ub, st, status) \ + if (ompt_enabled.ompt_callback_dispatch && status) { \ + ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \ + ompt_task_info_t *task_info = __ompt_get_task_info_object(0); \ + ompt_dispatch_chunk_t chunk; \ + ompt_data_t 
instance = ompt_data_none; \ + OMPT_GET_DISPATCH_CHUNK(chunk, lb, ub, st); \ + instance.ptr = &chunk; \ + ompt_callbacks.ompt_callback(ompt_callback_dispatch)( \ + &(team_info->parallel_data), &(task_info->task_data), \ + ompt_dispatch_ws_loop_chunk, instance); \ + } // TODO: implement count #else #define OMPT_LOOP_END // no-op +#define OMPT_LOOP_DISPATCH(lb, ub, st, status) // no-op #endif #if KMP_STATS_ENABLED @@ -2142,6 +2155,7 @@ #if INCLUDE_SSC_MARKS SSC_MARK_DISPATCH_NEXT(); #endif + OMPT_LOOP_DISPATCH(*p_lb, *p_ub, pr->u.p.st, status); OMPT_LOOP_END; KMP_STATS_LOOP_END; return status; @@ -2265,6 +2279,7 @@ #if INCLUDE_SSC_MARKS SSC_MARK_DISPATCH_NEXT(); #endif + OMPT_LOOP_DISPATCH(*p_lb, *p_ub, pr->u.p.st, status); OMPT_LOOP_END; KMP_STATS_LOOP_END; return status; diff --git a/openmp/runtime/src/kmp_sched.cpp b/openmp/runtime/src/kmp_sched.cpp --- a/openmp/runtime/src/kmp_sched.cpp +++ b/openmp/runtime/src/kmp_sched.cpp @@ -101,7 +101,7 @@ static kmp_int8 warn = 0; - if (ompt_enabled.ompt_callback_work) { + if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) { // Only fully initialize variables needed by OMPT if OMPT is enabled. team_info = __ompt_get_teaminfo(0, NULL); task_info = __ompt_get_task_info_object(0); @@ -438,6 +438,24 @@ ompt_work_type, ompt_scope_begin, &(team_info->parallel_data), &(task_info->task_data), trip_count, codeptr); } + if (ompt_enabled.ompt_callback_dispatch) { + ompt_dispatch_t dispatch_type; + ompt_data_t instance = ompt_data_none; + ompt_dispatch_chunk_t dispatch_chunk; + if (ompt_work_type == ompt_work_sections) { + dispatch_type = ompt_dispatch_section; + instance.ptr = codeptr; + } else { + OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupper, incr); + dispatch_type = (ompt_work_type == ompt_work_distribute) + ? 
ompt_dispatch_distribute_chunk + : ompt_dispatch_ws_loop_chunk; + instance.ptr = &dispatch_chunk; + } + ompt_callbacks.ompt_callback(ompt_callback_dispatch)( + &(team_info->parallel_data), &(task_info->task_data), dispatch_type, + instance); + } #endif KMP_STATS_LOOP_END(OMP_loop_static_iterations); @@ -450,7 +468,12 @@ T *plower, T *pupper, T *pupperDist, typename traits_t<T>::signed_t *pstride, typename traits_t<T>::signed_t incr, - typename traits_t<T>::signed_t chunk) { + typename traits_t<T>::signed_t chunk +#if OMPT_SUPPORT && OMPT_OPTIONAL + , + void *codeptr +#endif +) { KMP_COUNT_BLOCK(OMP_DISTRIBUTE); KMP_PUSH_PARTITIONED_TIMER(OMP_distribute); KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling); @@ -682,6 +705,26 @@ } #endif KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid)); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) { + ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); + ompt_task_info_t *task_info = __ompt_get_task_info_object(0); + if (ompt_enabled.ompt_callback_work) { + ompt_callbacks.ompt_callback(ompt_callback_work)( + ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data), + &(task_info->task_data), 0, codeptr); + } + if (ompt_enabled.ompt_callback_dispatch) { + ompt_data_t instance = ompt_data_none; + ompt_dispatch_chunk_t dispatch_chunk; + OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupperDist, incr); + instance.ptr = &dispatch_chunk; + ompt_callbacks.ompt_callback(ompt_callback_dispatch)( + &(team_info->parallel_data), &(task_info->task_data), + ompt_dispatch_distribute_chunk, instance); + } + } +#endif // OMPT_SUPPORT && OMPT_OPTIONAL KMP_STATS_LOOP_END(OMP_distribute_iterations); return; } @@ -887,6 +930,12 @@ @} */ +#if OMPT_SUPPORT && OMPT_OPTIONAL +#define OMPT_CODEPTR_ARG , OMPT_GET_RETURN_ADDRESS(0) +#else +#define OMPT_CODEPTR_ARG +#endif + /*! 
@ingroup WORK_SHARING @param loc Source code location @@ -915,7 +964,8 @@ kmp_int32 *pupperD, kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk) { __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower, - pupper, pupperD, pstride, incr, chunk); + pupper, pupperD, pstride, incr, + chunk OMPT_CODEPTR_ARG); } /*! @@ -927,7 +977,8 @@ kmp_uint32 *pupperD, kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk) { __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower, - pupper, pupperD, pstride, incr, chunk); + pupper, pupperD, pstride, incr, + chunk OMPT_CODEPTR_ARG); } /*! @@ -939,7 +990,8 @@ kmp_int64 *pupperD, kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk) { __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower, - pupper, pupperD, pstride, incr, chunk); + pupper, pupperD, pstride, incr, + chunk OMPT_CODEPTR_ARG); } /*! @@ -951,7 +1003,8 @@ kmp_uint64 *pupperD, kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk) { __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower, - pupper, pupperD, pstride, incr, chunk); + pupper, pupperD, pstride, incr, + chunk OMPT_CODEPTR_ARG); } /*! 
@} diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -1747,6 +1747,18 @@ if (UNLIKELY(ompt_enabled.enabled)) __ompt_task_start(task, current_task, gtid); #endif +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (UNLIKELY(ompt_enabled.ompt_callback_dispatch && + taskdata->ompt_task_info.dispatch_chunk.iterations > 0)) { + ompt_data_t instance = ompt_data_none; + instance.ptr = &(taskdata->ompt_task_info.dispatch_chunk); + ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); + ompt_callbacks.ompt_callback(ompt_callback_dispatch)( + &(team_info->parallel_data), &(taskdata->ompt_task_info.task_data), + ompt_dispatch_taskloop_chunk, instance); + taskdata->ompt_task_info.dispatch_chunk = {0, 0}; + } +#endif // OMPT_SUPPORT && OMPT_OPTIONAL #if OMPD_SUPPORT if (ompd_state & OMPD_ENABLE_BP) @@ -4643,6 +4655,13 @@ #if OMPT_SUPPORT +#if OMPT_OPTIONAL + // Record the chunk before scheduling: the task may run (and be freed) at once. + if (ompt_enabled.ompt_callback_dispatch) { + OMPT_GET_DISPATCH_CHUNK(next_taskdata->ompt_task_info.dispatch_chunk, + lower, upper, st); + } +#endif // OMPT_OPTIONAL __kmp_omp_taskloop_task(NULL, gtid, next_task, codeptr_ra); // schedule new task #else __kmp_omp_task(gtid, next_task, true); // schedule new task #endif diff --git a/openmp/runtime/src/ompt-event-specific.h b/openmp/runtime/src/ompt-event-specific.h --- a/openmp/runtime/src/ompt-event-specific.h +++ b/openmp/runtime/src/ompt-event-specific.h @@ -104,7 +104,7 @@ #define ompt_callback_reduction_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_callback_dispatch_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_dispatch_implemented ompt_event_MAY_ALWAYS_OPTIONAL #define ompt_callback_error_implemented ompt_event_MAY_ALWAYS_OPTIONAL diff --git a/openmp/runtime/src/ompt-internal.h b/openmp/runtime/src/ompt-internal.h --- a/openmp/runtime/src/ompt-internal.h +++ b/openmp/runtime/src/ompt-internal.h @@ -57,6 +57,7 @@ ompt_data_t task_data; 
struct kmp_taskdata *scheduling_parent; int thread_num; + ompt_dispatch_chunk_t dispatch_chunk = {0, 0}; } ompt_task_info_t; typedef struct { diff --git a/openmp/runtime/src/ompt-specific.h b/openmp/runtime/src/ompt-specific.h --- a/openmp/runtime/src/ompt-specific.h +++ b/openmp/runtime/src/ompt-specific.h @@ -89,6 +89,19 @@ ? __ompt_load_return_address(gtid) \ : __builtin_return_address(0)) +// Describe the loop chunk [lb, ub] with stride incr as a start value plus an +// iteration count; for a non-positive stride the chunk is taken to start at ub. +#define OMPT_GET_DISPATCH_CHUNK(chunk, lb, ub, incr) \ + do { \ + if ((incr) > 0) { \ + (chunk).start = static_cast<uint64_t>(lb); \ + (chunk).iterations = static_cast<uint64_t>(((ub) - (lb)) / (incr) + 1); \ + } else { \ + (chunk).start = static_cast<uint64_t>(ub); \ + (chunk).iterations = static_cast<uint64_t>(((lb) - (ub)) / -(incr) + 1); \ + } \ + } while (0) + //****************************************************************************** // inline functions //****************************************************************************** diff --git a/openmp/runtime/test/ompt/callback.h b/openmp/runtime/test/ompt/callback.h --- a/openmp/runtime/test/ompt/callback.h +++ b/openmp/runtime/test/ompt/callback.h @@ -790,6 +790,12 @@ switch(wstype) { case ompt_work_loop: + case ompt_work_loop_static: + case ompt_work_loop_dynamic: + case ompt_work_loop_guided: + case ompt_work_loop_other: + // TODO: add schedule attribute for the different work_loop types. 
+ // e.g., ", schedule=%s", ..., ompt_schedule_values[wstype] printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_loop_begin: parallel_id=%" PRIu64 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 @@ -854,6 +860,10 @@ switch(wstype) { case ompt_work_loop: + case ompt_work_loop_static: + case ompt_work_loop_dynamic: + case ompt_work_loop_guided: + case ompt_work_loop_other: printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_loop_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", @@ -917,6 +927,43 @@ } } +static void on_ompt_callback_dispatch( + ompt_data_t *parallel_data, + ompt_data_t *task_data, + ompt_dispatch_t kind, + ompt_data_t instance) { + const char *event_name = NULL; + void *codeptr_ra = NULL; + ompt_dispatch_chunk_t *dispatch_chunk = NULL; + switch (kind) { + case ompt_dispatch_section: + event_name = "ompt_event_section_begin"; + codeptr_ra = instance.ptr; + break; + case ompt_dispatch_ws_loop_chunk: + event_name = "ompt_event_ws_loop_chunk_begin"; + dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr; + break; + case ompt_dispatch_taskloop_chunk: + event_name = "ompt_event_taskloop_chunk_begin"; + dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr; + break; + case ompt_dispatch_distribute_chunk: + event_name = "ompt_event_distribute_chunk_begin"; + dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr; + break; + default: + event_name = "ompt_ws_loop_iteration_begin"; + } + printf("%" PRIu64 ":" _TOOL_PREFIX + " %s: parallel_id=%" PRIu64 ", task_id=%" PRIu64 + ", codeptr_ra=%p, chunk_start=%" PRIu64 ", chunk_iterations=%" PRIu64 + "\n", ompt_get_thread_data()->value, event_name, parallel_data->value, + task_data->value, codeptr_ra, + dispatch_chunk ? dispatch_chunk->start : 0, + dispatch_chunk ? 
dispatch_chunk->iterations : 0); +} + static void on_ompt_callback_masked(ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, @@ -1178,6 +1225,7 @@ register_ompt_callback_t(ompt_callback_lock_init, ompt_callback_mutex_acquire_t); register_ompt_callback_t(ompt_callback_lock_destroy, ompt_callback_mutex_t); register_ompt_callback(ompt_callback_work); + register_ompt_callback(ompt_callback_dispatch); register_ompt_callback(ompt_callback_masked); register_ompt_callback(ompt_callback_parallel_begin); register_ompt_callback(ompt_callback_parallel_end); diff --git a/openmp/runtime/test/ompt/tasks/taskloop_dispatch.c b/openmp/runtime/test/ompt/tasks/taskloop_dispatch.c new file mode 100644 --- /dev/null +++ b/openmp/runtime/test/ompt/tasks/taskloop_dispatch.c @@ -0,0 +1,53 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: gnu, intel-16.0 + +#include "callback.h" +#include <omp.h> + +int main() { + unsigned int i; + +#pragma omp parallel num_threads(2) + { +#pragma omp barrier + +#pragma omp master +#pragma omp taskloop grainsize(4) + for (i = 0; i < 16; i++) { + // Make every iteration take at least 1ms + delay(1000); + } + } + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: + // CHECK-SAME: parent_task_id={{[0-9]+}} + // CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]] + // CHECK-SAME: requested_team_size=2 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: task_id=[[IMPLICIT_TASK_ID1:[0-9]+]] + // CHECK-SAME: team_size=2, thread_num=0 + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_taskloop_begin: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID1]] + // CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]], count=16 + + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: + // CHECK-SAME: new_task_id=[[TASK_ID0:[0-9]+]] 
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: + // CHECK-SAME: new_task_id=[[TASK_ID1:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: + // CHECK-SAME: new_task_id=[[TASK_ID2:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: + // CHECK-SAME: new_task_id=[[TASK_ID3:[0-9]+]] + + // CHECK-DAG: {{.*}}: ompt_event_taskloop_chunk_begin:{{.*}}task_id=[[TASK_ID0]]{{.*}}chunk_iterations=4 + // CHECK-DAG: {{.*}}: ompt_event_taskloop_chunk_begin:{{.*}}task_id=[[TASK_ID1]]{{.*}}chunk_iterations=4 + // CHECK-DAG: {{.*}}: ompt_event_taskloop_chunk_begin:{{.*}}task_id=[[TASK_ID2]]{{.*}}chunk_iterations=4 + // CHECK-DAG: {{.*}}: ompt_event_taskloop_chunk_begin:{{.*}}task_id=[[TASK_ID3]]{{.*}}chunk_iterations=4 + + return 0; +} diff --git a/openmp/runtime/test/ompt/teams/distribute_dispatch.c b/openmp/runtime/test/ompt/teams/distribute_dispatch.c new file mode 100644 --- /dev/null +++ b/openmp/runtime/test/ompt/teams/distribute_dispatch.c @@ -0,0 +1,47 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include "callback.h" + +#define WORK_SIZE 64 + +int main() { + int i; +#pragma omp teams num_teams(4) thread_limit(1) +#pragma omp distribute dist_schedule(static, WORK_SIZE / 4) + for (i = 0; i < WORK_SIZE; i++) {} + + return 0; +} + +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_dispatch' + +// CHECK: 0: NULL_POINTER=[[NULL:.*$]] + +// CHECK: {{^}}[[THREAD_ID0:[0-9]+]]: ompt_event_distribute_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID0:[0-9]+]] +// CHECK-SAME: parent_task_id=[[TASK_ID0:[0-9]+]] +// CHECK: {{^}}[[THREAD_ID0]]: ompt_event_distribute_chunk_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID0]], task_id=[[TASK_ID0]] +// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations=16 + +// CHECK: {{^}}[[THREAD_ID1:[0-9]+]]: ompt_event_distribute_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID1:[0-9]+]] +// 
CHECK-SAME: parent_task_id=[[TASK_ID1:[0-9]+]] +// CHECK: {{^}}[[THREAD_ID1]]: ompt_event_distribute_chunk_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID1]], task_id=[[TASK_ID1]] +// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations=16 + +// CHECK: {{^}}[[THREAD_ID2:[0-9]+]]: ompt_event_distribute_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID2:[0-9]+]] +// CHECK-SAME: parent_task_id=[[TASK_ID2:[0-9]+]] +// CHECK: {{^}}[[THREAD_ID2]]: ompt_event_distribute_chunk_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID2]], task_id=[[TASK_ID2]] +// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations=16 + +// CHECK: {{^}}[[THREAD_ID3:[0-9]+]]: ompt_event_distribute_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID3:[0-9]+]] +// CHECK-SAME: parent_task_id=[[TASK_ID3:[0-9]+]] +// CHECK: {{^}}[[THREAD_ID3]]: ompt_event_distribute_chunk_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID3]], task_id=[[TASK_ID3]] +// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations=16 diff --git a/openmp/runtime/test/ompt/worksharing/for/loop_dispatch.c b/openmp/runtime/test/ompt/worksharing/for/loop_dispatch.c new file mode 100644 --- /dev/null +++ b/openmp/runtime/test/ompt/worksharing/for/loop_dispatch.c @@ -0,0 +1,116 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// XFAIL: gcc +// GCC doesn't call runtime for static schedule + +#include "callback.h" + +#define WORK_SIZE 64 + +int main() { + int i; + +#pragma omp parallel num_threads(4) + { +#pragma omp for schedule(static, WORK_SIZE / 4) + for (i = 0; i < WORK_SIZE; i++) {} + +#pragma omp for schedule(dynamic) + for (i = 0; i < WORK_SIZE; i++) { + // Make every iteration takes at least 1ms. + delay(1000); + } + +#pragma omp for schedule(guided) + for (i = 0; i < WORK_SIZE; i++) { + // Make every iteration takes at least 1ms. 
+ delay(1000); + } + } + + return 0; +} + +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_dispatch' + +// CHECK: 0: NULL_POINTER=[[NULL:.*$]] +// CHECK: {{^}}[[THREAD_ID0:[0-9]+]]: ompt_event_parallel_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]] + +// Each thread should have at least one ws-loop-chunk-begin event for each +// for loop. + +// CHECK: {{^}}[[THREAD_ID0]]: ompt_event_implicit_task_begin: +// CHECK-SAME: task_id=[[TASK_ID0:[0-9]+]] +// CHECK: {{^}}[[THREAD_ID0]]: ompt_event_loop_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], parent_task_id=[[TASK_ID0]] +// CHECK: {{^}}[[THREAD_ID0]]: ompt_event_ws_loop_chunk_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID0]] +// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations=16 +// CHECK: {{^}}[[THREAD_ID0]]: ompt_event_loop_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], parent_task_id=[[TASK_ID0]] +// CHECK: {{^}}[[THREAD_ID0]]: ompt_event_ws_loop_chunk_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID0]] +// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations=1 +// CHECK: {{^}}[[THREAD_ID0]]: ompt_event_loop_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], parent_task_id=[[TASK_ID0]] +// CHECK: {{^}}[[THREAD_ID0]]: ompt_event_ws_loop_chunk_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID0]] +// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations={{[1-9][0-9]*}} + +// CHECK: {{^}}[[THREAD_ID1:[0-9]+]]: ompt_event_implicit_task_begin: +// CHECK-SAME: task_id=[[TASK_ID1:[0-9]+]] +// CHECK: {{^}}[[THREAD_ID1]]: ompt_event_loop_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], parent_task_id=[[TASK_ID1]] +// CHECK: {{^}}[[THREAD_ID1]]: ompt_event_ws_loop_chunk_begin: +// 
CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID1]] +// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations=16 +// CHECK: {{^}}[[THREAD_ID1]]: ompt_event_loop_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], parent_task_id=[[TASK_ID1]] +// CHECK: {{^}}[[THREAD_ID1]]: ompt_event_ws_loop_chunk_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID1]] +// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations=1 +// CHECK: {{^}}[[THREAD_ID1]]: ompt_event_loop_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], parent_task_id=[[TASK_ID1]] +// CHECK: {{^}}[[THREAD_ID1]]: ompt_event_ws_loop_chunk_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID1]] +// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations={{[1-9][0-9]*}} + +// CHECK: {{^}}[[THREAD_ID2:[0-9]+]]: ompt_event_implicit_task_begin: +// CHECK-SAME: task_id=[[TASK_ID2:[0-9]+]] +// CHECK: {{^}}[[THREAD_ID2]]: ompt_event_loop_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], parent_task_id=[[TASK_ID2]] +// CHECK: {{^}}[[THREAD_ID2]]: ompt_event_ws_loop_chunk_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID2]] +// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations=16 +// CHECK: {{^}}[[THREAD_ID2]]: ompt_event_loop_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], parent_task_id=[[TASK_ID2]] +// CHECK: {{^}}[[THREAD_ID2]]: ompt_event_ws_loop_chunk_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID2]] +// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations=1 +// CHECK: {{^}}[[THREAD_ID2]]: ompt_event_loop_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], parent_task_id=[[TASK_ID2]] +// CHECK: {{^}}[[THREAD_ID2]]: ompt_event_ws_loop_chunk_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID2]] +// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations={{[1-9][0-9]*}} + +// CHECK: {{^}}[[THREAD_ID3:[0-9]+]]: ompt_event_implicit_task_begin: +// CHECK-SAME: task_id=[[TASK_ID3:[0-9]+]] +// CHECK: 
{{^}}[[THREAD_ID3]]: ompt_event_loop_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], parent_task_id=[[TASK_ID3]] +// CHECK: {{^}}[[THREAD_ID3]]: ompt_event_ws_loop_chunk_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID3]] +// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations=16 +// CHECK: {{^}}[[THREAD_ID3]]: ompt_event_loop_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], parent_task_id=[[TASK_ID3]] +// CHECK: {{^}}[[THREAD_ID3]]: ompt_event_ws_loop_chunk_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID3]] +// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations=1 +// CHECK: {{^}}[[THREAD_ID3]]: ompt_event_loop_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], parent_task_id=[[TASK_ID3]] +// CHECK: {{^}}[[THREAD_ID3]]: ompt_event_ws_loop_chunk_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID3]] +// CHECK-SAME: chunk_start={{[0-9]+}}, chunk_iterations={{[1-9][0-9]*}} diff --git a/openmp/runtime/test/ompt/worksharing/sections_dispatch.c b/openmp/runtime/test/ompt/worksharing/sections_dispatch.c new file mode 100644 --- /dev/null +++ b/openmp/runtime/test/ompt/worksharing/sections_dispatch.c @@ -0,0 +1,51 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: intel-15.0, intel-16.0, intel-17.0, intel-18.0 +// GCC generates code that does not distinguish between sections and loops + +#include "callback.h" +#include <omp.h> + +int main() +{ + #pragma omp parallel sections num_threads(2) + { + #pragma omp section + { + printf("%" PRIu64 ": section 1\n", ompt_get_thread_data()->value); + } + #pragma omp section + { + printf("%" PRIu64 ": section 2\n", ompt_get_thread_data()->value); + } + } + + // Check if libomp supports the callbacks for this test. 
+ // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_work' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_dispatch' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_sections_begin: + // CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], + // CHECK-SAME: parent_task_id=[[TASK_ID:[0-9]+]], + // CHECK-SAME: codeptr_ra=[[SECT_BEGIN:0x[0-f]+]], count=2 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_section_begin: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]] + // CHECK-SAME: codeptr_ra=[[SECT_BEGIN]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_sections_end: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, + // CHECK-SAME: codeptr_ra=[[SECT_END:0x[0-f]+]] + + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_sections_begin: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], parent_task_id=[[TASK_ID:[0-9]+]], + // CHECK-SAME: codeptr_ra=[[SECT_BEGIN]], count=2 + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_section_begin: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]] + // CHECK-SAME: codeptr_ra=[[SECT_BEGIN]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_sections_end: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id={{[0-9]+}}, + // CHECK-SAME: codeptr_ra=[[SECT_END]] + + return 0; +}