Index: runtime/src/kmp_barrier.cpp =================================================================== --- runtime/src/kmp_barrier.cpp +++ runtime/src/kmp_barrier.cpp @@ -1623,6 +1623,8 @@ ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data, my_task_data, codeptr); } + if (!KMP_MASTER_TID(ds_tid)) + this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr); #endif this_thr->th.ompt_thread_info.state = omp_state_wait_barrier_implicit; } Index: runtime/src/kmp_runtime.cpp =================================================================== --- runtime/src/kmp_runtime.cpp +++ runtime/src/kmp_runtime.cpp @@ -5669,7 +5669,6 @@ __ompt_get_task_info_object(0)->frame.exit_frame = NULL; this_thr->th.ompt_thread_info.state = omp_state_overhead; - this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr); } #endif /* join barrier after parallel region */ Index: runtime/test/ompt/synchronization/barrier/implicit_task_data.c =================================================================== --- /dev/null +++ runtime/test/ompt/synchronization/barrier/implicit_task_data.c @@ -0,0 +1,183 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#define _BSD_SOURCE +#include +#include +#include +#include +#include + +static const char* ompt_thread_type_t_values[] = { + NULL, + "ompt_thread_initial", + "ompt_thread_worker", + "ompt_thread_other" +}; + +static ompt_get_unique_id_t ompt_get_unique_id; +static ompt_get_thread_data_t ompt_get_thread_data; + +int main() +{ + #pragma omp parallel num_threads(4) + { + #pragma omp master + { + sleep(1); + } + } + + + // Check if libomp supports the callbacks for this test. + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region' + // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_sync_region_wait' + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // master thread implicit barrier at parallel end + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id=0, task_id=[[TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_begin: parallel_id=0, task_id=[[TASK_ID]], codeptr_ra={{0x[0-f]*}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_wait_barrier_end: parallel_id=0, task_id=[[TASK_ID]], codeptr_ra={{0x[0-f]*}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=0, task_id=[[TASK_ID]], codeptr_ra={{0x[0-f]*}} + + + // worker thread implicit barrier at parallel end + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_barrier_begin: parallel_id=0, task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_begin: parallel_id=0, task_id=[[TASK_ID]], codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_wait_barrier_end: parallel_id=0, task_id=[[TASK_ID]], codeptr_ra=[[NULL]] + // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=0, task_id=[[TASK_ID]], codeptr_ra=[[NULL]] + + return 0; +} + +static void +on_ompt_callback_thread_begin( + ompt_thread_type_t thread_type, + ompt_data_t *thread_data) +{ + if(thread_data->ptr) + printf("%s\n", "0: thread_data initially not null"); + thread_data->value = ompt_get_unique_id(); + printf("%" PRIu64 ": ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n", ompt_get_thread_data()->value, ompt_thread_type_t_values[thread_type], thread_type, thread_data->value); +} + +static void +on_ompt_callback_sync_region( + ompt_sync_region_kind_t kind, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra) +{ + switch(endpoint) + { + case ompt_scope_begin: + task_data->value = ompt_get_unique_id(); + switch(kind) + { + case ompt_sync_region_barrier: + printf("%" PRIu64 ": ompt_event_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskwait: + printf("%" PRIu64 ": ompt_event_taskwait_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskgroup: + printf("%" PRIu64 ": ompt_event_taskgroup_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + } + break; + case ompt_scope_end: + switch(kind) + { + case ompt_sync_region_barrier: + printf("%" PRIu64 ": ompt_event_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskwait: + printf("%" PRIu64 ": ompt_event_taskwait_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskgroup: + printf("%" PRIu64 ": ompt_event_taskgroup_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + } + break; + } +} + +static void +on_ompt_callback_sync_region_wait( + ompt_sync_region_kind_t kind, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra) +{ + switch(endpoint) + { + case ompt_scope_begin: + switch(kind) + { + case ompt_sync_region_barrier: + printf("%" PRIu64 ": ompt_event_wait_barrier_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskwait: + printf("%" PRIu64 ": ompt_event_wait_taskwait_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskgroup: + printf("%" PRIu64 ": ompt_event_wait_taskgroup_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra); + break; + } + break; + case ompt_scope_end: + switch(kind) + { + case ompt_sync_region_barrier: + printf("%" PRIu64 ": ompt_event_wait_barrier_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskwait: + printf("%" PRIu64 ": ompt_event_wait_taskwait_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + case ompt_sync_region_taskgroup: + printf("%" PRIu64 ": ompt_event_wait_taskgroup_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra); + break; + } + break; + } +} + +#define register_callback_t(name, type) \ +do{ \ + type f_##name = &on_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == \ + ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ +}while(0) + +#define register_callback(name) register_callback_t(name, name##_t) + +int ompt_initialize( + ompt_function_lookup_t lookup, + ompt_data_t *tool_data) +{ + ompt_set_callback_t ompt_set_callback; + ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback"); + ompt_get_unique_id = (ompt_get_unique_id_t) lookup("ompt_get_unique_id"); + ompt_get_thread_data = (ompt_get_thread_data_t) lookup("ompt_get_thread_data"); + register_callback(ompt_callback_sync_region); + register_callback_t(ompt_callback_sync_region_wait, ompt_callback_sync_region_t); + register_callback(ompt_callback_thread_begin); + printf("0: NULL_POINTER=%p\n", (void*)NULL); + return 1; //success +} + +void ompt_finalize(ompt_data_t *tool_data) +{ + printf("0: ompt_event_runtime_shutdown\n"); +} + +ompt_start_tool_result_t* ompt_start_tool( + unsigned int omp_version, + const char *runtime_version) +{ + static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize, 0}; + return &ompt_start_tool_result; +}