Index: openmp/runtime/src/kmp_tasking.cpp =================================================================== --- openmp/runtime/src/kmp_tasking.cpp +++ openmp/runtime/src/kmp_tasking.cpp @@ -577,24 +577,20 @@ // __ompt_task_finish: // Build and trigger final task-schedule event -static inline void -__ompt_task_finish(kmp_task_t *task, kmp_taskdata_t *resumed_task, - ompt_task_status_t status = ompt_task_complete) { - kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); - if (__kmp_omp_cancellation && taskdata->td_taskgroup && - taskdata->td_taskgroup->cancel_request == cancel_taskgroup) { - status = ompt_task_cancel; - } - - /* let OMPT know that we're returning to the callee task */ +static inline void __ompt_task_finish(kmp_task_t *task, + kmp_taskdata_t *resumed_task, + ompt_task_status_t status) { if (ompt_enabled.ompt_callback_task_schedule) { + kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); + if (__kmp_omp_cancellation && taskdata->td_taskgroup && + taskdata->td_taskgroup->cancel_request == cancel_taskgroup) { + status = ompt_task_cancel; + } + + /* let OMPT know that we're returning to the callee task */ ompt_callbacks.ompt_callback(ompt_callback_task_schedule)( &(taskdata->ompt_task_info.task_data), status, - &((resumed_task ? resumed_task - : (taskdata->ompt_task_info.scheduling_parent - ? taskdata->ompt_task_info.scheduling_parent - : taskdata->td_parent)) - ->ompt_task_info.task_data)); + (resumed_task ? &(resumed_task->ompt_task_info.task_data) : NULL)); } } #endif @@ -803,6 +799,10 @@ // gtid: global thread ID for calling thread // task: task to be finished // resumed_task: task to be resumed. 
(may be NULL if task is serialized) +// +// template: effectively ompt_enabled.enabled!=0 +// the version with ompt=false is inlined, allowing to optimize away all ompt +// code in this case template <bool ompt> static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task) { @@ -849,10 +849,6 @@ return; } } -#if OMPT_SUPPORT - if (ompt) - __ompt_task_finish(task, resumed_task); -#endif // Check mutexinoutset dependencies, release locks kmp_depnode_t *node = taskdata->td_depnode; @@ -907,8 +903,18 @@ // task finished execution KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1); taskdata->td_flags.executing = 0; // suspend the finishing task + +#if OMPT_SUPPORT + // For a detached task, which is not completed, we switch back + // the omp_fulfill_event signals completion + // locking is necessary to avoid a race with ompt_task_late_fulfill + if (ompt) + __ompt_task_finish(task, resumed_task, ompt_task_detach); +#endif + // no access to taskdata after this point! // __kmp_fulfill_event might free taskdata at any time from now + taskdata->td_flags.proxy = TASK_PROXY; // proxify! detach = true; } @@ -919,6 +925,12 @@ if (!detach) { taskdata->td_flags.complete = 1; // mark the task as completed +#if OMPT_SUPPORT + // This is not a detached task, we are done here + if (ompt) + __ompt_task_finish(task, resumed_task, ompt_task_complete); +#endif + // Only need to keep track of count if team parallel and tasking not // serialized, or task is detachable and event has already been fulfilled if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) || @@ -3867,12 +3879,26 @@ // point.
// We need to take the lock to avoid races __kmp_acquire_tas_lock(&event->lock, gtid); - if (taskdata->td_flags.proxy == TASK_PROXY) + if (taskdata->td_flags.proxy == TASK_PROXY) { detached = true; + } else { +#if OMPT_SUPPORT + // The OMPT event must occur under mutual exclusion, + // otherwise the tool might access ptask after free + if (UNLIKELY(ompt_enabled.enabled)) + __ompt_task_finish(ptask, NULL, ompt_task_early_fulfill); +#endif + } event->type = KMP_EVENT_UNINITIALIZED; __kmp_release_tas_lock(&event->lock, gtid); if (detached) { +#if OMPT_SUPPORT + // We free ptask afterwards and know the task is finished, + // so locking is not necessary + if (UNLIKELY(ompt_enabled.enabled)) + __ompt_task_finish(ptask, NULL, ompt_task_late_fulfill); +#endif // If the task detached complete the proxy task if (gtid >= 0) { kmp_team_t *team = taskdata->td_team; Index: openmp/runtime/test/ompt/callback.h =================================================================== --- openmp/runtime/test/ompt/callback.h +++ openmp/runtime/test/ompt/callback.h @@ -734,9 +734,13 @@ ompt_task_status_t prior_task_status, ompt_data_t *second_task_data) { - printf("%" PRIu64 ": ompt_event_task_schedule: first_task_id=%" PRIu64 ", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n", ompt_get_thread_data()->value, first_task_data->value, second_task_data->value, ompt_task_status_t_values[prior_task_status], prior_task_status); - if(prior_task_status == ompt_task_complete) - { + printf("%" PRIu64 ": ompt_event_task_schedule: first_task_id=%" PRIu64 + ", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n", + ompt_get_thread_data()->value, first_task_data->value, + (second_task_data ? 
second_task_data->value : -1), + ompt_task_status_t_values[prior_task_status], prior_task_status); + if (prior_task_status == ompt_task_complete || + prior_task_status == ompt_task_late_fulfill) { printf("%" PRIu64 ": ompt_event_task_end: task_id=%" PRIu64 "\n", ompt_get_thread_data()->value, first_task_data->value); } } Index: openmp/runtime/test/ompt/tasks/task_early_fulfill.c =================================================================== --- /dev/null +++ openmp/runtime/test/ompt/tasks/task_early_fulfill.c @@ -0,0 +1,67 @@ +// RUN: %libomp-compile -fopenmp-version=50 && env OMP_NUM_THREADS='3' %libomp-run | %sort-threads | FileCheck %s + +// Checked gcc 9.2 still does not support detach clause on task construct. +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8, gcc-9 +// clang supports detach clause since version 11. +// UNSUPPORTED: clang-10, clang-9, clang-8, clang-7 +// icc compiler does not support detach clause. +// UNSUPPORTED: icc + +#include "callback.h" +#include <omp.h> + +int main() { +#pragma omp parallel +#pragma omp master + { + omp_event_handle_t event; +#pragma omp task detach(event) if (0) + { omp_fulfill_event(event); } +#pragma omp taskwait + } + return 0; +} + +// Check if libomp supports the callbacks for this test.
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + +// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + +// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: +// CHECK-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], +// CHECK-SAME: parent_task_frame.exit=[[NULL]], +// CHECK-SAME: parent_task_frame.reenter=0x{{[0-f]+}}, +// CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], +// CHECK-SAME: requested_team_size=3, + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], +// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: +// CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID]], +// CHECK-SAME: parent_task_frame.exit=0x{{[0-f]+}}, +// CHECK-SAME: parent_task_frame.reenter=0x{{[0-f]+}}, +// CHECK-SAME: new_task_id=[[TASK_ID:[0-9]+]], + +// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule: +// CHECK-SAME: first_task_id=[[IMPLICIT_TASK_ID]], +// CHECK-SAME: second_task_id=[[TASK_ID]], +// CHECK-SAME: prior_task_status=ompt_task_switch=7 + +// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule: +// CHECK-SAME: first_task_id=[[TASK_ID]], +// CHECK-SAME: second_task_id=18446744073709551615, +// CHECK-SAME: prior_task_status=ompt_task_early_fulfill=5 + +// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule: +// CHECK-SAME: first_task_id=[[TASK_ID]], +// 
CHECK-SAME: second_task_id=[[IMPLICIT_TASK_ID]], +// CHECK-SAME: prior_task_status=ompt_task_complete=1 Index: openmp/runtime/test/ompt/tasks/task_late_fulfill.c =================================================================== --- /dev/null +++ openmp/runtime/test/ompt/tasks/task_late_fulfill.c @@ -0,0 +1,75 @@ +// RUN: %libomp-compile -fopenmp-version=50 && env OMP_NUM_THREADS='3' %libomp-run | %sort-threads | FileCheck %s + +// Checked gcc 9.2 still does not support detach clause on task construct. +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8, gcc-9 +// clang supports detach clause since version 11. +// UNSUPPORTED: clang-10, clang-9, clang-8, clang-7 +// icc compiler does not support detach clause. +// UNSUPPORTED: icc + +#include "callback.h" +#include <omp.h> + +int main() { +#pragma omp parallel +#pragma omp master + { + omp_event_handle_t event; + omp_event_handle_t *f_event; +#pragma omp task detach(event) depend(out : f_event) shared(f_event) if (0) + { + printf("task 1\n"); + f_event = &event; + } +#pragma omp task depend(in : f_event) + { printf("task 2\n"); } + printf("calling omp_fulfill_event\n"); + omp_fulfill_event(*f_event); +#pragma omp taskwait + } + return 0; +} + +// Check if libomp supports the callbacks for this test.
+// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_schedule' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_begin' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_parallel_end' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_implicit_task' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquire' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_acquired' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_mutex_released' + +// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + +// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: +// CHECK-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], +// CHECK-SAME: parent_task_frame.exit=[[NULL]], +// CHECK-SAME: parent_task_frame.reenter=0x{{[0-f]+}}, +// CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], +// CHECK-SAME: requested_team_size=3, + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID]], +// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: +// CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID]], +// CHECK-SAME: parent_task_frame.exit=0x{{[0-f]+}}, +// CHECK-SAME: parent_task_frame.reenter=0x{{[0-f]+}}, +// CHECK-SAME: new_task_id=[[TASK_ID:[0-9]+]], + +// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule: +// CHECK-SAME: first_task_id=[[IMPLICIT_TASK_ID]], +// CHECK-SAME: second_task_id=[[TASK_ID]], +// CHECK-SAME: prior_task_status=ompt_task_switch=7 + +// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule: +// CHECK-SAME: first_task_id=[[TASK_ID]], +// CHECK-SAME: second_task_id=[[IMPLICIT_TASK_ID]], +// CHECK-SAME: prior_task_status=ompt_task_detach=4 + +// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_task_schedule: +// CHECK-SAME: first_task_id=[[TASK_ID]], +// CHECK-SAME: 
second_task_id=18446744073709551615, +// CHECK-SAME: prior_task_status=ompt_task_late_fulfill=6