Index: runtime/src/kmp_tasking.cpp =================================================================== --- runtime/src/kmp_tasking.cpp +++ runtime/src/kmp_tasking.cpp @@ -488,13 +488,10 @@ // __ompt_task_finish: // Build and trigger final task-schedule event -static inline void __ompt_task_finish(kmp_task_t *task, - kmp_taskdata_t *resumed_task) { +static inline void +__ompt_task_finish(kmp_task_t *task, kmp_taskdata_t *resumed_task, + ompt_task_status_t status = ompt_task_complete) { kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); - ompt_task_status_t status = ompt_task_complete; - if (taskdata->td_flags.tiedness == TASK_UNTIED && - KMP_TEST_THEN_ADD32(&(taskdata->td_untied_count), 0) > 1) - status = ompt_task_others; if (__kmp_omp_cancellation && taskdata->td_taskgroup && taskdata->td_taskgroup->cancel_request == cancel_taskgroup) { status = ompt_task_cancel; @@ -699,6 +696,7 @@ // gtid: global thread ID for calling thread // task: task to be finished // resumed_task: task to be resumed. (may be NULL if task is serialized) +template static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task) { kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); @@ -744,6 +742,10 @@ return; } } +#if OMPT_SUPPORT + if (ompt) + __ompt_task_finish(task, resumed_task); +#endif KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0); taskdata->td_flags.complete = 1; // mark the task as completed @@ -835,14 +837,13 @@ KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n", gtid, loc_ref, KMP_TASK_TO_TASKDATA(task))); // this routine will provide task to resume - __kmp_task_finish(gtid, task, NULL); + __kmp_task_finish(gtid, task, NULL); KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n", gtid, loc_ref, KMP_TASK_TO_TASKDATA(task))); #if OMPT_SUPPORT if (ompt) { - __ompt_task_finish(task, NULL); omp_frame_t *ompt_frame; __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); ompt_frame->enter_frame = NULL; @@ -884,7 +885,8 @@ KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid, loc_ref, KMP_TASK_TO_TASKDATA(task))); - __kmp_task_finish(gtid, task, NULL); // Not sure how to find task to resume + __kmp_task_finish(gtid, task, + NULL); // Not sure how to find task to resume KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid, loc_ref, KMP_TASK_TO_TASKDATA(task))); @@ -1188,6 +1190,10 @@ else taskdata->td_last_tied = taskdata; +#if OMPT_SUPPORT + if (UNLIKELY(ompt_enabled.enabled)) + __ompt_task_init(taskdata, gtid); +#endif // Only need to keep track of child task counts if team parallel and tasking not // serialized or if it is a proxy task #if OMP_45_ENABLED @@ -1213,11 +1219,6 @@ gtid, taskdata, taskdata->td_parent)); ANNOTATE_HAPPENS_BEFORE(task); -#if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) - __ompt_task_init(taskdata, gtid); -#endif - return task; } @@ -1296,17 +1297,9 @@ } #endif -#if OMP_45_ENABLED - // Proxy tasks are not handled by the runtime - if (taskdata->td_flags.proxy != TASK_PROXY) { -#endif - ANNOTATE_HAPPENS_AFTER(task); - __kmp_task_start(gtid, task, current_task); // OMPT only if not discarded -#if OMP_45_ENABLED - } -#endif - #if OMPT_SUPPORT + // For untied tasks, the first task executed only calls __kmpc_omp_task and + // does not execute code. ompt_thread_info_t oldInfo; kmp_info_t *thread; if (UNLIKELY(ompt_enabled.enabled)) { @@ -1321,6 +1314,19 @@ } #endif +#if OMP_45_ENABLED + // Proxy tasks are not handled by the runtime + if (taskdata->td_flags.proxy != TASK_PROXY) { +#endif + ANNOTATE_HAPPENS_AFTER(task); + __kmp_task_start(gtid, task, current_task); // OMPT only if not discarded +#if OMP_45_ENABLED + } +#endif + +#if OMPT_SUPPORT +#endif + #if OMP_40_ENABLED // TODO: cancel tasks if the parallel region has also been cancelled // TODO: check if this sequence can be hoisted above __kmp_task_start @@ -1397,27 +1403,28 @@ } KMP_POP_PARTITIONED_TIMER(); -#if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) - __ompt_task_finish(task, current_task); -#endif #if OMP_40_ENABLED } #endif // OMP_40_ENABLED -#if OMPT_SUPPORT - if (UNLIKELY(ompt_enabled.enabled)) { - thread->th.ompt_thread_info = oldInfo; - taskdata->ompt_task_info.frame.exit_frame = NULL; - } -#endif #if OMP_45_ENABLED // Proxy tasks are not handled by the runtime if (taskdata->td_flags.proxy != TASK_PROXY) { #endif ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent); - __kmp_task_finish(gtid, task, current_task); // OMPT only if not discarded +#if OMPT_SUPPORT + if (UNLIKELY(ompt_enabled.enabled)) { + thread->th.ompt_thread_info = oldInfo; + if (taskdata->td_flags.tiedness == TASK_TIED) { + taskdata->ompt_task_info.frame.exit_frame = NULL; + } + __kmp_task_finish(gtid, task, + current_task); // OMPT only if not discarded + } else +#endif + __kmp_task_finish(gtid, task, + current_task); // OMPT only if not discarded #if OMP_45_ENABLED } #endif @@ -1554,19 +1561,29 @@ #if OMPT_SUPPORT kmp_taskdata_t *parent = NULL; - if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) { - OMPT_STORE_RETURN_ADDRESS(gtid); - parent = new_taskdata->td_parent; - if (!parent->ompt_task_info.frame.enter_frame) - parent->ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1); - if (ompt_enabled.ompt_callback_task_create) { - ompt_data_t task_data = ompt_data_none; - ompt_callbacks.ompt_callback(ompt_callback_task_create)( - parent ? &(parent->ompt_task_info.task_data) : &task_data, - parent ? &(parent->ompt_task_info.frame) : NULL, - &(new_taskdata->ompt_task_info.task_data), - ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0, - OMPT_LOAD_RETURN_ADDRESS(gtid)); + if (UNLIKELY(ompt_enabled.enabled)) { + if (!new_taskdata->td_flags.started) { + OMPT_STORE_RETURN_ADDRESS(gtid); + parent = new_taskdata->td_parent; + if (!parent->ompt_task_info.frame.enter_frame) { + parent->ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1); + } + if (ompt_enabled.ompt_callback_task_create) { + ompt_data_t task_data = ompt_data_none; + ompt_callbacks.ompt_callback(ompt_callback_task_create)( + parent ? &(parent->ompt_task_info.task_data) : &task_data, + parent ? &(parent->ompt_task_info.frame) : NULL, + &(new_taskdata->ompt_task_info.task_data), + ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0, + OMPT_LOAD_RETURN_ADDRESS(gtid)); + } + } else { + // We are scheduling the continuation of an UNTIED task. + // Scheduling back to the parent task. + __ompt_task_finish(new_task, + new_taskdata->ompt_task_info.scheduling_parent, + ompt_task_others); + new_taskdata->ompt_task_info.frame.exit_frame = NULL; } } #endif @@ -3758,7 +3775,7 @@ // free the pattern task and exit __kmp_task_start(gtid, task, current_task); // make internal bookkeeping // do not execute the pattern task, just do internal bookkeeping - __kmp_task_finish(gtid, task, current_task); + __kmp_task_finish(gtid, task, current_task); } // Structure to keep taskloop parameters for auxiliary task @@ -3990,7 +4007,7 @@ // free the pattern task and exit __kmp_task_start(gtid, task, current_task); // do not execute anything for zero-trip loop - __kmp_task_finish(gtid, task, current_task); + __kmp_task_finish(gtid, task, current_task); return; } if (num_tasks_min == 0) Index: runtime/test/ompt/tasks/task_types.c =================================================================== --- runtime/test/ompt/tasks/task_types.c +++ runtime/test/ompt/tasks/task_types.c @@ -43,6 +43,19 @@ // Output of thread_id is needed to know on which thread task is executed printf("%" PRIu64 ": explicit_untied\n", ompt_get_thread_data()->value); print_ids(0); + print_frame(1); + x++; +#pragma omp taskyield + printf("%" PRIu64 ": explicit_untied(2)\n", + ompt_get_thread_data()->value); + print_ids(0); + print_frame(1); + x++; +#pragma omp taskwait + printf("%" PRIu64 ": explicit_untied(3)\n", + ompt_get_thread_data()->value); + print_ids(0); + print_frame(1); x++; } // explicit task with final @@ -146,8 +159,24 @@ // may be multiple of those // CHECK: [[THREAD_ID_3:[0-9]+]]: explicit_untied // CHECK: [[THREAD_ID_3]]: task level 0: parallel_id=[[PARALLEL_ID]] - // CHECK-SAME: task_id=[[EXPLICIT_UNTIED_TASK_ID]], exit_frame={{[^\,]*}} - // CHECK-SAME: reenter_frame=[[NULL]] + // CHECK-SAME: task_id=[[EXPLICIT_UNTIED_TASK_ID]] + // CHECK-SAME: exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_untied=268435460 + // CHECK-SAME: thread_num={{[01]}} + + // after taskyield + // CHECK: [[THREAD_ID_3_2:[0-9]+]]: explicit_untied(2) + // CHECK: [[THREAD_ID_3_2]]: task level 0: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: task_id=[[EXPLICIT_UNTIED_TASK_ID]] + // CHECK-SAME: exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] + // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_untied=268435460 + // CHECK-SAME: thread_num={{[01]}} + + // after taskwait + // CHECK: [[THREAD_ID_3_3:[0-9]+]]: explicit_untied(3) + // CHECK: [[THREAD_ID_3_3]]: task level 0: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: task_id=[[EXPLICIT_UNTIED_TASK_ID]] + // CHECK-SAME: exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // CHECK-SAME: task_type=ompt_task_explicit|ompt_task_untied=268435460 // CHECK-SAME: thread_num={{[01]}}