diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp --- a/openmp/runtime/src/kmp_taskdeps.cpp +++ b/openmp/runtime/src/kmp_taskdeps.cpp @@ -205,7 +205,7 @@ return new_head; } -static inline void __kmp_track_dependence(kmp_depnode_t *source, +static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source, kmp_depnode_t *sink, kmp_task_t *sink_task) { #ifdef KMP_SUPPORT_GRAPH_OUTPUT @@ -224,11 +224,14 @@ */ if (ompt_enabled.ompt_callback_task_dependence) { kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task); - kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task); + ompt_data_t *sink_data; + if (sink_task) + sink_data = &(KMP_TASK_TO_TASKDATA(sink_task)->ompt_task_info.task_data); + else + sink_data = &__kmp_threads[gtid]->th.ompt_thread_info.task_data; ompt_callbacks.ompt_callback(ompt_callback_task_dependence)( - &(task_source->ompt_task_info.task_data), - &(task_sink->ompt_task_info.task_data)); + &(task_source->ompt_task_info.task_data), sink_data); } #endif /* OMPT_SUPPORT && OMPT_OPTIONAL */ } @@ -246,7 +249,7 @@ if (dep->dn.task) { KMP_ACQUIRE_DEPNODE(gtid, dep); if (dep->dn.task) { - __kmp_track_dependence(dep, node, task); + __kmp_track_dependence(gtid, dep, node, task); dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node); KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to " "%p\n", @@ -272,7 +275,7 @@ // synchronously add source to sink' list of successors KMP_ACQUIRE_DEPNODE(gtid, sink); if (sink->dn.task) { - __kmp_track_dependence(sink, source, task); + __kmp_track_dependence(gtid, sink, source, task); sink->dn.successors = __kmp_add_node(thread, sink->dn.successors, source); KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to " "%p\n", @@ -635,6 +638,23 @@ return ret; } +void __ompt_taskwait_dep_finish(kmp_taskdata_t *current_task, + ompt_data_t *taskwait_task_data) { +#if OMPT_SUPPORT + if (ompt_enabled.ompt_callback_task_schedule) { + ompt_data_t task_data = ompt_data_none; + ompt_callbacks.ompt_callback(ompt_callback_task_schedule)( + current_task ? &(current_task->ompt_task_info.task_data) : &task_data, + ompt_task_switch, taskwait_task_data); + ompt_callbacks.ompt_callback(ompt_callback_task_schedule)( + taskwait_task_data, ompt_task_complete, + current_task ? &(current_task->ompt_task_info.task_data) : &task_data); + } + current_task->ompt_task_info.frame.enter_frame.ptr = NULL; + *taskwait_task_data = ompt_data_none; +#endif /* OMPT_SUPPORT */ +} + /*! @ingroup TASKING @param loc_ref location of the original task directive @@ -661,6 +681,74 @@ kmp_info_t *thread = __kmp_threads[gtid]; kmp_taskdata_t *current_task = thread->th.th_current_task; +#if OMPT_SUPPORT + // this function represents a taskwait construct with depend clause + // We signal 4 events: + // - creation of the taskwait task + // - dependences of the taskwait task + // - schedule and finish of the taskwait task + ompt_data_t *taskwait_task_data = &thread->th.ompt_thread_info.task_data; + KMP_ASSERT(taskwait_task_data->ptr == NULL); + if (ompt_enabled.enabled) { + if (!current_task->ompt_task_info.frame.enter_frame.ptr) + current_task->ompt_task_info.frame.enter_frame.ptr = + OMPT_GET_FRAME_ADDRESS(0); + if (ompt_enabled.ompt_callback_task_create) { + ompt_data_t task_data = ompt_data_none; + ompt_callbacks.ompt_callback(ompt_callback_task_create)( + current_task ? &(current_task->ompt_task_info.task_data) : &task_data, + current_task ? &(current_task->ompt_task_info.frame) : NULL, + taskwait_task_data, + ompt_task_explicit | ompt_task_undeferred | ompt_task_mergeable, 1, + OMPT_GET_RETURN_ADDRESS(0)); + } + } + +#if OMPT_OPTIONAL + /* OMPT grab all dependences if requested by the tool */ + if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) { + kmp_int32 i; + + int ompt_ndeps = ndeps + ndeps_noalias; + ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC( + thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t)); + + KMP_ASSERT(ompt_deps != NULL); + + for (i = 0; i < ndeps; i++) { + ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr; + if (dep_list[i].flags.in && dep_list[i].flags.out) + ompt_deps[i].dependence_type = ompt_dependence_type_inout; + else if (dep_list[i].flags.out) + ompt_deps[i].dependence_type = ompt_dependence_type_out; + else if (dep_list[i].flags.in) + ompt_deps[i].dependence_type = ompt_dependence_type_in; + else if (dep_list[i].flags.mtx) + ompt_deps[ndeps + i].dependence_type = + ompt_dependence_type_mutexinoutset; + } + for (i = 0; i < ndeps_noalias; i++) { + ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr; + if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out) + ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout; + else if (noalias_dep_list[i].flags.out) + ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out; + else if (noalias_dep_list[i].flags.in) + ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in; + else if (noalias_dep_list[i].flags.mtx) + ompt_deps[ndeps + i].dependence_type = + ompt_dependence_type_mutexinoutset; + } + ompt_callbacks.ompt_callback(ompt_callback_dependences)( + taskwait_task_data, ompt_deps, ompt_ndeps); + /* We can now free the allocated memory for the dependencies */ + /* For OMPD we might want to delay the free until end of this function */ + KMP_OMPT_DEPS_FREE(thread, ompt_deps); + ompt_deps = NULL; + } +#endif /* OMPT_OPTIONAL */ +#endif /* OMPT_SUPPORT */ + // We can return immediately as: // - dependences are not computed in serial teams (except with proxy tasks) // - if the dephash is not yet created it means we have nothing to wait for @@ -675,6 +763,7 @@ KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking " "dependencies : loc=%p\n", gtid, loc_ref)); + __ompt_taskwait_dep_finish(current_task, taskwait_task_data); return; } @@ -687,6 +776,7 @@ KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking " "dependencies : loc=%p\n", gtid, loc_ref)); + __ompt_taskwait_dep_finish(current_task, taskwait_task_data); return; } @@ -698,6 +788,7 @@ __kmp_task_stealing_constraint); } + __ompt_taskwait_dep_finish(current_task, taskwait_task_data); KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n", gtid, loc_ref)); } diff --git a/openmp/runtime/test/ompt/tasks/task_late_fulfill.c b/openmp/runtime/test/ompt/tasks/task_late_fulfill.c --- a/openmp/runtime/test/ompt/tasks/task_late_fulfill.c +++ b/openmp/runtime/test/ompt/tasks/task_late_fulfill.c @@ -54,6 +54,13 @@ // CHECK-SAME: parallel_id=[[PARALLEL_ID]], // CHECK-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]] +// The following is to match the taskwait task created in __kmpc_omp_wait_deps +// this should go away, once codegen for "detached if(0)" is fixed + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: +// CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID]], +// CHECK-SAME: has_dependences=yes + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID]], // CHECK-SAME: parent_task_frame.exit=0x{{[0-f]+}}, diff --git a/openmp/runtime/test/ompt/tasks/taskwait-depend.c b/openmp/runtime/test/ompt/tasks/taskwait-depend.c new file mode 100644 --- /dev/null +++ b/openmp/runtime/test/ompt/tasks/taskwait-depend.c @@ -0,0 +1,80 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt + +// The GOMP wrapper does not handle `task if(0) depend()` and drops the +// dependency. Once this is fixed, reevaluate the GCC status: +// XFAIL: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8, gcc-9, gcc-10 + +#include "callback.h" +#include + +int main() { + int x = 0; +#pragma omp parallel num_threads(2) + { +#pragma omp master + { + print_ids(0); + printf("%" PRIu64 ": address of x: %p\n", ompt_get_thread_data()->value, + &x); +#pragma omp task depend(out : x) + { x++; } + print_fuzzy_address(1); + //#pragma omp taskwait depend(in: x) <-- currently not supported in clang +#pragma omp task if (0) depend(in : x) + {} + print_fuzzy_address(2); + } + } + + return 0; +} + +// Check if libomp supports the callbacks for this test. +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_dependences' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_depende + +// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] + +// make sure initial data pointers are null +// CHECK-NOT: 0: new_task_data initially not null + +// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_implicit_task_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], +// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]] + +// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], +// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT:0x[0-f]+]], +// CHECK-SAME: reenter_frame=[[NULL]] + +// CHECK: {{^}}[[MASTER_ID]]: address of x: [[ADDRX:0x[0-f]+]] + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: +// CHECK-SAME: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]], +// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}, +// CHECK-SAME: new_task_id=[[FIRST_TASK:[0-f]+]], +// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, +// CHECK-SAME: task_type=ompt_task_explicit=4, has_dependences=yes + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_dependences: +// CHECK-SAME: task_id=[[FIRST_TASK]], deps=[([[ADDRX]], +// CHECK-SAME: ompt_dependence_type_inout)], ndeps=1 + +// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: +// CHECK-SAME: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]], +// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}, +// CHECK-SAME: new_task_id=[[SECOND_TASK:[0-f]+]], +// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, +// CHECK-SAME: task_type=ompt_task_explicit|ompt_task_undeferred| +// CHECK-SAME: ompt_task_mergeable=1207959556, has_dependences=yes + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_dependences: +// CHECK-SAME: task_id=[[SECOND_TASK]], deps=[([[ADDRX]], +// CHECK-SAME: ompt_dependence_type_in)], ndeps=1 + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_end: task_id=[[SECOND_TASK]] + +// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]