Index: runtime/src/include/40/ompt.h.var =================================================================== --- runtime/src/include/40/ompt.h.var +++ runtime/src/include/40/ompt.h.var @@ -171,7 +171,10 @@ macro (ompt_event_destroy_lock, ompt_wait_callback_t, 59) /* lock destruction */ \ macro (ompt_event_destroy_nest_lock, ompt_wait_callback_t, 60) /* nest lock destruction */ \ \ - macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush */ + macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush */ \ + \ + macro (ompt_event_task_dependences, ompt_task_dependences_callback_t, 69) /* new task dependences */\ + macro (ompt_event_task_dependence_pair, ompt_task_pair_callback_t, 70) /* new task dependence pair */ @@ -206,6 +209,23 @@ } ompt_frame_t; +/*--------------------- + * dependence types + *---------------------*/ + +typedef enum ompt_task_dependence_flag_e { + // a two-bit field for the dependence type: bit 0 = out, bit 1 = in, inout sets both + ompt_task_dependence_type_out = 1, + ompt_task_dependence_type_in = 2, + ompt_task_dependence_type_inout = 3, +} ompt_task_dependence_flag_t; + +typedef struct ompt_task_dependence_s { + void *variable_addr; /* base address of the variable the dependence refers to */ + uint32_t dependence_flags; /* holds one ompt_task_dependence_flag_t value */ +} ompt_task_dependence_t; + + /***************************************************************************** * enumerations for thread states and runtime events *****************************************************************************/ @@ -325,6 +345,13 @@ void *task_function /* pointer to outlined function */ ); +/* task dependences: reports the dependence vector of a single task */ +typedef void (*ompt_task_dependences_callback_t) ( + ompt_task_id_t task_id, /* ID of task with dependences */ + const ompt_task_dependence_t *deps,/* vector of task dependences */ + int ndeps /* number of dependences */ +); + /* program */ typedef void (*ompt_control_callback_t) ( uint64_t command, /* command of control call */ Index: runtime/src/include/41/ompt.h.var =================================================================== --- 
runtime/src/include/41/ompt.h.var +++ runtime/src/include/41/ompt.h.var @@ -171,7 +171,10 @@ macro (ompt_event_destroy_lock, ompt_wait_callback_t, 59) /* lock destruction */ \ macro (ompt_event_destroy_nest_lock, ompt_wait_callback_t, 60) /* nest lock destruction */ \ \ - macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush */ + macro (ompt_event_flush, ompt_callback_t, 61) /* after executing flush */ \ + \ + macro (ompt_event_task_dependences, ompt_task_dependences_callback_t, 69) /* new task dependences */\ + macro (ompt_event_task_dependence_pair, ompt_task_pair_callback_t, 70) /* new task dependence pair */ @@ -206,6 +209,23 @@ } ompt_frame_t; +/*--------------------- + * dependence types + *---------------------*/ + +typedef enum ompt_task_dependence_flag_e { + // a two-bit field for the dependence type: bit 0 = out, bit 1 = in, inout sets both + ompt_task_dependence_type_out = 1, + ompt_task_dependence_type_in = 2, + ompt_task_dependence_type_inout = 3, +} ompt_task_dependence_flag_t; + +typedef struct ompt_task_dependence_s { + void *variable_addr; /* base address of the variable the dependence refers to */ + uint32_t dependence_flags; /* holds one ompt_task_dependence_flag_t value */ +} ompt_task_dependence_t; + + /***************************************************************************** * enumerations for thread states and runtime events *****************************************************************************/ @@ -325,6 +345,13 @@ void *task_function /* pointer to outlined function */ ); +/* task dependences: reports the dependence vector of a single task */ +typedef void (*ompt_task_dependences_callback_t) ( + ompt_task_id_t task_id, /* ID of task with dependences */ + const ompt_task_dependence_t *deps,/* vector of task dependences */ + int ndeps /* number of dependences */ +); + /* program */ typedef void (*ompt_control_callback_t) ( uint64_t command, /* command of control call */ Index: runtime/src/kmp_taskdeps.cpp =================================================================== --- runtime/src/kmp_taskdeps.cpp +++ runtime/src/kmp_taskdeps.cpp @@ -196,7 +196,8 @@ } static inline void -__kmp_track_dependence ( 
kmp_depnode_t *source, kmp_depnode_t *sink ) +__kmp_track_dependence ( kmp_depnode_t *source, kmp_depnode_t *sink, + kmp_task_t *sink_task ) { #ifdef KMP_SUPPORT_GRAPH_OUTPUT kmp_taskdata_t * task_source = KMP_TASK_TO_TASKDATA(source->dn.task); @@ -204,12 +205,27 @@ __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id, task_source->td_ident->psource, sink->dn.id, task_sink->td_ident->psource); #endif +#if OMPT_SUPPORT && OMPT_TRACE + /* OMPT tracks dependences between tasks (a=source, b=sink) in which + task a blocks the execution of b; each pair is reported through the ompt_event_task_dependence_pair callback */ + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair)) + { + kmp_taskdata_t * task_source = KMP_TASK_TO_TASKDATA(source->dn.task); + kmp_taskdata_t * task_sink = KMP_TASK_TO_TASKDATA(sink_task); /* sink task is passed in explicitly: the caller assigns node->dn.task only after __kmp_process_deps has returned */ + + ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair)( + task_source->ompt_task_info.task_id, + task_sink->ompt_task_info.task_id); + } +#endif /* OMPT_SUPPORT && OMPT_TRACE */ } template< bool filter > static inline kmp_int32 __kmp_process_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash, - bool dep_barrier,kmp_int32 ndeps, kmp_depend_info_t *dep_list) + bool dep_barrier,kmp_int32 ndeps, kmp_depend_info_t *dep_list, + kmp_task_t *task ) { KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d depencies : dep_barrier = %d\n", filter, gtid, ndeps, dep_barrier ) ); @@ -231,7 +247,7 @@ if ( indep->dn.task ) { KMP_ACQUIRE_DEPNODE(gtid,indep); if ( indep->dn.task ) { - __kmp_track_dependence(indep,node); + __kmp_track_dependence(indep,node,task); indep->dn.successors = __kmp_add_node(thread, indep->dn.successors, node); KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p", filter,gtid, KMP_TASK_TO_TASKDATA(indep->dn.task), KMP_TASK_TO_TASKDATA(node->dn.task))); @@ -247,7 +263,7 @@ } else if ( last_out && last_out->dn.task ) { KMP_ACQUIRE_DEPNODE(gtid,last_out); if ( last_out->dn.task ) { - 
__kmp_track_dependence(last_out,node); + __kmp_track_dependence(last_out,node,task); last_out->dn.successors = __kmp_add_node(thread, last_out->dn.successors, node); KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p", filter,gtid, KMP_TASK_TO_TASKDATA(last_out->dn.task), KMP_TASK_TO_TASKDATA(node->dn.task))); @@ -312,8 +328,10 @@ // used to pack all npredecessors additions into a single atomic operation at the end int npredecessors; - npredecessors = __kmp_process_deps(gtid, node, hash, dep_barrier, ndeps, dep_list); - npredecessors += __kmp_process_deps(gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list); + npredecessors = __kmp_process_deps(gtid, node, hash, dep_barrier, + ndeps, dep_list, task); + npredecessors += __kmp_process_deps(gtid, node, hash, dep_barrier, + ndeps_noalias, noalias_dep_list, task); node->dn.task = task; KMP_MB(); @@ -404,6 +422,58 @@ kmp_info_t *thread = __kmp_threads[ gtid ]; kmp_taskdata_t * current_task = thread->th.th_current_task; +#if OMPT_SUPPORT && OMPT_TRACE + /* OMPT: if the tool registered ompt_event_task_dependences, copy every dependence into the task's ompt_task_info (dep_list entries first, then noalias_dep_list); the copy is handed to the callback and released in kmp_tasking.c */ + if (ompt_enabled && ndeps+ndeps_noalias > 0 && + ompt_callbacks.ompt_callback(ompt_event_task_dependences)) + { + kmp_int32 i; + + new_taskdata->ompt_task_info.ndeps = ndeps+ndeps_noalias; /* total entries across both lists */ + +# if USE_FAST_MEMORY + new_taskdata->ompt_task_info.deps = (ompt_task_dependence_t *) + __kmp_fast_allocate(thread, + (ndeps+ndeps_noalias)*sizeof(ompt_task_dependence_t)); +# else + new_taskdata->ompt_task_info.deps = (ompt_task_dependence_t *) + __kmp_thread_malloc(thread, + (ndeps+ndeps_noalias)*sizeof(ompt_task_dependence_t)); +# endif + + KMP_ASSERT(new_taskdata->ompt_task_info.deps != NULL); + + for (i = 0; i < ndeps; i++) + { + new_taskdata->ompt_task_info.deps[i].variable_addr = + (void*) dep_list[i].base_addr; + if (dep_list[i].flags.in && dep_list[i].flags.out) + new_taskdata->ompt_task_info.deps[i].dependence_flags = + ompt_task_dependence_type_inout; + else if 
(dep_list[i].flags.out) + new_taskdata->ompt_task_info.deps[i].dependence_flags = + ompt_task_dependence_type_out; + else if (dep_list[i].flags.in) + new_taskdata->ompt_task_info.deps[i].dependence_flags = + ompt_task_dependence_type_in; /* NOTE(review): dependence_flags is left unset when neither in nor out is set -- confirm that cannot occur */ + } + for (i = 0; i < ndeps_noalias; i++) + { + new_taskdata->ompt_task_info.deps[ndeps+i].variable_addr = + (void*) noalias_dep_list[i].base_addr; + if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out) + new_taskdata->ompt_task_info.deps[ndeps+i].dependence_flags = + ompt_task_dependence_type_inout; + else if (noalias_dep_list[i].flags.out) + new_taskdata->ompt_task_info.deps[ndeps+i].dependence_flags = + ompt_task_dependence_type_out; + else if (noalias_dep_list[i].flags.in) + new_taskdata->ompt_task_info.deps[ndeps+i].dependence_flags = + ompt_task_dependence_type_in; /* NOTE(review): same as above -- no value is stored when neither flag is set */ + } + } +#endif /* OMPT_SUPPORT && OMPT_TRACE */ + bool serial = current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || current_task->td_flags.final; #if OMP_41_ENABLED serial = serial && !(new_taskdata->td_flags.proxy == TASK_PROXY); Index: runtime/src/kmp_tasking.c =================================================================== --- runtime/src/kmp_tasking.c +++ runtime/src/kmp_tasking.c @@ -463,6 +463,26 @@ taskdata->ompt_task_info.function); } #endif +#if OMPT_SUPPORT && OMPT_TRACE + /* OMPT: hand the stored dependence vector to the tool if it registered ompt_event_task_dependences and the task recorded at least one dependence */ + if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 && + ompt_callbacks.ompt_callback(ompt_event_task_dependences)) + { + ompt_callbacks.ompt_callback(ompt_event_task_dependences)( + taskdata->ompt_task_info.task_id, + taskdata->ompt_task_info.deps, + taskdata->ompt_task_info.ndeps + ); + /* The vector has been reported; release it with the counterpart of the __kmp_fast_allocate / __kmp_thread_malloc call that created it */ +# if USE_FAST_MEMORY + __kmp_fast_free (thread, taskdata->ompt_task_info.deps); +# else + __kmp_thread_free (thread, taskdata->ompt_task_info.deps); +# endif + taskdata->ompt_task_info.deps = NULL; + taskdata->ompt_task_info.ndeps = 
0; + } +#endif /* OMPT_SUPPORT && OMPT_TRACE */ return; } @@ -762,6 +782,8 @@ task->ompt_task_info.function = function; task->ompt_task_info.frame.exit_runtime_frame = NULL; task->ompt_task_info.frame.reenter_runtime_frame = NULL; + task->ompt_task_info.ndeps = 0; /* no dependence vector recorded yet */ + task->ompt_task_info.deps = NULL; } } #endif Index: runtime/src/ompt-event-specific.h =================================================================== --- runtime/src/ompt-event-specific.h +++ runtime/src/ompt-event-specific.h @@ -141,4 +141,7 @@ #define ompt_event_flush_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_task_dependences_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_task_dependence_pair_implemented ompt_event_MAY_ALWAYS_TRACE + #endif Index: runtime/src/ompt-internal.h =================================================================== --- runtime/src/ompt-internal.h +++ runtime/src/ompt-internal.h @@ -26,9 +26,11 @@ typedef struct { - ompt_frame_t frame; - void* function; - ompt_task_id_t task_id; + ompt_frame_t frame; + void* function; + ompt_task_id_t task_id; + int ndeps; /* number of entries in deps; 0 when no vector is stored */ + ompt_task_dependence_t *deps; /* dependence vector passed to the ompt_event_task_dependences callback; NULL when none is stored */ } ompt_task_info_t;