diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -4237,6 +4237,11 @@ ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); + +KMP_EXPORT kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task); + +KMP_EXPORT kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task); + KMP_EXPORT void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp --- a/openmp/runtime/src/kmp_taskdeps.cpp +++ b/openmp/runtime/src/kmp_taskdeps.cpp @@ -284,6 +284,16 @@ #endif /* OMPT_SUPPORT && OMPT_OPTIONAL */ } +kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task) { + kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task); + return td->td_depnode ? &(td->td_depnode->dn) : NULL; +} + +kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task) { + kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task); + return td->td_depnode->dn.successors; +} + static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread, kmp_task_t *task, kmp_depnode_t *node, @@ -307,16 +317,18 @@ if (dep->dn.task) { KMP_ACQUIRE_DEPNODE(gtid, dep); if (dep->dn.task) { + if (!dep->dn.successors || dep->dn.successors->node != node) { #if OMPX_TASKGRAPH - if (!(__kmp_tdg_is_recording(tdg_status)) && task) + if (!(__kmp_tdg_is_recording(tdg_status)) && task) #endif - __kmp_track_dependence(gtid, dep, node, task); - dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node); - KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to " - "%p\n", - gtid, KMP_TASK_TO_TASKDATA(dep->dn.task), - KMP_TASK_TO_TASKDATA(task))); - npredecessors++; + __kmp_track_dependence(gtid, dep, node, task); + dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node); + KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to " + "%p\n", + gtid, KMP_TASK_TO_TASKDATA(dep->dn.task), + KMP_TASK_TO_TASKDATA(task))); + npredecessors++; + } } KMP_RELEASE_DEPNODE(gtid, dep); } @@ -324,6 +336,7 @@ return npredecessors; } +// Add the edge 'sink' -> 'source' in the task dependency graph static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread, kmp_task_t *task, @@ -346,29 +359,31 @@ // synchronously add source to sink' list of successors KMP_ACQUIRE_DEPNODE(gtid, sink); if (sink->dn.task) { + if (!sink->dn.successors || sink->dn.successors->node != source) { #if OMPX_TASKGRAPH - if (!(__kmp_tdg_is_recording(tdg_status)) && task) + if (!(__kmp_tdg_is_recording(tdg_status)) && task) #endif - __kmp_track_dependence(gtid, sink, source, task); - sink->dn.successors = __kmp_add_node(thread, sink->dn.successors, source); - KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to " + __kmp_track_dependence(gtid, sink, source, task); + sink->dn.successors = __kmp_add_node(thread, sink->dn.successors, source); + KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to " "%p\n", gtid, KMP_TASK_TO_TASKDATA(sink->dn.task), KMP_TASK_TO_TASKDATA(task))); #if OMPX_TASKGRAPH - if (__kmp_tdg_is_recording(tdg_status)) { - kmp_taskdata_t *tdd = KMP_TASK_TO_TASKDATA(sink->dn.task); - if (tdd->is_taskgraph) { - if (tdd->td_flags.onced) - // decrement npredecessors if sink->dn.task belongs to a taskgraph - // and - // 1) the task is reset to its initial state (by kmp_free_task) or - // 2) the task is complete but not yet reset - npredecessors--; + if (__kmp_tdg_is_recording(tdg_status)) { + kmp_taskdata_t *tdd = KMP_TASK_TO_TASKDATA(sink->dn.task); + if (tdd->is_taskgraph) { + if (tdd->td_flags.onced) + // decrement npredecessors if sink->dn.task belongs to a taskgraph + // and + // 1) the task is reset to its initial state (by kmp_free_task) or + // 2) the task is complete but not yet reset + npredecessors--; + } } - } #endif npredecessors++; + } } KMP_RELEASE_DEPNODE(gtid, sink); } diff --git a/openmp/runtime/test/tasking/kmp_task_deps.h b/openmp/runtime/test/tasking/kmp_task_deps.h new file mode 100644 --- /dev/null +++ b/openmp/runtime/test/tasking/kmp_task_deps.h @@ -0,0 +1,56 @@ +#ifndef KMP_TASK_DEPS_H +#define KMP_TASK_DEPS_H + +#include /* size_t */ + +// --------------------------------------------------------------------------- +// internal data to emulate compiler codegen +typedef struct DEP { + size_t addr; + size_t len; + unsigned char flags; +} dep; + +typedef struct task { + void **shareds; + void *entry; + int part_id; + void *destr_thunk; + int priority; + long long device_id; + int f_priv; +} kmp_task_t; +typedef int (*entry_t)(int, kmp_task_t *); +typedef struct ID { + int reserved_1; + int flags; + int reserved_2; + int reserved_3; + char *psource; +} id; + +#define TIED 1 + +struct kmp_depnode_list; + +typedef struct kmp_base_depnode { + struct kmp_depnode_list *successors; + /* [...] more stuff down here */ +} kmp_base_depnode_t; + +typedef struct kmp_depnode_list { + struct kmp_base_depnode *node; + struct kmp_depnode_list *next; +} kmp_depnode_list_t; + +static id loc = {0, 2, 0, 0, ";file;func;0;0;;"}; +kmp_task_t *__kmpc_omp_task_alloc(id *loc, int gtid, int flags, size_t sz, + size_t shar, entry_t rtn); +int __kmpc_omp_task_with_deps(id *loc, int gtid, kmp_task_t *task, int nd, + dep *dep_lst, int nd_noalias, + dep *noalias_dep_lst); +kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task); +kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task); +int __kmpc_global_thread_num(id *); + +#endif /* KMP_TASK_DEPS_H */ diff --git a/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c b/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c new file mode 100644 --- /dev/null +++ b/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c @@ -0,0 +1,67 @@ +// REQUIRES: linux +// RUN: %libomp-compile && env OMP_NUM_THREADS='2' %libomp-run + +#include +#include + +#include "kmp_task_deps.h" + +// the test +int main(void) { + volatile int done = 0; + +#pragma omp parallel num_threads(2) + { + while (omp_get_thread_num() != 0 && !done) + ; + +#pragma omp single + { + kmp_task_t *A, *B; + kmp_depnode_list_t *A_succ; + kmp_base_depnode_t *B_node; + dep deps[2]; + int gtid; + int x, y; + + gtid = __kmpc_global_thread_num(&loc); + + // A - out(x, y) + A = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); + deps[0].addr = (size_t)&x; + deps[0].len = 0; + deps[0].flags = 2; // OUT + + deps[1].addr = (size_t)&y; + deps[1].len = 0; + deps[1].flags = 2; // OUT + + __kmpc_omp_task_with_deps(&loc, gtid, A, 2, deps, 0, 0); + + // B - in(x, y) + B = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); + deps[0].addr = (size_t)&x; + deps[0].len = 0; + deps[0].flags = 1; // IN + + deps[1].addr = (size_t)&y; + deps[1].len = 0; + deps[1].flags = 1; // IN + + __kmpc_omp_task_with_deps(&loc, gtid, B, 2, deps, 0, 0); + + // Retrieve TDG nodes + A_succ = __kmpc_task_get_successors(A); + B_node = __kmpc_task_get_depnode(B); + + // 'B' should only be added once to 'A' successors list + assert(A_succ->node == B_node); + assert(A_succ->next == NULL); + +#pragma omp taskwait + + done = 1; + } + } + return 0; +} diff --git a/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges_inoutset.c b/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges_inoutset.c new file mode 100644 --- /dev/null +++ b/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges_inoutset.c @@ -0,0 +1,137 @@ +// REQUIRES: linux +// RUN: %libomp-compile && env OMP_NUM_THREADS='2' %libomp-run + +#include +#include + +#include "kmp_task_deps.h" + +// Expected dependency graph (directed from top to bottom) +// +// A B C // inoutset(x), inoutset(x, y), inoutset(y) +// | \ | / | +// D E F // in(x), in(x, y), in(y) +// \ / +// G // out(y) + +// the test +int main(void) { + volatile int done = 0; + +#pragma omp parallel num_threads(2) + { + while (omp_get_thread_num() != 0 && !done) + ; + +#pragma omp single + { + kmp_task_t *A, *B, *C, *D, *E, *F, *G; + kmp_depnode_list_t *A_succ, *B_succ, *C_succ, *E_succ, *F_succ; + kmp_base_depnode_t *D_node, *E_node, *F_node, *G_node; + dep deps[2]; + int gtid; + int x, y; + + gtid = __kmpc_global_thread_num(&loc); + + deps[0].addr = (size_t)&x; + deps[0].len = 0; + deps[0].flags = 8; // INOUTSET + + deps[1].addr = (size_t)&y; + deps[1].len = 0; + deps[1].flags = 8; // INOUTSET + + // A inoutset(x) + A = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); + __kmpc_omp_task_with_deps(&loc, gtid, A, 1, deps + 0, 0, 0); + + // B inoutset(x, y) + B = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); + __kmpc_omp_task_with_deps(&loc, gtid, B, 2, deps + 0, 0, 0); + + // C inoutset(y) + C = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); + __kmpc_omp_task_with_deps(&loc, gtid, C, 1, deps + 1, 0, 0); + + deps[0].flags = 1; // IN + deps[1].flags = 1; // IN + + // D in(x) + D = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); + __kmpc_omp_task_with_deps(&loc, gtid, D, 1, deps + 0, 0, 0); + + // E in(x, y) + E = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); + __kmpc_omp_task_with_deps(&loc, gtid, E, 2, deps + 0, 0, 0); + + // F in(y) + F = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); + __kmpc_omp_task_with_deps(&loc, gtid, F, 1, deps + 1, 0, 0); + + deps[1].flags = 2; // OUT + + // G out(y) + G = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); + __kmpc_omp_task_with_deps(&loc, gtid, G, 1, deps + 1, 0, 0); + + // Retrieve TDG nodes and check edges + A_succ = __kmpc_task_get_successors(A); + B_succ = __kmpc_task_get_successors(B); + C_succ = __kmpc_task_get_successors(C); + E_succ = __kmpc_task_get_successors(E); + F_succ = __kmpc_task_get_successors(F); + + D_node = __kmpc_task_get_depnode(D); + E_node = __kmpc_task_get_depnode(E); + F_node = __kmpc_task_get_depnode(F); + + G_node = __kmpc_task_get_depnode(G); + + // A -> D and A -> E + assert(A_succ && A_succ->next && !A_succ->next->next); + assert((A_succ->node == D_node && A_succ->next->node == E_node) || + (A_succ->node == E_node && A_succ->next->node == D_node)); + + // B -> D and B -> E and B -> F + // valid lists are + // (D, E, F) + // (D, F, E) + // (E, D, F) + // (E, F, D) + // (F, D, E) + // (F, E, D) + assert(B_succ && B_succ->next && B_succ->next->next && + !B_succ->next->next->next); + assert((B_succ->node == D_node && B_succ->next->node == E_node && + B_succ->next->next->node == F_node) || + (B_succ->node == D_node && B_succ->next->node == F_node && + B_succ->next->next->node == E_node) || + (B_succ->node == E_node && B_succ->next->node == D_node && + B_succ->next->next->node == F_node) || + (B_succ->node == E_node && B_succ->next->node == F_node && + B_succ->next->next->node == D_node) || + (B_succ->node == F_node && B_succ->next->node == D_node && + B_succ->next->next->node == E_node) || + (B_succ->node == F_node && B_succ->next->node == E_node && + B_succ->next->next->node == D_node)); + + // C -> E and C -> F + assert(C_succ && C_succ->next && !C_succ->next->next); + assert((C_succ->node == E_node && C_succ->next->node == F_node) || + (C_succ->node == F_node && C_succ->next->node == E_node)); + + // E -> G and F -> G + assert(E_succ && !E_succ->next); + assert(E_succ->node == G_node); + + assert(F_succ && !F_succ->next); + assert(F_succ->node == G_node); + +#pragma omp taskwait + + done = 1; + } + } + return 0; +}