Index: openmp/runtime/src/kmp.h =================================================================== --- openmp/runtime/src/kmp.h +++ openmp/runtime/src/kmp.h @@ -4155,6 +4155,11 @@ ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); + +KMP_EXPORT kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task); + +KMP_EXPORT kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task); + KMP_EXPORT void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, Index: openmp/runtime/src/kmp_taskdeps.cpp =================================================================== --- openmp/runtime/src/kmp_taskdeps.cpp +++ openmp/runtime/src/kmp_taskdeps.cpp @@ -284,6 +284,16 @@ #endif /* OMPT_SUPPORT && OMPT_OPTIONAL */ } +kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task) { + kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task); + return td->td_depnode ? &(td->td_depnode->dn) : NULL; +} + +kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task) { + kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task); + return td->td_depnode->dn.successors; +} + static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread, kmp_task_t *task, kmp_depnode_t *node, @@ -307,6 +317,7 @@ if (dep->dn.task) { KMP_ACQUIRE_DEPNODE(gtid, dep); if (dep->dn.task) { + if (!dep->dn.successors || dep->dn.successors->node != node) { #if OMPX_TASKGRAPH if (!(__kmp_tdg_is_recording(tdg_status)) && task) #endif @@ -317,6 +328,7 @@ gtid, KMP_TASK_TO_TASKDATA(dep->dn.task), KMP_TASK_TO_TASKDATA(task))); npredecessors++; + } } KMP_RELEASE_DEPNODE(gtid, dep); } @@ -324,6 +336,7 @@ return npredecessors; } +// Add the edge 'sink' -> 'source' in the task dependency graph static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread, kmp_task_t *task, @@ -346,6 +359,7 @@ // synchronously add source to sink' list of successors KMP_ACQUIRE_DEPNODE(gtid, sink); if (sink->dn.task) { + if (!sink->dn.successors || sink->dn.successors->node != source) { #if OMPX_TASKGRAPH if (!(__kmp_tdg_is_recording(tdg_status)) && task) #endif @@ -369,6 +383,7 @@ } #endif npredecessors++; + } } KMP_RELEASE_DEPNODE(gtid, sink); } Index: openmp/runtime/test/tasking/kmp_task_deps.h =================================================================== --- /dev/null +++ openmp/runtime/test/tasking/kmp_task_deps.h @@ -0,0 +1,56 @@ +#ifndef KMP_TASK_DEPS_H +#define KMP_TASK_DEPS_H + +#include /* size_t */ + +// --------------------------------------------------------------------------- +// internal data to emulate compiler codegen +typedef struct DEP { + size_t addr; + size_t len; + unsigned char flags; +} dep; + +typedef struct task { + void **shareds; + void *entry; + int part_id; + void *destr_thunk; + int priority; + long long device_id; + int f_priv; +} kmp_task_t; +typedef int (*entry_t)(int, kmp_task_t *); +typedef struct ID { + int reserved_1; + int flags; + int reserved_2; + int reserved_3; + char *psource; +} id; + +#define TIED 1 + +struct kmp_depnode_list; + +typedef struct kmp_base_depnode { + struct kmp_depnode_list *successors; + /* [...] more stuff down here */ +} kmp_base_depnode_t; + +typedef struct kmp_depnode_list { + struct kmp_base_depnode *node; + struct kmp_depnode_list *next; +} kmp_depnode_list_t; + +static id loc = {0, 2, 0, 0, ";file;func;0;0;;"}; +kmp_task_t *__kmpc_omp_task_alloc(id *loc, int gtid, int flags, size_t sz, + size_t shar, entry_t rtn); +int __kmpc_omp_task_with_deps(id *loc, int gtid, kmp_task_t *task, int nd, + dep *dep_lst, int nd_noalias, + dep *noalias_dep_lst); +kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task); +kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task); +int __kmpc_global_thread_num(id *); + +#endif /* KMP_TASK_DEPS_H */ Index: openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c =================================================================== --- /dev/null +++ openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c @@ -0,0 +1,64 @@ +#include +#include + +#include "kmp_task_deps.h" + +// the test +int main(void) { + volatile int done = 0; + +#pragma omp parallel num_threads(2) + { + while (omp_get_thread_num() != 0 && !done) + ; + +#pragma omp single + { + kmp_task_t *A, *B; + kmp_depnode_list_t *A_succ; + kmp_base_depnode_t *B_node; + dep deps[2]; + int gtid; + int x, y; + + gtid = __kmpc_global_thread_num(&loc); + + // A + A = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); + deps[0].addr = (size_t)&x; + deps[0].len = 0; + deps[0].flags = 2; // OUT + + deps[1].addr = (size_t)&y; + deps[1].len = 0; + deps[1].flags = 2; // OUT + + __kmpc_omp_task_with_deps(&loc, gtid, A, 2, deps, 0, 0); + + // B + B = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); + deps[0].addr = (size_t)&x; + deps[0].len = 0; + deps[0].flags = 1; // IN + + deps[1].addr = (size_t)&y; + deps[1].len = 0; + deps[1].flags = 1; // IN + + __kmpc_omp_task_with_deps(&loc, gtid, B, 2, deps, 0, 0); + + // Retrieve TDG nodes + A_succ = __kmpc_task_get_successors(A); + B_node = __kmpc_task_get_depnode(B); + + // 'B' should only be added once to 'A' successors list + assert(A_succ->node == B_node); + assert(A_succ->next == NULL); + +#pragma omp taskwait + + done = 1; + } + } + return 0; +} Index: openmp/runtime/test/tasking/kmp_task_deps_multiple_edges_inoutset.c =================================================================== --- /dev/null +++ openmp/runtime/test/tasking/kmp_task_deps_multiple_edges_inoutset.c @@ -0,0 +1,109 @@ +#include +#include + +#include "kmp_task_deps.h" + +// the test +int main(void) { + volatile int done = 0; + +#pragma omp parallel num_threads(2) + { + while (omp_get_thread_num() != 0 && !done) + ; + +#pragma omp single + { + kmp_task_t *A, *B, *C, *D, *E, *F; + kmp_depnode_list_t *A_succ, *B_succ, *C_succ; + kmp_base_depnode_t *D_node, *E_node, *F_node; + dep deps[2]; + int gtid; + int x, y; + + gtid = __kmpc_global_thread_num(&loc); + + deps[0].addr = (size_t)&x; + deps[0].len = 0; + deps[0].flags = 8; // INOUTSET + + deps[1].addr = (size_t)&y; + deps[1].len = 0; + deps[1].flags = 8; // INOUTSET + + // A inoutset(x) + A = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); + __kmpc_omp_task_with_deps(&loc, gtid, A, 1, deps + 0, 0, 0); + + // B inoutset(x, y) + B = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); + __kmpc_omp_task_with_deps(&loc, gtid, B, 2, deps + 0, 0, 0); + + // C inoutset(y) + C = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); + __kmpc_omp_task_with_deps(&loc, gtid, C, 1, deps + 1, 0, 0); + + deps[0].flags = 1; // IN + deps[1].flags = 1; // IN + + // D in(x) + D = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); + __kmpc_omp_task_with_deps(&loc, gtid, D, 1, deps + 0, 0, 0); + + // E in(x, y) + E = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); + __kmpc_omp_task_with_deps(&loc, gtid, E, 2, deps + 0, 0, 0); + + // F in(y) + F = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); + __kmpc_omp_task_with_deps(&loc, gtid, F, 1, deps + 1, 0, 0); + + // Retrieve TDG nodes and check edges + A_succ = __kmpc_task_get_successors(A); + B_succ = __kmpc_task_get_successors(B); + C_succ = __kmpc_task_get_successors(C); + + D_node = __kmpc_task_get_depnode(D); + E_node = __kmpc_task_get_depnode(E); + F_node = __kmpc_task_get_depnode(F); + + // A -> D and A -> E + assert(A_succ && A_succ->next && !A_succ->next->next); + assert((A_succ->node == D_node && A_succ->next->node == E_node) || + (A_succ->node == E_node && A_succ->next->node == D_node)); + + // B -> D and B -> E and B -> F + // valid lists are + // (D, E, F) + // (D, F, E) + // (E, D, F) + // (E, F, D) + // (F, D, E) + // (F, E, D) + assert(B_succ && B_succ->next && B_succ->next->next && + !B_succ->next->next->next); + assert((B_succ->node == D_node && B_succ->next->node == E_node && + B_succ->next->next->node == F_node) || + (B_succ->node == D_node && B_succ->next->node == F_node && + B_succ->next->next->node == E_node) || + (B_succ->node == E_node && B_succ->next->node == D_node && + B_succ->next->next->node == F_node) || + (B_succ->node == E_node && B_succ->next->node == F_node && + B_succ->next->next->node == D_node) || + (B_succ->node == F_node && B_succ->next->node == D_node && + B_succ->next->next->node == E_node) || + (B_succ->node == F_node && B_succ->next->node == E_node && + B_succ->next->next->node == D_node)); + + // C -> E and C -> F + assert(C_succ && C_succ->next && !C_succ->next->next); + assert((C_succ->node == E_node && C_succ->next->node == F_node) || + (C_succ->node == F_node && C_succ->next->node == E_node)); + +#pragma omp taskwait + + done = 1; + } + } + return 0; +}