diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2255,22 +2255,26 @@
 typedef struct kmp_depnode_list kmp_depnode_list_t;
 typedef struct kmp_dephash_entry kmp_dephash_entry_t;
+// macros for checking dep flag as an integer
 #define KMP_DEP_IN 0x1
 #define KMP_DEP_OUT 0x2
 #define KMP_DEP_INOUT 0x3
 #define KMP_DEP_MTX 0x4
 #define KMP_DEP_SET 0x8
+#define KMP_DEP_ALL 0x80
 // Compiler sends us this info:
 typedef struct kmp_depend_info {
   kmp_intptr_t base_addr;
   size_t len;
   union {
-    kmp_uint8 flag;
-    struct {
+    kmp_uint8 flag; // flag as an unsigned char
+    struct { // flag as a set of 8 bits
       unsigned in : 1;
       unsigned out : 1;
       unsigned mtx : 1;
       unsigned set : 1;
+      unsigned unused : 3;
+      unsigned all : 1;
     } flags;
   };
 } kmp_depend_info_t;
@@ -2316,6 +2320,7 @@
 typedef struct kmp_dephash {
   kmp_dephash_entry_t **buckets;
   size_t size;
+  kmp_depnode_t *last_all;
   size_t generation;
   kmp_uint32 nelements;
   kmp_uint32 nconflicts;
diff --git a/openmp/runtime/src/kmp_taskdeps.h b/openmp/runtime/src/kmp_taskdeps.h
--- a/openmp/runtime/src/kmp_taskdeps.h
+++ b/openmp/runtime/src/kmp_taskdeps.h
@@ -73,6 +73,8 @@
       h->buckets[i] = 0;
     }
   }
+  __kmp_node_deref(thread, h->last_all);
+  h->last_all = NULL;
 }
 
 static inline void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h) {
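Note (not part of the patch): the new `all` bit is the eighth bit of the flag byte, which is why KMP_DEP_ALL is defined as 0x80. A standalone sketch that mirrors the union added above and checks that correspondence; it assumes the common ABI where bit-fields are allocated starting from the least significant bit:

/* Standalone sketch (illustrative only): mirrors the kmp_depend_info flag
   union to show that setting the new 'all' bit corresponds to the integer
   value 0x80 (KMP_DEP_ALL) when bit-fields are allocated from the least
   significant bit, as on typical x86/ARM ABIs. */
#include <stdio.h>

typedef union {
  unsigned char flag; // flag as an unsigned char
  struct {            // flag as a set of 8 bits
    unsigned in : 1;
    unsigned out : 1;
    unsigned mtx : 1;
    unsigned set : 1;
    unsigned unused : 3;
    unsigned all : 1;
  } flags;
} dep_flag_sketch_t;

int main(void) {
  dep_flag_sketch_t f;
  f.flag = 0;
  f.flags.all = 1;
  printf("flag = 0x%x\n", (unsigned)f.flag); // expected: 0x80 (KMP_DEP_ALL)
  return 0;
}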
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -86,6 +86,7 @@
   h->buckets = (kmp_dephash_entry **)(h + 1);
   h->generation = gen;
   h->nconflicts = 0;
+  h->last_all = current_dephash->last_all;
 
   // make sure buckets are properly initialized
   for (size_t i = 0; i < new_size; i++) {
@@ -142,6 +143,7 @@
   h->nelements = 0;
   h->nconflicts = 0;
   h->buckets = (kmp_dephash_entry **)(h + 1);
+  h->last_all = NULL;
 
   for (size_t i = 0; i < h_size; i++)
     h->buckets[i] = 0;
@@ -174,7 +176,10 @@
                                 thread, sizeof(kmp_dephash_entry_t));
 #endif
     entry->addr = addr;
-    entry->last_out = NULL;
+    if (!h->last_all) // no predecessor task with omp_all_memory dependence
+      entry->last_out = NULL;
+    else // else link the omp_all_memory depnode to the new entry
+      entry->last_out = __kmp_node_ref(h->last_all);
     entry->last_set = NULL;
     entry->prev_set = NULL;
     entry->last_flag = 0;
@@ -290,6 +295,63 @@
   return npredecessors;
 }
 
+static inline kmp_int32
+__kmp_process_dep_all(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *h,
+                      bool dep_barrier, kmp_task_t *task) {
+  KA_TRACE(30, ("__kmp_process_dep_all: T#%d processing dep_all, "
+                "dep_barrier = %d\n",
+                gtid, dep_barrier));
+  kmp_info_t *thread = __kmp_threads[gtid];
+  kmp_int32 npredecessors = 0;
+
+  // process previous omp_all_memory node if any
+  npredecessors +=
+      __kmp_depnode_link_successor(gtid, thread, task, node, h->last_all);
+  __kmp_node_deref(thread, h->last_all);
+  if (!dep_barrier) {
+    h->last_all = __kmp_node_ref(node);
+  } else {
+    // if this is a sync point in the serial sequence, then the previous
+    // outputs are guaranteed to be completed after the execution of this
+    // task so the previous output nodes can be cleared.
+    h->last_all = NULL;
+  }
+
+  // process all regular dependences
+  for (size_t i = 0; i < h->size; i++) {
+    kmp_dephash_entry_t *info = h->buckets[i];
+    if (!info) // skip empty slots in dephash
+      continue;
+    for (; info; info = info->next_in_bucket) {
+      // for each entry the omp_all_memory works as OUT dependence
+      kmp_depnode_t *last_out = info->last_out;
+      kmp_depnode_list_t *last_set = info->last_set;
+      kmp_depnode_list_t *prev_set = info->prev_set;
+      if (last_set) {
+        npredecessors +=
+            __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
+        __kmp_depnode_list_free(thread, last_set);
+        __kmp_depnode_list_free(thread, prev_set);
+        info->last_set = NULL;
+        info->prev_set = NULL;
+        info->last_flag = 0; // no sets in this dephash entry
+      } else {
+        npredecessors +=
+            __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
+      }
+      __kmp_node_deref(thread, last_out);
+      if (!dep_barrier) {
+        info->last_out = __kmp_node_ref(node);
+      } else {
+        info->last_out = NULL;
+      }
+    }
+  }
+  KA_TRACE(30, ("__kmp_process_dep_all: T#%d found %d predecessors\n", gtid,
+                npredecessors));
+  return npredecessors;
+}
+
 template <bool filter>
 static inline kmp_int32
 __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
@@ -417,7 +479,7 @@
                              kmp_depend_info_t *dep_list,
                              kmp_int32 ndeps_noalias,
                              kmp_depend_info_t *noalias_dep_list) {
-  int i, n_mtxs = 0;
+  int i, n_mtxs = 0, dep_all = 0;
 #if KMP_DEBUG
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
 #endif
@@ -429,7 +491,7 @@
   // Filter deps in dep_list
   // TODO: Different algorithm for large dep_list ( > 10 ? )
   for (i = 0; i < ndeps; i++) {
-    if (dep_list[i].base_addr != 0) {
+    if (dep_list[i].base_addr != 0 && dep_list[i].base_addr != KMP_SIZE_T_MAX) {
       KMP_DEBUG_ASSERT(
           dep_list[i].flag == KMP_DEP_IN || dep_list[i].flag == KMP_DEP_OUT ||
           dep_list[i].flag == KMP_DEP_INOUT ||
@@ -451,6 +513,13 @@
           dep_list[i].flag = KMP_DEP_OUT; // downgrade mutexinoutset to inout
         }
       }
+    } else if (dep_list[i].flag == KMP_DEP_ALL ||
+               dep_list[i].base_addr == KMP_SIZE_T_MAX) {
+      // omp_all_memory dependence can be marked by compiler by either
+      // (addr=0 && flag=0x80) (flag KMP_DEP_ALL), or (addr=-1).
+      // omp_all_memory overrides all other dependences if any
+      dep_all = 1;
+      break;
     }
   }
@@ -464,10 +533,14 @@
   // the end
   int npredecessors;
 
-  npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier, ndeps,
-                                           dep_list, task);
-  npredecessors += __kmp_process_deps<false>(
-      gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task);
+  if (!dep_all) { // regular dependences
+    npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier,
+                                             ndeps, dep_list, task);
+    npredecessors += __kmp_process_deps<false>(
+        gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task);
+  } else { // omp_all_memory dependence
+    npredecessors = __kmp_process_dep_all(gtid, node, *hash, dep_barrier, task);
+  }
 
   node->dn.task = task;
   KMP_MB();
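Note (not part of the patch): per the comment in __kmp_check_deps above, the runtime now accepts two compiler encodings of an omp_all_memory dependence record. A minimal sketch using a stand-in for kmp_depend_info_t; the struct and helper names are illustrative only:

/* Sketch only: a stand-in for the compiler-generated dependence record
   (kmp_depend_info_t in the runtime), showing the two omp_all_memory
   encodings that __kmp_check_deps recognizes. */
#include <stddef.h>

typedef struct {
  size_t addr;         // base_addr in kmp_depend_info_t
  size_t len;
  unsigned char flags; // flag in kmp_depend_info_t
} dep_sketch_t;

// Encoding 1: null address plus flag value 0x80 (KMP_DEP_ALL).
static void all_memory_by_flag(dep_sketch_t *d) {
  d->addr = 0;
  d->len = 0;
  d->flags = 0x80;
}

// Encoding 2: address of -1 (KMP_SIZE_T_MAX); the flag is then irrelevant.
static void all_memory_by_addr(dep_sketch_t *d) {
  d->addr = (size_t)(-1);
  d->len = 0;
  d->flags = 2; // e.g. OUT; overridden because the address marks all memory
}

int main(void) {
  dep_sketch_t d1, d2;
  all_memory_by_flag(&d1);
  all_memory_by_addr(&d2);
  return (int)(d1.flags != 0x80 || d2.addr != (size_t)(-1));
}

Either record makes __kmp_check_deps take the __kmp_process_dep_all path, which links the task as an OUT dependence against every existing dephash entry and records it in h->last_all for tasks created later.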
diff --git a/openmp/runtime/test/tasking/kmp_task_depend_all.c b/openmp/runtime/test/tasking/kmp_task_depend_all.c
new file
--- /dev/null
+++ b/openmp/runtime/test/tasking/kmp_task_depend_all.c
@@ -0,0 +1,298 @@
+// RUN: %libomp-compile-and-run
+// The runtime currently does not get dependency information from GCC.
+// UNSUPPORTED: gcc
+
+// Tests OMP 5.x task dependence "omp_all_memory",
+// emulates compiler codegen versions for new dep kind
+//
+// Task tree created:
+//      task0 - task1 (in: i1, i2)
+//       \
+//        task2 (inoutset: i2), (in: i1)
+//       /
+//      task3 (omp_all_memory) via flag=0x80
+//       /
+//      task4 - task5 (in: i1, i2)
+//       /
+//      task6 (omp_all_memory) via addr=-1
+//       /
+//      task7 (omp_all_memory) via flag=0x80
+//       /
+//      task8 (in: i3)
+//
+#include <stdio.h>
+#include <omp.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#define mysleep(n) Sleep(n)
+#else
+#include <unistd.h>
+#define mysleep(n) usleep((n)*1000)
+#endif
+
+// to check the # of concurrent tasks (must be 1 for MTX, <3 for other kinds)
+static int checker = 0;
+static int err = 0;
+#ifndef DELAY
+#define DELAY 100
+#endif
+
+// ---------------------------------------------------------------------------
+// internal data to emulate compiler codegen
+typedef struct DEP {
+  size_t addr;
+  size_t len;
+  unsigned char flags;
+} dep;
+#define DEP_ALL_MEM 0x80
+typedef struct task {
+  void** shareds;
+  void* entry;
+  int part_id;
+  void* destr_thunk;
+  int priority;
+  long long device_id;
+  int f_priv;
+} task_t;
+#define TIED 1
+typedef int(*entry_t)(int, task_t*);
+typedef struct ID {
+  int reserved_1;
+  int flags;
+  int reserved_2;
+  int reserved_3;
+  char *psource;
+} id;
+// thunk routine for tasks with ALL dependency
+int thunk_m(int gtid, task_t* ptask) {
+  int lcheck, th;
+  #pragma omp atomic capture
+  lcheck = ++checker;
+  th = omp_get_thread_num();
+  printf("task m_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck);
+  if (lcheck != 1) { // no more than 1 task at a time
+    err++;
+    printf("Error m1, checker %d != 1\n", lcheck);
+  }
+  mysleep(DELAY);
+  #pragma omp atomic read
+  lcheck = checker; // must still be equal to 1
+  if (lcheck != 1) {
+    err++;
+    printf("Error m2, checker %d != 1\n", lcheck);
+  }
+  #pragma omp atomic
+  --checker;
+  return 0;
+}
+// thunk routine for tasks with inoutset dependency
+int thunk_s(int gtid, task_t* ptask) {
+  int lcheck, th;
+  #pragma omp atomic capture
+  lcheck = ++checker; // 1
+  th = omp_get_thread_num();
+  printf("task 2_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck);
+  if (lcheck != 1) { // no more than 1 task at a time
+    err++;
+    printf("Error s1, checker %d != 1\n", lcheck);
+  }
+  mysleep(DELAY);
+  #pragma omp atomic read
+  lcheck = checker; // must still be equal to 1
+  if (lcheck != 1) {
+    err++;
+    printf("Error s2, checker %d != 1\n", lcheck);
+  }
+  #pragma omp atomic
+  --checker;
+  return 0;
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+int __kmpc_global_thread_num(id*);
+task_t *__kmpc_omp_task_alloc(id *loc, int gtid, int flags,
+                              size_t sz, size_t shar, entry_t rtn);
+int __kmpc_omp_task_with_deps(id *loc, int gtid, task_t *task, int ndeps,
+                              dep *dep_lst, int nd_noalias, dep *noalias_lst);
+static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
+#ifdef __cplusplus
+} // extern "C"
+#endif
+// End of internal data
+// ---------------------------------------------------------------------------
+
+int main()
+{
+  int i1,i2,i3;
+  omp_set_num_threads(8);
+  omp_set_dynamic(0);
+  #pragma omp parallel
+  {
+    #pragma omp single nowait
+    {
+      dep sdep[2];
+      task_t *ptr;
+      int gtid = __kmpc_global_thread_num(&loc);
+      int t = omp_get_thread_num();
+      #pragma omp task depend(in: i1, i2)
+      { // task 0
+        int lcheck, th;
+        #pragma omp atomic capture
+        lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 0_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error1, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+        #pragma omp atomic read
+        lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          #pragma omp atomic
+          err++;
+          printf("Error2, checker %d, not 1 or 2\n", lcheck);
+        }
+        #pragma omp atomic
+        --checker;
+      }
+      #pragma omp task depend(in: i1, i2)
+      { // task 1
+        int lcheck, th;
+        #pragma omp atomic capture
+        lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 1_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error3, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+        #pragma omp atomic read
+        lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          err++;
+          printf("Error4, checker %d, not 1 or 2\n", lcheck);
+        }
+        #pragma omp atomic
+        --checker;
+      }
+// compiler codegen start
+      // task2
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_s);
+      sdep[0].addr = (size_t)&i1;
+      sdep[0].len = 0; // not used
+      sdep[0].flags = 1; // IN
+      sdep[1].addr = (size_t)&i2;
+      sdep[1].len = 0; // not used
+      sdep[1].flags = 8; // INOUTSET
+      ptr->f_priv = t + 10; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
+
+      // task3
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
+      sdep[0].addr = (size_t)&i1; // to be ignored
+      sdep[0].len = 0; // not used
+      sdep[0].flags = 1; // IN
+      sdep[1].addr = 0;
+      sdep[1].len = 0; // not used
+      sdep[1].flags = DEP_ALL_MEM; // omp_all_memory
+      ptr->f_priv = t + 20; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
+// compiler codegen end
+      #pragma omp task depend(in: i1, i2)
+      { // task 4
+        int lcheck, th;
+        #pragma omp atomic capture
+        lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 4_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error5, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+        #pragma omp atomic read
+        lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          err++;
+          printf("Error6, checker %d, not 1 or 2\n", lcheck);
+        }
+        #pragma omp atomic
+        --checker;
+      }
+      #pragma omp task depend(in: i1, i2)
+      { // task 5
+        int lcheck, th;
+        #pragma omp atomic capture
+        lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 5_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error7, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+        #pragma omp atomic read
+        lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          err++;
+          printf("Error8, checker %d, not 1 or 2\n", lcheck);
+        }
+        #pragma omp atomic
+        --checker;
+      }
+// compiler codegen start
+      // task6
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
+      sdep[0].addr = (size_t)(-1); // omp_all_memory
+      sdep[0].len = 0; // not used
+      sdep[0].flags = 2; // OUT
+      ptr->f_priv = t + 30; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 1, sdep, 0, 0);
+
+      // task7
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
+      sdep[0].addr = 0;
+      sdep[0].len = 0; // not used
+      sdep[0].flags = DEP_ALL_MEM; // omp_all_memory
+      sdep[1].addr = (size_t)&i3; // to be ignored
+      sdep[1].len = 0; // not used
+      sdep[1].flags = 4; // MUTEXINOUTSET
+      ptr->f_priv = t + 40; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
+// compiler codegen end
+      #pragma omp task depend(in: i3)
+      { // task 8
+        int lcheck, th;
+        #pragma omp atomic capture
+        lcheck = ++checker; // 1
+        th = omp_get_thread_num();
+        printf("task 8_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck != 1) {
+          err++;
+          printf("Error9, checker %d, != 1\n", lcheck);
+        }
+        mysleep(DELAY);
+        #pragma omp atomic read
+        lcheck = checker;
+        if (lcheck != 1) {
+          err++;
+          printf("Error10, checker %d, != 1\n", lcheck);
+        }
+        #pragma omp atomic
+        --checker;
+      }
+    } // single
+  } // parallel
+  if (err == 0 && checker == 0) {
+    printf("passed\n");
+    return 0;
+  } else {
+    printf("failed, err = %d, checker = %d\n", err, checker);
+    return 1;
+  }
+}
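Note (not part of the patch): the emulated codegen in the test above corresponds roughly to the directive-level form sketched below. It is not used in the test itself because it needs a compiler that already lowers the OpenMP 5.1 omp_all_memory locator:

/* Directive-level sketch (assumption: a compiler with OpenMP 5.1
   omp_all_memory support); the test emulates the records such a compiler
   would emit. */
#include <stdio.h>

int main(void) {
  int i1 = 0, i2 = 0;
#pragma omp parallel
#pragma omp single
  {
#pragma omp task depend(in : i1, i2)
    { printf("reader 1\n"); } // may run concurrently with reader 2
#pragma omp task depend(in : i1, i2)
    { printf("reader 2\n"); }
#pragma omp task depend(out : omp_all_memory)
    { printf("all-memory task\n"); } // waits for both readers and orders
                                     // with any later dependent task
  }
  return 0;
}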
printf("Error s1, checker %d != 1\n", lcheck); + } + mysleep(DELAY); + #pragma omp atomic read + lcheck = checker; // must still be equal to 1 + if (lcheck != 1) { + err++; + printf("Error s2, checker %d != 1\n", lcheck); + } + #pragma omp atomic + --checker; + return 0; +} + +#ifdef __cplusplus +extern "C" { +#endif +int __kmpc_global_thread_num(id*); +task_t *__kmpc_omp_task_alloc(id *loc, int gtid, int flags, + size_t sz, size_t shar, entry_t rtn); +int __kmpc_omp_task_with_deps(id *loc, int gtid, task_t *task, int ndeps, + dep *dep_lst, int nd_noalias, dep *noalias_lst); +void __kmpc_omp_wait_deps(id *loc, int gtid, int ndeps, dep *dep_lst, + int ndeps_noalias, dep *noalias_dep_lst); +static id loc = {0, 2, 0, 0, ";file;func;0;0;;"}; +#ifdef __cplusplus +} // extern "C" +#endif +// End of internal data +// --------------------------------------------------------------------------- + +int main() +{ + int i1,i2,i3; + omp_set_num_threads(8); + omp_set_dynamic(0); + #pragma omp parallel + { + #pragma omp single nowait + { + dep sdep[2]; + task_t *ptr; + int gtid = __kmpc_global_thread_num(&loc); + int t = omp_get_thread_num(); + // Create longest task first to ensure it is stolen. + // The test may hang if the task created last and + // executed by a thread which executes taskwait. + #pragma omp task + { // task 9 - long running task + int flag; + int th = omp_get_thread_num(); + printf("signalled independent task 9_%d, th %d started....\n", t, th); + // Wait for taskwait depend() to finish + // If the taskwait depend() improperly depends on this task + // to finish, then the test will hang and a timeout should trigger + while (1) { + #pragma omp atomic read + flag = taskwait_flag; + if (flag == 1) + break; + } + printf("signalled independent task 9_%d, th %d finished....\n", t, th); + } + #pragma omp task depend(in: i1, i2) + { // task 0 + int lcheck, th; + #pragma omp atomic capture + lcheck = ++checker; // 1 or 2 + th = omp_get_thread_num(); + printf("task 0_%d, th %d, checker %d\n", t, th, lcheck); + if (lcheck > 2 || lcheck < 1) { + err++; // no more than 2 tasks concurrently + printf("Error1, checker %d, not 1 or 2\n", lcheck); + } + mysleep(DELAY); + #pragma omp atomic read + lcheck = checker; // 1 or 2 + if (lcheck > 2 || lcheck < 1) { + #pragma omp atomic + err++; + printf("Error2, checker %d, not 1 or 2\n", lcheck); + } + #pragma omp atomic + --checker; + } + #pragma omp task depend(in: i1, i2) + { // task 1 + int lcheck, th; + #pragma omp atomic capture + lcheck = ++checker; // 1 or 2 + th = omp_get_thread_num(); + printf("task 1_%d, th %d, checker %d\n", t, th, lcheck); + if (lcheck > 2 || lcheck < 1) { + err++; // no more than 2 tasks concurrently + printf("Error3, checker %d, not 1 or 2\n", lcheck); + } + mysleep(DELAY); + #pragma omp atomic read + lcheck = checker; // 1 or 2 + if (lcheck > 2 || lcheck < 1) { + err++; + printf("Error4, checker %d, not 1 or 2\n", lcheck); + } + #pragma omp atomic + --checker; + } +// compiler codegen start + // task2 + ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_s); + sdep[0].addr = (size_t)&i1; + sdep[0].len = 0; // not used + sdep[0].flags = 1; // IN + sdep[1].addr = (size_t)&i2; + sdep[1].len = 0; // not used + sdep[1].flags = 8; // INOUTSET + ptr->f_priv = t + 10; // init single first-private variable + __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0); + + // task3 + ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m); + sdep[0].addr = (size_t)&i1; // to be ignored + sdep[0].len = 
+      sdep[0].flags = 1; // IN
+      sdep[1].addr = 0;
+      sdep[1].len = 0; // not used
+      sdep[1].flags = DEP_ALL_MEM; // omp_all_memory
+      ptr->f_priv = t + 20; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
+// compiler codegen end
+      #pragma omp task depend(in: i1, i2)
+      { // task 4
+        int lcheck, th;
+        #pragma omp atomic capture
+        lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 4_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error5, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+        #pragma omp atomic read
+        lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          err++;
+          printf("Error6, checker %d, not 1 or 2\n", lcheck);
+        }
+        #pragma omp atomic
+        --checker;
+      }
+      #pragma omp task depend(in: i1, i2)
+      { // task 5
+        int lcheck, th;
+        #pragma omp atomic capture
+        lcheck = ++checker; // 1 or 2
+        th = omp_get_thread_num();
+        printf("task 5_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck > 2 || lcheck < 1) {
+          err++; // no more than 2 tasks concurrently
+          printf("Error7, checker %d, not 1 or 2\n", lcheck);
+        }
+        mysleep(DELAY);
+        #pragma omp atomic read
+        lcheck = checker; // 1 or 2
+        if (lcheck > 2 || lcheck < 1) {
+          err++;
+          printf("Error8, checker %d, not 1 or 2\n", lcheck);
+        }
+        #pragma omp atomic
+        --checker;
+      }
+// compiler codegen start
+      // task6
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
+      sdep[0].addr = (size_t)(-1); // omp_all_memory
+      sdep[0].len = 0; // not used
+      sdep[0].flags = 2; // OUT
+      ptr->f_priv = t + 30; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 1, sdep, 0, 0);
+
+      // task7
+      ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
+      sdep[0].addr = 0;
+      sdep[0].len = 0; // not used
+      sdep[0].flags = DEP_ALL_MEM; // omp_all_memory
+      sdep[1].addr = (size_t)&i3; // to be ignored
+      sdep[1].len = 0; // not used
+      sdep[1].flags = 4; // MUTEXINOUTSET
+      ptr->f_priv = t + 40; // init single first-private variable
+      __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
+// compiler codegen end
+      #pragma omp task depend(in: i3)
+      { // task 8
+        int lcheck, th;
+        #pragma omp atomic capture
+        lcheck = ++checker; // 1
+        th = omp_get_thread_num();
+        printf("task 8_%d, th %d, checker %d\n", t, th, lcheck);
+        if (lcheck != 1) {
+          err++;
+          printf("Error9, checker %d, != 1\n", lcheck);
+        }
+        mysleep(DELAY);
+        #pragma omp atomic read
+        lcheck = checker;
+        if (lcheck != 1) {
+          err++;
+          printf("Error10, checker %d, != 1\n", lcheck);
+        }
+        #pragma omp atomic
+        --checker;
+      }
+      mysleep(1); // wait a bit to ensure at least first task is stolen
+//    #pragma omp taskwait depend(omp_all_memory: out)
+      printf("all 10 tasks generated;\n"
+             "taskwait depend(omp_all_memory: out) started, th %d\n", t);
+      __kmpc_omp_wait_deps(&loc, gtid, 1, sdep, 0, 0);
+      #pragma omp atomic write
+      taskwait_flag = 1;
+      printf("taskwait depend(omp_all_memory: out) passed, th %d\n", t);
+      fflush(0);
+    } // single
+  } // parallel
+  if (err == 0 && checker == 0) {
+    printf("passed\n");
+    return 0;
+  } else {
+    printf("failed, err = %d, checker = %d\n", err, checker);
+    return 1;
+  }
+}
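Note (not part of the patch): the final __kmpc_omp_wait_deps call in the second test plays the role of the commented-out taskwait pragma. A rough directive-level sketch of that ending sequence, again assuming a compiler with OpenMP 5.1 support for omp_all_memory and for the depend clause on taskwait:

/* Directive-level sketch (assumed 5.1-capable compiler): a taskwait with an
   all-memory dependence waits for previously generated sibling tasks that
   have dependences, but not for dependence-free tasks. */
#include <stdio.h>

int main(void) {
  int i3 = 0;
#pragma omp parallel
#pragma omp single
  {
#pragma omp task depend(in : i3)
    { printf("dependent task\n"); } // the taskwait below waits for this
#pragma omp task
    { printf("independent task\n"); } // no dependences: not waited for
#pragma omp taskwait depend(out : omp_all_memory)
    printf("past the all-memory taskwait\n");
  }
  return 0;
}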