Index: openmp/tools/archer/README.md =================================================================== --- openmp/tools/archer/README.md +++ openmp/tools/archer/README.md @@ -133,6 +133,18 @@ +all_memory +0 +Turn on tracking and analysis of omp_all_memory +dependencies. Archer will activate the support automatically when +such a dependency is seen during execution. By that point the analysis +has already missed synchronization semantics, which will lead to false +reports in most cases. + + + + + report_data_leak 0 Report leaking OMPT data for execution under Index: openmp/tools/archer/ompt-tsan.cpp =================================================================== --- openmp/tools/archer/ompt-tsan.cpp +++ openmp/tools/archer/ompt-tsan.cpp @@ -63,6 +63,7 @@ int enabled{1}; int report_data_leak{0}; int ignore_serial{0}; + std::atomic all_memory{0}; ArcherFlags(const char *env) { if (env) { @@ -70,6 +71,7 @@ std::string token; std::string str(env); std::istringstream iss(str); + int tmp_int; while (std::getline(iss, token, ' ')) tokens.push_back(token); @@ -89,6 +91,10 @@ continue; if (sscanf(it->c_str(), "ignore_serial=%d", &ignore_serial)) continue; + if (sscanf(it->c_str(), "all_memory=%d", &tmp_int)) { + all_memory = tmp_int; + continue; + } std::cerr << "Illegal values for ARCHER_OPTIONS variable: " << token << std::endl; } @@ -480,6 +486,9 @@ /// this task. ompt_tsan_clockid Taskwait{0}; + /// Child tasks use its address to model omp_all_memory dependencies + ompt_tsan_clockid AllMemory[2]{0}; + /// Whether this task is currently executing a barrier. 
bool InBarrier{false}; @@ -538,10 +547,16 @@ char getBarrierIndex() { return BarrierIndex; } void toggleBarrierIndex() { BarrierIndex ^= 1; } + void setAllMemoryDep() { AllMemory[0] = 1; } + bool hasAllMemoryDep() { return AllMemory[0]; } + void *GetTaskPtr() { return &Task; } void *GetTaskwaitPtr() { return &Taskwait; } + void *GetLastAllMemoryPtr() { return AllMemory; } + void *GetNextAllMemoryPtr() { return AllMemory + 1; } + TaskData *Init(TaskData *parent, int taskType) { TaskType = taskType; Parent = parent; @@ -886,13 +901,30 @@ } } +// LastAllMemoryPtr marks the beginning of an all_memory epoch +// NextAllMemoryPtr marks the end of an all_memory epoch +// All tasks with depend begin execution after LastAllMemoryPtr +// and end before NextAllMemoryPtr static void releaseDependencies(TaskData *task) { + if (archer_flags->all_memory) { + if (task->hasAllMemoryDep()) { + TsanHappensBefore(task->Parent->GetLastAllMemoryPtr()); + TsanHappensBefore(task->Parent->GetNextAllMemoryPtr()); + } else if (task->DependencyCount) + TsanHappensBefore(task->Parent->GetNextAllMemoryPtr()); + } for (unsigned i = 0; i < task->DependencyCount; i++) { task->Dependencies[i].AnnotateEnd(); } } static void acquireDependencies(TaskData *task) { + if (archer_flags->all_memory) { + if (task->hasAllMemoryDep()) + TsanHappensAfter(task->Parent->GetNextAllMemoryPtr()); + else if (task->DependencyCount) + TsanHappensAfter(task->Parent->GetLastAllMemoryPtr()); + } for (unsigned i = 0; i < task->DependencyCount; i++) { task->Dependencies[i].AnnotateBegin(); } @@ -1010,13 +1042,28 @@ Data->Dependencies = (TaskDependency *)malloc(sizeof(TaskDependency) * ndeps); Data->DependencyCount = ndeps; - for (int i = 0; i < ndeps; i++) { + for (int i = 0, d = 0; i < ndeps; i++, d++) { + if (deps[i].dependence_type == ompt_dependence_type_out_all_memory || + deps[i].dependence_type == ompt_dependence_type_inout_all_memory) { + Data->setAllMemoryDep(); + Data->DependencyCount--; + if 
(!archer_flags->all_memory) { + printf("The application uses omp_all_memory, but Archer was\n" + "started to not consider omp_all_memory. This can lead\n" + "to false data race alerts.\n" + "Include all_memory=1 in ARCHER_OPTIONS to consider\n" + "omp_all_memory from the beginning.\n"); + archer_flags->all_memory = 1; + } + d--; + continue; + } auto ret = Data->Parent->DependencyMap->insert( std::make_pair(deps[i].variable.ptr, nullptr)); if (ret.second) { ret.first->second = DependencyData::New(); } - new ((void *)(Data->Dependencies + i)) + new ((void *)(Data->Dependencies + d)) TaskDependency(ret.first->second, deps[i].dependence_type); } Index: openmp/tools/archer/tests/task/omp_task_depend_all.c =================================================================== --- /dev/null +++ openmp/tools/archer/tests/task/omp_task_depend_all.c @@ -0,0 +1,352 @@ +// RUN--: %libarcher-compile-and-run | FileCheck %s --check-prefix=NOENV +// RUN: %libarcher-compile && env ARCHER_OPTIONS="all_memory=1" \ +// RUN: %libarcher-run | FileCheck %s --check-prefix=ENV +// REQUIRES: tsan + +// The runtime currently does not get dependency information from GCC. 
+// UNSUPPORTED: gcc + +// Tests OMP 5.x task dependence "omp_all_memory", +// emulates compiler codegen versions for new dep kind +// +// Task tree created: +// task0 - task1 (in: i1, i2) +// \ +// task2 (inoutset: i2), (in: i1) +// / +// task3 (omp_all_memory) via flag=0x80 +// / +// task4 - task5 (in: i1, i2) +// / +// task6 (omp_all_memory) via addr=-1 +// / +// task7 (omp_all_memory) via flag=0x80 +// / +// task8 (in: i3) +// + +#include +#include + +#ifdef _WIN32 +#include +#define mysleep(n) Sleep(n) +#else +#include +#define mysleep(n) usleep((n)*1000) +#endif + +// to check the # of concurrent tasks (must be 1 for MTX, <3 for other kinds) +static int checker = 0; +static int err = 0; +#ifndef DELAY +#define DELAY 100 +#endif + +// --------------------------------------------------------------------------- +// internal data to emulate compiler codegen +typedef struct DEP { + size_t addr; + size_t len; + unsigned char flags; +} dep; +#define DEP_ALL_MEM 0x80 +typedef struct task { + void** shareds; + void* entry; + int part_id; + void* destr_thunk; + int priority; + long long device_id; + int f_priv; +} task_t; +#define TIED 1 +typedef int(*entry_t)(int, task_t*); +typedef struct ID { + int reserved_1; + int flags; + int reserved_2; + int reserved_3; + char *psource; +} id; +// thunk routine for tasks with ALL dependency +int thunk_m(int gtid, task_t* ptask) { + int lcheck, th; + #pragma omp atomic capture + lcheck = ++checker; + th = omp_get_thread_num(); + printf("task m_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck); + if (lcheck != 1) { // no more than 1 task at a time + err++; + printf("Error m1, checker %d != 1\n", lcheck); + } + mysleep(DELAY); + #pragma omp atomic read + lcheck = checker; // must still be equal to 1 + if (lcheck != 1) { + err++; + printf("Error m2, checker %d != 1\n", lcheck); + } + #pragma omp atomic + --checker; + return 0; +} +// thunk routine for tasks with inoutset dependency +int thunk_s(int gtid, task_t* ptask) { + int 
lcheck, th; + #pragma omp atomic capture + lcheck = ++checker; // 1 + th = omp_get_thread_num(); + printf("task 2_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck); + if (lcheck != 1) { // no more than 1 task at a time + err++; + printf("Error s1, checker %d != 1\n", lcheck); + } + mysleep(DELAY); + #pragma omp atomic read + lcheck = checker; // must still be equal to 1 + if (lcheck != 1) { + err++; + printf("Error s2, checker %d != 1\n", lcheck); + } + #pragma omp atomic + --checker; + return 0; +} + +#ifdef __cplusplus +extern "C" { +#endif +int __kmpc_global_thread_num(id*); +task_t *__kmpc_omp_task_alloc(id *loc, int gtid, int flags, + size_t sz, size_t shar, entry_t rtn); +int __kmpc_omp_task_with_deps(id *loc, int gtid, task_t *task, int ndeps, + dep *dep_lst, int nd_noalias, dep *noalias_lst); +static id loc = {0, 2, 0, 0, ";file;func;0;0;;"}; +#ifdef __cplusplus +} // extern "C" +#endif +// End of internal data +// --------------------------------------------------------------------------- + +int main() +{ + char* ompx_all_memory = (void*)0xffffffffffffffff; + int i1,i2,i3; + omp_set_num_threads(8); + omp_set_dynamic(0); + #pragma omp parallel + { + #pragma omp single nowait + { + dep sdep[2]; + task_t *ptr; + int gtid = __kmpc_global_thread_num(&loc); + int t = omp_get_thread_num(); + #pragma omp task depend(in: i1, i2) + { // task 0 + int lcheck, th; + #pragma omp atomic capture + lcheck = ++checker; // 1 or 2 + th = omp_get_thread_num(); + printf("task 0_%d, th %d, checker %d\n", t, th, lcheck); + if (lcheck > 2 || lcheck < 1) { + err++; // no more than 2 tasks concurrently + printf("Error1, checker %d, not 1 or 2\n", lcheck); + } + mysleep(DELAY); + #pragma omp atomic read + lcheck = checker; // 1 or 2 + if (lcheck > 2 || lcheck < 1) { + #pragma omp atomic + err++; + printf("Error2, checker %d, not 1 or 2\n", lcheck); + } + #pragma omp atomic + --checker; + } + #pragma omp task depend(in: i1, i2) + { // task 1 + int lcheck, th; + #pragma omp atomic 
capture + lcheck = ++checker; // 1 or 2 + th = omp_get_thread_num(); + printf("task 1_%d, th %d, checker %d\n", t, th, lcheck); + if (lcheck > 2 || lcheck < 1) { + err++; // no more than 2 tasks concurrently + printf("Error3, checker %d, not 1 or 2\n", lcheck); + } + mysleep(DELAY); + #pragma omp atomic read + lcheck = checker; // 1 or 2 + if (lcheck > 2 || lcheck < 1) { + err++; + printf("Error4, checker %d, not 1 or 2\n", lcheck); + } + #pragma omp atomic + --checker; + } +// compiler codegen start + // task2 + ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_s); + sdep[0].addr = (size_t)&i1; + sdep[0].len = 0; // not used + sdep[0].flags = 1; // IN + sdep[1].addr = (size_t)&i2; + sdep[1].len = 0; // not used + sdep[1].flags = 8; // INOUTSET + ptr->f_priv = t + 10; // init single first-private variable + __kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0); + + + // task3 + #pragma omp task depend(in:i1) depend(inout: ompx_all_memory[0]) + { + int lcheck, th; + #pragma omp atomic capture + lcheck = ++checker; + th = omp_get_thread_num(); + printf("task 3_%d, th %d, checker %d\n", t, th, lcheck); + if (lcheck != 1) { // no more than 1 task at a time + err++; + printf("Error m1, checker %d != 1\n", lcheck); + } + mysleep(DELAY); + #pragma omp atomic read + lcheck = checker; // must still be equal to 1 + if (lcheck != 1) { + err++; + printf("Error m2, checker %d != 1\n", lcheck); + } + #pragma omp atomic + --checker; + } +// compiler codegen end + #pragma omp task depend(in: i1, i2) + { // task 4 + int lcheck, th; + #pragma omp atomic capture + lcheck = ++checker; // 1 or 2 + th = omp_get_thread_num(); + printf("task 4_%d, th %d, checker %d\n", t, th, lcheck); + if (lcheck > 2 || lcheck < 1) { + err++; // no more than 2 tasks concurrently + printf("Error5, checker %d, not 1 or 2\n", lcheck); + } + mysleep(DELAY); + #pragma omp atomic read + lcheck = checker; // 1 or 2 + if (lcheck > 2 || lcheck < 1) { + err++; + printf("Error6, checker %d, 
not 1 or 2\n", lcheck); + } + #pragma omp atomic + --checker; + } + #pragma omp task depend(in: i1, i2) + { // task 5 + int lcheck, th; + #pragma omp atomic capture + lcheck = ++checker; // 1 or 2 + th = omp_get_thread_num(); + printf("task 5_%d, th %d, checker %d\n", t, th, lcheck); + if (lcheck > 2 || lcheck < 1) { + err++; // no more than 2 tasks concurrently + printf("Error7, checker %d, not 1 or 2\n", lcheck); + } + mysleep(DELAY); + #pragma omp atomic read + lcheck = checker; // 1 or 2 + if (lcheck > 2 || lcheck < 1) { + err++; + printf("Error8, checker %d, not 1 or 2\n", lcheck); + } + #pragma omp atomic + --checker; + } + // task6 + #pragma omp task depend(inout: ompx_all_memory[0]) + { + int lcheck, th; + #pragma omp atomic capture + lcheck = ++checker; + th = omp_get_thread_num(); + printf("task 6_%d, th %d, checker %d\n", t, th, lcheck); + if (lcheck != 1) { // no more than 1 task at a time + err++; + printf("Error m1, checker %d != 1\n", lcheck); + } + mysleep(DELAY); + #pragma omp atomic read + lcheck = checker; // must still be equal to 1 + if (lcheck != 1) { + err++; + printf("Error m2, checker %d != 1\n", lcheck); + } + #pragma omp atomic + --checker; + } + // task7 + #pragma omp task depend(inout: ompx_all_memory[0]) depend(mutexinoutset:i3) + { + int lcheck, th; + #pragma omp atomic capture + lcheck = ++checker; + th = omp_get_thread_num(); + printf("task 7_%d, th %d, checker %d\n", t, th, lcheck); + if (lcheck != 1) { // no more than 1 task at a time + err++; + printf("Error m1, checker %d != 1\n", lcheck); + } + mysleep(DELAY); + #pragma omp atomic read + lcheck = checker; // must still be equal to 1 + if (lcheck != 1) { + err++; + printf("Error m2, checker %d != 1\n", lcheck); + } + #pragma omp atomic + --checker; + } + #pragma omp task depend(in: i3) + { // task 8 + int lcheck, th; + #pragma omp atomic capture + lcheck = ++checker; // 1 + th = omp_get_thread_num(); + printf("task 8_%d, th %d, checker %d\n", t, th, lcheck); + if (lcheck != 1) { 
+ err++; + printf("Error9, checker %d, != 1\n", lcheck); + } + mysleep(DELAY); + #pragma omp atomic read + lcheck = checker; + if (lcheck != 1) { + err++; + printf("Error10, checker %d, != 1\n", lcheck); + } + #pragma omp atomic + --checker; + } + } // single + } // parallel + if (err == 0 && checker == 0) { + printf("passed\n"); + return 0; + } else { + printf("failed, err = %d, checker = %d\n", err, checker); + return 1; + } +} + +// NOENV-NOT: ThreadSanitizer: data race +// NOENV-NOT: ThreadSanitizer: reported +// NOENV: omp_all_memory +// NOENV-NOT: ThreadSanitizer: data race +// NOENV-NOT: ThreadSanitizer: reported +// NOENV: passed + +// ENV-NOT: ThreadSanitizer: data race +// ENV-NOT: ThreadSanitizer: reported +// ENV: passed