Index: openmp/tools/archer/ompt-tsan.cpp =================================================================== --- openmp/tools/archer/ompt-tsan.cpp +++ openmp/tools/archer/ompt-tsan.cpp @@ -712,75 +712,80 @@ } } -static void ompt_tsan_task_schedule(ompt_data_t *first_task_data, - ompt_task_status_t prior_task_status, - ompt_data_t *second_task_data) { - TaskData *FromTask = ToTaskData(first_task_data); - TaskData *ToTask = ToTaskData(second_task_data); - - if (ToTask->Included && prior_task_status != ompt_task_complete) - return; // No further synchronization for begin included tasks - if (FromTask->Included && prior_task_status == ompt_task_complete) { - // Just delete the task: - while (FromTask != nullptr && --FromTask->RefCount == 0) { - TaskData *Parent = FromTask->Parent; - if (FromTask->DependencyCount > 0) { - delete[] FromTask->Dependencies; - } - delete FromTask; - FromTask = Parent; +static void __ompt_tsan_release_task(TaskData *task) { + while (task != nullptr && --task->RefCount == 0) { + TaskData *Parent = task->Parent; + if (task->DependencyCount > 0) { + delete[] task->Dependencies; } - return; + delete task; + task = Parent; } +} - if (ToTask->execution == 0) { - ToTask->execution++; - // 1. Task will begin execution after it has been created. - TsanHappensAfter(ToTask->GetTaskPtr()); - for (unsigned i = 0; i < ToTask->DependencyCount; i++) { - ompt_dependence_t *Dependency = &ToTask->Dependencies[i]; +static void ompt_tsan_task_schedule(ompt_data_t *first_task_data, + ompt_task_status_t prior_task_status, + ompt_data_t *second_task_data) { - TsanHappensAfter(Dependency->variable.ptr); - // in and inout dependencies are also blocked by prior in dependencies! - if (Dependency->dependence_type == ompt_dependence_type_out || Dependency->dependence_type == ompt_dependence_type_inout) { - TsanHappensAfter(ToInAddr(Dependency->variable.ptr)); - } - } - } else { - // 2. Task will resume after it has been switched away. - TsanHappensAfter(ToTask->GetTaskPtr()); - } + // + // The necessary action depends on prior_task_status: + // + // ompt_task_early_fulfill = 5, + // -> ignored + // + // ompt_task_late_fulfill = 6, + // -> first completed, first freed, second ignored + // + // ompt_task_complete = 1, + // ompt_task_cancel = 3, + // -> first completed, first freed, second starts + // + // ompt_task_detach = 4, + // ompt_task_yield = 2, + // ompt_task_switch = 7 + // -> first suspended, second starts + // - if (prior_task_status != ompt_task_complete) { - ToTask->ImplicitTask = FromTask->ImplicitTask; - assert(ToTask->ImplicitTask != NULL && - "A task belongs to a team and has an implicit task on the stack"); - } + if (prior_task_status == ompt_task_early_fulfill) + return; - // Task may be resumed at a later point in time. - TsanHappensBefore(FromTask->GetTaskPtr()); + TaskData *FromTask = ToTaskData(first_task_data); + // Legacy handling for missing reduction callback if (hasReductionCallback < ompt_set_always && FromTask->InBarrier) { // We want to ignore writes in the runtime code during barriers, // but not when executing tasks with user code! TsanIgnoreWritesEnd(); } - if (prior_task_status == ompt_task_complete) { // task finished - - // Task will finish before a barrier in the surrounding parallel region ... - ParallelData *PData = FromTask->Team; - TsanHappensBefore( - PData->GetBarrierPtr(FromTask->ImplicitTask->BarrierIndex)); - - // ... and before an eventual taskwait by the parent thread. - TsanHappensBefore(FromTask->Parent->GetTaskwaitPtr()); - - if (FromTask->TaskGroup != nullptr) { - // This task is part of a taskgroup, so it will finish before the - // corresponding taskgroup_end. - TsanHappensBefore(FromTask->TaskGroup->GetPtr()); + // The late fulfill happens after the detached task finished execution + if (prior_task_status == ompt_task_late_fulfill) + TsanHappensAfter(FromTask->GetTaskPtr()); + + // task completed execution + if (prior_task_status == ompt_task_complete || + prior_task_status == ompt_task_cancel || + prior_task_status == ompt_task_late_fulfill) { + // Included tasks are executed sequentially, no need to track + // synchronization + if (!FromTask->Included) { + // Task will finish before a barrier in the surrounding parallel region + // ... + ParallelData *PData = FromTask->Team; + TsanHappensBefore( + PData->GetBarrierPtr(FromTask->ImplicitTask->BarrierIndex)); + + // ... and before an eventual taskwait by the parent thread. + TsanHappensBefore(FromTask->Parent->GetTaskwaitPtr()); + + if (FromTask->TaskGroup != nullptr) { + // This task is part of a taskgroup, so it will finish before the + // corresponding taskgroup_end. + TsanHappensBefore(FromTask->TaskGroup->GetPtr()); + } } + + // release dependencies for (unsigned i = 0; i < FromTask->DependencyCount; i++) { ompt_dependence_t *Dependency = &FromTask->Dependencies[i]; @@ -790,19 +795,50 @@ TsanHappensBefore(Dependency->variable.ptr); } } - while (FromTask != nullptr && --FromTask->RefCount == 0) { - TaskData *Parent = FromTask->Parent; - if (FromTask->DependencyCount > 0) { - delete[] FromTask->Dependencies; - } - delete FromTask; - FromTask = Parent; - } + // free the previously running task + __ompt_tsan_release_task(FromTask); + } + + // For late fulfill of detached task, there is no task to schedule to + if (prior_task_status == ompt_task_late_fulfill) { + return; } + + TaskData *ToTask = ToTaskData(second_task_data); + // Legacy handling for missing reduction callback if (hasReductionCallback < ompt_set_always && ToTask->InBarrier) { // We re-enter runtime code which currently performs a barrier. TsanIgnoreWritesBegin(); } + + // task suspended + if (prior_task_status == ompt_task_switch || + prior_task_status == ompt_task_yield || + prior_task_status == ompt_task_detach) { + // Task may be resumed at a later point in time. + TsanHappensBefore(FromTask->GetTaskPtr()); + ToTask->ImplicitTask = FromTask->ImplicitTask; + assert(ToTask->ImplicitTask != NULL && + "A task belongs to a team and has an implicit task on the stack"); + } + + // Handle dependencies on first execution of the task + if (ToTask->execution == 0) { + ToTask->execution++; + for (unsigned i = 0; i < ToTask->DependencyCount; i++) { + ompt_dependence_t *Dependency = &ToTask->Dependencies[i]; + + TsanHappensAfter(Dependency->variable.ptr); + // in and inout dependencies are also blocked by prior in dependencies! + if (Dependency->dependence_type == ompt_dependence_type_out || + Dependency->dependence_type == ompt_dependence_type_inout) { + TsanHappensAfter(ToInAddr(Dependency->variable.ptr)); + } + } + } + // 1. Task will begin execution after it has been created. + // 2. Task will resume after it has been switched away. + TsanHappensAfter(ToTask->GetTaskPtr()); } static void ompt_tsan_dependences(ompt_data_t *task_data, Index: openmp/tools/archer/tests/CMakeLists.txt =================================================================== --- openmp/tools/archer/tests/CMakeLists.txt +++ openmp/tools/archer/tests/CMakeLists.txt @@ -30,7 +30,13 @@ pythonize_bool(LIBARCHER_HAVE_LIBATOMIC) pythonize_bool(OPENMP_TEST_COMPILER_HAS_TSAN_FLAGS) -add_openmp_testsuite(check-libarcher "Running libarcher tests" ${CMAKE_CURRENT_BINARY_DIR} DEPENDS archer omp) +set(ARCHER_TSAN_TEST_DEPENDENCE "") +if(TARGET tsan) + set(ARCHER_TSAN_TEST_DEPENDENCE tsan) +endif() + +add_openmp_testsuite(check-libarcher "Running libarcher tests" ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS archer omp ${ARCHER_TSAN_TEST_DEPENDENCE}) # Configure the lit.site.cfg.in file set(AUTO_GEN_COMMENT "## Autogenerated by libarcher configuration.\n# Do not edit!") Index: openmp/tools/archer/tests/task/task_early_fulfill.c =================================================================== --- /dev/null +++ openmp/tools/archer/tests/task/task_early_fulfill.c @@ -0,0 +1,26 @@ +// RUN: %libarcher-compile -fopenmp-version=50 && env OMP_NUM_THREADS='3' \ +// RUN: %libarcher-run +//| FileCheck %s + +// Checked gcc 9.2 still does not support detach clause on task construct. +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8, gcc-9 +// clang supports detach clause since version 11. +// UNSUPPORTED: clang-10, clang-9, clang-8, clang-7 +// icc compiler does not support detach clause. +// UNSUPPORTED: icc +// REQUIRES: tsan + +#include +#include + +int main() { +#pragma omp parallel +#pragma omp master + { + omp_event_handle_t event; +#pragma omp task detach(event) if (0) + { omp_fulfill_event(event); } +#pragma omp taskwait + } + return 0; +} Index: openmp/tools/archer/tests/task/task_late_fulfill.c =================================================================== --- /dev/null +++ openmp/tools/archer/tests/task/task_late_fulfill.c @@ -0,0 +1,62 @@ +// RUN: %libarcher-compile -fopenmp-version=50 && env OMP_NUM_THREADS='3' \ +// RUN: %libarcher-run-race | FileCheck %s + +// Checked gcc 9.2 still does not support detach clause on task construct. +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8, gcc-9 +// clang supports detach clause since version 11. +// UNSUPPORTED: clang-10, clang-9, clang-8, clang-7 +// icc compiler does not support detach clause. +// UNSUPPORTED: icc +// REQUIRES: tsan + +#include +#include +#include + +int main() { +#pragma omp parallel +#pragma omp master + { + omp_event_handle_t event; + int a = 0, b = 0; + omp_event_handle_t *f_event; +#pragma omp task detach(event) depend(out : f_event) shared(f_event) + { + printf("%i: task 1\n", omp_get_thread_num()); + f_event = &event; + } + usleep(10000); +#pragma omp task depend(in : f_event) shared(f_event, a, b) + { + printf("%i: task 2, %p, %i, %i\n", omp_get_thread_num(), f_event, a, b); + f_event = &event; + } + usleep(10000); + a++; + printf("%i: calling omp_fulfill_event\n", omp_get_thread_num()); + omp_fulfill_event(*f_event); +//#pragma omp task if (0) depend(in : f_event) +// {} + b++; + usleep(10000); +#pragma omp taskwait + } + return 0; +} + +// no race for a++ in line 32: +// CHECK-NOT: #0 {{.*}}task_late_fulfill.c:35 + +// we expect a race on f_event: + +// CHECK: WARNING: ThreadSanitizer: data race +// CHECK-NEXT: {{(Write|Read)}} of size 8 +// CHECK-NEXT: #0 {{.*}}task_late_fulfill.c:37 +// CHECK: Previous write of size 8 +// CHECK-NEXT: #0 {{.*}}task_late_fulfill.c:26 + +// CHECK: WARNING: ThreadSanitizer: data race +// CHECK-NEXT: {{(Write|Read)}} of size 4 +// CHECK-NEXT: #0 {{.*}}task_late_fulfill.c:31 +// CHECK: Previous write of size 4 +// CHECK-NEXT: #0 {{.*}}task_late_fulfill.c:40