diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -4023,6 +4023,9 @@ lo = pr_buf->th_doacross_info[2]; up = pr_buf->th_doacross_info[3]; st = pr_buf->th_doacross_info[4]; +#if OMPT_SUPPORT && OMPT_OPTIONAL + ompt_dependence_t deps[num_dims]; +#endif if (st == 1) { // most common case if (vec[0] < lo || vec[0] > up) { KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " @@ -4048,6 +4051,10 @@ } iter_number = (kmp_uint64)(lo - vec[0]) / (-st); } +#if OMPT_SUPPORT && OMPT_OPTIONAL + deps[0].variable.value = iter_number; + deps[0].dependence_type = ompt_dependence_type_sink; +#endif for (i = 1; i < num_dims; ++i) { kmp_int64 iter, ln; kmp_int32 j = i * 4; @@ -4081,6 +4088,10 @@ iter = (kmp_uint64)(lo - vec[i]) / (-st); } iter_number = iter + ln * iter_number; +#if OMPT_SUPPORT && OMPT_OPTIONAL + deps[i].variable.value = iter; + deps[i].dependence_type = ompt_dependence_type_sink; +#endif } shft = iter_number % 32; // use 32-bit granularity iter_number >>= 5; // divided by 32 @@ -4089,6 +4100,12 @@ KMP_YIELD(TRUE); } KMP_MB(); +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_dependences) { + ompt_callbacks.ompt_callback(ompt_callback_dependences)( + &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims); + } +#endif KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", gtid, (iter_number << 5) + shft)); @@ -4116,6 +4133,9 @@ num_dims = pr_buf->th_doacross_info[0]; lo = pr_buf->th_doacross_info[2]; st = pr_buf->th_doacross_info[4]; +#if OMPT_SUPPORT && OMPT_OPTIONAL + ompt_dependence_t deps[num_dims]; +#endif if (st == 1) { // most common case iter_number = vec[0] - lo; } else if (st > 0) { @@ -4123,6 +4143,10 @@ } else { // negative increment iter_number = (kmp_uint64)(lo - vec[0]) / (-st); } +#if OMPT_SUPPORT && OMPT_OPTIONAL + deps[0].variable.value = iter_number; + 
deps[0].dependence_type = ompt_dependence_type_source; +#endif for (i = 1; i < num_dims; ++i) { kmp_int64 iter, ln; kmp_int32 j = i * 4; @@ -4137,7 +4161,17 @@ iter = (kmp_uint64)(lo - vec[i]) / (-st); } iter_number = iter + ln * iter_number; +#if OMPT_SUPPORT && OMPT_OPTIONAL + deps[i].variable.value = iter; + deps[i].dependence_type = ompt_dependence_type_source; +#endif + } +#if OMPT_SUPPORT && OMPT_OPTIONAL + if (ompt_enabled.ompt_callback_dependences) { + ompt_callbacks.ompt_callback(ompt_callback_dependences)( + &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims); } +#endif shft = iter_number % 32; // use 32-bit granularity iter_number >>= 5; // divided by 32 flag = 1 << shft; diff --git a/openmp/runtime/test/ompt/callback.h b/openmp/runtime/test/ompt/callback.h --- a/openmp/runtime/test/ompt/callback.h +++ b/openmp/runtime/test/ompt/callback.h @@ -47,6 +47,17 @@ "ompt_cancel_discarded_task" }; +static const char *ompt_dependence_type_t_values[] = { + NULL, + "ompt_dependence_type_in", // 1 + "ompt_dependence_type_out", // 2 + "ompt_dependence_type_inout", // 3 + "ompt_dependence_type_mutexinoutset", // 4 + "ompt_dependence_type_source", // 5 + "ompt_dependence_type_sink", // 6 + "ompt_dependence_type_inoutset" // 7 +}; + static void format_task_type(int type, char *buffer) { char *progress = buffer; if (type & ompt_task_initial) @@ -971,10 +982,24 @@ const ompt_dependence_t *deps, int ndeps) { - printf("%" PRIu64 ":" _TOOL_PREFIX - " ompt_event_task_dependences: task_id=%" PRIu64 - ", deps=%p, ndeps=%d\n", - ompt_get_thread_data()->value, task_data->value, (void *)deps, ndeps); + char buffer[2048] = ""; // stays a valid empty string when ndeps == 0 + char *progress = buffer; + for (int i = 0; i < ndeps && progress < buffer + 2000; i++) { + if (deps[i].dependence_type == ompt_dependence_type_source || + deps[i].dependence_type == ompt_dependence_type_sink) + progress += + sprintf(progress, "(%" PRIu64 ", %s), ", deps[i].variable.value, + ompt_dependence_type_t_values[deps[i].dependence_type]); + else + progress +=
+ sprintf(progress, "(%p, %s), ", deps[i].variable.ptr, + ompt_dependence_type_t_values[deps[i].dependence_type]); + } + if (ndeps > 0) + progress[-2] = 0; + printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_dependences: task_id=%" PRIu64 + ", deps=[%s], ndeps=%d\n", + ompt_get_thread_data()->value, task_data->value, buffer, ndeps); } static void diff --git a/openmp/runtime/test/ompt/synchronization/ordered_dependences.c b/openmp/runtime/test/ompt/synchronization/ordered_dependences.c new file mode 100644 --- /dev/null +++ b/openmp/runtime/test/ompt/synchronization/ordered_dependences.c @@ -0,0 +1,63 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +#include "callback.h" +#include <omp.h> + +int main() { + int a[10][10]; + int i, j; +#pragma omp parallel num_threads(2) +#pragma omp for ordered(2) + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) { + a[i][j] = i + j + 1; + printf("%d, %d\n", i, j); +#pragma omp ordered depend(sink : i - 1, j) depend(sink : i, j - 1) + if (i > 0 && j > 0) + a[i][j] = a[i - 1][j] + a[i][j - 1] + 1; + printf("%d, %d\n", i, j); +#pragma omp ordered depend(source) + } + + return 0; +} +// CHECK: 0: NULL_POINTER=[[NULL:.*$]] + +// CHECK: {{^}}[[MASTER:[0-9]+]]: ompt_event_loop_begin: +// CHECK-SAME: parallel_id={{[0-9]+}}, parent_task_id=[[IMPL_TASK:[0-9]+]], + +// CHECK: {{^}}[[MASTER]]: ompt_event_dependences: task_id=[[IMPL_TASK]], +// CHECK-SAME: deps=[(0, ompt_dependence_type_source), (0, +// CHECK-SAME: ompt_dependence_type_source)], ndeps=2 + +// CHECK: {{^}}[[MASTER]]: ompt_event_dependences: task_id=[[IMPL_TASK]], +// CHECK-SAME: deps=[(0, ompt_dependence_type_sink), (0, +// CHECK-SAME: ompt_dependence_type_sink)], ndeps=2 + +// CHECK: {{^}}[[MASTER]]: ompt_event_dependences: task_id=[[IMPL_TASK]], +// CHECK-SAME: deps=[(0, ompt_dependence_type_source), (1, +// CHECK-SAME: ompt_dependence_type_source)], ndeps=2 + +// CHECK: {{^}}[[WORKER:[0-9]+]]:
ompt_event_loop_begin: +// CHECK-SAME: parallel_id={{[0-9]+}}, parent_task_id=[[IMPL_TASK:[0-9]+]], + +// CHECK: {{^}}[[WORKER]]: ompt_event_dependences: task_id=[[IMPL_TASK]], +// CHECK-SAME: deps=[(0, ompt_dependence_type_sink), (0, +// CHECK-SAME: ompt_dependence_type_sink)], ndeps=2 + +// CHECK: {{^}}[[WORKER]]: ompt_event_dependences: task_id=[[IMPL_TASK]], +// CHECK-SAME: deps=[(1, ompt_dependence_type_source), (0, +// CHECK-SAME: ompt_dependence_type_source)], ndeps=2 + +// CHECK: {{^}}[[WORKER]]: ompt_event_dependences: task_id=[[IMPL_TASK]], +// CHECK-SAME: deps=[(0, ompt_dependence_type_sink), (1, +// CHECK-SAME: ompt_dependence_type_sink)], ndeps=2 + +// CHECK: {{^}}[[WORKER]]: ompt_event_dependences: task_id=[[IMPL_TASK]], +// CHECK-SAME: deps=[(1, ompt_dependence_type_sink), (0, +// CHECK-SAME: ompt_dependence_type_sink)], ndeps=2 + +// CHECK: {{^}}[[WORKER]]: ompt_event_dependences: task_id=[[IMPL_TASK]], +// CHECK-SAME: deps=[(1, ompt_dependence_type_source), (1, +// CHECK-SAME: ompt_dependence_type_source)], ndeps=2 diff --git a/openmp/runtime/test/ompt/tasks/dependences.c b/openmp/runtime/test/ompt/tasks/dependences.c --- a/openmp/runtime/test/ompt/tasks/dependences.c +++ b/openmp/runtime/test/ompt/tasks/dependences.c @@ -3,59 +3,86 @@ // UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 #include "callback.h" -#include <omp.h> +#include <omp.h> #include <math.h> #include <unistd.h> -int main() -{ +int main() { int x = 0; - #pragma omp parallel num_threads(2) +#pragma omp parallel num_threads(2) { - #pragma omp master - { +#pragma omp master + { print_ids(0); - #pragma omp task depend(out:x) + printf("%" PRIu64 ": address of x: %p\n", ompt_get_thread_data()->value, + &x); +#pragma omp task depend(out : x) { x++; delay(100); } print_fuzzy_address(1); print_ids(0); - - #pragma omp task depend(in:x) - { - x = -1; - } + +#pragma omp task depend(in : x) + { x = -1; } print_ids(0); } } x++; + return 0; +} - // Check if libomp supports the callbacks for this test.
- // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' - // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_dependences' - // CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_dependence' - - // CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] +// Check if libomp supports the callbacks for this test. +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_create' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_dependences' +// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_task_depende - // make sure initial data pointers are null - // CHECK-NOT: 0: new_task_data initially not null +// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] - // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID:[0-9]+]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] - // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT:0x[0-f]+]], reenter_frame=[[NULL]] - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[FIRST_TASK:[0-f]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, task_type=ompt_task_explicit=4, has_dependences=yes - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_dependences: task_id=[[FIRST_TASK]], deps={{0x[0-f]+}}, ndeps=1 - // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] - // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] +// make sure initial data pointers are null +// CHECK-NOT: 0: new_task_data initially not null - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[SECOND_TASK:[0-f]+]], codeptr_ra={{0x[0-f]+}}, 
task_type=ompt_task_explicit=4, has_dependences=yes - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_dependences: task_id=[[SECOND_TASK]], deps={{0x[0-f]+}}, ndeps=1 - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_dependence_pair: first_task_id=[[FIRST_TASK]], second_task_id=[[SECOND_TASK]] - // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] +// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_implicit_task_begin: +// CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], +// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID:[0-9]+]] +// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], +// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT:0x[0-f]+]], +// CHECK-SAME: reenter_frame=[[NULL]] - return 0; -} +// CHECK: {{^}}[[MASTER_ID]]: address of x: [[ADDRX:0x[0-f]+]] +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: +// CHECK-SAME: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]], +// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}, +// CHECK-SAME: new_task_id=[[FIRST_TASK:[0-f]+]], +// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, +// CHECK-SAME: task_type=ompt_task_explicit=4, has_dependences=yes + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_dependences: +// CHECK-SAME: task_id=[[FIRST_TASK]], deps=[([[ADDRX]], +// CHECK-SAME: ompt_dependence_type_inout)], ndeps=1 + +// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] +// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], +// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], +// CHECK-SAME: reenter_frame=[[NULL]] + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: +// CHECK-SAME: parent_task_id={{[0-9]+}}, parent_task_frame.exit=[[EXIT]], +// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}, +// CHECK-SAME: new_task_id=[[SECOND_TASK:[0-f]+]], codeptr_ra={{0x[0-f]+}}, +// CHECK-SAME: task_type=ompt_task_explicit=4, 
has_dependences=yes + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_dependences: +// CHECK-SAME: task_id=[[SECOND_TASK]], deps=[([[ADDRX]], +// CHECK-SAME: ompt_dependence_type_in)], ndeps=1 + +// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_dependence_pair: +// CHECK-SAME: first_task_id=[[FIRST_TASK]], second_task_id=[[SECOND_TASK]] + +// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], +// CHECK-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], +// CHECK-SAME: reenter_frame=[[NULL]]