diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp
--- a/openmp/runtime/src/kmp_barrier.cpp
+++ b/openmp/runtime/src/kmp_barrier.cpp
@@ -128,8 +128,25 @@
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
                  team->t.t_id, i));
         ANNOTATE_REDUCE_AFTER(reduce);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+        ompt_data_t *my_task_data = OMPT_CUR_TASK_DATA(this_thr);
+        ompt_data_t *my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
+        void *return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
+        if (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) {
+          ompt_callbacks.ompt_callback(ompt_callback_reduction)(
+              ompt_sync_region_reduction, ompt_scope_begin, my_parallel_data,
+              my_task_data, return_address);
+        }
+#endif
         (*reduce)(this_thr->th.th_local.reduce_data,
                   other_threads[i]->th.th_local.reduce_data);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+        if (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) {
+          ompt_callbacks.ompt_callback(ompt_callback_reduction)(
+              ompt_sync_region_reduction, ompt_scope_end, my_parallel_data,
+              my_task_data, return_address);
+        }
+#endif
         ANNOTATE_REDUCE_BEFORE(reduce);
         ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
       }
@@ -355,8 +372,25 @@
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                  team->t.t_id, child_tid));
         ANNOTATE_REDUCE_AFTER(reduce);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+        ompt_data_t *my_task_data = OMPT_CUR_TASK_DATA(this_thr);
+        ompt_data_t *my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
+        void *return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
+        if (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) {
+          ompt_callbacks.ompt_callback(ompt_callback_reduction)(
+              ompt_sync_region_reduction, ompt_scope_begin, my_parallel_data,
+              my_task_data, return_address);
+        }
+#endif
         (*reduce)(this_thr->th.th_local.reduce_data,
                   child_thr->th.th_local.reduce_data);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+        if (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) {
+          ompt_callbacks.ompt_callback(ompt_callback_reduction)(
+              ompt_sync_region_reduction, ompt_scope_end, my_parallel_data,
+              my_task_data, return_address);
+        }
+#endif
         ANNOTATE_REDUCE_BEFORE(reduce);
         ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
       }
@@ -600,8 +634,25 @@
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                  team->t.t_id, child_tid));
         ANNOTATE_REDUCE_AFTER(reduce);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+        ompt_data_t *my_task_data = OMPT_CUR_TASK_DATA(this_thr);
+        ompt_data_t *my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
+        void *return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
+        if (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) {
+          ompt_callbacks.ompt_callback(ompt_callback_reduction)(
+              ompt_sync_region_reduction, ompt_scope_begin, my_parallel_data,
+              my_task_data, return_address);
+        }
+#endif
         (*reduce)(this_thr->th.th_local.reduce_data,
                   child_thr->th.th_local.reduce_data);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+        if (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) {
+          ompt_callbacks.ompt_callback(ompt_callback_reduction)(
+              ompt_sync_region_reduction, ompt_scope_end, my_parallel_data,
+              my_task_data, return_address);
+        }
+#endif
         ANNOTATE_REDUCE_BEFORE(reduce);
         ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
       }
@@ -912,6 +963,16 @@
       flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
       if (reduce) {
         ANNOTATE_REDUCE_AFTER(reduce);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+        ompt_data_t *my_task_data = OMPT_CUR_TASK_DATA(this_thr);
+        ompt_data_t *my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
+        void *return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
+        if (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) {
+          ompt_callbacks.ompt_callback(ompt_callback_reduction)(
+              ompt_sync_region_reduction, ompt_scope_begin, my_parallel_data,
+              my_task_data, return_address);
+        }
+#endif
         for (child_tid = tid + 1; child_tid <= tid + thr_bar->leaf_kids;
              ++child_tid) {
           KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
@@ -923,6 +984,13 @@
           (*reduce)(this_thr->th.th_local.reduce_data,
                     other_threads[child_tid]->th.th_local.reduce_data);
         }
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+        if (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) {
+          ompt_callbacks.ompt_callback(ompt_callback_reduction)(
+              ompt_sync_region_reduction, ompt_scope_end, my_parallel_data,
+              my_task_data, return_address);
+        }
+#endif
         ANNOTATE_REDUCE_BEFORE(reduce);
         ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
       }
diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -3429,13 +3429,35 @@
       loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
   __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  kmp_info_t *this_thr = __kmp_threads[global_tid];
+  ompt_data_t *my_task_data = OMPT_CUR_TASK_DATA(this_thr);
+  ompt_data_t *my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
+  void *return_address = OMPT_LOAD_RETURN_ADDRESS(global_tid);
+#endif

   if (packed_reduction_method == critical_reduce_block) {

+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    if (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) {
+      ompt_callbacks.ompt_callback(ompt_callback_reduction)(
+          ompt_sync_region_reduction, ompt_scope_begin, my_parallel_data,
+          my_task_data, return_address);
+    }
+#endif
+
     __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
     retval = 1;

   } else if (packed_reduction_method == empty_reduce_block) {

+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    if (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) {
+      ompt_callbacks.ompt_callback(ompt_callback_reduction)(
+          ompt_sync_region_reduction, ompt_scope_begin, my_parallel_data,
+          my_task_data, return_address);
+    }
+#endif
+
     // usage: if team size == 1, no synchronization is required ( Intel
     // platforms only )
     retval = 1;
@@ -3536,15 +3558,38 @@
   packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  kmp_info_t *this_thr = __kmp_threads[global_tid];
+  ompt_data_t *my_task_data = OMPT_CUR_TASK_DATA(this_thr);
+  ompt_data_t *my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
+  void *return_address = OMPT_LOAD_RETURN_ADDRESS(global_tid);
+#endif
+
   if (packed_reduction_method == critical_reduce_block) {

     __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    if (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) {
+      ompt_callbacks.ompt_callback(ompt_callback_reduction)(
+          ompt_sync_region_reduction, ompt_scope_end, my_parallel_data,
+          my_task_data, return_address);
+    }
+#endif

   } else if (packed_reduction_method == empty_reduce_block) {

     // usage: if team size == 1, no synchronization is required ( on Intel
     // platforms only )

+#if OMPT_SUPPORT && OMPT_OPTIONAL
+
+    if (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) {
+      ompt_callbacks.ompt_callback(ompt_callback_reduction)(
+          ompt_sync_region_reduction, ompt_scope_end, my_parallel_data,
+          my_task_data, return_address);
+    }
+#endif
+
   } else if (packed_reduction_method == atomic_reduce_block) {

     // neither master nor other workers should get here
@@ -3556,6 +3601,7 @@
                                    tree_reduce_block)) {

     // only master gets here
+    // OMPT: tree reduction is annotated in the barrier code

   } else {
@@ -3629,13 +3675,34 @@
       loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
   __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  kmp_info_t *this_thr = __kmp_threads[global_tid];
+  ompt_data_t *my_task_data = OMPT_CUR_TASK_DATA(this_thr);
+  ompt_data_t *my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
+  void *return_address = OMPT_LOAD_RETURN_ADDRESS(global_tid);
+#endif
+
   if (packed_reduction_method == critical_reduce_block) {

+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    if (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) {
+      ompt_callbacks.ompt_callback(ompt_callback_reduction)(
+          ompt_sync_region_reduction, ompt_scope_begin, my_parallel_data,
+          my_task_data, return_address);
+    }
+#endif
     __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
     retval = 1;

   } else if (packed_reduction_method == empty_reduce_block) {

+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    if (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) {
+      ompt_callbacks.ompt_callback(ompt_callback_reduction)(
+          ompt_sync_region_reduction, ompt_scope_begin, my_parallel_data,
+          my_task_data, return_address);
+    }
+#endif
     // usage: if team size == 1, no synchronization is required ( Intel
     // platforms only )
     retval = 1;
@@ -3723,9 +3790,23 @@
   // this barrier should be visible to a customer and to the threading profile
   // tool (it's a terminating barrier on constructs if NOWAIT not specified)
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  kmp_info_t *this_thr = __kmp_threads[global_tid];
+  ompt_data_t *my_task_data = OMPT_CUR_TASK_DATA(this_thr);
+  ompt_data_t *my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
+  void *return_address = OMPT_LOAD_RETURN_ADDRESS(global_tid);
+#endif

   if (packed_reduction_method == critical_reduce_block) {
     __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    if (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) {
+      ompt_callbacks.ompt_callback(ompt_callback_reduction)(
+          ompt_sync_region_reduction, ompt_scope_end, my_parallel_data,
+          my_task_data, return_address);
+    }
+#endif
     // TODO: implicit barrier: should be exposed
 #if OMPT_SUPPORT
@@ -3749,6 +3830,14 @@

   } else if (packed_reduction_method == empty_reduce_block) {

+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    if (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) {
+      ompt_callbacks.ompt_callback(ompt_callback_reduction)(
+          ompt_sync_region_reduction, ompt_scope_end, my_parallel_data,
+          my_task_data, return_address);
+    }
+#endif
+
     // usage: if team size==1, no synchronization is required (Intel platforms only)

     // TODO: implicit barrier: should be exposed
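
Note (not part of the patch): every hunk in the two files above repeats one idiom — capture the OMPT context of the reducing thread once, then bracket the actual reduction work with a guarded begin/end pair. Distilled, with explanatory comments added (a schematic fragment using the names from the patch, not a drop-in runtime function):

  #if OMPT_SUPPORT && OMPT_OPTIONAL
    // OMPT context: the reducing thread's implicit task, its team's parallel
    // region, and the return address of the runtime entry point, which lets a
    // tool attribute the event to a source location in user code.
    ompt_data_t *my_task_data = OMPT_CUR_TASK_DATA(this_thr);
    ompt_data_t *my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
    void *return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
    // Dispatch only when a tool is attached and has registered the callback.
    if (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) {
      ompt_callbacks.ompt_callback(ompt_callback_reduction)(
          ompt_sync_region_reduction, ompt_scope_begin, my_parallel_data,
          my_task_data, return_address);
    }
  #endif
    // ... perform the reduction: (*reduce)(...), the critical section, or
    // nothing at all in the empty_reduce_block case ...
  #if OMPT_SUPPORT && OMPT_OPTIONAL
    // Matching end event with the same context.
    if (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) {
      ompt_callbacks.ompt_callback(ompt_callback_reduction)(
          ompt_sync_region_reduction, ompt_scope_end, my_parallel_data,
          my_task_data, return_address);
    }
  #endif
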
diff --git a/openmp/runtime/src/ompt-event-specific.h b/openmp/runtime/src/ompt-event-specific.h
--- a/openmp/runtime/src/ompt-event-specific.h
+++ b/openmp/runtime/src/ompt-event-specific.h
@@ -99,7 +99,7 @@

 #define ompt_callback_cancel_implemented ompt_event_MAY_ALWAYS_OPTIONAL

-#define ompt_callback_reduction_implemented ompt_event_UNIMPLEMENTED
+#define ompt_callback_reduction_implemented ompt_event_MAY_ALWAYS_OPTIONAL

 #define ompt_callback_dispatch_implemented ompt_event_UNIMPLEMENTED
diff --git a/openmp/runtime/test/ompt/callback.h b/openmp/runtime/test/ompt/callback.h
--- a/openmp/runtime/test/ompt/callback.h
+++ b/openmp/runtime/test/ompt/callback.h
@@ -408,7 +408,7 @@
     case ompt_sync_region_taskgroup:
       printf("%" PRIu64 ": ompt_event_wait_taskgroup_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, parallel_data->value, task_data->value, codeptr_ra);
       break;
-    case ompt_sync_region_reduction:
+    default:
       break;
   }
   break;
@@ -427,13 +427,32 @@
     case ompt_sync_region_taskgroup:
       printf("%" PRIu64 ": ompt_event_wait_taskgroup_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
       break;
-    case ompt_sync_region_reduction:
+    default:
       break;
   }
   break;
  }
}

+static void
+on_ompt_callback_reduction(
+  ompt_sync_region_kind_t kind,
+  ompt_scope_endpoint_t endpoint,
+  ompt_data_t *parallel_data,
+  ompt_data_t *task_data,
+  const void *codeptr_ra)
+{
+  switch(endpoint)
+  {
+    case ompt_scope_begin:
+      printf("%" PRIu64 ": ompt_event_reduction_begin: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
+      break;
+    case ompt_scope_end:
+      printf("%" PRIu64 ": ompt_event_reduction_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", ompt_get_thread_data()->value, (parallel_data)?parallel_data->value:0, task_data->value, codeptr_ra);
+      break;
+  }
+}
+
 static void
 on_ompt_callback_flush(
   ompt_data_t *thread_data,
@@ -784,6 +803,7 @@
   register_callback(ompt_callback_nest_lock);
   register_callback(ompt_callback_sync_region);
   register_callback_t(ompt_callback_sync_region_wait, ompt_callback_sync_region_t);
+  register_callback_t(ompt_callback_reduction, ompt_callback_sync_region_t);
   register_callback(ompt_callback_control_tool);
   register_callback(ompt_callback_flush);
   register_callback(ompt_callback_cancel);
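
Note (not part of the patch): ompt_callback_reduction has no dedicated callback type; it reuses the sync-region signature, which is why the test harness registers it via register_callback_t(ompt_callback_reduction, ompt_callback_sync_region_t). For reference, OpenMP 5.0's omp-tools.h declares that type as follows (the test header above still uses the older ompt_sync_region_kind_t spelling for the first parameter):

  typedef void (*ompt_callback_sync_region_t)(
    ompt_sync_region_t kind,        // here: ompt_sync_region_reduction
    ompt_scope_endpoint_t endpoint, // ompt_scope_begin or ompt_scope_end
    ompt_data_t *parallel_data,
    ompt_data_t *task_data,
    const void *codeptr_ra          // runtime-entry return address
  );
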
diff --git a/openmp/runtime/test/ompt/synchronization/reduction/empty_reduce.c b/openmp/runtime/test/ompt/synchronization/reduction/empty_reduce.c
new file mode 100644
--- /dev/null
+++ b/openmp/runtime/test/ompt/synchronization/reduction/empty_reduce.c
@@ -0,0 +1,34 @@
+// RUN: %libomp-compile-and-run | FileCheck %s
+// RUN: %libomp-compile -DNOWAIT && %libomp-run | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+#ifdef NOWAIT
+  #define FOR_CLAUSE nowait
+#else
+  #define FOR_CLAUSE
+#endif
+
+int main()
+{
+  int sum = 0;
+  int i;
+  #pragma omp parallel num_threads(1)
+  #pragma omp for reduction(+:sum) FOR_CLAUSE
+  for(i = 0; i < 10000; i++)
+  {
+    sum += i;
+  }
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin:
+  // CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID:[0-9]+]]
+
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_reduction_begin: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_reduction_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=
+
+  return 0;
+}
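
Note (not part of the patch): the two new tests exercise different runtime paths. empty_reduce.c above forces num_threads(1), so the runtime is expected to select the empty_reduce_block path instrumented in kmp_csupport.cpp, and both events fire on the master thread with known parallel and task IDs. tree_reduce.c below runs five threads, so the reduction is expected to take place in the barrier gather code instrumented in kmp_barrier.cpp; which thread combines which partial results is not deterministic, hence the unordered CHECK-DAG lines.
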
diff --git a/openmp/runtime/test/ompt/synchronization/reduction/tree_reduce.c b/openmp/runtime/test/ompt/synchronization/reduction/tree_reduce.c
new file mode 100644
--- /dev/null
+++ b/openmp/runtime/test/ompt/synchronization/reduction/tree_reduce.c
@@ -0,0 +1,40 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+#include "callback.h"
+#include <omp.h>
+
+#ifdef NOWAIT
+  #define FOR_CLAUSE nowait
+#else
+  #define FOR_CLAUSE
+#endif
+
+int main()
+{
+  int sum = 0;
+  int i;
+  #pragma omp parallel num_threads(5)
+  #pragma omp for reduction(+:sum) FOR_CLAUSE
+  for(i = 0; i < 10000; i++)
+  {
+    sum += i;
+  }
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin:
+  // CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID:[0-9]+]]
+
+  // order and distribution to threads not determined
+  // CHECK-DAG: {{^}}{{[0-f]+}}: ompt_event_reduction_begin: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=
+  // CHECK-DAG: {{^}}{{[0-f]+}}: ompt_event_reduction_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=
+  // CHECK-DAG: {{^}}{{[0-f]+}}: ompt_event_reduction_begin: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=
+  // CHECK-DAG: {{^}}{{[0-f]+}}: ompt_event_reduction_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=
+  // CHECK-DAG: {{^}}{{[0-f]+}}: ompt_event_reduction_begin: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=
+  // CHECK-DAG: {{^}}{{[0-f]+}}: ompt_event_reduction_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=
+  // CHECK-DAG: {{^}}{{[0-f]+}}: ompt_event_reduction_begin: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=
+  // CHECK-DAG: {{^}}{{[0-f]+}}: ompt_event_reduction_end: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], codeptr_ra=
+
+  return 0;
+}
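
Note (not part of the patch): with ompt_callback_reduction_implemented now ompt_event_MAY_ALWAYS_OPTIONAL, any first-party OMPT tool can subscribe to these events, not just the test harness. A minimal standalone tool might look like the sketch below — written against OpenMP 5.0 omp-tools.h naming, with hypothetical names (on_reduction, tool_initialize, tool_finalize):

  #include <stdio.h>
  #include <omp-tools.h>

  // Matches ompt_callback_sync_region_t; for this event, kind is always
  // ompt_sync_region_reduction.
  static void on_reduction(ompt_sync_region_t kind,
                           ompt_scope_endpoint_t endpoint,
                           ompt_data_t *parallel_data,
                           ompt_data_t *task_data,
                           const void *codeptr_ra) {
    (void)kind; (void)parallel_data; (void)task_data;
    printf("reduction %s: codeptr_ra=%p\n",
           endpoint == ompt_scope_begin ? "begin" : "end",
           (void *)codeptr_ra);
  }

  static int tool_initialize(ompt_function_lookup_t lookup,
                             int initial_device_num, ompt_data_t *tool_data) {
    (void)initial_device_num; (void)tool_data;
    // Obtain the runtime entry point, then subscribe to reduction events.
    ompt_set_callback_t set_callback =
        (ompt_set_callback_t)lookup("ompt_set_callback");
    set_callback(ompt_callback_reduction, (ompt_callback_t)on_reduction);
    return 1; // non-zero keeps the tool active
  }

  static void tool_finalize(ompt_data_t *tool_data) { (void)tool_data; }

  // The runtime looks for this symbol in libraries named in
  // OMP_TOOL_LIBRARIES (or linked/preloaded into the program).
  ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
                                            const char *runtime_version) {
    (void)omp_version; (void)runtime_version;
    static ompt_start_tool_result_t result = {&tool_initialize, &tool_finalize,
                                              {0}};
    return &result;
  }

Built with, e.g., cc -shared -fPIC -o reduction_tool.so reduction_tool.c and run with OMP_TOOL_LIBRARIES=./reduction_tool.so, this should print one begin/end pair around each reduction path instrumented above.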