diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -538,6 +538,8 @@ __itt_get_timestamp(); } #endif + KMP_MB(); // Synchronize writes to child threads. + /* Perform a hypercube-embedded tree gather to wait until all of the threads have arrived, and reduce any required data as we go. */ kmp_flag_64 p_flag(&thr_bar->b_arrived); @@ -589,6 +591,7 @@ // Wait for child to arrive kmp_flag_64 c_flag(&child_bar->b_arrived, new_state); c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); + KMP_MB(); // Synchronize writes to child threads. ANNOTATE_BARRIER_END(child_thr); #if USE_ITT_BUILD && USE_ITT_NOTIFY // Barrier imbalance - write min of the thread time and a child time to