diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -3795,7 +3795,8 @@ extern void __kmp_affinity_uninitialize(void); extern void __kmp_affinity_set_init_mask( int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */ -extern void __kmp_affinity_set_place(int gtid); +void __kmp_affinity_bind_init_mask(int gtid); +extern void __kmp_affinity_bind_place(int gtid); extern void __kmp_affinity_determine_capable(const char *env_var); extern int __kmp_aux_set_affinity(void **mask); extern int __kmp_aux_get_affinity(void **mask); @@ -3811,7 +3812,8 @@ int gtid = __kmp_entry_gtid(); kmp_root_t *r = __kmp_threads[gtid]->th.th_root; if (r->r.r_uber_thread == __kmp_threads[gtid] && !r->r.r_affinity_assigned) { - __kmp_affinity_set_init_mask(gtid, TRUE); + __kmp_affinity_set_init_mask(gtid, /*isa_root=*/TRUE); + __kmp_affinity_bind_init_mask(gtid); r->r.r_affinity_assigned = TRUE; } } diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp --- a/openmp/runtime/src/kmp_affinity.cpp +++ b/openmp/runtime/src/kmp_affinity.cpp @@ -4260,8 +4260,8 @@ // Called when __kmp_topology is ready static void __kmp_aux_affinity_initialize_other_data(kmp_affinity_t &affinity) { - // Initialize data dependent on __kmp_topology - if (__kmp_topology) { + // Initialize other data structures which depend on the topology + if (__kmp_topology && __kmp_topology->get_num_hw_threads()) { machine_hierarchy.init(__kmp_topology->get_num_hw_threads()); __kmp_affinity_get_topology_info(affinity); } @@ -4527,8 +4527,6 @@ if (is_regular_affinity && !__kmp_topology) { bool success = __kmp_aux_affinity_initialize_topology(affinity); if (success) { - // Initialize other data structures which depend on the topology - machine_hierarchy.init(__kmp_topology->get_num_hw_threads()); KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads()); } else { affinity.type = affinity_none; @@ -4866,14 
+4864,12 @@ kmp_affin_mask_t *mask; int i; const kmp_affinity_t *affinity; - const char *env_var; bool is_hidden_helper = KMP_HIDDEN_HELPER_THREAD(gtid); if (is_hidden_helper) affinity = &__kmp_hh_affinity; else affinity = &__kmp_affinity; - env_var = __kmp_get_affinity_env_var(*affinity, /*for_binding=*/true); if (KMP_AFFINITY_NON_PROC_BIND || is_hidden_helper) { if ((affinity->type == affinity_none) || @@ -4923,19 +4919,34 @@ } if (i == KMP_PLACE_ALL) { - KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n", + KA_TRACE(100, ("__kmp_affinity_set_init_mask: setting T#%d to all places\n", gtid)); } else { - KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n", + KA_TRACE(100, ("__kmp_affinity_set_init_mask: setting T#%d to place %d\n", gtid, i)); } KMP_CPU_COPY(th->th.th_affin_mask, mask); +} + +void __kmp_affinity_bind_init_mask(int gtid) { + if (!KMP_AFFINITY_CAPABLE()) { + return; + } + kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]); + const kmp_affinity_t *affinity; + const char *env_var; + bool is_hidden_helper = KMP_HIDDEN_HELPER_THREAD(gtid); + if (is_hidden_helper) + affinity = &__kmp_hh_affinity; + else + affinity = &__kmp_affinity; + env_var = __kmp_get_affinity_env_var(*affinity, /*for_binding=*/true); /* to avoid duplicate printing (will be correctly printed on barrier) */ - if (affinity->flags.verbose && - (affinity->type == affinity_none || - (i != KMP_PLACE_ALL && affinity->type != affinity_balanced)) && + if (affinity->flags.verbose && (affinity->type == affinity_none || + (th->th.th_current_place != KMP_PLACE_ALL && + affinity->type != affinity_balanced)) && !KMP_HIDDEN_HELPER_MAIN_THREAD(gtid)) { char buf[KMP_AFFIN_MASK_PRINT_LEN]; __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, @@ -4955,7 +4966,7 @@ __kmp_set_system_affinity(th->th.th_affin_mask, TRUE); } -void __kmp_affinity_set_place(int gtid) { +void __kmp_affinity_bind_place(int gtid) { // Hidden helper threads should not 
be affected by OMP_PLACES/OMP_PROC_BIND if (!KMP_AFFINITY_CAPABLE() || KMP_HIDDEN_HELPER_THREAD(gtid)) { return; @@ -4963,7 +4974,7 @@ kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]); - KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current " + KA_TRACE(100, ("__kmp_affinity_bind_place: binding T#%d to place %d (current " "place = %d)\n", gtid, th->th.th_new_place, th->th.th_current_place)); @@ -4985,9 +4996,6 @@ KMP_CPU_INDEX(__kmp_affinity.masks, th->th.th_new_place); KMP_CPU_COPY(th->th.th_affin_mask, mask); th->th.th_current_place = th->th.th_new_place; - // Copy topology information associated with the place - th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place]; - th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place]; if (__kmp_affinity.flags.verbose) { char buf[KMP_AFFIN_MASK_PRINT_LEN]; diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -2591,7 +2591,7 @@ __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place)); } else { - __kmp_affinity_set_place(gtid); + __kmp_affinity_bind_place(gtid); } } #endif // KMP_AFFINITY_SUPPORTED diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -4671,6 +4671,11 @@ } #endif /* KMP_ADJUST_BLOCKTIME */ +#if KMP_AFFINITY_SUPPORTED + // Set the affinity and topology information for new thread + __kmp_affinity_set_init_mask(new_gtid, /*isa_root=*/FALSE); +#endif + /* actually fork it and create the new worker thread */ KF_TRACE( 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr)); @@ -4764,6 +4769,19 @@ } #if KMP_AFFINITY_SUPPORTED +static inline void __kmp_set_thread_place(kmp_team_t *team, kmp_info_t *th, + int first, int last, int newp) { /* Stores the place partition [first, last] and the next place newp on thread th. When newp differs from th's current place it also sets team's t_display_affinity flag (only if __kmp_display_affinity is set) and refreshes th's cached topology ids/attrs from __kmp_affinity. */ + th->th.th_first_place = first; + th->th.th_last_place = last; 
+ th->th.th_new_place = newp; /* th_current_place is only read by this helper, never written */ + if (newp != th->th.th_current_place) { /* the thread is being given a different place than it currently has */ + if (__kmp_display_affinity && team->t.t_display_affinity != 1) + team->t.t_display_affinity = 1; /* stored only when the flag is not already 1 */ + // Copy topology information associated with the new place (th_new_place was set to newp above) + th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place]; + th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place]; + } +} // __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism. // It calculates the worker + primary thread's partition based upon the parent @@ -4803,13 +4821,7 @@ for (f = 1; f < n_th; f++) { kmp_info_t *th = team->t.t_threads[f]; KMP_DEBUG_ASSERT(th != NULL); - th->th.th_first_place = first_place; - th->th.th_last_place = last_place; - th->th.th_new_place = masters_place; - if (__kmp_display_affinity && masters_place != th->th.th_current_place && - team->t.t_display_affinity != 1) { - team->t.t_display_affinity = 1; - } + __kmp_set_thread_place(team, th, first_place, last_place, masters_place); KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d " "partition = [%d,%d]\n", @@ -4840,13 +4852,7 @@ } else { place++; } - th->th.th_first_place = first_place; - th->th.th_last_place = last_place; - th->th.th_new_place = place; - if (__kmp_display_affinity && place != th->th.th_current_place && - team->t.t_display_affinity != 1) { - team->t.t_display_affinity = 1; - } + __kmp_set_thread_place(team, th, first_place, last_place, place); KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d " "partition = [%d,%d]\n", @@ -4865,13 +4871,7 @@ kmp_info_t *th = team->t.t_threads[f]; KMP_DEBUG_ASSERT(th != NULL); - th->th.th_first_place = first_place; - th->th.th_last_place = last_place; - th->th.th_new_place = place; - if (__kmp_display_affinity && place != th->th.th_current_place && - team->t.t_display_affinity != 1) { - team->t.t_display_affinity = 1; - } + __kmp_set_thread_place(team, th, first_place, last_place, place); s_count++; if ((s_count == S) && rem 
&& (gap_ct == gap)) { @@ -4938,12 +4938,7 @@ kmp_info_t *th = team->t.t_threads[f]; KMP_DEBUG_ASSERT(th != NULL); - th->th.th_first_place = place; - th->th.th_new_place = place; - if (__kmp_display_affinity && place != th->th.th_current_place && - team->t.t_display_affinity != 1) { - team->t.t_display_affinity = 1; - } + int fplace = place, nplace = place; s_count = 1; while (s_count < S) { if (place == last_place) { @@ -4966,7 +4961,7 @@ rem--; gap_ct = 0; } - th->th.th_last_place = place; + __kmp_set_thread_place(team, th, fplace, place, nplace); gap_ct++; if (place == last_place) { @@ -5032,13 +5027,7 @@ KMP_DEBUG_ASSERT(last_place >= first_place); th = team->t.t_threads[f]; KMP_DEBUG_ASSERT(th); - th->th.th_first_place = first; - th->th.th_new_place = place; - th->th.th_last_place = last; - if (__kmp_display_affinity && place != th->th.th_current_place && - team->t.t_display_affinity != 1) { - team->t.t_display_affinity = 1; - } + __kmp_set_thread_place(team, th, first, last, place); KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " "partition = [%d,%d], spacing = %.4f\n", @@ -5064,13 +5053,7 @@ kmp_info_t *th = team->t.t_threads[f]; KMP_DEBUG_ASSERT(th != NULL); - th->th.th_first_place = place; - th->th.th_last_place = place; - th->th.th_new_place = place; - if (__kmp_display_affinity && place != th->th.th_current_place && - team->t.t_display_affinity != 1) { - team->t.t_display_affinity = 1; - } + __kmp_set_thread_place(team, th, place, place, place); s_count++; if ((s_count == S) && rem && (gap_ct == gap)) { diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp --- a/openmp/runtime/src/z_Linux_util.cpp +++ b/openmp/runtime/src/z_Linux_util.cpp @@ -486,7 +486,7 @@ #endif /* USE_ITT_BUILD */ #if KMP_AFFINITY_SUPPORTED - __kmp_affinity_set_init_mask(gtid, FALSE); + __kmp_affinity_bind_init_mask(gtid); #endif #ifdef KMP_CANCEL_THREADS diff --git a/openmp/runtime/src/z_Windows_NT_util.cpp 
b/openmp/runtime/src/z_Windows_NT_util.cpp --- a/openmp/runtime/src/z_Windows_NT_util.cpp +++ b/openmp/runtime/src/z_Windows_NT_util.cpp @@ -1006,7 +1006,7 @@ __kmp_itt_thread_name(gtid); #endif /* USE_ITT_BUILD */ - __kmp_affinity_set_init_mask(gtid, FALSE); + __kmp_affinity_bind_init_mask(gtid); #if KMP_ARCH_X86 || KMP_ARCH_X86_64 // Set FP control regs to be a copy of the parallel initialization thread's.