diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst --- a/openmp/docs/design/Runtimes.rst +++ b/openmp/docs/design/Runtimes.rst @@ -299,6 +299,7 @@ * ``respect`` (default) and ``norespect`` - determine whether to respect the original process affinity mask. * ``verbose`` and ``noverbose`` (default) - determine whether to display affinity information. * ``warnings`` (default) and ``nowarnings`` - determine whether to display warnings during affinity detection. +* ``reset`` and ``noreset`` (default) - determine whether to reset the primary thread's affinity after outermost parallel region(s). * ``granularity=<specifier>`` - takes the following specifiers ``thread``, ``core`` (default), ``tile``, ``socket``, ``die``, ``group`` (Windows only). The granularity describes the lowest topology levels that OpenMP threads are allowed to float within a topology map. diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -841,7 +841,9 @@ extern void __kmp_affinity_bind_thread(int which); extern kmp_affin_mask_t *__kmp_affin_fullMask; +extern kmp_affin_mask_t *__kmp_affin_origMask; extern char *__kmp_cpuinfo_file; +extern bool __kmp_affin_reset; #endif /* KMP_AFFINITY_SUPPORTED */ @@ -3627,8 +3629,23 @@ r->r.r_affinity_assigned = TRUE; } } +// Restore the root (uber) thread's affinity to the mask saved in +// __kmp_affin_origMask and clear r_affinity_assigned. No-op for other threads. +static inline void __kmp_reset_root_init_mask(int gtid) { + // __kmp_affin_origMask is only filled in when affinity is capable + if (!KMP_AFFINITY_CAPABLE()) + return; + kmp_info_t *th = __kmp_threads[gtid]; + kmp_root_t *r = th->th.th_root; + if (r->r.r_uber_thread == th && r->r.r_affinity_assigned) { + __kmp_set_system_affinity(__kmp_affin_origMask, FALSE); + KMP_CPU_COPY(th->th.th_affin_mask, __kmp_affin_origMask); + r->r.r_affinity_assigned = FALSE; + } +} #else /* KMP_AFFINITY_SUPPORTED */ #define __kmp_assign_root_init_mask() /* Nothing */ +static inline void __kmp_reset_root_init_mask(int gtid) {} #endif /* KMP_AFFINITY_SUPPORTED */ // No need for KMP_AFFINITY_SUPPORTED guard as only one field in the // format string is for affinity, so platforms that do not 
support diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp --- a/openmp/runtime/src/kmp_affinity.cpp +++ b/openmp/runtime/src/kmp_affinity.cpp @@ -1536,6 +1536,8 @@ // internal topology object and set the layer ids for it. Each routine // returns a boolean on whether it was successful at doing so. kmp_affin_mask_t *__kmp_affin_fullMask = NULL; +// Original mask is a subset of full mask in multiple processor groups topology +kmp_affin_mask_t *__kmp_affin_origMask = NULL; #if KMP_USE_HWLOC static inline bool __kmp_hwloc_is_cache_type(hwloc_obj_t obj) { @@ -4072,8 +4074,13 @@ if (__kmp_affin_fullMask == NULL) { KMP_CPU_ALLOC(__kmp_affin_fullMask); } + if (__kmp_affin_origMask == NULL) { + KMP_CPU_ALLOC(__kmp_affin_origMask); + } if (KMP_AFFINITY_CAPABLE()) { __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE); + // Make a copy before possibly expanding to the entire machine mask + __kmp_affin_origMask->copy(__kmp_affin_fullMask); if (__kmp_affinity_respect_mask) { // Count the number of available processors. 
unsigned i; @@ -4111,6 +4118,10 @@ __kmp_avail_proc = __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask); #if KMP_OS_WINDOWS + if (__kmp_num_proc_groups <= 1) { + // Copy expanded full mask if topology has single processor group + __kmp_affin_origMask->copy(__kmp_affin_fullMask); + } // Set the process affinity mask since threads' affinity // masks must be subset of process mask in Windows* OS __kmp_affin_fullMask->set_process_affinity(true); @@ -4283,6 +4294,13 @@ if (__kmp_affinity_verbose) __kmp_topology->print("KMP_AFFINITY"); bool filtered = __kmp_topology->filter_hw_subset(); + if (filtered) { +#if KMP_OS_WINDOWS + // Copy filtered full mask if topology has single processor group + if (__kmp_num_proc_groups <= 1) +#endif + __kmp_affin_origMask->copy(__kmp_affin_fullMask); + } if (filtered && __kmp_affinity_verbose) __kmp_topology->print("KMP_HW_SUBSET"); machine_hierarchy.init(__kmp_topology->get_num_hw_threads()); @@ -4506,6 +4524,10 @@ KMP_CPU_FREE(__kmp_affin_fullMask); __kmp_affin_fullMask = NULL; } + if (__kmp_affin_origMask != NULL) { + KMP_CPU_FREE(__kmp_affin_origMask); + __kmp_affin_origMask = NULL; + } __kmp_affinity_num_masks = 0; __kmp_affinity_type = affinity_default; __kmp_affinity_num_places = 0; diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -632,6 +632,11 @@ "team %p\n", global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); } +#if KMP_AFFINITY_SUPPORTED + if (this_thr->th.th_team->t.t_level == 0 && __kmp_affin_reset) { + __kmp_reset_root_init_mask(global_tid); + } +#endif } else { if (__kmp_tasking_mode != tskm_immediate_exec) { KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting " @@ -2021,6 +2026,11 @@ } __kmp_assign_root_init_mask(); gtid = __kmp_get_gtid(); +#if KMP_AFFINITY_SUPPORTED + if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) { + 
__kmp_reset_root_init_mask(gtid); + } +#endif __kmp_aux_display_affinity(gtid, format); } @@ -2034,6 +2044,11 @@ } __kmp_assign_root_init_mask(); gtid = __kmp_get_gtid(); +#if KMP_AFFINITY_SUPPORTED + if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) { + __kmp_reset_root_init_mask(gtid); + } +#endif __kmp_str_buf_init(&capture_buf); num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); if (buffer && buf_size) { diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h --- a/openmp/runtime/src/kmp_ftn_entry.h +++ b/openmp/runtime/src/kmp_ftn_entry.h @@ -238,6 +238,10 @@ __kmp_middle_initialize(); } __kmp_assign_root_init_mask(); + int gtid = __kmp_get_gtid(); + if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) { + __kmp_reset_root_init_mask(gtid); + } return __kmp_aux_get_affinity(mask); #endif } @@ -358,9 +362,13 @@ if (!TCR_4(__kmp_init_middle)) { __kmp_middle_initialize(); } - __kmp_assign_root_init_mask(); gtid = __kmp_entry_gtid(); thread = __kmp_threads[gtid]; +#if KMP_AFFINITY_SUPPORTED + if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) { + __kmp_assign_root_init_mask(); + } +#endif // return thread -> th.th_team -> t.t_current_task[ // thread->th.th_info.ds.ds_tid ] -> icvs.nproc; return thread->th.th_current_task->td_icvs.nproc; @@ -509,6 +517,11 @@ } __kmp_assign_root_init_mask(); gtid = __kmp_get_gtid(); +#if KMP_AFFINITY_SUPPORTED + if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) { + __kmp_reset_root_init_mask(gtid); + } +#endif ConvertedString cformat(format, size); __kmp_aux_display_affinity(gtid, cformat.get()); #endif @@ -537,6 +550,11 @@ } __kmp_assign_root_init_mask(); gtid = __kmp_get_gtid(); +#if KMP_AFFINITY_SUPPORTED + if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) { + __kmp_reset_root_init_mask(gtid); + } +#endif __kmp_str_buf_init(&capture_buf); ConvertedString cformat(format, for_size); 
num_required = __kmp_aux_capture_affinity(gtid, cformat.get(), &capture_buf); @@ -612,7 +630,16 @@ if (!TCR_4(__kmp_init_middle)) { __kmp_middle_initialize(); } - __kmp_assign_root_init_mask(); +#if KMP_AFFINITY_SUPPORTED + if (!__kmp_affin_reset) { + // only bind root here if its affinity reset is not requested + int gtid = __kmp_entry_gtid(); + kmp_info_t *thread = __kmp_threads[gtid]; + if (thread->th.th_team->t.t_level == 0) { + __kmp_assign_root_init_mask(); + } + } +#endif return __kmp_avail_proc; #endif } @@ -802,9 +829,16 @@ if (!TCR_4(__kmp_init_middle)) { __kmp_middle_initialize(); } - __kmp_assign_root_init_mask(); if (!KMP_AFFINITY_CAPABLE()) return 0; + if (!__kmp_affin_reset) { + // only bind root here if its affinity reset is not requested + int gtid = __kmp_entry_gtid(); + kmp_info_t *thread = __kmp_threads[gtid]; + if (thread->th.th_team->t.t_level == 0) { + __kmp_assign_root_init_mask(); + } + } return __kmp_affinity_num_masks; #endif } @@ -818,9 +852,16 @@ if (!TCR_4(__kmp_init_middle)) { __kmp_middle_initialize(); } - __kmp_assign_root_init_mask(); if (!KMP_AFFINITY_CAPABLE()) return 0; + if (!__kmp_affin_reset) { + // only bind root here if its affinity reset is not requested + int gtid = __kmp_entry_gtid(); + kmp_info_t *thread = __kmp_threads[gtid]; + if (thread->th.th_team->t.t_level == 0) { + __kmp_assign_root_init_mask(); + } + } if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks) return 0; kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num); @@ -844,9 +885,16 @@ if (!TCR_4(__kmp_init_middle)) { __kmp_middle_initialize(); } - __kmp_assign_root_init_mask(); if (!KMP_AFFINITY_CAPABLE()) return; + if (!__kmp_affin_reset) { + // only bind root here if its affinity reset is not requested + int gtid = __kmp_entry_gtid(); + kmp_info_t *thread = __kmp_threads[gtid]; + if (thread->th.th_team->t.t_level == 0) { + __kmp_assign_root_init_mask(); + } + } if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks) 
return; kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num); @@ -870,11 +918,13 @@ if (!TCR_4(__kmp_init_middle)) { __kmp_middle_initialize(); } - __kmp_assign_root_init_mask(); if (!KMP_AFFINITY_CAPABLE()) return -1; gtid = __kmp_entry_gtid(); thread = __kmp_thread_from_gtid(gtid); + if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) { + __kmp_assign_root_init_mask(); + } if (thread->th.th_current_place < 0) return -1; return thread->th.th_current_place; @@ -890,11 +940,13 @@ if (!TCR_4(__kmp_init_middle)) { __kmp_middle_initialize(); } - __kmp_assign_root_init_mask(); if (!KMP_AFFINITY_CAPABLE()) return 0; gtid = __kmp_entry_gtid(); thread = __kmp_thread_from_gtid(gtid); + if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) { + __kmp_assign_root_init_mask(); + } first_place = thread->th.th_first_place; last_place = thread->th.th_last_place; if (first_place < 0 || last_place < 0) @@ -917,11 +969,13 @@ if (!TCR_4(__kmp_init_middle)) { __kmp_middle_initialize(); } - __kmp_assign_root_init_mask(); if (!KMP_AFFINITY_CAPABLE()) return; gtid = __kmp_entry_gtid(); thread = __kmp_thread_from_gtid(gtid); + if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) { + __kmp_assign_root_init_mask(); + } first_place = thread->th.th_first_place; last_place = thread->th.th_last_place; if (first_place < 0 || last_place < 0) diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp --- a/openmp/runtime/src/kmp_global.cpp +++ b/openmp/runtime/src/kmp_global.cpp @@ -285,6 +285,7 @@ unsigned __kmp_affinity_num_masks = 0; char *__kmp_cpuinfo_file = NULL; +bool __kmp_affin_reset = 0; #endif /* KMP_AFFINITY_SUPPORTED */ diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -2641,6 +2641,11 @@ __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); +#if KMP_AFFINITY_SUPPORTED + if 
(master_th->th.th_team->t.t_level == 0 && __kmp_affin_reset) { + __kmp_reset_root_init_mask(gtid); + } +#endif #if OMPT_SUPPORT int flags = OMPT_INVOKER(fork_context) | diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -2169,6 +2169,7 @@ int respect = 0; int gran = 0; int dups = 0; + int reset = 0; bool set = false; KMP_ASSERT(value != NULL); @@ -2224,6 +2225,7 @@ #define set_respect(val) _set_param(respect, *out_respect, val) #define set_dups(val) _set_param(dups, *out_dups, val) #define set_proclist(val) _set_param(proclist, *out_proclist, val) +#define set_reset(val) _set_param(reset, __kmp_affin_reset, val) #define set_gran(val, levels) \ { \ @@ -2293,6 +2295,12 @@ } else if (__kmp_match_str("norespect", buf, CCAST(const char **, &next))) { set_respect(FALSE); buf = next; + } else if (__kmp_match_str("reset", buf, CCAST(const char **, &next))) { + set_reset(TRUE); + buf = next; + } else if (__kmp_match_str("noreset", buf, CCAST(const char **, &next))) { + set_reset(FALSE); + buf = next; } else if (__kmp_match_str("duplicates", buf, CCAST(const char **, &next)) || __kmp_match_str("dups", buf, CCAST(const char **, &next))) { @@ -2433,6 +2441,7 @@ #undef set_warnings #undef set_respect #undef set_granularity +#undef set_reset __kmp_str_free(&buffer); @@ -2564,6 +2573,11 @@ } else { __kmp_str_buf_print(buffer, "%s,", "norespect"); } + if (__kmp_affin_reset) { + __kmp_str_buf_print(buffer, "%s,", "reset"); + } else { + __kmp_str_buf_print(buffer, "%s,", "noreset"); + } __kmp_str_buf_print(buffer, "granularity=%s,", __kmp_hw_get_keyword(__kmp_affinity_gran, false)); } diff --git a/openmp/runtime/test/affinity/kmp-affinity-reset.c b/openmp/runtime/test/affinity/kmp-affinity-reset.c new file mode 100644 --- /dev/null +++ b/openmp/runtime/test/affinity/kmp-affinity-reset.c @@ -0,0 +1,66 @@ +// RUN: %libomp-compile -D_GNU_SOURCE +// RUN: env 
OMP_NUM_THREADS=2,2 KMP_AFFINITY=reset,granularity=thread,compact %libomp-run
+// REQUIRES: linux
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+#include "libomp_test_affinity.h"
+
+#define CHECK_EQUAL 0 // fail unless the two masks are equal
+#define CHECK_NOT_EQUAL 1 // fail if the two masks are equal
+
+void check_primary_thread_affinity(int line, affinity_mask_t *other_aff,
+                                   int type) {
+  #pragma omp master
+  {
+    affinity_mask_t *primary_aff = affinity_mask_alloc();
+    get_thread_affinity(primary_aff); // current mask of the calling thread
+    if (type == CHECK_EQUAL && !affinity_mask_equal(primary_aff, other_aff)) {
+      fprintf(stderr, "error: line %d: primary affinity was not equal\n", line);
+      exit(EXIT_FAILURE);
+    } else if (type == CHECK_NOT_EQUAL &&
+               affinity_mask_equal(primary_aff, other_aff)) {
+      fprintf(stderr, "error: line %d: primary affinity was equal\n", line);
+      exit(EXIT_FAILURE);
+    }
+    affinity_mask_free(primary_aff);
+  }
+}
+
+#define CHECK_PRIMARY_THREAD_AFFINITY_EQUAL(other_aff) \
+  check_primary_thread_affinity(__LINE__, other_aff, CHECK_EQUAL)
+#define CHECK_PRIMARY_THREAD_AFFINITY_NOT_EQUAL(other_aff) \
+  check_primary_thread_affinity(__LINE__, other_aff, CHECK_NOT_EQUAL)
+
+int main() {
+  int i;
+  affinity_mask_t *initial_mask = affinity_mask_alloc();
+  get_thread_affinity(initial_mask); // mask before any parallel region
+
+  for (i = 0; i < 10; ++i) {
+    #pragma omp parallel
+    {
+      CHECK_PRIMARY_THREAD_AFFINITY_NOT_EQUAL(initial_mask);
+    }
+    CHECK_PRIMARY_THREAD_AFFINITY_EQUAL(initial_mask); // reset after region
+  }
+
+  omp_set_max_active_levels(2); // allow one level of nested parallelism
+  for (i = 0; i < 10; ++i) {
+    #pragma omp parallel
+    {
+      CHECK_PRIMARY_THREAD_AFFINITY_NOT_EQUAL(initial_mask);
+
+      #pragma omp parallel
+      CHECK_PRIMARY_THREAD_AFFINITY_NOT_EQUAL(initial_mask);
+
+      CHECK_PRIMARY_THREAD_AFFINITY_NOT_EQUAL(initial_mask);
+    }
+    CHECK_PRIMARY_THREAD_AFFINITY_EQUAL(initial_mask); // reset after region
+  }
+
+  affinity_mask_free(initial_mask);
+  return EXIT_SUCCESS;
+}