diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -823,27 +823,49 @@ affinity_top_method_default }; -#define affinity_respect_mask_default (-1) +#define affinity_respect_mask_default (2) + +typedef struct kmp_affinity_flags_t { + unsigned dups : 1; + unsigned verbose : 1; + unsigned warnings : 1; + unsigned respect : 2; + unsigned reset : 1; + unsigned initialized : 1; + unsigned reserved : 25; +} kmp_affinity_flags_t; +KMP_BUILD_ASSERT(sizeof(kmp_affinity_flags_t) == 4); + +typedef struct kmp_affinity_t { + char *proclist; + enum affinity_type type; + kmp_hw_t gran; + int gran_levels; + int compact; + int offset; + kmp_affinity_flags_t flags; + unsigned num_masks; + kmp_affin_mask_t *masks; + unsigned num_os_id_masks; + kmp_affin_mask_t *os_id_masks; + const char *env_var; +} kmp_affinity_t; + +#define KMP_AFFINITY_INIT(env) \ + { \ + nullptr, affinity_default, KMP_HW_UNKNOWN, -1, 0, 0, \ + {TRUE, FALSE, TRUE, affinity_respect_mask_default, FALSE, FALSE}, 0, \ + nullptr, 0, nullptr, env \ + } -extern enum affinity_type __kmp_affinity_type; /* Affinity type */ -extern kmp_hw_t __kmp_affinity_gran; /* Affinity granularity */ -extern int __kmp_affinity_gran_levels; /* corresponding int value */ -extern int __kmp_affinity_dups; /* Affinity duplicate masks */ extern enum affinity_top_method __kmp_affinity_top_method; -extern int __kmp_affinity_compact; /* Affinity 'compact' value */ -extern int __kmp_affinity_offset; /* Affinity offset value */ -extern int __kmp_affinity_verbose; /* Was verbose specified for KMP_AFFINITY? */ -extern int __kmp_affinity_warnings; /* KMP_AFFINITY warnings enabled ? */ -extern int __kmp_affinity_respect_mask; // Respect process' init affinity mask? 
-extern char *__kmp_affinity_proclist; /* proc ID list */ -extern kmp_affin_mask_t *__kmp_affinity_masks; -extern unsigned __kmp_affinity_num_masks; +extern kmp_affinity_t __kmp_affinity; + extern void __kmp_affinity_bind_thread(int which); extern kmp_affin_mask_t *__kmp_affin_fullMask; extern kmp_affin_mask_t *__kmp_affin_origMask; extern char *__kmp_cpuinfo_file; -extern bool __kmp_affin_reset; #endif /* KMP_AFFINITY_SUPPORTED */ @@ -882,7 +904,7 @@ #define KMP_AFFINITY_NON_PROC_BIND \ ((__kmp_nested_proc_bind.bind_types[0] == proc_bind_false || \ __kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) && \ - (__kmp_affinity_num_masks > 0 || __kmp_affinity_type == affinity_balanced)) + (__kmp_affinity.num_masks > 0 || __kmp_affinity.type == affinity_balanced)) #endif /* KMP_AFFINITY_SUPPORTED */ extern int __kmp_affinity_num_places; @@ -3606,7 +3628,7 @@ kmp_affin_mask_t *mask); extern kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf, kmp_affin_mask_t *mask); -extern void __kmp_affinity_initialize(void); +extern void __kmp_affinity_initialize(kmp_affinity_t &affinity); extern void __kmp_affinity_uninitialize(void); extern void __kmp_affinity_set_init_mask( int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */ diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h --- a/openmp/runtime/src/kmp_affinity.h +++ b/openmp/runtime/src/kmp_affinity.h @@ -128,13 +128,15 @@ if (__kmp_hwloc_topology == NULL) { if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) { __kmp_hwloc_error = TRUE; - if (__kmp_affinity_verbose) + if (__kmp_affinity.flags.verbose) { KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()"); + } } if (hwloc_topology_load(__kmp_hwloc_topology) < 0) { __kmp_hwloc_error = TRUE; - if (__kmp_affinity_verbose) + if (__kmp_affinity.flags.verbose) { KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()"); + } } } topology_support = hwloc_topology_get_support(__kmp_hwloc_topology); @@ 
-792,7 +794,12 @@ void canonicalize(); void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores); - // Functions used after canonicalize() called +// Functions used after canonicalize() called + +#if KMP_AFFINITY_SUPPORTED + // Set the granularity for affinity settings + void set_granularity(kmp_affinity_t &stgs) const; +#endif bool filter_hw_subset(); bool is_close(int hwt1, int hwt2, int level) const; bool is_uniform() const { return flags.uniform; } diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp --- a/openmp/runtime/src/kmp_affinity.cpp +++ b/openmp/runtime/src/kmp_affinity.cpp @@ -142,8 +142,9 @@ // If affinity is supported, check the affinity // verbose and warning flags before printing warning #define KMP_AFF_WARNING(...) \ - if (__kmp_affinity_verbose || \ - (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) { \ + if (__kmp_affinity.flags.verbose || \ + (__kmp_affinity.flags.warnings && \ + (__kmp_affinity.type != affinity_none))) { \ KMP_WARNING(__VA_ARGS__); \ } #else @@ -175,9 +176,9 @@ const kmp_hw_thread_t *aa = (const kmp_hw_thread_t *)a; const kmp_hw_thread_t *bb = (const kmp_hw_thread_t *)b; int depth = __kmp_topology->get_depth(); - KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0); - KMP_DEBUG_ASSERT(__kmp_affinity_compact <= depth); - for (i = 0; i < __kmp_affinity_compact; i++) { + KMP_DEBUG_ASSERT(__kmp_affinity.compact >= 0); + KMP_DEBUG_ASSERT(__kmp_affinity.compact <= depth); + for (i = 0; i < __kmp_affinity.compact; i++) { int j = depth - i - 1; if (aa->sub_ids[j] < bb->sub_ids[j]) return -1; @@ -185,7 +186,7 @@ return 1; } for (; i < depth; i++) { - int j = i - __kmp_affinity_compact; + int j = i - __kmp_affinity.compact; if (aa->sub_ids[j] < bb->sub_ids[j]) return -1; if (aa->sub_ids[j] > bb->sub_ids[j]) @@ -790,41 +791,12 @@ __kmp_str_buf_free(&buf); } -void kmp_topology_t::canonicalize() { -#if KMP_GROUP_AFFINITY - _insert_windows_proc_groups(); -#endif - 
_remove_radix1_layers(); - _gather_enumeration_information(); - _discover_uniformity(); - _set_sub_ids(); - _set_globals(); - _set_last_level_cache(); - -#if KMP_MIC_SUPPORTED - // Manually Add L2 = Tile equivalence - if (__kmp_mic_type == mic3) { - if (get_level(KMP_HW_L2) != -1) - set_equivalent_type(KMP_HW_TILE, KMP_HW_L2); - else if (get_level(KMP_HW_TILE) != -1) - set_equivalent_type(KMP_HW_L2, KMP_HW_TILE); - } -#endif - - // Perform post canonicalization checking - KMP_ASSERT(depth > 0); - for (int level = 0; level < depth; ++level) { - // All counts, ratios, and types must be valid - KMP_ASSERT(count[level] > 0 && ratio[level] > 0); - KMP_ASSERT_VALID_HW_TYPE(types[level]); - // Detected types must point to themselves - KMP_ASSERT(equivalent[types[level]] == types[level]); - } - #if KMP_AFFINITY_SUPPORTED +void kmp_topology_t::set_granularity(kmp_affinity_t &affinity) const { + const char *env_var = affinity.env_var; // Set the number of affinity granularity levels - if (__kmp_affinity_gran_levels < 0) { - kmp_hw_t gran_type = get_equivalent_type(__kmp_affinity_gran); + if (affinity.gran_levels < 0) { + kmp_hw_t gran_type = get_equivalent_type(affinity.gran); // Check if user's granularity request is valid if (gran_type == KMP_HW_UNKNOWN) { // First try core, then thread, then package @@ -837,10 +809,10 @@ } KMP_ASSERT(gran_type != KMP_HW_UNKNOWN); // Warn user what granularity setting will be used instead - KMP_AFF_WARNING(AffGranularityBad, "KMP_AFFINITY", - __kmp_hw_get_catalog_string(__kmp_affinity_gran), + KMP_AFF_WARNING(AffGranularityBad, env_var, + __kmp_hw_get_catalog_string(affinity.gran), __kmp_hw_get_catalog_string(gran_type)); - __kmp_affinity_gran = gran_type; + affinity.gran = gran_type; } #if KMP_GROUP_AFFINITY // If more than one processor group exists, and the level of @@ -855,17 +827,49 @@ int proc_group_depth = get_level(KMP_HW_PROC_GROUP); if (gran_depth >= 0 && proc_group_depth >= 0 && gran_depth < proc_group_depth) { - 
KMP_AFF_WARNING(AffGranTooCoarseProcGroup, "KMP_AFFINITY", - __kmp_hw_get_catalog_string(__kmp_affinity_gran)); - __kmp_affinity_gran = gran_type = KMP_HW_PROC_GROUP; + KMP_AFF_WARNING(AffGranTooCoarseProcGroup, env_var, + __kmp_hw_get_catalog_string(affinity.gran)); + affinity.gran = gran_type = KMP_HW_PROC_GROUP; } } #endif - __kmp_affinity_gran_levels = 0; + affinity.gran_levels = 0; for (int i = depth - 1; i >= 0 && get_type(i) != gran_type; --i) - __kmp_affinity_gran_levels++; + affinity.gran_levels++; + } +} +#endif + +void kmp_topology_t::canonicalize() { +#if KMP_GROUP_AFFINITY + _insert_windows_proc_groups(); +#endif + _remove_radix1_layers(); + _gather_enumeration_information(); + _discover_uniformity(); + _set_sub_ids(); + _set_globals(); + _set_last_level_cache(); + +#if KMP_MIC_SUPPORTED + // Manually Add L2 = Tile equivalence + if (__kmp_mic_type == mic3) { + if (get_level(KMP_HW_L2) != -1) + set_equivalent_type(KMP_HW_TILE, KMP_HW_L2); + else if (get_level(KMP_HW_TILE) != -1) + set_equivalent_type(KMP_HW_L2, KMP_HW_TILE); + } +#endif + + // Perform post canonicalization checking + KMP_ASSERT(depth > 0); + for (int level = 0; level < depth; ++level) { + // All counts, ratios, and types must be valid + KMP_ASSERT(count[level] > 0 && ratio[level] > 0); + KMP_ASSERT_VALID_HW_TYPE(types[level]); + // Detected types must point to themselves + KMP_ASSERT(equivalent[types[level]] == types[level]); } -#endif // KMP_AFFINITY_SUPPORTED } // Canonicalize an explicit packages X cores/pkg X threads/core topology @@ -1301,7 +1305,7 @@ // Only use Hwloc if affinity isn't explicitly disabled and // user requests Hwloc topology method if (__kmp_affinity_top_method == affinity_top_method_hwloc && - __kmp_affinity_type != affinity_disabled) { + __kmp_affinity.type != affinity_disabled) { affinity_dispatch = new KMPHwlocAffinity(); } else #endif @@ -1663,14 +1667,14 @@ hwloc_topology_t tp = __kmp_hwloc_topology; *msg_id = kmp_i18n_null; - if (__kmp_affinity_verbose) { + 
if (__kmp_affinity.flags.verbose) { KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY"); } if (!KMP_AFFINITY_CAPABLE()) { // Hack to try and infer the machine topology using only the data // available from hwloc on the current thread, and __kmp_xproc. - KMP_ASSERT(__kmp_affinity_type == affinity_none); + KMP_ASSERT(__kmp_affinity.type == affinity_none); // hwloc only guarantees existance of PU object, so check PACKAGE and CORE hwloc_obj_t o = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0); if (o != NULL) @@ -1864,15 +1868,15 @@ int depth = 3; kmp_hw_t types[] = {KMP_HW_SOCKET, KMP_HW_CORE, KMP_HW_THREAD}; - if (__kmp_affinity_verbose) { + if (__kmp_affinity.flags.verbose) { KMP_INFORM(UsingFlatOS, "KMP_AFFINITY"); } - // Even if __kmp_affinity_type == affinity_none, this routine might still - // called to set __kmp_ncores, as well as + // Even if __kmp_affinity.type == affinity_none, this routine might still + // be called to set __kmp_ncores, as well as // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages. 
if (!KMP_AFFINITY_CAPABLE()) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); + KMP_ASSERT(__kmp_affinity.type == affinity_none); __kmp_ncores = nPackages = __kmp_xproc; __kmp_nThreadsPerCore = nCoresPerPkg = 1; return true; @@ -1902,7 +1906,7 @@ hw_thread.ids[2] = 0; avail_ct++; } - if (__kmp_affinity_verbose) { + if (__kmp_affinity.flags.verbose) { KMP_INFORM(OSProcToPackage, "KMP_AFFINITY"); } return true; @@ -1919,13 +1923,13 @@ kmp_hw_t types[] = {KMP_HW_PROC_GROUP, KMP_HW_CORE, KMP_HW_THREAD}; const static size_t BITS_PER_GROUP = CHAR_BIT * sizeof(DWORD_PTR); - if (__kmp_affinity_verbose) { + if (__kmp_affinity.flags.verbose) { KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY"); } // If we aren't affinity capable, then use flat topology if (!KMP_AFFINITY_CAPABLE()) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); + KMP_ASSERT(__kmp_affinity.type == affinity_none); nPackages = __kmp_num_proc_groups; __kmp_nThreadsPerCore = 1; __kmp_ncores = __kmp_xproc; @@ -2065,7 +2069,7 @@ kmp_cpuid buf; *msg_id = kmp_i18n_null; - if (__kmp_affinity_verbose) { + if (__kmp_affinity.flags.verbose) { KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC)); } @@ -2084,7 +2088,7 @@ if (!KMP_AFFINITY_CAPABLE()) { // Hack to try and infer the machine topology using only the data // available from cpuid on the current thread, and __kmp_xproc. - KMP_ASSERT(__kmp_affinity_type == affinity_none); + KMP_ASSERT(__kmp_affinity.type == affinity_none); // Get an upper bound on the number of threads per package using cpuid(1). // On some OS/chps combinations where HT is supported by the chip but is @@ -2136,7 +2140,7 @@ // From here on, we can assume that it is safe to call // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if - // __kmp_affinity_type = affinity_none. + // __kmp_affinity.type = affinity_none. // Save the affinity mask for the current thread. 
kmp_affinity_raii_t previous_affinity; @@ -2521,7 +2525,7 @@ KMP_BUILD_ASSERT(sizeof(known_levels) * CHAR_BIT > KMP_HW_LAST); *msg_id = kmp_i18n_null; - if (__kmp_affinity_verbose) { + if (__kmp_affinity.flags.verbose) { KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC)); } @@ -2585,7 +2589,7 @@ if (!KMP_AFFINITY_CAPABLE()) { // Hack to try and infer the machine topology using only the data // available from cpuid on the current thread, and __kmp_xproc. - KMP_ASSERT(__kmp_affinity_type == affinity_none); + KMP_ASSERT(__kmp_affinity.type == affinity_none); for (unsigned i = 0; i < levels_index; ++i) { if (levels[i].level_type == INTEL_LEVEL_TYPE_SMT) { __kmp_nThreadsPerCore = levels[i].nitems; @@ -2624,7 +2628,7 @@ // From here on, we can assume that it is safe to call // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if - // __kmp_affinity_type = affinity_none. + // __kmp_affinity.type = affinity_none. // Save the affinity mask for the current thread. kmp_affinity_raii_t previous_affinity; @@ -2808,7 +2812,7 @@ const char *envvar = __kmp_cpuinfo_get_envvar(); *msg_id = kmp_i18n_null; - if (__kmp_affinity_verbose) { + if (__kmp_affinity.flags.verbose) { KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename); } @@ -3239,7 +3243,7 @@ // not enabled. __kmp_ncores = totals[coreIdIndex]; if (!KMP_AFFINITY_CAPABLE()) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); + KMP_ASSERT(__kmp_affinity.type == affinity_none); return true; } @@ -3339,13 +3343,14 @@ // Create and return a table of affinity masks, indexed by OS thread ID. // This routine handles OR'ing together all the affinity masks of threads // that are sufficiently close, if granularity > fine. -static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex, - unsigned *numUnique) { +static void __kmp_create_os_id_masks(unsigned *numUnique, + kmp_affinity_t &affinity) { // First form a table of affinity masks in order of OS thread id. 
int maxOsId; int i; int numAddrs = __kmp_topology->get_num_hw_threads(); int depth = __kmp_topology->get_depth(); + const char *env_var = affinity.env_var; KMP_ASSERT(numAddrs); KMP_ASSERT(depth); @@ -3358,13 +3363,13 @@ if (i == 0) break; } - kmp_affin_mask_t *osId2Mask; - KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1)); - KMP_ASSERT(__kmp_affinity_gran_levels >= 0); - if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) { - KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels); + affinity.num_os_id_masks = maxOsId + 1; + KMP_CPU_ALLOC_ARRAY(affinity.os_id_masks, affinity.num_os_id_masks); + KMP_ASSERT(affinity.gran_levels >= 0); + if (affinity.flags.verbose && (affinity.gran_levels > 0)) { + KMP_INFORM(ThreadsMigrate, env_var, affinity.gran_levels); } - if (__kmp_affinity_gran_levels >= (int)depth) { + if (affinity.gran_levels >= (int)depth) { KMP_AFF_WARNING(AffThreadsMayMigrate); } @@ -3383,17 +3388,17 @@ // If this thread is sufficiently close to the leader (within the // granularity setting), then set the bit for this os thread in the // affinity mask for this group, and go on to the next thread. - if (__kmp_topology->is_close(leader, i, __kmp_affinity_gran_levels)) { + if (__kmp_topology->is_close(leader, i, affinity.gran_levels)) { KMP_CPU_SET(__kmp_topology->at(i).os_id, sum); continue; } // For every thread in this group, copy the mask to the thread's entry in - // the osId2Mask table. Mark the first address as a leader. + // the OS Id mask table. Mark the first address as a leader. for (; j < i; j++) { int osId = __kmp_topology->at(j).os_id; KMP_DEBUG_ASSERT(osId <= maxOsId); - kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId); + kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.os_id_masks, osId); KMP_CPU_COPY(mask, sum); __kmp_topology->at(j).leader = (j == leader); } @@ -3406,20 +3411,18 @@ } // For every thread in last group, copy the mask to the thread's - // entry in the osId2Mask table. 
+ // entry in the OS Id mask table. for (; j < i; j++) { int osId = __kmp_topology->at(j).os_id; KMP_DEBUG_ASSERT(osId <= maxOsId); - kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId); + kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.os_id_masks, osId); KMP_CPU_COPY(mask, sum); __kmp_topology->at(j).leader = (j == leader); } unique++; KMP_CPU_FREE_FROM_STACK(sum); - *maxIndex = maxOsId; *numUnique = unique; - return osId2Mask; } // Stuff for the affinity proclist parsers. It's easier to declare these vars @@ -3460,12 +3463,13 @@ // Re-parse the proclist (for the explicit affinity type), and form the list // of affinity newMasks indexed by gtid. -static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks, - unsigned int *out_numMasks, - const char *proclist, - kmp_affin_mask_t *osId2Mask, - int maxOsId) { +static void __kmp_affinity_process_proclist(kmp_affinity_t &affinity) { int i; + kmp_affin_mask_t **out_masks = &affinity.masks; + unsigned *out_numMasks = &affinity.num_masks; + const char *proclist = affinity.proclist; + kmp_affin_mask_t *osId2Mask = affinity.os_id_masks; + int maxOsId = affinity.num_os_id_masks - 1; const char *scan = proclist; const char *next = proclist; @@ -3670,10 +3674,11 @@ signed := - signed -----------------------------------------------------------------------------*/ static void __kmp_process_subplace_list(const char **scan, - kmp_affin_mask_t *osId2Mask, - int maxOsId, kmp_affin_mask_t *tempMask, + kmp_affinity_t &affinity, int maxOsId, + kmp_affin_mask_t *tempMask, int *setSize) { const char *next; + kmp_affin_mask_t *osId2Mask = affinity.os_id_masks; for (;;) { int start, count, stride, i; @@ -3787,21 +3792,22 @@ } } -static void __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask, +static void __kmp_process_place(const char **scan, kmp_affinity_t &affinity, int maxOsId, kmp_affin_mask_t *tempMask, int *setSize) { const char *next; + kmp_affin_mask_t *osId2Mask = affinity.os_id_masks; // valid 
follow sets are '{' '!' and num SKIP_WS(*scan); if (**scan == '{') { (*scan)++; // skip '{' - __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize); + __kmp_process_subplace_list(scan, affinity, maxOsId, tempMask, setSize); KMP_ASSERT2(**scan == '}', "bad explicit places list"); (*scan)++; // skip '}' } else if (**scan == '!') { (*scan)++; // skip '!' - __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize); + __kmp_process_place(scan, affinity, maxOsId, tempMask, setSize); KMP_CPU_COMPLEMENT(maxOsId, tempMask); } else if ((**scan >= '0') && (**scan <= '9')) { next = *scan; @@ -3822,12 +3828,13 @@ } // static void -void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks, - unsigned int *out_numMasks, - const char *placelist, - kmp_affin_mask_t *osId2Mask, - int maxOsId) { +void __kmp_affinity_process_placelist(kmp_affinity_t &affinity) { int i, j, count, stride, sign; + kmp_affin_mask_t **out_masks = &affinity.masks; + unsigned *out_numMasks = &affinity.num_masks; + const char *placelist = affinity.proclist; + kmp_affin_mask_t *osId2Mask = affinity.os_id_masks; + int maxOsId = affinity.num_os_id_masks - 1; const char *scan = placelist; const char *next = placelist; @@ -3847,7 +3854,7 @@ int setSize = 0; for (;;) { - __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize); + __kmp_process_place(&scan, affinity, maxOsId, tempMask, &setSize); // valid follow sets are ',' ':' and EOL SKIP_WS(scan); @@ -4029,25 +4036,27 @@ // Create a one element mask array (set of places) which only contains the // initial process's affinity mask -static void __kmp_create_affinity_none_places() { +static void __kmp_create_affinity_none_places(kmp_affinity_t &affinity) { KMP_ASSERT(__kmp_affin_fullMask != NULL); - KMP_ASSERT(__kmp_affinity_type == affinity_none); - __kmp_affinity_num_masks = 1; - KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks); - kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, 0); + 
KMP_ASSERT(affinity.type == affinity_none); + affinity.num_masks = 1; + KMP_CPU_ALLOC_ARRAY(affinity.masks, affinity.num_masks); + kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, 0); KMP_CPU_COPY(dest, __kmp_affin_fullMask); } -static void __kmp_aux_affinity_initialize(void) { - if (__kmp_affinity_masks != NULL) { - KMP_ASSERT(__kmp_affin_fullMask != NULL); - return; - } - +static void __kmp_aux_affinity_initialize_masks(kmp_affinity_t &affinity) { // Create the "full" mask - this defines all of the processors that we // consider to be in the machine model. If respect is set, then it is the // initialization thread's affinity mask. Otherwise, it is all processors that // we know about on the machine. + int verbose = affinity.flags.verbose; + const char *env_var = affinity.env_var; + + // Already initialized + if (__kmp_affin_fullMask && __kmp_affin_origMask) + return; + if (__kmp_affin_fullMask == NULL) { KMP_CPU_ALLOC(__kmp_affin_fullMask); } @@ -4058,7 +4067,7 @@ __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE); // Make a copy before possible expanding to the entire machine mask __kmp_affin_origMask->copy(__kmp_affin_fullMask); - if (__kmp_affinity_respect_mask) { + if (affinity.flags.respect) { // Count the number of available processors. 
unsigned i; __kmp_avail_proc = 0; @@ -4070,23 +4079,23 @@ } if (__kmp_avail_proc > __kmp_xproc) { KMP_AFF_WARNING(ErrorInitializeAffinity); - __kmp_affinity_type = affinity_none; + affinity.type = affinity_none; KMP_AFFINITY_DISABLE(); return; } - if (__kmp_affinity_verbose) { + if (verbose) { char buf[KMP_AFFIN_MASK_PRINT_LEN]; __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, __kmp_affin_fullMask); - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); + KMP_INFORM(InitOSProcSetRespect, env_var, buf); } } else { - if (__kmp_affinity_verbose) { + if (verbose) { char buf[KMP_AFFIN_MASK_PRINT_LEN]; __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, __kmp_affin_fullMask); - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); + KMP_INFORM(InitOSProcSetNotRespect, env_var, buf); } __kmp_avail_proc = __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask); @@ -4101,8 +4110,13 @@ #endif } } +} +static bool __kmp_aux_affinity_initialize_topology(kmp_affinity_t &affinity) { + bool success = false; + const char *env_var = affinity.env_var; kmp_i18n_id_t msg_id = kmp_i18n_null; + int verbose = affinity.flags.verbose; // For backward compatibility, setting KMP_CPUINFO_FILE => // KMP_TOPOLOGY_METHOD=cpuinfo @@ -4111,7 +4125,6 @@ __kmp_affinity_top_method = affinity_top_method_cpuinfo; } - bool success = false; if (__kmp_affinity_top_method == affinity_top_method_all) { // In the default code path, errors are not fatal - we just try using // another method. 
We only emit a warning message if affinity is on, or the @@ -4121,11 +4134,11 @@ __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) { if (!__kmp_hwloc_error) { success = __kmp_affinity_create_hwloc_map(&msg_id); - if (!success && __kmp_affinity_verbose) { - KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY"); + if (!success && verbose) { + KMP_INFORM(AffIgnoringHwloc, env_var); } - } else if (__kmp_affinity_verbose) { - KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY"); + } else if (verbose) { + KMP_INFORM(AffIgnoringHwloc, env_var); } } #endif @@ -4133,14 +4146,14 @@ #if KMP_ARCH_X86 || KMP_ARCH_X86_64 if (!success) { success = __kmp_affinity_create_x2apicid_map(&msg_id); - if (!success && __kmp_affinity_verbose && msg_id != kmp_i18n_null) { - KMP_INFORM(AffInfoStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id)); + if (!success && verbose && msg_id != kmp_i18n_null) { + KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id)); } } if (!success) { success = __kmp_affinity_create_apicid_map(&msg_id); - if (!success && __kmp_affinity_verbose && msg_id != kmp_i18n_null) { - KMP_INFORM(AffInfoStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id)); + if (!success && verbose && msg_id != kmp_i18n_null) { + KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id)); } } #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ @@ -4149,8 +4162,8 @@ if (!success) { int line = 0; success = __kmp_affinity_create_cpuinfo_map(&line, &msg_id); - if (!success && __kmp_affinity_verbose && msg_id != kmp_i18n_null) { - KMP_INFORM(AffInfoStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id)); + if (!success && verbose && msg_id != kmp_i18n_null) { + KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id)); } } #endif /* KMP_OS_LINUX */ @@ -4158,16 +4171,16 @@ #if KMP_GROUP_AFFINITY if (!success && (__kmp_num_proc_groups > 1)) { success = __kmp_affinity_create_proc_group_map(&msg_id); - if (!success && __kmp_affinity_verbose && msg_id != kmp_i18n_null) { - KMP_INFORM(AffInfoStr, "KMP_AFFINITY", 
__kmp_i18n_catgets(msg_id)); + if (!success && verbose && msg_id != kmp_i18n_null) { + KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id)); } } #endif /* KMP_GROUP_AFFINITY */ if (!success) { success = __kmp_affinity_create_flat_map(&msg_id); - if (!success && __kmp_affinity_verbose && msg_id != kmp_i18n_null) { - KMP_INFORM(AffInfoStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id)); + if (!success && verbose && msg_id != kmp_i18n_null) { + KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id)); } KMP_ASSERT(success); } @@ -4246,24 +4259,17 @@ __kmp_topology = kmp_topology_t::allocate(0, 0, NULL); __kmp_topology->canonicalize(nPackages, nCoresPerPkg, __kmp_nThreadsPerCore, __kmp_ncores); - if (__kmp_affinity_verbose) { - __kmp_topology->print("KMP_AFFINITY"); + if (verbose) { + __kmp_topology->print(env_var); } } - __kmp_affinity_type = affinity_none; - __kmp_create_affinity_none_places(); -#if KMP_USE_HIER_SCHED - __kmp_dispatch_set_hierarchy_values(); -#endif - KMP_AFFINITY_DISABLE(); - return; + return false; } - // Canonicalize, print (if requested), apply KMP_HW_SUBSET, and - // initialize other data structures which depend on the topology + // Canonicalize, print (if requested), apply KMP_HW_SUBSET __kmp_topology->canonicalize(); - if (__kmp_affinity_verbose) - __kmp_topology->print("KMP_AFFINITY"); + if (verbose) + __kmp_topology->print(env_var); bool filtered = __kmp_topology->filter_hw_subset(); if (filtered) { #if KMP_OS_WINDOWS @@ -4272,97 +4278,122 @@ #endif __kmp_affin_origMask->copy(__kmp_affin_fullMask); } - if (filtered && __kmp_affinity_verbose) + if (filtered && verbose) __kmp_topology->print("KMP_HW_SUBSET"); - machine_hierarchy.init(__kmp_topology->get_num_hw_threads()); - KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads()); + return success; +} + +static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) { + bool is_regular_affinity = (&affinity == &__kmp_affinity); + const char *env_var = affinity.env_var; 
+ + if (affinity.flags.initialized) { + KMP_ASSERT(__kmp_affin_fullMask != NULL); + return; + } + + if (is_regular_affinity && (!__kmp_affin_fullMask || !__kmp_affin_origMask)) + __kmp_aux_affinity_initialize_masks(affinity); + + if (is_regular_affinity && !__kmp_topology) { + bool success = __kmp_aux_affinity_initialize_topology(affinity); + if (success) { + // Initialize other data structures which depend on the topology + machine_hierarchy.init(__kmp_topology->get_num_hw_threads()); + KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads()); + } else { + affinity.type = affinity_none; + KMP_AFFINITY_DISABLE(); + } + } + // If KMP_AFFINITY=none, then only create the single "none" place // which is the process's initial affinity mask or the number of // hardware threads depending on respect,norespect - if (__kmp_affinity_type == affinity_none) { - __kmp_create_affinity_none_places(); + if (affinity.type == affinity_none) { + __kmp_create_affinity_none_places(affinity); #if KMP_USE_HIER_SCHED __kmp_dispatch_set_hierarchy_values(); #endif + affinity.flags.initialized = TRUE; return; } + + __kmp_topology->set_granularity(affinity); int depth = __kmp_topology->get_depth(); // Create the table of masks, indexed by thread Id. 
- unsigned maxIndex; unsigned numUnique; - kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique); - if (__kmp_affinity_gran_levels == 0) { + __kmp_create_os_id_masks(&numUnique, affinity); + if (affinity.gran_levels == 0) { KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc); } - switch (__kmp_affinity_type) { + switch (affinity.type) { case affinity_explicit: - KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL); + KMP_DEBUG_ASSERT(affinity.proclist != NULL); if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) { - __kmp_affinity_process_proclist( - &__kmp_affinity_masks, &__kmp_affinity_num_masks, - __kmp_affinity_proclist, osId2Mask, maxIndex); + __kmp_affinity_process_proclist(affinity); } else { - __kmp_affinity_process_placelist( - &__kmp_affinity_masks, &__kmp_affinity_num_masks, - __kmp_affinity_proclist, osId2Mask, maxIndex); + __kmp_affinity_process_placelist(affinity); } - if (__kmp_affinity_num_masks == 0) { + if (affinity.num_masks == 0) { KMP_AFF_WARNING(AffNoValidProcID); - __kmp_affinity_type = affinity_none; - __kmp_create_affinity_none_places(); + affinity.type = affinity_none; + __kmp_create_affinity_none_places(affinity); + affinity.flags.initialized = TRUE; return; } break; // The other affinity types rely on sorting the hardware threads according to - // some permutation of the machine topology tree. Set __kmp_affinity_compact - // and __kmp_affinity_offset appropriately, then jump to a common code + // some permutation of the machine topology tree. Set affinity.compact + // and affinity.offset appropriately, then jump to a common code // fragment to do the sort and create the array of affinity masks. 
case affinity_logical: - __kmp_affinity_compact = 0; - if (__kmp_affinity_offset) { - __kmp_affinity_offset = - __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc; + affinity.compact = 0; + if (affinity.offset) { + affinity.offset = + __kmp_nThreadsPerCore * affinity.offset % __kmp_avail_proc; } goto sortTopology; case affinity_physical: if (__kmp_nThreadsPerCore > 1) { - __kmp_affinity_compact = 1; - if (__kmp_affinity_compact >= depth) { - __kmp_affinity_compact = 0; + affinity.compact = 1; + if (affinity.compact >= depth) { + affinity.compact = 0; } } else { - __kmp_affinity_compact = 0; + affinity.compact = 0; } - if (__kmp_affinity_offset) { - __kmp_affinity_offset = - __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc; + if (affinity.offset) { + affinity.offset = + __kmp_nThreadsPerCore * affinity.offset % __kmp_avail_proc; } goto sortTopology; case affinity_scatter: - if (__kmp_affinity_compact >= depth) { - __kmp_affinity_compact = 0; + if (affinity.compact >= depth) { + affinity.compact = 0; } else { - __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact; + affinity.compact = depth - 1 - affinity.compact; } goto sortTopology; case affinity_compact: - if (__kmp_affinity_compact >= depth) { - __kmp_affinity_compact = depth - 1; + if (affinity.compact >= depth) { + affinity.compact = depth - 1; } goto sortTopology; case affinity_balanced: if (depth <= 1) { - KMP_AFF_WARNING(AffBalancedNotAvail, "KMP_AFFINITY"); - __kmp_affinity_type = affinity_none; - __kmp_create_affinity_none_places(); + KMP_AFF_WARNING(AffBalancedNotAvail, env_var); + affinity.type = affinity_none; + __kmp_create_affinity_none_places(affinity); + affinity.flags.initialized = TRUE; return; } else if (!__kmp_topology->is_uniform()) { // Save the depth for further usage @@ -4377,8 +4408,9 @@ int nproc = ncores * maxprocpercore; if ((nproc < 2) || (nproc < __kmp_avail_proc)) { - KMP_AFF_WARNING(AffBalancedNotAvail, "KMP_AFFINITY"); - __kmp_affinity_type = 
affinity_none; + KMP_AFF_WARNING(AffBalancedNotAvail, env_var); + affinity.type = affinity_none; + affinity.flags.initialized = TRUE; return; } @@ -4403,48 +4435,48 @@ procarr[core * maxprocpercore + inlastcore] = proc; } } - if (__kmp_affinity_compact >= depth) { - __kmp_affinity_compact = depth - 1; + if (affinity.compact >= depth) { + affinity.compact = depth - 1; } sortTopology: // Allocate the gtid->affinity mask table. - if (__kmp_affinity_dups) { - __kmp_affinity_num_masks = __kmp_avail_proc; + if (affinity.flags.dups) { + affinity.num_masks = __kmp_avail_proc; } else { - __kmp_affinity_num_masks = numUnique; + affinity.num_masks = numUnique; } if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) && (__kmp_affinity_num_places > 0) && - ((unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) { - __kmp_affinity_num_masks = __kmp_affinity_num_places; + ((unsigned)__kmp_affinity_num_places < affinity.num_masks)) { + affinity.num_masks = __kmp_affinity_num_places; } - KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks); + KMP_CPU_ALLOC_ARRAY(affinity.masks, affinity.num_masks); // Sort the topology table according to the current setting of - // __kmp_affinity_compact, then fill out __kmp_affinity_masks. + // affinity.compact, then fill out affinity.masks. 
__kmp_topology->sort_compact(); { int i; unsigned j; int num_hw_threads = __kmp_topology->get_num_hw_threads(); for (i = 0, j = 0; i < num_hw_threads; i++) { - if ((!__kmp_affinity_dups) && (!__kmp_topology->at(i).leader)) { + if ((!affinity.flags.dups) && (!__kmp_topology->at(i).leader)) { continue; } int osId = __kmp_topology->at(i).os_id; - kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId); - kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j); + kmp_affin_mask_t *src = KMP_CPU_INDEX(affinity.os_id_masks, osId); + kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, j); KMP_ASSERT(KMP_CPU_ISSET(osId, src)); KMP_CPU_COPY(dest, src); - if (++j >= __kmp_affinity_num_masks) { + if (++j >= affinity.num_masks) { break; } } - KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks); + KMP_DEBUG_ASSERT(j == affinity.num_masks); } // Sort the topology back using ids __kmp_topology->sort_ids(); @@ -4453,52 +4485,44 @@ default: KMP_ASSERT2(0, "Unexpected affinity setting"); } - - KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1); + affinity.flags.initialized = TRUE; } -void __kmp_affinity_initialize(void) { +void __kmp_affinity_initialize(kmp_affinity_t &affinity) { // Much of the code above was written assuming that if a machine was not - // affinity capable, then __kmp_affinity_type == affinity_none. We now - // explicitly represent this as __kmp_affinity_type == affinity_disabled. - // There are too many checks for __kmp_affinity_type == affinity_none - // in this code. Instead of trying to change them all, check if - // __kmp_affinity_type == affinity_disabled, and if so, slam it with - // affinity_none, call the real initialization routine, then restore - // __kmp_affinity_type to affinity_disabled. - int disabled = (__kmp_affinity_type == affinity_disabled); - if (!KMP_AFFINITY_CAPABLE()) { + // affinity capable, then affinity type == affinity_none. + // We now explicitly represent this as affinity type == affinity_disabled. 
+ // There are too many checks for affinity type == affinity_none in this code. + // Instead of trying to change them all, check if + // affinity type == affinity_disabled, and if so, slam it with affinity_none, + // call the real initialization routine, then restore affinity type to + // affinity_disabled. + int disabled = (affinity.type == affinity_disabled); + if (!KMP_AFFINITY_CAPABLE()) KMP_ASSERT(disabled); - } - if (disabled) { - __kmp_affinity_type = affinity_none; - } - __kmp_aux_affinity_initialize(); - if (disabled) { - __kmp_affinity_type = affinity_disabled; - } + if (disabled) + affinity.type = affinity_none; + __kmp_aux_affinity_initialize(affinity); + if (disabled) + affinity.type = affinity_disabled; } void __kmp_affinity_uninitialize(void) { - if (__kmp_affinity_masks != NULL) { - KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks); - __kmp_affinity_masks = NULL; - } - if (__kmp_affin_fullMask != NULL) { - KMP_CPU_FREE(__kmp_affin_fullMask); - __kmp_affin_fullMask = NULL; + { + kmp_affinity_t *affinity = &__kmp_affinity; + if (affinity->masks != NULL) + KMP_CPU_FREE_ARRAY(affinity->masks, affinity->num_masks); + if (affinity->os_id_masks != NULL) + KMP_CPU_FREE_ARRAY(affinity->os_id_masks, affinity->num_os_id_masks); + if (affinity->proclist != NULL) + __kmp_free(affinity->proclist); + *affinity = KMP_AFFINITY_INIT(affinity->env_var); } if (__kmp_affin_origMask != NULL) { KMP_CPU_FREE(__kmp_affin_origMask); __kmp_affin_origMask = NULL; } - __kmp_affinity_num_masks = 0; - __kmp_affinity_type = affinity_default; __kmp_affinity_num_places = 0; - if (__kmp_affinity_proclist != NULL) { - __kmp_free(__kmp_affinity_proclist); - __kmp_affinity_proclist = NULL; - } if (procarr != NULL) { __kmp_free(procarr); procarr = NULL; @@ -4533,15 +4557,18 @@ } // Copy the thread mask to the kmp_info_t structure. If - // __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. 
one that - // has all of the OS proc ids set, or if __kmp_affinity_respect_mask is set, - // then the full mask is the same as the mask of the initialization thread. + // __kmp_affinity.type == affinity_none, copy the "full" mask, i.e. + // one that has all of the OS proc ids set, or if + // __kmp_affinity.flags.respect is set, then the full mask is the + // same as the mask of the initialization thread. kmp_affin_mask_t *mask; int i; + const kmp_affinity_t *affinity = &__kmp_affinity; + const char *env_var = affinity->env_var; if (KMP_AFFINITY_NON_PROC_BIND) { - if ((__kmp_affinity_type == affinity_none) || - (__kmp_affinity_type == affinity_balanced) || + if ((affinity->type == affinity_none) || + (affinity->type == affinity_balanced) || KMP_HIDDEN_HELPER_THREAD(gtid)) { #if KMP_GROUP_AFFINITY if (__kmp_num_proc_groups > 1) { @@ -4553,9 +4580,9 @@ mask = __kmp_affin_fullMask; } else { int mask_idx = __kmp_adjust_gtid_for_hidden_helpers(gtid); - KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0); - i = (mask_idx + __kmp_affinity_offset) % __kmp_affinity_num_masks; - mask = KMP_CPU_INDEX(__kmp_affinity_masks, i); + KMP_DEBUG_ASSERT(affinity->num_masks > 0); + i = (mask_idx + affinity->offset) % affinity->num_masks; + mask = KMP_CPU_INDEX(affinity->masks, i); } } else { if ((!isa_root) || KMP_HIDDEN_HELPER_THREAD(gtid) || @@ -4572,9 +4599,9 @@ // int i = some hash function or just a counter that doesn't // always start at 0. Use adjusted gtid for now. 
int mask_idx = __kmp_adjust_gtid_for_hidden_helpers(gtid); - KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0); - i = (mask_idx + __kmp_affinity_offset) % __kmp_affinity_num_masks; - mask = KMP_CPU_INDEX(__kmp_affinity_masks, i); + KMP_DEBUG_ASSERT(affinity->num_masks > 0); + i = (mask_idx + affinity->offset) % affinity->num_masks; + mask = KMP_CPU_INDEX(affinity->masks, i); } } @@ -4582,12 +4609,12 @@ if (isa_root || KMP_HIDDEN_HELPER_THREAD(gtid)) { th->th.th_new_place = i; th->th.th_first_place = 0; - th->th.th_last_place = __kmp_affinity_num_masks - 1; + th->th.th_last_place = affinity->num_masks - 1; } else if (KMP_AFFINITY_NON_PROC_BIND) { // When using a Non-OMP_PROC_BIND affinity method, // set all threads' place-partition-var to the entire place list th->th.th_first_place = 0; - th->th.th_last_place = __kmp_affinity_num_masks - 1; + th->th.th_last_place = affinity->num_masks - 1; } if (i == KMP_PLACE_ALL) { @@ -4600,20 +4627,20 @@ KMP_CPU_COPY(th->th.th_affin_mask, mask); - if (__kmp_affinity_verbose && !KMP_HIDDEN_HELPER_THREAD(gtid) + if (affinity->flags.verbose && !KMP_HIDDEN_HELPER_THREAD(gtid) /* to avoid duplicate printing (will be correctly printed on barrier) */ - && (__kmp_affinity_type == affinity_none || - (i != KMP_PLACE_ALL && __kmp_affinity_type != affinity_balanced))) { + && (affinity->type == affinity_none || + (i != KMP_PLACE_ALL && affinity->type != affinity_balanced))) { char buf[KMP_AFFIN_MASK_PRINT_LEN]; __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, th->th.th_affin_mask); - KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), - __kmp_gettid(), gtid, buf); + KMP_INFORM(BoundToOSProcSet, env_var, (kmp_int32)getpid(), __kmp_gettid(), + gtid, buf); } #if KMP_DEBUG // Hidden helper thread affinity only printed for debug builds - if (__kmp_affinity_verbose && KMP_HIDDEN_HELPER_THREAD(gtid)) { + if (affinity->flags.verbose && KMP_HIDDEN_HELPER_THREAD(gtid)) { char buf[KMP_AFFIN_MASK_PRINT_LEN]; 
__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, th->th.th_affin_mask); @@ -4626,7 +4653,7 @@ // On Windows* OS, the process affinity mask might have changed. If the user // didn't request affinity and this call fails, just continue silently. // See CQ171393. - if (__kmp_affinity_type == affinity_none) { + if (affinity->type == affinity_none) { __kmp_set_system_affinity(th->th.th_affin_mask, FALSE); } else #endif @@ -4647,7 +4674,7 @@ // Check that the new place is within this thread's partition. KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL); KMP_ASSERT(th->th.th_new_place >= 0); - KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks); + KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity.num_masks); if (th->th.th_first_place <= th->th.th_last_place) { KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) && (th->th.th_new_place <= th->th.th_last_place)); @@ -4659,11 +4686,11 @@ // Copy the thread mask to the kmp_info_t structure, // and set this thread's affinity. kmp_affin_mask_t *mask = - KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place); + KMP_CPU_INDEX(__kmp_affinity.masks, th->th.th_new_place); KMP_CPU_COPY(th->th.th_affin_mask, mask); th->th.th_current_place = th->th.th_new_place; - if (__kmp_affinity_verbose) { + if (__kmp_affinity.flags.verbose) { char buf[KMP_AFFIN_MASK_PRINT_LEN]; __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, th->th.th_affin_mask); @@ -4731,7 +4758,7 @@ th->th.th_current_place = KMP_PLACE_UNDEFINED; th->th.th_new_place = KMP_PLACE_UNDEFINED; th->th.th_first_place = 0; - th->th.th_last_place = __kmp_affinity_num_masks - 1; + th->th.th_last_place = __kmp_affinity.num_masks - 1; // Turn off 4.0 affinity for the current tread at this parallel level. 
th->th.th_current_task->td_icvs.proc_bind = proc_bind_false; @@ -4911,12 +4938,13 @@ KMP_DEBUG_ASSERT(th); bool fine_gran = true; int tid = th->th.th_info.ds.ds_tid; + const char *env_var = "KMP_AFFINITY"; // Do not perform balanced affinity for the hidden helper threads if (KMP_HIDDEN_HELPER_THREAD(__kmp_gtid_from_thread(th))) return; - switch (__kmp_affinity_gran) { + switch (__kmp_affinity.gran) { case KMP_HW_THREAD: break; case KMP_HW_CORE: @@ -4974,11 +5002,11 @@ KMP_CPU_SET(osID, mask); } } - if (__kmp_affinity_verbose) { + if (__kmp_affinity.flags.verbose) { char buf[KMP_AFFIN_MASK_PRINT_LEN]; __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask); - KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), - __kmp_gettid(), tid, buf); + KMP_INFORM(BoundToOSProcSet, env_var, (kmp_int32)getpid(), __kmp_gettid(), + tid, buf); } __kmp_set_system_affinity(mask, TRUE); } else { // Non-uniform topology @@ -5140,11 +5168,11 @@ __kmp_free(newarr); } - if (__kmp_affinity_verbose) { + if (__kmp_affinity.flags.verbose) { char buf[KMP_AFFIN_MASK_PRINT_LEN]; __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask); - KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), - __kmp_gettid(), tid, buf); + KMP_INFORM(BoundToOSProcSet, env_var, (kmp_int32)getpid(), __kmp_gettid(), + tid, buf); } __kmp_set_system_affinity(mask, TRUE); } diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -2582,7 +2582,7 @@ kmp_proc_bind_t proc_bind = team->t.t_proc_bind; if (proc_bind == proc_bind_intel) { // Call dynamic affinity settings - if (__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) { + if (__kmp_affinity.type == affinity_balanced && team->t.t_size_changed) { __kmp_balanced_affinity(this_thr, team->t.t_nproc); } } else if (proc_bind != proc_bind_false) { @@ -2599,7 +2599,7 @@ if (__kmp_display_affinity) { if 
(team->t.t_display_affinity #if KMP_AFFINITY_SUPPORTED - || (__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) + || (__kmp_affinity.type == affinity_balanced && team->t.t_size_changed) #endif ) { // NULL means use the affinity-format-var ICV diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -633,7 +633,7 @@ global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); } #if KMP_AFFINITY_SUPPORTED - if (this_thr->th.th_team->t.t_level == 0 && __kmp_affin_reset) { + if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) { __kmp_reset_root_init_mask(global_tid); } #endif @@ -1989,7 +1989,8 @@ __kmp_assign_root_init_mask(); gtid = __kmp_get_gtid(); #if KMP_AFFINITY_SUPPORTED - if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) { + if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && + __kmp_affinity.flags.reset) { __kmp_reset_root_init_mask(gtid); } #endif @@ -2007,7 +2008,8 @@ __kmp_assign_root_init_mask(); gtid = __kmp_get_gtid(); #if KMP_AFFINITY_SUPPORTED - if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) { + if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && + __kmp_affinity.flags.reset) { __kmp_reset_root_init_mask(gtid); } #endif diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h --- a/openmp/runtime/src/kmp_ftn_entry.h +++ b/openmp/runtime/src/kmp_ftn_entry.h @@ -239,7 +239,8 @@ } __kmp_assign_root_init_mask(); int gtid = __kmp_get_gtid(); - if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) { + if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && + __kmp_affinity.flags.reset) { __kmp_reset_root_init_mask(gtid); } return __kmp_aux_get_affinity(mask); @@ -365,7 +366,7 @@ gtid = __kmp_entry_gtid(); thread = __kmp_threads[gtid]; #if KMP_AFFINITY_SUPPORTED - if (thread->th.th_team->t.t_level == 0 
&& !__kmp_affin_reset) { + if (thread->th.th_team->t.t_level == 0 && !__kmp_affinity.flags.reset) { __kmp_assign_root_init_mask(); } #endif @@ -518,7 +519,8 @@ __kmp_assign_root_init_mask(); gtid = __kmp_get_gtid(); #if KMP_AFFINITY_SUPPORTED - if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) { + if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && + __kmp_affinity.flags.reset) { __kmp_reset_root_init_mask(gtid); } #endif @@ -551,7 +553,8 @@ __kmp_assign_root_init_mask(); gtid = __kmp_get_gtid(); #if KMP_AFFINITY_SUPPORTED - if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) { + if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && + __kmp_affinity.flags.reset) { __kmp_reset_root_init_mask(gtid); } #endif @@ -631,7 +634,7 @@ __kmp_middle_initialize(); } #if KMP_AFFINITY_SUPPORTED - if (!__kmp_affin_reset) { + if (!__kmp_affinity.flags.reset) { // only bind root here if its affinity reset is not requested int gtid = __kmp_entry_gtid(); kmp_info_t *thread = __kmp_threads[gtid]; @@ -831,7 +834,7 @@ } if (!KMP_AFFINITY_CAPABLE()) return 0; - if (!__kmp_affin_reset) { + if (!__kmp_affinity.flags.reset) { // only bind root here if its affinity reset is not requested int gtid = __kmp_entry_gtid(); kmp_info_t *thread = __kmp_threads[gtid]; @@ -839,7 +842,7 @@ __kmp_assign_root_init_mask(); } } - return __kmp_affinity_num_masks; + return __kmp_affinity.num_masks; #endif } @@ -854,7 +857,7 @@ } if (!KMP_AFFINITY_CAPABLE()) return 0; - if (!__kmp_affin_reset) { + if (!__kmp_affinity.flags.reset) { // only bind root here if its affinity reset is not requested int gtid = __kmp_entry_gtid(); kmp_info_t *thread = __kmp_threads[gtid]; @@ -862,9 +865,9 @@ __kmp_assign_root_init_mask(); } } - if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks) + if (place_num < 0 || place_num >= (int)__kmp_affinity.num_masks) return 0; - kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num); + kmp_affin_mask_t *mask = 
KMP_CPU_INDEX(__kmp_affinity.masks, place_num); KMP_CPU_SET_ITERATE(i, mask) { if ((!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) || (!KMP_CPU_ISSET(i, mask))) { @@ -887,7 +890,7 @@ } if (!KMP_AFFINITY_CAPABLE()) return; - if (!__kmp_affin_reset) { + if (!__kmp_affinity.flags.reset) { // only bind root here if its affinity reset is not requested int gtid = __kmp_entry_gtid(); kmp_info_t *thread = __kmp_threads[gtid]; @@ -895,9 +898,9 @@ __kmp_assign_root_init_mask(); } } - if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks) + if (place_num < 0 || place_num >= (int)__kmp_affinity.num_masks) return; - kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num); + kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity.masks, place_num); j = 0; KMP_CPU_SET_ITERATE(i, mask) { if ((!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) || @@ -922,7 +925,7 @@ return -1; gtid = __kmp_entry_gtid(); thread = __kmp_thread_from_gtid(gtid); - if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) { + if (thread->th.th_team->t.t_level == 0 && !__kmp_affinity.flags.reset) { __kmp_assign_root_init_mask(); } if (thread->th.th_current_place < 0) @@ -944,7 +947,7 @@ return 0; gtid = __kmp_entry_gtid(); thread = __kmp_thread_from_gtid(gtid); - if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) { + if (thread->th.th_team->t.t_level == 0 && !__kmp_affinity.flags.reset) { __kmp_assign_root_init_mask(); } first_place = thread->th.th_first_place; @@ -954,7 +957,7 @@ if (first_place <= last_place) num_places = last_place - first_place + 1; else - num_places = __kmp_affinity_num_masks - first_place + last_place + 1; + num_places = __kmp_affinity.num_masks - first_place + last_place + 1; return num_places; #endif } @@ -973,7 +976,7 @@ return; gtid = __kmp_entry_gtid(); thread = __kmp_thread_from_gtid(gtid); - if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) { + if (thread->th.th_team->t.t_level == 0 && !__kmp_affinity.flags.reset) { 
__kmp_assign_root_init_mask(); } first_place = thread->th.th_first_place; diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp --- a/openmp/runtime/src/kmp_global.cpp +++ b/openmp/runtime/src/kmp_global.cpp @@ -269,23 +269,13 @@ #endif /* KMP_OS_WINDOWS */ size_t __kmp_affin_mask_size = 0; -enum affinity_type __kmp_affinity_type = affinity_default; -kmp_hw_t __kmp_affinity_gran = KMP_HW_UNKNOWN; -int __kmp_affinity_gran_levels = -1; -int __kmp_affinity_dups = TRUE; enum affinity_top_method __kmp_affinity_top_method = affinity_top_method_default; -int __kmp_affinity_compact = 0; -int __kmp_affinity_offset = 0; -int __kmp_affinity_verbose = FALSE; -int __kmp_affinity_warnings = TRUE; -int __kmp_affinity_respect_mask = affinity_respect_mask_default; -char *__kmp_affinity_proclist = NULL; -kmp_affin_mask_t *__kmp_affinity_masks = NULL; -unsigned __kmp_affinity_num_masks = 0; + +// Regular thread affinity settings from KMP_AFFINITY +kmp_affinity_t __kmp_affinity = KMP_AFFINITY_INIT("KMP_AFFINITY"); char *__kmp_cpuinfo_file = NULL; -bool __kmp_affin_reset = 0; #endif /* KMP_AFFINITY_SUPPORTED */ diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -2647,7 +2647,7 @@ __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); #if KMP_AFFINITY_SUPPORTED - if (master_th->th.th_team->t.t_level == 0 && __kmp_affin_reset) { + if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) { __kmp_reset_root_init_mask(gtid); } #endif @@ -4736,6 +4736,7 @@ int first_place = master_th->th.th_first_place; int last_place = master_th->th.th_last_place; int masters_place = master_th->th.th_current_place; + int num_masks = __kmp_affinity.num_masks; team->t.t_first_place = first_place; team->t.t_last_place = last_place; @@ -4780,7 +4781,7 @@ if (first_place <= last_place) { n_places = last_place - first_place + 1; } else { - n_places = 
__kmp_affinity_num_masks - first_place + last_place + 1; + n_places = num_masks - first_place + last_place + 1; } if (n_th <= n_places) { int place = masters_place; @@ -4790,7 +4791,7 @@ if (place == last_place) { place = first_place; - } else if (place == (int)(__kmp_affinity_num_masks - 1)) { + } else if (place == (num_masks - 1)) { place = 0; } else { place++; @@ -4835,7 +4836,7 @@ // we added an extra thread to this place; move to next place if (place == last_place) { place = first_place; - } else if (place == (int)(__kmp_affinity_num_masks - 1)) { + } else if (place == (num_masks - 1)) { place = 0; } else { place++; @@ -4846,7 +4847,7 @@ } else if (s_count == S) { // place full; don't add extra if (place == last_place) { place = first_place; - } else if (place == (int)(__kmp_affinity_num_masks - 1)) { + } else if (place == (num_masks - 1)) { place = 0; } else { place++; @@ -4873,12 +4874,12 @@ if (first_place <= last_place) { n_places = last_place - first_place + 1; } else { - n_places = __kmp_affinity_num_masks - first_place + last_place + 1; + n_places = num_masks - first_place + last_place + 1; } if (n_th <= n_places) { int place = -1; - if (n_places != static_cast<int>(__kmp_affinity_num_masks)) { + if (n_places != num_masks) { int S = n_places / n_th; int s_count, rem, gap, gap_ct; @@ -4903,7 +4904,7 @@ while (s_count < S) { if (place == last_place) { place = first_place; - } else if (place == (int)(__kmp_affinity_num_masks - 1)) { + } else if (place == (num_masks - 1)) { place = 0; } else { place++; @@ -4913,7 +4914,7 @@ if (rem && (gap_ct == gap)) { if (place == last_place) { place = first_place; - } else if (place == (int)(__kmp_affinity_num_masks - 1)) { + } else if (place == (num_masks - 1)) { place = 0; } else { place++; @@ -4926,7 +4927,7 @@ if (place == last_place) { place = first_place; - } else if (place == (int)(__kmp_affinity_num_masks - 1)) { + } else if (place == (num_masks - 1)) { place = 0; } else { place++; @@ -4934,10 +4935,10 @@ 
KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " - "partition = [%d,%d], __kmp_affinity_num_masks: %u\n", + "partition = [%d,%d], num_masks: %u\n", __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place, th->th.th_first_place, - th->th.th_last_place, __kmp_affinity_num_masks)); + th->th.th_last_place, num_masks)); } } else { /* Having uniform space of available computation places I can create @@ -5034,7 +5035,7 @@ // we added an extra thread to this place; move on to next place if (place == last_place) { place = first_place; - } else if (place == (int)(__kmp_affinity_num_masks - 1)) { + } else if (place == (num_masks - 1)) { place = 0; } else { place++; @@ -5045,7 +5046,7 @@ } else if (s_count == S) { // place is full; don't add extra thread if (place == last_place) { place = first_place; - } else if (place == (int)(__kmp_affinity_num_masks - 1)) { + } else if (place == (num_masks - 1)) { place = 0; } else { place++; @@ -7280,7 +7281,7 @@ #if KMP_AFFINITY_SUPPORTED // __kmp_affinity_initialize() will try to set __kmp_ncores to the // number of cores on the machine. - __kmp_affinity_initialize(); + __kmp_affinity_initialize(__kmp_affinity); #endif /* KMP_AFFINITY_SUPPORTED */ diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -2148,12 +2148,7 @@ static kmp_setting_t *__kmp_affinity_notype = NULL; static void __kmp_parse_affinity_env(char const *name, char const *value, - enum affinity_type *out_type, - char **out_proclist, int *out_verbose, - int *out_warn, int *out_respect, - kmp_hw_t *out_gran, int *out_gran_levels, - int *out_dups, int *out_compact, - int *out_offset) { + kmp_affinity_t *out_affinity) { char *buffer = NULL; // Copy of env var value. char *buf = NULL; // Buffer for strtok_r() function. char *next = NULL; // end of token / start of next. 
@@ -2219,19 +2214,20 @@ ++_guard; \ } -#define set_type(val) _set_param(type, *out_type, val) -#define set_verbose(val) _set_param(verbose, *out_verbose, val) -#define set_warnings(val) _set_param(warnings, *out_warn, val) -#define set_respect(val) _set_param(respect, *out_respect, val) -#define set_dups(val) _set_param(dups, *out_dups, val) -#define set_proclist(val) _set_param(proclist, *out_proclist, val) -#define set_reset(val) _set_param(reset, __kmp_affin_reset, val) +#define set_type(val) _set_param(type, out_affinity->type, val) +#define set_verbose(val) _set_param(verbose, out_affinity->flags.verbose, val) +#define set_warnings(val) \ + _set_param(warnings, out_affinity->flags.warnings, val) +#define set_respect(val) _set_param(respect, out_affinity->flags.respect, val) +#define set_dups(val) _set_param(dups, out_affinity->flags.dups, val) +#define set_proclist(val) _set_param(proclist, out_affinity->proclist, val) +#define set_reset(val) _set_param(reset, out_affinity->flags.reset, val) #define set_gran(val, levels) \ { \ if (gran == 0) { \ - *out_gran = val; \ - *out_gran_levels = levels; \ + out_affinity->gran = val; \ + out_affinity->gran_levels = levels; \ } else { \ EMIT_WARN(FALSE, (AffParamDefined, name, start)); \ } \ @@ -2448,20 +2444,20 @@ if (proclist) { if (!type) { KMP_WARNING(AffProcListNoType, name); - *out_type = affinity_explicit; + out_affinity->type = affinity_explicit; __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; - } else if (*out_type != affinity_explicit) { + } else if (out_affinity->type != affinity_explicit) { KMP_WARNING(AffProcListNotExplicit, name); - KMP_ASSERT(*out_proclist != NULL); - KMP_INTERNAL_FREE(*out_proclist); - *out_proclist = NULL; + KMP_ASSERT(out_affinity->proclist != NULL); + KMP_INTERNAL_FREE(out_affinity->proclist); + out_affinity->proclist = NULL; } } - switch (*out_type) { + switch (out_affinity->type) { case affinity_logical: case affinity_physical: { if (count > 0) { - *out_offset = number[0]; + 
out_affinity->offset = number[0]; } if (count > 1) { KMP_WARNING(AffManyParamsForLogic, name, number[1]); @@ -2469,42 +2465,44 @@ } break; case affinity_balanced: { if (count > 0) { - *out_compact = number[0]; + out_affinity->compact = number[0]; } if (count > 1) { - *out_offset = number[1]; + out_affinity->offset = number[1]; } - if (__kmp_affinity_gran == KMP_HW_UNKNOWN) { + if (__kmp_affinity.gran == KMP_HW_UNKNOWN) { + int verbose = out_affinity->flags.verbose; + int warnings = out_affinity->flags.warnings; #if KMP_MIC_SUPPORTED if (__kmp_mic_type != non_mic) { - if (__kmp_affinity_verbose || __kmp_affinity_warnings) { - KMP_WARNING(AffGranUsing, "KMP_AFFINITY", "fine"); + if (verbose || warnings) { + KMP_WARNING(AffGranUsing, out_affinity->env_var, "fine"); } - __kmp_affinity_gran = KMP_HW_THREAD; + out_affinity->gran = KMP_HW_THREAD; } else #endif { - if (__kmp_affinity_verbose || __kmp_affinity_warnings) { - KMP_WARNING(AffGranUsing, "KMP_AFFINITY", "core"); + if (verbose || warnings) { + KMP_WARNING(AffGranUsing, out_affinity->env_var, "core"); } - __kmp_affinity_gran = KMP_HW_CORE; + out_affinity->gran = KMP_HW_CORE; } } } break; case affinity_scatter: case affinity_compact: { if (count > 0) { - *out_compact = number[0]; + out_affinity->compact = number[0]; } if (count > 1) { - *out_offset = number[1]; + out_affinity->offset = number[1]; } } break; case affinity_explicit: { - if (*out_proclist == NULL) { + if (out_affinity->proclist == NULL) { KMP_WARNING(AffNoProcList, name); - __kmp_affinity_type = affinity_none; + out_affinity->type = affinity_none; } if (count > 0) { KMP_WARNING(AffNoParam, name, "explicit"); @@ -2541,74 +2539,68 @@ return; } - __kmp_parse_affinity_env(name, value, &__kmp_affinity_type, - &__kmp_affinity_proclist, &__kmp_affinity_verbose, - &__kmp_affinity_warnings, - &__kmp_affinity_respect_mask, &__kmp_affinity_gran, - &__kmp_affinity_gran_levels, &__kmp_affinity_dups, - &__kmp_affinity_compact, &__kmp_affinity_offset); + 
__kmp_parse_affinity_env(name, value, &__kmp_affinity); } // __kmp_stg_parse_affinity -static void __kmp_stg_print_affinity(kmp_str_buf_t *buffer, char const *name, - void *data) { +static void __kmp_print_affinity_env(kmp_str_buf_t *buffer, char const *name, + const kmp_affinity_t &affinity) { if (__kmp_env_format) { KMP_STR_BUF_PRINT_NAME_EX(name); } else { __kmp_str_buf_print(buffer, " %s='", name); } - if (__kmp_affinity_verbose) { + if (affinity.flags.verbose) { __kmp_str_buf_print(buffer, "%s,", "verbose"); } else { __kmp_str_buf_print(buffer, "%s,", "noverbose"); } - if (__kmp_affinity_warnings) { + if (affinity.flags.warnings) { __kmp_str_buf_print(buffer, "%s,", "warnings"); } else { __kmp_str_buf_print(buffer, "%s,", "nowarnings"); } if (KMP_AFFINITY_CAPABLE()) { - if (__kmp_affinity_respect_mask) { + if (affinity.flags.respect) { __kmp_str_buf_print(buffer, "%s,", "respect"); } else { __kmp_str_buf_print(buffer, "%s,", "norespect"); } - if (__kmp_affin_reset) { + if (affinity.flags.reset) { __kmp_str_buf_print(buffer, "%s,", "reset"); } else { __kmp_str_buf_print(buffer, "%s,", "noreset"); } __kmp_str_buf_print(buffer, "granularity=%s,", - __kmp_hw_get_keyword(__kmp_affinity_gran, false)); + __kmp_hw_get_keyword(affinity.gran, false)); } if (!KMP_AFFINITY_CAPABLE()) { __kmp_str_buf_print(buffer, "%s", "disabled"); - } else - switch (__kmp_affinity_type) { + } else { + int compact = affinity.compact; + int offset = affinity.offset; + switch (affinity.type) { case affinity_none: __kmp_str_buf_print(buffer, "%s", "none"); break; case affinity_physical: - __kmp_str_buf_print(buffer, "%s,%d", "physical", __kmp_affinity_offset); + __kmp_str_buf_print(buffer, "%s,%d", "physical", offset); break; case affinity_logical: - __kmp_str_buf_print(buffer, "%s,%d", "logical", __kmp_affinity_offset); + __kmp_str_buf_print(buffer, "%s,%d", "logical", offset); break; case affinity_compact: - __kmp_str_buf_print(buffer, "%s,%d,%d", "compact", __kmp_affinity_compact, - 
__kmp_affinity_offset); + __kmp_str_buf_print(buffer, "%s,%d,%d", "compact", compact, offset); break; case affinity_scatter: - __kmp_str_buf_print(buffer, "%s,%d,%d", "scatter", __kmp_affinity_compact, - __kmp_affinity_offset); + __kmp_str_buf_print(buffer, "%s,%d,%d", "scatter", compact, offset); break; case affinity_explicit: - __kmp_str_buf_print(buffer, "%s=[%s],%s", "proclist", - __kmp_affinity_proclist, "explicit"); + __kmp_str_buf_print(buffer, "%s=[%s],%s", "proclist", affinity.proclist, + "explicit"); break; case affinity_balanced: - __kmp_str_buf_print(buffer, "%s,%d,%d", "balanced", - __kmp_affinity_compact, __kmp_affinity_offset); + __kmp_str_buf_print(buffer, "%s,%d,%d", "balanced", compact, offset); break; case affinity_disabled: __kmp_str_buf_print(buffer, "%s", "disabled"); @@ -2620,9 +2612,15 @@ __kmp_str_buf_print(buffer, "%s", ""); break; } + } __kmp_str_buf_print(buffer, "'\n"); } //__kmp_stg_print_affinity +static void __kmp_stg_print_affinity(kmp_str_buf_t *buffer, char const *name, + void *data) { + __kmp_print_affinity_env(buffer, name, __kmp_affinity); +} + #ifdef KMP_GOMP_COMPAT static void __kmp_stg_parse_gomp_cpu_affinity(char const *name, @@ -2649,9 +2647,9 @@ SKIP_WS(next); if (*next == '\0') { // GOMP_CPU_AFFINITY => granularity=fine,explicit,proclist=... 
- __kmp_affinity_proclist = temp_proclist; - __kmp_affinity_type = affinity_explicit; - __kmp_affinity_gran = KMP_HW_THREAD; + __kmp_affinity.proclist = temp_proclist; + __kmp_affinity.type = affinity_explicit; + __kmp_affinity.gran = KMP_HW_THREAD; __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; } else { KMP_WARNING(AffSyntaxError, name); @@ -2661,7 +2659,7 @@ } } else { // Warning already emitted - __kmp_affinity_type = affinity_none; + __kmp_affinity.type = affinity_none; __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; } } // __kmp_stg_parse_gomp_cpu_affinity @@ -2963,9 +2961,9 @@ const kmp_place_t &place = std_places[i]; if (__kmp_match_str(place.name, scan, &next)) { scan = next; - __kmp_affinity_type = affinity_compact; - __kmp_affinity_gran = place.type; - __kmp_affinity_dups = FALSE; + __kmp_affinity.type = affinity_compact; + __kmp_affinity.gran = place.type; + __kmp_affinity.flags.dups = FALSE; set = true; break; } @@ -2978,36 +2976,36 @@ continue; if (__kmp_match_str(name, scan, &next)) { scan = next; - __kmp_affinity_type = affinity_compact; - __kmp_affinity_gran = type; - __kmp_affinity_dups = FALSE; + __kmp_affinity.type = affinity_compact; + __kmp_affinity.gran = type; + __kmp_affinity.flags.dups = FALSE; set = true; break; } } } if (!set) { - if (__kmp_affinity_proclist != NULL) { - KMP_INTERNAL_FREE((void *)__kmp_affinity_proclist); - __kmp_affinity_proclist = NULL; - } - if (__kmp_parse_place_list(name, value, &__kmp_affinity_proclist)) { - __kmp_affinity_type = affinity_explicit; - __kmp_affinity_gran = KMP_HW_THREAD; - __kmp_affinity_dups = FALSE; + if (__kmp_affinity.proclist != NULL) { + KMP_INTERNAL_FREE((void *)__kmp_affinity.proclist); + __kmp_affinity.proclist = NULL; + } + if (__kmp_parse_place_list(name, value, &__kmp_affinity.proclist)) { + __kmp_affinity.type = affinity_explicit; + __kmp_affinity.gran = KMP_HW_THREAD; + __kmp_affinity.flags.dups = FALSE; } else { // Syntax error fallback - __kmp_affinity_type = 
affinity_compact; - __kmp_affinity_gran = KMP_HW_CORE; - __kmp_affinity_dups = FALSE; + __kmp_affinity.type = affinity_compact; + __kmp_affinity.gran = KMP_HW_CORE; + __kmp_affinity.flags.dups = FALSE; } if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) { __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; } return; } - if (__kmp_affinity_gran != KMP_HW_UNKNOWN) { - kind = __kmp_hw_get_keyword(__kmp_affinity_gran); + if (__kmp_affinity.gran != KMP_HW_UNKNOWN) { + kind = __kmp_hw_get_keyword(__kmp_affinity.gran); } if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) { @@ -3049,6 +3047,10 @@ static void __kmp_stg_print_places(kmp_str_buf_t *buffer, char const *name, void *data) { + enum affinity_type type = __kmp_affinity.type; + const char *proclist = __kmp_affinity.proclist; + kmp_hw_t gran = __kmp_affinity.gran; + if (__kmp_env_format) { KMP_STR_BUF_PRINT_NAME; } else { @@ -3058,23 +3060,23 @@ (__kmp_nested_proc_bind.bind_types == NULL) || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) { __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); - } else if (__kmp_affinity_type == affinity_explicit) { - if (__kmp_affinity_proclist != NULL) { - __kmp_str_buf_print(buffer, "='%s'\n", __kmp_affinity_proclist); + } else if (type == affinity_explicit) { + if (proclist != NULL) { + __kmp_str_buf_print(buffer, "='%s'\n", proclist); } else { __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); } - } else if (__kmp_affinity_type == affinity_compact) { + } else if (type == affinity_compact) { int num; - if (__kmp_affinity_num_masks > 0) { - num = __kmp_affinity_num_masks; + if (__kmp_affinity.num_masks > 0) { + num = __kmp_affinity.num_masks; } else if (__kmp_affinity_num_places > 0) { num = __kmp_affinity_num_places; } else { num = 0; } - if (__kmp_affinity_gran != KMP_HW_UNKNOWN) { - const char *name = __kmp_hw_get_keyword(__kmp_affinity_gran, true); + if (gran != KMP_HW_UNKNOWN) { + const char *name = 
__kmp_hw_get_keyword(gran, true); if (num > 0) { __kmp_str_buf_print(buffer, "='%s(%d)'\n", name, num); } else { @@ -3306,7 +3308,7 @@ buf = next; SKIP_WS(buf); #if KMP_AFFINITY_SUPPORTED - __kmp_affinity_type = affinity_disabled; + __kmp_affinity.type = affinity_disabled; #endif /* KMP_AFFINITY_SUPPORTED */ __kmp_nested_proc_bind.used = 1; __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; @@ -3315,7 +3317,7 @@ buf = next; SKIP_WS(buf); #if KMP_AFFINITY_SUPPORTED - __kmp_affinity_type = affinity_none; + __kmp_affinity.type = affinity_none; #endif /* KMP_AFFINITY_SUPPORTED */ __kmp_nested_proc_bind.used = 1; __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; @@ -5887,6 +5889,22 @@ return rc; } +#if defined(KMP_DEBUG) && KMP_AFFINITY_SUPPORTED +static void __kmp_print_affinity_settings(const kmp_affinity_t *affinity) { + K_DIAG(1, ("%s:\n", affinity->env_var)); + K_DIAG(1, (" type : %d\n", affinity->type)); + K_DIAG(1, (" compact : %d\n", affinity->compact)); + K_DIAG(1, (" offset : %d\n", affinity->offset)); + K_DIAG(1, (" verbose : %u\n", affinity->flags.verbose)); + K_DIAG(1, (" warnings : %u\n", affinity->flags.warnings)); + K_DIAG(1, (" respect : %u\n", affinity->flags.respect)); + K_DIAG(1, (" reset : %u\n", affinity->flags.reset)); + K_DIAG(1, (" dups : %u\n", affinity->flags.dups)); + K_DIAG(1, (" gran : %d\n", (int)affinity->gran)); + KMP_DEBUG_ASSERT(affinity->type != affinity_default); +} +#endif + static void __kmp_aux_env_initialize(kmp_env_blk_t *block) { char const *value; @@ -5994,20 +6012,20 @@ // A new affinity type is specified. // Reset the affinity flags to their default values, // in case this is called from kmp_set_defaults(). 
- __kmp_affinity_type = affinity_default; - __kmp_affinity_gran = KMP_HW_UNKNOWN; + __kmp_affinity.type = affinity_default; + __kmp_affinity.gran = KMP_HW_UNKNOWN; __kmp_affinity_top_method = affinity_top_method_default; - __kmp_affinity_respect_mask = affinity_respect_mask_default; + __kmp_affinity.flags.respect = affinity_respect_mask_default; } #undef FIND // Also reset the affinity flags if OMP_PROC_BIND is specified. aff_str = __kmp_env_blk_var(&block, "OMP_PROC_BIND"); if (aff_str != NULL) { - __kmp_affinity_type = affinity_default; - __kmp_affinity_gran = KMP_HW_UNKNOWN; + __kmp_affinity.type = affinity_default; + __kmp_affinity.gran = KMP_HW_UNKNOWN; __kmp_affinity_top_method = affinity_top_method_default; - __kmp_affinity_respect_mask = affinity_respect_mask_default; + __kmp_affinity.flags.respect = affinity_respect_mask_default; } } @@ -6083,12 +6101,12 @@ __kmp_affinity_top_method == affinity_top_method_default) if (__kmp_hw_subset->specified(KMP_HW_NUMA) || __kmp_hw_subset->specified(KMP_HW_TILE) || - __kmp_affinity_gran == KMP_HW_TILE || - __kmp_affinity_gran == KMP_HW_NUMA) + __kmp_affinity.gran == KMP_HW_TILE || + __kmp_affinity.gran == KMP_HW_NUMA) __kmp_affinity_top_method = affinity_top_method_hwloc; // Force using hwloc when tiles or numa nodes requested for OMP_PLACES - if (__kmp_affinity_gran == KMP_HW_NUMA || - __kmp_affinity_gran == KMP_HW_TILE) + if (__kmp_affinity.gran == KMP_HW_NUMA || + __kmp_affinity.gran == KMP_HW_TILE) __kmp_affinity_top_method = affinity_top_method_hwloc; #endif // Determine if the machine/OS is actually capable of supporting @@ -6105,25 +6123,25 @@ __kmp_affinity_top_method = affinity_top_method_all; } #endif - if (__kmp_affinity_type == affinity_disabled) { + if (__kmp_affinity.type == affinity_disabled) { KMP_AFFINITY_DISABLE(); } else if (!KMP_AFFINITY_CAPABLE()) { __kmp_affinity_dispatch->determine_capable(var); if (!KMP_AFFINITY_CAPABLE()) { - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - 
(__kmp_affinity_type != affinity_default) && - (__kmp_affinity_type != affinity_none) && - (__kmp_affinity_type != affinity_disabled))) { + if (__kmp_affinity.flags.verbose || + (__kmp_affinity.flags.warnings && + (__kmp_affinity.type != affinity_default) && + (__kmp_affinity.type != affinity_none) && + (__kmp_affinity.type != affinity_disabled))) { KMP_WARNING(AffNotSupported, var); } - __kmp_affinity_type = affinity_disabled; - __kmp_affinity_respect_mask = 0; - __kmp_affinity_gran = KMP_HW_THREAD; + __kmp_affinity.type = affinity_disabled; + __kmp_affinity.flags.respect = FALSE; + __kmp_affinity.gran = KMP_HW_THREAD; } } - if (__kmp_affinity_type == affinity_disabled) { + if (__kmp_affinity.type == affinity_disabled) { __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; } else if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_true) { // OMP_PROC_BIND=true maps to OMP_PROC_BIND=spread. @@ -6165,48 +6183,48 @@ // processor groups, or if the user requested it, and OMP 4.0 // affinity is not in effect. if (__kmp_num_proc_groups > 1 && - __kmp_affinity_type == affinity_default && + __kmp_affinity.type == affinity_default && __kmp_nested_proc_bind.bind_types[0] == proc_bind_default) { // Do not respect the initial processor affinity mask if it is assigned // exactly one Windows Processor Group since this is interpreted as the // default OS assignment. Not respecting the mask allows the runtime to // use all the logical processors in all groups. - if (__kmp_affinity_respect_mask == affinity_respect_mask_default && + if (__kmp_affinity.flags.respect == affinity_respect_mask_default && exactly_one_group) { - __kmp_affinity_respect_mask = FALSE; + __kmp_affinity.flags.respect = FALSE; } // Use compact affinity with anticipation of pinning to at least the // group granularity since threads can only be bound to one group. 
- if (__kmp_affinity_type == affinity_default) { - __kmp_affinity_type = affinity_compact; + if (__kmp_affinity.type == affinity_default) { + __kmp_affinity.type = affinity_compact; __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; } if (__kmp_affinity_top_method == affinity_top_method_default) __kmp_affinity_top_method = affinity_top_method_all; - if (__kmp_affinity_gran == KMP_HW_UNKNOWN) - __kmp_affinity_gran = KMP_HW_PROC_GROUP; + if (__kmp_affinity.gran == KMP_HW_UNKNOWN) + __kmp_affinity.gran = KMP_HW_PROC_GROUP; } else #endif /* KMP_GROUP_AFFINITY */ { - if (__kmp_affinity_respect_mask == affinity_respect_mask_default) { + if (__kmp_affinity.flags.respect == affinity_respect_mask_default) { #if KMP_GROUP_AFFINITY if (__kmp_num_proc_groups > 1 && exactly_one_group) { - __kmp_affinity_respect_mask = FALSE; + __kmp_affinity.flags.respect = FALSE; } else #endif /* KMP_GROUP_AFFINITY */ { - __kmp_affinity_respect_mask = TRUE; + __kmp_affinity.flags.respect = TRUE; } } if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) && (__kmp_nested_proc_bind.bind_types[0] != proc_bind_default)) { - if (__kmp_affinity_type == affinity_default) { - __kmp_affinity_type = affinity_compact; - __kmp_affinity_dups = FALSE; + if (__kmp_affinity.type == affinity_default) { + __kmp_affinity.type = affinity_compact; + __kmp_affinity.flags.dups = FALSE; } - } else if (__kmp_affinity_type == affinity_default) { + } else if (__kmp_affinity.type == affinity_default) { #if KMP_MIC_SUPPORTED if (__kmp_mic_type != non_mic) { __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; @@ -6217,22 +6235,22 @@ } #if KMP_MIC_SUPPORTED if (__kmp_mic_type != non_mic) { - __kmp_affinity_type = affinity_scatter; + __kmp_affinity.type = affinity_scatter; } else #endif { - __kmp_affinity_type = affinity_none; + __kmp_affinity.type = affinity_none; } } - if ((__kmp_affinity_gran == KMP_HW_UNKNOWN) && - (__kmp_affinity_gran_levels < 0)) { + if ((__kmp_affinity.gran == KMP_HW_UNKNOWN) && + 
(__kmp_affinity.gran_levels < 0)) { #if KMP_MIC_SUPPORTED if (__kmp_mic_type != non_mic) { - __kmp_affinity_gran = KMP_HW_THREAD; + __kmp_affinity.gran = KMP_HW_THREAD; } else #endif { - __kmp_affinity_gran = KMP_HW_CORE; + __kmp_affinity.gran = KMP_HW_CORE; } } if (__kmp_affinity_top_method == affinity_top_method_default) { @@ -6241,19 +6259,12 @@ } } - K_DIAG(1, ("__kmp_affinity_type == %d\n", __kmp_affinity_type)); - K_DIAG(1, ("__kmp_affinity_compact == %d\n", __kmp_affinity_compact)); - K_DIAG(1, ("__kmp_affinity_offset == %d\n", __kmp_affinity_offset)); - K_DIAG(1, ("__kmp_affinity_verbose == %d\n", __kmp_affinity_verbose)); - K_DIAG(1, ("__kmp_affinity_warnings == %d\n", __kmp_affinity_warnings)); - K_DIAG(1, ("__kmp_affinity_respect_mask == %d\n", - __kmp_affinity_respect_mask)); - K_DIAG(1, ("__kmp_affinity_gran == %d\n", __kmp_affinity_gran)); - - KMP_DEBUG_ASSERT(__kmp_affinity_type != affinity_default); +#ifdef KMP_DEBUG + __kmp_print_affinity_settings(&__kmp_affinity); KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.bind_types[0] != proc_bind_default); K_DIAG(1, ("__kmp_nested_proc_bind.bind_types[0] == %d\n", __kmp_nested_proc_bind.bind_types[0])); +#endif } #endif /* KMP_AFFINITY_SUPPORTED */ diff --git a/openmp/runtime/src/kmp_version.cpp b/openmp/runtime/src/kmp_version.cpp --- a/openmp/runtime/src/kmp_version.cpp +++ b/openmp/runtime/src/kmp_version.cpp @@ -179,7 +179,7 @@ &buffer, "%sthread affinity support: %s\n", KMP_VERSION_PREF_STR, #if KMP_AFFINITY_SUPPORTED (KMP_AFFINITY_CAPABLE() - ? (__kmp_affinity_type == affinity_none ? "not used" : "yes") + ? (__kmp_affinity.type == affinity_none ? 
"not used" : "yes") : "no") #else "no" diff --git a/openmp/runtime/src/ompt-general.cpp b/openmp/runtime/src/ompt-general.cpp --- a/openmp/runtime/src/ompt-general.cpp +++ b/openmp/runtime/src/ompt-general.cpp @@ -687,7 +687,7 @@ #else if (!KMP_AFFINITY_CAPABLE()) return 0; - return __kmp_affinity_num_masks; + return __kmp_affinity.num_masks; #endif } @@ -703,11 +703,11 @@ tmp_ids[j] = 0; if (!KMP_AFFINITY_CAPABLE()) return 0; - if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks) + if (place_num < 0 || place_num >= (int)__kmp_affinity.num_masks) return 0; /* TODO: Is this safe for asynchronous call from signal handler during runtime * shutdown? */ - kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num); + kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity.masks, place_num); count = 0; KMP_CPU_SET_ITERATE(i, mask) { if ((!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) || diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp --- a/openmp/runtime/src/z_Linux_util.cpp +++ b/openmp/runtime/src/z_Linux_util.cpp @@ -135,6 +135,9 @@ long gCode; unsigned char *buf; buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT); + int verbose = __kmp_affinity.flags.verbose; + int warnings = __kmp_affinity.flags.warnings; + enum affinity_type type = __kmp_affinity.type; // If the syscall returns a suggestion for the size, // then we don't have to search for an appropriate size. 
@@ -145,10 +148,9 @@ if (gCode < 0 && errno != EINVAL) { // System call not supported - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none) && - (__kmp_affinity_type != affinity_default) && - (__kmp_affinity_type != affinity_disabled))) { + if (verbose || + (warnings && (type != affinity_none) && (type != affinity_default) && + (type != affinity_disabled))) { int error = errno; kmp_msg_t err_code = KMP_ERR(error); __kmp_msg(kmp_ms_warning, KMP_MSG(GetAffSysCallNotSupported, env_var), @@ -188,11 +190,9 @@ "inconsistent OS call behavior: errno == ENOSYS for mask " "size %d\n", size)); - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && - (__kmp_affinity_type != affinity_none) && - (__kmp_affinity_type != affinity_default) && - (__kmp_affinity_type != affinity_disabled))) { + if (verbose || + (warnings && (type != affinity_none) && + (type != affinity_default) && (type != affinity_disabled))) { int error = errno; kmp_msg_t err_code = KMP_ERR(error); __kmp_msg(kmp_ms_warning, KMP_MSG(GetAffSysCallNotSupported, env_var), @@ -239,10 +239,8 @@ KMP_AFFINITY_DISABLE(); KA_TRACE(10, ("__kmp_affinity_determine_capable: " "cannot determine mask size - affinity not supported\n")); - if (__kmp_affinity_verbose || - (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none) && - (__kmp_affinity_type != affinity_default) && - (__kmp_affinity_type != affinity_disabled))) { + if (verbose || (warnings && (type != affinity_none) && + (type != affinity_default) && (type != affinity_disabled))) { KMP_WARNING(AffCantGetMaskSize, env_var); } } @@ -1230,12 +1228,12 @@ // Set default not to bind threads tightly in the child (we're expecting // over-subscription after the fork and this can improve things for // scripting languages that use OpenMP inside process-parallel code). 
- __kmp_affinity_type = affinity_none; if (__kmp_nested_proc_bind.bind_types != NULL) { __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; } - __kmp_affinity_masks = NULL; - __kmp_affinity_num_masks = 0; + __kmp_affinity = KMP_AFFINITY_INIT("KMP_AFFINITY"); + __kmp_affin_fullMask = nullptr; + __kmp_affin_origMask = nullptr; #endif // KMP_AFFINITY_SUPPORTED #if KMP_USE_MONITOR diff --git a/openmp/runtime/src/z_Windows_NT_util.cpp b/openmp/runtime/src/z_Windows_NT_util.cpp --- a/openmp/runtime/src/z_Windows_NT_util.cpp +++ b/openmp/runtime/src/z_Windows_NT_util.cpp @@ -608,7 +608,8 @@ KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL); if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) { DWORD error = GetLastError(); - if (__kmp_affinity_verbose) { // AC: continue silently if not verbose + // AC: continue silently if not verbose + if (__kmp_affinity.flags.verbose) { kmp_msg_t err_code = KMP_ERR(error); __kmp_msg(kmp_ms_warning, KMP_MSG(CantSetThreadAffMask), err_code, __kmp_msg_null);