diff --git a/openmp/runtime/src/i18n/en_US.txt b/openmp/runtime/src/i18n/en_US.txt
--- a/openmp/runtime/src/i18n/en_US.txt
+++ b/openmp/runtime/src/i18n/en_US.txt
@@ -469,6 +469,8 @@
 AffHWSubsetEqvLayers         "KMP_HW_SUBSET ignored: %1$s, %2$s: layers are equivalent, please only specify one."
 AffHWSubsetOutOfOrder        "KMP_HW_SUBSET ignored: %1$s layer should come after %2$s."
 AffEqualTopologyTypes        "%1$s: topology layer \"%2$s\" is equivalent to \"%3$s\"."
+AffGranTooCoarseProcGroup    "%1$s: granularity=%2$s is too coarse, setting granularity=group."
+StgDeprecatedValue           "%1$s: \"%2$s\" value is deprecated. Please use \"%3$s\" instead."

 # --------------------------------------------------------------------------------------------------
 -*- HINTS -*-

diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h
--- a/openmp/runtime/src/kmp_affinity.h
+++ b/openmp/runtime/src/kmp_affinity.h
@@ -638,7 +638,9 @@

   int depth;

-  // The following arrays are all 'depth' long
+  // The following arrays are all 'depth' long and have been
+  // allocated to hold up to KMP_HW_LAST number of objects if
+  // needed so layers can be added without reallocation of any array

   // Orderd array of the types in the topology
   kmp_hw_t *types;
@@ -671,6 +673,14 @@
   // Flags describing the topology
   flags_t flags;

+  // Insert a new topology layer after allocation
+  void _insert_layer(kmp_hw_t type, const int *ids);
+
+#if KMP_GROUP_AFFINITY
+  // Insert topology information about Windows Processor groups
+  void _insert_windows_proc_groups();
+#endif
+
   // Count each item & get the num x's per y
   // e.g., get the number of cores and the number of threads per core
   // for each (x, y) in (KMP_HW_* , KMP_HW_*)
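The kmp_affinity.h change above sizes the per-layer arrays (types, ratio, count) for KMP_HW_LAST entries up front, so a new topology layer can later be spliced in by shifting entries instead of reallocating. A minimal standalone sketch of that idea, not taken from the runtime (topology_sketch, MAX_LAYERS and insert_layer are illustrative names only):

// Standalone sketch (not part of the patch): shows why sizing the per-layer
// array to a fixed maximum (MAX_LAYERS here stands in for KMP_HW_LAST) lets a
// new layer be spliced in by shifting entries, with no reallocation.
#include <cassert>
#include <cstdio>

enum hw_type { HW_SOCKET, HW_CORE, HW_THREAD, HW_PROC_GROUP, MAX_LAYERS };

struct topology_sketch {
  hw_type types[MAX_LAYERS]; // room for every possible layer type
  int depth = 0;             // number of layers currently in use

  // Shift the layers at and after 'pos' one slot to the right, then store 'type'.
  void insert_layer(int pos, hw_type type) {
    assert(depth < MAX_LAYERS && pos >= 0 && pos <= depth);
    for (int i = depth - 1; i >= pos; --i)
      types[i + 1] = types[i];
    types[pos] = type;
    ++depth;
  }
};

int main() {
  topology_sketch t;
  t.insert_layer(0, HW_SOCKET);
  t.insert_layer(1, HW_CORE);
  t.insert_layer(2, HW_THREAD);
  // Splice a processor-group layer in between the socket and core layers.
  t.insert_layer(1, HW_PROC_GROUP);
  for (int i = 0; i < t.depth; ++i)
    std::printf("layer %d -> type %d\n", i, (int)t.types[i]);
  return 0;
}

The trade-off is a small, bounded amount of extra memory per array in exchange for insertion that only shifts existing entries, which matters because the Windows processor-group layer is only discovered after the topology object already exists.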
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -198,14 +198,82 @@
 ////////////////////////////////////////////////////////////////////////////////
 // kmp_topology_t methods

+// Add a layer to the topology based on the ids. Assume the topology
+// is perfectly nested (i.e., so no object has more than one parent)
+void kmp_topology_t::_insert_layer(kmp_hw_t type, const int *ids) {
+  // Figure out where the layer should go by comparing the ids of the current
+  // layers with the new ids
+  int target_layer;
+  int previous_id = kmp_hw_thread_t::UNKNOWN_ID;
+  int previous_new_id = kmp_hw_thread_t::UNKNOWN_ID;
+
+  // Start from the highest layer and work down to find target layer
+  // If new layer is equal to another layer then put the new layer above
+  for (target_layer = 0; target_layer < depth; ++target_layer) {
+    bool layers_equal = true;
+    bool strictly_above_target_layer = false;
+    for (int i = 0; i < num_hw_threads; ++i) {
+      int id = hw_threads[i].ids[target_layer];
+      int new_id = ids[i];
+      if (id != previous_id && new_id == previous_new_id) {
+        // Found the layer we are strictly above
+        strictly_above_target_layer = true;
+        layers_equal = false;
+        break;
+      } else if (id == previous_id && new_id != previous_new_id) {
+        // Found a layer we are below. Move to next layer and check.
+        layers_equal = false;
+        break;
+      }
+      previous_id = id;
+      previous_new_id = new_id;
+    }
+    if (strictly_above_target_layer || layers_equal)
+      break;
+  }
+
+  // Found the layer we are above. Now move everything to accommodate the new
+  // layer. And put the new ids and type into the topology.
+  for (int i = depth - 1, j = depth; i >= target_layer; --i, --j)
+    types[j] = types[i];
+  types[target_layer] = type;
+  for (int k = 0; k < num_hw_threads; ++k) {
+    for (int i = depth - 1, j = depth; i >= target_layer; --i, --j)
+      hw_threads[k].ids[j] = hw_threads[k].ids[i];
+    hw_threads[k].ids[target_layer] = ids[k];
+  }
+  equivalent[type] = type;
+  depth++;
+}
+
+#if KMP_GROUP_AFFINITY
+// Insert the Windows Processor Group structure into the topology
+void kmp_topology_t::_insert_windows_proc_groups() {
+  // Do not insert the processor group structure for a single group
+  if (__kmp_num_proc_groups == 1)
+    return;
+  kmp_affin_mask_t *mask;
+  int *ids = (int *)__kmp_allocate(sizeof(int) * num_hw_threads);
+  KMP_CPU_ALLOC(mask);
+  for (int i = 0; i < num_hw_threads; ++i) {
+    KMP_CPU_ZERO(mask);
+    KMP_CPU_SET(hw_threads[i].os_id, mask);
+    ids[i] = __kmp_get_proc_group(mask);
+  }
+  KMP_CPU_FREE(mask);
+  _insert_layer(KMP_HW_PROC_GROUP, ids);
+  __kmp_free(ids);
+}
+#endif
+
 // Remove layers that don't add information to the topology.
 // This is done by having the layer take on the id = UNKNOWN_ID (-1)
 void kmp_topology_t::_remove_radix1_layers() {
   int preference[KMP_HW_LAST];
   int top_index1, top_index2;
   // Set up preference associative array
-  preference[KMP_HW_PROC_GROUP] = 110;
-  preference[KMP_HW_SOCKET] = 100;
+  preference[KMP_HW_SOCKET] = 110;
+  preference[KMP_HW_PROC_GROUP] = 100;
   preference[KMP_HW_CORE] = 95;
   preference[KMP_HW_THREAD] = 90;
   preference[KMP_HW_NUMA] = 85;
@@ -440,7 +508,7 @@
   kmp_topology_t *retval;
   // Allocate all data in one large allocation
   size_t size = sizeof(kmp_topology_t) + sizeof(kmp_hw_thread_t) * nproc +
-                sizeof(int) * ndepth * 3;
+                sizeof(int) * (size_t)KMP_HW_LAST * 3;
   char *bytes = (char *)__kmp_allocate(size);
   retval = (kmp_topology_t *)bytes;
   if (nproc > 0) {
@@ -453,8 +521,8 @@
   int *arr =
       (int *)(bytes + sizeof(kmp_topology_t) + sizeof(kmp_hw_thread_t) * nproc);
   retval->types = (kmp_hw_t *)arr;
-  retval->ratio = arr + ndepth;
-  retval->count = arr + 2 * ndepth;
+  retval->ratio = arr + (size_t)KMP_HW_LAST;
+  retval->count = arr + 2 * (size_t)KMP_HW_LAST;
   KMP_FOREACH_HW_TYPE(type) { retval->equivalent[type] = KMP_HW_UNKNOWN; }
   for (int i = 0; i < ndepth; ++i) {
     retval->types[i] = types[i];
@@ -651,6 +719,9 @@
 }

 void kmp_topology_t::canonicalize() {
+#if KMP_GROUP_AFFINITY
+  _insert_windows_proc_groups();
+#endif
   _remove_radix1_layers();
   _gather_enumeration_information();
   _discover_uniformity();
@@ -699,6 +770,25 @@
                   __kmp_hw_get_catalog_string(gran_type));
       __kmp_affinity_gran = gran_type;
     }
+#if KMP_GROUP_AFFINITY
+    // If more than one processor group exists, and the level of
+    // granularity specified by the user is too coarse, then the
+    // granularity must be adjusted "down" to processor group affinity
+    // because threads can only exist within one processor group.
+    // For example, if a user sets granularity=socket and there are two
+    // processor groups that cover a socket, then the runtime must
+    // restrict the granularity down to the processor group level.
+    if (__kmp_num_proc_groups > 1) {
+      int gran_depth = __kmp_topology->get_level(gran_type);
+      int proc_group_depth = __kmp_topology->get_level(KMP_HW_PROC_GROUP);
+      if (gran_depth >= 0 && proc_group_depth >= 0 &&
+          gran_depth < proc_group_depth) {
+        KMP_WARNING(AffGranTooCoarseProcGroup, "KMP_AFFINITY",
+                    __kmp_hw_get_catalog_string(__kmp_affinity_gran));
+        __kmp_affinity_gran = gran_type = KMP_HW_PROC_GROUP;
+      }
+    }
+#endif
     __kmp_affinity_gran_levels = 0;
     for (int i = depth - 1; i >= 0 && get_type(i) != gran_type; --i)
       __kmp_affinity_gran_levels++;
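The subtle part of _insert_layer above is choosing the target layer: the ids are scanned in hardware-thread order, and the new layer goes strictly above the first existing layer whose id changes while the new id stays constant. A simplified standalone sketch of that placement heuristic, assuming plain std::vector inputs (find_target_layer and the example id tables are illustrative, not the runtime's data structures):

// Standalone sketch (not part of the patch): a simplified version of the
// placement heuristic in _insert_layer. For each candidate layer, walk the
// hardware threads in order and compare when the existing layer's id changes
// against when the new layer's id changes. If the existing id changes while
// the new id stays the same, the new layer groups several existing objects,
// so it belongs strictly above that layer.
#include <cstdio>
#include <vector>

// Returns the index at which a layer with per-thread ids 'new_ids' should be
// inserted into 'layers' (each inner vector holds one existing layer's ids).
static int find_target_layer(const std::vector<std::vector<int>> &layers,
                             const std::vector<int> &new_ids) {
  const int depth = (int)layers.size();
  const int nthreads = (int)new_ids.size();
  int target;
  for (target = 0; target < depth; ++target) {
    bool equal = true;
    bool strictly_above = false;
    int prev_id = -1, prev_new_id = -1;
    for (int t = 0; t < nthreads; ++t) {
      int id = layers[target][t];
      int new_id = new_ids[t];
      if (id != prev_id && new_id == prev_new_id) {
        strictly_above = true; // new layer is coarser than this layer
        equal = false;
        break;
      } else if (id == prev_id && new_id != prev_new_id) {
        equal = false; // new layer is finer; try the next layer down
        break;
      }
      prev_id = id;
      prev_new_id = new_id;
    }
    if (strictly_above || equal)
      break;
  }
  return target;
}

int main() {
  // Two sockets with four cores each, one hw thread per core (8 threads).
  std::vector<std::vector<int>> layers = {
      {0, 0, 0, 0, 1, 1, 1, 1},  // socket id of each hw thread
      {0, 1, 2, 3, 4, 5, 6, 7}}; // core id of each hw thread
  // Processor-group ids that split each socket into two groups of two cores.
  std::vector<int> group_ids = {0, 0, 1, 1, 2, 2, 3, 3};
  std::printf("insert at layer %d\n", find_target_layer(layers, group_ids));
  return 0;
}

With these inputs the sketch prints "insert at layer 1", i.e. the processor-group layer lands between the socket and core layers, which is consistent with the preference table in _remove_radix1_layers now ranking KMP_HW_SOCKET above KMP_HW_PROC_GROUP.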
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -3136,6 +3136,7 @@
   }
 #if KMP_GROUP_AFFINITY
   else if (__kmp_str_match("group", 1, value)) {
+    KMP_WARNING(StgDeprecatedValue, name, value, "all");
     __kmp_affinity_top_method = affinity_top_method_group;
   }
 #endif /* KMP_GROUP_AFFINITY */
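For reference, the KMP_WARNING(StgDeprecatedValue, name, value, "all") call above and the new catalog entry line up positionally: %1$s is the setting name, %2$s the deprecated value, %3$s the suggested replacement. A standalone sketch of that substitution (format_deprecated is illustrative and the exact warning prefix may differ; the runtime uses its message-catalog machinery, not this helper):

// Standalone sketch (not part of the patch): shows how the positional
// parameters of StgDeprecatedValue map onto the arguments of the
// KMP_WARNING call in the hunk above.
#include <cstdio>
#include <string>

// %1$s -> name, %2$s -> deprecated value, %3$s -> suggested replacement
static std::string format_deprecated(const std::string &name,
                                     const std::string &value,
                                     const std::string &replacement) {
  return name + ": \"" + value + "\" value is deprecated. Please use \"" +
         replacement + "\" instead.";
}

int main() {
  // Roughly what a user would see with KMP_TOPOLOGY_METHOD=group set:
  std::printf("OMP: Warning: %s\n",
              format_deprecated("KMP_TOPOLOGY_METHOD", "group", "all").c_str());
  return 0;
}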
@@ -6029,65 +6030,27 @@
     // Handle the Win 64 group affinity stuff if there are multiple
     // processor groups, or if the user requested it, and OMP 4.0
     // affinity is not in effect.
-    if (((__kmp_num_proc_groups > 1) &&
-         (__kmp_affinity_type == affinity_default) &&
-         (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default)) ||
-        (__kmp_affinity_top_method == affinity_top_method_group)) {
+    if (__kmp_num_proc_groups > 1 &&
+        __kmp_affinity_type == affinity_default &&
+        __kmp_nested_proc_bind.bind_types[0] == proc_bind_default) {
+      // Do not respect the initial processor affinity mask if it is assigned
+      // exactly one Windows Processor Group since this is interpreted as the
+      // default OS assignment. Not respecting the mask allows the runtime to
+      // use all the logical processors in all groups.
       if (__kmp_affinity_respect_mask == affinity_respect_mask_default &&
           exactly_one_group) {
         __kmp_affinity_respect_mask = FALSE;
       }
+      // Use compact affinity with anticipation of pinning to at least the
+      // group granularity since threads can only be bound to one group.
       if (__kmp_affinity_type == affinity_default) {
         __kmp_affinity_type = affinity_compact;
         __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
       }
-      if (__kmp_affinity_top_method == affinity_top_method_default) {
-        if (__kmp_affinity_gran == KMP_HW_UNKNOWN) {
-          __kmp_affinity_top_method = affinity_top_method_group;
-          __kmp_affinity_gran = KMP_HW_PROC_GROUP;
-        } else if (__kmp_affinity_gran == KMP_HW_PROC_GROUP) {
-          __kmp_affinity_top_method = affinity_top_method_group;
-        } else {
-          __kmp_affinity_top_method = affinity_top_method_all;
-        }
-      } else if (__kmp_affinity_top_method == affinity_top_method_group) {
-        if (__kmp_affinity_gran == KMP_HW_UNKNOWN) {
-          __kmp_affinity_gran = KMP_HW_PROC_GROUP;
-        } else if ((__kmp_affinity_gran != KMP_HW_PROC_GROUP) &&
-                   (__kmp_affinity_gran != KMP_HW_THREAD)) {
-          const char *str = __kmp_hw_get_keyword(__kmp_affinity_gran);
-          KMP_WARNING(AffGranTopGroup, var, str);
-          __kmp_affinity_gran = KMP_HW_THREAD;
-        }
-      } else {
-        if (__kmp_affinity_gran == KMP_HW_UNKNOWN) {
-          __kmp_affinity_gran = KMP_HW_CORE;
-        } else if (__kmp_affinity_gran == KMP_HW_PROC_GROUP) {
-          const char *str = NULL;
-          switch (__kmp_affinity_type) {
-          case affinity_physical:
-            str = "physical";
-            break;
-          case affinity_logical:
-            str = "logical";
-            break;
-          case affinity_compact:
-            str = "compact";
-            break;
-          case affinity_scatter:
-            str = "scatter";
-            break;
-          case affinity_explicit:
-            str = "explicit";
-            break;
-          // No MIC on windows, so no affinity_balanced case
-          default:
-            KMP_DEBUG_ASSERT(0);
-          }
-          KMP_WARNING(AffGranGroupType, var, str);
-          __kmp_affinity_gran = KMP_HW_CORE;
-        }
-      }
+      if (__kmp_affinity_top_method == affinity_top_method_default)
+        __kmp_affinity_top_method = affinity_top_method_all;
+      if (__kmp_affinity_gran == KMP_HW_UNKNOWN)
+        __kmp_affinity_gran = KMP_HW_PROC_GROUP;
     } else

 #endif /* KMP_GROUP_AFFINITY */
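The net effect of the simplified block above: with more than one processor group and no user-specified affinity, the runtime now defaults to compact affinity, the "all" topology method, and processor-group granularity, and (when KMP_AFFINITY does not say otherwise) ignores an inherited affinity mask that covers exactly one group. A standalone sketch condensing that decision into a pure function (the enum and struct names here are illustrative, not the runtime's types):

// Standalone sketch (not part of the patch): condenses the simplified default
// selection above into a pure function so the new behavior is easy to see.
#include <cstdio>

enum affinity_type { affinity_default, affinity_compact };
enum top_method { top_default, top_all };
enum gran { gran_unknown, gran_proc_group };

struct defaults {
  affinity_type type;
  top_method method;
  gran granularity;
  bool respect_initial_mask;
};

// Mirrors the reduced branch: with more than one processor group and nothing
// set by the user, pick compact affinity, the "all" topology method, and
// group granularity; drop an initial mask that covers exactly one group.
static defaults pick_defaults(int num_proc_groups, bool exactly_one_group) {
  defaults d{affinity_default, top_default, gran_unknown, true};
  if (num_proc_groups > 1) {
    if (exactly_one_group)
      d.respect_initial_mask = false;
    d.type = affinity_compact;
    d.method = top_all;
    d.granularity = gran_proc_group;
  }
  return d;
}

int main() {
  defaults d = pick_defaults(/*num_proc_groups=*/2, /*exactly_one_group=*/true);
  std::printf("compact=%d top_all=%d gran_group=%d respect_mask=%d\n",
              d.type == affinity_compact, d.method == top_all,
              d.granularity == gran_proc_group, (int)d.respect_initial_mask);
  return 0;
}

The removed AffGranTopGroup / AffGranGroupType special cases are no longer needed: an overly coarse granularity is instead caught by the new check in kmp_topology_t::canonicalize(), which clamps it to KMP_HW_PROC_GROUP and emits AffGranTooCoarseProcGroup.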