diff --git a/openmp/runtime/src/i18n/en_US.txt b/openmp/runtime/src/i18n/en_US.txt --- a/openmp/runtime/src/i18n/en_US.txt +++ b/openmp/runtime/src/i18n/en_US.txt @@ -480,6 +480,8 @@ AffHWSubsetAttrsNonHybrid "KMP_HW_SUBSET ignored: Too many attributes specified. This machine is not a hybrid architecutre." AffHWSubsetIgnoringAttr "KMP_HW_SUBSET: ignoring %1$s attribute. This machine is not a hybrid architecutre." TargetMemNotAvailable "Target memory not available, will use default allocator." +AffIgnoringNonHybrid "%1$s ignored: This machine is not a hybrid architecutre. Using \"%2$s\" instead." +AffIgnoringNotAvailable "%1$s ignored: %2$s is not available. Using \"%3$s\" instead." # -------------------------------------------------------------------------------------------------- -*- HINTS -*- diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -690,10 +690,12 @@ #define KMP_CPU_ISSET(i, mask) (mask)->is_set(i) #define KMP_CPU_CLR(i, mask) (mask)->clear(i) #define KMP_CPU_ZERO(mask) (mask)->zero() +#define KMP_CPU_ISEMPTY(mask) (mask)->empty() #define KMP_CPU_COPY(dest, src) (dest)->copy(src) #define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src) #define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not() #define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src) +#define KMP_CPU_EQUAL(dest, src) (dest)->is_equal(src) #define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask()) #define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr) #define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr) @@ -730,6 +732,8 @@ virtual void clear(int i) {} // Zero out entire mask virtual void zero() {} + // Check whether mask is empty + virtual bool empty() const { return true; } // Copy src into this mask virtual void copy(const Mask *src) {} // this &= rhs @@ -738,6 +742,8 @@ virtual void bitwise_or(const Mask *rhs) {} // this = ~this virtual void bitwise_not() {} + 
// this == rhs + virtual bool is_equal(const Mask *rhs) const { return false; } // API for iterating over an affinity mask // for (int i = mask->begin(); i != mask->end(); i = mask->next(i)) virtual int begin() const { return 0; } @@ -866,7 +872,10 @@ unsigned respect : 2; unsigned reset : 1; unsigned initialized : 1; - unsigned reserved : 25; + unsigned core_types_gran : 1; + unsigned core_effs_gran : 1; + unsigned omp_places : 1; + unsigned reserved : 22; } kmp_affinity_flags_t; KMP_BUILD_ASSERT(sizeof(kmp_affinity_flags_t) == 4); @@ -895,6 +904,7 @@ enum affinity_type type; kmp_hw_t gran; int gran_levels; + kmp_affinity_attrs_t core_attr_gran; int compact; int offset; kmp_affinity_flags_t flags; @@ -909,9 +919,11 @@ #define KMP_AFFINITY_INIT(env) \ { \ - nullptr, affinity_default, KMP_HW_UNKNOWN, -1, 0, 0, \ - {TRUE, FALSE, TRUE, affinity_respect_mask_default, FALSE, FALSE}, 0, \ - nullptr, nullptr, nullptr, 0, nullptr, env \ + nullptr, affinity_default, KMP_HW_UNKNOWN, -1, KMP_AFFINITY_ATTRS_UNKNOWN, \ + 0, 0, \ + {TRUE, FALSE, TRUE, affinity_respect_mask_default, FALSE, FALSE, \ + FALSE, FALSE, FALSE}, \ + 0, nullptr, nullptr, nullptr, 0, nullptr, env \ } extern enum affinity_top_method __kmp_affinity_top_method; diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h --- a/openmp/runtime/src/kmp_affinity.h +++ b/openmp/runtime/src/kmp_affinity.h @@ -34,6 +34,7 @@ bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); } void clear(int i) override { hwloc_bitmap_clr(mask, i); } void zero() override { hwloc_bitmap_zero(mask); } + bool empty() const override { return hwloc_bitmap_iszero(mask); } void copy(const KMPAffinity::Mask *src) override { const Mask *convert = static_cast(src); hwloc_bitmap_copy(mask, convert->mask); @@ -47,6 +48,10 @@ hwloc_bitmap_or(mask, mask, convert->mask); } void bitwise_not() override { hwloc_bitmap_not(mask, mask); } + bool is_equal(const KMPAffinity::Mask *rhs) const override { + const 
Mask *convert = static_cast(rhs); + return hwloc_bitmap_isequal(mask, convert->mask); + } int begin() const override { return hwloc_bitmap_first(mask); } int end() const override { return -1; } int next(int previous) const override { @@ -319,6 +324,13 @@ for (mask_size_type i = 0; i < e; ++i) mask[i] = (mask_t)0; } + bool empty() const override { + mask_size_type e = get_num_mask_types(); + for (mask_size_type i = 0; i < e; ++i) + if (mask[i] != (mask_t)0) + return false; + return true; + } void copy(const KMPAffinity::Mask *src) override { const Mask *convert = static_cast(src); mask_size_type e = get_num_mask_types(); @@ -342,6 +354,14 @@ for (mask_size_type i = 0; i < e; ++i) mask[i] = ~(mask[i]); } + bool is_equal(const KMPAffinity::Mask *rhs) const override { + const Mask *convert = static_cast(rhs); + mask_size_type e = get_num_mask_types(); + for (mask_size_type i = 0; i < e; ++i) + if (mask[i] != convert->mask[i]) + return false; + return true; + } int begin() const override { int retval = 0; while (retval < end() && !is_set(retval)) @@ -459,6 +479,12 @@ for (int i = 0; i < __kmp_num_proc_groups; ++i) mask[i] = 0; } + bool empty() const override { + for (size_t i = 0; i < __kmp_num_proc_groups; ++i) + if (mask[i]) + return false; + return true; + } void copy(const KMPAffinity::Mask *src) override { const Mask *convert = static_cast(src); for (int i = 0; i < __kmp_num_proc_groups; ++i) @@ -478,6 +504,13 @@ for (int i = 0; i < __kmp_num_proc_groups; ++i) mask[i] = ~(mask[i]); } + bool is_equal(const KMPAffinity::Mask *rhs) const override { + const Mask *convert = static_cast(rhs); + for (size_t i = 0; i < __kmp_num_proc_groups; ++i) + if (mask[i] != convert->mask[i]) + return false; + return true; + } int begin() const override { int retval = 0; while (retval < end() && !is_set(retval)) @@ -679,6 +712,21 @@ } return false; } +#if KMP_AFFINITY_SUPPORTED + bool contains(const kmp_affinity_attrs_t &attr) const { + if (!valid && !attr.valid) + return true; + if 
(valid && attr.valid) { + if (attr.core_type != KMP_HW_CORE_TYPE_UNKNOWN) + return (is_core_type_valid() && + (get_core_type() == (kmp_hw_core_type_t)attr.core_type)); + if (attr.core_eff != UNKNOWN_CORE_EFF) + return (is_core_eff_valid() && (get_core_eff() == attr.core_eff)); + return true; + } + return false; + } +#endif // KMP_AFFINITY_SUPPORTED bool operator==(const kmp_hw_attr_t &rhs) const { return (rhs.valid == valid && rhs.core_eff == core_eff && rhs.core_type == core_type); @@ -834,13 +882,18 @@ #if KMP_AFFINITY_SUPPORTED // Set the granularity for affinity settings void set_granularity(kmp_affinity_t &stgs) const; -#endif + bool is_close(int hwt1, int hwt2, const kmp_affinity_t &stgs) const; + bool restrict_to_mask(const kmp_affin_mask_t *mask); bool filter_hw_subset(); - bool is_close(int hwt1, int hwt2, int level) const; +#endif bool is_uniform() const { return flags.uniform; } // Tell whether a type is a valid type in the topology // returns KMP_HW_UNKNOWN when there is no equivalent type - kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; } + kmp_hw_t get_equivalent_type(kmp_hw_t type) const { + if (type == KMP_HW_UNKNOWN) + return KMP_HW_UNKNOWN; + return equivalent[type]; + } // Set type1 = type2 void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) { KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1); diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp --- a/openmp/runtime/src/kmp_affinity.cpp +++ b/openmp/runtime/src/kmp_affinity.cpp @@ -38,6 +38,43 @@ void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); } +#if KMP_AFFINITY_SUPPORTED +// Helper class to see if place lists further restrict the fullMask +class kmp_full_mask_modifier_t { + kmp_affin_mask_t *mask; + +public: + kmp_full_mask_modifier_t() { + KMP_CPU_ALLOC(mask); + KMP_CPU_ZERO(mask); + } + ~kmp_full_mask_modifier_t() { + KMP_CPU_FREE(mask); + mask = nullptr; + } + void include(const kmp_affin_mask_t *other) { KMP_CPU_UNION(mask, 
other); } + // If the new full mask is different from the current full mask, + // then switch them. Returns true if full mask was affected, false otherwise. + bool restrict_to_mask() { + // See if the new mask further restricts or changes the full mask + if (KMP_CPU_EQUAL(__kmp_affin_fullMask, mask) || KMP_CPU_ISEMPTY(mask)) + return false; + return __kmp_topology->restrict_to_mask(mask); + } +}; + +static inline const char * +__kmp_get_affinity_env_var(const kmp_affinity_t &affinity, + bool for_binding = false) { + if (affinity.flags.omp_places) { + if (for_binding) + return "OMP_PROC_BIND"; + return "OMP_PLACES"; + } + return affinity.env_var; +} +#endif // KMP_AFFINITY_SUPPORTED + void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) { kmp_uint32 depth; // The test below is true if affinity is available, but set to "none". Need to @@ -207,6 +244,8 @@ if (attrs.is_core_eff_valid()) printf(" (eff=%d)", attrs.get_core_eff()); } + if (leader) + printf(" (leader)"); printf("\n"); } @@ -797,7 +836,40 @@ #if KMP_AFFINITY_SUPPORTED void kmp_topology_t::set_granularity(kmp_affinity_t &affinity) const { - const char *env_var = affinity.env_var; + const char *env_var = __kmp_get_affinity_env_var(affinity); + // If requested hybrid CPU attributes for granularity (either OMP_PLACES or + // KMP_AFFINITY), but none exist, then reset granularity and have below method + // select a granularity and warn user. 
+ if (!__kmp_is_hybrid_cpu()) { + if (affinity.core_attr_gran.valid) { + // OMP_PLACES with cores: but non-hybrid arch, use cores + // instead + KMP_AFF_WARNING( + affinity, AffIgnoringNonHybrid, env_var, + __kmp_hw_get_catalog_string(KMP_HW_CORE, /*plural=*/true)); + affinity.gran = KMP_HW_CORE; + affinity.gran_levels = -1; + affinity.core_attr_gran = KMP_AFFINITY_ATTRS_UNKNOWN; + affinity.flags.core_types_gran = affinity.flags.core_effs_gran = 0; + } else if (affinity.flags.core_types_gran || + affinity.flags.core_effs_gran) { + // OMP_PLACES=core_types|core_effs but non-hybrid, use cores instead + if (affinity.flags.omp_places) { + KMP_AFF_WARNING( + affinity, AffIgnoringNonHybrid, env_var, + __kmp_hw_get_catalog_string(KMP_HW_CORE, /*plural=*/true)); + } else { + // KMP_AFFINITY=granularity=core_type|core_eff,... + KMP_AFF_WARNING(affinity, AffGranularityBad, env_var, + "Intel(R) Hybrid Technology core attribute", + __kmp_hw_get_catalog_string(KMP_HW_CORE)); + } + affinity.gran = KMP_HW_CORE; + affinity.gran_levels = -1; + affinity.core_attr_gran = KMP_AFFINITY_ATTRS_UNKNOWN; + affinity.flags.core_types_gran = affinity.flags.core_effs_gran = 0; + } + } // Set the number of affinity granularity levels if (affinity.gran_levels < 0) { kmp_hw_t gran_type = get_equivalent_type(affinity.gran); @@ -937,6 +1009,7 @@ } }; +#if KMP_AFFINITY_SUPPORTED static kmp_str_buf_t * __kmp_hw_get_catalog_core_string(const kmp_hw_attr_t &attr, kmp_str_buf_t *buf, bool plural) { @@ -952,6 +1025,41 @@ return buf; } +bool kmp_topology_t::restrict_to_mask(const kmp_affin_mask_t *mask) { + // Apply the filter + bool affected; + int new_index = 0; + for (int i = 0; i < num_hw_threads; ++i) { + int os_id = hw_threads[i].os_id; + if (KMP_CPU_ISSET(os_id, mask)) { + if (i != new_index) + hw_threads[new_index] = hw_threads[i]; + new_index++; + } else { + KMP_CPU_CLR(os_id, __kmp_affin_fullMask); + __kmp_avail_proc--; + } + } + + KMP_DEBUG_ASSERT(new_index <= num_hw_threads); + affected = 
(num_hw_threads != new_index); + num_hw_threads = new_index; + + // Post hardware subset canonicalization + if (affected) { + _gather_enumeration_information(); + _discover_uniformity(); + _set_globals(); + _set_last_level_cache(); +#if KMP_OS_WINDOWS + // Copy filtered full mask if topology has single processor group + if (__kmp_num_proc_groups <= 1) +#endif + __kmp_affin_origMask->copy(__kmp_affin_fullMask); + } + return affected; +} + // Apply the KMP_HW_SUBSET envirable to the topology // Returns true if KMP_HW_SUBSET filtered any processors // otherwise, returns false @@ -1156,7 +1264,9 @@ // Determine which hardware threads should be filtered. int num_filtered = 0; - bool *filtered = (bool *)__kmp_allocate(sizeof(bool) * num_hw_threads); + kmp_affin_mask_t *filtered_mask; + KMP_CPU_ALLOC(filtered_mask); + KMP_CPU_COPY(filtered_mask, __kmp_affin_fullMask); for (int i = 0; i < num_hw_threads; ++i) { kmp_hw_thread_t &hw_thread = hw_threads[i]; // Update type_sub_id @@ -1218,51 +1328,35 @@ } } // Collect filtering information - filtered[i] = should_be_filtered; - if (should_be_filtered) + if (should_be_filtered) { + KMP_CPU_CLR(hw_thread.os_id, filtered_mask); num_filtered++; + } } // One last check that we shouldn't allow filtering entire machine if (num_filtered == num_hw_threads) { KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetAllFiltered); - __kmp_free(filtered); return false; } // Apply the filter - int new_index = 0; - for (int i = 0; i < num_hw_threads; ++i) { - if (!filtered[i]) { - if (i != new_index) - hw_threads[new_index] = hw_threads[i]; - new_index++; - } else { -#if KMP_AFFINITY_SUPPORTED - KMP_CPU_CLR(hw_threads[i].os_id, __kmp_affin_fullMask); -#endif - __kmp_avail_proc--; - } - } - - KMP_DEBUG_ASSERT(new_index <= num_hw_threads); - num_hw_threads = new_index; - - // Post hardware subset canonicalization - _gather_enumeration_information(); - _discover_uniformity(); - _set_globals(); - _set_last_level_cache(); - __kmp_free(filtered); + 
restrict_to_mask(filtered_mask); return true; } -bool kmp_topology_t::is_close(int hwt1, int hwt2, int hw_level) const { +bool kmp_topology_t::is_close(int hwt1, int hwt2, + const kmp_affinity_t &stgs) const { + int hw_level = stgs.gran_levels; if (hw_level >= depth) return true; bool retval = true; const kmp_hw_thread_t &t1 = hw_threads[hwt1]; const kmp_hw_thread_t &t2 = hw_threads[hwt2]; + if (stgs.flags.core_types_gran) + return t1.attrs.get_core_type() == t2.attrs.get_core_type(); + if (stgs.flags.core_effs_gran) + return t1.attrs.get_core_eff() == t2.attrs.get_core_eff(); for (int i = 0; i < (depth - hw_level); ++i) { if (t1.ids[i] != t2.ids[i]) return false; @@ -1272,8 +1366,6 @@ //////////////////////////////////////////////////////////////////////////////// -#if KMP_AFFINITY_SUPPORTED - bool KMPAffinity::picked_api = false; void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); } @@ -3353,17 +3445,25 @@ // Create and return a table of affinity masks, indexed by OS thread ID. // This routine handles OR'ing together all the affinity masks of threads // that are sufficiently close, if granularity > fine. +template <typename FindNextFunctionType> static void __kmp_create_os_id_masks(unsigned *numUnique, - kmp_affinity_t &affinity) { + kmp_affinity_t &affinity, + FindNextFunctionType find_next) { // First form a table of affinity masks in order of OS thread id. int maxOsId; int i; int numAddrs = __kmp_topology->get_num_hw_threads(); int depth = __kmp_topology->get_depth(); - const char *env_var = affinity.env_var; + const char *env_var = __kmp_get_affinity_env_var(affinity); KMP_ASSERT(numAddrs); KMP_ASSERT(depth); + i = find_next(-1); + // If could not find HW thread location with attributes, then return and + // fallback to increment find_next and disregard core attributes. 
+ if (i >= numAddrs) + return; + maxOsId = 0; for (i = numAddrs - 1;; --i) { int osId = __kmp_topology->at(i).os_id; @@ -3393,19 +3493,22 @@ kmp_affin_mask_t *sum; KMP_CPU_ALLOC_ON_STACK(sum); KMP_CPU_ZERO(sum); - KMP_CPU_SET(__kmp_topology->at(0).os_id, sum); - for (i = 1; i < numAddrs; i++) { + + i = j = leader = find_next(-1); + KMP_CPU_SET(__kmp_topology->at(i).os_id, sum); + kmp_full_mask_modifier_t full_mask; + for (i = find_next(i); i < numAddrs; i = find_next(i)) { // If this thread is sufficiently close to the leader (within the // granularity setting), then set the bit for this os thread in the // affinity mask for this group, and go on to the next thread. - if (__kmp_topology->is_close(leader, i, affinity.gran_levels)) { + if (__kmp_topology->is_close(leader, i, affinity)) { KMP_CPU_SET(__kmp_topology->at(i).os_id, sum); continue; } // For every thread in this group, copy the mask to the thread's entry in // the OS Id mask table. Mark the first address as a leader. - for (; j < i; j++) { + for (; j < i; j = find_next(j)) { int osId = __kmp_topology->at(j).os_id; KMP_DEBUG_ASSERT(osId <= maxOsId); kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.os_id_masks, osId); @@ -3416,22 +3519,29 @@ // Start a new mask. leader = i; + full_mask.include(sum); KMP_CPU_ZERO(sum); KMP_CPU_SET(__kmp_topology->at(i).os_id, sum); } // For every thread in last group, copy the mask to the thread's // entry in the OS Id mask table. 
- for (; j < i; j++) { + for (; j < i; j = find_next(j)) { int osId = __kmp_topology->at(j).os_id; KMP_DEBUG_ASSERT(osId <= maxOsId); kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.os_id_masks, osId); KMP_CPU_COPY(mask, sum); __kmp_topology->at(j).leader = (j == leader); } + full_mask.include(sum); unique++; KMP_CPU_FREE_FROM_STACK(sum); + // See if the OS Id mask table further restricts or changes the full mask + if (full_mask.restrict_to_mask() && affinity.flags.verbose) { + __kmp_topology->print(env_var); + } + *numUnique = unique; } @@ -4134,8 +4244,11 @@ } // Create the OS proc to hardware thread map - for (int hw_thread = 0; hw_thread < num_hw_threads; ++hw_thread) - __kmp_osid_to_hwthread_map[__kmp_topology->at(hw_thread).os_id] = hw_thread; + for (int hw_thread = 0; hw_thread < num_hw_threads; ++hw_thread) { + int os_id = __kmp_topology->at(hw_thread).os_id; + if (KMP_CPU_ISSET(os_id, __kmp_affin_fullMask)) + __kmp_osid_to_hwthread_map[os_id] = hw_thread; + } for (unsigned i = 0; i < affinity.num_masks; ++i) { kmp_affinity_ids_t &ids = affinity.ids[i]; @@ -4145,16 +4258,26 @@ } } +// Called when __kmp_topology is ready +static void __kmp_aux_affinity_initialize_other_data(kmp_affinity_t &affinity) { + // Initialize data dependent on __kmp_topology + if (__kmp_topology) { + machine_hierarchy.init(__kmp_topology->get_num_hw_threads()); + __kmp_affinity_get_topology_info(affinity); + } +} + // Create a one element mask array (set of places) which only contains the // initial process's affinity mask static void __kmp_create_affinity_none_places(kmp_affinity_t &affinity) { KMP_ASSERT(__kmp_affin_fullMask != NULL); KMP_ASSERT(affinity.type == affinity_none); + KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads()); affinity.num_masks = 1; KMP_CPU_ALLOC_ARRAY(affinity.masks, affinity.num_masks); kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, 0); KMP_CPU_COPY(dest, __kmp_affin_fullMask); - __kmp_affinity_get_topology_info(affinity); + 
__kmp_aux_affinity_initialize_other_data(affinity); } static void __kmp_aux_affinity_initialize_masks(kmp_affinity_t &affinity) { @@ -4383,13 +4506,6 @@ if (verbose) __kmp_topology->print(env_var); bool filtered = __kmp_topology->filter_hw_subset(); - if (filtered) { -#if KMP_OS_WINDOWS - // Copy filtered full mask if topology has single processor group - if (__kmp_num_proc_groups <= 1) -#endif - __kmp_affin_origMask->copy(__kmp_affin_fullMask); - } if (filtered && verbose) __kmp_topology->print("KMP_HW_SUBSET"); return success; @@ -4398,7 +4514,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) { bool is_regular_affinity = (&affinity == &__kmp_affinity); bool is_hidden_helper_affinity = (&affinity == &__kmp_hh_affinity); - const char *env_var = affinity.env_var; + const char *env_var = __kmp_get_affinity_env_var(affinity); if (affinity.flags.initialized) { KMP_ASSERT(__kmp_affin_fullMask != NULL); @@ -4437,7 +4553,36 @@ // Create the table of masks, indexed by thread Id. 
unsigned numUnique; - __kmp_create_os_id_masks(&numUnique, affinity); + int numAddrs = __kmp_topology->get_num_hw_threads(); + // If OMP_PLACES=cores: specified, then attempt + // to make OS Id mask table using those attributes + if (affinity.core_attr_gran.valid) { + __kmp_create_os_id_masks(&numUnique, affinity, [&](int idx) { + KMP_ASSERT(idx >= -1); + for (int i = idx + 1; i < numAddrs; ++i) + if (__kmp_topology->at(i).attrs.contains(affinity.core_attr_gran)) + return i; + return numAddrs; + }); + if (!affinity.os_id_masks) { + const char *core_attribute; + if (affinity.core_attr_gran.core_eff != kmp_hw_attr_t::UNKNOWN_CORE_EFF) + core_attribute = "core_efficiency"; + else + core_attribute = "core_type"; + KMP_AFF_WARNING(affinity, AffIgnoringNotAvailable, env_var, + core_attribute, + __kmp_hw_get_catalog_string(KMP_HW_CORE, /*plural=*/true)) + } + } + // If core attributes did not work, or none were specified, + // then make OS Id mask table using typical incremental way. + if (!affinity.os_id_masks) { + __kmp_create_os_id_masks(&numUnique, affinity, [](int idx) { + KMP_ASSERT(idx >= -1); + return idx + 1; + }); + } if (affinity.gran_levels == 0) { KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc); } @@ -4578,6 +4723,7 @@ int i; unsigned j; int num_hw_threads = __kmp_topology->get_num_hw_threads(); + kmp_full_mask_modifier_t full_mask; for (i = 0, j = 0; i < num_hw_threads; i++) { if ((!affinity.flags.dups) && (!__kmp_topology->at(i).leader)) { continue; @@ -4588,11 +4734,16 @@ kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, j); KMP_ASSERT(KMP_CPU_ISSET(osId, src)); KMP_CPU_COPY(dest, src); + full_mask.include(src); if (++j >= affinity.num_masks) { break; } } KMP_DEBUG_ASSERT(j == affinity.num_masks); + // See if the places list further restricts or changes the full mask + if (full_mask.restrict_to_mask() && affinity.flags.verbose) { + __kmp_topology->print(env_var); + } } // Sort the topology back using ids __kmp_topology->sort_ids(); @@ -4601,7 
+4752,7 @@ default: KMP_ASSERT2(0, "Unexpected affinity setting"); } - __kmp_affinity_get_topology_info(affinity); + __kmp_aux_affinity_initialize_other_data(affinity); affinity.flags.initialized = TRUE; } @@ -4722,7 +4873,7 @@ affinity = &__kmp_hh_affinity; else affinity = &__kmp_affinity; - env_var = affinity->env_var; + env_var = __kmp_get_affinity_env_var(*affinity, /*for_binding=*/true); if (KMP_AFFINITY_NON_PROC_BIND || is_hidden_helper) { if ((affinity->type == affinity_none) || diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -2005,6 +2005,21 @@ // ----------------------------------------------------------------------------- // KMP_AFFINITY, GOMP_CPU_AFFINITY, KMP_TOPOLOGY_METHOD +static inline const char * +__kmp_hw_get_core_type_keyword(kmp_hw_core_type_t type) { + switch (type) { + case KMP_HW_CORE_TYPE_UNKNOWN: + return "unknown"; +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + case KMP_HW_CORE_TYPE_ATOM: + return "intel_atom"; + case KMP_HW_CORE_TYPE_CORE: + return "intel_core"; +#endif + } + return "unknown"; +} + #if KMP_AFFINITY_SUPPORTED // Parse the proc id list. Return TRUE if successful, FALSE otherwise. 
static int __kmp_parse_affinity_proc_id_list(const char *var, const char *env, @@ -2359,14 +2374,32 @@ buf = next; - // Try any hardware topology type for granularity - KMP_FOREACH_HW_TYPE(type) { - const char *name = __kmp_hw_get_keyword(type); - if (__kmp_match_str(name, buf, CCAST(const char **, &next))) { - set_gran(type, -1); - buf = next; - set = true; - break; + // Have to try core_type and core_efficiency matches first since "core" + // will register as core granularity with "extra chars" + if (__kmp_match_str("core_type", buf, CCAST(const char **, &next))) { + set_gran(KMP_HW_CORE, -1); + out_affinity->flags.core_types_gran = 1; + buf = next; + set = true; + } else if (__kmp_match_str("core_efficiency", buf, + CCAST(const char **, &next)) || + __kmp_match_str("core_eff", buf, + CCAST(const char **, &next))) { + set_gran(KMP_HW_CORE, -1); + out_affinity->flags.core_effs_gran = 1; + buf = next; + set = true; + } + if (!set) { + // Try any hardware topology type for granularity + KMP_FOREACH_HW_TYPE(type) { + const char *name = __kmp_hw_get_keyword(type); + if (__kmp_match_str(name, buf, CCAST(const char **, &next))) { + set_gran(type, -1); + buf = next; + set = true; + break; + } } } if (!set) { @@ -2626,8 +2659,15 @@ __kmp_str_buf_print(buffer, "%s,", "noreset"); } } - __kmp_str_buf_print(buffer, "granularity=%s,", - __kmp_hw_get_keyword(affinity.gran, false)); + __kmp_str_buf_print(buffer, "granularity="); + if (affinity.flags.core_types_gran) + __kmp_str_buf_print(buffer, "core_type,"); + else if (affinity.flags.core_effs_gran) { + __kmp_str_buf_print(buffer, "core_eff,"); + } else { + __kmp_str_buf_print( + buffer, "%s,", __kmp_hw_get_keyword(affinity.gran, /*plural=*/false)); + } } if (!KMP_AFFINITY_CAPABLE()) { __kmp_str_buf_print(buffer, "%s", "disabled"); @@ -2745,11 +2785,7 @@ signed := - signed -----------------------------------------------------------------------------*/ -// Warning to issue for syntax error during parsing of OMP_PLACES -static 
inline void __kmp_omp_places_syntax_warn(const char *var) { - KMP_WARNING(SyntaxErrorUsing, var, "\"cores\""); -} - +// Return TRUE if successful parse, FALSE otherwise static int __kmp_parse_subplace_list(const char *var, const char **scan) { const char *next; @@ -2761,7 +2797,6 @@ // SKIP_WS(*scan); if ((**scan < '0') || (**scan > '9')) { - __kmp_omp_places_syntax_warn(var); return FALSE; } next = *scan; @@ -2780,7 +2815,6 @@ continue; } if (**scan != ':') { - __kmp_omp_places_syntax_warn(var); return FALSE; } (*scan)++; // skip ':' @@ -2788,7 +2822,6 @@ // Read count parameter SKIP_WS(*scan); if ((**scan < '0') || (**scan > '9')) { - __kmp_omp_places_syntax_warn(var); return FALSE; } next = *scan; @@ -2807,7 +2840,6 @@ continue; } if (**scan != ':') { - __kmp_omp_places_syntax_warn(var); return FALSE; } (*scan)++; // skip ':' @@ -2829,7 +2861,6 @@ } SKIP_WS(*scan); if ((**scan < '0') || (**scan > '9')) { - __kmp_omp_places_syntax_warn(var); return FALSE; } next = *scan; @@ -2848,13 +2879,12 @@ (*scan)++; // skip ',' continue; } - - __kmp_omp_places_syntax_warn(var); return FALSE; } return TRUE; } +// Return TRUE if successful parse, FALSE otherwise static int __kmp_parse_place(const char *var, const char **scan) { const char *next; @@ -2866,7 +2896,6 @@ return FALSE; } if (**scan != '}') { - __kmp_omp_places_syntax_warn(var); return FALSE; } (*scan)++; // skip '}' @@ -2880,12 +2909,12 @@ KMP_ASSERT(proc >= 0); *scan = next; } else { - __kmp_omp_places_syntax_warn(var); return FALSE; } return TRUE; } +// Return TRUE if successful parse, FALSE otherwise static int __kmp_parse_place_list(const char *var, const char *env, char **place_list) { const char *scan = env; @@ -2908,7 +2937,6 @@ continue; } if (*scan != ':') { - __kmp_omp_places_syntax_warn(var); return FALSE; } scan++; // skip ':' @@ -2916,7 +2944,6 @@ // Read count parameter SKIP_WS(scan); if ((*scan < '0') || (*scan > '9')) { - __kmp_omp_places_syntax_warn(var); return FALSE; } next = scan; @@ -2935,7 
+2962,6 @@ continue; } if (*scan != ':') { - __kmp_omp_places_syntax_warn(var); return FALSE; } scan++; // skip ':' @@ -2957,7 +2983,6 @@ } SKIP_WS(scan); if ((*scan < '0') || (*scan > '9')) { - __kmp_omp_places_syntax_warn(var); return FALSE; } next = scan; @@ -2977,7 +3002,6 @@ continue; } - __kmp_omp_places_syntax_warn(var); return FALSE; } @@ -2991,6 +3015,22 @@ return TRUE; } +static inline void __kmp_places_set(enum affinity_type type, kmp_hw_t kind) { + __kmp_affinity.type = type; + __kmp_affinity.gran = kind; + __kmp_affinity.flags.dups = FALSE; + __kmp_affinity.flags.omp_places = TRUE; +} + +static void __kmp_places_syntax_error_fallback(char const *name, + kmp_hw_t kind) { + const char *str = __kmp_hw_get_catalog_string(kind, /*plural=*/true); + KMP_WARNING(SyntaxErrorUsing, name, str); + __kmp_places_set(affinity_compact, kind); + if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) + __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; +} + static void __kmp_stg_parse_places(char const *name, char const *value, void *data) { struct kmp_place_t { @@ -3001,7 +3041,6 @@ bool set = false; const char *scan = value; const char *next = scan; - const char *kind = "\"threads\""; kmp_place_t std_places[] = {{"threads", KMP_HW_THREAD}, {"cores", KMP_HW_CORE}, {"numa_domains", KMP_HW_NUMA}, @@ -3020,10 +3059,54 @@ const kmp_place_t &place = std_places[i]; if (__kmp_match_str(place.name, scan, &next)) { scan = next; - __kmp_affinity.type = affinity_compact; - __kmp_affinity.gran = place.type; - __kmp_affinity.flags.dups = FALSE; + __kmp_places_set(affinity_compact, place.type); set = true; + // Parse core attribute if it exists + if (KMP_HW_MAX_NUM_CORE_TYPES > 1) { + SKIP_WS(scan); + if (*scan == ':') { + if (place.type != KMP_HW_CORE) { + __kmp_places_syntax_error_fallback(name, place.type); + return; + } + scan++; // skip ':' + SKIP_WS(scan); +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + if (__kmp_match_str("intel_core", scan, &next)) { + 
__kmp_affinity.core_attr_gran.core_type = KMP_HW_CORE_TYPE_CORE; + __kmp_affinity.core_attr_gran.valid = 1; + scan = next; + } else if (__kmp_match_str("intel_atom", scan, &next)) { + __kmp_affinity.core_attr_gran.core_type = KMP_HW_CORE_TYPE_ATOM; + __kmp_affinity.core_attr_gran.valid = 1; + scan = next; + } else +#endif + if (__kmp_match_str("eff", scan, &next)) { + int eff; + if (!isdigit(*next)) { + __kmp_places_syntax_error_fallback(name, place.type); + return; + } + scan = next; + SKIP_DIGITS(next); + eff = __kmp_str_to_int(scan, *next); + if (eff < 0) { + __kmp_places_syntax_error_fallback(name, place.type); + return; + } + if (eff >= KMP_HW_MAX_NUM_CORE_EFFS) + eff = KMP_HW_MAX_NUM_CORE_EFFS - 1; + __kmp_affinity.core_attr_gran.core_eff = eff; + __kmp_affinity.core_attr_gran.valid = 1; + scan = next; + } + if (!__kmp_affinity.core_attr_gran.valid) { + __kmp_places_syntax_error_fallback(name, place.type); + return; + } + } + } break; } } @@ -3035,36 +3118,56 @@ continue; if (__kmp_match_str(name, scan, &next)) { scan = next; - __kmp_affinity.type = affinity_compact; - __kmp_affinity.gran = type; - __kmp_affinity.flags.dups = FALSE; + __kmp_places_set(affinity_compact, type); set = true; break; } } } + // Implementation choices for OMP_PLACES based on core attributes + if (!set) { + if (__kmp_match_str("core_types", scan, &next)) { + scan = next; + if (*scan != '\0') { + KMP_WARNING(ParseExtraCharsWarn, name, scan); + } + __kmp_places_set(affinity_compact, KMP_HW_CORE); + __kmp_affinity.flags.core_types_gran = 1; + set = true; + } else if (__kmp_match_str("core_effs", scan, &next) || + __kmp_match_str("core_efficiencies", scan, &next)) { + scan = next; + if (*scan != '\0') { + KMP_WARNING(ParseExtraCharsWarn, name, scan); + } + __kmp_places_set(affinity_compact, KMP_HW_CORE); + __kmp_affinity.flags.core_effs_gran = 1; + set = true; + } + } + // Explicit place list if (!set) { if (__kmp_affinity.proclist != NULL) { KMP_INTERNAL_FREE((void 
*)__kmp_affinity.proclist); __kmp_affinity.proclist = NULL; } if (__kmp_parse_place_list(name, value, &__kmp_affinity.proclist)) { - __kmp_affinity.type = affinity_explicit; - __kmp_affinity.gran = KMP_HW_THREAD; - __kmp_affinity.flags.dups = FALSE; + __kmp_places_set(affinity_explicit, KMP_HW_THREAD); } else { // Syntax error fallback - __kmp_affinity.type = affinity_compact; - __kmp_affinity.gran = KMP_HW_CORE; - __kmp_affinity.flags.dups = FALSE; + __kmp_places_syntax_error_fallback(name, KMP_HW_CORE); } if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) { __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; } return; } + + kmp_hw_t gran = __kmp_affinity.gran; if (__kmp_affinity.gran != KMP_HW_UNKNOWN) { - kind = __kmp_hw_get_keyword(__kmp_affinity.gran); + gran = __kmp_affinity.gran; + } else { + gran = KMP_HW_CORE; } if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) { @@ -3078,7 +3181,7 @@ // Parse option count parameter in parentheses if (*scan != '(') { - KMP_WARNING(SyntaxErrorUsing, name, kind); + __kmp_places_syntax_error_fallback(name, gran); return; } scan++; // skip '(' @@ -3092,7 +3195,7 @@ SKIP_WS(scan); if (*scan != ')') { - KMP_WARNING(SyntaxErrorUsing, name, kind); + __kmp_places_syntax_error_fallback(name, gran); return; } scan++; // skip ')' @@ -3135,12 +3238,37 @@ num = 0; } if (gran != KMP_HW_UNKNOWN) { + // If core_types or core_effs, just print and return + if (__kmp_affinity.flags.core_types_gran) { + __kmp_str_buf_print(buffer, "='%s'\n", "core_types"); + return; + } + if (__kmp_affinity.flags.core_effs_gran) { + __kmp_str_buf_print(buffer, "='%s'\n", "core_effs"); + return; + } + + // threads, cores, sockets, cores:, etc. 
const char *name = __kmp_hw_get_keyword(gran, true); - if (num > 0) { - __kmp_str_buf_print(buffer, "='%s(%d)'\n", name, num); - } else { - __kmp_str_buf_print(buffer, "='%s'\n", name); + __kmp_str_buf_print(buffer, "='%s", name); + + // Add core attributes if it exists + if (__kmp_affinity.core_attr_gran.valid) { + kmp_hw_core_type_t ct = + (kmp_hw_core_type_t)__kmp_affinity.core_attr_gran.core_type; + int eff = __kmp_affinity.core_attr_gran.core_eff; + if (ct != KMP_HW_CORE_TYPE_UNKNOWN) { + const char *ct_name = __kmp_hw_get_core_type_keyword(ct); + __kmp_str_buf_print(buffer, ":%s", ct_name); + } else if (eff >= 0 && eff < KMP_HW_MAX_NUM_CORE_EFFS) { + __kmp_str_buf_print(buffer, ":eff%d", eff); + } } + + // Add the '(#)' part if it exists + if (num > 0) + __kmp_str_buf_print(buffer, "(%d)", num); + __kmp_str_buf_print(buffer, "'\n"); } else { __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); } @@ -5139,21 +5267,6 @@ return; } -static inline const char * -__kmp_hw_get_core_type_keyword(kmp_hw_core_type_t type) { - switch (type) { - case KMP_HW_CORE_TYPE_UNKNOWN: - return "unknown"; -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - case KMP_HW_CORE_TYPE_ATOM: - return "intel_atom"; - case KMP_HW_CORE_TYPE_CORE: - return "intel_core"; -#endif - } - return "unknown"; -} - static void __kmp_stg_print_hw_subset(kmp_str_buf_t *buffer, char const *name, void *data) { kmp_str_buf_t buf; diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp --- a/openmp/runtime/src/z_Linux_util.cpp +++ b/openmp/runtime/src/z_Linux_util.cpp @@ -1242,6 +1242,7 @@ *affinity = KMP_AFFINITY_INIT(affinity->env_var); __kmp_affin_fullMask = nullptr; __kmp_affin_origMask = nullptr; + __kmp_topology = nullptr; #endif // KMP_AFFINITY_SUPPORTED #if KMP_USE_MONITOR diff --git a/openmp/runtime/test/affinity/omp-places-invalid-syntax.c b/openmp/runtime/test/affinity/omp-places-invalid-syntax.c --- 
a/openmp/runtime/test/affinity/omp-places-invalid-syntax.c +++ b/openmp/runtime/test/affinity/omp-places-invalid-syntax.c @@ -1,7 +1,20 @@ -// RUN: %libomp-compile && env KMP_SETTINGS=1 OMP_PLACES=invalid %libomp-run 2>&1 | FileCheck %s -// CHECK-DAG: Effective settings -// CHECK: OMP_PLACES= -// CHECK-SAME: cores +// RUN: %libomp-compile +// RUN: env KMP_SETTINGS=1 OMP_PLACES=invalid %libomp-run 2>&1 | FileCheck --check-prefix=INVALID %s +// RUN: env KMP_SETTINGS=1 OMP_PLACES='sockets(' %libomp-run 2>&1 | FileCheck --check-prefix=SOCKETS %s +// RUN: env KMP_SETTINGS=1 OMP_PLACES='threads()' %libomp-run 2>&1 | FileCheck --check-prefix=THREADS %s +// +// INVALID-DAG: Effective settings +// INVALID: OMP_PLACES= +// INVALID-SAME: cores +// +// SOCKETS-DAG: Effective settings +// SOCKETS: OMP_PLACES= +// SOCKETS-SAME: sockets +// +// THREADS-DAG: Effective settings +// THREADS: OMP_PLACES= +// THREADS-SAME: threads +// // REQUIRES: affinity #include "omp_testsuite.h"