diff --git a/openmp/runtime/src/i18n/en_US.txt b/openmp/runtime/src/i18n/en_US.txt
--- a/openmp/runtime/src/i18n/en_US.txt
+++ b/openmp/runtime/src/i18n/en_US.txt
@@ -360,6 +360,7 @@
 OmpNoAllocator "Allocator %1$s is not available, will use default allocator."
 TopologyGeneric "%1$s: %2$s (%3$d total cores)"
 AffGranularityBad "%1$s: granularity setting: %2$s does not exist in topology. Using granularity=%3$s instead."
+TopologyHybrid "%1$s: hybrid core type detected: %2$d %3$s cores."
 
 # --- OpenMP errors detected at runtime ---
 #
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -1222,7 +1222,8 @@
 typedef struct kmp_cpuinfo_flags_t {
   unsigned sse2 : 1; // 0 if SSE2 instructions are not supported, 1 otherwise.
   unsigned rtm : 1; // 0 if RTM instructions are not supported, 1 otherwise.
-  unsigned reserved : 30; // Ensure size of 32 bits
+  unsigned hybrid : 1; // 0 if the CPU is not hybrid, 1 otherwise.
+  unsigned reserved : 29; // Ensure size of 32 bits
 } kmp_cpuinfo_flags_t;
 
 typedef struct kmp_cpuinfo {
@@ -2984,6 +2985,9 @@
 
 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
 extern kmp_cpuinfo_t __kmp_cpuinfo;
+static inline bool __kmp_is_hybrid_cpu() { return __kmp_cpuinfo.flags.hybrid; }
+#else
+static inline bool __kmp_is_hybrid_cpu() { return false; }
 #endif
 
 extern volatile int __kmp_init_serial;
diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h
--- a/openmp/runtime/src/kmp_affinity.h
+++ b/openmp/runtime/src/kmp_affinity.h
@@ -598,6 +598,17 @@
 #endif /* KMP_OS_WINDOWS */
 #endif /* KMP_AFFINITY_SUPPORTED */
 
+typedef enum kmp_hw_core_type_t {
+  KMP_HW_CORE_TYPE_UNKNOWN = 0x0,
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+  KMP_HW_CORE_TYPE_ATOM = 0x20,
+  KMP_HW_CORE_TYPE_CORE = 0x40,
+  KMP_HW_MAX_NUM_CORE_TYPES = 3,
+#else
+  KMP_HW_MAX_NUM_CORE_TYPES = 1,
+#endif
+} kmp_hw_core_type_t;
+
 class kmp_hw_thread_t {
 public:
   static const int UNKNOWN_ID = -1;
@@ -607,11 +618,14 @@
   int sub_ids[KMP_HW_LAST];
   bool leader;
   int os_id;
+  kmp_hw_core_type_t core_type;
+
   void print() const;
   void clear() {
     for (int i = 0; i < (int)KMP_HW_LAST; ++i)
       ids[i] = UNKNOWN_ID;
     leader = false;
+    core_type = KMP_HW_CORE_TYPE_UNKNOWN;
   }
 };
 
@@ -637,6 +651,11 @@
   // Storage containing the absolute number of each topology layer
   int *count;
 
+  // Storage containing the core types and the number of
+  // each core type for hybrid processors
+  kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];
+  int core_types_count[KMP_HW_MAX_NUM_CORE_TYPES];
+
   // The hardware threads array
   // hw_threads is num_hw_threads long
   // Each hw_thread's ids and sub_ids are depth deep
@@ -675,6 +694,20 @@
   // Set the last level cache equivalent type
   void _set_last_level_cache();
 
+  // Increments the number of cores of type 'type'
+  void _increment_core_type(kmp_hw_core_type_t type) {
+    for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
+      if (core_types[i] == KMP_HW_CORE_TYPE_UNKNOWN) {
+        core_types[i] = type;
+        core_types_count[i] = 1;
+        break;
+      } else if (core_types[i] == type) {
+        core_types_count[i]++;
+        break;
+      }
+    }
+  }
+
 public:
   // Force use of allocate()/deallocate()
   kmp_topology_t() = delete;
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -123,6 +123,20 @@
   return ((plural) ? "unknowns" : "unknown");
 }
 
+const char *__kmp_hw_get_core_type_string(kmp_hw_core_type_t type) {
+  switch (type) {
+  case KMP_HW_CORE_TYPE_UNKNOWN:
+    return "unknown";
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+  case KMP_HW_CORE_TYPE_ATOM:
+    return "Intel Atom(R) processor";
+  case KMP_HW_CORE_TYPE_CORE:
+    return "Intel(R) Core(TM) processor";
+#endif
+  }
+  return "unknown";
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // kmp_hw_thread_t methods
 int kmp_hw_thread_t::compare_ids(const void *a, const void *b) {
@@ -174,6 +188,9 @@
   for (int i = 0; i < depth; ++i) {
     printf("%4d ", ids[i]);
   }
+  if (core_type != KMP_HW_CORE_TYPE_UNKNOWN) {
+    printf(" (%s)", __kmp_hw_get_core_type_string(core_type));
+  }
   printf("\n");
 }
 
@@ -298,6 +315,7 @@
 void kmp_topology_t::_gather_enumeration_information() {
   int previous_id[KMP_HW_LAST];
   int max[KMP_HW_LAST];
+  int previous_core_id = kmp_hw_thread_t::UNKNOWN_ID;
 
   for (int i = 0; i < depth; ++i) {
     previous_id[i] = kmp_hw_thread_t::UNKNOWN_ID;
@@ -305,6 +323,12 @@
     count[i] = 0;
     ratio[i] = 0;
   }
+  if (__kmp_is_hybrid_cpu()) {
+    for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
+      core_types_count[i] = 0;
+      core_types[i] = KMP_HW_CORE_TYPE_UNKNOWN;
+    }
+  }
   for (int i = 0; i < num_hw_threads; ++i) {
     kmp_hw_thread_t &hw_thread = hw_threads[i];
     for (int layer = 0; layer < depth; ++layer) {
@@ -326,6 +350,15 @@
     for (int layer = 0; layer < depth; ++layer) {
       previous_id[layer] = hw_thread.ids[layer];
     }
+    // Figure out the number of each core type for hybrid CPUs
+    if (__kmp_is_hybrid_cpu()) {
+      int core_level = get_level(KMP_HW_CORE);
+      if (core_level != -1) {
+        if (hw_thread.ids[core_level] != previous_core_id)
+          _increment_core_type(hw_thread.core_type);
+        previous_core_id = hw_thread.ids[core_level];
+      }
+    }
   }
   for (int layer = 0; layer < depth; ++layer) {
     if (max[layer] > ratio[layer])
@@ -478,6 +511,19 @@
   }
   printf("\n");
 
+  printf("* core_types:\n");
+  for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
+    if (core_types[i] != KMP_HW_CORE_TYPE_UNKNOWN) {
+      printf(" %d %s core%c\n", core_types_count[i],
+             __kmp_hw_get_core_type_string(core_types[i]),
+             ((core_types_count[i] > 1) ? 's' : ' '));
+    } else {
+      if (i == 0)
+        printf("No hybrid information available\n");
+      break;
+    }
+  }
+
   printf("* equivalent map:\n");
   KMP_FOREACH_HW_TYPE(i) {
     const char *key = __kmp_hw_get_keyword(i);
@@ -571,6 +617,15 @@
   }
   KMP_INFORM(TopologyGeneric, env_var, buf.str, ncores);
 
+  if (__kmp_is_hybrid_cpu()) {
+    for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
+      if (core_types[i] == KMP_HW_CORE_TYPE_UNKNOWN)
+        break;
+      KMP_INFORM(TopologyHybrid, env_var, core_types_count[i],
+                 __kmp_hw_get_core_type_string(core_types[i]));
+    }
+  }
+
   if (num_hw_threads <= 0) {
     __kmp_str_buf_free(&buf);
     return;
@@ -585,6 +640,9 @@
       __kmp_str_buf_print(&buf, "%s ", __kmp_hw_get_catalog_string(type));
       __kmp_str_buf_print(&buf, "%d ", hw_threads[i].ids[level]);
     }
+    if (__kmp_is_hybrid_cpu())
+      __kmp_str_buf_print(
+          &buf, "(%s)", __kmp_hw_get_core_type_string(hw_threads[i].core_type));
     KMP_INFORM(OSProcMapToPack, env_var, hw_threads[i].os_id, buf.str);
   }
 
@@ -1782,6 +1840,16 @@
   return true;
 }
 
+// Hybrid CPU detection using CPUID.1A
+// Thread should be pinned to processor already
+static void __kmp_get_hybrid_info(kmp_hw_core_type_t *type,
+                                  unsigned *native_model_id) {
+  kmp_cpuid buf;
+  __kmp_x86_cpuid(0x1a, 0, &buf);
+  *type = (kmp_hw_core_type_t)__kmp_extract_bits<24, 31>(buf.eax);
+  *native_model_id = __kmp_extract_bits<0, 23>(buf.eax);
+}
+
 // Intel(R) microarchitecture code name Nehalem, Dunnington and later
 // architectures support a newer interface for specifying the x2APIC Ids,
 // based on CPUID.B or CPUID.1F
@@ -2051,6 +2119,13 @@
         hw_thread.ids[idx] >>= my_levels[j - 1].mask_width;
       }
     }
+    // Hybrid information
+    if (__kmp_is_hybrid_cpu() && highest_leaf >= 0x1a) {
+      kmp_hw_core_type_t type;
+      unsigned native_model_id;
+      __kmp_get_hybrid_info(&type, &native_model_id);
+      hw_thread.core_type = type;
+    }
     hw_thread_index++;
   }
   KMP_ASSERT(hw_thread_index > 0);
diff --git a/openmp/runtime/src/kmp_utility.cpp b/openmp/runtime/src/kmp_utility.cpp
--- a/openmp/runtime/src/kmp_utility.cpp
+++ b/openmp/runtime/src/kmp_utility.cpp
@@ -248,13 +248,19 @@
   }
 #endif
   p->flags.rtm = 0;
+  p->flags.hybrid = 0;
   if (max_arg > 7) {
     /* RTM bit CPUID.07:EBX, bit 11 */
+    /* HYBRID bit CPUID.07:EDX, bit 15 */
    __kmp_x86_cpuid(7, 0, &buf);
     p->flags.rtm = (buf.ebx >> 11) & 1;
+    p->flags.hybrid = (buf.edx >> 15) & 1;
     if (p->flags.rtm) {
       KA_TRACE(trace_level, (" RTM"));
     }
+    if (p->flags.hybrid) {
+      KA_TRACE(trace_level, (" HYBRID"));
+    }
   }
 
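
Note (editor's sketch, not part of the patch): the detection flow above can be exercised outside the runtime with a few lines of standalone C++. The sketch mirrors the two CPUID queries the patch adds: CPUID.07:EDX bit 15 answers "is this a hybrid part?" (what __kmp_query_cpuid now caches in p->flags.hybrid), and CPUID.1A EAX reports the core type (bits 31:24) and native model ID (bits 23:0) of the core the calling thread is currently running on (what __kmp_get_hybrid_info extracts). It assumes a GCC/Clang toolchain for __get_cpuid_count from <cpuid.h>, which also handles the patch's highest_leaf >= 0x1a check by returning 0 for unsupported leaves; and, as the patch's comment notes, the thread should already be pinned to one core for the per-core answer to be meaningful.

#include <cpuid.h>
#include <cstdio>

int main() {
  unsigned eax, ebx, ecx, edx;

  // CPUID.07:EDX bit 15 -- the hybrid flag (what p->flags.hybrid caches).
  if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx) || !((edx >> 15) & 1)) {
    std::printf("not a hybrid CPU (or leaf 0x7 unsupported)\n");
    return 0;
  }

  // CPUID.1A EAX[31:24] = core type of the *current* core,
  // EAX[23:0] = native model ID (what __kmp_get_hybrid_info extracts).
  if (__get_cpuid_count(0x1a, 0, &eax, &ebx, &ecx, &edx)) {
    unsigned core_type = (eax >> 24) & 0xff; // 0x20 = Atom, 0x40 = Core
    unsigned native_model_id = eax & 0xffffff;
    std::printf("core type 0x%x (%s), native model id 0x%x\n", core_type,
                core_type == 0x20   ? "Atom"
                : core_type == 0x40 ? "Core"
                                    : "unknown",
                native_model_id);
  }
  return 0;
}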
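
A second sketch (again illustrative only, with made-up data) shows why _gather_enumeration_information() carries previous_core_id alongside _increment_core_type(): hardware threads are visited in sorted order, several of them can share one core, and the core-type table must count each physical core exactly once. The first-fit loop below has the same shape as _increment_core_type().

#include <cstdio>

// Hypothetical standalone model of the counting pass; the names and the
// sample topology are invented for illustration.
enum core_type_t { TYPE_UNKNOWN = 0, TYPE_ATOM = 0x20, TYPE_CORE = 0x40 };

struct hw_thread_t {
  int core_id;
  core_type_t core_type;
};

int main() {
  // Example: 2 "Core" cores with 2-way SMT, plus 2 single-thread "Atom" cores.
  hw_thread_t threads[] = {{0, TYPE_CORE}, {0, TYPE_CORE}, {1, TYPE_CORE},
                           {1, TYPE_CORE}, {2, TYPE_ATOM}, {3, TYPE_ATOM}};
  core_type_t types[3] = {TYPE_UNKNOWN, TYPE_UNKNOWN, TYPE_UNKNOWN};
  int counts[3] = {0, 0, 0};

  int previous_core_id = -1;
  for (const hw_thread_t &t : threads) {
    if (t.core_id == previous_core_id) // same core as last thread: skip it
      continue;
    previous_core_id = t.core_id;
    // First-fit insert, as in _increment_core_type(): take the first empty
    // slot for a new type, otherwise bump the matching slot's count.
    for (int i = 0; i < 3; ++i) {
      if (types[i] == TYPE_UNKNOWN) {
        types[i] = t.core_type;
        counts[i] = 1;
        break;
      } else if (types[i] == t.core_type) {
        counts[i]++;
        break;
      }
    }
  }
  // Prints: 2 cores of type 0x40, then 2 cores of type 0x20.
  for (int i = 0; i < 3 && types[i] != TYPE_UNKNOWN; ++i)
    std::printf("%d cores of type 0x%x\n", counts[i], types[i]);
  return 0;
}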