Index: runtime/src/kmp.h
===================================================================
--- runtime/src/kmp.h
+++ runtime/src/kmp.h
@@ -392,6 +392,16 @@
 };
 #endif /* KMP_OS_LINUX */
 
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+enum mic_type {
+    non_mic,    /* initial value; also set when the CPUID signature matches no known MIC */
+    mic1,       /* never assigned by __kmp_check_mic_type(): not supported at the moment */
+    mic2,       /* (CPUID.1:EAX & 0xff0) == 0xB10 */
+    mic3,       /* (CPUID.1:EAX & 0xf0ff0) == 0x50670 */
+    dummy
+};
+#endif
+
 /* ------------------------------------------------------------------------ */
 /* -- fast reduction stuff ------------------------------------------------ */
 
@@ -972,12 +982,12 @@
 #endif
 
 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
-struct kmp_cpuid {
+typedef struct kmp_cpuid {
     kmp_uint32 eax;
     kmp_uint32 ebx;
     kmp_uint32 ecx;
     kmp_uint32 edx;
-};
+} kmp_cpuid_t;
 extern void __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
 # if KMP_ARCH_X86
     extern void __kmp_x86_pause( void );
@@ -2614,6 +2624,10 @@
 extern int __kmp_clock_function_param;
 # endif /* KMP_OS_LINUX */
 
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+extern enum mic_type __kmp_mic_type;
+#endif
+
 # ifdef USE_LOAD_BALANCE
 extern double __kmp_load_balance_interval; /* Interval for the load balance algorithm */
 # endif /* USE_LOAD_BALANCE */
Index: runtime/src/kmp_global.c
===================================================================
--- runtime/src/kmp_global.c
+++ runtime/src/kmp_global.c
@@ -212,6 +212,10 @@
 int __kmp_clock_function_param;
 #endif /* KMP_OS_LINUX */
 
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+enum mic_type __kmp_mic_type = non_mic;
+#endif
+
 #if KMP_AFFINITY_SUPPORTED
 
 # if KMP_GROUP_AFFINITY
Index: runtime/src/kmp_runtime.c
===================================================================
--- runtime/src/kmp_runtime.c
+++ runtime/src/kmp_runtime.c
@@ -5859,6 +5859,28 @@
 // End of Library registration stuff.
 // -------------------------------------------------------------------------------------------------
 
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+
+// Classify the CPU from its CPUID leaf 1 signature (EAX) and store the
+// result in __kmp_mic_type.
+static void __kmp_check_mic_type( void )
+{
+    kmp_cpuid_t cpuid_state = {0};
+    kmp_cpuid_t * cs_p = &cpuid_state;
+    // Use the runtime's CPUID helper (eax=1, ecx=0); GNU inline asm does not build with every Windows compiler.
+    __kmp_x86_cpuid( 1, 0, cs_p );
+    // We don't support mic1 at the moment
+    if( (cs_p->eax & 0xff0) == 0xB10 ) {
+        __kmp_mic_type = mic2;
+    } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) {
+        __kmp_mic_type = mic3;
+    } else {
+        __kmp_mic_type = non_mic;
+    }
+}
+
+#endif /* KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) */
+
 static void
 __kmp_do_serial_initialize( void )
 {
@@ -5923,6 +5945,10 @@
 
     __kmp_runtime_initialize();
 
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+    __kmp_check_mic_type();
+#endif
+
     // Some global variable initialization moved here from kmp_env_initialize()
 #ifdef KMP_DEBUG
     kmp_diag = 0;
@@ -5979,17 +6005,21 @@
         #undef kmp_reduction_barrier_release_bb
         #undef kmp_reduction_barrier_gather_bb
     #endif // KMP_FAST_REDUCTION_BARRIER
-    #if KMP_MIC
-        // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
-        __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3; // plane gather
-        __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1; // forkjoin release
-        __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
-        __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+    if( __kmp_mic_type != non_mic ) {
+        // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
+        __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3; // plane gather
+        __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1; // forkjoin release
+        __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
+        __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
+    }
 #if KMP_FAST_REDUCTION_BARRIER
-        __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
-        __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
+    if( __kmp_mic_type != non_mic ) {
+        __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
+        __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
+    }
+#endif
 #endif
-    #endif
 
     // From KMP_CHECKS initialization
 #ifdef KMP_DEBUG
@@ -6993,6 +7023,8 @@
 
     int team_size;
 
+    int teamsize_cutoff = 4;
+
     KMP_DEBUG_ASSERT( loc ); // it would be nice to test ( loc != 0 )
     KMP_DEBUG_ASSERT( lck ); // it would be nice to test ( lck != 0 )
 
@@ -7015,13 +7047,13 @@
     #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
 
         #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
-            #if KMP_MIC
-                #define REDUCTION_TEAMSIZE_CUTOFF 8
-            #else // KMP_MIC
-                #define REDUCTION_TEAMSIZE_CUTOFF 4
-            #endif // KMP_MIC
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+            if( __kmp_mic_type != non_mic ) {
+                teamsize_cutoff = 8;
+            }
+#endif
             if( tree_available ) {
-                if( team_size <= REDUCTION_TEAMSIZE_CUTOFF ) {
+                if( team_size <= teamsize_cutoff ) {
                     if ( atomic_available ) {
                         retval = atomic_reduce_block;
                     }
Index: runtime/src/kmp_settings.c
===================================================================
--- runtime/src/kmp_settings.c
+++ runtime/src/kmp_settings.c
@@ -2314,17 +2314,20 @@
                 }; // if
 
                 if ( __kmp_affinity_gran == affinity_gran_default ) {
-# if KMP_MIC
-                    if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
-                        KMP_WARNING( AffGranUsing, "KMP_AFFINITY", "fine" );
-                    }
-                    __kmp_affinity_gran = affinity_gran_fine;
-# else
-                    if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
-                        KMP_WARNING( AffGranUsing, "KMP_AFFINITY", "core" );
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+                    if( __kmp_mic_type != non_mic ) {
+                        if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
+                            KMP_WARNING( AffGranUsing, "KMP_AFFINITY", "fine" );
+                        }
+                        __kmp_affinity_gran = affinity_gran_fine;
+                    } else
+#endif
+                    {
+                        if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
+                            KMP_WARNING( AffGranUsing, "KMP_AFFINITY", "core" );
+                        }
+                        __kmp_affinity_gran = affinity_gran_core;
                     }
-                    __kmp_affinity_gran = affinity_gran_core;
-# endif /* KMP_MIC */
                 }
             } break;
             case affinity_scatter:
@@ -3030,11 +3033,14 @@
             // OMP_PROC_BIND => granularity=core,scatter elsewhere
             //
             __kmp_affinity_type = affinity_scatter;
-# if KMP_MIC
-            __kmp_affinity_gran = affinity_gran_fine;
-# else
-            __kmp_affinity_gran = affinity_gran_core;
-# endif /* KMP_MIC */
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+            if( __kmp_mic_type != non_mic ) {
+                __kmp_affinity_gran = affinity_gran_fine;
+            } else
+#endif
+            {
+                __kmp_affinity_gran = affinity_gran_core;
+            }
         }
         else {
             __kmp_affinity_type = affinity_none;
@@ -5208,25 +5214,36 @@
         else
 # endif /* OMP_40_ENABLED */
         if ( __kmp_affinity_type == affinity_default ) {
-# if KMP_MIC
-            __kmp_affinity_type = affinity_scatter;
-# if OMP_40_ENABLED
-            __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
-# endif
-# else
-            __kmp_affinity_type = affinity_none;
-# if OMP_40_ENABLED
-            __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
-# endif
-# endif
+#if OMP_40_ENABLED
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+            if( __kmp_mic_type != non_mic ) {
+                __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
+            } else
+#endif
+            {
+                __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
+            }
+#endif /* OMP_40_ENABLED */
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+            if( __kmp_mic_type != non_mic ) {
+                __kmp_affinity_type = affinity_scatter;
+            } else
+#endif
+            {
+                __kmp_affinity_type = affinity_none;
+            }
+
         }
         if ( ( __kmp_affinity_gran == affinity_gran_default )
           && ( __kmp_affinity_gran_levels < 0 ) ) {
-# if KMP_MIC
-            __kmp_affinity_gran = affinity_gran_fine;
-# else
-            __kmp_affinity_gran = affinity_gran_core;
-# endif
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+            if( __kmp_mic_type != non_mic ) {
+                __kmp_affinity_gran = affinity_gran_fine;
+            } else
+#endif
+            {
+                __kmp_affinity_gran = affinity_gran_core;
+            }
         }
         if ( __kmp_affinity_top_method == affinity_top_method_default ) {
             __kmp_affinity_top_method = affinity_top_method_all;