Index: openmp/trunk/runtime/src/i18n/en_US.txt =================================================================== --- openmp/trunk/runtime/src/i18n/en_US.txt +++ openmp/trunk/runtime/src/i18n/en_US.txt @@ -388,7 +388,8 @@ EnvLockWarn "%1$s must be set prior to first OMP lock call or critical section; ignored." FutexNotSupported "futex system call not supported; %1$s=%2$s ignored." AffGranUsing "%1$s: granularity=%2$s will be used." -AffThrPlaceInvalid "%1$s: invalid value \"%2$s\", valid format is \"nC,mT[,kO]\"." +AffThrPlaceInvalid "%1$s: invalid value \"%2$s\", valid format is \"nS[,nO],nC[,nO],nT " + "(nSockets@offset, nCores@offset, nThreads per core)\"." AffThrPlaceUnsupported "KMP_PLACE_THREADS ignored: unsupported architecture." AffThrPlaceManyCores "KMP_PLACE_THREADS ignored: too many cores requested." SyntaxErrorUsing "%1$s: syntax error, using %2$s." @@ -402,6 +403,7 @@ AffThrPlaceNonThreeLevel "KMP_PLACE_THREADS ignored: only three-level topology is supported." AffGranTopGroup "%1$s: granularity=%2$s is not supported with KMP_TOPOLOGY_METHOD=group. Using \"granularity=fine\"." AffGranGroupType "%1$s: granularity=group is not supported with KMP_AFFINITY=%2$s. Using \"granularity=core\"." +AffThrPlaceManySockets "KMP_PLACE_THREADS ignored: too many sockets requested." 
# -------------------------------------------------------------------------------------------------- Index: openmp/trunk/runtime/src/kmp.h =================================================================== --- openmp/trunk/runtime/src/kmp.h +++ openmp/trunk/runtime/src/kmp.h @@ -788,9 +788,11 @@ } kmp_cancel_kind_t; #endif // OMP_40_ENABLED +extern int __kmp_place_num_sockets; +extern int __kmp_place_socket_offset; extern int __kmp_place_num_cores; -extern int __kmp_place_num_threads_per_core; extern int __kmp_place_core_offset; +extern int __kmp_place_num_threads_per_core; /* ------------------------------------------------------------------------ */ /* ------------------------------------------------------------------------ */ @@ -3388,7 +3390,8 @@ KMP_EXPORT kmp_uint64 __kmpc_get_taskid(); KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid(); -KMP_EXPORT void __kmpc_place_threads(int,int,int); +// this function exported for testing of KMP_PLACE_THREADS functionality +KMP_EXPORT void __kmpc_place_threads(int,int,int,int,int); /* ------------------------------------------------------------------------ */ /* ------------------------------------------------------------------------ */ Index: openmp/trunk/runtime/src/kmp_affinity.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_affinity.cpp +++ openmp/trunk/runtime/src/kmp_affinity.cpp @@ -3055,12 +3055,18 @@ static void __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) { - if ( __kmp_place_num_cores == 0 ) { - if ( __kmp_place_num_threads_per_core == 0 ) { - return; // no cores limiting actions requested, exit - } + if (__kmp_place_num_sockets == 0 && + __kmp_place_num_cores == 0 && + __kmp_place_num_threads_per_core == 0 ) + return; // no topology limiting actions requested, exit + if (__kmp_place_num_sockets == 0) + __kmp_place_num_sockets = nPackages; // use all available sockets + if (__kmp_place_num_cores == 0) __kmp_place_num_cores = nCoresPerPkg; // 
use all available cores - } + if (__kmp_place_num_threads_per_core == 0 || + __kmp_place_num_threads_per_core > __kmp_nThreadsPerCore) + __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts + if ( !__kmp_affinity_uniform_topology() ) { KMP_WARNING( AffThrPlaceNonUniform ); return; // don't support non-uniform topology @@ -3069,8 +3075,9 @@ KMP_WARNING( AffThrPlaceNonThreeLevel ); return; // don't support not-3-level topology } - if ( __kmp_place_num_threads_per_core == 0 ) { - __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts + if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) { + KMP_WARNING(AffThrPlaceManySockets); + return; } if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) { KMP_WARNING( AffThrPlaceManyCores ); @@ -3078,23 +3085,31 @@ } AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) * - nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core); + __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core); + int i, j, k, n_old = 0, n_new = 0; - for ( i = 0; i < nPackages; ++i ) { - for ( j = 0; j < nCoresPerPkg; ++j ) { - if ( j < __kmp_place_core_offset || j >= __kmp_place_core_offset + __kmp_place_num_cores ) { - n_old += __kmp_nThreadsPerCore; // skip not-requested core - } else { - for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) { - if ( k < __kmp_place_num_threads_per_core ) { - newAddr[n_new] = (*pAddr)[n_old]; // copy requested core' data to new location - n_new++; + for (i = 0; i < nPackages; ++i) + if (i < __kmp_place_socket_offset || + i >= __kmp_place_socket_offset + __kmp_place_num_sockets) + n_old += nCoresPerPkg * __kmp_nThreadsPerCore; // skip not-requested socket + else + for (j = 0; j < nCoresPerPkg; ++j) // walk through requested socket + if (j < __kmp_place_core_offset || + j >= __kmp_place_core_offset + __kmp_place_num_cores) + n_old += __kmp_nThreadsPerCore; // skip not-requested 
core + else + for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core + if (k < __kmp_place_num_threads_per_core) { + newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data + n_new++; + } + n_old++; } - n_old++; - } - } - } - } + KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore); + KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores * + __kmp_place_num_threads_per_core); + + nPackages = __kmp_place_num_sockets; // correct nPackages nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore __kmp_avail_proc = n_new; // correct avail_proc Index: openmp/trunk/runtime/src/kmp_csupport.c =================================================================== --- openmp/trunk/runtime/src/kmp_csupport.c +++ openmp/trunk/runtime/src/kmp_csupport.c @@ -2855,14 +2855,16 @@ } // __kmpc_get_parent_taskid -void __kmpc_place_threads(int nC, int nT, int nO) +void __kmpc_place_threads(int nS, int sO, int nC, int cO, int nT) { if ( ! 
__kmp_init_serial ) { __kmp_serial_initialize(); } + __kmp_place_num_sockets = nS; + __kmp_place_socket_offset = sO; __kmp_place_num_cores = nC; + __kmp_place_core_offset = cO; __kmp_place_num_threads_per_core = nT; - __kmp_place_core_offset = nO; } // end of file // Index: openmp/trunk/runtime/src/kmp_global.c =================================================================== --- openmp/trunk/runtime/src/kmp_global.c +++ openmp/trunk/runtime/src/kmp_global.c @@ -249,9 +249,11 @@ int __kmp_affinity_num_places = 0; #endif +int __kmp_place_num_sockets = 0; +int __kmp_place_socket_offset = 0; int __kmp_place_num_cores = 0; -int __kmp_place_num_threads_per_core = 0; int __kmp_place_core_offset = 0; +int __kmp_place_num_threads_per_core = 0; kmp_tasking_mode_t __kmp_tasking_mode = tskm_task_teams; Index: openmp/trunk/runtime/src/kmp_settings.c =================================================================== --- openmp/trunk/runtime/src/kmp_settings.c +++ openmp/trunk/runtime/src/kmp_settings.c @@ -4117,127 +4117,220 @@ __kmp_stg_parse_place_threads( char const * name, char const * value, void * data ) { // Value example: 5Cx2Tx15O // Which means "use 5 cores with offset 15, 2 threads per core" - + // AC: extended to sockets level: + // 2s,6o,2c,2o,2t or 2s,6o,2c,2t,2o + // (to not break legacy code core-offset can be last). + // Note: not all syntax errors are analyzed, some may be skipped. 
+#define CHECK_DELIM(_x) (*(_x) == ',' || *(_x) == '@' || *(_x) == 'x') int num; - int prev_delim = 0; + int flagS = 0, flagC = 0, flagT = 0; const char *next = value; const char *prev; - SKIP_WS( next ); - if ( *next == '\0' ) { - return; // leave default values - } - - // Get num_cores first - if ( *next >= '0' && *next <= '9' ) { + SKIP_WS(next); // skip white spaces + if (*next == '\0') + return; // no data provided, retain default values + // Get num_sockets first (or whatever specified) + if (*next >= '0' && *next <= '9') { prev = next; - SKIP_DIGITS( next ); - num = __kmp_str_to_int( prev, *next ); - SKIP_WS( next ); - if ( *next == 'C' || *next == 'c' ) { - __kmp_place_num_cores = num; + SKIP_DIGITS(next); + num = __kmp_str_to_int(prev, *next); + SKIP_WS(next); + if (*next == 's' || *next == 'S') { + __kmp_place_num_sockets = num; + flagS = 1; // got num sockets next++; - } else if ( *next == ',' || *next == 'x' ) { + } else if (*next == 'c' || *next == 'C') { __kmp_place_num_cores = num; - prev_delim = 1; + flagS = flagC = 1; // sockets were not specified - use default + next++; + } else if (CHECK_DELIM(next)) { + __kmp_place_num_cores = num; // no letter-designator - num cores + flagS = flagC = 1; // sockets were not specified - use default next++; - } else if ( *next == 'T' || *next == 't' ) { + } else if (*next == 't' || *next == 'T') { __kmp_place_num_threads_per_core = num; + // sockets, cores were not specified - use default return; // we ignore offset value in case all cores are used - } else if ( *next == '\0' ) { + } else if (*next == '\0') { __kmp_place_num_cores = num; - return; // the only value provided + return; // the only value provided - set num cores } else { - KMP_WARNING( AffThrPlaceInvalid, name, value ); + KMP_WARNING(AffThrPlaceInvalid, name, value); return; } - } else if ( *next == ',' || *next == 'x' ) { - // First character is delimiter, skip it, leave num_cores default value - prev_delim = 2; - next++; } else { - KMP_WARNING( 
AffThrPlaceInvalid, name, value ); + KMP_WARNING(AffThrPlaceInvalid, name, value); return; } - SKIP_WS( next ); - if ( *next == '\0' ) { + KMP_DEBUG_ASSERT(flagS); // num sockets should already be set here + SKIP_WS(next); + if (*next == '\0') return; // " n " - something like this - } - if ( ( *next == ',' || *next == 'x' ) && !prev_delim ) { - prev_delim = 1; - next++; // skip delimiter after num_core value - SKIP_WS( next ); + if (CHECK_DELIM(next)) { + next++; // skip delimiter + SKIP_WS(next); } - // Get threads_per_core next - if ( *next >= '0' && *next <= '9' ) { - prev_delim = 0; + // Get second value (could be offset, num_cores, num_threads) + if (*next >= '0' && *next <= '9') { prev = next; - SKIP_DIGITS( next ); - num = __kmp_str_to_int( prev, *next ); - SKIP_WS( next ); - if ( *next == 'T' || *next == 't' ) { - __kmp_place_num_threads_per_core = num; + SKIP_DIGITS(next); + num = __kmp_str_to_int(prev, *next); + SKIP_WS(next); + if (*next == 'o' || *next == 'O') { // offset specified + if (flagC) { // whether num_cores already specified (when sockets skipped) + __kmp_place_core_offset = num; + } else { + __kmp_place_socket_offset = num; + } next++; - } else if ( *next == ',' || *next == 'x' ) { + } else if (*next == 'c' || *next == 'C') { + KMP_DEBUG_ASSERT(flagC == 0); + __kmp_place_num_cores = num; + flagC = 1; + next++; + } else if (*next == 't' || *next == 'T') { + KMP_DEBUG_ASSERT(flagT == 0); + __kmp_place_num_threads_per_core = num; + flagC = 1; // num_cores could be skipped ? 
+ flagT = 1; + next++; // can have core-offset specified after num threads + } else if (*next == '\0') { + KMP_DEBUG_ASSERT(flagC); // 4x2 means 4 cores 2 threads per core __kmp_place_num_threads_per_core = num; - prev_delim = 1; + return; // two values provided without letter-designator + } else { + KMP_WARNING(AffThrPlaceInvalid, name, value); + return; + } + } else { + KMP_WARNING(AffThrPlaceInvalid, name, value); + return; + } + SKIP_WS(next); + if (*next == '\0') + return; // " Ns,Nc " - something like this + if (CHECK_DELIM(next)) { + next++; // skip delimiter + SKIP_WS(next); + } + + // Get third value (could be core-offset, num_cores, num_threads) + if (*next >= '0' && *next <= '9') { + prev = next; + SKIP_DIGITS(next); + num = __kmp_str_to_int(prev, *next); + SKIP_WS(next); + if (*next == 'c' || *next == 'C') { + KMP_DEBUG_ASSERT(flagC == 0); + __kmp_place_num_cores = num; + flagC = 1; next++; - } else if ( *next == 'O' || *next == 'o' ) { + } else if (*next == 'o' || *next == 'O') { + KMP_DEBUG_ASSERT(flagC); __kmp_place_core_offset = num; - return; // threads_per_core remains default - } else if ( *next == '\0' ) { + next++; + } else if (*next == 't' || *next == 'T') { + KMP_DEBUG_ASSERT(flagT == 0); __kmp_place_num_threads_per_core = num; - return; + if (flagC == 0) + return; // num_cores could be skipped (e.g. 2s,4o,2t) + flagT = 1; + next++; // can have core-offset specified later (e.g. 
2s,1c,2t,3o) } else { - KMP_WARNING( AffThrPlaceInvalid, name, value ); + KMP_WARNING(AffThrPlaceInvalid, name, value); return; } - } else if ( *next == ',' || *next == 'x' ) { - if ( prev_delim == 2 ) { - return; // no sense in the only offset value, thus skip the rest + } else { + KMP_WARNING(AffThrPlaceInvalid, name, value); + return; + } + KMP_DEBUG_ASSERT(flagC); + SKIP_WS(next); + if ( *next == '\0' ) + return; + if (CHECK_DELIM(next)) { + next++; // skip delimiter + SKIP_WS(next); + } + + // Get 4-th value (could be core-offset, num_threads) + if (*next >= '0' && *next <= '9') { + prev = next; + SKIP_DIGITS(next); + num = __kmp_str_to_int(prev, *next); + SKIP_WS(next); + if (*next == 'o' || *next == 'O') { + __kmp_place_core_offset = num; + next++; + } else if (*next == 't' || *next == 'T') { + KMP_DEBUG_ASSERT(flagT == 0); + __kmp_place_num_threads_per_core = num; + flagT = 1; + next++; // can have core-offset specified after num threads + } else { + KMP_WARNING(AffThrPlaceInvalid, name, value); + return; } - KMP_DEBUG_ASSERT( prev_delim == 1 ); - next++; // no value for threads_per_core provided } else { - KMP_WARNING( AffThrPlaceInvalid, name, value ); + KMP_WARNING(AffThrPlaceInvalid, name, value); return; } - SKIP_WS( next ); - if ( *next == '\0' ) { - return; // " nC,mT " - something like this - } - if ( ( *next == ',' || *next == 'x' ) && !prev_delim ) { - prev_delim = 1; - next++; // skip delimiter after threads_per_core value - SKIP_WS( next ); + SKIP_WS(next); + if ( *next == '\0' ) + return; + if (CHECK_DELIM(next)) { + next++; // skip delimiter + SKIP_WS(next); } - // Get core offset last if any, - // don't bother checking syntax after all data obtained - if ( *next >= '0' && *next <= '9' ) { + // Get 5-th value (could be core-offset, num_threads) + if (*next >= '0' && *next <= '9') { prev = next; - SKIP_DIGITS( next ); - num = __kmp_str_to_int( prev, *next ); - __kmp_place_core_offset = num; + SKIP_DIGITS(next); + num = __kmp_str_to_int(prev, 
*next); + SKIP_WS(next); + if (*next == 'o' || *next == 'O') { + KMP_DEBUG_ASSERT(flagT); + __kmp_place_core_offset = num; + } else if (*next == 't' || *next == 'T') { + KMP_DEBUG_ASSERT(flagT == 0); + __kmp_place_num_threads_per_core = num; + } else { + KMP_WARNING(AffThrPlaceInvalid, name, value); + } + } else { + KMP_WARNING(AffThrPlaceInvalid, name, value); } + return; +#undef CHECK_DELIM } static void __kmp_stg_print_place_threads( kmp_str_buf_t * buffer, char const * name, void * data ) { - if ( __kmp_place_num_cores + __kmp_place_num_threads_per_core ) { + if (__kmp_place_num_sockets + __kmp_place_num_cores + __kmp_place_num_threads_per_core) { + int comma = 0; kmp_str_buf_t buf; - __kmp_str_buf_init( &buf ); - if( __kmp_env_format ) { + __kmp_str_buf_init(&buf); + if(__kmp_env_format) KMP_STR_BUF_PRINT_NAME_EX(name); - } else { - __kmp_str_buf_print( buffer, " %s='", name ); - } - __kmp_str_buf_print( &buf, "%dC", __kmp_place_num_cores ); - __kmp_str_buf_print( &buf, "x%dT", __kmp_place_num_threads_per_core ); - if ( __kmp_place_core_offset ) { - __kmp_str_buf_print( &buf, ",%dO", __kmp_place_core_offset ); + else + __kmp_str_buf_print(buffer, " %s='", name); + if (__kmp_place_num_sockets) { + __kmp_str_buf_print(&buf, "%ds", __kmp_place_num_sockets); + if (__kmp_place_socket_offset) + __kmp_str_buf_print(&buf, "@%do", __kmp_place_socket_offset); + comma = 1; + } + if (__kmp_place_num_cores) { + __kmp_str_buf_print(&buf, "%s%dc", comma?",":"", __kmp_place_num_cores); + if (__kmp_place_core_offset) + __kmp_str_buf_print(&buf, "@%do", __kmp_place_core_offset); + comma = 1; } + if (__kmp_place_num_threads_per_core) + __kmp_str_buf_print(&buf, "%s%dt", comma?",":"", __kmp_place_num_threads_per_core); __kmp_str_buf_print(buffer, "%s'\n", buf.str ); __kmp_str_buf_free(&buf); /*