Index: openmp/trunk/runtime/src/kmp_affinity.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_affinity.cpp +++ openmp/trunk/runtime/src/kmp_affinity.cpp @@ -249,6 +249,7 @@ #ifndef KMP_DFLT_NTH_CORES static int __kmp_ncores; #endif +static int *__kmp_pu_os_idx = NULL; // // __kmp_affinity_uniform_topology() doesn't work when called from @@ -421,6 +422,7 @@ // Allocate the data structure to be returned. // AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc); + __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc); // // When affinity is off, this routine will still be called to set @@ -464,6 +466,7 @@ addr.labels[1] = core_identifier; // core addr.labels[2] = pu_identifier; // pu retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index); + __kmp_pu_os_idx[nActiveThreads] = pu->os_index; // keep os index for each active pu nActiveThreads++; ++num_active_threads; // count active threads per core } @@ -668,7 +671,16 @@ KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore, __kmp_ncores); } + KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL); + __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc); if (__kmp_affinity_type == affinity_none) { + int avail_ct = 0; + unsigned int i; + KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) { + if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) + continue; + __kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat + } return 0; } @@ -686,7 +698,7 @@ if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) { continue; } - + __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat Address addr(1); addr.labels[0] = i; (*address2os)[avail_ct++] = AddrUnsPair(addr,i); @@ -742,6 +754,8 @@ // *address2os = (AddrUnsPair*) __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc); + KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL); + __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc); int avail_ct = 0; int i; KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) { @@ -751,7 +765,7 @@ if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) { continue; } - + __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat Address addr(2); addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR)); addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR)); @@ -1267,7 +1281,12 @@ __kmp_nThreadsPerCore, __kmp_ncores); } - + KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL); + KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc); + __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc); + for (i = 0; i < nApics; ++i) { + __kmp_pu_os_idx[i] = threadInfo[i].osId; + } if (__kmp_affinity_type == affinity_none) { __kmp_free(threadInfo); KMP_CPU_FREE(oldMask); @@ -1745,7 +1764,12 @@ __kmp_str_buf_free(&buf); } - + KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL); + KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc); + __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc); + for (proc = 0; (int)proc < nApics; ++proc) { + __kmp_pu_os_idx[proc] = retval[proc].second; + } if (__kmp_affinity_type == affinity_none) { __kmp_free(last); __kmp_free(maxCt); @@ -2491,6 +2515,13 @@ } # endif // KMP_MIC && REDUCE_TEAM_SIZE + KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL); + KMP_DEBUG_ASSERT(num_avail == __kmp_avail_proc); + __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc); + for (i = 0; i < num_avail; ++i) { // fill the os indices + __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex]; + } + if (__kmp_affinity_type == affinity_none) { __kmp_free(lastId); __kmp_free(totals); @@ -3391,10 +3422,11 @@ static void __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) { + int i, j, k, n_old = 0, n_new = 0, proc_num = 0; if (__kmp_place_num_sockets == 0 && __kmp_place_num_cores == 0 && __kmp_place_num_threads_per_core == 0 ) - return; // no topology limiting actions requested, exit + goto _exit; // no topology limiting actions requested, exit if (__kmp_place_num_sockets == 0) __kmp_place_num_sockets = nPackages; // use all available sockets if (__kmp_place_num_cores == 0) @@ -3405,42 +3437,66 @@ if ( !__kmp_affinity_uniform_topology() ) { KMP_WARNING( AffHWSubsetNonUniform ); - return; // don't support non-uniform topology + goto _exit; // don't support non-uniform topology } - if ( depth != 3 ) { + if ( depth > 3 ) { KMP_WARNING( AffHWSubsetNonThreeLevel ); - return; // don't support not-3-level topology + goto _exit; // don't support not-3-level topology } if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) { KMP_WARNING(AffHWSubsetManySockets); - return; + goto _exit; } if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) { KMP_WARNING( AffHWSubsetManyCores ); - return; + goto _exit; } - AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) * - __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core); + AddrUnsPair *newAddr; + if (pAddr) // pAddr is NULL in case of affinity_none + newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) * + __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core); - int i, j, k, n_old = 0, n_new = 0; - for (i = 0; i < nPackages; ++i) + for (i = 0; i < nPackages; ++i) { if (i < __kmp_place_socket_offset || - i >= __kmp_place_socket_offset + __kmp_place_num_sockets) + i >= __kmp_place_socket_offset + __kmp_place_num_sockets) { n_old += nCoresPerPkg * __kmp_nThreadsPerCore; // skip not-requested socket - else - for (j = 0; j < nCoresPerPkg; ++j) // walk through requested socket + if (__kmp_pu_os_idx != NULL) { + for (j = 0; j < nCoresPerPkg; ++j) { // walk through skipped socket + for (k = 0; k < __kmp_nThreadsPerCore; ++k) { + KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask); + ++proc_num; + } + } + } + } else { + for (j = 0; j < nCoresPerPkg; ++j) { // walk through requested socket if (j < __kmp_place_core_offset || - j >= __kmp_place_core_offset + __kmp_place_num_cores) + j >= __kmp_place_core_offset + __kmp_place_num_cores) { n_old += __kmp_nThreadsPerCore; // skip not-requested core - else + if (__kmp_pu_os_idx != NULL) { + for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through skipped core + KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask); + ++proc_num; + } + } + } else { for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core if (k < __kmp_place_num_threads_per_core) { - newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data + if (pAddr) + newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data n_new++; + } else { + if (__kmp_pu_os_idx != NULL) + KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask); } n_old++; + ++proc_num; } + } + } + } + } KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore); KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core); @@ -3451,8 +3507,15 @@ __kmp_avail_proc = n_new; // correct avail_proc __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores - __kmp_free( *pAddr ); - *pAddr = newAddr; // replace old topology with new one + if (pAddr) { + __kmp_free( *pAddr ); + *pAddr = newAddr; // replace old topology with new one + } +_exit: + if (__kmp_pu_os_idx != NULL) { + __kmp_free(__kmp_pu_os_idx); + __kmp_pu_os_idx = NULL; + } } @@ -3460,6 +3523,12 @@ static int * procarr = NULL; static int __kmp_aff_depth = 0; +#define KMP_EXIT_AFF_NONE \ + KMP_ASSERT(__kmp_affinity_type == affinity_none); \ + KMP_ASSERT(address2os == NULL); \ + __kmp_apply_thread_places(NULL, 0); \ + return; + static void __kmp_aux_affinity_initialize(void) { @@ -3536,9 +3605,7 @@ if(!__kmp_hwloc_error) { depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id); if (depth == 0) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); - KMP_ASSERT(address2os == NULL); - return; + KMP_EXIT_AFF_NONE; } else if(depth < 0 && __kmp_affinity_verbose) { KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY"); } @@ -3558,9 +3625,7 @@ file_name = NULL; depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id); if (depth == 0) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); - KMP_ASSERT(address2os == NULL); - return; + KMP_EXIT_AFF_NONE; } if (depth < 0) { @@ -3577,9 +3642,7 @@ file_name = NULL; depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id); if (depth == 0) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); - KMP_ASSERT(address2os == NULL); - return; + KMP_EXIT_AFF_NONE; } } } @@ -3607,9 +3670,7 @@ depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f); fclose(f); if (depth == 0) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); - KMP_ASSERT(address2os == NULL); - return; + KMP_EXIT_AFF_NONE; } } } @@ -3646,9 +3707,7 @@ file_name = ""; depth = __kmp_affinity_create_flat_map(&address2os, &msg_id); if (depth == 0) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); - KMP_ASSERT(address2os == NULL); - return; + KMP_EXIT_AFF_NONE; } KMP_ASSERT(depth > 0); KMP_ASSERT(address2os != NULL); @@ -3671,9 +3730,7 @@ depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id); if (depth == 0) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); - KMP_ASSERT(address2os == NULL); - return; + KMP_EXIT_AFF_NONE; } if (depth < 0) { KMP_ASSERT(msg_id != kmp_i18n_null); @@ -3688,9 +3745,7 @@ depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id); if (depth == 0) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); - KMP_ASSERT(address2os == NULL); - return; + KMP_EXIT_AFF_NONE; } if (depth < 0) { KMP_ASSERT(msg_id != kmp_i18n_null); @@ -3748,8 +3803,7 @@ } if (__kmp_affinity_type == affinity_none) { KMP_ASSERT(depth == 0); - KMP_ASSERT(address2os == NULL); - return; + KMP_EXIT_AFF_NONE; } } @@ -3777,9 +3831,7 @@ depth = __kmp_affinity_create_flat_map(&address2os, &msg_id); if (depth == 0) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); - KMP_ASSERT(address2os == NULL); - return; + KMP_EXIT_AFF_NONE; } // should not fail KMP_ASSERT(depth > 0); @@ -3793,9 +3845,7 @@ } depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id); if (depth == 0) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); - KMP_ASSERT(address2os == NULL); - return; + KMP_EXIT_AFF_NONE; } } # endif // KMP_USE_HWLOC @@ -4014,6 +4064,7 @@ __kmp_free(osId2Mask); machine_hierarchy.init(address2os, __kmp_avail_proc); } +#undef KMP_EXIT_AFF_NONE void