Index: openmp/trunk/runtime/src/kmp_affinity.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_affinity.cpp +++ openmp/trunk/runtime/src/kmp_affinity.cpp @@ -389,9 +389,6 @@ int pkgLevel = 0; int coreLevel = 1; int threadLevel = 2; - nPackages = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_root_obj(__kmp_hwloc_topology), HWLOC_OBJ_SOCKET); - nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0), HWLOC_OBJ_CORE); - __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU); if (! KMP_AFFINITY_CAPABLE()) { @@ -401,6 +398,8 @@ // KMP_ASSERT(__kmp_affinity_type == affinity_none); + nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0), HWLOC_OBJ_CORE); + __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU); __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore; nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg; if (__kmp_affinity_verbose) { @@ -423,23 +422,34 @@ // AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc); + // + // When affinity is off, this routine will still be called to set + // __kmp_ncores, as well as __kmp_nThreadsPerCore, + // nCoresPerPkg, & nPackages. Make sure all these vars are set + // correctly, and return if affinity is not enabled. + // + hwloc_obj_t pu; hwloc_obj_t core; hwloc_obj_t socket; int nActiveThreads = 0; int socket_identifier = 0; + // re-calculate globals to count only accessible resources + __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0; for(socket = hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0); socket != NULL; socket = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, socket), socket_identifier++) { int core_identifier = 0; + int num_active_cores = 0; for(core = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, socket->type, socket->logical_index, HWLOC_OBJ_CORE, 0); core != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, socket->type, core) == socket; core = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, core), core_identifier++) { int pu_identifier = 0; + int num_active_threads = 0; for(pu = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, core->type, core->logical_index, HWLOC_OBJ_PU, 0); pu != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, core->type, pu) == core; pu = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU, pu), @@ -447,7 +457,7 @@ { Address addr(3); if(! KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask)) - continue; + continue; // skip inactive (inaccessible) unit KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n", socket->os_index, socket->logical_index, core->os_index, core->logical_index, pu->os_index,pu->logical_index)); addr.labels[0] = socket_identifier; // package @@ -455,13 +465,26 @@ addr.labels[2] = pu_identifier; // pu retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index); nActiveThreads++; + ++num_active_threads; // count active threads per core + } + if (num_active_threads) { // were there any active threads on the core? + ++__kmp_ncores; // count total active cores + ++num_active_cores; // count active cores per socket + if (num_active_threads > __kmp_nThreadsPerCore) + __kmp_nThreadsPerCore = num_active_threads; // calc maximum } } + if (num_active_cores) { // were there any active cores on the socket? + ++nPackages; // count total active packages + if (num_active_cores > nCoresPerPkg) + nCoresPerPkg = num_active_cores; // calc maximum + } } // // If there's only one thread context to bind to, return now. // + KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc); KMP_ASSERT(nActiveThreads > 0); if (nActiveThreads == 1) { __kmp_ncores = nPackages = 1; @@ -514,20 +537,9 @@ qsort(retval, nActiveThreads, sizeof(*retval), __kmp_affinity_cmp_Address_labels); // - // When affinity is off, this routine will still be called to set - // __kmp_ncores, as well as __kmp_nThreadsPerCore, - // nCoresPerPkg, & nPackages. Make sure all these vars are set - // correctly, and return if affinity is not enabled. - // - __kmp_ncores = hwloc_get_nbobjs_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE); - - // // Check to see if the machine topology is uniform // - unsigned npackages = hwloc_get_nbobjs_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET); - unsigned ncores = __kmp_ncores; - unsigned nthreads = hwloc_get_nbobjs_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU); - unsigned uniform = (npackages * nCoresPerPkg * __kmp_nThreadsPerCore == nthreads); + unsigned uniform = (nPackages * nCoresPerPkg * __kmp_nThreadsPerCore == nActiveThreads); // // Print the machine topology summary. @@ -552,7 +564,7 @@ kmp_str_buf_t buf; __kmp_str_buf_init(&buf); - __kmp_str_buf_print(&buf, "%d", npackages); + __kmp_str_buf_print(&buf, "%d", nPackages); //for (level = 1; level <= pkgLevel; level++) { // __kmp_str_buf_print(&buf, " x %d", maxCt[level]); // }