Skip to content

Commit bf35771

Browse files
committed Jun 16, 2016
Change hwloc discovery algorithm to print topology only for accessible resources
Change the hwloc discovery algorithm to print topology for only accessible resources, and to report uniformity accordingly, as the other topology discovery algorithms do. Fixes a minor inconsistency between the total topology reported and the resources used for thread binding when hwloc is used. Patch by Andrey Churbanov. Differential Revision: http://reviews.llvm.org/D21389 llvm-svn: 272952
1 parent 0f3c2b9 commit bf35771

File tree

1 file changed

+29
-17
lines changed

1 file changed

+29
-17
lines changed
 

‎openmp/runtime/src/kmp_affinity.cpp

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -389,9 +389,6 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
389389
int pkgLevel = 0;
390390
int coreLevel = 1;
391391
int threadLevel = 2;
392-
nPackages = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_root_obj(__kmp_hwloc_topology), HWLOC_OBJ_SOCKET);
393-
nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0), HWLOC_OBJ_CORE);
394-
__kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
395392

396393
if (! KMP_AFFINITY_CAPABLE())
397394
{
@@ -401,6 +398,8 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
401398
//
402399
KMP_ASSERT(__kmp_affinity_type == affinity_none);
403400

401+
nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0), HWLOC_OBJ_CORE);
402+
__kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
404403
__kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
405404
nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
406405
if (__kmp_affinity_verbose) {
@@ -423,45 +422,69 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
423422
//
424423
AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
425424

425+
//
426+
// When affinity is off, this routine will still be called to set
427+
// __kmp_ncores, as well as __kmp_nThreadsPerCore,
428+
// nCoresPerPkg, & nPackages. Make sure all these vars are set
429+
// correctly, and return if affinity is not enabled.
430+
//
431+
426432
hwloc_obj_t pu;
427433
hwloc_obj_t core;
428434
hwloc_obj_t socket;
429435
int nActiveThreads = 0;
430436
int socket_identifier = 0;
437+
// re-calculate globals to count only accessible resources
438+
__kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
431439
for(socket = hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0);
432440
socket != NULL;
433441
socket = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, socket),
434442
socket_identifier++)
435443
{
436444
int core_identifier = 0;
445+
int num_active_cores = 0;
437446
for(core = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, socket->type, socket->logical_index, HWLOC_OBJ_CORE, 0);
438447
core != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, socket->type, core) == socket;
439448
core = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, core),
440449
core_identifier++)
441450
{
442451
int pu_identifier = 0;
452+
int num_active_threads = 0;
443453
for(pu = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, core->type, core->logical_index, HWLOC_OBJ_PU, 0);
444454
pu != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, core->type, pu) == core;
445455
pu = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU, pu),
446456
pu_identifier++)
447457
{
448458
Address addr(3);
449459
if(! KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
450-
continue;
460+
continue; // skip inactive (inaccessible) unit
451461
KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
452462
socket->os_index, socket->logical_index, core->os_index, core->logical_index, pu->os_index,pu->logical_index));
453463
addr.labels[0] = socket_identifier; // package
454464
addr.labels[1] = core_identifier; // core
455465
addr.labels[2] = pu_identifier; // pu
456466
retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
457467
nActiveThreads++;
468+
++num_active_threads; // count active threads per core
469+
}
470+
if (num_active_threads) { // were there any active threads on the core?
471+
++__kmp_ncores; // count total active cores
472+
++num_active_cores; // count active cores per socket
473+
if (num_active_threads > __kmp_nThreadsPerCore)
474+
__kmp_nThreadsPerCore = num_active_threads; // calc maximum
458475
}
459476
}
477+
if (num_active_cores) { // were there any active cores on the socket?
478+
++nPackages; // count total active packages
479+
if (num_active_cores > nCoresPerPkg)
480+
nCoresPerPkg = num_active_cores; // calc maximum
481+
}
460482
}
461483

462484
//
463485
// If there's only one thread context to bind to, return now.
464486
//
487+
KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
465488
KMP_ASSERT(nActiveThreads > 0);
466489
if (nActiveThreads == 1) {
467490
__kmp_ncores = nPackages = 1;
@@ -513,21 +536,10 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
513536
//
514537
qsort(retval, nActiveThreads, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
515538

516-
//
517-
// When affinity is off, this routine will still be called to set
518-
// __kmp_ncores, as well as __kmp_nThreadsPerCore,
519-
// nCoresPerPkg, & nPackages. Make sure all these vars are set
520-
// correctly, and return if affinity is not enabled.
521-
//
522-
__kmp_ncores = hwloc_get_nbobjs_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE);
523-
524539
//
525540
// Check to see if the machine topology is uniform
526541
//
527-
unsigned npackages = hwloc_get_nbobjs_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET);
528-
unsigned ncores = __kmp_ncores;
529-
unsigned nthreads = hwloc_get_nbobjs_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU);
530-
unsigned uniform = (npackages * nCoresPerPkg * __kmp_nThreadsPerCore == nthreads);
542+
unsigned uniform = (nPackages * nCoresPerPkg * __kmp_nThreadsPerCore == nActiveThreads);
531543

532544
//
533545
// Print the machine topology summary.
@@ -552,7 +564,7 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
552564
kmp_str_buf_t buf;
553565
__kmp_str_buf_init(&buf);
554566

555-
__kmp_str_buf_print(&buf, "%d", npackages);
567+
__kmp_str_buf_print(&buf, "%d", nPackages);
556568
//for (level = 1; level <= pkgLevel; level++) {
557569
// __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
558570
// }

0 commit comments

Comments
 (0)