@@ -389,9 +389,6 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
389
389
int pkgLevel = 0 ;
390
390
int coreLevel = 1 ;
391
391
int threadLevel = 2 ;
392
- nPackages = __kmp_hwloc_get_nobjs_under_obj (hwloc_get_root_obj (__kmp_hwloc_topology), HWLOC_OBJ_SOCKET);
393
- nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj (hwloc_get_obj_by_type (__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0 ), HWLOC_OBJ_CORE);
394
- __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj (hwloc_get_obj_by_type (__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0 ), HWLOC_OBJ_PU);
395
392
396
393
if (! KMP_AFFINITY_CAPABLE ())
397
394
{
@@ -401,6 +398,8 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
401
398
//
402
399
KMP_ASSERT (__kmp_affinity_type == affinity_none);
403
400
401
+ nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj (hwloc_get_obj_by_type (__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0 ), HWLOC_OBJ_CORE);
402
+ __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj (hwloc_get_obj_by_type (__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0 ), HWLOC_OBJ_PU);
404
403
__kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
405
404
nPackages = (__kmp_xproc + nCoresPerPkg - 1 ) / nCoresPerPkg;
406
405
if (__kmp_affinity_verbose) {
@@ -423,45 +422,69 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
423
422
//
424
423
AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate (sizeof (AddrUnsPair) * __kmp_avail_proc);
425
424
425
+ //
426
+ // When affinity is off, this routine will still be called to set
427
+ // __kmp_ncores, as well as __kmp_nThreadsPerCore,
428
+ // nCoresPerPkg, & nPackages. Make sure all these vars are set
429
+ // correctly, and return if affinity is not enabled.
430
+ //
431
+
426
432
hwloc_obj_t pu;
427
433
hwloc_obj_t core;
428
434
hwloc_obj_t socket;
429
435
int nActiveThreads = 0 ;
430
436
int socket_identifier = 0 ;
437
+ // re-calculate globals to count only accessible resources
438
+ __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0 ;
431
439
for (socket = hwloc_get_obj_by_type (__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0 );
432
440
socket != NULL ;
433
441
socket = hwloc_get_next_obj_by_type (__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, socket),
434
442
socket_identifier++)
435
443
{
436
444
int core_identifier = 0 ;
445
+ int num_active_cores = 0 ;
437
446
for (core = hwloc_get_obj_below_by_type (__kmp_hwloc_topology, socket->type , socket->logical_index , HWLOC_OBJ_CORE, 0 );
438
447
core != NULL && hwloc_get_ancestor_obj_by_type (__kmp_hwloc_topology, socket->type , core) == socket;
439
448
core = hwloc_get_next_obj_by_type (__kmp_hwloc_topology, HWLOC_OBJ_CORE, core),
440
449
core_identifier++)
441
450
{
442
451
int pu_identifier = 0 ;
452
+ int num_active_threads = 0 ;
443
453
for (pu = hwloc_get_obj_below_by_type (__kmp_hwloc_topology, core->type , core->logical_index , HWLOC_OBJ_PU, 0 );
444
454
pu != NULL && hwloc_get_ancestor_obj_by_type (__kmp_hwloc_topology, core->type , pu) == core;
445
455
pu = hwloc_get_next_obj_by_type (__kmp_hwloc_topology, HWLOC_OBJ_PU, pu),
446
456
pu_identifier++)
447
457
{
448
458
Address addr (3 );
449
459
if (! KMP_CPU_ISSET (pu->os_index , __kmp_affin_fullMask))
450
- continue ;
460
+ continue ; // skip inactive (inaccessible) unit
451
461
KA_TRACE (20 , (" Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n " ,
452
462
socket->os_index , socket->logical_index , core->os_index , core->logical_index , pu->os_index ,pu->logical_index ));
453
463
addr.labels [0 ] = socket_identifier; // package
454
464
addr.labels [1 ] = core_identifier; // core
455
465
addr.labels [2 ] = pu_identifier; // pu
456
466
retval[nActiveThreads] = AddrUnsPair (addr, pu->os_index );
457
467
nActiveThreads++;
468
+ ++num_active_threads; // count active threads per core
469
+ }
470
+ if (num_active_threads) { // were there any active threads on the core?
471
+ ++__kmp_ncores; // count total active cores
472
+ ++num_active_cores; // count active cores per socket
473
+ if (num_active_threads > __kmp_nThreadsPerCore)
474
+ __kmp_nThreadsPerCore = num_active_threads; // calc maximum
458
475
}
459
476
}
477
+ if (num_active_cores) { // were there any active cores on the socket?
478
+ ++nPackages; // count total active packages
479
+ if (num_active_cores > nCoresPerPkg)
480
+ nCoresPerPkg = num_active_cores; // calc maximum
481
+ }
460
482
}
461
483
462
484
//
463
485
// If there's only one thread context to bind to, return now.
464
486
//
487
+ KMP_DEBUG_ASSERT (nActiveThreads == __kmp_avail_proc);
465
488
KMP_ASSERT (nActiveThreads > 0 );
466
489
if (nActiveThreads == 1 ) {
467
490
__kmp_ncores = nPackages = 1 ;
@@ -513,21 +536,10 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
513
536
//
514
537
qsort (retval, nActiveThreads, sizeof (*retval), __kmp_affinity_cmp_Address_labels);
515
538
516
- //
517
- // When affinity is off, this routine will still be called to set
518
- // __kmp_ncores, as well as __kmp_nThreadsPerCore,
519
- // nCoresPerPkg, & nPackages. Make sure all these vars are set
520
- // correctly, and return if affinity is not enabled.
521
- //
522
- __kmp_ncores = hwloc_get_nbobjs_by_type (__kmp_hwloc_topology, HWLOC_OBJ_CORE);
523
-
524
539
//
525
540
// Check to see if the machine topology is uniform
526
541
//
527
- unsigned npackages = hwloc_get_nbobjs_by_type (__kmp_hwloc_topology, HWLOC_OBJ_SOCKET);
528
- unsigned ncores = __kmp_ncores;
529
- unsigned nthreads = hwloc_get_nbobjs_by_type (__kmp_hwloc_topology, HWLOC_OBJ_PU);
530
- unsigned uniform = (npackages * nCoresPerPkg * __kmp_nThreadsPerCore == nthreads);
542
+ unsigned uniform = (nPackages * nCoresPerPkg * __kmp_nThreadsPerCore == nActiveThreads);
531
543
532
544
//
533
545
// Print the machine topology summary.
@@ -552,7 +564,7 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
552
564
kmp_str_buf_t buf;
553
565
__kmp_str_buf_init (&buf);
554
566
555
- __kmp_str_buf_print (&buf, " %d" , npackages );
567
+ __kmp_str_buf_print (&buf, " %d" , nPackages );
556
568
// for (level = 1; level <= pkgLevel; level++) {
557
569
// __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
558
570
// }
0 commit comments