Index: openmp/trunk/runtime/src/kmp_affinity.h =================================================================== --- openmp/trunk/runtime/src/kmp_affinity.h +++ openmp/trunk/runtime/src/kmp_affinity.h @@ -119,15 +119,21 @@ } -/** A structure for holding machine-specific hierarchy info to be computed once at init. */ +/** A structure for holding machine-specific hierarchy info to be computed once at init. + This structure represents a mapping of threads to the actual machine hierarchy, or to + our best guess at what the hierarchy might be, for the purpose of performing an + efficient barrier. In the worst case, when there is no machine hierarchy information, + it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */ class hierarchy_info { public: /** Good default values for number of leaves and branching factor, given no affinity information. Behaves a bit like hyper barrier. */ static const kmp_uint32 maxLeaves=4; static const kmp_uint32 minBranch=4; - /** Typical levels are threads/core, cores/package or socket, packages/node, nodes/machine, - etc. We don't want to get specific with nomenclature */ + /** Number of levels in the hierarchy. Typical levels are threads/core, cores/package + or socket, packages/node, nodes/machine, etc. We don't want to get specific with + nomenclature. When the machine is oversubscribed we add levels to duplicate the + hierarchy, doubling the thread capacity of the hierarchy each time we add a level. 
*/ kmp_uint32 maxLevels; /** This is specifically the depth of the machine configuration hierarchy, in terms of the @@ -227,6 +233,7 @@ } + // Resize the hierarchy if nproc changes to something larger than before void resize(kmp_uint32 nproc) { kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1); @@ -237,13 +244,23 @@ KMP_DEBUG_ASSERT(bool_result!=0); KMP_DEBUG_ASSERT(nproc > base_num_threads); - // Calculate new max_levels + // Calculate new maxLevels kmp_uint32 old_sz = skipPerLevel[depth-1]; - kmp_uint32 incs = 0, old_maxLevels= maxLevels; + kmp_uint32 incs = 0, old_maxLevels = maxLevels; + // First see if old maxLevels is enough to contain new size + for (kmp_uint32 i=depth; i<maxLevels && nproc>old_sz; ++i) { + skipPerLevel[i] = 2*skipPerLevel[i-1]; + old_sz *= 2; + depth++; + } + if (nproc <= old_sz) // enough space already + return; + // Not enough space, need to expand hierarchy while (nproc > old_sz) { old_sz *=2; incs++; - } + depth++; + } maxLevels += incs; // Resize arrays Index: openmp/trunk/runtime/src/kmp_affinity.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_affinity.cpp +++ openmp/trunk/runtime/src/kmp_affinity.cpp @@ -32,15 +32,12 @@ // The test below is true if affinity is available, but set to "none". Need to init on first use of hierarchical barrier. 
if (TCR_1(machine_hierarchy.uninitialized)) machine_hierarchy.init(NULL, nproc); - // Adjust the hierarchy in case num threads exceeds original - if (nproc > machine_hierarchy.base_num_threads) - machine_hierarchy.resize(nproc); depth = machine_hierarchy.depth; KMP_DEBUG_ASSERT(depth > 0); - // The loop below adjusts the depth in the case of a resize - while (nproc > machine_hierarchy.skipPerLevel[depth-1]) - depth++; + // Adjust the hierarchy in case num threads exceeds original + if (nproc > machine_hierarchy.skipPerLevel[depth-1]) + machine_hierarchy.resize(nproc); thr_bar->depth = depth; thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;