diff --git a/bolt/include/bolt/Passes/ReorderFunctions.h b/bolt/include/bolt/Passes/ReorderFunctions.h --- a/bolt/include/bolt/Passes/ReorderFunctions.h +++ b/bolt/include/bolt/Passes/ReorderFunctions.h @@ -32,6 +32,7 @@ RT_EXEC_COUNT, RT_HFSORT, RT_HFSORT_PLUS, + RT_CDS, RT_PETTIS_HANSEN, RT_RANDOM, RT_USER diff --git a/bolt/lib/Passes/ReorderFunctions.cpp b/bolt/lib/Passes/ReorderFunctions.cpp --- a/bolt/lib/Passes/ReorderFunctions.cpp +++ b/bolt/lib/Passes/ReorderFunctions.cpp @@ -15,6 +15,7 @@ #include "bolt/Utils/Utils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/CodeLayout.h" #include #define DEBUG_TYPE "hfsort" @@ -29,33 +30,27 @@ extern size_t padFunction(const bolt::BinaryFunction &Function); -cl::opt -ReorderFunctions("reorder-functions", - cl::desc("reorder and cluster functions (works only with relocations)"), - cl::init(bolt::ReorderFunctions::RT_NONE), - cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE, - "none", - "do not reorder functions"), - clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT, - "exec-count", - "order by execution count"), - clEnumValN(bolt::ReorderFunctions::RT_HFSORT, - "hfsort", - "use hfsort algorithm"), - clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, - "hfsort+", - "use hfsort+ algorithm"), - clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN, - "pettis-hansen", - "use Pettis-Hansen algorithm"), - clEnumValN(bolt::ReorderFunctions::RT_RANDOM, - "random", - "reorder functions randomly"), - clEnumValN(bolt::ReorderFunctions::RT_USER, - "user", - "use function order specified by -function-order")), - cl::ZeroOrMore, - cl::cat(BoltOptCategory)); +cl::opt ReorderFunctions( + "reorder-functions", + cl::desc("reorder and cluster functions (works only with relocations)"), + cl::init(bolt::ReorderFunctions::RT_NONE), + cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE, "none", + "do not reorder functions"), + clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT, "exec-count", + "order by execution count"), + clEnumValN(bolt::ReorderFunctions::RT_HFSORT, "hfsort", + "use hfsort algorithm"), + clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, "hfsort+", + "use hfsort+ algorithm"), + clEnumValN(bolt::ReorderFunctions::RT_CDS, "cds", + "use cache-directed sort"), + clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN, + "pettis-hansen", "use Pettis-Hansen algorithm"), + clEnumValN(bolt::ReorderFunctions::RT_RANDOM, "random", + "reorder functions randomly"), + clEnumValN(bolt::ReorderFunctions::RT_USER, "user", + "use function order specified by -function-order")), + cl::ZeroOrMore, cl::cat(BoltOptCategory)); static cl::opt ReorderFunctionsUseHotSize( "reorder-functions-use-hot-size", @@ -292,29 +287,33 @@ break; case RT_EXEC_COUNT: { - std::vector SortedFunctions(BFs.size()); - uint32_t Index = 0; - llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(), - [](BinaryFunction &BF) { return &BF; }); - llvm::stable_sort(SortedFunctions, [&](const BinaryFunction *A, - const BinaryFunction *B) { - if (A->isIgnored()) - return false; - const size_t PadA = opts::padFunction(*A); - const size_t PadB = opts::padFunction(*B); - if (!PadA || !PadB) { - if (PadA) - return true; - if (PadB) - return false; - } - return !A->hasProfile() && - (B->hasProfile() || - (A->getExecutionCount() > B->getExecutionCount())); - }); - for (BinaryFunction *BF : SortedFunctions) - if (BF->hasProfile()) - BF->setIndex(Index++); + std::vector SortedFunctions(BFs.size()); + llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(), + [](BinaryFunction &BF) { return &BF; }); + llvm::stable_sort(SortedFunctions, + [&](const BinaryFunction *A, const BinaryFunction *B) { + if (A->isIgnored()) + return false; + if (B->isIgnored()) + return true; + const size_t PadA = opts::padFunction(*A); + const size_t PadB = opts::padFunction(*B); + if (!PadA || !PadB) { + if (PadA) + return true; + if (PadB) + return false; + } + if (!A->hasProfile()) + return false; + if (!B->hasProfile()) + return true; + return A->getExecutionCount() > B->getExecutionCount(); + }); + uint32_t Index = 0; + for (BinaryFunction *BF : SortedFunctions) + if (BF->hasProfile()) + BF->setIndex(Index++); } break; case RT_HFSORT: @@ -323,6 +322,39 @@ case RT_HFSORT_PLUS: Clusters = hfsortPlus(Cg); break; + case RT_CDS: { + // It is required that the sum of incoming arc weights is not greater + // than the number of samples for every function. Ensuring the call graph + // obeys the property before running the algorithm. + Cg.adjustArcWeights(); + + // Initialize CFG nodes and their data + std::vector FuncSizes; + std::vector FuncCounts; + using JumpT = std::pair; + std::vector> CallCounts; + std::vector CallOffsets; + for (NodeId F = 0; F < Cg.numNodes(); ++F) { + FuncSizes.push_back(Cg.size(F)); + FuncCounts.push_back(Cg.samples(F)); + for (NodeId Succ : Cg.successors(F)) { + const Arc &Arc = *Cg.findArc(F, Succ); + auto It = std::make_pair(F, Succ); + CallCounts.push_back(std::make_pair(It, Arc.weight())); + CallOffsets.push_back(uint64_t(Arc.avgCallOffset())); + } + } + + // Run the layout algorithm + CDSortConfig Config; + std::vector Result = + applyCDSLayout(Config, FuncSizes, FuncCounts, CallCounts, CallOffsets); + + // Set the order of hot functions based on the layout + uint32_t Index = 0; + for (uint64_t FuncId : Result) + Cg.nodeIdToFunc(FuncId)->setIndex(Index++); + } break; case RT_PETTIS_HANSEN: Clusters = pettisAndHansen(Cg); break;