diff --git a/bolt/include/bolt/Passes/ReorderFunctions.h b/bolt/include/bolt/Passes/ReorderFunctions.h --- a/bolt/include/bolt/Passes/ReorderFunctions.h +++ b/bolt/include/bolt/Passes/ReorderFunctions.h @@ -32,6 +32,7 @@ RT_EXEC_COUNT, RT_HFSORT, RT_HFSORT_PLUS, + RT_CDS, RT_PETTIS_HANSEN, RT_RANDOM, RT_USER diff --git a/bolt/lib/Passes/ReorderFunctions.cpp b/bolt/lib/Passes/ReorderFunctions.cpp --- a/bolt/lib/Passes/ReorderFunctions.cpp +++ b/bolt/lib/Passes/ReorderFunctions.cpp @@ -15,6 +15,7 @@ #include "bolt/Utils/Utils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/CodeLayout.h" #include #define DEBUG_TYPE "hfsort" @@ -29,82 +30,72 @@ extern size_t padFunction(const bolt::BinaryFunction &Function); -cl::opt -ReorderFunctions("reorder-functions", - cl::desc("reorder and cluster functions (works only with relocations)"), - cl::init(bolt::ReorderFunctions::RT_NONE), - cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE, - "none", - "do not reorder functions"), - clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT, - "exec-count", - "order by execution count"), - clEnumValN(bolt::ReorderFunctions::RT_HFSORT, - "hfsort", - "use hfsort algorithm"), - clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, - "hfsort+", - "use hfsort+ algorithm"), - clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN, - "pettis-hansen", - "use Pettis-Hansen algorithm"), - clEnumValN(bolt::ReorderFunctions::RT_RANDOM, - "random", - "reorder functions randomly"), - clEnumValN(bolt::ReorderFunctions::RT_USER, - "user", - "use function order specified by -function-order")), - cl::ZeroOrMore, - cl::cat(BoltOptCategory)); +cl::opt ReorderFunctions( + "reorder-functions", + cl::desc("reorder and cluster functions (works only with relocations)"), + cl::init(bolt::ReorderFunctions::RT_NONE), + cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE, "none", + "do not reorder functions"), + clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT, "exec-count", + "order by execution count"), + clEnumValN(bolt::ReorderFunctions::RT_HFSORT, "hfsort", + "use hfsort algorithm"), + clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, "hfsort+", + "use hfsort+ algorithm"), + clEnumValN(bolt::ReorderFunctions::RT_CDS, "cds", + "use cache-directed sort"), + clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN, + "pettis-hansen", "use Pettis-Hansen algorithm"), + clEnumValN(bolt::ReorderFunctions::RT_RANDOM, "random", + "reorder functions randomly"), + clEnumValN(bolt::ReorderFunctions::RT_USER, "user", + "use function order specified by -function-order")), + cl::ZeroOrMore, cl::cat(BoltOptCategory)); static cl::opt ReorderFunctionsUseHotSize( "reorder-functions-use-hot-size", cl::desc("use a function's hot size when doing clustering"), cl::init(true), cl::cat(BoltOptCategory)); -static cl::opt -FunctionOrderFile("function-order", - cl::desc("file containing an ordered list of functions to use for function " - "reordering"), - cl::cat(BoltOptCategory)); +static cl::opt FunctionOrderFile( + "function-order", + cl::desc("file containing an ordered list of functions to use for function " + "reordering"), + cl::cat(BoltOptCategory)); -static cl::opt -GenerateFunctionOrderFile("generate-function-order", - cl::desc("file to dump the ordered list of functions to use for function " - "reordering"), - cl::cat(BoltOptCategory)); +static cl::opt GenerateFunctionOrderFile( + "generate-function-order", + cl::desc("file to dump the ordered list of functions to use for function " + "reordering"), + cl::cat(BoltOptCategory)); -static cl::opt -LinkSectionsFile("generate-link-sections", - cl::desc("generate a list of function sections in a format suitable for " - "inclusion in a linker script"), - cl::cat(BoltOptCategory)); +static cl::opt LinkSectionsFile( + "generate-link-sections", + cl::desc("generate a list of function sections in a format suitable for " + "inclusion in a linker script"), + cl::cat(BoltOptCategory)); static cl::opt UseEdgeCounts("use-edge-counts", cl::desc("use edge count data when doing clustering"), cl::init(true), cl::cat(BoltOptCategory)); -static cl::opt -CgFromPerfData("cg-from-perf-data", - cl::desc("use perf data directly when constructing the call graph" - " for stale functions"), - cl::init(true), - cl::ZeroOrMore, - cl::cat(BoltOptCategory)); +static cl::opt CgFromPerfData( + "cg-from-perf-data", + cl::desc("use perf data directly when constructing the call graph" + " for stale functions"), + cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory)); static cl::opt CgIgnoreRecursiveCalls( "cg-ignore-recursive-calls", cl::desc("ignore recursive calls when constructing the call graph"), cl::init(true), cl::cat(BoltOptCategory)); -static cl::opt -CgUseSplitHotSize("cg-use-split-hot-size", - cl::desc("use hot/cold data on basic blocks to determine hot sizes for " - "call graph functions"), - cl::init(false), - cl::ZeroOrMore, - cl::cat(BoltOptCategory)); +static cl::opt CgUseSplitHotSize( + "cg-use-split-hot-size", + cl::desc("use hot/cold data on basic blocks to determine hot sizes for " + "call graph functions"), + cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); } // namespace opts @@ -157,13 +148,13 @@ bool PrintDetailed = opts::Verbosity > 1; #ifndef NDEBUG PrintDetailed |= - (DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0); + (DebugFlag && isCurrentDebugType("hfsort") && opts::Verbosity > 0); #endif - uint64_t TotalSize = 0; - uint64_t CurPage = 0; - uint64_t Hotfuncs = 0; + uint64_t TotalSize = 0; + uint64_t CurPage = 0; + uint64_t Hotfuncs = 0; double TotalDistance = 0; - double TotalCalls = 0; + double TotalCalls = 0; double TotalCalls64B = 0; double TotalCalls4KB = 0; double TotalCalls2MB = 0; @@ -198,21 +189,22 @@ << "BOLT-INFO: Src: " << *Cg.nodeIdToFunc(FuncId) << "\n" << "BOLT-INFO: Dst: " << *Cg.nodeIdToFunc(Dst) << "\n" << "BOLT-INFO: Weight = " << W << "\n" - << "BOLT-INFO: AvgOffset = " << Arc.avgCallOffset() << "\n"; + << "BOLT-INFO: AvgOffset = " << Arc.avgCallOffset() + << "\n"; Calls += W; - if (D < 64) TotalCalls64B += W; - if (D < 4096) TotalCalls4KB += W; - if (D < (2 << 20)) TotalCalls2MB += W; + if (D < 64) + TotalCalls64B += W; + if (D < 4096) + TotalCalls4KB += W; + if (D < (2 << 20)) + TotalCalls2MB += W; Dist += Arc.weight() * D; if (PrintDetailed) outs() << format("BOLT-INFO: arc: %u [@%lu+%.1lf] -> %u [@%lu]: " "weight = %.0lf, callDist = %f\n", - Arc.src(), - FuncAddr[Arc.src()], - Arc.avgCallOffset(), - Arc.dst(), - FuncAddr[Arc.dst()], - Arc.weight(), D); + Arc.src(), FuncAddr[Arc.src()], + Arc.avgCallOffset(), Arc.dst(), + FuncAddr[Arc.dst()], Arc.weight(), D); } TotalCalls += Calls; TotalDistance += Dist; @@ -290,39 +282,68 @@ switch (opts::ReorderFunctions) { case RT_NONE: break; - case RT_EXEC_COUNT: - { - std::vector SortedFunctions(BFs.size()); - uint32_t Index = 0; - llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(), - [](BinaryFunction &BF) { return &BF; }); - llvm::stable_sort(SortedFunctions, [&](const BinaryFunction *A, - const BinaryFunction *B) { - if (A->isIgnored()) + case RT_EXEC_COUNT: { + std::vector SortedFunctions(BFs.size()); + uint32_t Index = 0; + llvm::transform(llvm::make_second_range(BFs), SortedFunctions.begin(), + [](BinaryFunction &BF) { return &BF; }); + llvm::stable_sort(SortedFunctions, [&](const BinaryFunction *A, + const BinaryFunction *B) { + if (A->isIgnored()) + return false; + const size_t PadA = opts::padFunction(*A); + const size_t PadB = opts::padFunction(*B); + if (!PadA || !PadB) { + if (PadA) + return true; + if (PadB) return false; - const size_t PadA = opts::padFunction(*A); - const size_t PadB = opts::padFunction(*B); - if (!PadA || !PadB) { - if (PadA) - return true; - if (PadB) - return false; - } - return !A->hasProfile() && - (B->hasProfile() || - (A->getExecutionCount() > B->getExecutionCount())); - }); - for (BinaryFunction *BF : SortedFunctions) - if (BF->hasProfile()) - BF->setIndex(Index++); - } - break; + } + return !A->hasProfile() && (B->hasProfile() || (A->getExecutionCount() > + B->getExecutionCount())); + }); + for (BinaryFunction *BF : SortedFunctions) + if (BF->hasProfile()) + BF->setIndex(Index++); + } break; case RT_HFSORT: Clusters = clusterize(Cg); break; case RT_HFSORT_PLUS: Clusters = hfsortPlus(Cg); break; + case RT_CDS: { + // It is required that the sum of incoming arc weights is not greater + // than the number of samples for every function. Ensuring the call graph + // obeys the property before running the algorithm. + Cg.adjustArcWeights(); + + // Initialize CFG nodes and their data + std::vector FuncSizes; + std::vector FuncCounts; + using JumpT = std::pair; + std::vector> CallCounts; + std::vector CallOffsets; + for (NodeId F = 0; F < Cg.numNodes(); ++F) { + FuncSizes.push_back(Cg.size(F)); + FuncCounts.push_back(Cg.samples(F)); + for (NodeId Succ : Cg.successors(F)) { + const Arc &Arc = *Cg.findArc(F, Succ); + auto It = std::make_pair(F, Succ); + CallCounts.push_back(std::make_pair(It, Arc.weight())); + CallOffsets.push_back(uint64_t(Arc.avgCallOffset())); + } + } + + // Run the layout algorithm. + std::vector Result = + applyCDSLayout(FuncSizes, FuncCounts, CallCounts, CallOffsets); + + // Set the order of hot functions based on the layout + uint32_t Index = 0; + for (uint64_t FuncId : Result) + Cg.nodeIdToFunc(FuncId)->setIndex(Index++); + } break; case RT_PETTIS_HANSEN: Clusters = pettisAndHansen(Cg); break; @@ -330,74 +351,71 @@ std::srand(opts::RandomSeed); Clusters = randomClusters(Cg); break; - case RT_USER: - { - // Build LTOCommonNameMap - StringMap> LTOCommonNameMap; - for (const BinaryFunction &BF : llvm::make_second_range(BFs)) - for (StringRef Name : BF.getNames()) - if (std::optional LTOCommonName = getLTOCommonName(Name)) - LTOCommonNameMap[*LTOCommonName].push_back(BF.getAddress()); - - uint32_t Index = 0; - uint32_t InvalidEntries = 0; - for (const std::string &Function : readFunctionOrderFile()) { - std::vector FuncAddrs; - - BinaryData *BD = BC.getBinaryDataByName(Function); - if (!BD) { - // If we can't find the main symbol name, look for alternates. - uint32_t LocalID = 1; - while (true) { - const std::string FuncName = - Function + "/" + std::to_string(LocalID); - BD = BC.getBinaryDataByName(FuncName); - if (BD) - FuncAddrs.push_back(BD->getAddress()); - else - break; - LocalID++; - } - // Strip LTO suffixes - if (std::optional CommonName = getLTOCommonName(Function)) - if (LTOCommonNameMap.contains(*CommonName)) - llvm::append_range(FuncAddrs, LTOCommonNameMap[*CommonName]); - } else { - FuncAddrs.push_back(BD->getAddress()); + case RT_USER: { + // Build LTOCommonNameMap + StringMap> LTOCommonNameMap; + for (const BinaryFunction &BF : llvm::make_second_range(BFs)) + for (StringRef Name : BF.getNames()) + if (std::optional LTOCommonName = getLTOCommonName(Name)) + LTOCommonNameMap[*LTOCommonName].push_back(BF.getAddress()); + + uint32_t Index = 0; + uint32_t InvalidEntries = 0; + for (const std::string &Function : readFunctionOrderFile()) { + std::vector FuncAddrs; + + BinaryData *BD = BC.getBinaryDataByName(Function); + if (!BD) { + // If we can't find the main symbol name, look for alternates. + uint32_t LocalID = 1; + while (true) { + const std::string FuncName = Function + "/" + std::to_string(LocalID); + BD = BC.getBinaryDataByName(FuncName); + if (BD) + FuncAddrs.push_back(BD->getAddress()); + else + break; + LocalID++; } + // Strip LTO suffixes + if (std::optional CommonName = getLTOCommonName(Function)) + if (LTOCommonNameMap.contains(*CommonName)) + llvm::append_range(FuncAddrs, LTOCommonNameMap[*CommonName]); + } else { + FuncAddrs.push_back(BD->getAddress()); + } + + if (FuncAddrs.empty()) { + if (opts::Verbosity >= 1) + errs() << "BOLT-WARNING: Reorder functions: can't find function " + << "for " << Function << "\n"; + ++InvalidEntries; + continue; + } - if (FuncAddrs.empty()) { + for (const uint64_t FuncAddr : FuncAddrs) { + const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr); + assert(FuncBD); + + BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol()); + if (!BF) { if (opts::Verbosity >= 1) errs() << "BOLT-WARNING: Reorder functions: can't find function " << "for " << Function << "\n"; ++InvalidEntries; - continue; - } - - for (const uint64_t FuncAddr : FuncAddrs) { - const BinaryData *FuncBD = BC.getBinaryDataAtAddress(FuncAddr); - assert(FuncBD); - - BinaryFunction *BF = BC.getFunctionForSymbol(FuncBD->getSymbol()); - if (!BF) { - if (opts::Verbosity >= 1) - errs() << "BOLT-WARNING: Reorder functions: can't find function " - << "for " << Function << "\n"; - ++InvalidEntries; - break; - } - if (!BF->hasValidIndex()) - BF->setIndex(Index++); - else if (opts::Verbosity > 0) - errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function - << "\n"; + break; } + if (!BF->hasValidIndex()) + BF->setIndex(Index++); + else if (opts::Verbosity > 0) + errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function + << "\n"; } - if (InvalidEntries) - errs() << "BOLT-WARNING: Reorder functions: can't find functions for " - << InvalidEntries << " entries in -function-order list\n"; } - break; + if (InvalidEntries) + errs() << "BOLT-WARNING: Reorder functions: can't find functions for " + << InvalidEntries << " entries in -function-order list\n"; + } break; } reorder(std::move(Clusters), BFs);