diff --git a/bolt/include/bolt/Passes/ReorderFunctions.h b/bolt/include/bolt/Passes/ReorderFunctions.h --- a/bolt/include/bolt/Passes/ReorderFunctions.h +++ b/bolt/include/bolt/Passes/ReorderFunctions.h @@ -32,6 +32,7 @@ RT_EXEC_COUNT, RT_HFSORT, RT_HFSORT_PLUS, + RT_CDS, RT_PETTIS_HANSEN, RT_RANDOM, RT_USER diff --git a/bolt/lib/Passes/ReorderFunctions.cpp b/bolt/lib/Passes/ReorderFunctions.cpp --- a/bolt/lib/Passes/ReorderFunctions.cpp +++ b/bolt/lib/Passes/ReorderFunctions.cpp @@ -15,6 +15,7 @@ #include "bolt/Utils/Utils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/CodeLayout.h" #include #define DEBUG_TYPE "hfsort" @@ -41,6 +42,8 @@ "use hfsort algorithm"), clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, "hfsort+", "use hfsort+ algorithm"), + clEnumValN(bolt::ReorderFunctions::RT_CDS, "cds", + "use cache-directed sort"), clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN, "pettis-hansen", "use Pettis-Hansen algorithm"), clEnumValN(bolt::ReorderFunctions::RT_RANDOM, "random", @@ -309,6 +312,36 @@ case RT_HFSORT_PLUS: Clusters = hfsortPlus(Cg); break; + case RT_CDS: { + // It is required that the sum of incoming arc weights is not greater + // than the number of samples for every function. Ensuring the call graph + // obeys the property before running the algorithm. + Cg.adjustArcWeights(); + + // Initialize CFG nodes and their data + std::vector FuncSizes; + std::vector FuncCounts; + using JumpT = std::pair; + std::vector> CallCounts; + std::vector CallOffsets; + for (NodeId F = 0; F < Cg.numNodes(); ++F) { + FuncSizes.push_back(Cg.size(F)); + FuncCounts.push_back(Cg.samples(F)); + for (NodeId Succ : Cg.successors(F)) { + const Arc &Arc = *Cg.findArc(F, Succ); + auto It = std::make_pair(F, Succ); + CallCounts.push_back(std::make_pair(It, Arc.weight())); + CallOffsets.push_back(uint64_t(Arc.avgCallOffset())); + } + } + + // Run the layout algorithm. + std::vector Result = + applyCDSLayout(FuncSizes, FuncCounts, CallCounts, CallOffsets); + + // Create a single cluster from the computed order of hot functions. + Clusters.emplace_back(Cluster(Result, Cg)); + } break; case RT_PETTIS_HANSEN: Clusters = pettisAndHansen(Cg); break;