diff --git a/bolt/lib/Passes/ReorderAlgorithm.cpp b/bolt/lib/Passes/ReorderAlgorithm.cpp --- a/bolt/lib/Passes/ReorderAlgorithm.cpp +++ b/bolt/lib/Passes/ReorderAlgorithm.cpp @@ -545,7 +545,7 @@ } // Run the layout algorithm - auto Result = applyExtTspLayout(BlockSizes, BlockCounts, JumpCounts); + auto Result = computeExtTspLayout(BlockSizes, BlockCounts, JumpCounts); Order.reserve(BF.getLayout().block_size()); for (uint64_t R : Result) Order.push_back(OrigOrder[R]); diff --git a/bolt/lib/Passes/ReorderFunctions.cpp b/bolt/lib/Passes/ReorderFunctions.cpp --- a/bolt/lib/Passes/ReorderFunctions.cpp +++ b/bolt/lib/Passes/ReorderFunctions.cpp @@ -346,8 +346,8 @@ } // Run the layout algorithm. - std::vector Result = - applyCDSLayout(FuncSizes, FuncCounts, CallCounts, CallOffsets); + std::vector Result = computeCacheDirectedLayout( + FuncSizes, FuncCounts, CallCounts, CallOffsets); // Create a single cluster from the computed order of hot functions. std::vector NodeOrder(Result.begin(), Result.end()); diff --git a/llvm/include/llvm/Transforms/Utils/CodeLayout.h b/llvm/include/llvm/Transforms/Utils/CodeLayout.h --- a/llvm/include/llvm/Transforms/Utils/CodeLayout.h +++ b/llvm/include/llvm/Transforms/Utils/CodeLayout.h @@ -14,6 +14,7 @@ #ifndef LLVM_TRANSFORMS_UTILS_CODELAYOUT_H #define LLVM_TRANSFORMS_UTILS_CODELAYOUT_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include @@ -34,24 +35,22 @@ /// \p EdgeCounts: The execution counts of every edge (jump) in the profile. The /// map also defines the edges in CFG and should include 0-count edges. /// \returns The best block order found. -std::vector -applyExtTspLayout(const std::vector &NodeSizes, - const std::vector &NodeCounts, - const std::vector &EdgeCounts); +std::vector computeExtTspLayout(ArrayRef NodeSizes, + ArrayRef NodeCounts, + ArrayRef EdgeCounts); /// Estimate the "quality" of a given node order in CFG. The higher the score, /// the better the order is. The score is designed to reflect the locality of /// the given order, which is anti-correlated with the number of I-cache misses /// in a typical execution of the function. -double calcExtTspScore(const std::vector &Order, - const std::vector &NodeSizes, - const std::vector &NodeCounts, - const std::vector &EdgeCounts); +double calcExtTspScore(ArrayRef Order, ArrayRef NodeSizes, + ArrayRef NodeCounts, + ArrayRef EdgeCounts); /// Estimate the "quality" of the current node order in CFG. -double calcExtTspScore(const std::vector &NodeSizes, - const std::vector &NodeCounts, - const std::vector &EdgeCounts); +double calcExtTspScore(ArrayRef NodeSizes, + ArrayRef NodeCounts, + ArrayRef EdgeCounts); /// Algorithm-specific params for Cache-Directed Sort. The values are tuned for /// the best performance of large-scale front-end bound binaries. @@ -75,17 +74,15 @@ /// map also defines the edges in CFG and should include 0-count edges. /// \p CallOffsets: The offsets of the calls from their source nodes. /// \returns The best function order found. -std::vector applyCDSLayout(const std::vector &FuncSizes, - const std::vector &FuncCounts, - const std::vector &CallCounts, - const std::vector &CallOffsets); +std::vector computeCacheDirectedLayout( + ArrayRef FuncSizes, ArrayRef FuncCounts, + ArrayRef CallCounts, ArrayRef CallOffsets); /// Apply a Cache-Directed Sort with a custom config. -std::vector applyCDSLayout(const CDSortConfig &Config, - const std::vector &FuncSizes, - const std::vector &FuncCounts, - const std::vector &CallCounts, - const std::vector &CallOffsets); +std::vector computeCacheDirectedLayout( + const CDSortConfig &Config, ArrayRef FuncSizes, + ArrayRef FuncCounts, ArrayRef CallCounts, + ArrayRef CallOffsets); } // end namespace llvm diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -3534,7 +3534,7 @@ calcExtTspScore(BlockSizes, BlockCounts, JumpCounts))); // Run the layout algorithm. - auto NewOrder = applyExtTspLayout(BlockSizes, BlockCounts, JumpCounts); + auto NewOrder = computeExtTspLayout(BlockSizes, BlockCounts, JumpCounts); std::vector NewBlockOrder; NewBlockOrder.reserve(F->size()); for (uint64_t Node : NewOrder) { diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp --- a/llvm/lib/Transforms/Utils/CodeLayout.cpp +++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp @@ -318,8 +318,8 @@ Edges.push_back(std::make_pair(Other, Edge)); } - void merge(ChainT *Other, const std::vector &MergedBlocks) { - Nodes = MergedBlocks; + void merge(ChainT *Other, std::vector MergedBlocks) { + Nodes = std::move(MergedBlocks); // Update the chain's data. ExecutionCount += Other->ExecutionCount; Size += Other->Size; @@ -549,15 +549,14 @@ /// The implementation of the ExtTSP algorithm. class ExtTSPImpl { public: - ExtTSPImpl(const std::vector &NodeSizes, - const std::vector &NodeCounts, - const std::vector &EdgeCounts) + ExtTSPImpl(ArrayRef NodeSizes, ArrayRef NodeCounts, + ArrayRef EdgeCounts) : NumNodes(NodeSizes.size()) { initialize(NodeSizes, NodeCounts, EdgeCounts); } /// Run the algorithm and return an optimized ordering of nodes. - void run(std::vector &Result) { + std::vector run() { // Pass 1: Merge nodes with their mutually forced successors mergeForcedPairs(); @@ -568,14 +567,14 @@ mergeColdChains(); // Collect nodes from all chains - concatChains(Result); + return concatChains(); } private: /// Initialize the algorithm's data structures. - void initialize(const std::vector &NodeSizes, - const std::vector &NodeCounts, - const std::vector &EdgeCounts) { + void initialize(const ArrayRef &NodeSizes, + const ArrayRef &NodeCounts, + const ArrayRef &EdgeCounts) { // Initialize nodes AllNodes.reserve(NumNodes); for (uint64_t Idx = 0; Idx < NumNodes; Idx++) { @@ -923,7 +922,7 @@ } /// Concatenate all chains into the final order. - void concatChains(std::vector &Order) { + std::vector concatChains() { // Collect chains and calculate density stats for their sorting. std::vector SortedChains; DenseMap ChainDensity; @@ -957,12 +956,12 @@ }); // Collect the nodes in the order specified by their chains. + std::vector Order; Order.reserve(NumNodes); - for (const ChainT *Chain : SortedChains) { - for (NodeT *Node : Chain->Nodes) { + for (const ChainT *Chain : SortedChains) + for (NodeT *Node : Chain->Nodes) Order.push_back(Node->Index); - } - } + return Order; } private: @@ -1004,7 +1003,7 @@ } /// Run the algorithm and return an ordered set of function clusters. - void run(std::vector &Result) { + std::vector run() { // Merge pairs of chains while improving the objective. mergeChainPairs(); @@ -1013,15 +1012,15 @@ << HotChains.size() << "\n"); // Collect nodes from all the chains. - concatChains(Result); + return concatChains(); } private: /// Initialize the algorithm's data structures. - void initialize(const std::vector &NodeSizes, - const std::vector &NodeCounts, - const std::vector &EdgeCounts, - const std::vector &EdgeOffsets) { + void initialize(const ArrayRef &NodeSizes, + const ArrayRef &NodeCounts, + const ArrayRef &EdgeCounts, + const ArrayRef &EdgeOffsets) { // Initialize nodes. AllNodes.reserve(NumNodes); for (uint64_t Node = 0; Node < NumNodes; Node++) { @@ -1302,7 +1301,7 @@ } /// Concatenate all chains into the final order. - void concatChains(std::vector &Order) { + std::vector concatChains() { // Collect chains and calculate density stats for their sorting. std::vector SortedChains; DenseMap ChainDensity; @@ -1332,10 +1331,12 @@ }); // Collect the nodes in the order specified by their chains. + std::vector Order; Order.reserve(NumNodes); for (const ChainT *Chain : SortedChains) for (NodeT *Node : Chain->Nodes) Order.push_back(Node->Index); + return Order; } private: @@ -1376,17 +1377,16 @@ } // end of anonymous namespace std::vector -llvm::applyExtTspLayout(const std::vector &NodeSizes, - const std::vector &NodeCounts, - const std::vector &EdgeCounts) { +llvm::computeExtTspLayout(ArrayRef NodeSizes, + ArrayRef NodeCounts, + ArrayRef EdgeCounts) { // Verify correctness of the input data. assert(NodeCounts.size() == NodeSizes.size() && "Incorrect input"); assert(NodeSizes.size() > 2 && "Incorrect input"); // Apply the reordering algorithm. ExtTSPImpl Alg(NodeSizes, NodeCounts, EdgeCounts); - std::vector Result; - Alg.run(Result); + std::vector Result = Alg.run(); // Verify correctness of the output. assert(Result.front() == 0 && "Original entry point is not preserved"); @@ -1394,10 +1394,10 @@ return Result; } -double llvm::calcExtTspScore(const std::vector &Order, - const std::vector &NodeSizes, - const std::vector &NodeCounts, - const std::vector &EdgeCounts) { +double llvm::calcExtTspScore(ArrayRef Order, + ArrayRef NodeSizes, + ArrayRef NodeCounts, + ArrayRef EdgeCounts) { // Estimate addresses of the blocks in memory. std::vector Addr(NodeSizes.size(), 0); for (size_t Idx = 1; Idx < Order.size(); Idx++) { @@ -1422,9 +1422,9 @@ return Score; } -double llvm::calcExtTspScore(const std::vector &NodeSizes, - const std::vector &NodeCounts, - const std::vector &EdgeCounts) { +double llvm::calcExtTspScore(ArrayRef NodeSizes, + ArrayRef NodeCounts, + ArrayRef EdgeCounts) { std::vector Order(NodeSizes.size()); for (size_t Idx = 0; Idx < NodeSizes.size(); Idx++) { Order[Idx] = Idx; @@ -1432,30 +1432,23 @@ return calcExtTspScore(Order, NodeSizes, NodeCounts, EdgeCounts); } -std::vector -llvm::applyCDSLayout(const CDSortConfig &Config, - const std::vector &FuncSizes, - const std::vector &FuncCounts, - const std::vector &CallCounts, - const std::vector &CallOffsets) { +std::vector llvm::computeCacheDirectedLayout( + const CDSortConfig &Config, ArrayRef FuncSizes, + ArrayRef FuncCounts, ArrayRef CallCounts, + ArrayRef CallOffsets) { // Verify correctness of the input data. assert(FuncCounts.size() == FuncSizes.size() && "Incorrect input"); // Apply the reordering algorithm. CDSortImpl Alg(Config, FuncSizes, FuncCounts, CallCounts, CallOffsets); - std::vector Result; - Alg.run(Result); - - // Verify correctness of the output. + std::vector Result = Alg.run(); assert(Result.size() == FuncSizes.size() && "Incorrect size of layout"); return Result; } -std::vector -llvm::applyCDSLayout(const std::vector &FuncSizes, - const std::vector &FuncCounts, - const std::vector &CallCounts, - const std::vector &CallOffsets) { +std::vector llvm::computeCacheDirectedLayout( + ArrayRef FuncSizes, ArrayRef FuncCounts, + ArrayRef CallCounts, ArrayRef CallOffsets) { CDSortConfig Config; // Populate the config from the command-line options. if (CacheEntries.getNumOccurrences() > 0) @@ -1466,5 +1459,6 @@ Config.DistancePower = DistancePower; if (FrequencyScale.getNumOccurrences() > 0) Config.FrequencyScale = FrequencyScale; - return applyCDSLayout(Config, FuncSizes, FuncCounts, CallCounts, CallOffsets); + return computeCacheDirectedLayout(Config, FuncSizes, FuncCounts, CallCounts, + CallOffsets); }