diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -201,6 +201,7 @@ cl::Hidden); extern cl::opt EnableExtTspBlockPlacement; +extern cl::opt ApplyExtTspWithoutProfile; namespace llvm { extern cl::opt StaticLikelyProb; @@ -3419,7 +3420,8 @@ } // Apply a post-processing optimizing block placement. - if (MF.size() >= 3 && EnableExtTspBlockPlacement) { + if (MF.size() >= 3 && EnableExtTspBlockPlacement && + (ApplyExtTspWithoutProfile || MF.getFunction().hasProfileData())) { // Find a new placement and modify the layout of the blocks in the function. applyExtTsp(); diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp --- a/llvm/lib/Transforms/Utils/CodeLayout.cpp +++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp @@ -49,6 +49,11 @@ cl::desc("Enable machine block placement based on the ext-tsp model, " "optimizing I-cache utilization.")); +cl::opt ApplyExtTspWithoutProfile( + "ext-tsp-apply-without-profile", + cl::desc("Whether to apply ext-tsp placement for instances w/o profile"), + cl::init(true), cl::Hidden, cl::ZeroOrMore); + // Algorithm-specific constants. The values are tuned for the best performance // of large-scale front-end bound binaries. static cl::opt @@ -67,6 +72,12 @@ "ext-tsp-backward-distance", cl::Hidden, cl::init(640), cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP")); +// The maximum size of a chain created by the algorithm. The size is bounded +// so that the algorithm can efficiently process extremely large instance. +static cl::opt + MaxChainSize("ext-tsp-max-chain-size", cl::Hidden, cl::init(4096), + cl::desc("The maximum size of a chain to create.")); + // The maximum size of a chain for splitting. Larger values of the threshold // may yield better quality at the cost of worsen run-time. static cl::opt ChainSplitThreshold( @@ -226,6 +237,8 @@ const std::vector &blocks() const { return Blocks; } + size_t numBlocks() const { return Blocks.size(); } + const std::vector> &edges() const { return Edges; } @@ -502,7 +515,7 @@ AllEdges.reserve(AllJumps.size()); for (auto &Block : AllBlocks) { for (auto &Jump : Block.OutJumps) { - const auto SuccBlock = Jump->Target; + auto SuccBlock = Jump->Target; auto CurEdge = Block.CurChain->getEdge(SuccBlock->CurChain); // this edge is already present in the graph if (CurEdge != nullptr) { @@ -592,6 +605,10 @@ if (ChainPred == ChainSucc) continue; + // Stop early if the combined chain violates the maximum allowed size + if (ChainPred->numBlocks() + ChainSucc->numBlocks() >= MaxChainSize) + continue; + // Compute the gain of merging the two chains auto CurGain = getBestMergeGain(ChainPred, ChainSucc, ChainEdge); if (CurGain.score() <= EPS)