diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h --- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h +++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h @@ -64,16 +64,22 @@ using iterator = ProfiledCallGraphNode::iterator; // Constructor for non-CS profile. - ProfiledCallGraph(SampleProfileMap &ProfileMap) { + ProfiledCallGraph(SampleProfileMap &ProfileMap, + uint64_t IgnoreColdCallThreshold = 0) { assert(!FunctionSamples::ProfileIsCS && "CS flat profile is not handled here"); for (const auto &Samples : ProfileMap) { addProfiledCalls(Samples.second); } + + // Trim edges with weight up to `IgnoreColdCallThreshold`. This aims + // for a more stable call graph with "deterministic" edges from run to run. + trimColdEges(IgnoreColdCallThreshold); } // Constructor for CS profile. - ProfiledCallGraph(SampleContextTracker &ContextTracker) { + ProfiledCallGraph(SampleContextTracker &ContextTracker, + uint64_t IgnoreColdCallThreshold = 0) { // BFS traverse the context profile trie to add call edges for calls shown // in context. std::queue<ContextTrieNode *> Queue; @@ -121,11 +127,16 @@ ContextTracker.getFuncNameFor(Callee), Weight); } } + + // Trim edges with weight up to `IgnoreColdCallThreshold`. This aims + // for a more stable call graph with "deterministic" edges from run to run. + trimColdEges(IgnoreColdCallThreshold); } iterator begin() { return Root.Edges.begin(); } iterator end() { return Root.Edges.end(); } ProfiledCallGraphNode *getEntryNode() { return &Root; } + void addProfiledFunction(StringRef Name) { if (!ProfiledFunctions.count(Name)) { // Link to synthetic root to make sure every node is reachable @@ -148,8 +159,9 @@ auto EdgeIt = Edges.find(Edge); if (EdgeIt == Edges.end()) { Edges.insert(Edge); - } else if (EdgeIt->Weight < Edge.Weight) { - // Replace existing call edges with same target but smaller weight. + } else { + // Accumulate weight to the existing edge. 
+ Edge.Weight += EdgeIt->Weight; Edges.erase(EdgeIt); Edges.insert(Edge); } @@ -175,6 +187,24 @@ } } + // Trim edges with weight up to `Threshold`. Do not trim anything if + // `Threshold` is zero. + void trimColdEges(uint64_t Threshold = 0) { + if (!Threshold) + return; + + for (auto &Node : ProfiledFunctions) { + auto &Edges = Node.second.Edges; + auto I = Edges.begin(); + while (I != Edges.end()) { + if (I->Weight <= Threshold) + I = Edges.erase(I); + else + I++; + } + } + } + ProfiledCallGraphNode Root; StringMap<ProfiledCallGraphNode> ProfiledFunctions; }; diff --git a/llvm/tools/llvm-profgen/CSPreInliner.cpp b/llvm/tools/llvm-profgen/CSPreInliner.cpp --- a/llvm/tools/llvm-profgen/CSPreInliner.cpp +++ b/llvm/tools/llvm-profgen/CSPreInliner.cpp @@ -76,7 +76,12 @@ std::vector<StringRef> CSPreInliner::buildTopDownOrder() { std::vector<StringRef> Order; - ProfiledCallGraph ProfiledCG(ContextTracker); + // Trim cold edges to get a more stable call graph. This allows for a more + // stable top-down order which in turn helps the stability of the generated + // profile from run to run. + uint64_t ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( + (Summary->getDetailedSummary())); + ProfiledCallGraph ProfiledCG(ContextTracker, ColdCountThreshold); // Now that we have a profiled call graph, construct top-down order // by building up SCC and reversing SCC order.