diff --git a/llvm/test/tools/llvm-profgen/Inputs/noinline-tailcall-probe.perfbin b/llvm/test/tools/llvm-profgen/Inputs/noinline-tailcall-probe.perfbin new file mode 100644 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@ + +int s; +int bar(int x, int y) { + if (x % 3) { + return x - y; + } + return x + y; +} + +int foo() { + int i = 0; + while (i++ < 4000) + if (i % 91) s = bar(i, s); else s += 30; + return 0; +} + +void go() { + foo(); +} + +int main() { + int i = 0; + while (i++ < 4000) + go(); + printf("sum is %d\n", s); + return 0; +} diff --git a/llvm/tools/llvm-profgen/CMakeLists.txt b/llvm/tools/llvm-profgen/CMakeLists.txt --- a/llvm/tools/llvm-profgen/CMakeLists.txt +++ b/llvm/tools/llvm-profgen/CMakeLists.txt @@ -20,4 +20,5 @@ CSPreInliner.cpp ProfiledBinary.cpp ProfileGenerator.cpp + MissingFrameInferrer.cpp ) diff --git a/llvm/tools/llvm-profgen/MissingFrameInferrer.h b/llvm/tools/llvm-profgen/MissingFrameInferrer.h new file mode 100644 --- /dev/null +++ b/llvm/tools/llvm-profgen/MissingFrameInferrer.h @@ -0,0 +1,115 @@ +//===-- MissingFrameInferrer.h - Missing frame inferrer ---------- C++/-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_PROFGEN_MISSINGFRAMEINFERRER_H +#define LLVM_TOOLS_LLVM_PROFGEN_MISSINGFRAMEINFERRER_H + +#include "PerfReader.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" +#include +#include + +namespace llvm { +namespace sampleprof { + +class ProfiledBinary; +struct BinaryFunction; + +class MissingFrameInferrer { +public: + MissingFrameInferrer(ProfiledBinary *Binary) : Binary(Binary) {} + + // Defininig a frame transition from a caller function to the callee function. + using CallerCalleePair = std::pair; + + void initialize(const ContextSampleCounterMap *SampleCounters); + + // Given an input `Context`, output `NewContext` with inferred missing tail + // call frames. + void inferMissingFrames(const SmallVectorImpl &Context, + SmallVectorImpl &NewContext); + +private: + friend class ProfiledBinary; + + // Compute a unique tail call path for a pair of source frame address and + // target frame address. Append the unique path prefix (not including `To`) to + // `UniquePath` if exists. Return the whether this's a unqiue tail call + // path. The source/dest frame will typically be a pair of adjacent frame + // entries of call stack samples. + bool inferMissingFrames(uint64_t From, uint64_t To, + SmallVectorImpl &UniquePath); + + // Compute a unique tail call path from the source frame address to the target + // function. Output the unique path prefix (not including `To`) in + // `UniquePath` if exists. Return the number of possibly availabe tail call + // paths. + uint64_t computeUniqueTailCallPath(uint64_t From, BinaryFunction *To, + SmallVectorImpl &UniquePath); + + // Compute a unique tail call path from the source function to the target + // function. Output the unique path prefix (not including `To`) in + // `UniquePath` if exists. Return the number of possibly availabe tail call + // paths. + uint64_t computeUniqueTailCallPath(BinaryFunction *From, BinaryFunction *To, + SmallVectorImpl &UniquePath); + + ProfiledBinary *Binary; + + // A map of call instructions to their target addresses. This is first + // populated with static call edges but then trimmed down to dynamic call + // edges based on LBR samples. + std::unordered_map> CallEdges; + + // A map of tail call instructions to their target addresses. This is first + // populated with static call edges but then trimmed down to dynamic call + // edges based on LBR samples. + std::unordered_map> TailCallEdges; + + // Dynamic call targets in terms of BinaryFunction for any calls. + std::unordered_map> CallEdgesF; + + // Dynamic call targets in terms of BinaryFunction for tail calls. + std::unordered_map> + TailCallEdgesF; + + // Dynamic tail call targets of caller functions. + std::unordered_map> FuncToTailCallMap; + + // Functions that are reachable via tail calls. + DenseSet TailCallTargetFuncs; + + struct PairHash { + std::size_t operator()( + const std::pair &Pair) const { + return std::hash()(Pair.first) ^ + std::hash()(Pair.second); + } + }; + + // Cached results from a CallerCalleePair to a unique call path between them. + std::unordered_map, PairHash> + UniquePaths; + // Cached results from CallerCalleePair to the number of available call paths. + std::unordered_map NonUniquePaths; + + DenseSet Visiting; + + uint32_t CurSearchingDepth = 0; + +#ifndef NDEBUG + DenseSet> ReachableViaUniquePaths; + DenseSet> Unreachables; + DenseSet> ReachableViaMultiPaths; +#endif +}; +} // end namespace sampleprof +} // end namespace llvm + +#endif diff --git a/llvm/tools/llvm-profgen/MissingFrameInferrer.cpp b/llvm/tools/llvm-profgen/MissingFrameInferrer.cpp new file mode 100644 --- /dev/null +++ b/llvm/tools/llvm-profgen/MissingFrameInferrer.cpp @@ -0,0 +1,318 @@ +//===-- MissingFrameInferrer.cpp - Missing frame inferrer --------- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MissingFrameInferrer.h" +#include "PerfReader.h" +#include "ProfiledBinary.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/Statistic.h" +#include +#include +#include +#include +#include + +#define DEBUG_TYPE "missing-frame-inferrer" + +using namespace llvm; +using namespace sampleprof; + +STATISTIC(TailCallUniReachable, + "Number of frame pairs reachable via a unique tail call path"); +STATISTIC(TailCallMultiReachable, + "Number of frame pairs reachable via a multiple tail call paths"); +STATISTIC(TailCallUnreachable, + "Number of frame pairs unreachable via any tail call path"); +STATISTIC(TailCallFuncSingleTailCalls, + "Number of functions with single tail call site"); +STATISTIC(TailCallFuncMultipleTailCalls, + "Number of functions with multiple tail call sites"); +STATISTIC(TailCallMaxTailCallPath, "Length of the longest tail call path"); + +static cl::opt + MaximumSearchDepth("max-search-depth", cl::init(UINT32_MAX - 1), + cl::desc("The maximum levels the DFS-based missing " + "frame search should go with")); + +void MissingFrameInferrer::initialize( + const ContextSampleCounterMap *SampleCounters) { + // Refine call edges based on LBR samples. + if (SampleCounters) { + std::unordered_map> SampledCalls; + std::unordered_map> SampledTailCalls; + + // Populate SampledCalls based on static call sites. Similarly to + // SampledTailCalls. + for (const auto &CI : *SampleCounters) { + for (auto Item : CI.second.BranchCounter) { + auto From = Item.first.first; + auto To = Item.first.second; + if (CallEdges.count(From)) { + assert(CallEdges[From].size() == 1 && + "A callsite should only appear once with either a known or a " + "zero (unknown) target value at this point"); + SampledCalls[From].insert(To); + } + if (TailCallEdges.count(From)) { + assert(TailCallEdges[From].size() == 1 && + "A callsite should only appear once with either a known or a " + "zero (unknown) target value at this point"); + FuncRange *FromFRange = Binary->findFuncRange(From); + FuncRange *ToFRange = Binary->findFuncRange(To); + if (FromFRange != ToFRange) + SampledTailCalls[From].insert(To); + } + } + } + + // Replace static edges with dynamic edges. + CallEdges = SampledCalls; + TailCallEdges = SampledTailCalls; + } + + // Populate function-based edges. This is to speed up address to function + // translation. + for (auto Call : CallEdges) + for (auto Target : Call.second) + if (FuncRange *ToFRange = Binary->findFuncRange(Target)) + CallEdgesF[Call.first].insert(ToFRange->Func); + + for (auto Call : TailCallEdges) { + for (auto Target : Call.second) { + if (FuncRange *ToFRange = Binary->findFuncRange(Target)) { + TailCallEdgesF[Call.first].insert(ToFRange->Func); + TailCallTargetFuncs.insert(ToFRange->Func); + } + } + if (FuncRange *FromFRange = Binary->findFuncRange(Call.first)) + FuncToTailCallMap[FromFRange->Func].push_back(Call.first); + } + +#if LLVM_ENABLE_STATS + for (auto F : FuncToTailCallMap) { + assert(F.second.size() > 0 && ""); + if (F.second.size() > 1) + TailCallFuncMultipleTailCalls++; + else + TailCallFuncSingleTailCalls++; + } +#endif + +#ifndef NDEBUG + auto PrintCallTargets = + [&](const std::unordered_map> + &CallTargets, + bool IsTailCall) { + for (const auto &Targets : CallTargets) { + for (const auto &Target : Targets.second) { + dbgs() << (IsTailCall ? "TailCall" : "Call"); + dbgs() << " From " << format("%8" PRIx64, Targets.first) << " to " + << format("%8" PRIx64, Target) << "\n"; + } + } + }; + + LLVM_DEBUG(dbgs() << "============================\n "; + dbgs() << "Call targets:\n"; + PrintCallTargets(CallEdges, false); + dbgs() << "\nTail call targets:\n"; + PrintCallTargets(CallEdges, true); + dbgs() << "============================\n";); +#endif +} + +uint64_t MissingFrameInferrer::computeUniqueTailCallPath( + BinaryFunction *From, BinaryFunction *To, SmallVectorImpl &Path) { + // Search for a unique path comprised of only tail call edges for a given + // source and target frame address on the a tail call graph that consists of + // only tail call edges. Note that only a unique path counts. Multiple paths + // are treated unreachable. + if (From == To) + return 1; + + // Ignore cyclic paths. Since we are doing a recursive DFS walk, if the source + // frame being visited is already in the stack, it means we are seeing a + // cycle. This is done before querying the cached result because the cached + // result may be computed based on the same path. Consider the following case: + // A -> B, B -> A, A -> D + // When computing unique reachablity from A to D, the cached result for (B,D) + // should not be counted since the unique path B->A->D is basically the same + // path as A->D. Counting that with invalidate the uniqueness from A to D. + if (Visiting.contains(From)) + return 0; + + // If already computed, return the cached result. + auto I = UniquePaths.find({From, To}); + if (I != UniquePaths.end()) { + Path.append(I->second.begin(), I->second.end()); + return 1; + } + + auto J = NonUniquePaths.find({From, To}); + if (J != NonUniquePaths.end()) { + return J->second; + } + + uint64_t Pos = Path.size(); + + // DFS walk each outgoing tail call edges. + // Bail out if we are already at the the maximum searching depth. + if (CurSearchingDepth == MaximumSearchDepth) + return 0; + + + if (!FuncToTailCallMap.count(From)) + return 0; + + CurSearchingDepth++; + Visiting.insert(From); + uint64_t NumPaths = 0; + for (auto TailCall : FuncToTailCallMap[From]) { + NumPaths += computeUniqueTailCallPath(TailCall, To, Path); + // Stop analyzing the remaining if we are already seeing more than one + // reachable paths. + if (NumPaths > 1) + break; + } + CurSearchingDepth--; + Visiting.erase(From); + + // Undo already-computed path if it is not unique. + if (NumPaths != 1) { + Path.pop_back_n(Path.size() - Pos); + } + + // Cache the result. + if (NumPaths == 1) { + UniquePaths[{From, To}].assign(Path.begin() + Pos, Path.end()); +#if LLVM_ENABLE_STATS + auto &LocalPath = UniquePaths[{From, To}]; + assert((LocalPath.size() <= MaximumSearchDepth + 1) && + "Path should not be longer than the maximum searching depth"); + TailCallMaxTailCallPath = + std::max(LocalPath.size(), TailCallMaxTailCallPath.getValue()); +#endif + } else { + NonUniquePaths[{From, To}] = NumPaths; + } + + return NumPaths; +} + +uint64_t MissingFrameInferrer::computeUniqueTailCallPath( + uint64_t From, BinaryFunction *To, SmallVectorImpl &Path) { + if (!TailCallEdgesF.count(From)) + return 0; + Path.push_back(From); + uint64_t NumPaths = 0; + for (auto Target : TailCallEdgesF[From]) { + NumPaths += computeUniqueTailCallPath(Target, To, Path); + // Stop analyzing the remaining if we are already seeing more than one + // reachable paths. + if (NumPaths > 1) + break; + } + + // Undo already-computed path if it is not unique. + if (NumPaths != 1) + Path.pop_back(); + return NumPaths; +} + +bool MissingFrameInferrer::inferMissingFrames( + uint64_t From, uint64_t To, SmallVectorImpl &UniquePath) { + assert(!TailCallEdgesF.count(From) && + "transition between From and To cannot be via a tailcall otherwise " + "they would not show up at the same time"); + UniquePath.push_back(From); + uint64_t Pos = UniquePath.size(); + + FuncRange *ToFRange = Binary->findFuncRange(To); + if (!ToFRange) + return false; + + // Bail out if caller has no known outgoing call edges. + if (!CallEdgesF.count(From)) + return false; + + // Done with the inference if the calle is reachable via a single callsite. + // This may not be accurate but it improves the search throughput. + for (auto Target : CallEdgesF[From]) { + if (Target == ToFRange->Func) + return true; + } + + // Bail out if callee is not tailcall reachable at all. + if (!TailCallTargetFuncs.contains(ToFRange->Func)) + return false; + + Visiting.clear(); + CurSearchingDepth = 0; + uint64_t NumPaths = 0; + for (auto Target : CallEdgesF[From]) { + NumPaths += + computeUniqueTailCallPath(Target, ToFRange->Func, UniquePath); + // Stop analyzing the remaining if we are already seeing more than one + // reachable paths. + if (NumPaths > 1) + break; + } + + // Undo already-computed path if it is not unique. + if (NumPaths != 1) { + UniquePath.pop_back_n(UniquePath.size() - Pos); + assert(UniquePath.back() == From && "broken path"); + } + +#ifndef NDEBUG + if (NumPaths == 1) { + if (ReachableViaUniquePaths.insert({From, ToFRange->StartAddress}).second) + TailCallUniReachable++; + } else if (NumPaths == 0) { + if (Unreachables.insert({From, ToFRange->StartAddress}).second) { + TailCallUnreachable++; + LLVM_DEBUG(dbgs() << "No path found from " + << format("%8" PRIx64 ":", From) << " to " + << format("%8" PRIx64 ":", ToFRange->StartAddress) + << "\n"); + } + } else if (NumPaths > 1) { + if (ReachableViaMultiPaths.insert({From, ToFRange->StartAddress}) + .second) { + TailCallMultiReachable++; + LLVM_DEBUG(dbgs() << "Multiple paths found from " + << format("%8" PRIx64 ":", From) << " to " + << format("%8" PRIx64 ":", ToFRange->StartAddress) + << "\n"); + } + } +#endif + + return NumPaths == 1; +} + +void MissingFrameInferrer::inferMissingFrames( + const SmallVectorImpl &Context, + SmallVectorImpl &NewContext) { + if (Context.size() == 1) { + NewContext = Context; + return; + } + + NewContext.clear(); + for (uint64_t I = 1; I < Context.size(); I++) { + inferMissingFrames(Context[I - 1], Context[I], NewContext); + } + NewContext.push_back(Context.back()); + + assert((NewContext.size() >= Context.size()) && + "Inferred context should include all frames in the original context"); + assert((NewContext.size() > Context.size() || NewContext == Context) && + "Inferred context should be exactly the same " + "with the original context"); +} diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -334,18 +334,18 @@ // Fill in function body samples from probes void populateBodySamplesWithProbes(const RangeSample &RangeCounter, - SampleContextFrames ContextStack); + const AddrBasedCtxKey *CtxKey); // Fill in boundary samples for a call probe void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter, - SampleContextFrames ContextStack); + const AddrBasedCtxKey *CtxKey); ContextTrieNode * - getContextNodeForLeafProbe(SampleContextFrames ContextStack, + getContextNodeForLeafProbe(const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe); // Helper function to get FunctionSamples for the leaf probe FunctionSamples & - getFunctionProfileForLeafProbe(SampleContextFrames ContextStack, + getFunctionProfileForLeafProbe(const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe); void convertToProfileMap(ContextTrieNode &Node, @@ -358,6 +358,13 @@ bool collectFunctionsFromLLVMProfile( std::unordered_set &ProfiledFunctions) override; + void initializeMissingFrameInferrer(); + + // Given an input `Context`, output `NewContext` with inferred missing tail + // call frames. + void inferMissingFrames(const SmallVectorImpl &Context, + SmallVectorImpl &NewContext); + ContextTrieNode &getRootContext() { return ContextTracker.getRootContext(); }; // The container for holding the FunctionSamples used by context trie. diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "ProfileGenerator.h" #include "ErrorHandling.h" +#include "MissingFrameInferrer.h" #include "PerfReader.h" #include "ProfiledBinary.h" #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" @@ -94,6 +95,12 @@ "gen-cs-nested-profile", cl::Hidden, cl::init(true), cl::desc("Generate nested function profiles for CSSPGO")); +cl::opt InferMissingFrames( + "infer-missing-frames", llvm::cl::init(true), + llvm::cl::desc( + "Infer missing call frames due to compiler tail call elimination."), + llvm::cl::Optional); + using namespace llvm; using namespace sampleprof; @@ -769,8 +776,11 @@ collectProfiledFunctions(); - if (Binary->usePseudoProbes()) + if (Binary->usePseudoProbes()) { Binary->decodePseudoProbe(); + if (InferMissingFrames) + initializeMissingFrameInferrer(); + } if (SampleCounters) { if (Binary->usePseudoProbes()) { @@ -786,6 +796,16 @@ postProcessProfiles(); } +void CSProfileGenerator::initializeMissingFrameInferrer() { + Binary->getMissingContextInferrer()->initialize(SampleCounters); +} + +void CSProfileGenerator::inferMissingFrames( + const SmallVectorImpl &Context, + SmallVectorImpl &NewContext) { + Binary->inferMissingFrames(Context, NewContext); +} + void CSProfileGenerator::computeSizeForProfiledFunctions() { for (auto *Func : Binary->getProfiledFunctions()) Binary->computeInlinedContextSizeForFunc(Func); @@ -1109,18 +1129,16 @@ for (const auto &CI : *SampleCounters) { const AddrBasedCtxKey *CtxKey = dyn_cast(CI.first.getPtr()); - SampleContextFrameVector ContextStack; - extractPrefixContextStack(ContextStack, CtxKey->Context, Binary); // Fill in function body samples from probes, also infer caller's samples // from callee's probe - populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack); + populateBodySamplesWithProbes(CI.second.RangeCounter, CtxKey); // Fill in boundary samples for a call probe - populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack); + populateBoundarySamplesWithProbes(CI.second.BranchCounter, CtxKey); } } void CSProfileGenerator::populateBodySamplesWithProbes( - const RangeSample &RangeCounter, SampleContextFrames ContextStack) { + const RangeSample &RangeCounter, const AddrBasedCtxKey *CtxKey) { ProbeCounterMap ProbeCounter; // Extract the top frame probes by looking up each address among the range in // the Address2ProbeMap @@ -1136,8 +1154,7 @@ if (!Probe->isBlock() || Count == 0) continue; - ContextTrieNode *ContextNode = - getContextNodeForLeafProbe(ContextStack, Probe); + ContextTrieNode *ContextNode = getContextNodeForLeafProbe(CtxKey, Probe); FunctionSamples &FunctionProfile = *ContextNode->getFunctionSamples(); // Record the current frame and FunctionProfile whenever samples are // collected for non-danglie probes. This is for reporting all of the @@ -1181,7 +1198,7 @@ } void CSProfileGenerator::populateBoundarySamplesWithProbes( - const BranchSample &BranchCounter, SampleContextFrames ContextStack) { + const BranchSample &BranchCounter, const AddrBasedCtxKey *CtxKey) { for (const auto &BI : BranchCounter) { uint64_t SourceAddress = BI.first.first; uint64_t TargetAddress = BI.first.second; @@ -1191,7 +1208,7 @@ if (CallProbe == nullptr) continue; FunctionSamples &FunctionProfile = - getFunctionProfileForLeafProbe(ContextStack, CallProbe); + getFunctionProfileForLeafProbe(CtxKey, CallProbe); FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count); FunctionProfile.addTotalSamples(Count); StringRef CalleeName = getCalleeNameForAddress(TargetAddress); @@ -1203,7 +1220,23 @@ } ContextTrieNode *CSProfileGenerator::getContextNodeForLeafProbe( - SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) { + const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) { + + const SmallVectorImpl *PContext = &CtxKey->Context; + SmallVector NewContext; + + if (InferMissingFrames) { + SmallVector Context = CtxKey->Context; + // Append leaf frame for a complete inference. + Context.push_back(LeafProbe->getAddress()); + inferMissingFrames(Context, NewContext); + // Pop out the leaf probe that was pushed in above. + NewContext.pop_back(); + PContext = &NewContext; + } + + SampleContextFrameVector ContextStack; + extractPrefixContextStack(ContextStack, *PContext, Binary); // Explicitly copy the context for appending the leaf context SampleContextFrameVector NewContextStack(ContextStack.begin(), @@ -1229,9 +1262,8 @@ } FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe( - SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) { - return *getContextNodeForLeafProbe(ContextStack, LeafProbe) - ->getFunctionSamples(); + const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) { + return *getContextNodeForLeafProbe(CtxKey, LeafProbe)->getFunctionSamples(); } } // end namespace sampleprof diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -53,6 +53,7 @@ namespace sampleprof { class ProfiledBinary; +class MissingFrameInferrer; struct InstructionPointer { const ProfiledBinary *Binary; @@ -250,6 +251,10 @@ // Estimate and track function prolog and epilog ranges. PrologEpilogTracker ProEpilogTracker; + // Infer missing frames due to compiler optimizations such as tail call + // elimination. + std::unique_ptr MissingContextInferrer; + // Track function sizes under different context BinarySizeContextTracker FuncSizeTracker; @@ -309,9 +314,9 @@ void populateElfSymbolAddressList(const ELFObjectFileBase *O); // A function may be spilt into multiple non-continuous address ranges. We use - // this to set whether start address of a function is the real entry of the + // this to set whether start a function range is the real entry of the // function and also set false to the non-function label. - void setIsFuncEntry(uint64_t Address, StringRef RangeSymName); + void setIsFuncEntry(FuncRange *FRange, StringRef RangeSymName); // Warn if no entry range exists in the function. void warnNoFuncEntry(); @@ -336,15 +341,8 @@ void load(); public: - ProfiledBinary(const StringRef ExeBinPath, const StringRef DebugBinPath) - : Path(ExeBinPath), DebugBinaryPath(DebugBinPath), ProEpilogTracker(this), - TrackFuncContextSize(EnableCSPreInliner && - UseContextCostForPreInliner) { - // Point to executable binary if debug info binary is not specified. - SymbolizerPath = DebugBinPath.empty() ? ExeBinPath : DebugBinPath; - setupSymbolizer(); - load(); - } + ProfiledBinary(const StringRef ExeBinPath, const StringRef DebugBinPath); + ~ProfiledBinary(); void decodePseudoProbe(); @@ -486,6 +484,9 @@ return FuncSizeTracker.getFuncSizeForContext(ContextNode); } + void inferMissingFrames(const SmallVectorImpl &Context, + SmallVectorImpl &NewContext); + // Load the symbols from debug table and populate into symbol list. void populateSymbolListFromDWARF(ProfileSymbolList &SymbolList); @@ -514,6 +515,10 @@ void flushSymbolizer() { Symbolizer.reset(); } + MissingFrameInferrer* getMissingContextInferrer() { + return MissingContextInferrer.get(); + } + // Compare two addresses' inline context bool inlineContextEqual(uint64_t Add1, uint64_t Add2); diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -8,6 +8,7 @@ #include "ProfiledBinary.h" #include "ErrorHandling.h" +#include "MissingFrameInferrer.h" #include "ProfileGenerator.h" #include "llvm/ADT/Triple.h" #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" @@ -15,6 +16,7 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/TargetSelect.h" #include @@ -54,6 +56,7 @@ "names only. Only work with show-disassembly-only")); extern cl::opt ShowDetailedWarning; +extern cl::opt InferMissingFrames; namespace llvm { namespace sampleprof { @@ -159,6 +162,20 @@ ProbeContext.pop_back(); } +ProfiledBinary::ProfiledBinary(const StringRef ExeBinPath, + const StringRef DebugBinPath) + : Path(ExeBinPath), DebugBinaryPath(DebugBinPath), ProEpilogTracker(this), + TrackFuncContextSize(EnableCSPreInliner && UseContextCostForPreInliner) { + // Point to executable binary if debug info binary is not specified. + SymbolizerPath = DebugBinPath.empty() ? ExeBinPath : DebugBinPath; + setupSymbolizer(); + if (InferMissingFrames) + MissingContextInferrer = std::make_unique(this); + load(); +} + +ProfiledBinary::~ProfiledBinary() {} + void ProfiledBinary::warnNoFuncEntry() { uint64_t NoFuncEntryNum = 0; for (auto &F : BinaryFunctions) { @@ -429,10 +446,8 @@ decodePseudoProbe(Obj); } -void ProfiledBinary::setIsFuncEntry(uint64_t Address, StringRef RangeSymName) { - // Note that the start address of each ELF section can be a non-function - // symbol, we need to binary search for the start of a real function range. - auto *FuncRange = findFuncRange(Address); +void ProfiledBinary::setIsFuncEntry(FuncRange *FuncRange, + StringRef RangeSymName) { // Skip external function symbol. if (!FuncRange) return; @@ -453,9 +468,8 @@ uint64_t StartAddress = Symbols[SI].Addr; uint64_t NextStartAddress = (SI + 1 < SE) ? Symbols[SI + 1].Addr : SectionAddress + SectSize; - setIsFuncEntry(StartAddress, - FunctionSamples::getCanonicalFnName(Symbols[SI].Name)); - + FuncRange *FRange = findFuncRange(StartAddress); + setIsFuncEntry(FRange, FunctionSamples::getCanonicalFnName(Symbols[SI].Name)); StringRef SymbolName = ShowCanonicalFnName ? FunctionSamples::getCanonicalFnName(Symbols[SI].Name) @@ -526,6 +540,43 @@ BranchAddressSet.insert(Address); } + // Record potential call targets for tail frame inference later-on. + if (InferMissingFrames && FRange) { + uint64_t Target = 0; + MIA->evaluateBranch(Inst, Address, Size, Target); + if (MCDesc.isCall()) { + // Indirect call targets are unknown at this point. Recording the + // unknown target (zero) for further LBR-based refinement. + MissingContextInferrer->CallEdges[Address].insert(Target); + } else if (MCDesc.isUnconditionalBranch()) { + assert(Target && + "target should be known for unconditional direct branch"); + // Any inter-function unconditional jump is considered tail call at + // this point. This is not 100% accurate and could further be + // optimized based on some source annotation. + FuncRange *ToFRange = findFuncRange(Target); + if (ToFRange && ToFRange->Func != FRange->Func) + MissingContextInferrer->TailCallEdges[Address].insert(Target); + LLVM_DEBUG({ + dbgs() << "Direct Tail call: " << format("%8" PRIx64 ":", Address); + IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), dbgs()); + dbgs() << "\n"; + }); + } else if (MCDesc.isIndirectBranch() && MCDesc.isBarrier()) { + // This is an indirect branch but not necessarily an indirect tail + // call. The isBarrier check is to filter out conditional branch. + // Similar with indirect call targets, recording the unknown target + // (zero) for further LBR-based refinement. + MissingContextInferrer->TailCallEdges[Address].insert(Target); + LLVM_DEBUG({ + dbgs() << "Indirect Tail call: " + << format("%8" PRIx64 ":", Address); + IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), dbgs()); + dbgs() << "\n"; + }); + } + } + if (InvalidInstLength) { WarnInvalidInsts(Address - InvalidInstLength, Address - 1); InvalidInstLength = 0; @@ -876,6 +927,12 @@ } } +void ProfiledBinary::inferMissingFrames( + const SmallVectorImpl &Context, + SmallVectorImpl &NewContext) { + MissingContextInferrer->inferMissingFrames(Context, NewContext); +} + InstructionPointer::InstructionPointer(const ProfiledBinary *Binary, uint64_t Address, bool RoundToNext) : Binary(Binary), Address(Address) {