diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -589,27 +589,24 @@
 }
 
 void CSProfileGenerator::computeSizeForProfiledFunctions() {
-  // Hash map to deduplicate the function range and the item is a pair of
-  // function start and end offset.
-  std::unordered_map<uint64_t, uint64_t> AggregatedRanges;
+  std::unordered_set<const BinaryFunction *> ProfiledFunctions;
+
   // Go through all the ranges in the CS counters, use the start of the range to
-  // look up the function it belongs and record the function range.
+  // look up the function it belongs to and record the function.
   for (const auto &CI : SampleCounters) {
     for (const auto &Item : CI.second.RangeCounter) {
       // FIXME: Filter the bogus crossing function range.
       uint64_t StartOffset = Item.first.first;
-      // Note that a function can be spilt into multiple ranges, so get all
-      // ranges of the function.
-      for (const auto &Range : Binary->getRangesForOffset(StartOffset))
-        AggregatedRanges[Range.first] = Range.second;
+      if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset))
+        ProfiledFunctions.insert(FRange->Func);
     }
   }
 
-  for (const auto &I : AggregatedRanges) {
-    uint64_t StartOffset = I.first;
-    uint64_t EndOffset = I.second;
-    Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset);
-  }
+  for (auto *Func : ProfiledFunctions)
+    Binary->computeInlinedContextSizeForFunc(Func);
+
+  // Flush the symbolizer to save memory.
+  Binary->flushSymbolizer();
 }
 
 void CSProfileGenerator::generateLineNumBasedProfile() {
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -166,14 +166,14 @@
   // their remaining probes.
   void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder);
 
-  void dump() { RootContext.dumpTree(); }
-
-private:
   using ProbeFrameStack = SmallVector<std::pair<StringRef, uint32_t>>;
   void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder,
                                   MCDecodedPseudoProbeInlineTree &ProbeNode,
                                   ProbeFrameStack &Context);
 
+  void dump() { RootContext.dumpTree(); }
+
+private:
   // Root node for context trie tree, node that this is a reverse context trie
   // with callee as parent and caller as child. This way we can traverse from
   // root to find the best/longest matching context if an exact match does not
@@ -256,6 +256,9 @@
   // Pseudo probe decoder
   MCPseudoProbeDecoder ProbeDecoder;
 
+  // Function name to probe frame map for top-level outlined functions.
+  StringMap<MCDecodedPseudoProbeInlineTree *> TopLevelProbeFrameMap;
+
   bool UsePseudoProbes = false;
 
   bool UseFSDiscriminator = false;
@@ -477,6 +480,8 @@
     return Stack.back();
   }
 
+  void flushSymbolizer() { Symbolizer.reset(); }
+
   // Compare two addresses' inline context
   bool inlineContextEqual(uint64_t Add1, uint64_t Add2);
 
@@ -491,6 +496,8 @@
   void computeInlinedContextSizeForRange(uint64_t StartOffset,
                                          uint64_t EndOffset);
 
+  void computeInlinedContextSizeForFunc(const BinaryFunction *Func);
+
   const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const {
     return ProbeDecoder.getCallProbeForAddr(Address);
   }
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -219,10 +219,6 @@
   // Disassemble the text sections.
   disassemble(Obj);
 
-  // Track size for optimized inlinees when probe is available
-  if (UsePseudoProbes && TrackFuncContextSize)
-    FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder);
-
   // Use function start and return address to infer prolog and epilog
   ProEpilogTracker.inferPrologOffsets(StartOffset2FuncRangeMap);
   ProEpilogTracker.inferEpilogOffsets(RetOffsets);
@@ -349,6 +345,17 @@
     }
   }
 
+  // Build TopLevelProbeFrameMap to track size for optimized inlinees when
+  // probes are available.
+  if (UsePseudoProbes && TrackFuncContextSize) {
+    for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) {
+      auto *Frame = Child.second.get();
+      StringRef FuncName =
+          ProbeDecoder.getFuncDescForGUID(Frame->Guid)->FuncName;
+      TopLevelProbeFrameMap[FuncName] = Frame;
+    }
+  }
+
   if (ShowPseudoProbe)
     ProbeDecoder.printGUID2FuncDescMap(outs());
 }
@@ -747,6 +754,25 @@
   } while (IP.advance() && IP.Address < RangeEnd);
 }
 
+void ProfiledBinary::computeInlinedContextSizeForFunc(
+    const BinaryFunction *Func) {
+  // Note that a function can be split into multiple ranges, so compute for all
+  // ranges of the function.
+  for (const auto &Range : Func->Ranges)
+    computeInlinedContextSizeForRange(Range.first, Range.second);
+
+  // Track optimized-away inlinees for a probed binary. A function inlined and
+  // then optimized away should still have its probes left over in place.
+  if (usePseudoProbes()) {
+    auto I = TopLevelProbeFrameMap.find(Func->FuncName);
+    if (I != TopLevelProbeFrameMap.end()) {
+      BinarySizeContextTracker::ProbeFrameStack ProbeContext;
+      FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder, *I->second,
+                                                 ProbeContext);
+    }
+  }
+}
+
 InstructionPointer::InstructionPointer(const ProfiledBinary *Binary,
                                        uint64_t Address, bool RoundToNext)
     : Binary(Binary), Address(Address) {
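
Reviewer note: the core of the ProfileGenerator.cpp hunk is a dedup-then-compute
pattern: record the owning function of every sampled range in a set, size each
function exactly once, then flush the symbolizer. The sketch below illustrates
that pattern in isolation; Function, findFuncForOffset(), and the printf
stand-in for computeInlinedContextSizeForFunc() are hypothetical names, not the
real llvm-profgen API.

    #include <cstdint>
    #include <cstdio>
    #include <unordered_set>
    #include <utility>
    #include <vector>

    // Hypothetical stand-in for llvm-profgen's BinaryFunction.
    struct Function {
      const char *Name;
      std::vector<std::pair<uint64_t, uint64_t>> Ranges; // [start, end) offsets
    };

    // Hypothetical stand-in for a range-to-function lookup such as
    // findFuncRangeForOffset().
    static const Function *findFuncForOffset(const std::vector<Function> &Funcs,
                                             uint64_t Offset) {
      for (const Function &F : Funcs)
        for (const auto &R : F.Ranges)
          if (Offset >= R.first && Offset < R.second)
            return &F;
      return nullptr;
    }

    int main() {
      // Two functions, one of them split into two ranges.
      std::vector<Function> Funcs = {
          {"foo", {{0x100, 0x140}, {0x200, 0x220}}},
          {"bar", {{0x140, 0x1a0}}},
      };
      // Sampled range start offsets; several hit the same function.
      std::vector<uint64_t> SampledStarts = {0x104, 0x150, 0x208, 0x130};

      // Deduplicate by owning function so each function is sized once, no
      // matter how many sampled ranges land inside it.
      std::unordered_set<const Function *> Profiled;
      for (uint64_t Off : SampledStarts)
        if (const Function *F = findFuncForOffset(Funcs, Off))
          Profiled.insert(F);

      for (const Function *F : Profiled)
        std::printf("compute inlined context size for %s\n", F->Name);
      return 0;
    }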