diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -246,9 +246,8 @@ // A Tri-tree based data structure to group probes by inline stack. // A tree is allocated for a standalone .text section. A fake // instance is created as the root of a tree. -// A real instance of this class is created for each function, either an -// unlined function that has code in .text section or an inlined function. - +// A real instance of this class is created for each function, either a +// not inlined function that has code in .text section or an inlined function. class MCPseudoProbeInlineTree : public MCPseudoProbeInlineTreeBase { diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h --- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h +++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h @@ -37,7 +37,7 @@ public: ContextTrieNode(ContextTrieNode *Parent = nullptr, StringRef FName = StringRef(), - FunctionSamples *FSamples = nullptr, uint32_t FSize = 0, + FunctionSamples *FSamples = nullptr, uint32_t FSize = -1, LineLocation CallLoc = {0, 0}) : ParentContext(Parent), FuncName(FName), FuncSamples(FSamples), FuncSize(FSize), CallSiteLoc(CallLoc){}; @@ -57,8 +57,8 @@ StringRef getFuncName() const; FunctionSamples *getFunctionSamples() const; void setFunctionSamples(FunctionSamples *FSamples); - uint32_t getFunctionSize() const; - void setFunctionSize(uint32_t FSize); + Optional<uint32_t> getFunctionSize() const; + void addFunctionSize(uint32_t FSize); LineLocation getCallSiteLoc() const; ContextTrieNode *getParentContext() const; void setParentContext(ContextTrieNode *Parent); @@ -81,7 +81,7 @@ FunctionSamples *FuncSamples; // Function size for current context - uint32_t FuncSize; + Optional<uint32_t> FuncSize; // Callsite location in parent context LineLocation CallSiteLoc; diff --git 
a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -127,9 +127,16 @@ FuncSamples = FSamples; } -uint32_t ContextTrieNode::getFunctionSize() const { return FuncSize; } +Optional<uint32_t> ContextTrieNode::getFunctionSize() const { + return FuncSize; +} + +void ContextTrieNode::addFunctionSize(uint32_t FSize) { + if (!FuncSize.hasValue()) + FuncSize = 0; -void ContextTrieNode::setFunctionSize(uint32_t FSize) { FuncSize = FSize; } + FuncSize = FuncSize.getValue() + FSize; +} LineLocation ContextTrieNode::getCallSiteLoc() const { return CallSiteLoc; } diff --git a/llvm/test/tools/llvm-profgen/cs-preinline-cost.test b/llvm/test/tools/llvm-profgen/cs-preinline-cost.test --- a/llvm/test/tools/llvm-profgen/cs-preinline-cost.test +++ b/llvm/test/tools/llvm-profgen/cs-preinline-cost.test @@ -11,7 +11,9 @@ CHECK-DEFAULT-NEXT: Inlined context profile for: main:8 @ _Z3fooi (callee size: 4, call count:544) CHECK-CSCOST: Process main for context-sensitive pre-inlining (pre-inline size: 69, size limit: 828) -CHECK-CSCOST-NEXT: Inlined context profile for: main:9 @ _Z3fooi (callee size: 264, call count:545) +; This inlinee is fully optimized away, make sure we have the correct zero size for that context even if the size is +; not available through symbolization. +CHECK-CSCOST-NEXT: Inlined context profile for: main:9 @ _Z3fooi (callee size: 0, call count:545) CHECK-CSCOST-NEXT: Inlined context profile for: main:7 @ _Z3fooi (callee size: 279, call count:545) CHECK-CSCOST-NEXT: Inlined context profile for: main:8 @ _Z3fooi (callee size: 44, call count:544) diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -118,9 +118,18 @@ // closest matching context. 
uint32_t getFuncSizeForContext(const SampleContext &Context); + // For inlinees that are fully optimized away, we can establish zero size using + // their remaining probes. + void trackOptimizedInlinees(MCPseudoProbeDecoder &ProbeDecoder); + void dump() { RootContext.dumpTree(); } private: + using ProbeFrameStack = SmallVector<std::pair<StringRef, uint32_t>>; + void trackOptimizedInlinees(MCPseudoProbeDecoder &ProbeDecoder, + MCDecodedPseudoProbeInlineTree &ProbeNode, + ProbeFrameStack &Context); + // Root node for context trie tree, node that this is a reverse context trie // with callee as parent and caller as child. This way we can traverse from // root to find the best/longest matching context if an exact match does not diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -63,7 +63,7 @@ IsLeaf = false; } - CurNode->setFunctionSize(CurNode->getFunctionSize() + InstrSize); + CurNode->addFunctionSize(InstrSize); } uint32_t @@ -73,7 +73,7 @@ StringRef ContextRemain = Context; StringRef ChildContext; StringRef CallerName; - uint32_t Size = 0; + Optional<uint32_t> Size; // Start from top-level context-less function, travese down the reverse // context trie to find the best/longest match for given context, then @@ -87,23 +87,64 @@ SampleContext::decodeContextString(ChildContext, CallerName, CallSiteLoc); PrevNode = CurrNode; CurrNode = CurrNode->getChildContext(CallSiteLoc, CallerName); - if (CurrNode && CurrNode->getFunctionSize().hasValue()) + Size = CurrNode->getFunctionSize().getValue(); } // If we traversed all nodes along the path of the context and haven't // found a size yet, pivot to look for size from sibling nodes, i.e size // of inlinee under different context. 
- if (!Size) { + if (!Size.hasValue()) { if (!CurrNode) CurrNode = PrevNode; - while (!Size && CurrNode) { + while (!Size.hasValue() && CurrNode && + !CurrNode->getAllChildContext().empty()) { CurrNode = &CurrNode->getAllChildContext().begin()->second; - Size = CurrNode->getFunctionSize(); + if (CurrNode->getFunctionSize().hasValue()) + Size = CurrNode->getFunctionSize().getValue(); } } - return Size; + assert(Size.hasValue() && "We should at least find one context size."); + return Size.getValue(); +} + +void BinarySizeContextTracker::trackOptimizedInlinees( + MCPseudoProbeDecoder &ProbeDecoder) { + ProbeFrameStack ProbeContext; + for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) + trackOptimizedInlinees(ProbeDecoder, *Child.second.get(), ProbeContext); +} + +void BinarySizeContextTracker::trackOptimizedInlinees( + MCPseudoProbeDecoder &ProbeDecoder, + MCDecodedPseudoProbeInlineTree &ProbeNode, ProbeFrameStack &ProbeContext) { + StringRef FuncName = + ProbeDecoder.getFuncDescForGUID(ProbeNode.Guid)->FuncName; + ProbeContext.emplace_back(FuncName, 0); + + // This ProbeContext has a probe, so it has code before inlining and + // optimization. Make sure we mark its size as known. + if (!ProbeNode.getProbes().empty()) { + ContextTrieNode *SizeContext = &RootContext; + for (auto &ProbeFrame : reverse(ProbeContext)) { + StringRef CallerName = ProbeFrame.first; + LineLocation CallsiteLoc(ProbeFrame.second, 0); + SizeContext = + SizeContext->getOrCreateChildContext(CallsiteLoc, CallerName); + } + // Add 0 size to make known. + SizeContext->addFunctionSize(0); + } + + // DFS down the probe inline tree + for (const auto &ChildNode : ProbeNode.getChildren()) { + InlineSite Location = ChildNode.first; + ProbeContext.back().second = std::get<1>(Location); + trackOptimizedInlinees(ProbeDecoder, *ChildNode.second.get(), ProbeContext); + } + + ProbeContext.pop_back(); } void ProfiledBinary::load() { @@ -130,6 +171,10 @@ // Disassemble the text sections. 
disassemble(Obj); + // Track size for optimized inlinees when probe is available + if (UsePseudoProbes && TrackFuncContextSize) + FuncSizeTracker.trackOptimizedInlinees(ProbeDecoder); + // Use function start and return address to infer prolog and epilog ProEpilogTracker.inferPrologOffsets(FuncStartAddrMap); ProEpilogTracker.inferEpilogOffsets(RetAddrs);