diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -242,6 +242,10 @@ (LineOffset == O.LineOffset && Discriminator < O.Discriminator); } + bool operator==(const LineLocation &O) const { + return LineOffset == O.LineOffset && Discriminator == O.Discriminator; + } + uint32_t LineOffset; uint32_t Discriminator; }; @@ -339,6 +343,129 @@ raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample); +// State of context associated with FunctionSamples +enum ContextStateMask { + UnknownContext = 0x0, // Profile without context + RawContext = 0x1, // Full context profile from input profile + SyntheticContext = 0x2, // Synthetic context created for context promotion + InlinedContext = 0x4, // Profile for context that is inlined into caller + MergedContext = 0x8 // Profile for context merged into base profile +}; + +// Sample context for FunctionSamples. It consists of the calling context, +// the function name and context state. Internally sample context is represented +// using StringRef, which is also the input for constructing a `SampleContext`. +// It can accept and represent both full context string as well as context-less +// function name. +// Example of full context string (note the wrapping `[]`): +// `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]` +// Example of context-less function name (same as AutoFDO): +// `_Z8funcLeafi` +class SampleContext { +public: + SampleContext() : State(UnknownContext) {} + SampleContext(StringRef ContextStr, + ContextStateMask CState = UnknownContext) { + setContext(ContextStr, CState); + } + + // Promote context by removing top frames (represented by `ContextStrToRemove`). + // Note that with string representation of context, the promotion is effectively + // a substr operation with `ContextStrToRemove` removed from left. 
+ void promoteOnPath(StringRef ContextStrToRemove) { + assert(FullContext.startswith(ContextStrToRemove)); + + // Remove leading context and frame separator " @ ". + FullContext = FullContext.substr(ContextStrToRemove.size() + 3); + CallingContext = CallingContext.substr(ContextStrToRemove.size() + 3); + } + + // Split the top context frame (left-most substr) from context. + static std::pair + splitContextString(StringRef ContextStr) { + return ContextStr.split(" @ "); + } + + // Decode context string for a frame to get function name and location. + // `ContextStr` is in the form of `FuncName:StartLine.Discriminator`. + static void decodeContextString(StringRef ContextStr, StringRef &FName, + LineLocation &LineLoc) { + // Get function name + auto EntrySplit = ContextStr.split(':'); + FName = EntrySplit.first; + + LineLoc = {0, 0}; + if (!EntrySplit.second.empty()) { + // Get line offset, use signed int for getAsInteger so string will + // be parsed as signed. + int LineOffset = 0; + auto LocSplit = EntrySplit.second.split('.'); + LocSplit.first.getAsInteger(10, LineOffset); + LineLoc.LineOffset = LineOffset; + + // Get discriminator + if (!LocSplit.second.empty()) + LocSplit.second.getAsInteger(10, LineLoc.Discriminator); + } + } + + operator StringRef() const { return FullContext; } + bool hasState(ContextStateMask S) { return State & (uint32_t)S; } + void setState(ContextStateMask S) { State |= (uint32_t)S; } + void clearState(ContextStateMask S) { State &= (uint32_t)~S; } + bool hasContext() const { return State != UnknownContext; } + bool isBaseContext() const { return CallingContext.empty(); } + StringRef getName() const { return Name; } + StringRef getCallingContext() const { return CallingContext; } + StringRef getNameWithContext() const { return FullContext; } + +private: + // Give a context string, decode and populate internal states like + // Function name, Calling context and context state. 
Example of input + // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]` + void setContext(StringRef ContextStr, ContextStateMask CState) { + assert(!ContextStr.empty()); + // Note that `[]` wrapped input indicates a full context string, otherwise + // it's treated as context-less function name only. + bool HasContext = ContextStr.startswith("["); + if (!HasContext && CState == UnknownContext) { + State = UnknownContext; + Name = FullContext = ContextStr; + } else { + // Assume raw context profile if unspecified + if (CState == UnknownContext) + State = RawContext; + else + State = CState; + + // Remove encapsulating '[' and ']' if any + if (HasContext) + FullContext = ContextStr.substr(1, ContextStr.size() - 2); + else + FullContext = ContextStr; + + // Caller is to the left of callee in context string + auto NameContext = FullContext.rsplit(" @ "); + if (NameContext.second.empty()) { + Name = NameContext.first; + CallingContext = NameContext.second; + } else { + Name = NameContext.second; + CallingContext = NameContext.first; + } + } + } + + // Full context string including calling context and leaf function name + StringRef FullContext; + // Function name for the associated sample profile + StringRef Name; + // Calling context (leaf function excluded) for the associated sample profile + StringRef CallingContext; + // State of the associated sample profile + uint32_t State; +}; + class FunctionSamples; class SampleProfileReaderItaniumRemapper; @@ -396,10 +523,16 @@ ErrorOr findSamplesAt(uint32_t LineOffset, uint32_t Discriminator) const { const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator)); - if (ret == BodySamples.end()) + if (ret == BodySamples.end()) { + // For CSSPGO, in order to conserve profile size, we no longer write out + // locations profile for those not hit during training, so we need to + // treat them as zero instead of error here. 
+ if (ProfileIsCS) + return 0; return std::error_code(); - else + } else { return ret->second.getSamples(); + } } /// Returns the call target map collected at a given location. @@ -615,6 +748,12 @@ const DILocation *DIL, SampleProfileReaderItaniumRemapper *Remapper = nullptr) const; + static bool ProfileIsCS; + + SampleContext &getContext() const { return Context; } + + void setContext(const SampleContext &FContext) { Context = FContext; } + static SampleProfileFormat Format; /// Whether the profile uses MD5 to represent string. @@ -639,6 +778,9 @@ /// Mangled name of the function. StringRef Name; + /// Calling context for function profile + mutable SampleContext Context; + /// Total number of samples collected inside this function. /// /// Samples are cumulative, they include all the samples collected diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -419,6 +419,9 @@ /// \brief Return the profile format. SampleProfileFormat getFormat() const { return Format; } + /// Whether input profile is fully context-sensitive + bool profileIsCS() const { return ProfileIsCS; } + virtual std::unique_ptr getProfileSymbolList() { return nullptr; }; @@ -461,6 +464,8 @@ std::unique_ptr Remapper; + bool ProfileIsCS = false; + /// \brief The format of sample. SampleProfileFormat Format = SPF_None; }; diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h @@ -0,0 +1,141 @@ +//===- Transforms/IPO/SampleContextTracker.h --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file provides the interface for context-sensitive profile tracker used +/// by CSSPGO. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_SAMPLECONTEXTTRACKER_H +#define LLVM_TRANSFORMS_IPO_SAMPLECONTEXTTRACKER_H + +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Instructions.h" +#include "llvm/ProfileData/SampleProf.h" +#include +#include + +using namespace llvm; +using namespace sampleprof; + +namespace llvm { + +// Internal trie tree representation used for tracking context tree and sample +// profiles. The path from root node to a given node represents the context of +// that node's profile. +class ContextTrieNode { +public: + ContextTrieNode(ContextTrieNode *Parent = nullptr, + StringRef FName = StringRef(), + FunctionSamples *FSamples = nullptr, + LineLocation CallLoc = {0, 0}) + : ParentContext(Parent), FuncName(FName), FuncSamples(FSamples), + CallSiteLoc(CallLoc){}; + ContextTrieNode *getChildContext(const LineLocation &CallSite, + StringRef CalleeName); + ContextTrieNode *getChildContext(const LineLocation &CallSite); + ContextTrieNode *getOrCreateChildContext(const LineLocation &CallSite, + StringRef CalleeName, + bool AllowCreate = true); + + ContextTrieNode &moveToChildContext(const LineLocation &CallSite, + ContextTrieNode &&NodeToMove, + StringRef ContextStrToRemove, + bool DeleteNode = true); + void removeChildContext(const LineLocation &CallSite, StringRef CalleeName); + std::map &getAllChildContext(); + const StringRef getFuncName() const; + FunctionSamples *getFunctionSamples() const; + void setFunctionSamples(FunctionSamples *FSamples); + LineLocation getCallSiteLoc() const; + ContextTrieNode 
*getParentContext() const; + void setParentContext(ContextTrieNode *Parent); + void dump(); + +private: + static uint32_t nodeHash(StringRef ChildName, const LineLocation &Callsite); + + // Map line+discriminator location to child context + std::map AllChildContext; + + // Link to parent context node + ContextTrieNode *ParentContext; + + // Function name for current context + StringRef FuncName; + + // Function Samples for current context + FunctionSamples *FuncSamples; + + // Callsite location in parent context + LineLocation CallSiteLoc; +}; + +// Profile tracker that manages profiles and their associated contexts. It +// provides interfaces used by sample profile loader to query context profile or +// base profile for given function or location; it also manages context tree +// manipulation that is needed to accommodate inline decisions so we have +// accurate post-inline profile for functions. Internally context profiles +// are organized in a trie, with each node representing profile for specific +// calling context and the context is identified by path from root to the node. +class SampleContextTracker { +public: + SampleContextTracker(StringMap &Profiles); + // Query context profile for a specific callee with given name at a given + // call-site. The full context is identified by location of call instruction. + FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst, + StringRef CalleeName); + // Query context profile for a given location. The full context + // is identified by input DILocation. + FunctionSamples *getContextSamplesFor(const DILocation *DIL); + // Query context profile for a given sample context of a function. + FunctionSamples *getContextSamplesFor(const SampleContext &Context); + // Query base profile for a given function. A base profile is a merged view + // of all context profiles for contexts that are not inlined. 
+ FunctionSamples *getBaseSamplesFor(const Function &Func, + bool MergeContext = true); + // Query base profile for a given function by name. + FunctionSamples *getBaseSamplesFor(StringRef Name, bool MergeContext); + // Mark a context profile as inlined when function is inlined. + // This makes sure that inlined context profile will be excluded in + // function's base profile. + void markContextSamplesInlined(const FunctionSamples *InlinedSamples); + // Dump the internal context profile trie. + void dump(); + +private: + ContextTrieNode *getContextFor(const DILocation *DIL); + ContextTrieNode *getContextFor(const SampleContext &Context); + ContextTrieNode *getCalleeContextFor(const DILocation *DIL, + StringRef CalleeName); + ContextTrieNode *getOrCreateContextPath(const SampleContext &Context, + bool AllowCreate); + ContextTrieNode *getTopLevelContextNode(StringRef FName); + ContextTrieNode &addTopLevelContextNode(StringRef FName); + ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &NodeToPromo); + void promoteMergeContextSamplesTree(const Instruction &Inst, + StringRef CalleeName); + void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode, + StringRef ContextStrToRemove); + ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &FromNode, + ContextTrieNode &ToNodeParent, + StringRef ContextStrToRemove); + + // Map from function name to context profiles (excluding base profile) + StringMap> FuncToCtxtProfileSet; + + // Root node for context trie tree + ContextTrieNode RootContext; +}; + +} // end namespace llvm +#endif // LLVM_TRANSFORMS_IPO_SAMPLECONTEXTTRACKER_H diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -31,6 +31,7 @@ namespace llvm { namespace sampleprof { SampleProfileFormat FunctionSamples::Format; +bool FunctionSamples::ProfileIsCS = false; bool FunctionSamples::UseMD5; } // namespace 
sampleprof } // namespace llvm diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -196,6 +196,8 @@ sampleprof_error Result = sampleprof_error::success; InlineCallStack InlineStack; + int CSProfileCount = 0; + int RegularProfileCount = 0; for (; !LineIt.is_at_eof(); ++LineIt) { if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#') @@ -220,9 +222,15 @@ "Expected 'mangled_name:NUM:NUM', found " + *LineIt); return sampleprof_error::malformed; } - Profiles[FName] = FunctionSamples(); - FunctionSamples &FProfile = Profiles[FName]; - FProfile.setName(FName); + SampleContext FContext(FName); + if (FContext.hasContext()) + ++CSProfileCount; + else + ++RegularProfileCount; + Profiles[FContext] = FunctionSamples(); + FunctionSamples &FProfile = Profiles[FContext]; + FProfile.setName(FContext.getName()); + FProfile.setContext(FContext); MergeResult(Result, FProfile.addTotalSamples(NumSamples)); MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples)); InlineStack.clear(); @@ -264,6 +272,11 @@ } } } + + assert((RegularProfileCount == 0 || CSProfileCount == 0) && + "Cannot have both context-sensitive and regular profile"); + ProfileIsCS = (CSProfileCount > 0); + if (Result == sampleprof_error::success) computeSummary(); @@ -1292,6 +1305,8 @@ return; } + // CSSPGO-TODO: Remapper is not yet supported. + // We will need to remap the entire context string. 
assert(Remappings && "should be initialized while creating remapper"); for (auto &Sample : Reader.getProfiles()) { DenseSet NamesInSample; diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -31,6 +31,7 @@ PartialInlining.cpp PassManagerBuilder.cpp PruneEH.cpp + SampleContextTracker.cpp SampleProfile.cpp SampleProfileProbe.cpp SCCP.cpp diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -0,0 +1,521 @@ +//===- SampleContextTracker.cpp - Context-sensitive Profile Tracker -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the SampleContextTracker used by CSSPGO. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/SampleContextTracker.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Instructions.h" +#include "llvm/ProfileData/SampleProf.h" +#include +#include +#include + +using namespace llvm; +using namespace sampleprof; + +#define DEBUG_TYPE "sample-context-tracker" + +namespace llvm { + +ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite, + StringRef CalleeName) { + if (CalleeName.empty()) + return getChildContext(CallSite); + + uint32_t Hash = nodeHash(CalleeName, CallSite); + auto It = AllChildContext.find(Hash); + if (It != AllChildContext.end()) + return &It->second; + return nullptr; +} + +ContextTrieNode * +ContextTrieNode::getChildContext(const LineLocation &CallSite) { + // CSFDO-TODO: This could be slow, change AllChildContext so we can + // do point look up for child node by call site alone. 
+ // CSFDO-TODO: Return the child with max count for indirect call + ContextTrieNode *ChildNodeRet = nullptr; + for (auto &It : AllChildContext) { + ContextTrieNode &ChildNode = It.second; + if (ChildNode.CallSiteLoc == CallSite) { + if (ChildNodeRet) + return nullptr; + else + ChildNodeRet = &ChildNode; + } + } + + return ChildNodeRet; +} + +ContextTrieNode &ContextTrieNode::moveToChildContext( + const LineLocation &CallSite, ContextTrieNode &&NodeToMove, + StringRef ContextStrToRemove, bool DeleteNode) { + uint32_t Hash = nodeHash(NodeToMove.getFuncName(), CallSite); + assert(!AllChildContext.count(Hash) && "Node to remove must exist"); + LineLocation OldCallSite = NodeToMove.CallSiteLoc; + ContextTrieNode &OldParentContext = *NodeToMove.getParentContext(); + AllChildContext[Hash] = NodeToMove; + ContextTrieNode &NewNode = AllChildContext[Hash]; + NewNode.CallSiteLoc = CallSite; + + // Walk through nodes in the moved subtree, and update + // FunctionSamples' context as for the context promotion. + // We also need to set new parent link for all children. + std::queue NodeToUpdate; + NewNode.setParentContext(this); + NodeToUpdate.push(&NewNode); + + while (!NodeToUpdate.empty()) { + ContextTrieNode *Node = NodeToUpdate.front(); + NodeToUpdate.pop(); + FunctionSamples *FSamples = Node->getFunctionSamples(); + + if (FSamples) { + FSamples->getContext().promoteOnPath(ContextStrToRemove); + FSamples->getContext().setState(SyntheticContext); + LLVM_DEBUG(dbgs() << " Context promoted to: " << FSamples->getContext() + << "\n"); + } + + for (auto &It : Node->getAllChildContext()) { + ContextTrieNode *ChildNode = &It.second; + ChildNode->setParentContext(Node); + NodeToUpdate.push(ChildNode); + } + } + + // Original context no longer needed, destroy if requested. 
+ if (DeleteNode) + OldParentContext.removeChildContext(OldCallSite, NewNode.getFuncName()); + + return NewNode; +} + +void ContextTrieNode::removeChildContext(const LineLocation &CallSite, + StringRef CalleeName) { + uint32_t Hash = nodeHash(CalleeName, CallSite); + // Note this essentially calls dtor and destroys that child context + AllChildContext.erase(Hash); +} + +std::map &ContextTrieNode::getAllChildContext() { + return AllChildContext; +} + +const StringRef ContextTrieNode::getFuncName() const { return FuncName; } + +FunctionSamples *ContextTrieNode::getFunctionSamples() const { + return FuncSamples; +} + +void ContextTrieNode::setFunctionSamples(FunctionSamples *FSamples) { + FuncSamples = FSamples; +} + +LineLocation ContextTrieNode::getCallSiteLoc() const { return CallSiteLoc; } + +ContextTrieNode *ContextTrieNode::getParentContext() const { + return ParentContext; +} + +void ContextTrieNode::setParentContext(ContextTrieNode *Parent) { + ParentContext = Parent; +} + +void ContextTrieNode::dump() { + dbgs() << "Node: " << FuncName << "\n" + << " Callsite: " << CallSiteLoc << "\n" + << " Children:\n"; + + for (auto &It : AllChildContext) { + dbgs() << " Node: " << It.second.getFuncName() << "\n"; + } +} + +uint32_t ContextTrieNode::nodeHash(StringRef ChildName, + const LineLocation &Callsite) { + // We still use child's name for child hash, this is + // because for children of root node, we don't have + // different line/discriminator, and we'll rely on name + // to differentiate children. 
+ uint32_t NameHash = std::hash{}(ChildName.str()); + uint32_t LocId = (Callsite.LineOffset << 16) | Callsite.Discriminator; + return NameHash + (LocId << 5) + LocId; +} + +ContextTrieNode *ContextTrieNode::getOrCreateChildContext( + const LineLocation &CallSite, StringRef CalleeName, bool AllowCreate) { + uint32_t Hash = nodeHash(CalleeName, CallSite); + auto It = AllChildContext.find(Hash); + if (It != AllChildContext.end()) { + assert(It->second.getFuncName() == CalleeName && + "Hash collision for child context node"); + return &It->second; + } + + if (!AllowCreate) + return nullptr; + + AllChildContext[Hash] = ContextTrieNode(this, CalleeName, nullptr, CallSite); + return &AllChildContext[Hash]; +} + +// Profile tracker that manages profiles and their associated contexts +SampleContextTracker::SampleContextTracker( + StringMap &Profiles) { + for (auto &FuncSample : Profiles) { + FunctionSamples *FSamples = &FuncSample.second; + SampleContext Context(FuncSample.first(), RawContext); + LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context << "\n"); + if (!Context.isBaseContext()) + FuncToCtxtProfileSet[Context.getName()].insert(FSamples); + ContextTrieNode *NewNode = getOrCreateContextPath(Context, true); + assert(!NewNode->getFunctionSamples() && + "New node can't have sample profile"); + NewNode->setFunctionSamples(FSamples); + } +} + +FunctionSamples * +SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst, + StringRef CalleeName) { + LLVM_DEBUG(dbgs() << "Getting callee context for instr: " << Inst << "\n"); + // CSFDO-TODO: We use CalleeName to differentiate indirect call + // We need to get sample for indirect callee too. 
+ DILocation *DIL = Inst.getDebugLoc(); + if (!DIL) + return nullptr; + + ContextTrieNode *CalleeContext = getCalleeContextFor(DIL, CalleeName); + if (CalleeContext) { + FunctionSamples *FSamples = CalleeContext->getFunctionSamples(); + LLVM_DEBUG(if (FSamples) { + dbgs() << " Callee context found: " << FSamples->getContext() << "\n"; + }); + return FSamples; + } + + return nullptr; +} + +FunctionSamples * +SampleContextTracker::getContextSamplesFor(const DILocation *DIL) { + assert(DIL && "Expect non-null location"); + + ContextTrieNode *ContextNode = getContextFor(DIL); + if (!ContextNode) + return nullptr; + + // We may have inlined callees during pre-LTO compilation, in which case + // we need to rely on the inline stack from !dbg to mark context profile + // as inlined, instead of `MarkContextSamplesInlined` during inlining. + // Sample profile loader walks through all instructions to get profile, + // which calls this function. So once that is done, all previously inlined + // context profile should be marked properly. 
+ FunctionSamples *Samples = ContextNode->getFunctionSamples(); + if (Samples && ContextNode->getParentContext() != &RootContext) + Samples->getContext().setState(InlinedContext); + + return Samples; +} + +FunctionSamples * +SampleContextTracker::getContextSamplesFor(const SampleContext &Context) { + ContextTrieNode *Node = getContextFor(Context); + if (!Node) + return nullptr; + + return Node->getFunctionSamples(); +} + +FunctionSamples *SampleContextTracker::getBaseSamplesFor(const Function &Func, + bool MergeContext) { + StringRef CanonName = FunctionSamples::getCanonicalFnName(Func); + return getBaseSamplesFor(CanonName, MergeContext); +} + +FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name, + bool MergeContext) { + LLVM_DEBUG(dbgs() << "Getting base profile for function: " << Name << "\n"); + // Base profile is top-level node (child of root node), so try to retrieve + // existing top-level node for given function first. If it exists, it could be + // that we've merged base profile before, or there's actually context-less + // profile from the input (e.g. due to unreliable stack walking). + ContextTrieNode *Node = getTopLevelContextNode(Name); + if (MergeContext) { + LLVM_DEBUG(dbgs() << " Merging context profile into base profile: " << Name + << "\n"); + + // We have profile for function under different contexts, + // create synthetic base profile and merge context profiles + // into base profile. 
+ for (auto *CSamples : FuncToCtxtProfileSet[Name]) { + SampleContext &Context = CSamples->getContext(); + ContextTrieNode *FromNode = getContextFor(Context); + if (FromNode == Node) + continue; + + // Skip inlined context profile and also don't re-merge any context + if (Context.hasState(InlinedContext) || Context.hasState(MergedContext)) + continue; + + ContextTrieNode &ToNode = promoteMergeContextSamplesTree(*FromNode); + assert(!Node || Node == &ToNode && "Expect only one base profile"); + Node = &ToNode; + } + } + + // Still no profile even after merge/promotion (if allowed) + if (!Node) + return nullptr; + + return Node->getFunctionSamples(); +} + +void SampleContextTracker::markContextSamplesInlined( + const FunctionSamples *InlinedSamples) { + assert(InlinedSamples && "Expect non-null inlined samples"); + LLVM_DEBUG(dbgs() << "Marking context profile as inlined: " + << InlinedSamples->getContext() << "\n"); + InlinedSamples->getContext().setState(InlinedContext); +} + +void SampleContextTracker::promoteMergeContextSamplesTree( + const Instruction &Inst, StringRef CalleeName) { + LLVM_DEBUG(dbgs() << "Promoting and merging context tree for instr: \n" + << Inst << "\n"); + // CSFDO-TODO: We also need to promote context profile from indirect + // calls. We won't have callee names from those from call instr. + if (CalleeName.empty()) + return; + + // Get the caller context for the call instruction, we don't use callee + // name from call because there can be context from indirect calls too. 
+ DILocation *DIL = Inst.getDebugLoc(); + ContextTrieNode *CallerNode = getContextFor(DIL); + if (!CallerNode) + return; + + // Get the context that needs to be promoted + LineLocation CallSite(FunctionSamples::getOffset(DIL), + DIL->getBaseDiscriminator()); + ContextTrieNode *NodeToPromo = + CallerNode->getChildContext(CallSite, CalleeName); + if (!NodeToPromo) + return; + + promoteMergeContextSamplesTree(*NodeToPromo); +} + +ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree( + ContextTrieNode &NodeToPromo) { + // Promote the input node to be directly under root. This can happen + // when we decided to not inline a function under context represented + // by the input node. The promote and merge is then needed to reflect + // the context profile in the base (context-less) profile. + FunctionSamples *FromSamples = NodeToPromo.getFunctionSamples(); + assert(FromSamples && "Shouldn't promote a context without profile"); + LLVM_DEBUG(dbgs() << " Found context tree root to promote: " + << FromSamples->getContext() << "\n"); + + StringRef ContextStrToRemove = FromSamples->getContext().getCallingContext(); + return promoteMergeContextSamplesTree(NodeToPromo, RootContext, + ContextStrToRemove); +} + +void SampleContextTracker::dump() { + dbgs() << "Context Profile Tree:\n"; + std::queue NodeQueue; + NodeQueue.push(&RootContext); + + while (!NodeQueue.empty()) { + ContextTrieNode *Node = NodeQueue.front(); + NodeQueue.pop(); + Node->dump(); + + for (auto &It : Node->getAllChildContext()) { + ContextTrieNode *ChildNode = &It.second; + NodeQueue.push(ChildNode); + } + } +} + +ContextTrieNode * +SampleContextTracker::getContextFor(const SampleContext &Context) { + return getOrCreateContextPath(Context, false); +} + +ContextTrieNode * +SampleContextTracker::getCalleeContextFor(const DILocation *DIL, + StringRef CalleeName) { + assert(DIL && "Expect non-null location"); + + // CSSPGO-TODO: need to support indirect callee + if (CalleeName.empty()) + return 
nullptr; + + ContextTrieNode *CallContext = getContextFor(DIL); + if (!CallContext) + return nullptr; + + return CallContext->getChildContext( + LineLocation(FunctionSamples::getOffset(DIL), + DIL->getBaseDiscriminator()), + CalleeName); +} + +ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) { + assert(DIL && "Expect non-null location"); + SmallVector, 10> S; + + // Use C++ linkage name if possible. + const DILocation *PrevDIL = DIL; + for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) { + StringRef Name = PrevDIL->getScope()->getSubprogram()->getLinkageName(); + if (Name.empty()) + Name = PrevDIL->getScope()->getSubprogram()->getName(); + S.push_back( + std::make_pair(LineLocation(FunctionSamples::getOffset(DIL), + DIL->getBaseDiscriminator()), Name)); + PrevDIL = DIL; + } + + // Push root node, note that root node like main may only have + // a name, but not a linkage name. + StringRef RootName = PrevDIL->getScope()->getSubprogram()->getLinkageName(); + if (RootName.empty()) + RootName = PrevDIL->getScope()->getSubprogram()->getName(); + S.push_back(std::make_pair(LineLocation(0, 0), RootName)); + + ContextTrieNode *ContextNode = &RootContext; + int I = S.size(); + while (--I >= 0 && ContextNode) { + LineLocation &CallSite = S[I].first; + StringRef &CalleeName = S[I].second; + ContextNode = ContextNode->getChildContext(CallSite, CalleeName); + } + + if (I < 0) + return ContextNode; + + return nullptr; +} + +ContextTrieNode * +SampleContextTracker::getOrCreateContextPath(const SampleContext &Context, + bool AllowCreate) { + ContextTrieNode *ContextNode = &RootContext; + StringRef ContextRemain = Context; + StringRef ChildContext; + StringRef CalleeName; + LineLocation CallSiteLoc(0, 0); + + while (ContextNode && !ContextRemain.empty()) { + auto ContextSplit = SampleContext::splitContextString(ContextRemain); + ChildContext = ContextSplit.first; + ContextRemain = ContextSplit.second; + LineLocation NextCallSiteLoc(0, 0); + 
SampleContext::decodeContextString(ChildContext, CalleeName, + NextCallSiteLoc); + + // Create child node at parent line/disc location + if (AllowCreate) { + ContextNode = + ContextNode->getOrCreateChildContext(CallSiteLoc, CalleeName); + } else { + ContextNode = ContextNode->getChildContext(CallSiteLoc, CalleeName); + } + CallSiteLoc = NextCallSiteLoc; + } + + assert((!AllowCreate || ContextNode) && + "Node must exist if creation is allowed"); + return ContextNode; +} + +ContextTrieNode *SampleContextTracker::getTopLevelContextNode(StringRef FName) { + return RootContext.getChildContext(LineLocation(0, 0), FName); +} + +ContextTrieNode &SampleContextTracker::addTopLevelContextNode(StringRef FName) { + assert(!getTopLevelContextNode(FName) && "Node to add must not exist"); + return *RootContext.getOrCreateChildContext(LineLocation(0, 0), FName); +} + +void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode, + ContextTrieNode &ToNode, + StringRef ContextStrToRemove) { + FunctionSamples *FromSamples = FromNode.getFunctionSamples(); + FunctionSamples *ToSamples = ToNode.getFunctionSamples(); + if (FromSamples && ToSamples) { + // Merge/duplicate FromSamples into ToSamples + ToSamples->merge(*FromSamples); + ToSamples->getContext().setState(SyntheticContext); + FromSamples->getContext().setState(MergedContext); + } else if (FromSamples) { + // Transfer FromSamples from FromNode to ToNode + ToNode.setFunctionSamples(FromSamples); + FromSamples->getContext().setState(SyntheticContext); + FromSamples->getContext().promoteOnPath(ContextStrToRemove); + FromNode.setFunctionSamples(nullptr); + } +} + +ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree( + ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent, + StringRef ContextStrToRemove) { + assert(!ContextStrToRemove.empty() && "Context to remove can't be empty"); + + // Ignore call site location if destination is top level under root + LineLocation NewCallSiteLoc = LineLocation(0, 0); + 
LineLocation OldCallSiteLoc = FromNode.getCallSiteLoc(); + ContextTrieNode &FromNodeParent = *FromNode.getParentContext(); + ContextTrieNode *ToNode = nullptr; + bool MoveToRoot = (&ToNodeParent == &RootContext); + if (!MoveToRoot) { + NewCallSiteLoc = OldCallSiteLoc; + } + + // Locate destination node, create/move if not existing + ToNode = ToNodeParent.getChildContext(NewCallSiteLoc, FromNode.getFuncName()); + if (!ToNode) { + // Do not delete node to move from its parent here because + // caller is iterating over children of that parent node. + ToNode = &ToNodeParent.moveToChildContext( + NewCallSiteLoc, std::move(FromNode), ContextStrToRemove, false); + } else { + // Destination node exists, merge samples for the context tree + mergeContextNode(FromNode, *ToNode, ContextStrToRemove); + LLVM_DEBUG(dbgs() << " Context promoted and merged to: " + << ToNode->getFunctionSamples()->getContext() << "\n"); + + // Recursively promote and merge children + for (auto &It : FromNode.getAllChildContext()) { + ContextTrieNode &FromChildNode = It.second; + promoteMergeContextSamplesTree(FromChildNode, *ToNode, + ContextStrToRemove); + } + + // Remove children once they're all merged + FromNode.getAllChildContext().clear(); + } + + // For root of subtree, remove itself from old parent too + if (MoveToRoot) + FromNodeParent.removeChildContext(OldCallSiteLoc, ToNode->getFuncName()); + + return *ToNode; +} + +} // namespace llvm diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -76,6 +76,7 @@ #include "llvm/Support/GenericDomTree.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/SampleContextTracker.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -424,6 +425,9 @@ /// Profile reader 
object. std::unique_ptr Reader; + /// Profile tracker for different context. + std::unique_ptr ContextTracker; + /// Samples collected for the body of this function. FunctionSamples *Samples = nullptr; @@ -436,6 +440,9 @@ /// Flag indicating whether the profile input loaded successfully. bool ProfileIsValid = false; + /// Flag indicating whether input profile is context-sensitive + bool ProfileIsCS = false; + /// Flag indicating if the pass is invoked in ThinLTO compile phase. /// /// In this phase, in annotation, we should not promote indirect calls. @@ -733,9 +740,10 @@ // (findCalleeFunctionSamples returns non-empty result), but not inlined here, // it means that the inlined callsite has no sample, thus the call // instruction should have 0 count. - if (auto *CB = dyn_cast(&Inst)) - if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB)) - return 0; + if (!ProfileIsCS) + if (const auto *CB = dyn_cast(&Inst)) + if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB)) + return 0; const DILocation *DIL = DLoc; uint32_t LineOffset = FunctionSamples::getOffset(DIL); @@ -831,7 +839,10 @@ StringRef CalleeName; if (Function *Callee = Inst.getCalledFunction()) - CalleeName = Callee->getName(); + CalleeName = FunctionSamples::getCanonicalFnName(*Callee); + + if (ProfileIsCS) + return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName); const FunctionSamples *FS = findFunctionSamples(Inst); if (FS == nullptr) @@ -901,8 +912,13 @@ return Samples; auto it = DILocation2SampleMap.try_emplace(DIL,nullptr); - if (it.second) - it.first->second = Samples->findFunctionSamples(DIL, Reader->getRemapper()); + if (it.second) { + if (ProfileIsCS) + it.first->second = ContextTracker->getContextSamplesFor(DIL); + else + it.first->second = + Samples->findFunctionSamples(DIL, Reader->getRemapper()); + } return it.first->second; } @@ -957,6 +973,12 @@ InlineCost Cost = getInlineCost(CallInst, getInlineParams(), GetTTI(*Callee), GetAC, GetTLI); + if (Cost.isNever()) + return 
false; + + if (Cost.isAlways()) + return true; + return Cost.getCost() <= SampleColdCallSiteThreshold; } @@ -1017,7 +1039,7 @@ assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) && "GUIDToFuncNameMap has to be populated"); AllCandidates.push_back(CB); - if (FS->getEntrySamples() > 0) + if (FS->getEntrySamples() > 0 || ProfileIsCS) localNotInlinedCallSites.try_emplace(CB, FS); if (callsiteIsHot(FS, PSI)) Hot = true; @@ -1075,6 +1097,8 @@ // If profile mismatches, we should not attempt to inline DI. if ((isa(DI) || isa(DI)) && inlineCallInstruction(cast(DI))) { + if (ProfileIsCS) + ContextTracker->markContextSamplesInlined(FS); localNotInlinedCallSites.erase(I); LocalChanged = true; ++NumCSInlined; @@ -1088,6 +1112,9 @@ } else if (CalledFunction && CalledFunction->getSubprogram() && !CalledFunction->isDeclaration()) { if (inlineCallInstruction(*I)) { + if (ProfileIsCS) + ContextTracker->markContextSamplesInlined( + localNotInlinedCallSites[I]); localNotInlinedCallSites.erase(I); LocalChanged = true; ++NumCSInlined; @@ -1875,6 +1902,16 @@ ExternalInlineAdvisor.reset(); } + // Apply tweaks if context-sensitive profile is available. + if (Reader->profileIsCS()) { + ProfileIsCS = true; + FunctionSamples::ProfileIsCS = true; + + // Tracker for profiles under different context + ContextTracker = + std::make_unique(Reader->getProfiles()); + } + return true; } @@ -1940,9 +1977,10 @@ } // Account for cold calls not inlined.... - for (const std::pair &pair : - notInlinedCallInfo) - updateProfileCallee(pair.first, pair.second.entryCount); + if (!ProfileIsCS) + for (const std::pair &pair : + notInlinedCallInfo) + updateProfileCallee(pair.first, pair.second.entryCount); return retval; } @@ -1957,7 +1995,6 @@ } bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) { - DILocation2SampleMap.clear(); // By default the entry count is initialized to -1, which will be treated // conservatively by getEntryCount as the same as unknown (None). 
This is @@ -2010,7 +2047,12 @@ OwnedORE = std::make_unique(&F); ORE = OwnedORE.get(); } - Samples = Reader->getSamplesFor(F); + + if (ProfileIsCS) + Samples = ContextTracker->getBaseSamplesFor(F); + else + Samples = Reader->getSamplesFor(F); + if (Samples && !Samples->empty()) return emitAnnotations(F); return false; diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-context-tracker.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-context-tracker.prof new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-context-tracker.prof @@ -0,0 +1,36 @@ +[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]:1467299:11 + 0: 6 + 1: 6 + 3: 287884 + 4: 287864 _Z3fibi:315608 + 15: 23 +[main:3.1 @ _Z5funcBi:1 @ _Z8funcLeafi]:500853:20 + 0: 15 + 1: 15 + 3: 74946 + 4: 74941 _Z3fibi:82359 + 10: 23324 + 11: 23327 _Z3fibi:25228 + 15: 11 +[main]:154:0 + 2: 12 + 3: 18 _Z5funcAi:11 + 3.1: 18 _Z5funcBi:19 +[external:12 @ main]:154:12 + 2: 12 + 3: 10 _Z5funcAi:7 + 3.1: 10 _Z5funcBi:11 +[main:3.1 @ _Z5funcBi]:120:19 + 0: 19 + 1: 19 _Z8funcLeafi:20 + 3: 12 +[externalA:17 @ _Z5funcBi]:120:3 + 0: 3 + 1: 3 +[external:10 @ _Z5funcBi]:120:10 + 0: 10 + 1: 10 +[main:3 @ _Z5funcAi]:99:11 + 0: 10 + 1: 10 _Z8funcLeafi:11 + 3: 24 diff --git a/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll b/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll @@ -0,0 +1,234 @@ +; REQUIRES: asserts +; Test for CSSPGO's SampleContextTracker to make sure context profile tree is promoted and merged properly +; based on inline decision, so post inline counts are accurate. 
+ +; Note that we need new pass manager to enable top-down processing for sample profile loader +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -debug-only=sample-context-tracker -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-ALL +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -debug-only=sample-context-tracker -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-HOT + + +; Test that we inlined the following in top-down order and promoted the rest of the not-inlined context profiles into the base profile +; main:3 @ _Z5funcAi +; main:3 @ _Z5funcAi:1 @ _Z8funcLeafi +; _Z5funcBi:1 @ _Z8funcLeafi +; INLINE-ALL: Getting base profile for function: main +; INLINE-ALL-NEXT: Merging context profile into base profile: main +; INLINE-ALL-NEXT: Found context tree root to promote: external:12 @ main +; INLINE-ALL-NEXT: Context promoted and merged to: main +; INLINE-ALL-NEXT: Getting callee context for instr: %call = tail call i32 @_Z5funcBi +; INLINE-ALL-NEXT: Callee context found: main:3.1 @ _Z5funcBi +; INLINE-ALL-NEXT: Getting callee context for instr: %call1 = tail call i32 @_Z5funcAi +; INLINE-ALL-NEXT: Callee context found: main:3 @ _Z5funcAi +; INLINE-ALL-NEXT: Marking context profile as inlined: main:3 @ _Z5funcAi +; INLINE-ALL-NEXT: Getting callee context for instr: %call = tail call i32 @_Z5funcBi( +; INLINE-ALL-NEXT: Callee context found: main:3.1 @ _Z5funcBi +; INLINE-ALL-NEXT: Getting callee context for instr: %call.i = tail call i32 @_Z8funcLeafi +; INLINE-ALL-NEXT: Callee context found: main:3 @ _Z5funcAi:1 @ _Z8funcLeafi +; INLINE-ALL-NEXT: Marking context profile as inlined: main:3 @ _Z5funcAi:1 @ _Z8funcLeafi +; INLINE-ALL-NEXT: Getting callee context for instr: %call = tail call i32 @_Z5funcBi +; INLINE-ALL-NEXT: Callee context found: main:3.1 @ _Z5funcBi +; INLINE-ALL-NEXT: Getting callee context for instr: %call.i1 = tail call i32
@_Z3fibi +; INLINE-ALL-NEXT: Getting callee context for instr: %call5.i = tail call i32 @_Z3fibi +; INLINE-ALL-NEXT: Getting base profile for function: _Z5funcAi +; INLINE-ALL-NEXT: Merging context profile into base profile: _Z5funcAi +; INLINE-ALL-NEXT: Getting base profile for function: _Z5funcBi +; INLINE-ALL-NEXT: Merging context profile into base profile: _Z5funcBi +; INLINE-ALL-NEXT: Found context tree root to promote: external:10 @ _Z5funcBi +; INLINE-ALL-NEXT: Context promoted to: _Z5funcBi +; INLINE-ALL-NEXT: Found context tree root to promote: main:3.1 @ _Z5funcBi +; INLINE-ALL-NEXT: Context promoted and merged to: _Z5funcBi +; INLINE-ALL-NEXT: Context promoted to: _Z5funcBi:1 @ _Z8funcLeafi +; INLINE-ALL-NEXT: Found context tree root to promote: externalA:17 @ _Z5funcBi +; INLINE-ALL-NEXT: Context promoted and merged to: _Z5funcBi +; INLINE-ALL-NEXT: Getting callee context for instr: %call = tail call i32 @_Z8funcLeafi +; INLINE-ALL-NEXT: Callee context found: _Z5funcBi:1 @ _Z8funcLeafi +; INLINE-ALL-NEXT: Marking context profile as inlined: _Z5funcBi:1 @ _Z8funcLeafi +; INLINE-ALL-NEXT: Getting callee context for instr: %call.i = tail call i32 @_Z3fibi +; INLINE-ALL-NEXT: Getting callee context for instr: %call5.i = tail call i32 @_Z3fibi +; INLINE-ALL-NEXT: Getting base profile for function: _Z8funcLeafi +; INLINE-ALL-NEXT: Merging context profile into base profile: _Z8funcLeafi + +; Test that we inlined the following in top-down order and promoted the rest of the not-inlined context profiles into the base profile +; main:3 @ _Z5funcAi +; _Z5funcAi:1 @ _Z8funcLeafi +; _Z5funcBi:1 @ _Z8funcLeafi +; INLINE-HOT: Getting base profile for function: main +; INLINE-HOT-NEXT: Merging context profile into base profile: main +; INLINE-HOT-NEXT: Found context tree root to promote: external:12 @ main +; INLINE-HOT-NEXT: Context promoted and merged to: main +; INLINE-HOT-NEXT: Getting callee context for instr: %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !58 +; INLINE-HOT-NEXT:
Callee context found: main:3.1 @ _Z5funcBi +; INLINE-HOT-NEXT: Getting callee context for instr: %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !63 +; INLINE-HOT-NEXT: Callee context found: main:3 @ _Z5funcAi +; INLINE-HOT-NEXT: Getting base profile for function: _Z5funcAi +; INLINE-HOT-NEXT: Merging context profile into base profile: _Z5funcAi +; INLINE-HOT-NEXT: Found context tree root to promote: main:3 @ _Z5funcAi +; INLINE-HOT-NEXT: Context promoted to: _Z5funcAi +; INLINE-HOT-NEXT: Context promoted to: _Z5funcAi:1 @ _Z8funcLeafi +; INLINE-HOT-NEXT: Getting callee context for instr: %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !50 +; INLINE-HOT-NEXT: Callee context found: _Z5funcAi:1 @ _Z8funcLeafi +; INLINE-HOT-NEXT: Marking context profile as inlined: _Z5funcAi:1 @ _Z8funcLeafi +; INLINE-HOT-NEXT: Getting callee context for instr: %call.i = tail call i32 @_Z3fibi(i32 %tmp.i) #2, !dbg !62 +; INLINE-HOT-NEXT: Getting callee context for instr: %call5.i = tail call i32 @_Z3fibi(i32 %tmp1.i) #2, !dbg !69 +; INLINE-HOT-NEXT: Getting base profile for function: _Z5funcBi +; INLINE-HOT-NEXT: Merging context profile into base profile: _Z5funcBi +; INLINE-HOT-NEXT: Found context tree root to promote: external:10 @ _Z5funcBi +; INLINE-HOT-NEXT: Context promoted to: _Z5funcBi +; INLINE-HOT-NEXT: Found context tree root to promote: main:3.1 @ _Z5funcBi +; INLINE-HOT-NEXT: Context promoted and merged to: _Z5funcBi +; INLINE-HOT-NEXT: Context promoted to: _Z5funcBi:1 @ _Z8funcLeafi +; INLINE-HOT-NEXT: Found context tree root to promote: externalA:17 @ _Z5funcBi +; INLINE-HOT-NEXT: Context promoted and merged to: _Z5funcBi +; INLINE-HOT-NEXT: Getting callee context for instr: %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !50 +; INLINE-HOT-NEXT: Callee context found: _Z5funcBi:1 @ _Z8funcLeafi +; INLINE-HOT-NEXT: Marking context profile as inlined: _Z5funcBi:1 @ _Z8funcLeafi +; INLINE-HOT-NEXT: Getting callee context for instr: %call.i = tail call i32 
@_Z3fibi(i32 %tmp.i) #2, !dbg !62 +; INLINE-HOT-NEXT: Getting callee context for instr: %call5.i = tail call i32 @_Z3fibi(i32 %tmp1.i) #2, !dbg !69 +; INLINE-HOT-NEXT: Getting base profile for function: _Z8funcLeafi +; INLINE-HOT-NEXT: Merging context profile into base profile: _Z8funcLeafi + + +@factor = dso_local global i32 3, align 4, !dbg !0 + +define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 { +entry: + br label %for.body, !dbg !25 + +for.cond.cleanup: ; preds = %for.body + ret i32 %add3, !dbg !27 + +for.body: ; preds = %for.body, %entry + %x.011 = phi i32 [ 300000, %entry ], [ %dec, %for.body ] + %r.010 = phi i32 [ 0, %entry ], [ %add3, %for.body ] + %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !32 + %add = add nuw nsw i32 %x.011, 1, !dbg !31 + %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !28 + %add2 = add i32 %call, %r.010, !dbg !34 + %add3 = add i32 %add2, %call1, !dbg !35 + %dec = add nsw i32 %x.011, -1, !dbg !36 + %cmp = icmp eq i32 %x.011, 0, !dbg !38 + br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25 +} + +define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #1 !dbg !40 { +entry: + %add = add nsw i32 %x, 100000, !dbg !44 + %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !45 + ret i32 %call, !dbg !46 +} + +define dso_local i32 @_Z8funcLeafi(i32 %x) local_unnamed_addr #1 !dbg !54 { +entry: + %cmp = icmp sgt i32 %x, 0, !dbg !57 + br i1 %cmp, label %while.body, label %while.cond2.preheader, !dbg !59 + +while.cond2.preheader: ; preds = %entry + %cmp313 = icmp slt i32 %x, 0, !dbg !60 + br i1 %cmp313, label %while.body4, label %if.end, !dbg !63 + +while.body: ; preds = %while.body, %entry + %x.addr.016 = phi i32 [ %sub, %while.body ], [ %x, %entry ] + %tmp = load volatile i32, i32* @factor, align 4, !dbg !64 + %call = tail call i32 @_Z3fibi(i32 %tmp), !dbg !67 + %sub = sub nsw i32 %x.addr.016, %call, !dbg !68 + %cmp1 = icmp sgt i32 %sub, 0, !dbg !69 + br i1 %cmp1, label %while.body, label %if.end, !dbg !71 + 
+while.body4: ; preds = %while.body4, %while.cond2.preheader + %x.addr.114 = phi i32 [ %add, %while.body4 ], [ %x, %while.cond2.preheader ] + %tmp1 = load volatile i32, i32* @factor, align 4, !dbg !72 + %call5 = tail call i32 @_Z3fibi(i32 %tmp1), !dbg !74 + %add = add nsw i32 %call5, %x.addr.114, !dbg !75 + %cmp3 = icmp slt i32 %add, 0, !dbg !60 + br i1 %cmp3, label %while.body4, label %if.end, !dbg !63 + +if.end: ; preds = %while.body4, %while.body, %while.cond2.preheader + %x.addr.2 = phi i32 [ 0, %while.cond2.preheader ], [ %sub, %while.body ], [ %add, %while.body4 ] + ret i32 %x.addr.2, !dbg !76 +} + +define dso_local i32 @_Z5funcBi(i32 %x) local_unnamed_addr #0 !dbg !47 { +entry: + %sub = add nsw i32 %x, -100000, !dbg !51 + %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !52 + ret i32 %call, !dbg !53 +} + +declare i32 @_Z3fibi(i32) + +attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } +attributes #1 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!14, !15, !16} +!llvm.ident 
= !{!17} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "factor", scope: !2, file: !3, line: 21, type: !13, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 11.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !12, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!3 = !DIFile(filename: "merged.cpp", directory: "/local/autofdo") +!4 = !{} +!5 = !{!6, !10, !11} +!6 = !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 6, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4) +!7 = !DISubroutineType(types: !8) +!8 = !{!9, !9} +!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!10 = !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 7, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4) +!11 = !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 22, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4) +!12 = !{!0} +!13 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !9) +!14 = !{i32 7, !"Dwarf Version", i32 4} +!15 = !{i32 2, !"Debug Info Version", i32 3} +!16 = !{i32 1, !"wchar_size", i32 4} +!17 = !{!"clang version 11.0.0"} +!18 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !19, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21) +!19 = !DISubroutineType(types: !20) +!20 = !{!9} +!21 = !{!22, !23} +!22 = !DILocalVariable(name: "r", scope: !18, file: !3, line: 12, type: !9) +!23 = !DILocalVariable(name: "x", scope: !24, file: !3, line: 13, type: !9) +!24 = distinct !DILexicalBlock(scope: !18, file: !3, line: 13, 
column: 3) +!25 = !DILocation(line: 13, column: 3, scope: !26) +!26 = !DILexicalBlockFile(scope: !24, file: !3, discriminator: 2) +!27 = !DILocation(line: 17, column: 3, scope: !18) +!28 = !DILocation(line: 14, column: 10, scope: !29) +!29 = distinct !DILexicalBlock(scope: !30, file: !3, line: 13, column: 37) +!30 = distinct !DILexicalBlock(scope: !24, file: !3, line: 13, column: 3) +!31 = !DILocation(line: 14, column: 29, scope: !29) +!32 = !DILocation(line: 14, column: 21, scope: !33) +!33 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 2) +!34 = !DILocation(line: 14, column: 19, scope: !29) +!35 = !DILocation(line: 14, column: 7, scope: !29) +!36 = !DILocation(line: 13, column: 33, scope: !37) +!37 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 6) +!38 = !DILocation(line: 13, column: 26, scope: !39) +!39 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 2) +!40 = distinct !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 26, type: !7, scopeLine: 26, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!44 = !DILocation(line: 27, column: 22, scope: !40) +!45 = !DILocation(line: 27, column: 11, scope: !40) +!46 = !DILocation(line: 29, column: 3, scope: !40) +!47 = distinct !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 32, type: !7, scopeLine: 32, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!51 = !DILocation(line: 33, column: 22, scope: !47) +!52 = !DILocation(line: 33, column: 11, scope: !47) +!53 = !DILocation(line: 35, column: 3, scope: !47) +!54 = distinct !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 48, type: !7, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!57 = !DILocation(line: 49, column: 9, scope: !58) +!58 = 
distinct !DILexicalBlock(scope: !54, file: !3, line: 49, column: 7) +!59 = !DILocation(line: 49, column: 7, scope: !54) +!60 = !DILocation(line: 58, column: 14, scope: !61) +!61 = !DILexicalBlockFile(scope: !62, file: !3, discriminator: 2) +!62 = distinct !DILexicalBlock(scope: !58, file: !3, line: 56, column: 8) +!63 = !DILocation(line: 58, column: 5, scope: !61) +!64 = !DILocation(line: 52, column: 16, scope: !65) +!65 = distinct !DILexicalBlock(scope: !66, file: !3, line: 51, column: 19) +!66 = distinct !DILexicalBlock(scope: !58, file: !3, line: 49, column: 14) +!67 = !DILocation(line: 52, column: 12, scope: !65) +!68 = !DILocation(line: 52, column: 9, scope: !65) +!69 = !DILocation(line: 51, column: 14, scope: !70) +!70 = !DILexicalBlockFile(scope: !66, file: !3, discriminator: 2) +!71 = !DILocation(line: 51, column: 5, scope: !70) +!72 = !DILocation(line: 59, column: 16, scope: !73) +!73 = distinct !DILexicalBlock(scope: !62, file: !3, line: 58, column: 19) +!74 = !DILocation(line: 59, column: 12, scope: !73) +!75 = !DILocation(line: 59, column: 9, scope: !73) +!76 = !DILocation(line: 63, column: 3, scope: !54) diff --git a/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll b/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll @@ -0,0 +1,197 @@ +; Test for CSSPGO's SampleContextTracker to make sure context profile tree is promoted and merged properly +; based on inline decision, so post inline counts are accurate. 
+ +; Note that we need new pass manager to enable top-down processing for sample profile loader +; Test that we inlined the following in top-down order and that entry counts accurately reflect the post-inline base profile +; main:3 @ _Z5funcAi +; main:3 @ _Z5funcAi:1 @ _Z8funcLeafi +; _Z5funcBi:1 @ _Z8funcLeafi +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL + +; Test that we inlined the following in top-down order and that entry counts accurately reflect the post-inline base profile +; main:3 @ _Z5funcAi +; _Z5funcAi:1 @ _Z8funcLeafi +; _Z5funcBi:1 @ _Z8funcLeafi +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-HOT + + +@factor = dso_local global i32 3, align 4, !dbg !0 + +define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 { +; INLINE-ALL: @main{{.*}}!prof ![[MAIN_PROF:[0-9]+]] +; INLINE-HOT: @main{{.*}}!prof ![[MAIN_PROF:[0-9]+]] +entry: + br label %for.body, !dbg !25 + +for.cond.cleanup: ; preds = %for.body + ret i32 %add3, !dbg !27 + +for.body: ; preds = %for.body, %entry + %x.011 = phi i32 [ 300000, %entry ], [ %dec, %for.body ] + %r.010 = phi i32 [ 0, %entry ], [ %add3, %for.body ] + %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !32 +; _Z5funcBi is marked noinline +; INLINE-ALL: call i32 @_Z5funcBi +; INLINE-HOT: call i32 @_Z5funcBi + %add = add nuw nsw i32 %x.011, 1, !dbg !31 + %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !28 +; INLINE-ALL-NOT: call i32 @_Z5funcAi +; INLINE-HOT: call i32 @_Z5funcAi + %add2 = add i32 %call, %r.010, !dbg !34 + %add3 = add i32 %add2, %call1, !dbg !35 + %dec = add nsw i32 %x.011, -1, !dbg !36 + %cmp = icmp eq i32 %x.011, 0, !dbg !38 + br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25 +} + +define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #1 !dbg !40 {
+; _Z5funcAi is inlined, so outline remainder should have zero counts +; INLINE-ALL: @_Z5funcAi{{.*}}!prof ![[FUNCA_PROF:[0-9]+]] +; INLINE-HOT: @_Z5funcAi{{.*}}!prof ![[FUNCA_PROF:[0-9]+]] +entry: + %add = add nsw i32 %x, 100000, !dbg !44 +; _Z8funcLeafi is already inlined on main->_Z5funcAi->_Z8funcLeafi, +; so it should not be inlined on _Z5funcAi->_Z8funcLeafi based on updated +; (merged and promoted) context profile +; INLINE-ALL: call i32 @_Z8funcLeafi +; INLINE-HOT-NOT: call i32 @_Z8funcLeafi + %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !45 + ret i32 %call, !dbg !46 +} + +define dso_local i32 @_Z8funcLeafi(i32 %x) local_unnamed_addr #1 !dbg !54 { +; main->_Z5funcAi->_Z8funcLeafi is inlined, and _Z5funcBi->_Z8funcLeafi is also +; inlined, so outline remainder should have empty profile +; INLINE-ALL: @_Z8funcLeafi{{.*}}!prof ![[LEAF_PROF:[0-9]+]] +; INLINE-HOT: @_Z8funcLeafi{{.*}}!prof ![[LEAF_PROF:[0-9]+]] +entry: + %cmp = icmp sgt i32 %x, 0, !dbg !57 + br i1 %cmp, label %while.body, label %while.cond2.preheader, !dbg !59 + +while.cond2.preheader: ; preds = %entry + %cmp313 = icmp slt i32 %x, 0, !dbg !60 + br i1 %cmp313, label %while.body4, label %if.end, !dbg !63 + +while.body: ; preds = %while.body, %entry + %x.addr.016 = phi i32 [ %sub, %while.body ], [ %x, %entry ] + %tmp = load volatile i32, i32* @factor, align 4, !dbg !64 + %call = tail call i32 @_Z3fibi(i32 %tmp), !dbg !67 + %sub = sub nsw i32 %x.addr.016, %call, !dbg !68 + %cmp1 = icmp sgt i32 %sub, 0, !dbg !69 + br i1 %cmp1, label %while.body, label %if.end, !dbg !71 + +while.body4: ; preds = %while.body4, %while.cond2.preheader + %x.addr.114 = phi i32 [ %add, %while.body4 ], [ %x, %while.cond2.preheader ] + %tmp1 = load volatile i32, i32* @factor, align 4, !dbg !72 + %call5 = tail call i32 @_Z3fibi(i32 %tmp1), !dbg !74 + %add = add nsw i32 %call5, %x.addr.114, !dbg !75 + %cmp3 = icmp slt i32 %add, 0, !dbg !60 + br i1 %cmp3, label %while.body4, label %if.end, !dbg !63 + +if.end: ; preds = 
%while.body4, %while.body, %while.cond2.preheader + %x.addr.2 = phi i32 [ 0, %while.cond2.preheader ], [ %sub, %while.body ], [ %add, %while.body4 ] + ret i32 %x.addr.2, !dbg !76 +} + +define dso_local i32 @_Z5funcBi(i32 %x) local_unnamed_addr #0 !dbg !47 { +; _Z5funcBi is marked noinline, so outline remainder has promoted context profile +; INLINE-ALL: @_Z5funcBi{{.*}}!prof ![[FUNCB_PROF:[0-9]+]] +; INLINE-HOT: @_Z5funcBi{{.*}}!prof ![[FUNCB_PROF:[0-9]+]] +entry: + %sub = add nsw i32 %x, -100000, !dbg !51 + %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !52 +; _Z5funcBi is not inlined into main, so main->_Z5funcBi->_Z8funcLeafi +; should be inlined based on promoted context profile +; INLINE-ALL-NOT: call i32 @_Z8funcLeafi +; INLINE-HOT-NOT: call i32 @_Z8funcLeafi + ret i32 %call, !dbg !53 +} + +; INLINE-ALL-DAG: [[MAIN_PROF]] = !{!"function_entry_count", i64 13} +; INLINE-ALL-DAG: [[FUNCA_PROF]] = !{!"function_entry_count", i64 0} +; INLINE-ALL-DAG-SAME: [[LEAF_PROF]] = !{!"function_entry_count", i64 0} +; INLINE-ALL-DAG: [[FUNCB_PROF]] = !{!"function_entry_count", i64 33} + +; INLINE-HOT-DAG: [[MAIN_PROF]] = !{!"function_entry_count", i64 13} +; INLINE-HOT-DAG: [[FUNCA_PROF]] = !{!"function_entry_count", i64 12} +; INLINE-HOT-DAG-SAME: [[LEAF_PROF]] = !{!"function_entry_count", i64 0} +; INLINE-HOT-DAG: [[FUNCB_PROF]] = !{!"function_entry_count", i64 33} + +declare i32 @_Z3fibi(i32) + +attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } +attributes #1 = { nofree norecurse
nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!14, !15, !16} +!llvm.ident = !{!17} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "factor", scope: !2, file: !3, line: 21, type: !13, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 11.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !12, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!3 = !DIFile(filename: "merged.cpp", directory: "/local/autofdo") +!4 = !{} +!5 = !{!6, !10, !11} +!6 = !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 6, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4) +!7 = !DISubroutineType(types: !8) +!8 = !{!9, !9} +!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!10 = !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 7, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4) +!11 = !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 22, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4) +!12 = !{!0} +!13 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !9) +!14 = !{i32 7, !"Dwarf Version", i32 4} +!15 = !{i32 2, !"Debug Info Version", i32 3} +!16 = !{i32 1, 
!"wchar_size", i32 4} +!17 = !{!"clang version 11.0.0"} +!18 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !19, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21) +!19 = !DISubroutineType(types: !20) +!20 = !{!9} +!21 = !{!22, !23} +!22 = !DILocalVariable(name: "r", scope: !18, file: !3, line: 12, type: !9) +!23 = !DILocalVariable(name: "x", scope: !24, file: !3, line: 13, type: !9) +!24 = distinct !DILexicalBlock(scope: !18, file: !3, line: 13, column: 3) +!25 = !DILocation(line: 13, column: 3, scope: !26) +!26 = !DILexicalBlockFile(scope: !24, file: !3, discriminator: 2) +!27 = !DILocation(line: 17, column: 3, scope: !18) +!28 = !DILocation(line: 14, column: 10, scope: !29) +!29 = distinct !DILexicalBlock(scope: !30, file: !3, line: 13, column: 37) +!30 = distinct !DILexicalBlock(scope: !24, file: !3, line: 13, column: 3) +!31 = !DILocation(line: 14, column: 29, scope: !29) +!32 = !DILocation(line: 14, column: 21, scope: !33) +!33 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 2) +!34 = !DILocation(line: 14, column: 19, scope: !29) +!35 = !DILocation(line: 14, column: 7, scope: !29) +!36 = !DILocation(line: 13, column: 33, scope: !37) +!37 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 6) +!38 = !DILocation(line: 13, column: 26, scope: !39) +!39 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 2) +!40 = distinct !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 26, type: !7, scopeLine: 26, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!44 = !DILocation(line: 27, column: 22, scope: !40) +!45 = !DILocation(line: 27, column: 11, scope: !40) +!46 = !DILocation(line: 29, column: 3, scope: !40) +!47 = distinct !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 32, type: !7, scopeLine: 32, 
flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!51 = !DILocation(line: 33, column: 22, scope: !47) +!52 = !DILocation(line: 33, column: 11, scope: !47) +!53 = !DILocation(line: 35, column: 3, scope: !47) +!54 = distinct !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 48, type: !7, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!57 = !DILocation(line: 49, column: 9, scope: !58) +!58 = distinct !DILexicalBlock(scope: !54, file: !3, line: 49, column: 7) +!59 = !DILocation(line: 49, column: 7, scope: !54) +!60 = !DILocation(line: 58, column: 14, scope: !61) +!61 = !DILexicalBlockFile(scope: !62, file: !3, discriminator: 2) +!62 = distinct !DILexicalBlock(scope: !58, file: !3, line: 56, column: 8) +!63 = !DILocation(line: 58, column: 5, scope: !61) +!64 = !DILocation(line: 52, column: 16, scope: !65) +!65 = distinct !DILexicalBlock(scope: !66, file: !3, line: 51, column: 19) +!66 = distinct !DILexicalBlock(scope: !58, file: !3, line: 49, column: 14) +!67 = !DILocation(line: 52, column: 12, scope: !65) +!68 = !DILocation(line: 52, column: 9, scope: !65) +!69 = !DILocation(line: 51, column: 14, scope: !70) +!70 = !DILexicalBlockFile(scope: !66, file: !3, discriminator: 2) +!71 = !DILocation(line: 51, column: 5, scope: !70) +!72 = !DILocation(line: 59, column: 16, scope: !73) +!73 = distinct !DILexicalBlock(scope: !62, file: !3, line: 58, column: 19) +!74 = !DILocation(line: 59, column: 12, scope: !73) +!75 = !DILocation(line: 59, column: 9, scope: !73) +!76 = !DILocation(line: 63, column: 3, scope: !54)