diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -341,7 +341,12 @@ // Values should be powers of two so that they can be ORed, in particular to // track allocations that have different behavior with different calling // contexts. -enum class AllocationType : uint8_t { None = 0, NotCold = 1, Cold = 2 }; +enum class AllocationType : uint8_t { + None = 0, + NotCold = 1, + Cold = 2, + All = 3 // This should always be set to the OR of all values. +}; /// Summary of a single MIB in a memprof metadata on allocations. struct MIBInfo { diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -67,22 +67,6 @@ VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, cl::desc("Perform frequent verification checks on nodes.")); -inline bool hasSingleAllocType(uint8_t AllocTypes) { - switch (AllocTypes) { - case (uint8_t)AllocationType::Cold: - case (uint8_t)AllocationType::NotCold: - return true; - break; - case (uint8_t)AllocationType::None: - assert(false); - break; - default: - return false; - break; - } - llvm_unreachable("invalid alloc type"); -} - /// CRTP base for graphs built from either IR or ThinLTO summary index. /// /// The graph represents the call contexts in all memprof metadata on allocation @@ -107,6 +91,10 @@ /// Main entry point to perform analysis and transformations on graph. bool process(); + /// Perform cloning on the graph necessary to uniquely identify the allocation + /// behavior of an allocation based on its context. 
+ void identifyClones(); + void dump() const; void print(raw_ostream &OS) const; @@ -214,16 +202,15 @@ ContextNode(bool IsAllocation, CallInfo C) : IsAllocation(IsAllocation), Call(C) {} - std::unique_ptr clone() { - auto Clone = std::make_unique(IsAllocation, Call); + void addClone(ContextNode *Clone) { if (CloneOf) { - CloneOf->Clones.push_back(Clone.get()); + CloneOf->Clones.push_back(Clone); Clone->CloneOf = CloneOf; } else { - Clones.push_back(Clone.get()); + Clones.push_back(Clone); + assert(!Clone->CloneOf); Clone->CloneOf = this; } - return Clone; } ContextNode *getOrigNode() { @@ -292,6 +279,10 @@ } }; + /// Helper to remove callee edges that have allocation type None (due to not + /// carrying any context ids) after transformations. + void removeNoneTypeCalleeEdges(ContextNode *Node); + protected: /// Get a list of nodes corresponding to the stack ids in the given callsite /// context. @@ -403,6 +394,40 @@ /// unioning their recorded alloc types. uint8_t computeAllocType(DenseSet &ContextIds); + /// Returns the allocation type of the intersection of the contexts of two + /// nodes (based on their provided context id sets), optimized for the case + /// when Node1Ids is smaller than Node2Ids. + uint8_t intersectAllocTypesImpl(const DenseSet &Node1Ids, + const DenseSet &Node2Ids); + + /// Returns the allocation type of the intersection of the contexts of two + /// nodes (based on their provided context id sets). + uint8_t intersectAllocTypes(const DenseSet &Node1Ids, + const DenseSet &Node2Ids); + + /// Create a clone of Edge's callee and move Edge to that new callee node, + /// performing the necessary context id and allocation type updates. + /// If callee's caller edge iterator is supplied, it is updated when removing + /// the edge from that list. 
+ ContextNode * + moveEdgeToNewCalleeClone(const std::shared_ptr &Edge, + EdgeIter *CallerEdgeI = nullptr); + + /// Change the callee of Edge to existing callee clone NewCallee, performing + /// the necessary context id and allocation type updates. + /// If callee's caller edge iterator is supplied, it is updated when removing + /// the edge from that list. + void moveEdgeToExistingCalleeClone(const std::shared_ptr &Edge, + ContextNode *NewCallee, + EdgeIter *CallerEdgeI = nullptr, + bool NewClone = false); + + /// Recursively perform cloning on the graph for the given Node and its + /// callers, in order to uniquely identify the allocation behavior of an + /// allocation given its context. + void identifyClones(ContextNode *Node, + DenseSet &Visited); + /// Map from each context ID to the AllocationType assigned to that context. std::map ContextIdToAllocationType; @@ -543,6 +568,28 @@ return (AllocationType)AllocTypes; } +// Helper to check if the alloc types for all edges recorded in the +// InAllocTypes vector match the alloc types for all edges in the Edges +// vector. +template +bool allocTypesMatch( + const std::vector &InAllocTypes, + const std::vector>> + &Edges) { + return std::equal( + InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), + [](const uint8_t &l, + const std::shared_ptr> &r) { + // Can share if one of the edges is None type - don't + // care about the type along that edge as it doesn't + // exist for those context ids. 
+ if (l == (uint8_t)AllocationType::None || + r->AllocTypes == (uint8_t)AllocationType::None) + return true; + return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes); + }); +} + } // end anonymous namespace template @@ -607,6 +654,20 @@ Caller->CalleeEdges.push_back(Edge); } +template +void CallsiteContextGraph< + DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) { + for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) { + auto Edge = *EI; + if (Edge->AllocTypes == (uint8_t)AllocationType::None) { + assert(Edge->ContextIds.empty()); + Edge->Callee->eraseCallerEdge(Edge.get()); + EI = Node->CalleeEdges.erase(EI); + } else + ++EI; + } +} + template typename CallsiteContextGraph::ContextEdge * CallsiteContextGraph::ContextNode:: @@ -666,6 +727,33 @@ return AllocType; } +template +uint8_t +CallsiteContextGraph::intersectAllocTypesImpl( + const DenseSet &Node1Ids, const DenseSet &Node2Ids) { + uint8_t BothTypes = + (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold; + uint8_t AllocType = (uint8_t)AllocationType::None; + for (auto Id : Node1Ids) { + if (!Node2Ids.count(Id)) + continue; + AllocType |= (uint8_t)ContextIdToAllocationType[Id]; + // Bail early if alloc type reached both, no further refinement. + if (AllocType == BothTypes) + return AllocType; + } + return AllocType; +} + +template +uint8_t CallsiteContextGraph::intersectAllocTypes( + const DenseSet &Node1Ids, const DenseSet &Node2Ids) { + if (Node1Ids.size() < Node2Ids.size()) + return intersectAllocTypesImpl(Node1Ids, Node2Ids); + else + return intersectAllocTypesImpl(Node2Ids, Node1Ids); +} + template typename CallsiteContextGraph::ContextNode * CallsiteContextGraph::addAllocNode( @@ -1121,7 +1209,7 @@ // not fully matching stack contexts. To do this, subtract any context ids // found in caller nodes of the last node found above. 
if (Ids.back() != getLastStackId(Call)) { - for (const auto &PE : LastNode->CallerEdges) { + for (const auto &PE : CurNode->CallerEdges) { set_subtract(StackSequenceContextIds, PE->getContextIds()); if (StackSequenceContextIds.empty()) break; @@ -1572,7 +1660,8 @@ } template -static void checkNode(const ContextNode *Node) { +static void checkNode(const ContextNode *Node, + bool CheckEdges = false) { if (Node->isRemoved()) return; // Node's context ids should be the union of both its callee and caller edge @@ -1584,6 +1673,8 @@ DenseSet CallerEdgeContextIds(FirstEdge->ContextIds); for (; EI != Node->CallerEdges.end(); EI++) { const auto &Edge = *EI; + if (CheckEdges) + checkEdge(Edge); set_union(CallerEdgeContextIds, Edge->ContextIds); } // Node can have more context ids than callers if some contexts terminate at @@ -1598,6 +1689,8 @@ DenseSet CalleeEdgeContextIds(FirstEdge->ContextIds); for (; EI != Node->CalleeEdges.end(); EI++) { const auto &Edge = *EI; + if (CheckEdges) + checkEdge(Edge); set_union(CalleeEdgeContextIds, Edge->ContextIds); } assert(Node->ContextIds == CalleeEdgeContextIds); @@ -1760,6 +1853,276 @@ DotFilePathPrefix + "ccg." + Label + ".dot"); } +template +ContextNode * +CallsiteContextGraph::moveEdgeToNewCalleeClone( + const std::shared_ptr &Edge, EdgeIter *CallerEdgeI) { + ContextNode *Node = Edge->Callee; + NodeOwner.push_back( + std::make_unique(Node->IsAllocation, Node->Call)); + ContextNode *Clone = NodeOwner.back().get(); + Node->addClone(Clone); + assert(NodeToCallingFunc.count(Node)); + NodeToCallingFunc[Clone] = NodeToCallingFunc[Node]; + moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true); + return Clone; +} + +template +void CallsiteContextGraph:: + moveEdgeToExistingCalleeClone(const std::shared_ptr &Edge, + ContextNode *NewCallee, EdgeIter *CallerEdgeI, + bool NewClone) { + // NewCallee and Edge's current callee must be clones of the same original + // node (Edge's current callee may be the original node too). 
+ assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode()); + auto &EdgeContextIds = Edge->getContextIds(); + ContextNode *OldCallee = Edge->Callee; + if (CallerEdgeI) + *CallerEdgeI = OldCallee->CallerEdges.erase(*CallerEdgeI); + else + OldCallee->eraseCallerEdge(Edge.get()); + Edge->Callee = NewCallee; + NewCallee->CallerEdges.push_back(Edge); + // Don't need to update Edge's context ids since we are simply reconnecting + // it. + set_subtract(OldCallee->ContextIds, EdgeContextIds); + NewCallee->ContextIds.insert(EdgeContextIds.begin(), EdgeContextIds.end()); + NewCallee->AllocTypes |= Edge->AllocTypes; + OldCallee->AllocTypes = computeAllocType(OldCallee->ContextIds); + // OldCallee alloc type should be None iff its context id set is now empty. + assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) == + OldCallee->ContextIds.empty()); + // Now walk the old callee node's callee edges and move Edge's context ids + // over to the corresponding edge into the clone (which is created here if + // this is a newly created clone). + for (auto &OldCalleeEdge : OldCallee->CalleeEdges) { + // The context ids moving to the new callee are the subset of this edge's + // context ids and the context ids on the caller edge being moved. + DenseSet EdgeContextIdsToMove = + set_intersection(OldCalleeEdge->getContextIds(), EdgeContextIds); + set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove); + OldCalleeEdge->AllocTypes = + computeAllocType(OldCalleeEdge->getContextIds()); + if (!NewClone) { + // Update context ids / alloc type on corresponding edge to NewCallee. + // There is a chance this may not exist if we are reusing an existing + // clone, specifically during function assignment, where we would have + // removed none type edges after creating the clone. If we can't find + // a corresponding edge there, fall through to the cloning below. 
+ if (auto *NewCalleeEdge = + NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) { + NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.begin(), + EdgeContextIdsToMove.end()); + NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove); + continue; + } + } + auto NewEdge = std::make_shared( + OldCalleeEdge->Callee, NewCallee, + computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove); + NewCallee->CalleeEdges.push_back(NewEdge); + NewEdge->Callee->CallerEdges.push_back(NewEdge); + } + if (VerifyCCG) { + checkNode(OldCallee); + checkNode(NewCallee); + for (const auto &OldCalleeEdge : OldCallee->CalleeEdges) + checkNode(OldCalleeEdge->Callee); + for (const auto &NewCalleeEdge : NewCallee->CalleeEdges) + checkNode(NewCalleeEdge->Callee); + } +} + +template +void CallsiteContextGraph::identifyClones() { + DenseSet Visited; + for (auto &Entry : AllocationCallToContextNodeMap) + identifyClones(Entry.second, Visited); +} + +template +void CallsiteContextGraph::identifyClones( + ContextNode *Node, DenseSet &Visited) { + if (VerifyNodes) + checkNode(Node, /*CheckEdges=*/true); + assert(!Node->CloneOf); + + // If Node has a null call, then either it wasn't found in the module (regular + // LTO) or summary index (ThinLTO), or there were other conditions blocking + // cloning (e.g. recursion, calls multiple targets, etc). + // Do this here so that we don't try to recursively clone callers below, which + // isn't useful at least for this node. + if (!Node->hasCall()) + return; + +#ifndef NDEBUG + auto Insert = +#endif + Visited.insert(Node); + // We should not have visited this node yet. + assert(Insert.second); + // The recursive call to identifyClones may delete the current edge from the + // CallerEdges vector. Make a copy and iterate on that, simpler than passing + // in an iterator and having recursive call erase from it. 
Other edges may + // also get removed during the recursion, which will have null Callee and + // Caller pointers (and are deleted later), so we skip those below. + { + auto CallerEdges = Node->CallerEdges; + for (auto &Edge : CallerEdges) { + // Skip any that have been removed by an earlier recursive call. + if (Edge->Callee == nullptr && Edge->Caller == nullptr) { + assert(!std::count(Node->CallerEdges.begin(), Node->CallerEdges.end(), + Edge)); + continue; + } + // Ignore any caller we previously visited via another edge. + if (!Visited.count(Edge->Caller) && !Edge->Caller->CloneOf) { + identifyClones(Edge->Caller, Visited); + } + } + } + + // Check if we reached an unambiguous call or have only a single caller. + if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1) + return; + + // We need to clone. + + // Try to keep the original version as alloc type NotCold. This will make + // cases with indirect calls or any other situation with an unknown call to + // the original function get the default behavior. We do this by sorting the + // CallerEdges of the Node we will clone by alloc type. + // + // Give NotCold edge the lowest sort priority so those edges are at the end of + // the caller edges vector, and stay on the original version (since the below + // code clones greedily until it finds all remaining edges have the same type + // and leaves the remaining ones on the original Node). + // + // We shouldn't actually have any None type edges, so the sorting priority for + // that is arbitrary, and we assert in that case below. 
+ constexpr unsigned AllocTypeCloningPriority[] = {/*None*/ 3, /*NotCold*/ 4, + /*Cold*/ 1, + /*NotColdCold*/ 2}; + assert(std::size(AllocTypeCloningPriority) == + (std::size_t)AllocationType::All + 1); + std::stable_sort(Node->CallerEdges.begin(), Node->CallerEdges.end(), + [](const std::shared_ptr &A, + const std::shared_ptr &B) { + assert(A->AllocTypes != (uint8_t)AllocationType::None && + B->AllocTypes != (uint8_t)AllocationType::None); + if (A->AllocTypes == B->AllocTypes) + // Use the first context id for each edge as a + // tie-breaker. + return *A->ContextIds.begin() < *B->ContextIds.begin(); + return AllocTypeCloningPriority[A->AllocTypes] < + AllocTypeCloningPriority[B->AllocTypes]; + }); + + assert(Node->AllocTypes != (uint8_t)AllocationType::None); + + // Iterate until we find no more opportunities for disambiguating the alloc + // types via cloning. In most cases this loop will terminate once the Node + // has a single allocation type, in which case no more cloning is needed. + // We need to be able to remove Edge from CallerEdges, so need to adjust + // iterator inside the loop. + for (auto EI = Node->CallerEdges.begin(); EI != Node->CallerEdges.end();) { + auto CallerEdge = *EI; + + // See if cloning the prior caller edge left this node with a single alloc + // type or a single caller. In that case no more cloning of Node is needed. + if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1) + break; + + // Compute the node callee edge alloc types corresponding to the context ids + // for this caller edge. 
+ std::vector CalleeEdgeAllocTypesForCallerEdge; + CalleeEdgeAllocTypesForCallerEdge.reserve(Node->CalleeEdges.size()); + for (auto &CalleeEdge : Node->CalleeEdges) + CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes( + CalleeEdge->getContextIds(), CallerEdge->getContextIds())); + + // Don't clone if doing so will not disambiguate any alloc types amongst + // caller edges (including the callee edges that would be cloned). + // Otherwise we will simply move all edges to the clone. + // + // First check if by cloning we will disambiguate the caller allocation + // type from node's allocation type. Query allocTypeToUse so that we don't + // bother cloning to distinguish NotCold+Cold from NotCold. Note that + // neither of these should be None type. + // + // Then check if by cloning node at least one of the callee edges will be + // disambiguated by splitting out different context ids. + assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None); + assert(Node->AllocTypes != (uint8_t)AllocationType::None); + if (allocTypeToUse(CallerEdge->AllocTypes) == + allocTypeToUse(Node->AllocTypes) && + allocTypesMatch( + CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges)) { + ++EI; + continue; + } + + // First see if we can use an existing clone. Check each clone and its + // callee edges for matching alloc types. + ContextNode *Clone = nullptr; + for (auto *CurClone : Node->Clones) { + if (allocTypeToUse(CurClone->AllocTypes) != + allocTypeToUse(CallerEdge->AllocTypes)) + continue; + + if (!allocTypesMatch( + CalleeEdgeAllocTypesForCallerEdge, CurClone->CalleeEdges)) + continue; + Clone = CurClone; + break; + } + + // The edge iterator is adjusted when we move the CallerEdge to the clone. 
+ if (Clone) + moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI); + else + Clone = moveEdgeToNewCalleeClone(CallerEdge, &EI); + + assert(EI == Node->CallerEdges.end() || + Node->AllocTypes != (uint8_t)AllocationType::None); + // Sanity check that no alloc types on clone or its edges are None. + assert(Clone->AllocTypes != (uint8_t)AllocationType::None); + assert(llvm::none_of( + Clone->CallerEdges, [&](const std::shared_ptr &E) { + return E->AllocTypes == (uint8_t)AllocationType::None; + })); + } + + // Cloning may have resulted in some cloned callee edges with type None, + // because they aren't carrying any contexts. Remove those edges. + for (auto *Clone : Node->Clones) { + removeNoneTypeCalleeEdges(Clone); + if (VerifyNodes) + checkNode(Clone, /*CheckEdges=*/true); + } + // We should still have some context ids on the original Node. + assert(!Node->ContextIds.empty()); + + // Remove any callee edges that ended up with alloc type None after creating + // clones and updating callee edges. + removeNoneTypeCalleeEdges(Node); + + // Sanity check that no alloc types on node or edges are None. 
+ assert(Node->AllocTypes != (uint8_t)AllocationType::None); + assert(llvm::none_of(Node->CalleeEdges, + [&](const std::shared_ptr &E) { + return E->AllocTypes == (uint8_t)AllocationType::None; + })); + assert(llvm::none_of(Node->CallerEdges, + [&](const std::shared_ptr &E) { + return E->AllocTypes == (uint8_t)AllocationType::None; + })); + + if (VerifyNodes) + checkNode(Node, /*CheckEdges=*/true); +} + template bool CallsiteContextGraph::process() { if (DumpCCG) { @@ -1773,6 +2136,19 @@ check(); } + identifyClones(); + + if (VerifyCCG) { + check(); + } + + if (DumpCCG) { + dbgs() << "CCG after cloning:\n"; + dbgs() << *this; + } + if (ExportToDot) + exportToDot("cloned"); + return false; } diff --git a/llvm/test/ThinLTO/X86/memprof-basic.ll b/llvm/test/ThinLTO/X86/memprof-basic.ll --- a/llvm/test/ThinLTO/X86/memprof-basic.ll +++ b/llvm/test/ThinLTO/X86/memprof-basic.ll @@ -42,6 +42,8 @@ ; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP ; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT +;; We should have cloned bar, baz, and foo, for the cold memory allocation. 
+; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED source_filename = "memprof-basic.ll" @@ -142,6 +144,88 @@ ; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 ; DUMP: CallerEdges: +; DUMP: CCG after cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 2, 3, 0 +; DUMP: AllocType 2 StackIds: 2, 3, 1 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Clones: [[BAR2:0x[a-z0-9]+]] + +; DUMP: Node [[BAZ]] +; DUMP: Callee: 9832687305761716512 (_Z3barv) Clones: 0 StackIds: 2 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Clones: [[BAZ2:0x[a-z0-9]+]] + +; DUMP: Node [[FOO]] +; DUMP: Callee: 5878270615442837395 (_Z3bazv) Clones: 0 StackIds: 3 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Clones: [[FOO2:0x[a-z0-9]+]] + +; DUMP: Node [[MAIN1]] +; DUMP: Callee: 6731117468105397038 (_Z3foov) Clones: 0 StackIds: 0 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: Callee: 6731117468105397038 (_Z3foov) Clones: 0 StackIds: 1 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: 
[[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: Node [[FOO2]] +; DUMP: Callee: 5878270615442837395 (_Z3bazv) Clones: 0 StackIds: 3 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: Clone of [[FOO]] + +; DUMP: Node [[BAZ2]] +; DUMP: Callee: 9832687305761716512 (_Z3barv) Clones: 0 StackIds: 2 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2 +; DUMP: Clone of [[BAZ]] + +; DUMP: Node [[BAR2]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 2, 3, 0 +; DUMP: AllocType 2 StackIds: 2, 3, 1 +; DUMP: (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2 +; DUMP: Clone of [[BAR]] + ; DOT: digraph "postbuild" { ; DOT: label="postbuild"; @@ -155,3 +239,22 @@ ; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; ; DOT: Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"]; ; DOT: } + + +; DOTCLONED: digraph "cloned" { +; DOTCLONED: label="cloned"; +; DOTCLONED: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> alloc}"]; +; DOTCLONED: Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> 
_Z3barv}"]; +; DOTCLONED: Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOTCLONED: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"]; +; DOTCLONED: Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOTCLONED: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"]; +; DOTCLONED: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOTCLONED: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; +; DOTCLONED: Node[[MAIN2]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOTCLONED: Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3foov -\> _Z3bazv}"]; +; DOTCLONED: Node[[FOO2]] -> Node[[BAZ2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOTCLONED: Node[[BAZ2]] [shape=record,tooltip="N[[BAZ2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3bazv -\> _Z3barv}"]; +; DOTCLONED: Node[[BAZ2]] -> Node[[BAR2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOTCLONED: Node[[BAR2]] [shape=record,tooltip="N[[BAR2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3barv -\> alloc}"]; +; DOTCLONED: } diff --git a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll --- a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll +++ b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll @@ -64,6 
+64,8 @@ ; RUN: cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE ; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST +;; We should clone D once for the cold allocations via C. +; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED source_filename = "duplicate-context-ids.ll" @@ -205,6 +207,67 @@ ; DUMP: CallerEdges: +; DUMP: CCG after cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[D]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 2 StackIds: 0 +; DUMP: AllocType 1 StackIds: 1 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2 +; DUMP: Clones: [[D2:0x[a-z0-9]+]] + +; DUMP: Node [[F]] +; DUMP: Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 1 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: Node [[C2]] +; DUMP: Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D2]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3 +; DUMP: CallerEdges: + +; DUMP: Node [[B]] +; DUMP: Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0, 2 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D2]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4 +; DUMP: CallerEdges: + +; DUMP: Node [[E]] +; DUMP: Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0, 3 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D2]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[D2]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 2 StackIds: 0 +; DUMP: AllocType 1 StackIds: 1 +; 
DUMP: (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 1 3 4 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[D2]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1 +; DUMP: Edge from Callee [[D2]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3 +; DUMP: Edge from Callee [[D2]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 +; DUMP: Clone of [[D]] + + ; DOTPRE: digraph "prestackupdate" { ; DOTPRE: label="prestackupdate"; ; DOTPRE: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"]; @@ -227,3 +290,18 @@ ; DOTPOST: Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"]; ; DOTPOST: Node[[E]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"]; ; DOTPOST:} + + +; DOTCLONED: digraph "cloned" { +; DOTCLONED: label="cloned"; +; DOTCLONED: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"]; +; DOTCLONED: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"]; +; DOTCLONED: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"]; +; DOTCLONED: Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"]; +; DOTCLONED: Node[[C]] -> Node[[D2:0x[a-z0-9]+]][tooltip="ContextIds: 3",fillcolor="cyan"]; +; DOTCLONED: Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"]; +; DOTCLONED: Node[[B]] -> Node[[D2]][tooltip="ContextIds: 4",fillcolor="cyan"]; +; DOTCLONED: Node[[E:0x[a-z0-9]+]] 
[shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"]; +; DOTCLONED: Node[[E]] -> Node[[D2]][tooltip="ContextIds: 1",fillcolor="cyan"]; +; DOTCLONED: Node[[D2]] [shape=record,tooltip="N[[D2]] ContextIds: 1 3 4",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"]; +; DOTCLONED: } diff --git a/llvm/test/ThinLTO/X86/memprof-indirectcall.ll b/llvm/test/ThinLTO/X86/memprof-indirectcall.ll --- a/llvm/test/ThinLTO/X86/memprof-indirectcall.ll +++ b/llvm/test/ThinLTO/X86/memprof-indirectcall.ll @@ -64,6 +64,9 @@ ; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP ; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT +;; We should only create a single clone of foo, for the direct call +;; from main allocating cold memory. +; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED source_filename = "indirectcall.ll" @@ -240,6 +243,121 @@ ; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6 ; DUMP: CallerEdges: +; DUMP: CCG after cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[FOO]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 6, 8, 4 +; DUMP: AllocType 2 StackIds: 6, 8, 5 +; DUMP: AllocType 1 StackIds: 0 +; DUMP: AllocType 2 StackIds: 7, 8, 2 +; DUMP: AllocType 1 StackIds: 7, 8, 3 +; DUMP: AllocType 2 StackIds: 1 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 3 4 5 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[AX]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: Edge from Callee [[FOO]] to Caller: [[BX]] AllocTypes: NotColdCold ContextIds: 4 5 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 +; DUMP: Clones: [[FOO2:0x[a-z0-9]+]] + +; DUMP: Node [[AX]] +; DUMP: Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 6 (clone 0) +; DUMP: 
AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[AX]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[AX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 1 2 + +; DUMP: Node [[BAR]] +; DUMP: null Call +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 4 5 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[AX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN3]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 2 +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN5]] AllocTypes: Cold ContextIds: 4 +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN6]] AllocTypes: NotCold ContextIds: 5 + +; DUMP: Node [[MAIN3]] +; DUMP: Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 4 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN3]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN4]] +; DUMP: Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 5 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN1]] +; DUMP: Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 0 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 +; DUMP: CallerEdges: + +; DUMP: Node [[BX]] +; DUMP: Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 7 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 4 5 +; DUMP: 
CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[BX]] AllocTypes: NotColdCold ContextIds: 4 5 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5 + +; DUMP: Node [[MAIN5]] +; DUMP: Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 2 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN5]] AllocTypes: Cold ContextIds: 4 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN6]] +; DUMP: Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 3 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 5 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN6]] AllocTypes: NotCold ContextIds: 5 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 1 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 6 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6 +; DUMP: CallerEdges: + +; DUMP: Node [[FOO2]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 6, 8, 4 +; DUMP: AllocType 2 StackIds: 6, 8, 5 +; DUMP: AllocType 1 StackIds: 0 +; DUMP: AllocType 2 StackIds: 7, 8, 2 +; DUMP: AllocType 1 StackIds: 7, 8, 3 +; DUMP: AllocType 2 StackIds: 1 +; DUMP: (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 6 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6 +; DUMP: Clone of [[FOO]] + ; DOT: digraph "postbuild" { ; DOT: label="postbuild"; @@ -264,3 +382,29 @@ ; DOT: Node[[MAIN6:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN6]] ContextIds: 6",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; ; DOT: Node[[MAIN6]] -> Node[[FOO]][tooltip="ContextIds: 6",fillcolor="cyan"]; ; DOT: } + + +; DOTCLONED: digraph "cloned" { +; DOTCLONED: 
label="cloned"; +; DOTCLONED: Node[[FOO2:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO2]] ContextIds: 1 2 3 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3foov -\> alloc}"]; +; DOTCLONED: Node[[AX:0x[a-z0-9]+]] [shape=record,tooltip="N[[AX]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 8256774051149711748\n_ZN1A1xEv -\> _Z3foov}"]; +; DOTCLONED: Node[[AX]] -> Node[[FOO2]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; +; DOTCLONED: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13626499562959447861\nnull call (external)}"]; +; DOTCLONED: Node[[BAR]] -> Node[[AX]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; +; DOTCLONED: Node[[BAR]] -> Node[[BX:0x[a-z0-9]+]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"]; +; DOTCLONED: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 748269490701775343\nmain -\> _Z3barP1A}"]; +; DOTCLONED: Node[[MAIN1]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOTCLONED: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12699492813229484831\nmain -\> _Z3barP1A}"]; +; DOTCLONED: Node[[MAIN2]] -> Node[[BAR]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOTCLONED: Node[[MAIN3:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN3]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"]; +; DOTCLONED: Node[[MAIN3]] -> Node[[FOO2]][tooltip="ContextIds: 3",fillcolor="brown1"]; +; DOTCLONED: Node[[BX]] [shape=record,tooltip="N[[BX]] ContextIds: 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13614864978754796978\n_ZN1B1xEv -\> _Z3foov}"]; +; DOTCLONED: Node[[BX]] -> 
Node[[FOO2]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"]; +; DOTCLONED: Node[[MAIN4:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN4]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 6792096022461663180\nmain -\> _Z3barP1A}"]; +; DOTCLONED: Node[[MAIN4]] -> Node[[BAR]][tooltip="ContextIds: 4",fillcolor="cyan"]; +; DOTCLONED: Node[[MAIN5:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN5]] ContextIds: 5",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 15737101490731057601\nmain -\> _Z3barP1A}"]; +; DOTCLONED: Node[[MAIN5]] -> Node[[BAR]][tooltip="ContextIds: 5",fillcolor="brown1"]; +; DOTCLONED: Node[[MAIN6:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN6]] ContextIds: 6",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; +; DOTCLONED: Node[[MAIN6]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 6",fillcolor="cyan"]; +; DOTCLONED: Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 6",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3foov -\> alloc}"]; +; DOTCLONED: } diff --git a/llvm/test/ThinLTO/X86/memprof-inlined.ll b/llvm/test/ThinLTO/X86/memprof-inlined.ll --- a/llvm/test/ThinLTO/X86/memprof-inlined.ll +++ b/llvm/test/ThinLTO/X86/memprof-inlined.ll @@ -51,6 +51,9 @@ ; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP ; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT +;; We should create clones for foo and bar for the call from main to allocate +;; cold memory. 
+; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED source_filename = "inlined.ll" @@ -168,6 +171,91 @@ ; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 ; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4 +; DUMP: CCG after cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAZ]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 1, 2 +; DUMP: AllocType 2 StackIds: 1, 3 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 1 2 + +; DUMP: Node [[FOO2]] +; DUMP: null Call +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 + +; DUMP: Node [[MAIN1]] +; DUMP: Callee: 2229562716906371625 (_Z3foov) Clones: 0 StackIds: 2 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: Callee: 2229562716906371625 (_Z3foov) Clones: 0 StackIds: 3 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: Edge from Callee [[FOO3:0x[a-z0-9]+]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4 +; DUMP: CallerEdges: + +; DUMP: Node [[BAR]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 0, 1, 2 +; DUMP: AllocType 2 StackIds: 0, 
1, 3 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 3 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 3 +; DUMP: Clones: [[BAR2:0x[a-z0-9]+]] + +; DUMP: Node [[FOO]] +; DUMP: Callee: 16064618363798697104 (_Z3barv) Clones: 0 StackIds: 0, 1 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 3 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 +; DUMP: Clones: [[FOO3]] + +; DUMP: Node [[FOO3]] +; DUMP: Callee: 16064618363798697104 (_Z3barv) Clones: 0 StackIds: 0, 1 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR2]] to Caller: [[FOO3]] AllocTypes: Cold ContextIds: 4 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO3]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4 +; DUMP: Clone of [[FOO]] + +; DUMP: Node [[BAR2]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 0, 1, 2 +; DUMP: AllocType 2 StackIds: 0, 1, 3 +; DUMP: (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 4 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR2]] to Caller: [[FOO3]] AllocTypes: Cold ContextIds: 4 +; DUMP: Clone of [[BAR]] + ; DOT: digraph "postbuild" { ; DOT: label="postbuild"; @@ -184,3 +272,23 @@ ; DOT: Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"]; ; DOT: Node[[FOO2]] -> Node[[BAR]][tooltip="ContextIds: 3 4",fillcolor="mediumorchid1"]; ; DOT: } + + +; DOTCLONED: digraph "cloned" { +; DOTCLONED: label="cloned"; +; DOTCLONED: Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3bazv -\> 
alloc}"]; +; DOTCLONED: Node[[FOO2:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO2]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\nnull call (external)}"]; +; DOTCLONED: Node[[FOO2]] -> Node[[BAZ]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; +; DOTCLONED: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"]; +; DOTCLONED: Node[[MAIN1]] -> Node[[FOO2]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOTCLONED: Node[[MAIN1]] -> Node[[FOO:0x[a-z0-9]+]][tooltip="ContextIds: 3",fillcolor="brown1"]; +; DOTCLONED: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; +; DOTCLONED: Node[[MAIN2]] -> Node[[FOO2]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOTCLONED: Node[[MAIN2]] -> Node[[FOO3:0x[a-z0-9]+]][tooltip="ContextIds: 4",fillcolor="cyan"]; +; DOTCLONED: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc2\n_Z3barv -\> alloc}"]; +; DOTCLONED: Node[[FOO]] [shape=record,tooltip="N[[FOO]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"]; +; DOTCLONED: Node[[FOO]] -> Node[[BAR]][tooltip="ContextIds: 3",fillcolor="brown1"]; +; DOTCLONED: Node[[FOO3]] [shape=record,tooltip="N[[FOO3]] ContextIds: 4",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"]; +; DOTCLONED: Node[[FOO3]] -> Node[[BAR2:0x[a-z0-9]+]][tooltip="ContextIds: 4",fillcolor="cyan"]; +; DOTCLONED: Node[[BAR2]] [shape=record,tooltip="N[[BAR2]] ContextIds: 4",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3barv -\> alloc}"]; +; 
DOTCLONED: } diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll --- a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll @@ -37,6 +37,8 @@ ; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP ; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT +;; We should have cloned bar, baz, and foo, for the cold memory allocation. +; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -143,6 +145,82 @@ ; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 ; DUMP: CallerEdges: +; DUMP: CCG after cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR:0x[a-z0-9]+]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Clones: [[BAR2:0x[a-z0-9]+]] + +; DUMP: Node [[BAZ]] +; DUMP: %call = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Clones: [[BAZ2:0x[a-z0-9]+]] + +; DUMP: Node [[FOO]] +; DUMP: %call = call noundef ptr @_Z3bazv() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 
+; DUMP: Clones: [[FOO2:0x[a-z0-9]+]] + +; DUMP: Node [[MAIN1]] +; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: Node [[FOO2]] +; DUMP: %call = call noundef ptr @_Z3bazv() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 +; DUMP: Clone of [[FOO]] + +; DUMP: Node [[BAZ2]] +; DUMP: %call = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2 +; DUMP: Clone of [[BAZ]] + +; DUMP: Node [[BAR2]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2 +; DUMP: Clone of [[BAR]] + ; DOT: digraph "postbuild" { ; DOT: label="postbuild"; @@ -156,3 +234,22 @@ ; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; ; DOT: Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"]; ; DOT: } + + +; 
DOTCLONED: digraph "cloned" { +; DOTCLONED: label="cloned"; +; DOTCLONED: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"]; +; DOTCLONED: Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"]; +; DOTCLONED: Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOTCLONED: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"]; +; DOTCLONED: Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOTCLONED: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"]; +; DOTCLONED: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOTCLONED: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; +; DOTCLONED: Node[[MAIN2]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOTCLONED: Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3foov -\> _Z3bazv}"]; +; DOTCLONED: Node[[FOO2]] -> Node[[BAZ2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOTCLONED: Node[[BAZ2]] [shape=record,tooltip="N[[BAZ2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3bazv -\> _Z3barv}"]; +; DOTCLONED: Node[[BAZ2]] -> Node[[BAR2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOTCLONED: Node[[BAR2]] 
[shape=record,tooltip="N[[BAR2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"]; +; DOTCLONED: } diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll --- a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll @@ -59,6 +59,8 @@ ; RUN: cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE ; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST +;; We should clone D once for the cold allocations via C. +; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -207,6 +209,60 @@ ; DUMP: Edge from Callee [[D]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1 ; DUMP: CallerEdges: +; DUMP: CCG after cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[D]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2 +; DUMP: Clones: [[D2:0x[a-z0-9]+]] + +; DUMP: Node [[F]] +; DUMP: %call = call noundef ptr @_Z1Dv() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: Node [[C2]] +; DUMP: %call = call noundef ptr @_Z1Dv() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D2]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3 +; DUMP: CallerEdges: + +; DUMP: Node [[B]] +; DUMP: %call.i = call noundef 
ptr @_Z1Dv() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D2]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4 +; DUMP: CallerEdges: + +; DUMP: Node [[E]] +; DUMP: %call.i = call noundef ptr @_Z1Dv() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D2]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[D2]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 1 3 4 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[D2]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1 +; DUMP: Edge from Callee [[D2]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3 +; DUMP: Edge from Callee [[D2]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 +; DUMP: Clone of [[D]] + ; DOTPRE: digraph "prestackupdate" { ; DOTPRE: label="prestackupdate"; @@ -230,3 +286,18 @@ ; DOTPOST: Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"]; ; DOTPOST: Node[[E]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"]; ; DOTPOST:} + + +; DOTCLONED: digraph "cloned" { +; DOTCLONED: label="cloned"; +; DOTCLONED: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"]; +; DOTCLONED: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"]; +; DOTCLONED: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"]; +; DOTCLONED: Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"]; +; DOTCLONED: 
Node[[C]] -> Node[[D2:0x[a-z0-9]+]][tooltip="ContextIds: 3",fillcolor="cyan"]; +; DOTCLONED: Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"]; +; DOTCLONED: Node[[B]] -> Node[[D2]][tooltip="ContextIds: 4",fillcolor="cyan"]; +; DOTCLONED: Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"]; +; DOTCLONED: Node[[E]] -> Node[[D2]][tooltip="ContextIds: 1",fillcolor="cyan"]; +; DOTCLONED: Node[[D2]] [shape=record,tooltip="N[[D2]] ContextIds: 1 3 4",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"]; +; DOTCLONED: } diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll b/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll --- a/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll @@ -57,6 +57,9 @@ ; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP ; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT +;; We should only create a single clone of foo, for the direct call +;; from main allocating cold memory. 
+; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -235,6 +238,107 @@ ; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6 ; DUMP: CallerEdges: +; DUMP: CCG after cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[FOO]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 3 4 5 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[AX]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: Edge from Callee [[FOO]] to Caller: [[BX]] AllocTypes: NotColdCold ContextIds: 4 5 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 +; DUMP: Clones: [[FOO2:0x[a-z0-9]+]] + +; DUMP: Node [[AX]] +; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[AX]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[AX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 1 2 + +; DUMP: Node [[BAR]] +; DUMP: null Call +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 4 5 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[AX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN3]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 2 +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN5]] AllocTypes: Cold ContextIds: 4 +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN6]] AllocTypes: NotCold ContextIds: 5 + +; DUMP: Node [[MAIN3]] +; DUMP: %call4 = call noundef ptr 
@_Z3barP1A(ptr noundef %a) (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN3]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN4]] +; DUMP: %call5 = call noundef ptr @_Z3barP1A(ptr noundef %a) (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN1]] +; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 +; DUMP: CallerEdges: + +; DUMP: Node [[BX]] +; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 4 5 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[BX]] AllocTypes: NotColdCold ContextIds: 4 5 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5 + +; DUMP: Node [[MAIN5]] +; DUMP: %call2 = call noundef ptr @_Z3barP1A(ptr noundef %b) (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN5]] AllocTypes: Cold ContextIds: 4 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN6]] +; DUMP: %call3 = call noundef ptr @_Z3barP1A(ptr noundef %b) (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 5 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN6]] AllocTypes: NotCold ContextIds: 5 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 6 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6 +; DUMP: CallerEdges: + +; DUMP: Node [[FOO2]] +; DUMP: %call = call 
noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 6 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6 +; DUMP: Clone of [[FOO]] + ; DOT: digraph "postbuild" { ; DOT: label="postbuild"; @@ -259,3 +363,29 @@ ; DOT: Node[[MAIN6:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN6]] ContextIds: 6",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; ; DOT: Node[[MAIN6]] -> Node[[FOO]][tooltip="ContextIds: 6",fillcolor="cyan"]; ; DOT: } + + +; DOTCLONED: digraph "cloned" { +; DOTCLONED: label="cloned"; +; DOTCLONED: Node[[FOO2:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO2]] ContextIds: 1 2 3 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3foov -\> _Znam}"]; +; DOTCLONED: Node[[AX:0x[a-z0-9]+]] [shape=record,tooltip="N[[AX]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 8256774051149711748\n_ZN1A1xEv -\> _Z3foov}"]; +; DOTCLONED: Node[[AX]] -> Node[[FOO2]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; +; DOTCLONED: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13626499562959447861\nnull call (external)}"]; +; DOTCLONED: Node[[BAR]] -> Node[[AX]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; +; DOTCLONED: Node[[BAR]] -> Node[[BX:0x[a-z0-9]+]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"]; +; DOTCLONED: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 748269490701775343\nmain -\> _Z3barP1A}"]; +; DOTCLONED: Node[[MAIN1]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOTCLONED: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 
2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12699492813229484831\nmain -\> _Z3barP1A}"]; +; DOTCLONED: Node[[MAIN2]] -> Node[[BAR]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOTCLONED: Node[[MAIN3:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN3]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"]; +; DOTCLONED: Node[[MAIN3]] -> Node[[FOO2]][tooltip="ContextIds: 3",fillcolor="brown1"]; +; DOTCLONED: Node[[BX]] [shape=record,tooltip="N[[BX]] ContextIds: 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13614864978754796978\n_ZN1B1xEv -\> _Z3foov}"]; +; DOTCLONED: Node[[BX]] -> Node[[FOO2]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"]; +; DOTCLONED: Node[[MAIN4:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN4]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 6792096022461663180\nmain -\> _Z3barP1A}"]; +; DOTCLONED: Node[[MAIN4]] -> Node[[BAR]][tooltip="ContextIds: 4",fillcolor="cyan"]; +; DOTCLONED: Node[[MAIN5:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN5]] ContextIds: 5",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 15737101490731057601\nmain -\> _Z3barP1A}"]; +; DOTCLONED: Node[[MAIN5]] -> Node[[BAR]][tooltip="ContextIds: 5",fillcolor="brown1"]; +; DOTCLONED: Node[[MAIN6:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN6]] ContextIds: 6",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; +; DOTCLONED: Node[[MAIN6]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 6",fillcolor="cyan"]; +; DOTCLONED: Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 6",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3foov -\> _Znam}"]; +; DOTCLONED: } diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll --- 
a/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll @@ -46,6 +46,9 @@ ; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP ; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT +;; We should create clones for foo and bar for the call from main to allocate +;; cold memory. +; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -171,6 +174,82 @@ ; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 ; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CCG after cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Clones: [[BAR2:0x[a-z0-9]+]] + +; DUMP: Node [[FOO2]] +; DUMP: null Call +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4 + +; DUMP: Node [[MAIN1]] +; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: Cold +; 
DUMP: ContextIds: 2 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4 +; DUMP: Edge from Callee [[FOO3:0x[a-z0-9]+]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: Node [[BAZ]] +; DUMP: %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 4 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4 + +; DUMP: Node [[FOO]] +; DUMP: %call.i = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Clones: [[FOO3]] + +; DUMP: Node [[FOO3]] +; DUMP: %call.i = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR2]] to Caller: [[FOO3]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO3]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: Clone of [[FOO]] + +; DUMP: Node [[BAR2]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR2]] to Caller: [[FOO3]] AllocTypes: Cold ContextIds: 2 +; DUMP: Clone of [[BAR]] + ; DOT: digraph "postbuild" { ; DOT: label="postbuild"; @@ -187,3 +266,23 @@ ; DOT: Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"]; ; DOT: Node[[FOO2]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; ; DOT: } + + +; DOTCLONED: digraph 
"cloned" { +; DOTCLONED: label="cloned"; +; DOTCLONED: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"]; +; DOTCLONED: Node[[FOO2:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO2]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\nnull call (external)}"]; +; DOTCLONED: Node[[FOO2]] -> Node[[BAZ:0x[a-z0-9]+]][tooltip="ContextIds: 3 4",fillcolor="mediumorchid1"]; +; DOTCLONED: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"]; +; DOTCLONED: Node[[MAIN1]] -> Node[[FOO2]][tooltip="ContextIds: 3",fillcolor="brown1"]; +; DOTCLONED: Node[[MAIN1]] -> Node[[FOO:0x[a-z0-9]+]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOTCLONED: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; +; DOTCLONED: Node[[MAIN2]] -> Node[[FOO2]][tooltip="ContextIds: 4",fillcolor="cyan"]; +; DOTCLONED: Node[[MAIN2]] -> Node[[FOO3:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOTCLONED: Node[[BAZ]] [shape=record,tooltip="N[[BAZ]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc2\n_Z3bazv -\> _Znam}"]; +; DOTCLONED: Node[[FOO]] [shape=record,tooltip="N[[FOO]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"]; +; DOTCLONED: Node[[FOO]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOTCLONED: Node[[FOO3]] [shape=record,tooltip="N[[FOO3]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"]; +; DOTCLONED: Node[[FOO3]] -> Node[[BAR2:0x[a-z0-9]+]][tooltip="ContextIds: 
2",fillcolor="cyan"]; +; DOTCLONED: Node[[BAR2]] [shape=record,tooltip="N[[BAR2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"]; +; DOTCLONED: }