diff --git a/hot-alloctype.patch b/hot-alloctype.patch new file mode 100644 diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -337,7 +337,7 @@ } // Allocation type assigned to an allocation reached by a given context. -// More can be added but initially this is just noncold and cold. +// More can be added, now this is cold, notcold and hot. // Values should be powers of two so that they can be ORed, in particular to // track allocations that have different behavior with different calling // contexts. @@ -345,7 +345,8 @@ None = 0, NotCold = 1, Cold = 2, - All = 3 // This should always be set to the OR of all values. + Hot = 4, + All = 7 // This should always be set to the OR of all values. }; /// Summary of a single MIB in a memprof metadata on allocations. diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp --- a/llvm/lib/Analysis/MemoryProfileInfo.cpp +++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp @@ -33,6 +33,14 @@ cl::desc("The average lifetime (s) for an allocation to be considered " "cold")); +// Lower bound on average lifetime accesses density (total life time access +// density / alloc count) for marking an allocation hot. +cl::opt MemProfMinAveLifetimeAccessDensityHotThreshold( + "memprof-min-ave-lifetime-access-density-hot-threshold", cl::init(1000), + cl::Hidden, + cl::desc("The minimum TotalLifetimeAccessDensity / AllocCount for an " + "allocation to be considered hot")); + AllocationType llvm::memprof::getAllocType(uint64_t TotalLifetimeAccessDensity, uint64_t AllocCount, uint64_t TotalLifetime) { @@ -44,6 +52,13 @@ && ((float)TotalLifetime) / AllocCount >= MemProfAveLifetimeColdThreshold * 1000) return AllocationType::Cold; + + // The access densities are multiplied by 100 to hold 2 decimal places of + // precision, so need to divide by 100. + if (((float)TotalLifetimeAccessDensity) / AllocCount / 100 > + MemProfMinAveLifetimeAccessDensityHotThreshold) + return AllocationType::Hot; + return AllocationType::NotCold; } @@ -71,8 +86,11 @@ // types that can be applied based on the allocation profile data. auto *MDS = dyn_cast(MIB->getOperand(1)); assert(MDS); - if (MDS->getString().equals("cold")) + if (MDS->getString().equals("cold")) { return AllocationType::Cold; + } else if (MDS->getString().equals("hot")) { + return AllocationType::Hot; + } return AllocationType::NotCold; } @@ -84,6 +102,9 @@ case AllocationType::Cold: return "cold"; break; + case AllocationType::Hot: + return "hot"; + break; default: assert(false && "Unexpected alloc type"); } diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -9979,7 +9979,7 @@ } /// AllocType -/// := ('none'|'notcold'|'cold') +/// := ('none'|'notcold'|'cold'|'hot') bool LLParser::parseAllocType(uint8_t &AllocType) { switch (Lex.getKind()) { case lltok::kw_none: @@ -9991,6 +9991,9 @@ case lltok::kw_cold: AllocType = (uint8_t)AllocationType::Cold; break; + case lltok::kw_hot: + AllocType = (uint8_t)AllocationType::Hot; + break; default: return error(Lex.getLoc(), "invalid alloc type"); } diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -3219,6 +3219,8 @@ return "notcold"; case (uint8_t)AllocationType::Cold: return "cold"; + case (uint8_t)AllocationType::Hot: + return "hot"; } llvm_unreachable("Unexpected alloc type"); }; diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -778,6 +778,11 @@ void CallsiteContextGraph::addStackNodesForMIB( ContextNode *AllocNode, CallStack &StackContext, CallStack &CallsiteContext, AllocationType AllocType) { + // Treating the hot alloc type as NotCold before the disambiguation for "hot" + // is done. + if (AllocType == AllocationType::Hot) + AllocType = AllocationType::NotCold; + ContextIdToAllocationType[++LastContextId] = AllocType; // Update alloc type and context ids for this MIB. @@ -1941,6 +1946,14 @@ identifyClones(Entry.second, Visited); } +// helper function to check an AllocType is cold or notcold or both. +bool checkColdOrNotCold(uint8_t AllocType) { + return (AllocType == (uint8_t)AllocationType::Cold) || + (AllocType == (uint8_t)AllocationType::NotCold) || + (AllocType == + ((uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold)); +} + template void CallsiteContextGraph::identifyClones( ContextNode *Node, DenseSet &Visited) { @@ -2004,13 +2017,12 @@ const unsigned AllocTypeCloningPriority[] = {/*None*/ 3, /*NotCold*/ 4, /*Cold*/ 1, /*NotColdCold*/ 2}; - assert(std::size(AllocTypeCloningPriority) == - (std::size_t)AllocationType::All + 1); std::stable_sort(Node->CallerEdges.begin(), Node->CallerEdges.end(), [&](const std::shared_ptr &A, const std::shared_ptr &B) { - assert(A->AllocTypes != (uint8_t)AllocationType::None && - B->AllocTypes != (uint8_t)AllocationType::None); + assert(checkColdOrNotCold(A->AllocTypes) && + checkColdOrNotCold(B->AllocTypes)); + if (A->AllocTypes == B->AllocTypes) // Use the first context id for each edge as a // tie-breaker. diff --git a/llvm/test/Assembler/thinlto-memprof-summary.ll b/llvm/test/Assembler/thinlto-memprof-summary.ll --- a/llvm/test/Assembler/thinlto-memprof-summary.ll +++ b/llvm/test/Assembler/thinlto-memprof-summary.ll @@ -5,7 +5,7 @@ ^0 = module: (path: "thinlto-memprof-summary.o", hash: (1369602428, 2747878711, 259090915, 2507395659, 1141468049)) ;; Function with single alloc, multiple memprof MIBs, no versioning -^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (8632435727821051414)), (type: cold, stackIds: (15025054523792398438, 12345678)))))))) +^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (8632435727821051414)), (type: cold, stackIds: (15025054523792398438, 12345678)), (type: hot, stackIds: (987654321)))))))) ;; Function with callsite stack ids calling above function, no versioning ^2 = gv: (guid: 25, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^1)), callsites: ((callee: ^1, clones: (0), stackIds: (8632435727821051414)), (callee: ^1, clones: (0), stackIds: (15025054523792398438, 12345678)), (callee: ^1, clones: (0), stackIds: (23456789)))))) ;; Function with multiple allocs, multiple memprof MIBs, multiple versions @@ -17,7 +17,7 @@ ; Make sure we get back from llvm-dis what we put in via llvm-as. ; CHECK: ^0 = module: (path: "thinlto-memprof-summary.o", hash: (1369602428, 2747878711, 259090915, 2507395659, 1141468049)) -; CHECK: ^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (8632435727821051414)), (type: cold, stackIds: (15025054523792398438, 12345678)))))))) +; CHECK: ^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (8632435727821051414)), (type: cold, stackIds: (15025054523792398438, 12345678)), (type: hot, stackIds: (987654321)))))))) ; CHECK: ^2 = gv: (guid: 25, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^1)), callsites: ((callee: ^1, clones: (0), stackIds: (8632435727821051414)), (callee: ^1, clones: (0), stackIds: (15025054523792398438, 12345678)), (callee: ^1, clones: (0), stackIds: (23456789)))))) ; CHECK: ^3 = gv: (guid: 26, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (cold, notcold), memProf: ((type: notcold, stackIds: (3456789)), (type: cold, stackIds: (456789)))), (versions: (notcold, cold), memProf: ((type: cold, stackIds: (3456789)), (type: notcold, stackIds: (456789)))))))) ; CHECK: ^4 = gv: (guid: 27, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^3)), callsites: ((callee: ^3, clones: (0, 1), stackIds: (3456789)), (callee: ^3, clones: (1, 1), stackIds: (456789)))))) diff --git a/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp b/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp --- a/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp +++ b/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp @@ -16,12 +16,14 @@ #include "llvm/Support/SourceMgr.h" #include "gtest/gtest.h" #include +#include using namespace llvm; using namespace llvm::memprof; extern cl::opt MemProfLifetimeAccessDensityColdThreshold; extern cl::opt MemProfAveLifetimeColdThreshold; +extern cl::opt MemProfMinAveLifetimeAccessDensityHotThreshold; namespace { @@ -64,31 +66,47 @@ // To be cold we require that // ((float)TotalLifetimeAccessDensity) / AllocCount / 100 < // MemProfLifetimeAccessDensityColdThreshold - // so compute the TotalLifetimeAccessDensity right at the threshold. - const uint64_t TotalLifetimeAccessDensityThreshold = + // so compute the ColdTotalLifetimeAccessDensityThreshold at the threshold. + const uint64_t ColdTotalLifetimeAccessDensityThreshold = (uint64_t)(MemProfLifetimeAccessDensityColdThreshold * AllocCount * 100); // To be cold we require that // ((float)TotalLifetime) / AllocCount >= // MemProfAveLifetimeColdThreshold * 1000 // so compute the TotalLifetime right at the threshold. - const uint64_t TotalLifetimeThreshold = + const uint64_t ColdTotalLifetimeThreshold = MemProfAveLifetimeColdThreshold * AllocCount * 1000; - - // Long lived with more accesses per byte per sec than threshold is not cold. - EXPECT_EQ(getAllocType(TotalLifetimeAccessDensityThreshold + 1, AllocCount, - TotalLifetimeThreshold + 1), - AllocationType::NotCold); - // Long lived with less accesses per byte per sec than threshold is cold. - EXPECT_EQ(getAllocType(TotalLifetimeAccessDensityThreshold - 1, AllocCount, - TotalLifetimeThreshold + 1), + // To be hot we require that + // ((float)TotalLifetimeAccessDensity) / AllocCount / 100 > + // MemProfMinAveLifetimeAccessDensityHotThreshold + // so compute the HotTotalLifetimeAccessDensityThreshold at the threshold. + const uint64_t HotTotalLifetimeAccessDensityThreshold = + (uint64_t)(MemProfMinAveLifetimeAccessDensityHotThreshold * AllocCount * 100); + + + // Test Hot + // More accesses per byte per sec than hot threshold is hot. + EXPECT_EQ(getAllocType(HotTotalLifetimeAccessDensityThreshold + 1, AllocCount, + ColdTotalLifetimeThreshold + 1), + AllocationType::Hot); + + // Test Cold + // Long lived with less accesses per byte per sec than cold threshold is cold. + EXPECT_EQ(getAllocType(ColdTotalLifetimeAccessDensityThreshold - 1, AllocCount, + ColdTotalLifetimeThreshold + 1), AllocationType::Cold); - // Short lived with more accesses per byte per sec than threshold is not cold. - EXPECT_EQ(getAllocType(TotalLifetimeAccessDensityThreshold + 1, AllocCount, - TotalLifetimeThreshold - 1), + + // Test NotCold + // Long lived with more accesses per byte per sec than cold threshold is not cold. + EXPECT_EQ(getAllocType(ColdTotalLifetimeAccessDensityThreshold + 1, AllocCount, + ColdTotalLifetimeThreshold + 1), + AllocationType::NotCold); + // Short lived with more accesses per byte per sec than cold threshold is not cold. + EXPECT_EQ(getAllocType(ColdTotalLifetimeAccessDensityThreshold + 1, AllocCount, + ColdTotalLifetimeThreshold - 1), AllocationType::NotCold); - // Short lived with less accesses per byte per sec than threshold is not cold. - EXPECT_EQ(getAllocType(TotalLifetimeAccessDensityThreshold - 1, AllocCount, - TotalLifetimeThreshold - 1), + // Short lived with less accesses per byte per sec than cold threshold is not cold. + EXPECT_EQ(getAllocType(ColdTotalLifetimeAccessDensityThreshold - 1, AllocCount, + ColdTotalLifetimeThreshold - 1), AllocationType::NotCold); } @@ -96,9 +114,14 @@ TEST_F(MemoryProfileInfoTest, SingleAllocType) { uint8_t NotCold = (uint8_t)AllocationType::NotCold; uint8_t Cold = (uint8_t)AllocationType::Cold; + uint8_t Hot = (uint8_t)AllocationType::Hot; EXPECT_TRUE(hasSingleAllocType(NotCold)); EXPECT_TRUE(hasSingleAllocType(Cold)); + EXPECT_TRUE(hasSingleAllocType(Hot)); EXPECT_FALSE(hasSingleAllocType(NotCold | Cold)); + EXPECT_FALSE(hasSingleAllocType(NotCold | Hot)); + EXPECT_FALSE(hasSingleAllocType(Cold | Hot)); + EXPECT_FALSE(hasSingleAllocType(NotCold | Cold | Hot)); } // Test buildCallstackMetadata helper. @@ -129,6 +152,8 @@ %0 = bitcast i8* %call1 to i32* %call2 = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40) %1 = bitcast i8* %call2 to i32* + %call3 = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40) + %2 = bitcast i8* %call3 to i32* ret i32* %1 } declare dso_local noalias noundef i8* @malloc(i64 noundef) @@ -157,6 +182,17 @@ EXPECT_FALSE(Call2->hasMetadata(LLVMContext::MD_memprof)); EXPECT_TRUE(Call2->hasFnAttr("memprof")); EXPECT_EQ(Call2->getFnAttr("memprof").getValueAsString(), "notcold"); + + // Third call has all hot contexts. + CallStackTrie Trie3; + Trie3.addCallStack(AllocationType::Hot, {9, 10}); + Trie3.addCallStack(AllocationType::Hot, {9, 11, 12}); + CallBase *Call3 = findCall(*Func, "call3"); + Trie3.buildAndAttachMIBMetadata(Call3); + + EXPECT_FALSE(Call3->hasMetadata(LLVMContext::MD_memprof)); + EXPECT_TRUE(Call3->hasFnAttr("memprof")); + EXPECT_EQ(Call3->getFnAttr("memprof").getValueAsString(), "hot"); } // Test CallStackTrie::addCallStack interface taking allocation type and list of @@ -208,6 +244,157 @@ } } +// Test CallStackTrie::addCallStack interface taking allocation type and list of +// call stack ids. +// Test that an allocation call reached by both cold and hot call stacks +// gets memprof metadata representing the different allocation type contexts. +TEST_F(MemoryProfileInfoTest, ColdAndHotMIB) { + LLVMContext C; + std::unique_ptr M = makeLLVMModule(C, + R"IR( +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" +define i32* @test() { +entry: + %call = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40) + %0 = bitcast i8* %call to i32* + ret i32* %0 +} +declare dso_local noalias noundef i8* @malloc(i64 noundef) +)IR"); + + Function *Func = M->getFunction("test"); + + CallStackTrie Trie; + Trie.addCallStack(AllocationType::Cold, {1, 2}); + Trie.addCallStack(AllocationType::Hot, {1, 3}); + + CallBase *Call = findCall(*Func, "call"); + Trie.buildAndAttachMIBMetadata(Call); + + EXPECT_FALSE(Call->hasFnAttr("memprof")); + EXPECT_TRUE(Call->hasMetadata(LLVMContext::MD_memprof)); + MDNode *MemProfMD = Call->getMetadata(LLVMContext::MD_memprof); + ASSERT_EQ(MemProfMD->getNumOperands(), 2u); + for (auto &MIBOp : MemProfMD->operands()) { + MDNode *MIB = dyn_cast(MIBOp); + MDNode *StackMD = getMIBStackNode(MIB); + ASSERT_NE(StackMD, nullptr); + ASSERT_EQ(StackMD->getNumOperands(), 2u); + auto *StackId = mdconst::dyn_extract(StackMD->getOperand(0)); + ASSERT_EQ(StackId->getZExtValue(), 1u); + StackId = mdconst::dyn_extract(StackMD->getOperand(1)); + if (StackId->getZExtValue() == 2u) + EXPECT_EQ(getMIBAllocType(MIB), AllocationType::Cold); + else { + ASSERT_EQ(StackId->getZExtValue(), 3u); + EXPECT_EQ(getMIBAllocType(MIB), AllocationType::Hot); + } + } +} + +// Test CallStackTrie::addCallStack interface taking allocation type and list of +// call stack ids. +// Test that an allocation call reached by both non cold and hot call stacks +// gets memprof metadata representing the different allocation type contexts. +TEST_F(MemoryProfileInfoTest, NotColdAndHotMIB) { + LLVMContext C; + std::unique_ptr M = makeLLVMModule(C, + R"IR( +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" +define i32* @test() { +entry: + %call = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40) + %0 = bitcast i8* %call to i32* + ret i32* %0 +} +declare dso_local noalias noundef i8* @malloc(i64 noundef) +)IR"); + + Function *Func = M->getFunction("test"); + + CallStackTrie Trie; + Trie.addCallStack(AllocationType::NotCold, {1, 2}); + Trie.addCallStack(AllocationType::Hot, {1, 3}); + + CallBase *Call = findCall(*Func, "call"); + Trie.buildAndAttachMIBMetadata(Call); + + EXPECT_FALSE(Call->hasFnAttr("memprof")); + EXPECT_TRUE(Call->hasMetadata(LLVMContext::MD_memprof)); + MDNode *MemProfMD = Call->getMetadata(LLVMContext::MD_memprof); + ASSERT_EQ(MemProfMD->getNumOperands(), 2u); + for (auto &MIBOp : MemProfMD->operands()) { + MDNode *MIB = dyn_cast(MIBOp); + MDNode *StackMD = getMIBStackNode(MIB); + ASSERT_NE(StackMD, nullptr); + ASSERT_EQ(StackMD->getNumOperands(), 2u); + auto *StackId = mdconst::dyn_extract(StackMD->getOperand(0)); + ASSERT_EQ(StackId->getZExtValue(), 1u); + StackId = mdconst::dyn_extract(StackMD->getOperand(1)); + if (StackId->getZExtValue() == 2u) + EXPECT_EQ(getMIBAllocType(MIB), AllocationType::NotCold); + else { + ASSERT_EQ(StackId->getZExtValue(), 3u); + EXPECT_EQ(getMIBAllocType(MIB), AllocationType::Hot); + } + } +} + +// Test CallStackTrie::addCallStack interface taking allocation type and list of +// call stack ids. +// Test that an allocation call reached by both cold, non cold and hot call +// stacks gets memprof metadata representing the different allocation type +// contexts. +TEST_F(MemoryProfileInfoTest, ColdAndNotColdAndHotMIB) { + LLVMContext C; + std::unique_ptr M = makeLLVMModule(C, + R"IR( +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" +define i32* @test() { +entry: + %call = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40) + %0 = bitcast i8* %call to i32* + ret i32* %0 +} +declare dso_local noalias noundef i8* @malloc(i64 noundef) +)IR"); + + Function *Func = M->getFunction("test"); + + CallStackTrie Trie; + Trie.addCallStack(AllocationType::Cold, {1, 2}); + Trie.addCallStack(AllocationType::NotCold, {1, 3}); + Trie.addCallStack(AllocationType::Hot, {1, 4}); + + CallBase *Call = findCall(*Func, "call"); + Trie.buildAndAttachMIBMetadata(Call); + + EXPECT_FALSE(Call->hasFnAttr("memprof")); + EXPECT_TRUE(Call->hasMetadata(LLVMContext::MD_memprof)); + MDNode *MemProfMD = Call->getMetadata(LLVMContext::MD_memprof); + ASSERT_EQ(MemProfMD->getNumOperands(), 3u); + for (auto &MIBOp : MemProfMD->operands()) { + MDNode *MIB = dyn_cast(MIBOp); + MDNode *StackMD = getMIBStackNode(MIB); + ASSERT_NE(StackMD, nullptr); + ASSERT_EQ(StackMD->getNumOperands(), 2u); + auto *StackId = mdconst::dyn_extract(StackMD->getOperand(0)); + ASSERT_EQ(StackId->getZExtValue(), 1u); + StackId = mdconst::dyn_extract(StackMD->getOperand(1)); + if (StackId->getZExtValue() == 2u) { + EXPECT_EQ(getMIBAllocType(MIB), AllocationType::Cold); + } else if (StackId->getZExtValue() == 3u) { + EXPECT_EQ(getMIBAllocType(MIB), AllocationType::NotCold); + } else { + ASSERT_EQ(StackId->getZExtValue(), 4u); + EXPECT_EQ(getMIBAllocType(MIB), AllocationType::Hot); + } + } +} + // Test CallStackTrie::addCallStack interface taking allocation type and list of // call stack ids. // Test that an allocation call reached by multiple call stacks has memprof @@ -239,6 +426,10 @@ // with the non-cold context {1, 5}. Trie.addCallStack(AllocationType::NotCold, {1, 5, 6}); Trie.addCallStack(AllocationType::NotCold, {1, 5, 7}); + // We should be able to trim the following two and combine into a single MIB + // with the hot context {1, 8}. + Trie.addCallStack(AllocationType::Hot, {1, 8, 9}); + Trie.addCallStack(AllocationType::Hot, {1, 8, 10}); CallBase *Call = findCall(*Func, "call"); Trie.buildAndAttachMIBMetadata(Call); @@ -246,7 +437,7 @@ EXPECT_FALSE(Call->hasFnAttr("memprof")); EXPECT_TRUE(Call->hasMetadata(LLVMContext::MD_memprof)); MDNode *MemProfMD = Call->getMetadata(LLVMContext::MD_memprof); - ASSERT_EQ(MemProfMD->getNumOperands(), 2u); + ASSERT_EQ(MemProfMD->getNumOperands(), 3u); for (auto &MIBOp : MemProfMD->operands()) { MDNode *MIB = dyn_cast(MIBOp); MDNode *StackMD = getMIBStackNode(MIB); @@ -257,9 +448,11 @@ StackId = mdconst::dyn_extract(StackMD->getOperand(1)); if (StackId->getZExtValue() == 2u) EXPECT_EQ(getMIBAllocType(MIB), AllocationType::Cold); - else { - ASSERT_EQ(StackId->getZExtValue(), 5u); + else if (StackId->getZExtValue() == 5u) EXPECT_EQ(getMIBAllocType(MIB), AllocationType::NotCold); + else { + ASSERT_EQ(StackId->getZExtValue(), 8u); + EXPECT_EQ(getMIBAllocType(MIB), AllocationType::Hot); } } } @@ -279,6 +472,8 @@ %0 = bitcast i8* %call1 to i32* %call2 = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40), !memprof !3 %1 = bitcast i8* %call2 to i32* + %call3 = call noalias dereferenceable_or_null(40) i8* @malloc(i64 noundef 40), !memprof !6 + %2 = bitcast i8* %call3 to i32* ret i32* %1 } declare dso_local noalias noundef i8* @malloc(i64 noundef) @@ -288,6 +483,9 @@ !3 = !{!4} !4 = !{!5, !"notcold"} !5 = !{i64 4, i64 5, i64 6, i64 7} +!6 = !{!7} +!7 = !{!8, !"hot"} +!8 = !{i64 8, i64 9, i64 10} )IR"); Function *Func = M->getFunction("test"); @@ -315,6 +513,18 @@ EXPECT_TRUE(Call2->hasFnAttr("memprof")); EXPECT_EQ(Call2->getFnAttr("memprof").getValueAsString(), "notcold"); + + // Third call has all hot contexts. + CallStackTrie Trie3; + CallBase *Call3 = findCall(*Func, "call3"); + MDNode *MemProfMD3 = Call3->getMetadata(LLVMContext::MD_memprof); + ASSERT_EQ(MemProfMD2->getNumOperands(), 1u); + MDNode *MIB3 = dyn_cast(MemProfMD3->getOperand(0)); + Trie3.addCallStack(MIB3); + Trie3.buildAndAttachMIBMetadata(Call3); + + EXPECT_TRUE(Call3->hasFnAttr("memprof")); + EXPECT_EQ(Call3->getFnAttr("memprof").getValueAsString(), "hot"); } // Test CallStackTrie::addCallStack interface taking memprof MIB metadata. @@ -334,7 +544,7 @@ ret i32* %0 } declare dso_local noalias noundef i8* @malloc(i64 noundef) -!0 = !{!1, !3, !5, !7} +!0 = !{!1, !3, !5, !7, !9, !11} !1 = !{!2, !"cold"} !2 = !{i64 1, i64 2, i64 3} !3 = !{!4, !"cold"} @@ -343,6 +553,10 @@ !6 = !{i64 1, i64 5, i64 6} !7 = !{!8, !"notcold"} !8 = !{i64 1, i64 5, i64 7} +!9 = !{!10, !"hot"} +!10 = !{i64 1, i64 8, i64 9} +!11 = !{!12, !"hot"} +!12 = !{i64 1, i64 8, i64 10} )IR"); Function *Func = M->getFunction("test"); @@ -366,7 +580,7 @@ EXPECT_FALSE(Call->hasFnAttr("memprof")); EXPECT_TRUE(Call->hasMetadata(LLVMContext::MD_memprof)); MemProfMD = Call->getMetadata(LLVMContext::MD_memprof); - ASSERT_EQ(MemProfMD->getNumOperands(), 2u); + ASSERT_EQ(MemProfMD->getNumOperands(), 3u); for (auto &MIBOp : MemProfMD->operands()) { MDNode *MIB = dyn_cast(MIBOp); MDNode *StackMD = getMIBStackNode(MIB); @@ -377,9 +591,11 @@ StackId = mdconst::dyn_extract(StackMD->getOperand(1)); if (StackId->getZExtValue() == 2u) EXPECT_EQ(getMIBAllocType(MIB), AllocationType::Cold); - else { - ASSERT_EQ(StackId->getZExtValue(), 5u); + else if (StackId->getZExtValue() == 5u) EXPECT_EQ(getMIBAllocType(MIB), AllocationType::NotCold); + else { + ASSERT_EQ(StackId->getZExtValue(), 8u); + EXPECT_EQ(getMIBAllocType(MIB), AllocationType::Hot); } } } @@ -396,12 +612,14 @@ ret ptr %call } declare noundef nonnull ptr @_Znam(i64 noundef) -!1 = !{!2, !4} +!1 = !{!2, !4, !7} !2 = !{!3, !"notcold"} !3 = !{i64 1, i64 2, i64 3, i64 4} !4 = !{!5, !"cold"} !5 = !{i64 1, i64 2, i64 3, i64 5} !6 = !{i64 1} +!7 = !{!8, !"hot"} +!8 = !{i64 1, i64 2, i64 3, i64 6} )IR"); Function *Func = M->getFunction("test"); @@ -411,75 +629,82 @@ Call->getMetadata(LLVMContext::MD_callsite)); MDNode *MemProfMD = Call->getMetadata(LLVMContext::MD_memprof); - bool First = true; + uint Idx = 0; for (auto &MIBOp : MemProfMD->operands()) { auto *MIBMD = cast(MIBOp); MDNode *StackNode = getMIBStackNode(MIBMD); CallStack StackContext(StackNode); - uint64_t ExpectedBack = First ? 4 : 5; - EXPECT_EQ(StackContext.back(), ExpectedBack); + EXPECT_EQ(StackContext.back(), 4 + Idx); std::vector StackIds; for (auto ContextIter = StackContext.beginAfterSharedPrefix(InstCallsite); ContextIter != StackContext.end(); ++ContextIter) StackIds.push_back(*ContextIter); - if (First) { + if (Idx == 0) { std::vector Expected = {2, 3, 4}; EXPECT_EQ(ArrayRef(StackIds), ArrayRef(Expected)); - } else { + } else if (Idx == 1) { std::vector Expected = {2, 3, 5}; EXPECT_EQ(ArrayRef(StackIds), ArrayRef(Expected)); + } else { + std::vector Expected = {2, 3, 6}; + EXPECT_EQ(ArrayRef(StackIds), ArrayRef(Expected)); } - First = false; + Idx++; } } TEST_F(MemoryProfileInfoTest, CallStackTestSummary) { std::unique_ptr Index = makeLLVMIndex(R"Summary( ^0 = module: (path: "test.o", hash: (0, 0, 0, 0, 0)) -^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (1, 2, 3, 4)), (type: cold, stackIds: (1, 2, 3, 5)))))))) -^2 = gv: (guid: 25, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^1)), callsites: ((callee: ^1, clones: (0), stackIds: (3, 4)), (callee: ^1, clones: (0), stackIds: (3, 5)))))) +^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (1, 2, 3, 4)), (type: cold, stackIds: (1, 2, 3, 5)), (type: hot, stackIds: (1, 2, 3, 6)))))))) +^2 = gv: (guid: 25, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^1)), callsites: ((callee: ^1, clones: (0), stackIds: (3, 4)), (callee: ^1, clones: (0), stackIds: (3, 5)), (callee: ^1, clones: (0), stackIds: (3, 6)))))) )Summary"); ASSERT_NE(Index, nullptr); auto *CallsiteSummary = cast(Index->getGlobalValueSummary(/*guid=*/25)); - bool First = true; + uint Idx = 0; for (auto &CI : CallsiteSummary->callsites()) { CallStack::const_iterator> InstCallsite( &CI); std::vector StackIds; for (auto StackIdIndex : InstCallsite) StackIds.push_back(Index->getStackIdAtIndex(StackIdIndex)); - if (First) { + if (Idx == 0) { std::vector Expected = {3, 4}; EXPECT_EQ(ArrayRef(StackIds), ArrayRef(Expected)); - } else { + } else if (Idx == 1) { std::vector Expected = {3, 5}; EXPECT_EQ(ArrayRef(StackIds), ArrayRef(Expected)); + } else { + std::vector Expected = {3, 6}; + EXPECT_EQ(ArrayRef(StackIds), ArrayRef(Expected)); } - First = false; + Idx++; } auto *AllocSummary = cast(Index->getGlobalValueSummary(/*guid=*/23)); for (auto &AI : AllocSummary->allocs()) { - bool First = true; + uint Idx = 0; for (auto &MIB : AI.MIBs) { CallStack::const_iterator> StackContext( &MIB); - uint64_t ExpectedBack = First ? 4 : 5; - EXPECT_EQ(Index->getStackIdAtIndex(StackContext.back()), ExpectedBack); + EXPECT_EQ(Index->getStackIdAtIndex(StackContext.back()), 4 + Idx); std::vector StackIds; for (auto StackIdIndex : StackContext) StackIds.push_back(Index->getStackIdAtIndex(StackIdIndex)); - if (First) { + if (Idx == 0) { std::vector Expected = {1, 2, 3, 4}; EXPECT_EQ(ArrayRef(StackIds), ArrayRef(Expected)); - } else { + } else if (Idx == 1) { std::vector Expected = {1, 2, 3, 5}; EXPECT_EQ(ArrayRef(StackIds), ArrayRef(Expected)); + } else { + std::vector Expected = {1, 2, 3, 6}; + EXPECT_EQ(ArrayRef(StackIds), ArrayRef(Expected)); } - First = false; + Idx++; } } }