diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
--- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h
+++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
@@ -24,8 +24,11 @@
 namespace memprof {
 
 /// Return the allocation type for a given set of memory profile values.
-AllocationType getAllocType(uint64_t MaxAccessCount, uint64_t MinSize,
-                            uint64_t MinLifetime);
+/// \p TotalLifetimeAccessDensity and \p TotalLifetime are totals over
+/// \p AllocCount allocations; each is averaged per allocation before being
+/// compared against the cold thresholds. A zero \p AllocCount is not cold.
+AllocationType getAllocType(uint64_t TotalLifetimeAccessDensity,
+                            uint64_t AllocCount, uint64_t TotalLifetime);
 
 /// Build callstack metadata from the provided list of call stack ids. Returns
 /// the resulting metadata node.
diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp
--- a/llvm/lib/Analysis/MemoryProfileInfo.cpp
+++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp
@@ -18,25 +18,37 @@
 
 #define DEBUG_TYPE "memory-profile-info"
 
-// Upper bound on accesses per byte for marking an allocation cold.
-cl::opt<float> MemProfAccessesPerByteColdThreshold(
-    "memprof-accesses-per-byte-cold-threshold", cl::init(10.0), cl::Hidden,
-    cl::desc("The threshold the accesses per byte must be under to consider "
-             "an allocation cold"));
+// Upper bound on lifetime access density (accesses per byte per lifetime sec)
+// for marking an allocation cold.
+cl::opt<float> MemProfLifetimeAccessDensityColdThreshold(
+    "memprof-lifetime-access-density-cold-threshold", cl::init(0.05),
+    cl::Hidden,
+    cl::desc("The threshold the lifetime access density (accesses per byte per "
+             "lifetime sec) must be under to consider an allocation cold"));
 
 // Lower bound on lifetime to mark an allocation cold (in addition to accesses
-// per byte above). This is to avoid pessimizing short lived objects.
-cl::opt<unsigned> MemProfMinLifetimeColdThreshold(
-    "memprof-min-lifetime-cold-threshold", cl::init(200), cl::Hidden,
-    cl::desc("The minimum lifetime (s) for an allocation to be considered "
+// per byte per sec above). This is to avoid pessimizing short lived objects.
+cl::opt<unsigned> MemProfAveLifetimeColdThreshold(
+    "memprof-ave-lifetime-cold-threshold", cl::init(200), cl::Hidden,
+    cl::desc("The average lifetime (s) for an allocation to be considered "
              "cold"));
 
-AllocationType llvm::memprof::getAllocType(uint64_t MaxAccessCount,
-                                           uint64_t MinSize,
-                                           uint64_t MinLifetime) {
-  if (((float)MaxAccessCount) / MinSize < MemProfAccessesPerByteColdThreshold &&
-      // MinLifetime is expected to be in ms, so convert the threshold to ms.
-      MinLifetime >= MemProfMinLifetimeColdThreshold * 1000)
+AllocationType llvm::memprof::getAllocType(uint64_t TotalLifetimeAccessDensity,
+                                           uint64_t AllocCount,
+                                           uint64_t TotalLifetime) {
+  // Both averages below divide by AllocCount. With no allocations there is
+  // nothing to average, so report not cold explicitly instead of relying on
+  // floating point division by zero.
+  if (AllocCount == 0)
+    return AllocationType::NotCold;
+  // The access densities are multiplied by 100 to hold 2 decimal places of
+  // precision, so we need to divide by 100.
+  const float AveLifetimeAccessDensity =
+      static_cast<float>(TotalLifetimeAccessDensity) / AllocCount / 100;
+  // Lifetime is expected to be in ms, so convert the threshold to ms.
+  const float AveLifetime = static_cast<float>(TotalLifetime) / AllocCount;
+  if (AveLifetimeAccessDensity < MemProfLifetimeAccessDensityColdThreshold &&
+      AveLifetime >= MemProfAveLifetimeColdThreshold * 1000)
     return AllocationType::Cold;
   return AllocationType::NotCold;
 }
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -1330,9 +1330,9 @@
   SmallVector<uint64_t> StackIds;
   for (const auto &StackFrame : AllocInfo->CallStack)
     StackIds.push_back(computeStackId(StackFrame));
-  auto AllocType = getAllocType(AllocInfo->Info.getMaxAccessCount(),
-                                AllocInfo->Info.getMinSize(),
-                                AllocInfo->Info.getMinLifetime());
+  auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
+                                AllocInfo->Info.getAllocCount(),
+                                AllocInfo->Info.getTotalLifetime());
   AllocTrie.addCallStack(AllocType, StackIds);
 }
 
diff --git a/llvm/test/ThinLTO/X86/memprof-basic.ll b/llvm/test/ThinLTO/X86/memprof-basic.ll
--- a/llvm/test/ThinLTO/X86/memprof-basic.ll
+++ b/llvm/test/ThinLTO/X86/memprof-basic.ll
@@ -26,7 +26,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
diff --git a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
--- a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
+++ b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
@@ -38,7 +38,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; The code below was created by forcing inlining of C into both B and E.
diff --git a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll
--- a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll
+++ b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll
@@ -76,7 +76,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; The code below was created by forcing inlining of A into its callers,
diff --git a/llvm/test/ThinLTO/X86/memprof-indirectcall.ll b/llvm/test/ThinLTO/X86/memprof-indirectcall.ll
--- a/llvm/test/ThinLTO/X86/memprof-indirectcall.ll
+++ b/llvm/test/ThinLTO/X86/memprof-indirectcall.ll
@@ -44,7 +44,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; Compiled without optimization to prevent inlining and devirtualization.
diff --git a/llvm/test/ThinLTO/X86/memprof-inlined.ll b/llvm/test/ThinLTO/X86/memprof-inlined.ll
--- a/llvm/test/ThinLTO/X86/memprof-inlined.ll
+++ b/llvm/test/ThinLTO/X86/memprof-inlined.ll
@@ -27,7 +27,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; The code below was created by forcing inlining of baz into foo, and
diff --git a/llvm/test/ThinLTO/X86/memprof-inlined2.ll b/llvm/test/ThinLTO/X86/memprof-inlined2.ll
--- a/llvm/test/ThinLTO/X86/memprof-inlined2.ll
+++ b/llvm/test/ThinLTO/X86/memprof-inlined2.ll
@@ -29,7 +29,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; Both foo and baz are inlined into main, at both foo callsites.
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
--- a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
@@ -26,7 +26,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
--- a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
@@ -38,7 +38,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; The code below was created by forcing inlining of C into both B and E.
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll
--- a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll
@@ -76,7 +76,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; The code below was created by forcing inlining of A into its callers,
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll b/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
--- a/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
@@ -44,7 +44,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; Compiled without optimization to prevent inlining and devirtualization.
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
--- a/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
@@ -27,7 +27,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; The code below was created by forcing inlining of baz into foo, and
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll
--- a/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll
@@ -29,7 +29,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; Both foo and baz are inlined into main, at both foo callsites.
diff --git a/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp b/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp
--- a/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp
+++ b/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp
@@ -20,8 +20,8 @@
 using namespace llvm;
 using namespace llvm::memprof;
 
-extern cl::opt<float> MemProfAccessesPerByteColdThreshold;
-extern cl::opt<unsigned> MemProfMinLifetimeColdThreshold;
+extern cl::opt<float> MemProfLifetimeAccessDensityColdThreshold;
+extern cl::opt<unsigned> MemProfAveLifetimeColdThreshold;
 
 namespace {
 
@@ -60,30 +60,37 @@
 // Basic checks on the allocation type for values just above and below
 // the thresholds.
 TEST_F(MemoryProfileInfoTest, GetAllocType) {
-  // Long lived with more accesses per byte than threshold is not cold.
-  EXPECT_EQ(
-      getAllocType(/*MaxAccessCount=*/MemProfAccessesPerByteColdThreshold + 1,
-                   /*MinSize=*/1,
-                   /*MinLifetime=*/MemProfMinLifetimeColdThreshold * 1000 + 1),
-      AllocationType::NotCold);
-  // Long lived with less accesses per byte than threshold is cold.
-  EXPECT_EQ(
-      getAllocType(/*MaxAccessCount=*/MemProfAccessesPerByteColdThreshold - 1,
-                   /*MinSize=*/1,
-                   /*MinLifetime=*/MemProfMinLifetimeColdThreshold * 1000 + 1),
-      AllocationType::Cold);
-  // Short lived with more accesses per byte than threshold is not cold.
-  EXPECT_EQ(
-      getAllocType(/*MaxAccessCount=*/MemProfAccessesPerByteColdThreshold + 1,
-                   /*MinSize=*/1,
-                   /*MinLifetime=*/MemProfMinLifetimeColdThreshold * 1000 - 1),
-      AllocationType::NotCold);
-  // Short lived with less accesses per byte than threshold is not cold.
-  EXPECT_EQ(
-      getAllocType(/*MaxAccessCount=*/MemProfAccessesPerByteColdThreshold - 1,
-                   /*MinSize=*/1,
-                   /*MinLifetime=*/MemProfMinLifetimeColdThreshold * 1000 - 1),
-      AllocationType::NotCold);
+  const uint64_t AllocCount = 2;
+  // To be cold we require that
+  // ((float)TotalLifetimeAccessDensity) / AllocCount / 100 <
+  //    MemProfLifetimeAccessDensityColdThreshold
+  // so compute the TotalLifetimeAccessDensity right at the threshold.
+  const uint64_t TotalLifetimeAccessDensityThreshold =
+      static_cast<uint64_t>(MemProfLifetimeAccessDensityColdThreshold *
+                            AllocCount * 100);
+  // To be cold we require that
+  // ((float)TotalLifetime) / AllocCount >=
+  //    MemProfAveLifetimeColdThreshold * 1000
+  // so compute the TotalLifetime right at the threshold.
+  const uint64_t TotalLifetimeThreshold =
+      MemProfAveLifetimeColdThreshold * AllocCount * 1000;
+
+  // Long lived with more accesses per byte per sec than threshold is not cold.
+  EXPECT_EQ(getAllocType(TotalLifetimeAccessDensityThreshold + 1, AllocCount,
+                         TotalLifetimeThreshold + 1),
+            AllocationType::NotCold);
+  // Long lived with less accesses per byte per sec than threshold is cold.
+  EXPECT_EQ(getAllocType(TotalLifetimeAccessDensityThreshold - 1, AllocCount,
+                         TotalLifetimeThreshold + 1),
+            AllocationType::Cold);
+  // Short lived with more accesses per byte per sec than threshold is not cold.
+  EXPECT_EQ(getAllocType(TotalLifetimeAccessDensityThreshold + 1, AllocCount,
+                         TotalLifetimeThreshold - 1),
+            AllocationType::NotCold);
+  // Short lived with less accesses per byte per sec than threshold is not cold.
+  EXPECT_EQ(getAllocType(TotalLifetimeAccessDensityThreshold - 1, AllocCount,
+                         TotalLifetimeThreshold - 1),
+            AllocationType::NotCold);
 }
 
 // Test the hasSingleAllocType helper.