Index: compiler-rt/trunk/lib/profile/InstrProfilingInternal.h =================================================================== --- compiler-rt/trunk/lib/profile/InstrProfilingInternal.h +++ compiler-rt/trunk/lib/profile/InstrProfilingInternal.h @@ -150,6 +150,10 @@ VPDataReaderType *lprofGetVPDataReader(); +/* Internal interface used by test to reset the max number of + * tracked values per value site to be \p MaxVals. + */ +void lprofSetMaxValsPerSite(uint32_t MaxVals); void lprofSetupValueProfiler(); COMPILER_RT_VISIBILITY extern char *(*GetEnvHook)(const char *); Index: compiler-rt/trunk/lib/profile/InstrProfilingValue.c =================================================================== --- compiler-rt/trunk/lib/profile/InstrProfilingValue.c +++ compiler-rt/trunk/lib/profile/InstrProfilingValue.c @@ -36,6 +36,10 @@ VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE; } +COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) { + VPMaxNumValsPerSite = MaxVals; +} + /* This method is only used in value profiler mock testing. */ COMPILER_RT_VISIBILITY void __llvm_profile_set_num_value_sites(__llvm_profile_data *Data, @@ -93,7 +97,9 @@ ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values; ValueProfNode *PrevVNode = NULL; + ValueProfNode *MinCountVNode = NULL; ValueProfNode *CurrentVNode = ValueCounters[CounterIndex]; + uint64_t MinCount = UINT64_MAX; uint8_t VDataCount = 0; while (CurrentVNode) { @@ -101,13 +107,51 @@ CurrentVNode->Count++; return; } + if (CurrentVNode->Count < MinCount) { + MinCount = CurrentVNode->Count; + MinCountVNode = CurrentVNode; + } PrevVNode = CurrentVNode; CurrentVNode = CurrentVNode->Next; ++VDataCount; } - if (VDataCount >= VPMaxNumValsPerSite) + if (VDataCount >= VPMaxNumValsPerSite) { + /* Bump down the min count node's count. If it reaches 0, + * evict it. This eviction/replacement policy makes hot + * targets more sticky while cold targets less so. In other + * words, it makes it less likely for the hot targets to be + * prematurally evicted during warmup/establishment period, + * when their counts are still low. In a special case when + * the number of values tracked is reduced to only one, this + * policy will guarantee that the dominating target with >50% + * total count will survive in the end. Note that this scheme + * allows the runtime to track the min count node in an adaptive + * manner. It can correct previous mistakes and eventually + * lock on a cold target that is alread in stable state. + * + * In very rare cases, this replacement scheme may still lead + * to target loss. For instance, out of \c N value slots, \c N-1 + * slots are occupied by luke warm targets during the warmup + * period and the remaining one slot is competed by two or more + * very hot targets. If those hot targets occur in an interleaved + * way, none of them will survive (gain enough weight to throw out + * other established entries) due to the ping-pong effect. + * To handle this situation, user can choose to increase the max + * number of tracked values per value site. Alternatively, a more + * expensive eviction mechanism can be implemented. It requires + * the runtime to track the total number of evictions per-site. + * When the total number of evictions reaches certain threshold, + * the runtime can wipe out more than one lowest count entries + * to give space for hot targets. + */ + if (!(--MinCountVNode->Count)) { + CurrentVNode = MinCountVNode; + CurrentVNode->Value = TargetValue; + CurrentVNode->Count++; + } return; + } CurrentVNode = (ValueProfNode *)calloc(1, sizeof(ValueProfNode)); if (!CurrentVNode) Index: compiler-rt/trunk/test/profile/Inputs/instrprof-value-prof-evict.c =================================================================== --- compiler-rt/trunk/test/profile/Inputs/instrprof-value-prof-evict.c +++ compiler-rt/trunk/test/profile/Inputs/instrprof-value-prof-evict.c @@ -0,0 +1,141 @@ +void callee_0() {} +void callee_1() {} +void callee_2() {} +void callee_3() {} + +void *CalleeAddrs[] = {callee_0, callee_1, callee_2, callee_3}; +extern void lprofSetMaxValsPerSite(unsigned); + +// sequences of callee ids + +// In the following sequences, +// there are two targets, the dominating target is +// target 0. +int CallSeqTwoTarget_1[] = {0, 0, 0, 0, 0, 1, 1}; +int CallSeqTwoTarget_2[] = {1, 1, 0, 0, 0, 0, 0}; +int CallSeqTwoTarget_3[] = {1, 0, 0, 1, 0, 0, 0}; +int CallSeqTwoTarget_4[] = {0, 0, 0, 1, 0, 1, 0}; + +// In the following sequences, there are three targets +// The dominating target is 0 and has > 50% of total +// counts. +int CallSeqThreeTarget_1[] = {0, 0, 0, 0, 0, 0, 1, 2, 1}; +int CallSeqThreeTarget_2[] = {1, 2, 1, 0, 0, 0, 0, 0, 0}; +int CallSeqThreeTarget_3[] = {1, 0, 0, 2, 0, 0, 0, 1, 0}; +int CallSeqThreeTarget_4[] = {0, 0, 0, 1, 0, 1, 0, 0, 2}; + +// Four target sequence -- +// There are two cold targets which occupies the value counters +// early. There is also a very hot target and a medium hot target +// which are invoked in an interleaved fashion -- the length of each +// hot period in the sequence is shorter than the cold targets' count. +// 1. If only two values are tracked, the Hot and Medium hot targets +// should surive in the end +// 2. If only three values are tracked, the top three targets should +// surive in the end. +int CallSeqFourTarget_1[] = {1, 1, 1, 2, 2, 2, 2, 0, 0, 3, 0, 0, 3, 0, 0, 3, + 0, 0, 3, 0, 0, 3, 0, 0, 3, 0, 0, 3, 0, 0, 3}; + +// Same as above, but the cold entries are invoked later. +int CallSeqFourTarget_2[] = {0, 0, 3, 0, 0, 3, 0, 0, 3, 0, 0, 3, 0, 0, 3, 0, + 0, 3, 0, 0, 3, 0, 0, 3, 1, 1, 1, 2, 2, 2, 2}; + +// Same as above, but all the targets are interleaved. +int CallSeqFourTarget_3[] = {0, 3, 0, 0, 1, 3, 0, 0, 0, 2, 0, 0, 3, 3, 0, 3, + 2, 2, 0, 3, 3, 1, 0, 0, 1, 0, 0, 3, 0, 2, 0}; + +typedef void (*FPT)(void); + + +// Testing value profiling eviction algorithm. +FPT getCalleeFunc(int I) { return CalleeAddrs[I]; } + +int main() { + int I; + +#define INDIRECT_CALLSITE(Sequence, NumValsTracked) \ + lprofSetMaxValsPerSite(NumValsTracked); \ + for (I = 0; I < sizeof(Sequence) / sizeof(*Sequence); I++) { \ + FPT FP = getCalleeFunc(Sequence[I]); \ + FP(); \ + } + + // check site, target patterns + // CHECK: 0, callee_0 + INDIRECT_CALLSITE(CallSeqTwoTarget_1, 1); + + // CHECK-NEXT: 1, callee_0 + INDIRECT_CALLSITE(CallSeqTwoTarget_2, 1); + + // CHECK-NEXT: 2, callee_0 + INDIRECT_CALLSITE(CallSeqTwoTarget_3, 1); + + // CHECK-NEXT: 3, callee_0 + INDIRECT_CALLSITE(CallSeqTwoTarget_4, 1); + + // CHECK-NEXT: 4, callee_0 + INDIRECT_CALLSITE(CallSeqThreeTarget_1, 1); + + // CHECK-NEXT: 5, callee_0 + INDIRECT_CALLSITE(CallSeqThreeTarget_2, 1); + + // CHECK-NEXT: 6, callee_0 + INDIRECT_CALLSITE(CallSeqThreeTarget_3, 1); + + // CHECK-NEXT: 7, callee_0 + INDIRECT_CALLSITE(CallSeqThreeTarget_4, 1); + + // CHECK-NEXT: 8, callee_0 + // CHECK-NEXT: 8, callee_1 + INDIRECT_CALLSITE(CallSeqThreeTarget_1, 2); + + // CHECK-NEXT: 9, callee_0 + // CHECK-NEXT: 9, callee_1 + INDIRECT_CALLSITE(CallSeqThreeTarget_2, 2); + + // CHECK-NEXT: 10, callee_0 + // CHECK-NEXT: 10, callee_1 + INDIRECT_CALLSITE(CallSeqThreeTarget_3, 2); + + // CHECK-NEXT: 11, callee_0 + // CHECK-NEXT: 11, callee_1 + INDIRECT_CALLSITE(CallSeqThreeTarget_4, 2); + + // CHECK-NEXT: 12, callee_0 + INDIRECT_CALLSITE(CallSeqFourTarget_1, 1); + + // CHECK-NEXT: 13, callee_0 + INDIRECT_CALLSITE(CallSeqFourTarget_2, 1); + + // CHECK-NEXT: 14, callee_0 + INDIRECT_CALLSITE(CallSeqFourTarget_3, 1); + + // CHECK-NEXT: 15, callee_0 + // CHECK-NEXT: 15, callee_3 + INDIRECT_CALLSITE(CallSeqFourTarget_1, 2); + + // CHECK-NEXT: 16, callee_0 + // CHECK-NEXT: 16, callee_3 + INDIRECT_CALLSITE(CallSeqFourTarget_2, 2); + + // CHECK-NEXT: 17, callee_0 + // CHECK-NEXT: 17, callee_3 + INDIRECT_CALLSITE(CallSeqFourTarget_3, 2); + + // CHECK-NEXT: 18, callee_0 + // CHECK-NEXT: 18, callee_3 + // CHECK-NEXT: 18, callee_2 + INDIRECT_CALLSITE(CallSeqFourTarget_1, 3); + + // CHECK-NEXT: 19, callee_0 + // CHECK-NEXT: 19, callee_3 + // CHECK-NEXT: 19, callee_2 + INDIRECT_CALLSITE(CallSeqFourTarget_2, 3); + + // CHECK-NEXT: 20, callee_0 + // CHECK-NEXT: 20, callee_3 + // CHECK-NEXT: 20, callee_2 + INDIRECT_CALLSITE(CallSeqFourTarget_3, 3); + + return 0; +} Index: compiler-rt/trunk/test/profile/Inputs/instrprof-value-prof-real.c =================================================================== --- compiler-rt/trunk/test/profile/Inputs/instrprof-value-prof-real.c +++ compiler-rt/trunk/test/profile/Inputs/instrprof-value-prof-real.c @@ -309,7 +309,7 @@ // CHECK-NEXT: [ 0, foo_1_2_2_2_2_2_1_2_2, 749 ] // CHECK-NEXT: [ 0, foo_1_2_2_2_2_2_2_1_1, 748 ] // CHECK-NEXT: [ 0, foo_1_2_2_2_2_2_2_1_2, 747 ] -// CHECK-NEXT: [ 0, foo_1_2_2_2_2_2_2_2_1, 746 ] +// CHECK-NEXT: [ 0, foo // CHECK-NEXT: [ 1, foo_2_2_2_2_2_2_2_2_2, 2000 ] // CHECK-NEXT: [ 1, foo_2_2_2_2_2_2_2_2_1, 1999 ] // CHECK-NEXT: [ 1, foo_2_2_2_2_2_2_2_1_2, 1998 ] @@ -564,7 +564,7 @@ // CHECK-NEXT: [ 1, foo_2_1_1_1_1_1_2_1_1, 1749 ] // CHECK-NEXT: [ 1, foo_2_1_1_1_1_1_1_2_2, 1748 ] // CHECK-NEXT: [ 1, foo_2_1_1_1_1_1_1_2_1, 1747 ] -// CHECK-NEXT: [ 1, foo_2_1_1_1_1_1_1_1_2, 1746 ] +// CHECK-NEXT: [ 1, foo // SHARED-LABEL: shared_entry: // SHARED: [ 0, foo_1_1_1_1_1_1_1_1_1, 1000 ] @@ -821,7 +821,7 @@ // SHARED-NEXT: [ 0, foo_1_2_2_2_2_2_1_2_2, 749 ] // SHARED-NEXT: [ 0, foo_1_2_2_2_2_2_2_1_1, 748 ] // SHARED-NEXT: [ 0, foo_1_2_2_2_2_2_2_1_2, 747 ] -// SHARED-NEXT: [ 0, foo_1_2_2_2_2_2_2_2_1, 746 ] +// SHARED-NEXT: [ 0, foo // SHARED-NEXT: [ 1, foo_2_2_2_2_2_2_2_2_2, 2000 ] // SHARED-NEXT: [ 1, foo_2_2_2_2_2_2_2_2_1, 1999 ] // SHARED-NEXT: [ 1, foo_2_2_2_2_2_2_2_1_2, 1998 ] @@ -1076,4 +1076,4 @@ // SHARED-NEXT: [ 1, foo_2_1_1_1_1_1_2_1_1, 1749 ] // SHARED-NEXT: [ 1, foo_2_1_1_1_1_1_1_2_2, 1748 ] // SHARED-NEXT: [ 1, foo_2_1_1_1_1_1_1_2_1, 1747 ] -// SHARED-NEXT: [ 1, foo_2_1_1_1_1_1_1_1_2, 1746 ] +// SHARED-NEXT: [ 1, foo Index: compiler-rt/trunk/test/profile/instrprof-value-prof-evict.test =================================================================== --- compiler-rt/trunk/test/profile/instrprof-value-prof-evict.test +++ compiler-rt/trunk/test/profile/instrprof-value-prof-evict.test @@ -0,0 +1,10 @@ +// RUN: %clang_profgen -O2 -mllvm -enable-value-profiling=true -o %t %S/Inputs/instrprof-value-prof-evict.c +// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t +// RUN: llvm-profdata merge -o %t.profdata %t.profraw +// RUN: llvm-profdata show --all-functions -ic-targets %t.profdata | FileCheck %S/Inputs/instrprof-value-prof-evict.c + +// IR level instrumentation +// RUN: %clang_profgen -O2 -mllvm -disable-vp=false -Xclang -fprofile-instrument=llvm -o %t.ir %S/Inputs/instrprof-value-prof-evict.c +// RUN: env LLVM_PROFILE_FILE=%t.ir.profraw %run %t.ir +// RUN: llvm-profdata merge -o %t.ir.profdata %t.ir.profraw +// RUN: llvm-profdata show --all-functions -ic-targets %t.ir.profdata | FileCheck %S/Inputs/instrprof-value-prof-evict.c