Index: lib/profile/InstrProfiling.h =================================================================== --- lib/profile/InstrProfiling.h +++ lib/profile/InstrProfiling.h @@ -93,6 +93,12 @@ int __llvm_profile_check_compatibility(const char *Profile, uint64_t Size); +/*! \brief Returns the profile file size without value profiles. + * + * Returns the profile file size without value profiles. + */ +uint64_t __llvm_profile_size_without_vp(const char *ProfileData); + /*! * \brief Counts the number of times a target value is seen. * @@ -106,6 +112,10 @@ #include "InstrProfData.inc" ); +void __llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data, + uint32_t CounterIndex, + uint64_t CounterValue); + /*! * \brief Write instrumentation data to the current file. * Index: lib/profile/InstrProfilingFile.c =================================================================== --- lib/profile/InstrProfilingFile.c +++ lib/profile/InstrProfilingFile.c @@ -183,8 +183,13 @@ /* Now start merging */ __llvm_profile_merge_from_buffer(ProfileBuffer, ProfileFileSize); - (void)munmap(ProfileBuffer, ProfileFileSize); + // Truncate the file in case merging of value profile did not happen, to + // prevent leaving garbage data at the end of the profile file. 
+ uint64_t truncateToSize = __llvm_profile_size_without_vp(ProfileBuffer); + ftruncate(fileno(ProfileFile), truncateToSize); + + (void)munmap(ProfileBuffer, ProfileFileSize); *MergeDone = 1; return 0; @@ -234,6 +239,7 @@ FILE *OutputFile; int MergeDone = 0; + VPMergeHook = &lprofMergeValueProfData; if (!doMerging()) OutputFile = fopen(OutputName, "ab"); else Index: lib/profile/InstrProfilingMerge.c =================================================================== --- lib/profile/InstrProfilingMerge.c +++ lib/profile/InstrProfilingMerge.c @@ -17,8 +17,9 @@ #define INSTR_PROF_VALUE_PROF_DATA #include "InstrProfData.inc" -COMPILER_RT_WEAK void (*VPMergeHook)(ValueProfData *, - __llvm_profile_data *) = NULL; +COMPILER_RT_VISIBILITY +void (*VPMergeHook)(ValueProfData *, __llvm_profile_data *); + COMPILER_RT_VISIBILITY uint64_t lprofGetLoadModuleSignature() { /* A very fast way to compute a module signature. */ @@ -36,6 +37,16 @@ (NumVnodes << 10) + (DataSize > 0 ? FirstD->NameRef : 0); } +/* Returns the profile file size without value profiles. */ +COMPILER_RT_VISIBILITY +uint64_t __llvm_profile_size_without_vp(const char *ProfileData) { + __llvm_profile_header *Header = (__llvm_profile_header *)ProfileData; + return (sizeof(__llvm_profile_header) + + sizeof(__llvm_profile_data) * Header->DataSize + + sizeof(uint64_t) * Header->CountersSize + Header->NamesSize + + __llvm_profile_get_num_padding_bytes(Header->NamesSize)); +} + /* Returns 1 if profile is not structurally compatible. 
*/ COMPILER_RT_VISIBILITY int __llvm_profile_check_compatibility(const char *ProfileData, Index: lib/profile/InstrProfilingMergeFile.c =================================================================== --- lib/profile/InstrProfilingMergeFile.c +++ lib/profile/InstrProfilingMergeFile.c @@ -17,24 +17,24 @@ #define INSTR_PROF_VALUE_PROF_DATA #include "InstrProfData.inc" -void (*VPMergeHook)(ValueProfData *, - __llvm_profile_data *) = &lprofMergeValueProfData; - /* Merge value profile data pointed to by SrcValueProfData into * in-memory profile counters pointed by to DstData. */ void lprofMergeValueProfData(ValueProfData *SrcValueProfData, __llvm_profile_data *DstData) { - unsigned I, S, V, C; + unsigned I, S, V, DstIndex = 0; InstrProfValueData *VData; ValueProfRecord *VR = getFirstValueProfRecord(SrcValueProfData); for (I = 0; I < SrcValueProfData->NumValueKinds; I++) { VData = getValueProfRecordValueData(VR); + unsigned SrcIndex = 0; for (S = 0; S < VR->NumValueSites; S++) { uint8_t NV = VR->SiteCountArray[S]; for (V = 0; V < NV; V++) { - for (C = 0; C < VData[V].Count; C++) - __llvm_profile_instrument_target(VData[V].Value, DstData, S); + __llvm_profile_instrument_target_value(VData[SrcIndex].Value, DstData, + DstIndex, VData[SrcIndex].Count); + ++SrcIndex; } + ++DstIndex; } VR = getValueProfRecordNext(VR); } Index: lib/profile/InstrProfilingValue.c =================================================================== --- lib/profile/InstrProfilingValue.c +++ lib/profile/InstrProfilingValue.c @@ -132,13 +132,14 @@ return Node; } -COMPILER_RT_VISIBILITY void -__llvm_profile_instrument_target(uint64_t TargetValue, void *Data, - uint32_t CounterIndex) { +static inline __attribute__((always_inline)) void +instrumentTargetValueImpl(uint64_t TargetValue, void *Data, + uint32_t CounterIndex, uint64_t CountValue) { __llvm_profile_data *PData = (__llvm_profile_data *)Data; if (!PData) return; - + if (!CountValue) + return; if (!PData->Values) { if 
(!allocateValueProfileCounters(PData)) return; @@ -153,7 +154,7 @@ uint8_t VDataCount = 0; while (CurVNode) { if (TargetValue == CurVNode->Value) { - CurVNode->Count++; + CurVNode->Count += CountValue; return; } if (CurVNode->Count < MinCount) { @@ -194,11 +195,13 @@ * the runtime can wipe out more than one lowest count entries * to give space for hot targets. */ - if (!MinCountVNode->Count || !(--MinCountVNode->Count)) { + if (MinCountVNode->Count <= CountValue) { CurVNode = MinCountVNode; CurVNode->Value = TargetValue; - CurVNode->Count++; - } + CurVNode->Count = CountValue; + } else + MinCountVNode->Count -= CountValue; + return; } @@ -206,7 +209,7 @@ if (!CurVNode) return; CurVNode->Value = TargetValue; - CurVNode->Count++; + CurVNode->Count += CountValue; uint32_t Success = 0; if (!ValueCounters[CounterIndex]) @@ -221,6 +224,19 @@ } } +COMPILER_RT_VISIBILITY void +__llvm_profile_instrument_target(uint64_t TargetValue, void *Data, + uint32_t CounterIndex) { + instrumentTargetValueImpl(TargetValue, Data, CounterIndex, 1); + +} +COMPILER_RT_VISIBILITY void +__llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data, + uint32_t CounterIndex, + uint64_t CountValue) { + instrumentTargetValueImpl(TargetValue, Data, CounterIndex, CountValue); +} + /* * The target values are partitioned into multiple regions/ranges. 
There is one * contiguous region which is precise -- every value in the range is tracked Index: test/profile/instrprof-value-merge.c =================================================================== --- /dev/null +++ test/profile/instrprof-value-merge.c @@ -0,0 +1,79 @@ +// RUN: %clang_pgogen -o %t -O3 %s +// RUN: rm -rf %t.profdir +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1 +// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1 +// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %s + +#include <string.h> + +void (*f0)(); +void (*f1)(); +void (*f2)(); + +char dst[200]; +char src[200]; +volatile int n; + +__attribute__((noinline)) void foo() {} + +__attribute__((noinline)) void bar() { + f0 = foo; + f1 = foo; + f2 = foo; + n = 4; +} +int main(int argc, char *argv[]) { + int i; + bar(); + if (argc == 1) { + f0(); + for (i = 0; i < 9; i++) + f1(); + for (i = 0; i < 99; i++) + f2(); + } else { + memcpy((void *)dst, (void *)src, n); + for (i = 0; i < 6; i++) + memcpy((void *)(dst + 2), (void *)src, n + 1); + for (i = 0; i < 66; i++) + memcpy((void *)(dst + 9), (void *)src, n + 2); + } +} + +// CHECK: Counters: +// CHECK: main: +// CHECK: Hash: 0x00030012a7ab6e87 +// CHECK: Counters: 6 +// CHECK: Indirect Call Site Count: 3 +// CHECK: Number of Memory Intrinsics Calls: 3 +// CHECK: Block counts: [27, 297, 12, 132, 3, 2] +// CHECK: Indirect Target Results: +// CHECK: [ 0, foo, 3 ] +// CHECK: [ 1, foo, 27 ] +// CHECK: [ 2, foo, 297 ] +// CHECK: Memory Intrinsic Size Results: +// CHECK: [ 0, 4, 2 ] +// CHECK: [ 1, 5, 12 ] +// CHECK: [ 2, 6, 132 ] +// CHECK: Instrumentation level: IR +// CHECK: Functions shown: 1 +// CHECK: Total functions: 3 +// CHECK: Maximum function 
count: 327 +// CHECK: Maximum internal block count: 297 +// CHECK: Statistics for indirect call sites profile: +// CHECK: Total number of sites: 3 +// CHECK: Total number of sites with values: 3 +// CHECK: Total number of profiled values: 3 +// CHECK: Value sites histogram: +// CHECK: NumTargets, SiteCount +// CHECK: 1, 3 +// CHECK: Statistics for memory intrinsic calls sizes profile: +// CHECK: Total number of sites: 3 +// CHECK: Total number of sites with values: 3 +// CHECK: Total number of profiled values: 3 +// CHECK: Value sites histogram: +// CHECK: NumTargets, SiteCount +// CHECK: 1, 3