Index: compiler-rt/trunk/lib/profile/InstrProfiling.c
===================================================================
--- compiler-rt/trunk/lib/profile/InstrProfiling.c
+++ compiler-rt/trunk/lib/profile/InstrProfiling.c
@@ -9,8 +9,19 @@

 #include "InstrProfiling.h"
 #include <limits.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#define INSTR_PROF_VALUE_PROF_DATA
+#define INSTR_PROF_COMMON_API_IMPL
+#include "InstrProfData.inc"
+
+#define PROF_OOM(Msg) PROF_ERR(Msg ":%s\n", "Out of memory")
+#define PROF_OOM_RETURN(Msg)                                                   \
+  do { /* single statement: safe under an unbraced if/else */                 \
+    PROF_OOM(Msg);                                                             \
+    return 0;                                                                  \
+  } while (0)

 LLVM_LIBRARY_VISIBILITY uint64_t __llvm_profile_get_magic(void) {
   return sizeof(void *) == sizeof(uint64_t) ? (INSTR_PROF_RAW_MAGIC_64)
@@ -60,20 +71,29 @@
   }
 }

-/* Total number of value profile data in bytes. */
-static uint64_t TotalValueDataSize = 0;
-
-#ifdef _MIPS_ARCH
+/* Mock testing only: overwrite Data's value site count for ValueKind. */
 LLVM_LIBRARY_VISIBILITY void
-__llvm_profile_instrument_target(uint64_t TargetValue, void *Data_,
-                                 uint32_t CounterIndex) {}
+__llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
+                                   uint32_t ValueKind, uint16_t NumValueSites) {
+  *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites;
+}
+
+/* Mock testing only: return the profile data record that follows Data. */
+LLVM_LIBRARY_VISIBILITY const __llvm_profile_data *
+__llvm_profile_iterate_data(const __llvm_profile_data *Data) {
+  return Data + 1;
+}

-#else
+/* Mock testing only: return the function pointer stored in Data. */
+LLVM_LIBRARY_VISIBILITY void *
+__llvm_get_function_addr(const __llvm_profile_data *Data) {
+  return Data->FunctionPointer;
+}

 /* Allocate an array that holds the pointers to the linked lists of
  * value profile counter nodes. The number of element of the array
  * is the total number of value profile sites instrumented. Returns
- *  0 if allocation fails.
+ *  0 if allocation fails.
*/ static int allocateValueProfileCounters(__llvm_profile_data *Data) { @@ -90,16 +110,27 @@ free(Mem); return 0; } - /* In the raw format, there will be an value count array preceding - * the value profile data. The element type of the array is uint8_t, - * and there is one element in array per value site. The element - * stores the number of values profiled for the corresponding site. - */ - uint8_t Padding = __llvm_profile_get_num_padding_bytes(NumVSites); - __sync_fetch_and_add(&TotalValueDataSize, NumVSites + Padding); return 1; } +static void deallocateValueProfileCounters(__llvm_profile_data *Data) { + uint64_t NumVSites = 0, I; + uint32_t VKI; + if (!Data->Values) + return; + for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI) + NumVSites += Data->NumValueSites[VKI]; + for (I = 0; I < NumVSites; I++) { + ValueProfNode *Node = ((ValueProfNode **)Data->Values)[I]; + while (Node) { + ValueProfNode *Next = Node->Next; + free(Node); + Node = Next; + } + } + free(Data->Values); +} + LLVM_LIBRARY_VISIBILITY void __llvm_profile_instrument_target(uint64_t TargetValue, void *Data, uint32_t CounterIndex) { @@ -149,61 +180,87 @@ free(CurrentVNode); return; } - __sync_fetch_and_add(&TotalValueDataSize, Success * sizeof(ValueProfNode)); } -#endif + +/* For multi-threaded programs, while the profile is being dumped, other + threads may still be updating the value profile data and creating new + value entries. To accommadate this, we need to add extra bytes to the + data buffer. The size of the extra space is controlled by an environment + varaible. 
*/ +static unsigned getVprofExtraBytes() { + const char *ExtraStr = getenv("LLVM_VALUE_PROF_BUFFER_EXTRA"); + if (!ExtraStr || !ExtraStr[0]) + return 1024; + return (unsigned)atoi(ExtraStr); +} LLVM_LIBRARY_VISIBILITY uint64_t __llvm_profile_gather_value_data(uint8_t **VDataArray) { + size_t S = 0, RealSize = 0, BufferCapacity = 0, Extra = 0; + __llvm_profile_data *I; + if (!VDataArray) + PROF_OOM_RETURN("Failed to write value profile data "); - if (!VDataArray || 0 == TotalValueDataSize) - return 0; - - uint64_t NumData = TotalValueDataSize; - *VDataArray = (uint8_t *)calloc(NumData, sizeof(uint8_t)); - if (!*VDataArray) - return 0; - - uint8_t *VDataEnd = *VDataArray + NumData; - uint8_t *PerSiteCountsHead = *VDataArray; const __llvm_profile_data *DataEnd = __llvm_profile_end_data(); const __llvm_profile_data *DataBegin = __llvm_profile_begin_data(); - __llvm_profile_data *I; + + /* + * Compute the total Size of the buffer to hold ValueProfData + * structures for functions with value profile data. + */ for (I = (__llvm_profile_data *)DataBegin; I != DataEnd; ++I) { + ValueProfRuntimeRecord R; + /* Extract the value profile data info from the runtime. */ + if (initializeValueProfRuntimeRecord(&R, I->NumValueSites, I->Values)) + PROF_OOM_RETURN("Failed to write value profile data "); + /* Compute the size of ValueProfData from this runtime record. */ + if (getNumValueKindsRT(&R) != 0) + S += getValueProfDataSizeRT(&R); + finalizeValueProfRuntimeRecord(&R); + } + /* No value sites or no value profile data is collected. */ + if (!S) + return 0; - uint64_t NumVSites = 0; - uint32_t VKI, i; + Extra = getVprofExtraBytes(); + BufferCapacity = S + Extra; + *VDataArray = calloc(BufferCapacity, sizeof(uint8_t)); + if (!*VDataArray) + PROF_OOM_RETURN("Failed to write value profile data "); - if (!I->Values) + ValueProfData *VD = (ValueProfData *)(*VDataArray); + /* + * Extract value profile data and write into ValueProfData structure + * one by one. 
Note that new value profile data added to any value + * site (from another thread) after the ValueProfRuntimeRecord is + * initialized (when the profile data snapshot is taken) won't be + * collected. This is not a problem as those dropped value will have + * very low taken count. + */ + for (I = (__llvm_profile_data *)DataBegin; I != DataEnd; ++I) { + ValueProfRuntimeRecord R; + if (initializeValueProfRuntimeRecord(&R, I->NumValueSites, I->Values)) + PROF_OOM_RETURN("Failed to write value profile data "); + if (getNumValueKindsRT(&R) == 0) continue; - ValueProfNode **ValueCounters = (ValueProfNode **)I->Values; - - for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI) - NumVSites += I->NumValueSites[VKI]; - uint8_t Padding = __llvm_profile_get_num_padding_bytes(NumVSites); - - uint8_t *PerSiteCountPtr = PerSiteCountsHead; - InstrProfValueData *VDataPtr = - (InstrProfValueData *)(PerSiteCountPtr + NumVSites + Padding); - - for (i = 0; i < NumVSites; ++i) { - - ValueProfNode *VNode = ValueCounters[i]; - - uint8_t VDataCount = 0; - while (VNode && ((uint8_t *)(VDataPtr + 1) <= VDataEnd)) { - *VDataPtr = VNode->VData; - VNode = VNode->Next; - ++VDataPtr; - if (++VDataCount == UCHAR_MAX) - break; - } - *PerSiteCountPtr = VDataCount; - ++PerSiteCountPtr; + /* Record R has taken a snapshot of the VP data at this point. Newly + added VP data for this function will be dropped. */ + /* Check if there is enough space. */ + if (BufferCapacity - RealSize < getValueProfDataSizeRT(&R)) { + PROF_ERR("Value profile data is dropped :%s \n", + "Out of buffer space. 
Use environment " + " LLVM_VALUE_PROF_BUFFER_EXTRA to allocate more"); + I->Values = 0; } - I->Values = (void *)PerSiteCountsHead; - PerSiteCountsHead = (uint8_t *)VDataPtr; + + serializeValueProfDataFromRT(&R, VD); + deallocateValueProfileCounters(I); + I->Values = VD; + finalizeValueProfRuntimeRecord(&R); + RealSize += VD->TotalSize; + VD = (ValueProfData *)((char *)VD + VD->TotalSize); } - return PerSiteCountsHead - *VDataArray; + + return RealSize; } Index: compiler-rt/trunk/test/profile/instrprof-value-prof.c =================================================================== --- compiler-rt/trunk/test/profile/instrprof-value-prof.c +++ compiler-rt/trunk/test/profile/instrprof-value-prof.c @@ -0,0 +1,183 @@ +// RUN: %clang_profgen -O2 -o %t %s +// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t 1 +// RUN: env LLVM_PROFILE_FILE=%t-2.profraw %run %t +// RUN: llvm-profdata merge -o %t.profdata %t.profraw +// RUN: llvm-profdata merge -o %t-2.profdata %t-2.profraw +// RUN: llvm-profdata merge -o %t-merged.profdata %t.profraw %t-2.profdata +// RUN: llvm-profdata show --all-functions -ic-targets %t-2.profdata | FileCheck %s -check-prefix=NO-VALUE +// RUN: llvm-profdata show --all-functions -ic-targets %t.profdata | FileCheck %s +// value profile merging current do sorting based on target values -- this will destroy the order of the target +// in the list leading to comparison problem. For now just check a small subset of output. 
+// RUN: llvm-profdata show --all-functions -ic-targets %t-merged.profdata | FileCheck %s -check-prefix=MERGE
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+typedef struct __llvm_profile_data __llvm_profile_data;
+const __llvm_profile_data *__llvm_profile_begin_data(void);
+const __llvm_profile_data *__llvm_profile_end_data(void);
+void __llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
+                                        uint32_t ValueKind,
+                                        uint16_t NumValueSites);
+const __llvm_profile_data *
+__llvm_profile_iterate_data(const __llvm_profile_data *Data);
+void *__llvm_get_function_addr(const __llvm_profile_data *Data);
+void __llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
+                                      uint32_t CounterIndex);
+
+#define DEF_FUNC(x)                                                            \
+  void x() {}
+#define DEF_2_FUNCS(x) DEF_FUNC(x##_1) DEF_FUNC(x##_2)
+#define DEF_4_FUNCS(x) DEF_2_FUNCS(x##_1) DEF_2_FUNCS(x##_2)
+#define DEF_8_FUNCS(x) DEF_4_FUNCS(x##_1) DEF_4_FUNCS(x##_2)
+#define DEF_16_FUNCS(x) DEF_8_FUNCS(x##_1) DEF_8_FUNCS(x##_2)
+#define DEF_32_FUNCS(x) DEF_16_FUNCS(x##_1) DEF_16_FUNCS(x##_2)
+#define DEF_64_FUNCS(x) DEF_32_FUNCS(x##_1) DEF_32_FUNCS(x##_2)
+#define DEF_128_FUNCS(x) DEF_64_FUNCS(x##_1) DEF_64_FUNCS(x##_2)
+
+#define FUNC_ADDR(x) &x,
+#define FUNC_2_ADDRS(x) FUNC_ADDR(x##_1) FUNC_ADDR(x##_2)
+#define FUNC_4_ADDRS(x) FUNC_2_ADDRS(x##_1) FUNC_2_ADDRS(x##_2)
+#define FUNC_8_ADDRS(x) FUNC_4_ADDRS(x##_1) FUNC_4_ADDRS(x##_2)
+#define FUNC_16_ADDRS(x) FUNC_8_ADDRS(x##_1) FUNC_8_ADDRS(x##_2)
+#define FUNC_32_ADDRS(x) FUNC_16_ADDRS(x##_1) FUNC_16_ADDRS(x##_2)
+#define FUNC_64_ADDRS(x) FUNC_32_ADDRS(x##_1) FUNC_32_ADDRS(x##_2)
+#define FUNC_128_ADDRS(x) FUNC_64_ADDRS(x##_1) FUNC_64_ADDRS(x##_2)
+
+DEF_8_FUNCS(callee)
+DEF_128_FUNCS(caller)
+
+void *CallerAddrs[] = {FUNC_128_ADDRS(caller)};
+
+void *CalleeAddrs[] = {FUNC_8_ADDRS(callee)};
+
+static int cmpaddr(const void *p1, const void *p2) {
+  /* Descending; compare, as a pointer difference may not fit in an int. */
+  uintptr_t addr1 = (uintptr_t)*(void **)p1, addr2 = (uintptr_t)*(void **)p2;
+  return (addr2 > addr1) - (addr2 < addr1);
+}
+
+int main(int argc, const char *argv[]) {
+  unsigned S, NS = 0, V, doInstrument = 1;
+  const __llvm_profile_data *Data, *DataEnd;
+
+  if (argc < 2)
+    doInstrument = 0;
+
+  qsort(CallerAddrs, sizeof(CallerAddrs) / sizeof(void *), sizeof(void *),
+        cmpaddr);
+
+  /* We will synthesize value profile data for the 128 caller functions.
+   * The N-th caller found gets N value sites (0 to 127). The number of values
+   * for each value site ranges from 0 to 7 (S % 8). */
+
+  Data = __llvm_profile_begin_data();
+  DataEnd = __llvm_profile_end_data();
+
+  for (; Data < DataEnd; Data = __llvm_profile_iterate_data(Data)) {
+    void *func = __llvm_get_function_addr(Data);
+    if (bsearch(&func, CallerAddrs, sizeof(CallerAddrs) / sizeof(void *),
+                sizeof(void *), cmpaddr)) {
+      __llvm_profile_set_num_value_sites((__llvm_profile_data *)Data,
+                                         0 /*IPVK_IndirectCallTarget */, NS);
+      if (!doInstrument) {
+        NS++;
+        continue;
+      }
+      for (S = 0; S < NS; S++) {
+        for (V = 0; V < S % 8; V++) {
+          unsigned C;
+          for (C = 0; C < V + 1; C++)
+            __llvm_profile_instrument_target((uint64_t)CalleeAddrs[V],
+                                             (void *)Data, S);
+        }
+      }
+      NS++;
+    }
+  }
+}
+
+// NO-VALUE: Indirect Call Site Count: 127
+// NO-VALUE-NEXT: Indirect Target Results:
+// MERGE: Indirect Call Site Count: 127
+// MERGE-NEXT: Indirect Target Results:
+// MERGE-NEXT: [ 1, callee_1_1_1, 1 ]
+// CHECK: Indirect Call Site Count: 127
+// CHECK-NEXT: Indirect Target Results:
+// CHECK-NEXT: [ 1, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 2, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 2, callee_1_1_2, 2 ]
+// CHECK-NEXT: [ 3, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 3, callee_1_1_2, 2 ]
+// CHECK-NEXT: [ 3, callee_1_2_1, 3 ]
+// CHECK-NEXT: [ 4, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 4, callee_1_1_2, 2 ]
+// CHECK-NEXT: [ 4, callee_1_2_1, 3 ]
+// CHECK-NEXT: [ 4, callee_1_2_2, 4 ]
+// CHECK-NEXT: [ 5, callee_1_1_1, 1 ]
+// CHECK-NEXT: [ 5, callee_1_1_2, 2 ]
+// CHECK-NEXT: [ 5, callee_1_2_1, 3 ]
+// CHECK-NEXT: [ 5, callee_1_2_2, 4 ]
+// CHECK-NEXT: [ 5, callee_2_1_1, 5 ]
+// CHECK-NEXT: [ 6, 
callee_1_1_2, 2 ] +// CHECK-NEXT: [ 6, callee_1_2_1, 3 ] +// CHECK-NEXT: [ 6, callee_1_2_2, 4 ] +// CHECK-NEXT: [ 6, callee_2_1_1, 5 ] +// CHECK-NEXT: [ 6, callee_2_1_2, 6 ] +// CHECK-NEXT: [ 7, callee_1_1_1, 1 ] +// CHECK-NEXT: [ 7, callee_1_1_2, 2 ] +// CHECK-NEXT: [ 7, callee_1_2_1, 3 ] +// CHECK-NEXT: [ 7, callee_1_2_2, 4 ] +// CHECK-NEXT: [ 7, callee_2_1_1, 5 ] +// CHECK-NEXT: [ 7, callee_2_1_2, 6 ] +// CHECK-NEXT: [ 7, callee_2_2_1, 7 ] +// CHECK-NEXT: [ 9, callee_1_1_1, 1 ] +// CHECK-NEXT: [ 10, callee_1_1_1, 1 ] +// CHECK-NEXT: [ 10, callee_1_1_2, 2 ] +// CHECK-NEXT: [ 11, callee_1_1_1, 1 ] +// CHECK-NEXT: [ 11, callee_1_1_2, 2 ] +// CHECK-NEXT: [ 11, callee_1_2_1, 3 ] +// CHECK-NEXT: [ 12, callee_1_1_1, 1 ] +// CHECK-NEXT: [ 12, callee_1_1_2, 2 ] +// CHECK-NEXT: [ 12, callee_1_2_1, 3 ] +// CHECK-NEXT: [ 12, callee_1_2_2, 4 ] +// CHECK-NEXT: [ 13, callee_1_1_1, 1 ] +// CHECK-NEXT: [ 13, callee_1_1_2, 2 ] +// CHECK-NEXT: [ 13, callee_1_2_1, 3 ] +// CHECK-NEXT: [ 13, callee_1_2_2, 4 ] +// CHECK-NEXT: [ 13, callee_2_1_1, 5 ] +// CHECK-NEXT: [ 14, callee_1_1_1, 1 ] +// CHECK-NEXT: [ 14, callee_1_1_2, 2 ] +// CHECK-NEXT: [ 14, callee_1_2_1, 3 ] +// CHECK-NEXT: [ 14, callee_1_2_2, 4 ] +// CHECK-NEXT: [ 14, callee_2_1_1, 5 ] +// CHECK-NEXT: [ 14, callee_2_1_2, 6 ] +// CHECK-NEXT: [ 15, callee_1_1_1, 1 ] +// CHECK-NEXT: [ 15, callee_1_1_2, 2 ] +// CHECK-NEXT: [ 15, callee_1_2_1, 3 ] +// CHECK-NEXT: [ 15, callee_1_2_2, 4 ] +// CHECK-NEXT: [ 15, callee_2_1_1, 5 ] +// CHECK-NEXT: [ 15, callee_2_1_2, 6 ] +// CHECK-NEXT: [ 15, callee_2_2_1, 7 ] +// CHECK-NEXT: [ 17, callee_1_1_1, 1 ] +// CHECK-NEXT: [ 18, callee_1_1_1, 1 ] +// CHECK-NEXT: [ 18, callee_1_1_2, 2 ] +// CHECK-NEXT: [ 19, callee_1_1_1, 1 ] +// CHECK-NEXT: [ 19, callee_1_1_2, 2 ] +// CHECK-NEXT: [ 19, callee_1_2_1, 3 ] +// CHECK-NEXT: [ 20, callee_1_1_1, 1 ] +// CHECK-NEXT: [ 20, callee_1_1_2, 2 ] +// CHECK-NEXT: [ 20, callee_1_2_1, 3 ] +// CHECK-NEXT: [ 20, callee_1_2_2, 4 ] +// CHECK-NEXT: [ 21, 
callee_1_1_1, 1 ] +// CHECK-NEXT: [ 21, callee_1_1_2, 2 ] +// CHECK-NEXT: [ 21, callee_1_2_1, 3 ] +// CHECK-NEXT: [ 21, callee_1_2_2, 4 ] +// CHECK-NEXT: [ 21, callee_2_1_1, 5 ] +// CHECK-NEXT: [ 22, callee_1_1_1, 1 ] +// CHECK-NEXT: [ 22, callee_1_1_2, 2 ] +// CHECK-NEXT: [ 22, callee_1_2_1, 3 ] +// CHECK-NEXT: [ 22, callee_1_2_2, 4 ] +// CHECK-NEXT: [ 22, callee_2_1_1, 5 ] +