Index: llvm/trunk/include/llvm/ProfileData/InstrProf.h =================================================================== --- llvm/trunk/include/llvm/ProfileData/InstrProf.h +++ llvm/trunk/include/llvm/ProfileData/InstrProf.h @@ -58,6 +58,20 @@ : INSTR_PROF_DATA_SECT_NAME_STR; } +/// Return the name of the data section containing pointers to value profile +/// counters/nodes. +inline StringRef getInstrProfValuesSectionName(bool AddSegment) { + return AddSegment ? "__DATA," INSTR_PROF_VALS_SECT_NAME_STR + : INSTR_PROF_VALS_SECT_NAME_STR; +} + +/// Return the name of the data section containing nodes holding value +/// profiling data. +inline StringRef getInstrProfVNodesSectionName(bool AddSegment) { + return AddSegment ? "__DATA," INSTR_PROF_VNODES_SECT_NAME_STR + : INSTR_PROF_VNODES_SECT_NAME_STR; +} + /// Return the name profile runtime entry point to do value profiling /// for a given site. inline StringRef getInstrProfValueProfFuncName() { @@ -80,6 +94,12 @@ /// Return the name prefix of profile counter variables. inline StringRef getInstrProfCountersVarPrefix() { return "__profc_"; } +/// Return the name prefix of value profile variables. +inline StringRef getInstrProfValuesVarPrefix() { return "__profvp_"; } + +/// Return the name of the value profile node array variable. +inline StringRef getInstrProfVNodesVarName() { return "__llvm_prf_vnodes"; } + /// Return the name prefix of the COMDAT group for instrumentation variables /// associated with a COMDAT function. 
inline StringRef getInstrProfComdatPrefix() { return "__profv_"; } Index: llvm/trunk/include/llvm/ProfileData/InstrProfData.inc =================================================================== --- llvm/trunk/include/llvm/ProfileData/InstrProfData.inc +++ llvm/trunk/include/llvm/ProfileData/InstrProfData.inc @@ -80,7 +80,7 @@ INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), FunctionPointer, \ FunctionAddr) INSTR_PROF_DATA(IntPtrT, llvm::Type::getInt8PtrTy(Ctx), Values, \ - ConstantPointerNull::get(Int8PtrTy)) + ValuesPtrExpr) INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NumCounters, \ ConstantInt::get(llvm::Type::getInt32Ty(Ctx), NumCounters)) INSTR_PROF_DATA(const uint16_t, Int16ArrayTy, NumValueSites[IPVK_Last+1], \ @@ -597,6 +597,12 @@ #define INSTR_PROF_DATA_SECT_NAME __llvm_prf_data #define INSTR_PROF_NAME_SECT_NAME __llvm_prf_names #define INSTR_PROF_CNTS_SECT_NAME __llvm_prf_cnts +/* Section holding an array of pointers. Each pointer points to a list + * of value nodes associated with one value site. + */ +#define INSTR_PROF_VALS_SECT_NAME __llvm_prf_values +/* Section holding the value profile nodes themselves. */ +#define INSTR_PROF_VNODES_SECT_NAME __llvm_prf_vnodes #define INSTR_PROF_COVMAP_SECT_NAME __llvm_covmap #define INSTR_PROF_DATA_SECT_NAME_STR \ @@ -607,6 +613,10 @@ INSTR_PROF_QUOTE(INSTR_PROF_CNTS_SECT_NAME) #define INSTR_PROF_COVMAP_SECT_NAME_STR \ INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_SECT_NAME) +#define INSTR_PROF_VALS_SECT_NAME_STR \ + INSTR_PROF_QUOTE(INSTR_PROF_VALS_SECT_NAME) +#define INSTR_PROF_VNODES_SECT_NAME_STR \ + INSTR_PROF_QUOTE(INSTR_PROF_VNODES_SECT_NAME) /* Macros to define start/stop section symbol for a given * section on Linux. 
For instance Index: llvm/trunk/include/llvm/Transforms/InstrProfiling.h =================================================================== --- llvm/trunk/include/llvm/Transforms/InstrProfiling.h +++ llvm/trunk/include/llvm/Transforms/InstrProfiling.h @@ -84,6 +84,9 @@ /// Emit the section with compressed function names. void emitNameData(); + /// Emit value nodes section for value profiling. + void emitVNodes(); + /// Emit runtime registration functions for each profile data variable. void emitRegistration(); Index: llvm/trunk/lib/Transforms/Instrumentation/InstrProfiling.cpp =================================================================== --- llvm/trunk/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ llvm/trunk/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -31,6 +31,20 @@ cl::desc("Enable name string compression"), cl::init(true)); +cl::opt<bool> ValueProfileStaticAlloc( + "vp-static-alloc", + cl::desc("Do static counter allocation for value profiler"), + cl::init(true)); +cl::opt<double> NumCountersPerValueSite( + "vp-counters-per-site", + cl::desc("The average number of profile counters allocated " + "per value profiling site."), + // This is set to a very small value because in real programs, only + // a very small percentage of value sites have non-zero targets, e.g., 1/30. + // For those sites with non-zero profile, the average number of targets + // is usually smaller than 2. + cl::init(1.0)); + class InstrProfilingLegacyPass : public ModulePass { InstrProfiling InstrProf; @@ -141,6 +155,7 @@ if (!MadeChange) return false; + emitVNodes(); emitNameData(); emitRegistration(); emitRuntimeHook(); @@ -288,6 +303,20 @@ return M.getOrInsertComdat(StringRef(getVarName(Inc, ComdatPrefix))); } +static bool needsRuntimeRegistrationOfSectionRange(const Module &M) { + // Don't do this for Darwin. compiler-rt uses linker magic. + if (Triple(M.getTargetTriple()).isOSDarwin()) + return false; + + // Use linker script magic to get data/cnts/name start/end. 
+ if (Triple(M.getTargetTriple()).isOSLinux() || + Triple(M.getTargetTriple()).isOSFreeBSD() || + Triple(M.getTargetTriple()).isPS4CPU()) + return false; + + return true; +} + GlobalVariable * InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { GlobalVariable *NamePtr = Inc->getName(); @@ -321,10 +350,34 @@ CounterPtr->setAlignment(8); CounterPtr->setComdat(ProfileVarsComdat); - // Create data variable. auto *Int8PtrTy = Type::getInt8PtrTy(Ctx); + // Statically allocate the array of pointers to value profile nodes for + // the current function, one 64-bit slot per value site. + Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy); + if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(*M)) { + + uint64_t NS = 0; + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + NS += PD.NumValueSites[Kind]; + if (NS) { + ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS); + + auto *ValuesVar = + new GlobalVariable(*M, ValuesTy, false, NamePtr->getLinkage(), + Constant::getNullValue(ValuesTy), + getVarName(Inc, getInstrProfValuesVarPrefix())); + ValuesVar->setVisibility(NamePtr->getVisibility()); + ValuesVar->setSection(getInstrProfValuesSectionName(isMachO())); + ValuesVar->setAlignment(8); + ValuesVar->setComdat(ProfileVarsComdat); + ValuesPtrExpr = + ConstantExpr::getBitCast(ValuesVar, llvm::Type::getInt8PtrTy(Ctx)); + } + } + + // Create data variable. auto *Int16Ty = Type::getInt16Ty(Ctx); - auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last+1); + auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1); Type *DataTypes[] = { #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType, #include "llvm/ProfileData/InstrProfData.inc" @@ -367,18 +420,49 @@ return CounterPtr; } -static bool needsRuntimeRegistrationOfSectionRange(const Module &M) { - // Don't do this for Darwin. compiler-rt uses linker magic. 
- if (Triple(M.getTargetTriple()).isOSDarwin()) - return false; +void InstrProfiling::emitVNodes() { + if (!ValueProfileStaticAlloc) + return; - // Use linker script magic to get data/cnts/name start/end. - if (Triple(M.getTargetTriple()).isOSLinux() || - Triple(M.getTargetTriple()).isOSFreeBSD() || - Triple(M.getTargetTriple()).isPS4CPU()) - return false; + // For now only support this on platforms that do + // not require runtime registration to discover + // named section start/end. + if (needsRuntimeRegistrationOfSectionRange(*M)) + return; - return true; + size_t TotalNS = 0; + for (auto &PD : ProfileDataMap) { + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + TotalNS += PD.second.NumValueSites[Kind]; + } + + if (!TotalNS) + return; + + uint64_t NumCounters = TotalNS * NumCountersPerValueSite; + // Heuristic for small programs with very few total value sites: the + // default vp-counters-per-site assumes large apps, where only a low + // percentage of value sites have any profile data and thus the average + // number of counters per site is low. That may not hold for small apps + // with very few sites, so bump up the number of counters in this case. + // NOTE(review): the uint64_t conversion above truncates, so a fractional + // vp-counters-per-site below 1.0 may leave NumCounters at 0 -- confirm. 
+ if (NumCounters < 10) + NumCounters *= 2; + + auto &Ctx = M->getContext(); + Type *VNodeTypes[] = { +#define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType, +#include "llvm/ProfileData/InstrProfData.inc" + }; + auto *VNodeTy = StructType::get(Ctx, makeArrayRef(VNodeTypes)); + + ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters); + auto *VNodesVar = new GlobalVariable( + *M, VNodesTy, false, llvm::GlobalValue::PrivateLinkage, + Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName()); + VNodesVar->setSection(getInstrProfVNodesSectionName(isMachO())); + UsedVars.push_back(VNodesVar); } void InstrProfiling::emitNameData() { Index: llvm/trunk/test/Instrumentation/InstrProfiling/icall.ll =================================================================== --- llvm/trunk/test/Instrumentation/InstrProfiling/icall.ll +++ llvm/trunk/test/Instrumentation/InstrProfiling/icall.ll @@ -0,0 +1,31 @@ +;; Check that value profiler counters are allocated statically only when -vp-static-alloc=true + +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.10.0 -vp-static-alloc=true -instrprof -S | FileCheck %s --check-prefix=STATIC +; RUN: opt < %s -mtriple=x86_64-unknown-linux -instrprof -vp-static-alloc=true -S | FileCheck %s --check-prefix=STATIC +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.10.0 -vp-static-alloc=false -instrprof -S | FileCheck %s --check-prefix=DYN +; RUN: opt < %s -mtriple=x86_64-unknown-linux -instrprof -vp-static-alloc=false -S | FileCheck %s --check-prefix=DYN + + +@__profn_foo = private constant [3 x i8] c"foo" + +define i32 @foo(i32 ()* ) { + call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12884901887, i32 1, i32 0) + %2 = ptrtoint i32 ()* %0 to i64 + call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12884901887, i64 %2, i32 0, i32 0) + %3 = tail call i32 %0() + ret i32 %3 +} + +; Function Attrs: nounwind +declare void 
@llvm.instrprof.increment(i8*, i64, i32, i32) #0 + +; Function Attrs: nounwind +declare void @llvm.instrprof.value.profile(i8*, i64, i64, i32, i32) #0 + +attributes #0 = { nounwind } + +; STATIC: @__profvp_foo +; STATIC: @__llvm_prf_vnodes + +; DYN-NOT: @__profvp_foo +; DYN-NOT: @__llvm_prf_vnodes