Index: docs/CommandGuide/llvm-profdata.rst =================================================================== --- docs/CommandGuide/llvm-profdata.rst +++ docs/CommandGuide/llvm-profdata.rst @@ -196,6 +196,10 @@ Specify that the input profile is a sample-based profile. +.. option:: -memop-sizes + + Show the profiled sizes of the memory intrinsic calls for shown functions. + EXIT STATUS ----------- Index: include/llvm/ProfileData/InstrProf.h =================================================================== --- include/llvm/ProfileData/InstrProf.h +++ include/llvm/ProfileData/InstrProf.h @@ -79,6 +79,11 @@ return INSTR_PROF_VALUE_PROF_FUNC_STR; } +/// Return the name profile runtime entry point to do value range profiling. +inline StringRef getInstrProfValueRangeProfFuncName() { + return INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR; +} + /// Return the name of the section containing function coverage mapping /// data. inline StringRef getInstrProfCoverageSectionName(bool AddSegment) { @@ -646,11 +651,14 @@ private: std::vector IndirectCallSites; + std::vector MemOPSizes; const std::vector & getValueSitesForKind(uint32_t ValueKind) const { switch (ValueKind) { case IPVK_IndirectCallTarget: return IndirectCallSites; + case IPVK_MemOPSize: + return MemOPSizes; default: llvm_unreachable("Unknown value kind!"); } Index: include/llvm/ProfileData/InstrProfData.inc =================================================================== --- include/llvm/ProfileData/InstrProfData.inc +++ include/llvm/ProfileData/InstrProfData.inc @@ -153,7 +153,17 @@ VALUE_PROF_FUNC_PARAM(uint64_t, TargetValue, Type::getInt64Ty(Ctx)) \ INSTR_PROF_COMMA VALUE_PROF_FUNC_PARAM(void *, Data, Type::getInt8PtrTy(Ctx)) INSTR_PROF_COMMA +#ifndef VALUE_RANGE_PROF VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx)) +#else /* VALUE_RANGE_PROF */ +VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx)) \ + INSTR_PROF_COMMA +VALUE_PROF_FUNC_PARAM(uint64_t, PreciseRangeStart, 
Type::getInt64Ty(Ctx)) \ + INSTR_PROF_COMMA +VALUE_PROF_FUNC_PARAM(uint64_t, PreciseRangeLast, Type::getInt64Ty(Ctx)) \ + INSTR_PROF_COMMA +VALUE_PROF_FUNC_PARAM(uint64_t, LargeValue, Type::getInt64Ty(Ctx)) +#endif /*VALUE_RANGE_PROF */ #undef VALUE_PROF_FUNC_PARAM #undef INSTR_PROF_COMMA /* VALUE_PROF_FUNC_PARAM end */ @@ -174,13 +184,15 @@ * name hash and the function address. */ VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0) +/* For memory intrinsic functions size profiling. */ +VALUE_PROF_KIND(IPVK_MemOPSize, 1) /* These two kinds must be the last to be * declared. This is to make sure the string * array created with the template can be * indexed with the kind value. */ VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget) -VALUE_PROF_KIND(IPVK_Last, IPVK_IndirectCallTarget) +VALUE_PROF_KIND(IPVK_Last, IPVK_MemOPSize) #undef VALUE_PROF_KIND /* VALUE_PROF_KIND end */ @@ -649,6 +661,9 @@ #define INSTR_PROF_VALUE_PROF_FUNC __llvm_profile_instrument_target #define INSTR_PROF_VALUE_PROF_FUNC_STR \ INSTR_PROF_QUOTE(INSTR_PROF_VALUE_PROF_FUNC) +#define INSTR_PROF_VALUE_RANGE_PROF_FUNC __llvm_profile_instrument_range +#define INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR \ + INSTR_PROF_QUOTE(INSTR_PROF_VALUE_RANGE_PROF_FUNC) /* InstrProfile per-function control data alignment. */ #define INSTR_PROF_DATA_ALIGNMENT 8 Index: include/llvm/Transforms/InstrProfiling.h =================================================================== --- include/llvm/Transforms/InstrProfiling.h +++ include/llvm/Transforms/InstrProfiling.h @@ -59,6 +59,14 @@ GlobalVariable *NamesVar; size_t NamesSize; + // The start value of precise value profile range for memory intrinsic sizes. + const int64_t DefaultMemOPSizeRangeStart = 0; + int64_t MemOPSizeRangeStart; + // The end value of precise value profile range for memory intrinsic sizes. 
+ const int64_t DefaultMemOPSizeRangeLast = 8; + int64_t MemOPSizeRangeLast; + int64_t MemOPSizeLargeVal; + bool isMachO() const; /// Get the section name for the counter variables. @@ -109,6 +117,9 @@ /// Create a static initializer for our data, on platforms that need it, /// and for any profile output file that was specified. void emitInitialization(); + + /// Helper funtion that parsing the MemOPSize value profile options + void getMemOPSizeOptions(); }; } // end namespace llvm Index: lib/ProfileData/InstrProfReader.cpp =================================================================== --- lib/ProfileData/InstrProfReader.cpp +++ lib/ProfileData/InstrProfReader.cpp @@ -244,6 +244,7 @@ Record.Counts.push_back(Count); } + Record.clearValueData(); // Check if value profile data exists and read it if so. if (Error E = readValueProfileData(Record)) return E; Index: lib/Transforms/Instrumentation/InstrProfiling.cpp =================================================================== --- lib/Transforms/Instrumentation/InstrProfiling.cpp +++ lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -77,6 +77,17 @@ // is usually smaller than 2. cl::init(1.0)); +cl::opt MemOPSizeRange( + "memop-size-range", + cl::desc("Set the range of size in memory intrinsic calls to be profiled " + "precisely, in a format of :"), + cl::init("")); +cl::opt MemOPSizeLarge( + "memop-size-large", + cl::desc("Set large value thresthold in memory intrinsic size profiling. " + "Value of 0 disables the large value profiling."), + cl::init(8192)); + class InstrProfilingLegacyPass : public ModulePass { InstrProfiling InstrProf; @@ -165,6 +176,7 @@ NamesSize = 0; ProfileDataMap.clear(); UsedVars.clear(); + getMemOPSizeOptions(); // We did not know how many value sites there would be inside // the instrumented function. 
This is counting the number of instrumented @@ -217,17 +229,34 @@ } static Constant *getOrInsertValueProfilingCall(Module &M, - const TargetLibraryInfo &TLI) { + const TargetLibraryInfo &TLI, + bool IsRange = false) { LLVMContext &Ctx = M.getContext(); auto *ReturnTy = Type::getVoidTy(M.getContext()); - Type *ParamTypes[] = { + + Constant *Res; + if (!IsRange) { + Type *ParamTypes[] = { #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType #include "llvm/ProfileData/InstrProfData.inc" - }; - auto *ValueProfilingCallTy = - FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false); - Constant *Res = M.getOrInsertFunction(getInstrProfValueProfFuncName(), - ValueProfilingCallTy); + }; + auto *ValueProfilingCallTy = + FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false); + Res = M.getOrInsertFunction(getInstrProfValueProfFuncName(), + ValueProfilingCallTy); + } else { + Type *RangeParamTypes[] = { +#define VALUE_RANGE_PROF 1 +#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType +#include "llvm/ProfileData/InstrProfData.inc" +#undef VALUE_RANGE_PROF + }; + auto *ValueRangeProfilingCallTy = + FunctionType::get(ReturnTy, makeArrayRef(RangeParamTypes), false); + Res = M.getOrInsertFunction(getInstrProfValueRangeProfFuncName(), + ValueRangeProfilingCallTy); + } + if (Function *FunRes = dyn_cast(Res)) { if (auto AK = TLI.getExtAttrForI32Param(false)) FunRes->addAttribute(3, AK); @@ -261,11 +290,24 @@ Index += It->second.NumValueSites[Kind]; IRBuilder<> Builder(Ind); - Value *Args[3] = {Ind->getTargetValue(), - Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()), - Builder.getInt32(Index)}; - CallInst *Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), - Args); + bool IsRange = (Ind->getValueKind()->getZExtValue() == + llvm::InstrProfValueKind::IPVK_MemOPSize); + CallInst *Call = nullptr; + if (!IsRange) { + Value *Args[3] = {Ind->getTargetValue(), + Builder.CreateBitCast(DataVar, 
Builder.getInt8PtrTy()), + Builder.getInt32(Index)}; + Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), Args); + } else { + Value *Args[6] = {Ind->getTargetValue(), + Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()), + Builder.getInt32(Index), + Builder.getInt64(MemOPSizeRangeStart), + Builder.getInt64(MemOPSizeRangeLast), + Builder.getInt64(MemOPSizeLargeVal)}; + Call = + Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, true), Args); + } if (auto AK = TLI->getExtAttrForI32Param(false)) Call->addAttribute(3, AK); Ind->replaceAllUsesWith(Call); @@ -658,3 +700,25 @@ appendToGlobalCtors(*M, F, 0); } + +void InstrProfiling::getMemOPSizeOptions() { + // Parse -memop-size-range ("start:last" or "last") and -memop-size-large. + MemOPSizeRangeStart = DefaultMemOPSizeRangeStart; + MemOPSizeRangeLast = DefaultMemOPSizeRangeLast; + if (!MemOPSizeRange.empty()) { + auto Pos = MemOPSizeRange.find(":"); + if (Pos != std::string::npos) { + // Use stoll rather than stoi: the range members are int64_t. + if (Pos > 0) + MemOPSizeRangeStart = std::stoll(MemOPSizeRange.substr(0, Pos)); + if (Pos < MemOPSizeRange.size() - 1) + MemOPSizeRangeLast = std::stoll(MemOPSizeRange.substr(Pos + 1)); + } else + MemOPSizeRangeLast = std::stoll(MemOPSizeRange); + } + assert(MemOPSizeRangeLast >= MemOPSizeRangeStart); + + MemOPSizeLargeVal = MemOPSizeLarge; + if (MemOPSizeLargeVal == 0) + MemOPSizeLargeVal = INT64_MIN; +} Index: lib/Transforms/Instrumentation/PGOInstrumentation.cpp =================================================================== --- lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -91,6 +91,7 @@ STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); +STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); STATISTIC(NumOfPGOEdge, "Number of edges."); STATISTIC(NumOfPGOBB, "Number of basic-blocks."); STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); @@ -168,6 +169,10 @@ 
"display to only one function, use " "filtering option -view-bfi-func-name.")); +// Command line option to enable/disable memop intrinsic calls.. +static cl::opt PGOInstrMemOP("pgo-instr-memop", cl::init(true), + cl::Hidden); + // Command line option to turn on CFG dot dump after profile annotation. // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts extern cl::opt PGOViewCounts; @@ -183,7 +188,7 @@ /// select instructions. In \c VM_instrument mode, it inserts code to count /// the number times TrueValue of select is taken. In \c VM_annotate mode, /// it reads the profile data and annotate the select instruction with metadata. -enum VisitMode { VM_counting, VM_instrument, VM_annotate }; +enum VisitMode { VM_counting, VM_instrument, VM_annotate, VM_optimize }; class PGOUseFunc; /// Instruction Visitor class to visit select instructions. @@ -232,6 +237,42 @@ unsigned getNumOfSelectInsts() const { return NSIs; } }; +/// Instruction Visitor class to visit memory intrinsic calls. +struct MemIntrinsicVisitor : public InstVisitor { + Function &F; + unsigned NMemIs = 0; // Number of memIntrinsics instrumented. + VisitMode Mode = VM_counting; // Visiting mode. + unsigned CurCtrId = 0; // Current counter index. + unsigned TotalNumCtrs = 0; // Total number of counters + GlobalVariable *FuncNameVar = nullptr; + uint64_t FuncHash = 0; + PGOUseFunc *UseFunc = nullptr; + + MemIntrinsicVisitor(Function &Func) : F(Func) {} + + void countMemIntrinsics(Function &Func) { + NMemIs = 0; + Mode = VM_counting; + visit(Func); + } + void instrumentMemIntrinsics(Function &Func, unsigned TotalNC, + GlobalVariable *FNV, uint64_t FHash) { + Mode = VM_instrument; + TotalNumCtrs = TotalNC; + FuncHash = FHash; + FuncNameVar = FNV; + visit(Func); + } + + // Visit the IR stream and annotate all mem intrinsic call instructions. 
+ void optimizeMemIntrinsics(Function &Func, PGOUseFunc *UF); + void instrumentOneMemIntrinsic(MemIntrinsic &MI); + void optimizeOneMemIntrinsic(MemIntrinsic &MI); + // Visit \p MI instruction and perform tasks according to visit mode. + void visitMemIntrinsic(MemIntrinsic &SI); + unsigned getNumOfMemIntrinsics() const { return NMemIs; } +}; + class PGOInstrumentationGenLegacyPass : public ModulePass { public: static char ID; @@ -351,6 +392,7 @@ public: std::vector IndirectCallSites; SelectInstVisitor SIVisitor; + MemIntrinsicVisitor MIVisitor; std::string FuncName; GlobalVariable *FuncNameVar; // CFG hash value for this function. @@ -380,12 +422,14 @@ std::unordered_multimap &ComdatMembers, bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr) - : F(Func), ComdatMembers(ComdatMembers), SIVisitor(Func), FunctionHash(0), - MST(F, BPI, BFI) { + : F(Func), ComdatMembers(ComdatMembers), SIVisitor(Func), + MIVisitor(Func), FunctionHash(0), MST(F, BPI, BFI) { // This should be done before CFG hash computation. SIVisitor.countSelects(Func); + MIVisitor.countMemIntrinsics(Func); NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); + NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); IndirectCallSites = findIndirectCallSites(Func); FuncName = getPGOFuncName(F); @@ -602,6 +646,11 @@ Builder.getInt32(NumIndirectCallSites++)}); } NumOfPGOICall += NumIndirectCallSites; + + // Now instrument memop instrinsic calls: + FuncInfo.MIVisitor.instrumentMemIntrinsics(F, NumCounters, + FuncInfo.FuncNameVar, + FuncInfo.FunctionHash); } // This class represents a CFG edge in profile use compilation. 
@@ -1065,11 +1114,53 @@ case VM_annotate: annotateOneSelectInst(SI); return; + default: + break; } llvm_unreachable("Unknown visiting mode"); } +/// Emit a value-profile intrinsic recording the length operand of \p MI. +void MemIntrinsicVisitor::instrumentOneMemIntrinsic(MemIntrinsic &MI) { + Module *M = F.getParent(); + IRBuilder<> Builder(&MI); + Type *Int64Ty = Builder.getInt64Ty(); + Type *I8PtrTy = Builder.getInt8PtrTy(); + Value *Length = MI.getLength(); + assert(!isa<ConstantInt>(Length)); + Builder.CreateCall( + Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), + {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), + Builder.getInt64(FuncHash), Builder.CreateZExtOrTrunc(Length, Int64Ty), + Builder.getInt32(llvm::InstrProfValueKind::IPVK_MemOPSize), + Builder.getInt32(CurCtrId)}); + ++CurCtrId; +} + +/// Count or instrument \p MI depending on Mode; constant lengths are skipped. +void MemIntrinsicVisitor::visitMemIntrinsic(MemIntrinsic &MI) { + if (!PGOInstrMemOP) + return; + Value *Length = MI.getLength(); + // Do not instrument calls whose length is a constant. + if (isa<ConstantInt>(Length)) + return; + + switch (Mode) { + case VM_counting: + // Count sites only here so the instrument walk does not double NMemIs. + NMemIs++; + return; + case VM_instrument: + instrumentOneMemIntrinsic(MI); + return; + default: + break; + } + llvm_unreachable("Unknown visiting mode"); +} + // Traverse all the indirect callsites and annotate the instructions. 
void PGOUseFunc::annotateIndirectCallSites() { if (DisableValueProfiling) Index: test/Instrumentation/InstrProfiling/PR23499.ll =================================================================== --- test/Instrumentation/InstrProfiling/PR23499.ll +++ test/Instrumentation/InstrProfiling/PR23499.ll @@ -15,13 +15,13 @@ ; CHECK-NOT: __profn__Z3barIvEvv ; CHECK: @__profc__Z3barIvEvv = linkonce_odr hidden global [1 x i64] zeroinitializer, section "{{.*}}__llvm_prf_cnts", comdat($__profv__Z3barIvEvv), align 8 -; CHECK: @__profd__Z3barIvEvv = linkonce_odr hidden global { i64, i64, i64*, i8*, i8*, i32, [1 x i16] } { i64 4947693190065689389, i64 0, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3barIvEvv, i32 0, i32 0), i8*{{.*}}, i8* null, i32 1, [1 x i16] zeroinitializer }, section "{{.*}}__llvm_prf_data{{.*}}", comdat($__profv__Z3barIvEvv), align 8 +; CHECK: @__profd__Z3barIvEvv = linkonce_odr hidden global { i64, i64, i64*, i8*, i8*, i32, [2 x i16] } { i64 4947693190065689389, i64 0, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3barIvEvv, i32 0, i32 0), i8*{{.*}}, i8* null, i32 1, [2 x i16] zeroinitializer }, section "{{.*}}__llvm_prf_data{{.*}}", comdat($__profv__Z3barIvEvv), align 8 ; CHECK: @__llvm_prf_nm = private constant [{{.*}} x i8] c"{{.*}}", section "{{.*}}__llvm_prf_names" ; COFF-NOT: __profn__Z3barIvEvv ; COFF: @__profc__Z3barIvEvv = linkonce_odr hidden global [1 x i64] zeroinitializer, section "{{.*}}__llvm_prf_cnts", comdat, align 8 -; COFF: @__profd__Z3barIvEvv = linkonce_odr hidden global { i64, i64, i64*, i8*, i8*, i32, [1 x i16] } { i64 4947693190065689389, i64 0, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3barIvEvv, i32 0, i32 0), i8*{{.*}}, i8* null, i32 1, [1 x i16] zeroinitializer }, section "{{.*}}__llvm_prf_data{{.*}}", comdat($__profc__Z3barIvEvv), align 8 +; COFF: @__profd__Z3barIvEvv = linkonce_odr hidden global { i64, i64, i64*, i8*, i8*, i32, [2 x i16] } { i64 4947693190065689389, i64 0, i64* 
getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3barIvEvv, i32 0, i32 0), i8*{{.*}}, i8* null, i32 1, [2 x i16] zeroinitializer }, section "{{.*}}__llvm_prf_data{{.*}}", comdat($__profc__Z3barIvEvv), align 8 declare void @llvm.instrprof.increment(i8*, i64, i32, i32) #1 Index: test/Instrumentation/InstrProfiling/icall.ll =================================================================== --- test/Instrumentation/InstrProfiling/icall.ll +++ test/Instrumentation/InstrProfiling/icall.ll @@ -37,9 +37,9 @@ ; DYN-NOT: @__profvp_foo ; DYN-NOT: @__llvm_prf_vnodes -; STATIC: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [1 x i16] }* @__profd_foo to i8*), i32 0) -; STATIC-EXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [1 x i16] }* @__profd_foo to i8*), i32 zeroext 0) -; STATIC-SEXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [1 x i16] }* @__profd_foo to i8*), i32 signext 0) +; STATIC: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 0) +; STATIC-EXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 zeroext 0) +; STATIC-SEXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 signext 0) ; STATIC: declare void @__llvm_profile_instrument_target(i64, i8*, i32) ; STATIC-EXT: declare void @__llvm_profile_instrument_target(i64, i8*, i32 zeroext) Index: test/Transforms/PGOProfile/comdat_internal.ll =================================================================== --- test/Transforms/PGOProfile/comdat_internal.ll +++ test/Transforms/PGOProfile/comdat_internal.ll @@ -14,9 +14,9 @@ ; CHECK: @__llvm_profile_raw_version = constant i64 {{[0-9]+}}, comdat ; 
CHECK-NOT: __profn__stdin__foo ; CHECK: @__profc__stdin__foo.[[FOO_HASH]] = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat($__profv__stdin__foo.[[FOO_HASH]]), align 8 -; CHECK: @__profd__stdin__foo.[[FOO_HASH]] = private global { i64, i64, i64*, i8*, i8*, i32, [1 x i16] } { i64 -5640069336071256030, i64 [[FOO_HASH]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__foo.[[FOO_HASH]], i32 0, i32 0), i8* null +; CHECK: @__profd__stdin__foo.[[FOO_HASH]] = private global { i64, i64, i64*, i8*, i8*, i32, [2 x i16] } { i64 -5640069336071256030, i64 [[FOO_HASH]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__foo.[[FOO_HASH]], i32 0, i32 0), i8* null ; CHECK-NOT: bitcast (i32 ()* @foo to i8*) -; CHECK-SAME: , i8* null, i32 1, [1 x i16] zeroinitializer }, section "__llvm_prf_data", comdat($__profv__stdin__foo.[[FOO_HASH]]), align 8 +; CHECK-SAME: , i8* null, i32 1, [2 x i16] zeroinitializer }, section "__llvm_prf_data", comdat($__profv__stdin__foo.[[FOO_HASH]]), align 8 ; CHECK: @__llvm_prf_nm ; CHECK: @llvm.used Index: test/Transforms/PGOProfile/memcpy.ll =================================================================== --- /dev/null +++ test/Transforms/PGOProfile/memcpy.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -pgo-instr-gen -instrprof -S | FileCheck %s +; RUN: opt <%s -passes=pgo-instr-gen,instrprof -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @foo(i8* %dst, i8* %src, i32* %a, i32 %n) { +entry: + br label %for.cond + +for.cond: + %i.0 = phi i32 [ 0, %entry ], [ %add, %for.cond1 ] + %cmp = icmp slt i32 %i.0, %n + br i1 %cmp, label %for.cond1, label %for.end6 + +for.cond1: + %j.0 = phi i32 [ %inc, %for.body3 ], [ 0, %for.cond ] + %idx.ext = sext i32 %i.0 to i64 + %add.ptr = getelementptr inbounds i32, i32* %a, i64 %idx.ext + %0 = load i32, i32* %add.ptr, align 4 + %cmp2 = icmp slt i32 %j.0, %0 + %add = add nsw i32 
%i.0, 1 + br i1 %cmp2, label %for.body3, label %for.cond + +for.body3: + %conv = sext i32 %add to i64 +; CHECK: call void @__llvm_profile_instrument_range(i64 %conv, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 0, i64 0, i64 8, i64 8192) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %conv, i32 1, i1 false) + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end6: + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) Index: test/tools/llvm-profdata/memop-size-prof.proftext =================================================================== --- /dev/null +++ test/tools/llvm-profdata/memop-size-prof.proftext @@ -0,0 +1,124 @@ +# RUN: llvm-profdata show -memop-sizes -ic-targets -all-functions %s | FileCheck %s --check-prefixes=MEMOP,MEMOP_SUM,ICALL,ICALL_SUM +# RUN: llvm-profdata show -memop-sizes -ic-targets -counts -text -all-functions %s | FileCheck %s --check-prefixes=TEXT,MEMOP_TEXT,ICALL_TEXT +# RUN: llvm-profdata merge -o %t.profdata %s +# RUN: llvm-profdata show -memop-sizes -ic-targets -all-functions %t.profdata | FileCheck %s --check-prefixes=MEMOP,MEMOP_SUM,ICALL,ICALL_SUM +# RUN: llvm-profdata merge -o %t.proftext -text %s +# RUN: llvm-profdata show -memop-sizes -ic-targets -all-functions %t.proftext| FileCheck %s --check-prefixes=MEMOP,MEMOP_SUM,ICALL,ICALL_SUM + +# IR level Instrumentation Flag +:ir +ic1 +# Func Hash: +10 +# Num Counters: +2 +# Counter Values: +999000 +359800 + +ic2 +# Func Hash: +10 +# Num Counters: +2 +# Counter Values: +1001000 +360200 + +foo +# Func Hash: +35277121310 +# Num Counters: +3 +# Counter Values: +20 +556 +1 +# Num Value Kinds: +2 +# Value Kind IPVK_IndirectCallTarget +0 +# NumSites +3 +# Values for each site +0 +2 +ic2:1000 +ic1:100 +1 +ic2:20000 +#ICALL: Indirect Target Results: +#ICALL-NEXT: [ 1, ic2, 1000 ] +#ICALL-NEXT: [ 1, ic1, 100 ] +#ICALL-NEXT: [ 2, ic2, 20000 ] + + +# ValueKind = 
IPVK_MemOPSize: +1 +# NumValueSites: +1 +9 +1:99 +2:88 +3:77 +9:72 +4:66 +5:55 +6:44 +7:33 +8:22 + +#MEMOP: Memory Instrinsic Size Results: +#MEMOP-NEXT: [ 0, 1, 99 ] +#MEMOP-NEXT: [ 0, 2, 88 ] +#MEMOP-NEXT: [ 0, 3, 77 ] +#MEMOP-NEXT: [ 0, 9, 72 ] +#MEMOP-NEXT: [ 0, 4, 66 ] +#MEMOP-NEXT: [ 0, 5, 55 ] +#MEMOP-NEXT: [ 0, 6, 44 ] +#MEMOP-NEXT: [ 0, 7, 33 ] +#MEMOP-NEXT: [ 0, 8, 22 ] + +#ICALL_SUM: Statistics for indirect call sites profile: +#ICALL_SUM: Total number of sites: 3 +#ICALL_SUM: Total number of sites with values: 2 +#ICALL_SUM: Total number of profiled values: 3 +#ICALL_SUM: Value sites histogram: +#ICALL_SUM: NumTargets, SiteCount +#ICALL_SUM: 1, 1 +#ICALL_SUM: 2, 1 + +#MEMOP_SUM: Statistics for memory intrinsic calls sizes profile: +#MEMOP_SUM: Total number of sites: 1 +#MEMOP_SUM: Total number of sites with values: 1 +#MEMOP_SUM: Total number of profiled values: 9 +#MEMOP_SUM: Value sites histogram: +#MEMOP_SUM: NumTargets, SiteCount +#MEMOP_SUM: 9, 1 + +#TEXT: # Num Value Kinds: +#TEXT: 2 +#ICALL_TEXT: # ValueKind = IPVK_IndirectCallTarget: +#ICALL_TEXT: 0 +#ICALL_TEXT: # NumValueSites: +#ICALL_TEXT: 3 +#ICALL_TEXT: 0 +#ICALL_TEXT: 2 +#ICALL_TEXT: ic2:1000 +#ICALL_TEXT: ic1:100 +#ICALL_TEXT: 1 +#ICALL_TEXT: ic2:20000 +#MEMOP_TEXT: # ValueKind = IPVK_MemOPSize: +#MEMOP_TEXT: 1 +#MEMOP_TEXT: # NumValueSites: +#MEMOP_TEXT: 1 +#MEMOP_TEXT: 9 +#MEMOP_TEXT: 1:99 +#MEMOP_TEXT: 2:88 +#MEMOP_TEXT: 3:77 +#MEMOP_TEXT: 9:72 +#MEMOP_TEXT: 4:66 +#MEMOP_TEXT: 5:55 +#MEMOP_TEXT: 6:44 +#MEMOP_TEXT: 7:33 +#MEMOP_TEXT: 8:22 Index: test/tools/llvm-profdata/value-prof.proftext =================================================================== --- test/tools/llvm-profdata/value-prof.proftext +++ test/tools/llvm-profdata/value-prof.proftext @@ -63,10 +63,11 @@ #ICTEXT-NEXT: foo2:20000 # -#ICSUM: Total Number of Indirect Call Sites : 3 -#ICSUM: Total Number of Sites With Values : 2 -#ICSUM: Total Number of Profiled Values : 3 +#ICSUM: Statistics for indirect call sites 
profile: +#ICSUM: Total number of sites: 3 +#ICSUM: Total number of sites with values: 2 +#ICSUM: Total number of profiled values: 3 #ICSUM: NumTargets, SiteCount -#ICSUM 1, 1 -#ICSUM 2, 1 +#ICSUM 1, 1 +#ICSUM 2, 1 Index: tools/llvm-profdata/llvm-profdata.cpp =================================================================== --- tools/llvm-profdata/llvm-profdata.cpp +++ tools/llvm-profdata/llvm-profdata.cpp @@ -446,8 +446,49 @@ return 0; } +typedef struct ValueSitesStats { + ValueSitesStats() + : TotalNumValueSites(0), TotalNumValueSitesWithValueProfile(0), + TotalNumValues(0) {} + uint64_t TotalNumValueSites; + uint64_t TotalNumValueSitesWithValueProfile; + uint64_t TotalNumValues; + std::vector ValueSitesHistogram; +} ValueSitesStats; + +static void CollectValueSitesStats(const InstrProfRecord &Func, uint32_t VK, + ValueSitesStats &Stats) { + uint32_t NS = Func.getNumValueSites(VK); + Stats.TotalNumValueSites += NS; + for (size_t I = 0; I < NS; ++I) { + uint32_t NV = Func.getNumValueDataForSite(VK, I); + std::unique_ptr VD = Func.getValueForSite(VK, I); + Stats.TotalNumValues += NV; + if (NV) { + Stats.TotalNumValueSitesWithValueProfile++; + if (NV > Stats.ValueSitesHistogram.size()) + Stats.ValueSitesHistogram.resize(NV, 0); + Stats.ValueSitesHistogram[NV - 1]++; + } + } +} + +static void ShowValueSitesStats(raw_fd_ostream &OS, uint32_t VK, + ValueSitesStats &Stats) { + OS << " Total number of sites: " << Stats.TotalNumValueSites << "\n"; + OS << " Total number of sites with values: " + << Stats.TotalNumValueSitesWithValueProfile << "\n"; + OS << " Total number of profiled values: " << Stats.TotalNumValues << "\n"; + + OS << " Value sites histogram:\n\tNumTargets, SiteCount\n"; + for (unsigned I = 0; I < Stats.ValueSitesHistogram.size(); I++) { + if (Stats.ValueSitesHistogram[I] > 0) + OS << "\t" << I + 1 << ", " << Stats.ValueSitesHistogram[I] << "\n"; + } +} + static int showInstrProfile(const std::string &Filename, bool ShowCounts, - bool 
ShowIndirectCallTargets, + bool ShowIndirectCallTargets, bool ShowMemOPSizes, bool ShowDetailedSummary, std::vector DetailedSummaryCutoffs, bool ShowAllFunctions, @@ -465,10 +506,8 @@ auto Reader = std::move(ReaderOrErr.get()); bool IsIRInstr = Reader->isIRLevelProfile(); size_t ShownFunctions = 0; - uint64_t TotalNumValueSites = 0; - uint64_t TotalNumValueSitesWithValueProfile = 0; - uint64_t TotalNumValues = 0; - std::vector ICHistogram; + int NumVPKind = IPVK_Last - IPVK_First + 1; + std::vector VPStats(NumVPKind); for (const auto &Func : *Reader) { bool Show = ShowAllFunctions || (!ShowFunction.empty() && @@ -502,6 +541,11 @@ OS << " Indirect Call Site Count: " << Func.getNumValueSites(IPVK_IndirectCallTarget) << "\n"; + uint32_t NumMemOPCalls = Func.getNumValueSites(IPVK_MemOPSize); + if (ShowMemOPSizes && NumMemOPCalls > 0) + OS << " Number of Memory Intrinsics Calls: " << NumMemOPCalls + << "\n"; + if (ShowCounts) { OS << " Block counts: ["; size_t Start = (IsIRInstr ? 0 : 1); @@ -512,21 +556,15 @@ } if (ShowIndirectCallTargets) { + CollectValueSitesStats(Func, IPVK_IndirectCallTarget, + VPStats[IPVK_IndirectCallTarget]); + OS << " Indirect Target Results:\n"; InstrProfSymtab &Symtab = Reader->getSymtab(); uint32_t NS = Func.getNumValueSites(IPVK_IndirectCallTarget); - OS << " Indirect Target Results: \n"; - TotalNumValueSites += NS; for (size_t I = 0; I < NS; ++I) { uint32_t NV = Func.getNumValueDataForSite(IPVK_IndirectCallTarget, I); std::unique_ptr VD = Func.getValueForSite(IPVK_IndirectCallTarget, I); - TotalNumValues += NV; - if (NV) { - TotalNumValueSitesWithValueProfile++; - if (NV > ICHistogram.size()) - ICHistogram.resize(NV, 0); - ICHistogram[NV - 1]++; - } for (uint32_t V = 0; V < NV; V++) { OS << "\t[ " << I << ", "; OS << Symtab.getFuncName(VD[V].Value) << ", " << VD[V].Count @@ -534,6 +572,20 @@ } } } + if (ShowMemOPSizes && NumMemOPCalls > 0) { + CollectValueSitesStats(Func, IPVK_MemOPSize, VPStats[IPVK_MemOPSize]); + uint32_t NS = 
Func.getNumValueSites(IPVK_MemOPSize); + OS << " Memory Instrinsic Size Results:\n"; + for (size_t I = 0; I < NS; ++I) { + uint32_t NV = Func.getNumValueDataForSite(IPVK_MemOPSize, I); + std::unique_ptr VD = + Func.getValueForSite(IPVK_MemOPSize, I); + for (uint32_t V = 0; V < NV; V++) { + OS << "\t[ " << I << ", "; + OS << VD[V].Value << ", " << VD[V].Count << " ]\n"; + } + } + } } } if (Reader->hasError()) @@ -547,17 +599,16 @@ OS << "Total functions: " << PS->getNumFunctions() << "\n"; OS << "Maximum function count: " << PS->getMaxFunctionCount() << "\n"; OS << "Maximum internal block count: " << PS->getMaxInternalCount() << "\n"; + if (ShownFunctions && ShowIndirectCallTargets) { - OS << "Total Number of Indirect Call Sites : " << TotalNumValueSites - << "\n"; - OS << "Total Number of Sites With Values : " - << TotalNumValueSitesWithValueProfile << "\n"; - OS << "Total Number of Profiled Values : " << TotalNumValues << "\n"; - - OS << "IC Value histogram : \n\tNumTargets, SiteCount\n"; - for (unsigned I = 0; I < ICHistogram.size(); I++) { - OS << "\t" << I + 1 << ", " << ICHistogram[I] << "\n"; - } + OS << "Statistics for indirect call sites profile:\n"; + ShowValueSitesStats(OS, IPVK_IndirectCallTarget, + VPStats[IPVK_IndirectCallTarget]); + } + + if (ShownFunctions && ShowMemOPSizes) { + OS << "Statistics for memory intrinsic calls sizes profile:\n"; + ShowValueSitesStats(OS, IPVK_MemOPSize, VPStats[IPVK_MemOPSize]); } if (ShowDetailedSummary) { @@ -608,6 +659,10 @@ cl::opt ShowIndirectCallTargets( "ic-targets", cl::init(false), cl::desc("Show indirect call site target values for shown functions")); + cl::opt ShowMemOPSizes( + "memop-sizes", cl::init(false), + cl::desc("Show the profiled sizes of the memory intrinsic calls " + "for shown functions")); cl::opt ShowDetailedSummary("detailed-summary", cl::init(false), cl::desc("Show detailed profile summary")); cl::list DetailedSummaryCutoffs( @@ -646,8 +701,9 @@ DetailedSummaryCutoffs.end()); if (ProfileKind == 
instr) return showInstrProfile(Filename, ShowCounts, ShowIndirectCallTargets, - ShowDetailedSummary, DetailedSummaryCutoffs, - ShowAllFunctions, ShowFunction, TextFormat, OS); + ShowMemOPSizes, ShowDetailedSummary, + DetailedSummaryCutoffs, ShowAllFunctions, + ShowFunction, TextFormat, OS); else return showSampleProfile(Filename, ShowCounts, ShowAllFunctions, ShowFunction, OS);