Index: include/llvm/IR/IntrinsicInst.h =================================================================== --- include/llvm/IR/IntrinsicInst.h +++ include/llvm/IR/IntrinsicInst.h @@ -392,7 +392,12 @@ class InstrProfValueProfileInst : public IntrinsicInst { public: static inline bool classof(const IntrinsicInst *I) { - return I->getIntrinsicID() == Intrinsic::instrprof_value_profile; + switch (I->getIntrinsicID()) { + case Intrinsic::instrprof_value_profile: + case Intrinsic::instrprof_value_range_profile: + return true; + default: return false; + } } static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); @@ -421,6 +426,13 @@ } }; + /// This represents the llvm.instrprof_value_range_profile intrinsic. + class InstrProfValueRangeProfileInst : public InstrProfValueProfileInst { + public: + static inline bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::instrprof_value_range_profile; + } + }; } // end namespace llvm #endif // LLVM_IR_INTRINSICINST_H Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -371,6 +371,14 @@ llvm_i32_ty], []>; +// A call to profile runtime for value range profiling of target expressions +// through instrumentation based profiling. +def int_instrprof_value_range_profile : Intrinsic<[], + [llvm_ptr_ty, llvm_i64_ty, + llvm_i64_ty, llvm_i32_ty, + llvm_i32_ty], + []>; + //===------------------- Standard C Library Intrinsics --------------------===// // Index: include/llvm/ProfileData/InstrProf.h =================================================================== --- include/llvm/ProfileData/InstrProf.h +++ include/llvm/ProfileData/InstrProf.h @@ -79,6 +79,11 @@ return INSTR_PROF_VALUE_PROF_FUNC_STR; } +/// Return the name profile runtime entry point to do value range profiling. 
+inline StringRef getInstrProfValueRangeProfFuncName() { + return INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR; +} + /// Return the name of the section containing function coverage mapping /// data. inline StringRef getInstrProfCoverageSectionName(bool AddSegment) { @@ -270,6 +275,9 @@ inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; } +/// Set the global variable property to be used in profile runtime. +void setRuntimeVariableProperty(Module &M, GlobalVariable *GV); + /// Return the PGOFuncName meta data associated with a function. MDNode *getPGOFuncNameMetadata(const Function &F); @@ -646,11 +654,14 @@ private: std::vector IndirectCallSites; + std::vector MemOPSizes; const std::vector & getValueSitesForKind(uint32_t ValueKind) const { switch (ValueKind) { case IPVK_IndirectCallTarget: return IndirectCallSites; + case IPVK_MemOPSize: + return MemOPSizes; default: llvm_unreachable("Unknown value kind!"); } Index: include/llvm/ProfileData/InstrProfData.inc =================================================================== --- include/llvm/ProfileData/InstrProfData.inc +++ include/llvm/ProfileData/InstrProfData.inc @@ -174,13 +174,15 @@ * name hash and the function address. */ VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0) +/* For memory intrinsic functions size profiling. */ +VALUE_PROF_KIND(IPVK_MemOPSize, 1) /* These two kinds must be the last to be * declared. This is to make sure the string * array created with the template can be * indexed with the kind value. 
*/ VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget) -VALUE_PROF_KIND(IPVK_Last, IPVK_IndirectCallTarget) +VALUE_PROF_KIND(IPVK_Last, IPVK_MemOPSize) #undef VALUE_PROF_KIND /* VALUE_PROF_KIND end */ @@ -605,6 +607,8 @@ #define VARIANT_MASK_IR_PROF (0x1ULL << 56) #define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version #define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime +#define INSTR_PROF_MEMOP_SMALLVAL_VAR __llvm_profile_memop_smallval +#define INSTR_PROF_MEMOP_LARGEVAL_VAR __llvm_profile_memop_largeval /* The variable that holds the name of the profile data * specified via command line. */ @@ -649,6 +653,9 @@ #define INSTR_PROF_VALUE_PROF_FUNC __llvm_profile_instrument_target #define INSTR_PROF_VALUE_PROF_FUNC_STR \ INSTR_PROF_QUOTE(INSTR_PROF_VALUE_PROF_FUNC) +#define INSTR_PROF_VALUE_RANGE_PROF_FUNC __llvm_profile_instrument_range +#define INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR \ + INSTR_PROF_QUOTE(INSTR_PROF_VALUE_RANGE_PROF_FUNC) /* InstrProfile per-function control data alignment. */ #define INSTR_PROF_DATA_ALIGNMENT 8 Index: lib/ProfileData/InstrProf.cpp =================================================================== --- lib/ProfileData/InstrProf.cpp +++ lib/ProfileData/InstrProf.cpp @@ -857,4 +857,16 @@ } return true; } + +// Set the global variable property to be used in profile runtime. 
+void setRuntimeVariableProperty(Module &M, GlobalVariable *GV) { + assert(GV); + GV->setVisibility(GlobalValue::DefaultVisibility); + Triple TT(M.getTargetTriple()); + if (!TT.supportsCOMDAT()) + GV->setLinkage(GlobalValue::WeakAnyLinkage); + else + GV->setComdat(M.getOrInsertComdat( + StringRef(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)))); +} } // end namespace llvm Index: lib/Transforms/Instrumentation/InstrProfiling.cpp =================================================================== --- lib/Transforms/Instrumentation/InstrProfiling.cpp +++ lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -76,6 +76,14 @@ // For those sites with non-zero profile, the average number of targets // is usually smaller than 2. cl::init(1.0)); +cl::opt MemOPSizeSmallVal( + "memop-size-small", + cl::desc("Set the threshold value under which to be profiled precisely"), + cl::init(8)); +cl::opt MemOPSizeLargeVal( + "memop-size-large", + cl::desc("Set the threshold value to be profiled seperately"), + cl::init(8192)); class InstrProfilingLegacyPass : public ModulePass { InstrProfiling InstrProf; @@ -217,7 +225,8 @@ } static Constant *getOrInsertValueProfilingCall(Module &M, - const TargetLibraryInfo &TLI) { + const TargetLibraryInfo &TLI, + bool IsRange = false) { LLVMContext &Ctx = M.getContext(); auto *ReturnTy = Type::getVoidTy(M.getContext()); Type *ParamTypes[] = { @@ -226,7 +235,9 @@ }; auto *ValueProfilingCallTy = FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false); - Constant *Res = M.getOrInsertFunction(getInstrProfValueProfFuncName(), + StringRef ValurProfFuncName = (IsRange ? 
getInstrProfValueRangeProfFuncName() + : getInstrProfValueProfFuncName()); + Constant *Res = M.getOrInsertFunction(ValurProfFuncName, ValueProfilingCallTy); if (Function *FunRes = dyn_cast(Res)) { if (auto AK = TLI.getExtAttrForI32Param(false)) @@ -248,7 +259,30 @@ It->second.NumValueSites[ValueKind] = Index + 1; } +// Create the INSTR_PROF_MEMOP_SMALLVAL_VAR variable for the profile runtime. +static void createMemOPSizeSmallValVariable(Module *M) { + Type *IntTy32 = Type::getInt32Ty(M->getContext()); + unsigned V = MemOPSizeSmallVal; + auto GV = new GlobalVariable( + *M, IntTy32, true, GlobalVariable::ExternalLinkage, + Constant::getIntegerValue(IntTy32, APInt(32, V)), + INSTR_PROF_QUOTE(INSTR_PROF_MEMOP_SMALLVAL_VAR)); + setRuntimeVariableProperty(*M, GV); +} + +// Create the INSTR_PROF_MEMOP_LARGEVAL_VAR variable for the profile runtime. +static void createMemOPSizeLargeValVariable(Module *M) { + Type *IntTy32 = Type::getInt32Ty(M->getContext()); + unsigned V = MemOPSizeLargeVal; + auto GV = new GlobalVariable( + *M, IntTy32, true, GlobalVariable::ExternalLinkage, + Constant::getIntegerValue(IntTy32, APInt(32, V)), + INSTR_PROF_QUOTE(INSTR_PROF_MEMOP_LARGEVAL_VAR)); + setRuntimeVariableProperty(*M, GV); +} + void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { + StringRef SmallValName = INSTR_PROF_QUOTE(INSTR_PROF_MEMOP_SMALLVAL_VAR); GlobalVariable *Name = Ind->getName(); auto It = ProfileDataMap.find(Name); assert(It != ProfileDataMap.end() && It->second.DataVar && @@ -264,7 +298,15 @@ Value *Args[3] = {Ind->getTargetValue(), Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()), Builder.getInt32(Index)}; - CallInst *Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), + bool IsRange = false; + if (dyn_cast(Ind)) + IsRange = true; + if (IsRange && !Ind->getModule()->getNamedGlobal(SmallValName)) { + // First range site in this module: emit the threshold variables once. + createMemOPSizeSmallValVariable(Ind->getModule()); + createMemOPSizeLargeValVariable(Ind->getModule()); + } + CallInst *Call = 
Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, IsRange), Args); if (auto AK = TLI->getExtAttrForI32Param(false)) Call->addAttribute(3, AK); @@ -477,8 +519,14 @@ size_t TotalNS = 0; for (auto &PD : ProfileDataMap) { - for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) - TotalNS += PD.second.NumValueSites[Kind]; + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { + auto I = PD.second.NumValueSites[Kind]; + TotalNS += I; + // For value range profiles, such as IPVK_MemOPSize, we need preallocate + // more value sites. + if (Kind == IPVK_MemOPSize) + TotalNS += I * (MemOPSizeSmallVal + 1); + } } if (!TotalNS) Index: lib/Transforms/Instrumentation/PGOInstrumentation.cpp =================================================================== --- lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -87,6 +87,7 @@ STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); +STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); STATISTIC(NumOfPGOEdge, "Number of edges."); STATISTIC(NumOfPGOBB, "Number of basic-blocks."); STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); @@ -143,6 +144,11 @@ // Command line option to enable/disable select instruction instrumentation. static cl::opt PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden); + +// Command line option to enable/disable memop intrinsic calls.. +static cl::opt PGOInstrMemOP("pgo-instr-memop", cl::init(true), + cl::Hidden); + namespace { /// The select instruction visitor plays three roles specified @@ -150,7 +156,7 @@ /// select instructions. In \c VM_instrument mode, it inserts code to count /// the number times TrueValue of select is taken. In \c VM_annotate mode, /// it reads the profile data and annotate the select instruction with metadata. 
-enum VisitMode { VM_counting, VM_instrument, VM_annotate }; +enum VisitMode { VM_counting, VM_instrument, VM_annotate, VM_optimize }; class PGOUseFunc; /// Instruction Visitor class to visit select instructions. @@ -199,6 +205,42 @@ unsigned getNumOfSelectInsts() const { return NSIs; } }; +/// Instruction Visitor class to visit memory intrinsic calls. +struct MemIntrinsicVisitor : public InstVisitor { + Function &F; + unsigned NMemIs = 0; // Number of memIntrinsics instrumented. + VisitMode Mode = VM_counting; // Visiting mode. + unsigned CurCtrId = 0; // Current counter index. + unsigned TotalNumCtrs = 0; // Total number of counters + GlobalVariable *FuncNameVar = nullptr; + uint64_t FuncHash = 0; + PGOUseFunc *UseFunc = nullptr; + + MemIntrinsicVisitor(Function &Func) : F(Func) {} + + void countMemIntrinsics(Function &Func) { + NMemIs = 0; + Mode = VM_counting; + visit(Func); + } + void instrumentMemIntrinsics(Function &Func, unsigned TotalNC, + GlobalVariable *FNV, uint64_t FHash) { + Mode = VM_instrument; + TotalNumCtrs = TotalNC; + FuncHash = FHash; + FuncNameVar = FNV; + visit(Func); + } + + // Visit the IR stream and annotate all mem intrinsic call instructions. + void optimizeMemIntrinsics(Function &Func, PGOUseFunc *UF); + void instrumentOneMemIntrinsic(MemIntrinsic &MI); + void optimizeOneMemIntrinsic(MemIntrinsic &MI); + // Visit \p MI instruction and perform tasks according to visit mode. + void visitMemIntrinsic(MemIntrinsic &SI); + unsigned getNumOfMemIntrinsics() const { return NMemIs; } +}; + class PGOInstrumentationGenLegacyPass : public ModulePass { public: static char ID; @@ -318,6 +360,7 @@ public: std::vector IndirectCallSites; SelectInstVisitor SIVisitor; + MemIntrinsicVisitor MIVisitor; std::string FuncName; GlobalVariable *FuncNameVar; // CFG hash value for this function. 
@@ -347,12 +390,14 @@ std::unordered_multimap &ComdatMembers, bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr) - : F(Func), ComdatMembers(ComdatMembers), SIVisitor(Func), FunctionHash(0), - MST(F, BPI, BFI) { + : F(Func), ComdatMembers(ComdatMembers), SIVisitor(Func), + MIVisitor(Func), FunctionHash(0), MST(F, BPI, BFI) { // This should be done before CFG hash computation. SIVisitor.countSelects(Func); + MIVisitor.countMemIntrinsics(Func); NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); + NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); IndirectCallSites = findIndirectCallSites(Func); FuncName = getPGOFuncName(F); @@ -569,6 +614,11 @@ Builder.getInt32(NumIndirectCallSites++)}); } NumOfPGOICall += NumIndirectCallSites; + + // Now instrument memop intrinsic calls: + FuncInfo.MIVisitor.instrumentMemIntrinsics(F, NumCounters, + FuncInfo.FuncNameVar, + FuncInfo.FunctionHash); } // This class represents a CFG edge in profile use compilation. 
@@ -942,7 +992,7 @@ Weights.push_back(scaleBranchCount(ECI, Scale)); DEBUG(dbgs() << "Weight is: "; - for (const auto &W : Weights) { dbgs() << W << " "; } + for (const auto &W : Weights) { dbgs() << W << " "; } dbgs() << "\n";); TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); } @@ -1030,11 +1080,50 @@ case VM_annotate: annotateOneSelectInst(SI); return; + default: + break; } llvm_unreachable("Unknown visiting mode"); } +void MemIntrinsicVisitor::instrumentOneMemIntrinsic(MemIntrinsic &MI) { + Module *M = F.getParent(); + IRBuilder<> Builder(&MI); + Type *Int64Ty = Builder.getInt64Ty(); + Type *I8PtrTy = Builder.getInt8PtrTy(); + Value *Length = MI.getLength(); + assert(!dyn_cast(Length)); + Builder.CreateCall( + Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_range_profile), + {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), + Builder.getInt64(FuncHash), Builder.CreateZExtOrTrunc(Length, Int64Ty), + Builder.getInt32(llvm::InstrProfValueKind::IPVK_MemOPSize), + Builder.getInt32(CurCtrId)}); + ++CurCtrId; +} + +void MemIntrinsicVisitor::visitMemIntrinsic(MemIntrinsic &MI) { + if (!PGOInstrMemOP) + return; + Value *Length = MI.getLength(); + // Do not instrument calls whose length is a constant. + if (dyn_cast(Length)) + return; + + NMemIs++; + switch (Mode) { + case VM_counting: + return; + case VM_instrument: + instrumentOneMemIntrinsic(MI); + return; + default: + break; + } + llvm_unreachable("Unknown visiting mode"); +} + // Traverse all the indirect callsites and annotate the instructions. 
void PGOUseFunc::annotateIndirectCallSites() { if (DisableValueProfiling) @@ -1077,13 +1166,7 @@ M, IntTy64, true, GlobalVariable::ExternalLinkage, Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); - IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); - Triple TT(M.getTargetTriple()); - if (!TT.supportsCOMDAT()) - IRLevelVersionVariable->setLinkage(GlobalValue::WeakAnyLinkage); - else - IRLevelVersionVariable->setComdat(M.getOrInsertComdat( - StringRef(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)))); + setRuntimeVariableProperty(M, IRLevelVersionVariable); } // Collect the set of members for each Comdat in module M and store @@ -1197,6 +1280,7 @@ Func.populateCounters(); Func.setBranchWeights(); Func.annotateIndirectCallSites(); + PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); if (FreqAttr == PGOUseFunc::FFA_Cold) ColdFunctions.push_back(&F); Index: test/Instrumentation/InstrProfiling/PR23499.ll =================================================================== --- test/Instrumentation/InstrProfiling/PR23499.ll +++ test/Instrumentation/InstrProfiling/PR23499.ll @@ -15,13 +15,13 @@ ; CHECK: @__profn__Z3barIvEvv = private constant [11 x i8] c"_Z3barIvEvv", align 1 ; CHECK: @__profc__Z3barIvEvv = linkonce_odr hidden global [1 x i64] zeroinitializer, section "{{.*}}__llvm_prf_cnts", comdat($__profv__Z3barIvEvv), align 8 -; CHECK: @__profd__Z3barIvEvv = linkonce_odr hidden global { i64, i64, i64*, i8*, i8*, i32, [1 x i16] } { i64 4947693190065689389, i64 0, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3barIvEvv, i32 0, i32 0), i8*{{.*}}, i8* null, i32 1, [1 x i16] zeroinitializer }, section "{{.*}}__llvm_prf_data{{.*}}", comdat($__profv__Z3barIvEvv), align 8 +; CHECK: @__profd__Z3barIvEvv = linkonce_odr hidden global { i64, i64, i64*, i8*, i8*, i32, [2 x i16] } { i64 4947693190065689389, i64 0, i64* getelementptr inbounds ([1 x i64], [1 x i64]* 
@__profc__Z3barIvEvv, i32 0, i32 0), i8*{{.*}}, i8* null, i32 1, [2 x i16] zeroinitializer }, section "{{.*}}__llvm_prf_data{{.*}}", comdat($__profv__Z3barIvEvv), align 8 ; CHECK: @__llvm_prf_nm = private constant [{{.*}} x i8] c"{{.*}}", section "{{.*}}__llvm_prf_names" ; COFF: @__profn__Z3barIvEvv = private constant [11 x i8] c"_Z3barIvEvv", align 1 ; COFF: @__profc__Z3barIvEvv = linkonce_odr hidden global [1 x i64] zeroinitializer, section "{{.*}}__llvm_prf_cnts", comdat, align 8 -; COFF: @__profd__Z3barIvEvv = linkonce_odr hidden global { i64, i64, i64*, i8*, i8*, i32, [1 x i16] } { i64 4947693190065689389, i64 0, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3barIvEvv, i32 0, i32 0), i8*{{.*}}, i8* null, i32 1, [1 x i16] zeroinitializer }, section "{{.*}}__llvm_prf_data{{.*}}", comdat($__profc__Z3barIvEvv), align 8 +; COFF: @__profd__Z3barIvEvv = linkonce_odr hidden global { i64, i64, i64*, i8*, i8*, i32, [2 x i16] } { i64 4947693190065689389, i64 0, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3barIvEvv, i32 0, i32 0), i8*{{.*}}, i8* null, i32 1, [2 x i16] zeroinitializer }, section "{{.*}}__llvm_prf_data{{.*}}", comdat($__profc__Z3barIvEvv), align 8 declare void @llvm.instrprof.increment(i8*, i64, i32, i32) #1 Index: test/Instrumentation/InstrProfiling/icall.ll =================================================================== --- test/Instrumentation/InstrProfiling/icall.ll +++ test/Instrumentation/InstrProfiling/icall.ll @@ -37,9 +37,9 @@ ; DYN-NOT: @__profvp_foo ; DYN-NOT: @__llvm_prf_vnodes -; STATIC: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [1 x i16] }* @__profd_foo to i8*), i32 0) -; STATIC-EXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [1 x i16] }* @__profd_foo to i8*), i32 zeroext 0) -; STATIC-SEXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [1 x i16] 
}* @__profd_foo to i8*), i32 signext 0) +; STATIC: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 0) +; STATIC-EXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 zeroext 0) +; STATIC-SEXT: call void @__llvm_profile_instrument_target(i64 %3, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 signext 0) ; STATIC: declare void @__llvm_profile_instrument_target(i64, i8*, i32) ; STATIC-EXT: declare void @__llvm_profile_instrument_target(i64, i8*, i32 zeroext) Index: test/Transforms/PGOProfile/comdat_internal.ll =================================================================== --- test/Transforms/PGOProfile/comdat_internal.ll +++ test/Transforms/PGOProfile/comdat_internal.ll @@ -14,9 +14,9 @@ ; CHECK: @__llvm_profile_raw_version = constant i64 {{[0-9]+}}, comdat ; CHECK: @__profn__stdin__foo = private constant [11 x i8] c":foo" ; CHECK: @__profc__stdin__foo.[[FOO_HASH]] = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat($__profv__stdin__foo.[[FOO_HASH]]), align 8 -; CHECK: @__profd__stdin__foo.[[FOO_HASH]] = private global { i64, i64, i64*, i8*, i8*, i32, [1 x i16] } { i64 -5640069336071256030, i64 [[FOO_HASH]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__foo.[[FOO_HASH]], i32 0, i32 0), i8* null +; CHECK: @__profd__stdin__foo.[[FOO_HASH]] = private global { i64, i64, i64*, i8*, i8*, i32, [2 x i16] } { i64 -5640069336071256030, i64 [[FOO_HASH]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__foo.[[FOO_HASH]], i32 0, i32 0), i8* null ; CHECK-NOT: bitcast (i32 ()* @foo to i8*) -; CHECK-SAME: , i8* null, i32 1, [1 x i16] zeroinitializer }, section "__llvm_prf_data", comdat($__profv__stdin__foo.[[FOO_HASH]]), align 8 +; CHECK-SAME: , i8* null, i32 1, [2 x i16] zeroinitializer }, section 
"__llvm_prf_data", comdat($__profv__stdin__foo.[[FOO_HASH]]), align 8 ; CHECK: @__llvm_prf_nm ; CHECK: @llvm.used Index: test/Transforms/PGOProfile/memcpy.ll =================================================================== --- /dev/null +++ test/Transforms/PGOProfile/memcpy.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -pgo-instr-gen -instrprof -S | FileCheck %s +; RUN: opt <%s -passes=pgo-instr-gen,instrprof -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @foo(i8* %dst, i8* %src, i32* %a, i32 %n) { +entry: + br label %for.cond + +for.cond: + %i.0 = phi i32 [ 0, %entry ], [ %add, %for.cond1 ] + %cmp = icmp slt i32 %i.0, %n + br i1 %cmp, label %for.cond1, label %for.end6 + +for.cond1: + %j.0 = phi i32 [ %inc, %for.body3 ], [ 0, %for.cond ] + %idx.ext = sext i32 %i.0 to i64 + %add.ptr = getelementptr inbounds i32, i32* %a, i64 %idx.ext + %0 = load i32, i32* %add.ptr, align 4 + %cmp2 = icmp slt i32 %j.0, %0 + %add = add nsw i32 %i.0, 1 + br i1 %cmp2, label %for.body3, label %for.cond + +for.body3: + %conv = sext i32 %add to i64 +; CHECK: call void @__llvm_profile_instrument_range(i64 %conv, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 0) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %conv, i32 1, i1 false) + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end6: + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) Index: tools/llvm-profdata/llvm-profdata.cpp =================================================================== --- tools/llvm-profdata/llvm-profdata.cpp +++ tools/llvm-profdata/llvm-profdata.cpp @@ -448,6 +448,7 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts, bool ShowIndirectCallTargets, + bool ShowMemOPSizes, bool ShowDetailedSummary, std::vector DetailedSummaryCutoffs, bool ShowAllFunctions, @@ -502,6 +503,10 @@ 
OS << " Indirect Call Site Count: " << Func.getNumValueSites(IPVK_IndirectCallTarget) << "\n"; + uint32_t MemOPSize = Func.getNumValueSites(IPVK_MemOPSize); + if (ShowMemOPSizes && MemOPSize > 0) + OS << " Memory Intrinsics Size: " << MemOPSize << "\n"; + if (ShowCounts) { OS << " Block counts: ["; size_t Start = (IsIRInstr ? 0 : 1); @@ -534,6 +539,26 @@ } } } + if (ShowMemOPSizes && MemOPSize > 0) { + uint32_t NS = Func.getNumValueSites(IPVK_MemOPSize); + OS << " Memory Intrinsic Size Results:\n"; + for (size_t I = 0; I < NS; ++I) { + uint32_t NV = Func.getNumValueDataForSite(IPVK_MemOPSize, I); + std::unique_ptr VD = + Func.getValueForSite(IPVK_MemOPSize, I); + TotalNumValues += NV; + for (uint32_t V = 0; V < NV; V++) { + OS << "\t[ " << I << ", "; + int64_t Size = VD[V].Value; + if (Size < 0) + OS << ">=" << -Size; + else + OS << Size; + OS << ", " << VD[V].Count + << " ]\n"; + } + } + } } } if (Reader->hasError()) @@ -608,6 +633,9 @@ cl::opt ShowIndirectCallTargets( "ic-targets", cl::init(false), cl::desc("Show indirect call site target values for shown functions")); + cl::opt ShowMemOPSizes( + "memop-sizes", cl::init(false), + cl::desc("Show the sizes of the memory intrinsic calls")); cl::opt ShowDetailedSummary("detailed-summary", cl::init(false), cl::desc("Show detailed profile summary")); cl::list DetailedSummaryCutoffs( @@ -646,6 +674,7 @@ DetailedSummaryCutoffs.end()); if (ProfileKind == instr) return showInstrProfile(Filename, ShowCounts, ShowIndirectCallTargets, + ShowMemOPSizes, ShowDetailedSummary, DetailedSummaryCutoffs, ShowAllFunctions, ShowFunction, TextFormat, OS); else