Index: llvm/include/llvm/ProfileData/InstrProf.h =================================================================== --- llvm/include/llvm/ProfileData/InstrProf.h +++ llvm/include/llvm/ProfileData/InstrProf.h @@ -1132,6 +1132,9 @@ void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart, int64_t &RangeLast); +// Parse ProfileSampleRate option. +void getProfileSampleRate(StringRef Str, int64_t &Sample, int64_t &Whole); + // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime // aware this is an ir_level profile so it can set the version flag. void createIRLevelProfileFlagVar(Module &M, bool IsCS); Index: llvm/include/llvm/ProfileData/InstrProfData.inc =================================================================== --- llvm/include/llvm/ProfileData/InstrProfData.inc +++ llvm/include/llvm/ProfileData/InstrProfData.inc @@ -646,6 +646,7 @@ #define VARIANT_MASK_CSIR_PROF (0x1ULL << 57) #define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version #define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime +#define INSTR_PROF_PROFILE_SAMPLING_VAR __llvm_profile_sampling /* The variable that holds the name of the profile data * specified via command line. 
*/ Index: llvm/include/llvm/Transforms/Instrumentation.h =================================================================== --- llvm/include/llvm/Transforms/Instrumentation.h +++ llvm/include/llvm/Transforms/Instrumentation.h @@ -94,7 +94,7 @@ createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef(""), bool IsCS = false); ModulePass *createPGOInstrumentationGenCreateVarLegacyPass( - StringRef CSInstrName = StringRef("")); + StringRef CSInstrName = StringRef(""), bool Sampling = false); ModulePass *createPGOIndirectCallPromotionLegacyPass(bool InLTO = false, bool SamplePGO = false); FunctionPass *createPGOMemOPSizeOptLegacyPass(); @@ -139,12 +139,18 @@ // Use BFI to guide register promotion bool UseBFIInPromotion = false; + // Use sampling to reduce the profile instrumentation runtime overhead. + bool Sampling = false; + // Name of the profile file to use as output std::string InstrProfileOutput; InstrProfOptions() = default; }; +// Create the variable for profile sampling. +void createProfileSamplingVar(Module &M); + /// Insert frontend instrumentation based profiling. Parameter IsCS indicates if // this is the context senstive instrumentation. ModulePass *createInstrProfilingLegacyPass( Index: llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h =================================================================== --- llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h +++ llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h @@ -41,6 +41,9 @@ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); bool run(Module &M, const TargetLibraryInfo &TLI); + // Check if profile sampling is enabled. + bool isSamplingEnabled() const; + private: InstrProfOptions Options; Module *M; @@ -72,6 +75,12 @@ // The end value of precise value profile range for memory intrinsic sizes. int64_t MemOPSizeRangeLast; + // SampleDuration and WholeDuration are used in profile sampling.
We will + // record the first SampleDuration number of count increments for every + // WholeDuration of increments. + int64_t SampleDuration; + int64_t WholeDuration; + int64_t TotalCountersPromoted = 0; /// Lower instrumentation intrinsics in the function. Returns true if there @@ -96,6 +105,9 @@ /// Force emitting of name vars for unused functions. void lowerCoverageData(GlobalVariable *CoverageNamesVar); + /// Lower the incremental instructions under profile sampling predicates. + void doSampling(Instruction *I); + /// Get the region counters for an increment, creating them if necessary. /// /// If the counter array doesn't yet exist, the profile data variables Index: llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h =================================================================== --- llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h +++ llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h @@ -35,12 +35,14 @@ class PGOInstrumentationGenCreateVar : public PassInfoMixin { public: - PGOInstrumentationGenCreateVar(std::string CSInstrName = "") - : CSInstrName(CSInstrName) {} + PGOInstrumentationGenCreateVar(std::string CSInstrName = "", + bool Sampling = false) + : CSInstrName(CSInstrName), ProfileSampling(Sampling) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); private: std::string CSInstrName; + bool ProfileSampling; }; /// The instrumentation (profile-instr-gen) pass for IR based PGO. 
Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -226,6 +226,7 @@ extern cl::opt EnableHotColdSplit; extern cl::opt EnableOrderFileInstrumentation; +extern cl::opt PGOSampling; extern cl::opt FlattenedProfileUsed; @@ -590,6 +591,7 @@ Options.InstrProfileOutput = ProfileFile; Options.DoCounterPromotion = true; Options.UseBFIInPromotion = IsCS; + Options.Sampling = PGOSampling; MPM.addPass(InstrProfiling(Options, IsCS)); } else if (!ProfileFile.empty()) { MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS)); @@ -728,7 +730,8 @@ } if (PGOOpt && Phase != ThinLTOPhase::PostLink && PGOOpt->CSAction == PGOOptions::CSIRInstr) - MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile)); + MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile, + PGOSampling)); // Synthesize function entry counts for non-PGO compilation. if (EnableSyntheticCounts && !PGOOpt) Index: llvm/lib/ProfileData/InstrProf.cpp =================================================================== --- llvm/lib/ProfileData/InstrProf.cpp +++ llvm/lib/ProfileData/InstrProf.cpp @@ -1110,6 +1110,21 @@ return true; } +// Get integer number pair from a string of format +// "" or ":" +static void getIntPairFromString(StringRef Str, int64_t &Val1, int64_t &Val2) { + if (Str.empty()) + return; + auto Pos = Str.find(':'); + if (Pos != std::string::npos) { + if (Pos > 0) + Str.substr(0, Pos).getAsInteger(10, Val1); + if (Pos < Str.size() - 1) + Str.substr(Pos + 1).getAsInteger(10, Val2); + } else + Str.getAsInteger(10, Val2); +} + // Parse the value profile options. 
void getMemOPSizeRangeFromOption(StringRef MemOPSizeRange, int64_t &RangeStart, int64_t &RangeLast) { @@ -1117,20 +1132,24 @@ static const int64_t DefaultMemOPSizeRangeLast = 8; RangeStart = DefaultMemOPSizeRangeStart; RangeLast = DefaultMemOPSizeRangeLast; - - if (!MemOPSizeRange.empty()) { - auto Pos = MemOPSizeRange.find(':'); - if (Pos != std::string::npos) { - if (Pos > 0) - MemOPSizeRange.substr(0, Pos).getAsInteger(10, RangeStart); - if (Pos < MemOPSizeRange.size() - 1) - MemOPSizeRange.substr(Pos + 1).getAsInteger(10, RangeLast); - } else - MemOPSizeRange.getAsInteger(10, RangeLast); - } + getIntPairFromString(MemOPSizeRange, RangeStart, RangeLast); assert(RangeLast >= RangeStart); } +// Parse the --profile-sample-rate option, given as WHOLE or SAMPLE:WHOLE. +// A field that is absent from the string keeps its default value. +void getProfileSampleRate(StringRef SampleRate, int64_t &Sample, + int64_t &Whole) { + // Seed both outputs first: getIntPairFromString only writes the fields + // that are present, so callers never see an indeterminate value. + static const int64_t DefaultSampleDuration = 100; + static const int64_t DefaultWholeDuration = 100019; + Sample = DefaultSampleDuration; + Whole = DefaultWholeDuration; + getIntPairFromString(SampleRate, Sample, Whole); + assert(Whole >= Sample); +} + // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime // aware this is an ir_level profile so it can set the version flag.
void createIRLevelProfileFlagVar(Module &M, bool IsCS) { Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -147,6 +147,10 @@ "enable-order-file-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable order file instrumentation (default = off)")); +cl::opt PGOSampling( + "enable-pgo-sampling", cl::init(false), cl::Hidden, + cl::desc("Enable PGO instrumentation sampling (default = off)")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -304,6 +308,7 @@ Options.InstrProfileOutput = PGOInstrGen; Options.DoCounterPromotion = true; Options.UseBFIInPromotion = IsCS; + Options.Sampling = PGOSampling; MPM.add(createLoopRotatePass()); MPM.add(createInstrProfilingLegacyPass(Options, IsCS)); } @@ -541,7 +546,8 @@ // Create profile COMDAT variables. Lld linker wants to see all variables // before the LTO/ThinLTO link since it needs to resolve symbols/comdats. if (!PerformThinLTO && EnablePGOCSInstrGen) - MPM.add(createPGOInstrumentationGenCreateVarLegacyPass(PGOInstrGen)); + MPM.add(createPGOInstrumentationGenCreateVarLegacyPass(PGOInstrGen, + PGOSampling)); // We add a module alias analysis pass here. 
In part due to bugs in the // analysis infrastructure this "works" in that the analysis stays alive Index: llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp =================================================================== --- llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" @@ -71,6 +72,15 @@ "Value of 0 disables the large value profiling."), cl::init(8192)); +// The sample rate for PGO instrumentation sampling: how many count increments +// are recorded out of every period of count increments. +cl::opt ProfileSampleRate( + "profile-sample-rate", + cl::desc("Set the sample rate for pgo instrumentation, in a format of " + ":. We will record the count for the first val1 " + "number count increments for every val2 number of increments."), + cl::init("100:100019")); + namespace { cl::opt DoNameCompression("enable-name-compression", @@ -142,6 +152,9 @@ cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true), cl::desc("Allow counter promotion across the whole loop nest.")); +static cl::opt ProfileSampling("profile-sampling", cl::ZeroOrMore, + cl::desc("Do PGO instrumentation sampling")); + class InstrProfilingLegacyPass : public ModulePass { InstrProfiling InstrProf; @@ -161,7 +174,9 @@ } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); + // Sampling introduces CFG changes. + if (!InstrProf.isSamplingEnabled()) + AU.setPreservesCFG(); AU.addRequired(); } }; @@ -399,22 +414,80 @@ return dyn_cast(Instr); } +// Enable instrumentation sampling.
+// We transform: +// Increment_Instruction; +// Instructions_after; +// to: +// CountVar = CountVar + 1; +// if (CountVar <= SampleDuration) +// Increment_Instruction; +// else if (CountVar >= WholeDuration) +// CountVar = 0; +// Instructions_after; +// CountVar is a thread-local global shared by all PGO instrumentation +// variables (value-instrumentation and edge instrumentation). +void InstrProfiling::doSampling(Instruction *I) { + if (!isSamplingEnabled()) + return; + auto *Int64Ty = Type::getInt64Ty(M->getContext()); + auto *CountVar = + M->getGlobalVariable(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR)); + assert(CountVar && "CountVar not set properly"); + UsedVars.push_back(CountVar); + IRBuilder<> Builder(I); + auto *LoadCountVar = Builder.CreateLoad(Int64Ty, CountVar); + auto *NewVal = Builder.CreateAdd(LoadCountVar, Builder.getInt64(1)); + Builder.CreateStore(NewVal, CountVar); + auto *DurationCond = + Builder.CreateICmpULE(NewVal, Builder.getInt64(SampleDuration)); + Instruction *ThenTerm, *ElseTerm; + uint64_t Scale = calculateCountScale(WholeDuration); + MDBuilder MDB(I->getContext()); + MDNode *BranchWeight = MDB.createBranchWeights( + scaleBranchCount(SampleDuration, Scale), + scaleBranchCount(WholeDuration - SampleDuration, Scale)); + SplitBlockAndInsertIfThenElse(DurationCond, I, &ThenTerm, &ElseTerm, + BranchWeight); + IRBuilder<> ElseBuilder(ElseTerm); + auto *RateCond = + ElseBuilder.CreateICmpUGE(NewVal, ElseBuilder.getInt64(WholeDuration)); + Scale = calculateCountScale(WholeDuration - SampleDuration); + MDNode *BranchWeight2 = MDB.createBranchWeights( + scaleBranchCount(1, Scale), + scaleBranchCount(WholeDuration - SampleDuration - 1, Scale)); + auto *ZBT = + SplitBlockAndInsertIfThen(RateCond, ElseTerm, false, BranchWeight2); + IRBuilder<> Builder2(ZBT); + Builder2.CreateStore(Builder2.getInt64(0), CountVar); + I->moveBefore(ThenTerm); +} + bool InstrProfiling::lowerIntrinsics(Function *F) { bool MadeChange = false;
PromotionCandidates.clear(); + SmallVector Incs; + SmallVector Inds; for (BasicBlock &BB : *F) { for (auto I = BB.begin(), E = BB.end(); I != E;) { auto Instr = I++; InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr); - if (Inc) { - lowerIncrement(Inc); - MadeChange = true; - } else if (auto *Ind = dyn_cast(Instr)) { - lowerValueProfileInst(Ind); - MadeChange = true; - } + if (Inc) + Incs.push_back(Inc); + else if (auto *Ind = dyn_cast(Instr)) + Inds.push_back(Ind); } } + for (auto &I : Incs) { + doSampling(I); + lowerIncrement(I); + MadeChange = true; + } + for (auto &I : Inds) { + doSampling(I); + lowerValueProfileInst(I); + MadeChange = true; + } if (!MadeChange) return false; @@ -423,6 +496,12 @@ return true; } +bool InstrProfiling::isSamplingEnabled() const { + if (ProfileSampling.getNumOccurrences() > 0) + return ProfileSampling; + return Options.Sampling; +} + bool InstrProfiling::isCounterPromotionEnabled() const { if (DoCounterPromotion.getNumOccurrences() > 0) return DoCounterPromotion; @@ -491,11 +570,16 @@ UsedVars.clear(); getMemOPSizeRangeFromOption(MemOPSizeRange, MemOPSizeRangeStart, MemOPSizeRangeLast); + if (isSamplingEnabled()) + getProfileSampleRate(ProfileSampleRate, SampleDuration, WholeDuration); + TT = Triple(M.getTargetTriple()); // Emit the runtime hook even if no counters are present. bool MadeChange = emitRuntimeHook(); + if (!IsCS && isSamplingEnabled()) + createProfileSamplingVar(M); // Improve compile time by avoiding linear scans when there is no work. GlobalVariable *CoverageNamesVar = M.getNamedGlobal(getCoverageUnusedNamesVarName()); @@ -1035,3 +1119,22 @@ appendToGlobalCtors(*M, F, 0); } + +namespace llvm { +// Create the variable for profile sampling. 
+void createProfileSamplingVar(Module &M) { + const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR)); + Type *IntTy64 = Type::getInt64Ty(M.getContext()); + auto SamplingVar = new GlobalVariable( + M, IntTy64, false, GlobalValue::WeakAnyLinkage, + Constant::getIntegerValue(IntTy64, APInt(64, 0)), VarName); + SamplingVar->setVisibility(GlobalValue::DefaultVisibility); + SamplingVar->setThreadLocal(true); + Triple TT(M.getTargetTriple()); + if (TT.supportsCOMDAT()) { + SamplingVar->setLinkage(GlobalValue::ExternalLinkage); + SamplingVar->setComdat(M.getOrInsertComdat(VarName)); + } + appendToUsed(M, SamplingVar); +} +} Index: llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp =================================================================== --- llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -449,8 +449,10 @@ StringRef getPassName() const override { return "PGOInstrumentationGenCreateVarPass"; } - PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "") - : ModulePass(ID), InstrProfileOutput(CSInstrName) { + PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "", + bool Sampling = false) + : ModulePass(ID), InstrProfileOutput(CSInstrName), + ProfileSampling(Sampling) { initializePGOInstrumentationGenCreateVarLegacyPassPass( *PassRegistry::getPassRegistry()); } @@ -459,9 +461,12 @@ bool runOnModule(Module &M) override { createProfileFileNameVar(M, InstrProfileOutput); createIRLevelProfileFlagVar(M, true); + if (ProfileSampling) + createProfileSamplingVar(M); return false; } std::string InstrProfileOutput; + bool ProfileSampling; }; } // end anonymous namespace @@ -502,8 +507,9 @@ false) ModulePass * -llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) { - return new PGOInstrumentationGenCreateVarLegacyPass(CSInstrName); +llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName, + bool Sampling) 
{ + return new PGOInstrumentationGenCreateVarLegacyPass(CSInstrName, Sampling); } namespace { @@ -1559,6 +1565,8 @@ PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) { createProfileFileNameVar(M, CSInstrName); createIRLevelProfileFlagVar(M, /* IsCS */ true); + if (ProfileSampling) + createProfileSamplingVar(M); return PreservedAnalyses::all(); } Index: llvm/test/Transforms/PGOProfile/Inputs/cspgo_bar_sample.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PGOProfile/Inputs/cspgo_bar_sample.ll @@ -0,0 +1,82 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +$__llvm_profile_filename = comdat any +$__llvm_profile_raw_version = comdat any +$__llvm_profile_sampling = comdat any + +@odd = common dso_local local_unnamed_addr global i32 0, align 4 +@even = common dso_local local_unnamed_addr global i32 0, align 4 +@__llvm_profile_filename = local_unnamed_addr constant [25 x i8] c"pass2/default_%m.profraw\00", comdat +@__llvm_profile_raw_version = local_unnamed_addr constant i64 216172782113783812, comdat +@__llvm_profile_sampling = thread_local global i64 0, comdat +@llvm.used = appending global [1 x i8*] [i8* bitcast (i64* @__llvm_profile_sampling to i8*)], section "llvm.metadata" + +define dso_local void @bar(i32 %n) !prof !30 { +entry: + %call = tail call fastcc i32 @cond(i32 %n) + %tobool = icmp eq i32 %call, 0 + br i1 %tobool, label %if.else, label %if.then, !prof !31 + +if.then: + %0 = load i32, i32* @odd, align 4, !tbaa !32 + %inc = add i32 %0, 1 + store i32 %inc, i32* @odd, align 4, !tbaa !32 + br label %if.end + +if.else: + %1 = load i32, i32* @even, align 4, !tbaa !32 + %inc1 = add i32 %1, 1 + store i32 %inc1, i32* @even, align 4, !tbaa !32 + br label %if.end + +if.end: + ret void +} + +define internal fastcc i32 @cond(i32 %i) #1 !prof !30 !PGOFuncName !36 { +entry: + %rem = srem i32 %i, 2 + ret i32 %rem +} + +attributes 
#1 = { inlinehint noinline } + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"EnableSplitLTOUnit", i32 0} +!2 = !{i32 1, !"ProfileSummary", !3} +!3 = !{!4, !5, !6, !7, !8, !9, !10, !11} +!4 = !{!"ProfileFormat", !"InstrProf"} +!5 = !{!"TotalCount", i64 500002} +!6 = !{!"MaxCount", i64 200000} +!7 = !{!"MaxInternalCount", i64 100000} +!8 = !{!"MaxFunctionCount", i64 200000} +!9 = !{!"NumCounts", i64 6} +!10 = !{!"NumFunctions", i64 4} +!11 = !{!"DetailedSummary", !12} +!12 = !{!13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28} +!13 = !{i32 10000, i64 200000, i32 1} +!14 = !{i32 100000, i64 200000, i32 1} +!15 = !{i32 200000, i64 200000, i32 1} +!16 = !{i32 300000, i64 200000, i32 1} +!17 = !{i32 400000, i64 200000, i32 1} +!18 = !{i32 500000, i64 100000, i32 4} +!19 = !{i32 600000, i64 100000, i32 4} +!20 = !{i32 700000, i64 100000, i32 4} +!21 = !{i32 800000, i64 100000, i32 4} +!22 = !{i32 900000, i64 100000, i32 4} +!23 = !{i32 950000, i64 100000, i32 4} +!24 = !{i32 990000, i64 100000, i32 4} +!25 = !{i32 999000, i64 100000, i32 4} +!26 = !{i32 999900, i64 100000, i32 4} +!27 = !{i32 999990, i64 100000, i32 4} +!28 = !{i32 999999, i64 1, i32 6} +!30 = !{!"function_entry_count", i64 200000} +!31 = !{!"branch_weights", i32 100000, i32 100000} +!32 = !{!33, !33, i64 0} +!33 = !{!"int", !34, i64 0} +!34 = !{!"omnipotent char", !35, i64 0} +!35 = !{!"Simple C/C++ TBAA"} +!36 = !{!"cspgo_bar.c:cond"} Index: llvm/test/Transforms/PGOProfile/cspgo_sample.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PGOProfile/cspgo_sample.ll @@ -0,0 +1,111 @@ +; REQUIRES: x86-registered-target + +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %S/Inputs/cspgo_bar_sample.ll -o %t2.bc +; RUN: llvm-lto2 run -lto-cspgo-profile-file=alloc -enable-pgo-sampling -lto-cspgo-gen -save-temps -o %t %t1.bc %t2.bc \ +; RUN: -r=%t1.bc,foo,pl \ +; 
RUN: -r=%t1.bc,bar,l \ +; RUN: -r=%t1.bc,main,plx \ +; RUN: -r=%t1.bc,__llvm_profile_filename,plx \ +; RUN: -r=%t1.bc,__llvm_profile_raw_version,plx \ +; RUN: -r=%t1.bc,__llvm_profile_sampling,pl \ +; RUN: -r=%t2.bc,bar,pl \ +; RUN: -r=%t2.bc,odd,pl \ +; RUN: -r=%t2.bc,even,pl \ +; RUN: -r=%t2.bc,__llvm_profile_filename,x \ +; RUN: -r=%t2.bc,__llvm_profile_raw_version,x \ +; RUN: -r=%t2.bc,__llvm_profile_sampling, +; RUN: llvm-dis %t.1.4.opt.bc -o - | FileCheck %s --check-prefix=CSGEN + +; CSGEN: @__llvm_profile_sampling = thread_local global i64 0, comdat +; CSGEN: @__profc_ +; CSGEN: @__profd_ + +source_filename = "cspgo.c" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +$__llvm_profile_filename = comdat any +$__llvm_profile_raw_version = comdat any +$__llvm_profile_sampling = comdat any +@__llvm_profile_filename = local_unnamed_addr constant [25 x i8] c"pass2/default_%m.profraw\00", comdat +@__llvm_profile_raw_version = local_unnamed_addr constant i64 216172782113783812, comdat +@__llvm_profile_sampling = thread_local global i64 0, comdat +@llvm.used = appending global [1 x i8*] [i8* bitcast (i64* @__llvm_profile_sampling to i8*)], section "llvm.metadata" + +define dso_local void @foo() #0 !prof !30 { +entry: + br label %for.body + +for.body: + %i.06 = phi i32 [ 0, %entry ], [ %add1, %for.body ] + tail call void @bar(i32 %i.06) #3 + %add = or i32 %i.06, 1 + tail call void @bar(i32 %add) #3 + %add1 = add nuw nsw i32 %i.06, 2 + %cmp = icmp ult i32 %add1, 200000 + br i1 %cmp, label %for.body, label %for.end, !prof !31 + +for.end: + ret void +} + +; CSGEN: entry: +; CSGEN: [[TMP0:%.*]] = load i64, i64* @__llvm_profile_sampling, align 8 +; CSGEN: [[TMP1:%.*]] = add i64 [[TMP0]], 1 +; CSGEN: store i64 [[TMP1]], i64* @__llvm_profile_sampling, align 8 +; CSGEN: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 101 +; CSGEN: br i1 [[TMP2]], label %{{.*}}, label %[[LABEL1:.*]], !prof [[PROF1:![0-9]+]] +; CSGEN: [[LABEL1]]: 
+; CSGEN: [[TMP6:%.*]] = icmp ugt i64 [[TMP1]], 100018 +; CSGEN: br i1 [[TMP6]], label %[[LABEL2:.*]], label %{{.*}}, !prof [[PROF2:![0-9]+]] +; CSGEN: [[LABEL2]]: +; CSGEN: store i64 0, i64* @__llvm_profile_sampling, align 8 + +declare dso_local void @bar(i32) + +define dso_local i32 @main() !prof !30 { +entry: + tail call void @foo() + ret i32 0 +} + +attributes #0 = { "target-cpu"="x86-64" } + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"EnableSplitLTOUnit", i32 0} +!2 = !{i32 1, !"ProfileSummary", !3} +!3 = !{!4, !5, !6, !7, !8, !9, !10, !11} +!4 = !{!"ProfileFormat", !"InstrProf"} +!5 = !{!"TotalCount", i64 500002} +!6 = !{!"MaxCount", i64 200000} +!7 = !{!"MaxInternalCount", i64 100000} +!8 = !{!"MaxFunctionCount", i64 200000} +!9 = !{!"NumCounts", i64 6} +!10 = !{!"NumFunctions", i64 4} +!11 = !{!"DetailedSummary", !12} +!12 = !{!13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28} +!13 = !{i32 10000, i64 200000, i32 1} +!14 = !{i32 100000, i64 200000, i32 1} +!15 = !{i32 200000, i64 200000, i32 1} +!16 = !{i32 300000, i64 200000, i32 1} +!17 = !{i32 400000, i64 200000, i32 1} +!18 = !{i32 500000, i64 100000, i32 4} +!19 = !{i32 600000, i64 100000, i32 4} +!20 = !{i32 700000, i64 100000, i32 4} +!21 = !{i32 800000, i64 100000, i32 4} +!22 = !{i32 900000, i64 100000, i32 4} +!23 = !{i32 950000, i64 100000, i32 4} +!24 = !{i32 990000, i64 100000, i32 4} +!25 = !{i32 999000, i64 100000, i32 4} +!26 = !{i32 999900, i64 100000, i32 4} +!27 = !{i32 999990, i64 100000, i32 4} +!28 = !{i32 999999, i64 1, i32 6} +!30 = !{!"function_entry_count", i64 1} +!31 = !{!"branch_weights", i32 100000, i32 1} + +; CSGEN: [[PROF1]] = !{!"branch_weights", i32 100, i32 99919} +; CSGEN: [[PROF2]] = !{!"branch_weights", i32 1, i32 99918} + Index: llvm/test/Transforms/PGOProfile/instrprof_sample.ll =================================================================== --- /dev/null +++
llvm/test/Transforms/PGOProfile/instrprof_sample.ll @@ -0,0 +1,79 @@ +; RUN: opt < %s -instrprof -profile-sampling -S | FileCheck %s --check-prefixes=SAMPLE-VAR,DEFAULT-SAMPLE-RATE +; RUN: opt < %s -passes=instrprof -profile-sampling -S | FileCheck %s --check-prefixes=SAMPLE-VAR,DEFAULT-SAMPLE-RATE +; RUN: opt < %s -instrprof -profile-sampling -profile-sample-rate=50:100 -S | FileCheck %s --check-prefixes=SAMPLE-VAR,SAMPLE-RATE-50-100 +; RUN: opt < %s -passes=instrprof -profile-sampling -S -profile-sample-rate=50:100 | FileCheck %s --check-prefixes=SAMPLE-VAR,SAMPLE-RATE-50-100 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +$__llvm_profile_raw_version = comdat any + +; SAMPLE-VAR: $__llvm_profile_sampling = comdat any + +@__llvm_profile_raw_version = constant i64 72057594037927940, comdat +@__profn_f = private constant [1 x i8] c"f" + +; SAMPLE-VAR: @__llvm_profile_sampling = thread_local global i64 0, comdat +; SAMPLE-VAR: @__profc_f = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", align 8 +; SAMPLE-VAR: @__profd_f = private global { i64, i64, i64*, i8*, i8*, i32, [2 x i16] } { i64 -3706093650706652785, i64 12884901887, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_f, i32 0, i32 0), i8* bitcast (void ()* @f to i8*), i8* null, i32 1, [2 x i16] zeroinitializer }, section "__llvm_prf_data", align 8 +; SAMPLE-VAR: @__llvm_prf_nm = private constant [11 x i8] c"\01\09x\DAK\03\00\00g\00g", section "__llvm_prf_names", align 1 +; SAMPLE-VAR: @llvm.used = appending global [3 x i8*] [i8* bitcast (i64* @__llvm_profile_sampling to i8*), i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_f to i8*), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__llvm_prf_nm, i32 0, i32 0)], section "llvm.metadata" + +define void @f() { +; DEFAULT-SAMPLE-RATE-LABEL: @f( +; DEFAULT-SAMPLE-RATE-NEXT: entry: +; DEFAULT-SAMPLE-RATE-NEXT: [[TMP0:%.*]] = load i64, i64* 
@__llvm_profile_sampling +; DEFAULT-SAMPLE-RATE-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 1 +; DEFAULT-SAMPLE-RATE-NEXT: store i64 [[TMP1]], i64* @__llvm_profile_sampling +; DEFAULT-SAMPLE-RATE-NEXT: [[TMP2:%.*]] = icmp ule i64 [[TMP1]], 100 +; DEFAULT-SAMPLE-RATE-NEXT: br i1 [[TMP2]], label %[[TMP3:.*]], label %[[TMP5:.*]], !prof !0 +; DEFAULT-SAMPLE-RATE: [[TMP3]]: +; DEFAULT-SAMPLE-RATE-NEXT: [[PGOCOUNT:%.*]] = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_f, i64 0, i64 0) +; DEFAULT-SAMPLE-RATE-NEXT: [[TMP4:%.*]] = add i64 [[PGOCOUNT]], 1 +; DEFAULT-SAMPLE-RATE-NEXT: store i64 [[TMP4]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_f, i64 0, i64 0) +; DEFAULT-SAMPLE-RATE-NEXT: br label [[TMP9:%.*]] +; DEFAULT-SAMPLE-RATE: [[TMP5]]: +; DEFAULT-SAMPLE-RATE-NEXT: [[TMP6:%.*]] = icmp uge i64 [[TMP1]], 100019 +; DEFAULT-SAMPLE-RATE-NEXT: br i1 [[TMP6]], label %[[TMP7:.*]], label [[TMP8:%.*]], !prof !1 +; DEFAULT-SAMPLE-RATE: [[TMP7]]: +; DEFAULT-SAMPLE-RATE-NEXT: store i64 0, i64* @__llvm_profile_sampling +; DEFAULT-SAMPLE-RATE-NEXT: br label %[[TMP8:.*]] +; DEFAULT-SAMPLE-RATE: [[TMP8]]: +; DEFAULT-SAMPLE-RATE-NEXT: br label %[[TMP9:.*]] +; DEFAULT-SAMPLE-RATE: [[TMP9]]: +; DEFAULT-SAMPLE-RATE-NEXT: ret void +; +; SAMPLE-RATE-50-100-LABEL: @f( +; SAMPLE-RATE-50-100-NEXT: entry: +; SAMPLE-RATE-50-100-NEXT: [[TMP0:%.*]] = load i64, i64* @__llvm_profile_sampling +; SAMPLE-RATE-50-100-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 1 +; SAMPLE-RATE-50-100-NEXT: store i64 [[TMP1]], i64* @__llvm_profile_sampling +; SAMPLE-RATE-50-100-NEXT: [[TMP2:%.*]] = icmp ule i64 [[TMP1]], 50 +; SAMPLE-RATE-50-100-NEXT: br i1 [[TMP2]], label %[[TMP3:.*]], label %[[TMP5:.*]], !prof !0 +; SAMPLE-RATE-50-100: [[TMP3]]: +; SAMPLE-RATE-50-100-NEXT: [[PGOCOUNT:%.*]] = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_f, i64 0, i64 0) +; SAMPLE-RATE-50-100-NEXT: [[TMP4:%.*]] = add i64 [[PGOCOUNT]], 1 +; SAMPLE-RATE-50-100-NEXT: store i64 
[[TMP4]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_f, i64 0, i64 0) +; SAMPLE-RATE-50-100-NEXT: br label %[[TMP9:.*]] +; SAMPLE-RATE-50-100: [[TMP5]]: +; SAMPLE-RATE-50-100-NEXT: [[TMP6:%.*]] = icmp uge i64 [[TMP1]], 100 +; SAMPLE-RATE-50-100-NEXT: br i1 [[TMP6]], label %[[TMP7:.*]], label %[[TMP8:.*]], !prof !1 +; SAMPLE-RATE-50-100: [[TMP7]]: +; SAMPLE-RATE-50-100-NEXT: store i64 0, i64* @__llvm_profile_sampling +; SAMPLE-RATE-50-100-NEXT: br label %[[TMP8:.*]] +; SAMPLE-RATE-50-100: [[TMP8]]: +; SAMPLE-RATE-50-100-NEXT: br label %[[TMP9:.*]] +; SAMPLE-RATE-50-100: [[TMP9]]: +; SAMPLE-RATE-50-100-NEXT: ret void +; +entry: + call void @llvm.instrprof.increment(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__profn_f, i32 0, i32 0), i64 12884901887, i32 1, i32 0) + ret void +} + +; DEFAULT-SAMPLE-RATE: !0 = !{!"branch_weights", i32 100, i32 99919} +; DEFAULT-SAMPLE-RATE: !1 = !{!"branch_weights", i32 1, i32 99918} +; SAMPLE-RATE-50-100: !0 = !{!"branch_weights", i32 50, i32 50} +; SAMPLE-RATE-50-100: !1 = !{!"branch_weights", i32 1, i32 49} + +declare void @llvm.instrprof.increment(i8*, i64, i32, i32)