Index: llvm/include/llvm/ProfileData/InstrProf.h =================================================================== --- llvm/include/llvm/ProfileData/InstrProf.h +++ llvm/include/llvm/ProfileData/InstrProf.h @@ -679,7 +679,8 @@ void merge(InstrProfValueSiteRecord &Input, uint64_t Weight, function_ref Warn); /// Scale up value profile data counts. - void scale(uint64_t Weight, function_ref Warn); + void scale(uint64_t Norm, uint64_t DeNorm, + function_ref Warn); /// Compute the overlap b/w this record and Input record. void overlap(InstrProfValueSiteRecord &Input, uint32_t ValueKind, @@ -753,8 +754,9 @@ function_ref Warn); /// Scale up profile counts (including value profile data) by - /// \p Weight. - void scale(uint64_t Weight, function_ref Warn); + /// \p Norm, then divide the counts by \p DeNorm. + void scale(uint64_t Norm, uint64_t DeNorm, + function_ref Warn); /// Sort value profile data (per site) by count. void sortValueData() { @@ -840,7 +842,7 @@ function_ref Warn); // Scale up value profile data count. - void scaleValueProfData(uint32_t ValueKind, uint64_t Weight, + void scaleValueProfData(uint32_t ValueKind, uint64_t Norm, uint64_t DeNorm, function_ref Warn); }; Index: llvm/include/llvm/ProfileData/InstrProfWriter.h =================================================================== --- llvm/include/llvm/ProfileData/InstrProfWriter.h +++ llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -47,6 +47,8 @@ InstrProfWriter(bool Sparse = false); ~InstrProfWriter(); + StringMap &getProfileData() { return FunctionData; } + /// Add function counts for the given function. If there are already counts /// for this function and the hash and number of counts match, each counter is /// summed. Optionally scale counts by \p Weight. 
Index: llvm/include/llvm/ProfileData/ProfileCommon.h =================================================================== --- llvm/include/llvm/ProfileData/ProfileCommon.h +++ llvm/include/llvm/ProfileData/ProfileCommon.h @@ -62,6 +62,10 @@ public: /// A vector of useful cutoff values for detailed summary. static const ArrayRef DefaultCutoffs; + + /// Find the first summary entry in \p DS whose cutoff is >= \p Percentile. + static const ProfileSummaryEntry & + getEntryForPercentile(SummaryEntryVector &DS, uint64_t Percentile); }; class InstrProfSummaryBuilder final : public ProfileSummaryBuilder { Index: llvm/lib/Analysis/ProfileSummaryInfo.cpp =================================================================== --- llvm/lib/Analysis/ProfileSummaryInfo.cpp +++ llvm/lib/Analysis/ProfileSummaryInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/InitializePasses.h" +#include "llvm/ProfileData/ProfileCommon.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -70,19 +71,6 @@ "partial-profile", cl::Hidden, cl::init(false), cl::desc("Specify the current profile is used as a partial profile.")); -// Find the summary entry for a desired percentile of counts. -static const ProfileSummaryEntry &getEntryForPercentile(SummaryEntryVector &DS, - uint64_t Percentile) { - auto It = partition_point(DS, [=](const ProfileSummaryEntry &Entry) { - return Entry.Cutoff < Percentile; - }); - // The required percentile has to be <= one of the percentiles in the - // detailed summary. - if (It == DS.end()) - report_fatal_error("Desired percentile exceeds the maximum cutoff"); - return *It; -} - // The profile summary metadata may be attached either by the frontend or by // any backend passes (IR level instrumentation, for example). 
This method // checks if the Summary is null and if so checks if the summary metadata is now @@ -270,13 +258,13 @@ if (!computeSummary()) return; auto &DetailedSummary = Summary->getDetailedSummary(); - auto &HotEntry = - getEntryForPercentile(DetailedSummary, ProfileSummaryCutoffHot); + auto &HotEntry = ProfileSummaryBuilder::getEntryForPercentile( + DetailedSummary, ProfileSummaryCutoffHot); HotCountThreshold = HotEntry.MinCount; if (ProfileSummaryHotCount.getNumOccurrences() > 0) HotCountThreshold = ProfileSummaryHotCount; - auto &ColdEntry = - getEntryForPercentile(DetailedSummary, ProfileSummaryCutoffCold); + auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile( + DetailedSummary, ProfileSummaryCutoffCold); ColdCountThreshold = ColdEntry.MinCount; if (ProfileSummaryColdCount.getNumOccurrences() > 0) ColdCountThreshold = ProfileSummaryColdCount; @@ -296,8 +284,8 @@ return iter->second; } auto &DetailedSummary = Summary->getDetailedSummary(); - auto &Entry = - getEntryForPercentile(DetailedSummary, PercentileCutoff); + auto &Entry = ProfileSummaryBuilder::getEntryForPercentile(DetailedSummary, + PercentileCutoff); uint64_t CountThreshold = Entry.MinCount; ThresholdCache[PercentileCutoff] = CountThreshold; return CountThreshold; Index: llvm/lib/ProfileData/InstrProf.cpp =================================================================== --- llvm/lib/ProfileData/InstrProf.cpp +++ llvm/lib/ProfileData/InstrProf.cpp @@ -625,11 +625,11 @@ } } -void InstrProfValueSiteRecord::scale(uint64_t Weight, +void InstrProfValueSiteRecord::scale(uint64_t Norm, uint64_t DeNorm, function_ref Warn) { for (auto I = ValueData.begin(), IE = ValueData.end(); I != IE; ++I) { bool Overflowed; - I->Count = SaturatingMultiply(I->Count, Weight, &Overflowed); + I->Count = SaturatingMultiply(I->Count, Norm, &Overflowed) / DeNorm; if (Overflowed) Warn(instrprof_error::counter_overflow); } @@ -678,22 +678,23 @@ } void InstrProfRecord::scaleValueProfData( - uint32_t ValueKind, uint64_t 
Weight, + uint32_t ValueKind, uint64_t Norm, uint64_t DeNorm, function_ref Warn) { for (auto &R : getValueSitesForKind(ValueKind)) - R.scale(Weight, Warn); + R.scale(Norm, DeNorm, Warn); } -void InstrProfRecord::scale(uint64_t Weight, +void InstrProfRecord::scale(uint64_t Norm, uint64_t DeNorm, function_ref Warn) { + assert(DeNorm != 0 && "DeNorm cannot be 0"); for (auto &Count : this->Counts) { bool Overflowed; - Count = SaturatingMultiply(Count, Weight, &Overflowed); + Count = SaturatingMultiply(Count, Norm, &Overflowed) / DeNorm; if (Overflowed) Warn(instrprof_error::counter_overflow); } for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) - scaleValueProfData(Kind, Weight, Warn); + scaleValueProfData(Kind, Norm, DeNorm, Warn); } // Map indirect call target name hash to name string. Index: llvm/lib/ProfileData/InstrProfWriter.cpp =================================================================== --- llvm/lib/ProfileData/InstrProfWriter.cpp +++ llvm/lib/ProfileData/InstrProfWriter.cpp @@ -240,7 +240,7 @@ // We've never seen a function with this name and hash, add it. Dest = std::move(I); if (Weight > 1) - Dest.scale(Weight, MapWarn); + Dest.scale(Weight, 1, MapWarn); } else { // We're updating a function we've seen before. Dest.merge(I, Weight, MapWarn); Index: llvm/lib/ProfileData/ProfileSummaryBuilder.cpp =================================================================== --- llvm/lib/ProfileData/ProfileSummaryBuilder.cpp +++ llvm/lib/ProfileData/ProfileSummaryBuilder.cpp @@ -31,6 +31,19 @@ const ArrayRef ProfileSummaryBuilder::DefaultCutoffs = DefaultCutoffsData; +const ProfileSummaryEntry & +ProfileSummaryBuilder::getEntryForPercentile(SummaryEntryVector &DS, + uint64_t Percentile) { + auto It = partition_point(DS, [=](const ProfileSummaryEntry &Entry) { + return Entry.Cutoff < Percentile; + }); + // The required percentile has to be <= one of the percentiles in the + // detailed summary. 
+ if (It == DS.end()) + report_fatal_error("Desired percentile exceeds the maximum cutoff"); + return *It; +} + void InstrProfSummaryBuilder::addRecord(const InstrProfRecord &R) { // The first counter is not necessarily an entry count for IR // instrumentation profiles. Index: llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp =================================================================== --- llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -249,6 +249,14 @@ "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation")); +// Insert counter in function entry block. +static cl::opt InsertEntryBlockCounter( + "insert-entry-block-counter", cl::init(false), cl::Hidden, + cl::desc("Always insert a counter in function entry " + "block. That is useful for using sample " + "profile as a supplement for instr " + "profile. ")); + // Command line option to turn on CFG dot dump after profile annotation. // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts extern cl::opt PGOViewCounts; @@ -717,6 +725,14 @@ } } +// Some basic blocks (such as catchswitch) cannot be instrumented. +// If the returned first insertion point is the end of BB, skip this BB. +static BasicBlock *canInstrument(BasicBlock *BB) { + if (BB->getFirstInsertionPt() == BB->end()) + return nullptr; + return BB; +} + // Collect all the BBs that will be instruments and return them in // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo. 
template @@ -728,9 +744,13 @@ for (auto &E : MST.AllEdges) EdgeList.push_back(E.get()); + BasicBlock *Entry = &*F.begin(); + if (InsertEntryBlockCounter && canInstrument(Entry)) + InstrumentBBs.push_back(Entry); + for (auto &E : EdgeList) { BasicBlock *InstrBB = getInstrBB(E); - if (InstrBB) + if (InstrBB && (!InsertEntryBlockCounter || InstrBB != Entry)) InstrumentBBs.push_back(InstrBB); } Index: llvm/test/Instrumentation/InstrProfiling/instr-entry-block.ll =================================================================== --- /dev/null +++ llvm/test/Instrumentation/InstrProfiling/instr-entry-block.ll @@ -0,0 +1,38 @@ +; RUN: opt < %s -mtriple=x86_64-unknown-linux -pgo-instr-gen -insert-entry-block-counter -S | FileCheck %s +; Make sure we have a counter in entry block when -insert-entry-block-counter +; is used. + +define dso_local void @foo(i1 zeroext %cond) { +entry: +; CHECK-LABEL: @foo( +; CHECK: call void @llvm.instrprof.increment( +; CHECK-NEXT: br i1 %cond, label %if.then, label %if.else + br i1 %cond, label %if.then, label %if.else + +if.then: +; CHECK: call void @llvm.instrprof.increment( +; CHECK-NEXT: ret void + ret void + +if.else: +; CHECK: call void @llvm.instrprof.increment( +; CHECK-NEXT: ret void + ret void +} + +define dso_local void @goo(i1 zeroext %cond) { +entry: +; CHECK-LABEL: @goo( +; CHECK: call void @llvm.instrprof.increment( +; CHECK-NEXT: br i1 %cond, label %if.then, label %if.else + br i1 %cond, label %if.then, label %if.else + +if.then: +; CHECK: call void @llvm.instrprof.increment( +; CHECK-NEXT: br label %if.else +; CHECK-NOT: call void @llvm.instrprof.increment( + br label %if.else + +if.else: + ret void +} Index: llvm/test/tools/llvm-profdata/Inputs/mix_instr.proftext =================================================================== --- /dev/null +++ llvm/test/tools/llvm-profdata/Inputs/mix_instr.proftext @@ -0,0 +1,15 @@ +:ir +foo +7 +4 +2 +3 +9 +4 + +goo +5 +3 +0 +0 +0 Index: 
llvm/test/tools/llvm-profdata/Inputs/mix_sample.proftext =================================================================== --- /dev/null +++ llvm/test/tools/llvm-profdata/Inputs/mix_sample.proftext @@ -0,0 +1,15 @@ +foo:2000:2000 + 1: 2000 +goo:3000:1500 + 1: 1200 + 2: 800 + 3: 1000 +hoo:50:1 + 1: 1 + 2: 2 + 3: 3 + 4: 4 + 5: 5 + 6: 6 + 7: 7 + 8: 8 Index: llvm/test/tools/llvm-profdata/suppl-instr-with-sample.test =================================================================== --- /dev/null +++ llvm/test/tools/llvm-profdata/suppl-instr-with-sample.test @@ -0,0 +1,34 @@ +Some basic tests for supplementing instrumentation profile with sample profile. + +RUN: llvm-profdata merge -mix-instr-sample-profiles \ +RUN: -early-inline-size-threshold=0 \ +RUN: %p/Inputs/mix_instr.proftext \ +RUN: %p/Inputs/mix_sample.proftext -o %t +RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=MIX1 + +MIX1: foo: +MIX1-NEXT: Hash: 0x0000000000000007 +MIX1-NEXT: Counters: 4 +MIX1-NEXT: Block counts: [2000, 3000, 9000, 4000] +MIX1: goo: +MIX1-NEXT: Hash: 0x0000000000000005 +MIX1-NEXT: Counters: 3 +MIX1-NEXT: Block counts: [1500, 0, 0] + +Same test with weighted inputs (instrumentation profile weight 2, sample profile weight 3). 
+ +RUN: llvm-profdata merge -mix-instr-sample-profiles \ +RUN: -early-inline-size-threshold=0 \ +RUN: -weighted-input=2,%p/Inputs/mix_instr.proftext \ +RUN: -weighted-input=3,%p/Inputs/mix_sample.proftext -o %t +RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=MIX2 + +MIX2: foo: +MIX2-NEXT: Hash: 0x0000000000000007 +MIX2-NEXT: Counters: 4 +MIX2-NEXT: Block counts: [3000, 4500, 13500, 6000] +MIX2: goo: +MIX2-NEXT: Hash: 0x0000000000000005 +MIX2-NEXT: Counters: 3 +MIX2-NEXT: Block counts: [2250, 0, 0] + Index: llvm/tools/llvm-profdata/llvm-profdata.cpp =================================================================== --- llvm/tools/llvm-profdata/llvm-profdata.cpp +++ llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -290,6 +290,22 @@ }); } +static void writeInstrProfile(StringRef OutputFilename, + ProfileFormat OutputFormat, + InstrProfWriter &Writer) { + std::error_code EC; + raw_fd_ostream Output(OutputFilename.data(), EC, sys::fs::OF_None); + if (EC) + exitWithErrorCode(EC, OutputFilename); + + if (OutputFormat == PF_Text) { + if (Error E = Writer.writeText(Output)) + exitWithError(std::move(E)); + } else { + Writer.write(Output); + } +} + static void mergeInstrProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, StringRef OutputFilename, @@ -365,18 +381,144 @@ (NumErrors > 0 && FailMode == failIfAnyAreInvalid)) exitWithError("No profiles could be merged."); - std::error_code EC; - raw_fd_ostream Output(OutputFilename.data(), EC, sys::fs::OF_None); - if (EC) - exitWithErrorCode(EC, OutputFilename); + writeInstrProfile(OutputFilename, OutputFormat, Contexts[0]->Writer); +} - InstrProfWriter &Writer = Contexts[0]->Writer; - if (OutputFormat == PF_Text) { - if (Error E = Writer.writeText(Output)) - exitWithError(std::move(E)); - } else { - Writer.write(Output); +static bool tryReadInstrProf(std::string Filename, + std::unique_ptr &WC, + bool OutputSparse) { + std::mutex ErrorLock; + SmallSet WriterErrorCodes; + + // 
Initialize the writer contexts. + WC = std::make_unique(OutputSparse, ErrorLock, + WriterErrorCodes); + + loadInput({Filename, 1}, nullptr, WC.get()); + if (WC->Errors.size() > 0) + exitWithError(std::move(WC->Errors[0].first), Filename); + return true; +} + +static bool +tryReadSampleProf(std::string Filename, + std::unique_ptr &Reader, + bool ExitIfErr) { + LLVMContext Context; + auto ReaderOrErr = sampleprof::SampleProfileReader::create(Filename, Context); + if (std::error_code EC = ReaderOrErr.getError()) { + if (ExitIfErr) + exitWithErrorCode(EC, Filename); + return false; + } + + Reader = std::move(ReaderOrErr.get()); + if (std::error_code EC = Reader->read()) { + if (ExitIfErr) + exitWithErrorCode(EC, Filename); + return false; + } + return true; +} + +// The profile entry for a function in instrumentation profile. +struct InstrProfileEntry { + uint64_t EntryCount; + InstrProfRecord *ProfRecord; +}; + +static void updateInstrProfileEntry(InstrProfileEntry &IFE, + uint64_t SampleEntryCount, + double ScaleFactor) { + uint64_t InstrEntryCount = IFE.EntryCount; + InstrProfRecord *ProfRecord = IFE.ProfRecord; + if (!InstrEntryCount) { + // If the function is never executed in instrumentation profile, + // adjust its entry count using sample profile and leave other + // counters as 0. + ProfRecord->Counts[0] = SampleEntryCount * ScaleFactor; + return; } + // Scale up all the counters in the function equally. + uint64_t Norm = (uint64_t)(SampleEntryCount * ScaleFactor); + uint64_t DeNorm = InstrEntryCount; + // Don't scale down the Instr profile. 
+ if (Norm <= DeNorm) + return; + ProfRecord->scale(Norm, DeNorm, [&](instrprof_error E) { + warn(toString(make_error(E))); + }); +} + +const uint64_t ColdPercentileIdx = 15; + +static void +findFuncProfilesToFix(std::unique_ptr &WC, + std::unique_ptr &Reader, + double ScaleFactor, unsigned EarlyInlineSizeThreshold) { + StringMap InstrProfileMap; + InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs); + for (auto &PD : WC->Writer.getProfileData()) { + for (const auto &PDV : PD.getValue()) { + InstrProfRecord Record = PDV.second; + IPBuilder.addRecord(Record); + } + if (PD.getValue().size() == 1) { + InstrProfRecord *R = &PD.getValue().begin()->second; + InstrProfileMap[PD.getKey()] = {R->Counts[0], R}; + } + } + ProfileSummary InstrPS = *IPBuilder.getSummary(); + ProfileSummary SamplePS = Reader->getSummary(); + + uint64_t ColdSampleThreshold = + ProfileSummaryBuilder::getEntryForPercentile( + SamplePS.getDetailedSummary(), + ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx]) + .MinCount; + uint64_t ColdInstrThreshold = + ProfileSummaryBuilder::getEntryForPercentile( + InstrPS.getDetailedSummary(), + ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx]) + .MinCount; + for (const auto &PD : Reader->getProfiles()) { + StringRef FName = PD.getKey(); + const sampleprof::FunctionSamples &FS = PD.getValue(); + auto It = InstrProfileMap.find(FName); + // Find a hot/warm entry in sample profile which is cold in instr profile. 
+ if (FS.getHeadSamples() > ColdSampleThreshold && + It != InstrProfileMap.end() && + It->second.EntryCount <= ColdInstrThreshold && + FS.getBodySamples().size() >= EarlyInlineSizeThreshold) { + updateInstrProfileEntry(It->second, FS.getHeadSamples(), ScaleFactor); + } + } +} + +static void mergeMixedProfile(const WeightedFileVector &Inputs, + StringRef OutputFilename, + ProfileFormat OutputFormat, bool OutputSparse, + unsigned EarlyInlineSizeThreshold) { + if (OutputFilename.compare("-") == 0) + exitWithError("Cannot write indexed profdata format to stdout."); + if (Inputs.size() != 2) + exitWithError("Expect two inputs when merging profiles in mixed mode."); + + std::unique_ptr Reader; + std::unique_ptr WC; + // Make sure Inputs[i] is sample profile and Inputs[1 - i] is + // instrumentation profile. + int i = 1; + if (!tryReadSampleProf(Inputs[i].Filename, Reader, false)) + i = 0; + + tryReadSampleProf(Inputs[i].Filename, Reader, true); + tryReadInstrProf(Inputs[1 - i].Filename, WC, OutputSparse); + + findFuncProfilesToFix(WC, Reader, + Inputs[i].Weight / (double)Inputs[1 - i].Weight, + EarlyInlineSizeThreshold); + writeInstrProfile(OutputFilename, OutputFormat, WC->Writer); } /// Make a copy of the given function samples with all symbol names remapped @@ -673,6 +815,14 @@ cl::opt GenPartialProfile( "gen-partial-profile", cl::init(false), cl::Hidden, cl::desc("Generate a partial profile (only meaningful for -extbinary)")); + cl::opt MixInstrSampleProfiles( + "mix-instr-sample-profiles", cl::init(false), cl::Hidden, + cl::desc("Supplement an instrumentation profile with sample profile, and " + "output in instrumentation format (only works with -instr)")); + cl::opt EarlyInlineSizeThreshold( + "early-inline-size-threshold", cl::init(10), cl::Hidden, + cl::desc("If a function can be inlined by PGO early inliner, don't " + "scale it up using sample profile. 
")); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); @@ -701,6 +851,15 @@ if (!RemappingFile.empty()) Remapper = SymbolRemapper::create(RemappingFile); + if (MixInstrSampleProfiles) { + if (ProfileKind != instr) + exitWithError("-mix-instr-sample-profiles can only work with -instr. "); + + mergeMixedProfile(WeightedInputs, OutputFilename, OutputFormat, + OutputSparse, EarlyInlineSizeThreshold); + return 0; + } + if (ProfileKind == instr) mergeInstrProfile(WeightedInputs, Remapper.get(), OutputFilename, OutputFormat, OutputSparse, NumThreads, FailureMode);