Index: llvm/docs/CommandGuide/llvm-profdata.rst =================================================================== --- llvm/docs/CommandGuide/llvm-profdata.rst +++ llvm/docs/CommandGuide/llvm-profdata.rst @@ -161,6 +161,19 @@ coverage for the optimized target. This option can only be used with sample-based profile in extbinary format. +.. option:: -supplement-instr-with-sample=[true|false] + + Supplement an instrumentation profile with sample profile, and output in + instrumentation format (only works with -instr). + +.. option:: -base-scale-function=function_name + When supplementing an instrumentation profile with sample profile, use the + entry count of the given function to compute the ScaleFactor. + +.. option:: -early-inline-size-threshold=threshold_number + If a function is smaller than the threshold, assume it can be inlined by + PGO early inliner and don't scale it up using sample profile. + EXAMPLES ^^^^^^^^ Basic Usage Index: llvm/include/llvm/ProfileData/InstrProf.h =================================================================== --- llvm/include/llvm/ProfileData/InstrProf.h +++ llvm/include/llvm/ProfileData/InstrProf.h @@ -678,8 +678,8 @@ /// Optionally scale merged counts by \p Weight. void merge(InstrProfValueSiteRecord &Input, uint64_t Weight, function_ref Warn); - /// Scale up value profile data counts. - void scale(uint64_t Weight, function_ref Warn); + /// Scale value profile data counts by N / D. \p D must be nonzero. + void scale(uint64_t N, uint64_t D, function_ref Warn); /// Compute the overlap b/w this record and Input record. void overlap(InstrProfValueSiteRecord &Input, uint32_t ValueKind, @@ -753,8 +753,8 @@ function_ref Warn); /// Scale up profile counts (including value profile data) by - /// \p Weight. - void scale(uint64_t Weight, function_ref Warn); + /// a factor of (N / D); \p D must be nonzero. + void scale(uint64_t N, uint64_t D, function_ref Warn); /// Sort value profile data (per site) by count. 
void sortValueData() { @@ -839,8 +839,8 @@ uint64_t Weight, function_ref Warn); - // Scale up value profile data count. - void scaleValueProfData(uint32_t ValueKind, uint64_t Weight, + // Scale up value profile data count by N (Numerator) / D (Denominator). + void scaleValueProfData(uint32_t ValueKind, uint64_t N, uint64_t D, function_ref Warn); }; Index: llvm/include/llvm/ProfileData/InstrProfWriter.h =================================================================== --- llvm/include/llvm/ProfileData/InstrProfWriter.h +++ llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -47,6 +47,8 @@ InstrProfWriter(bool Sparse = false); ~InstrProfWriter(); + StringMap &getProfileData() { return FunctionData; } + /// Add function counts for the given function. If there are already counts /// for this function and the hash and number of counts match, each counter is /// summed. Optionally scale counts by \p Weight. Index: llvm/lib/ProfileData/InstrProf.cpp =================================================================== --- llvm/lib/ProfileData/InstrProf.cpp +++ llvm/lib/ProfileData/InstrProf.cpp @@ -625,11 +625,11 @@ } } -void InstrProfValueSiteRecord::scale(uint64_t Weight, +void InstrProfValueSiteRecord::scale(uint64_t N, uint64_t D, function_ref Warn) { for (auto I = ValueData.begin(), IE = ValueData.end(); I != IE; ++I) { bool Overflowed; - I->Count = SaturatingMultiply(I->Count, Weight, &Overflowed); + I->Count = SaturatingMultiply(I->Count, N, &Overflowed) / D; if (Overflowed) Warn(instrprof_error::counter_overflow); } @@ -678,22 +678,23 @@ } void InstrProfRecord::scaleValueProfData( - uint32_t ValueKind, uint64_t Weight, + uint32_t ValueKind, uint64_t N, uint64_t D, function_ref Warn) { for (auto &R : getValueSitesForKind(ValueKind)) - R.scale(Weight, Warn); + R.scale(N, D, Warn); } -void InstrProfRecord::scale(uint64_t Weight, +void InstrProfRecord::scale(uint64_t N, uint64_t D, function_ref Warn) { + assert(D != 0 && "D cannot be 0"); for (auto &Count : 
this->Counts) { bool Overflowed; - Count = SaturatingMultiply(Count, Weight, &Overflowed); + Count = SaturatingMultiply(Count, N, &Overflowed) / D; if (Overflowed) Warn(instrprof_error::counter_overflow); } for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) - scaleValueProfData(Kind, Weight, Warn); + scaleValueProfData(Kind, N, D, Warn); } // Map indirect call target name hash to name string. Index: llvm/lib/ProfileData/InstrProfWriter.cpp =================================================================== --- llvm/lib/ProfileData/InstrProfWriter.cpp +++ llvm/lib/ProfileData/InstrProfWriter.cpp @@ -240,7 +240,7 @@ // We've never seen a function with this name and hash, add it. Dest = std::move(I); if (Weight > 1) - Dest.scale(Weight, MapWarn); + Dest.scale(Weight, 1, MapWarn); } else { // We're updating a function we've seen before. Dest.merge(I, Weight, MapWarn); Index: llvm/test/tools/llvm-profdata/Inputs/mix_instr.proftext =================================================================== --- /dev/null +++ llvm/test/tools/llvm-profdata/Inputs/mix_instr.proftext @@ -0,0 +1,23 @@ +:ir +foo +7 +4 +2 +3 +9 +4 + +goo +5 +3 +0 +0 +0 + +moo +9 +4 +3000 +1000 +2000 +500 Index: llvm/test/tools/llvm-profdata/Inputs/mix_sample.proftext =================================================================== --- /dev/null +++ llvm/test/tools/llvm-profdata/Inputs/mix_sample.proftext @@ -0,0 +1,17 @@ +foo:2000:2000 + 1: 2000 +goo:3000:1500 + 1: 1200 + 2: 800 + 3: 1000 +moo:1000:1000 + 1: 1000 +hoo:50:1 + 1: 1 + 2: 2 + 3: 3 + 4: 4 + 5: 5 + 6: 6 + 7: 7 + 8: 8 Index: llvm/test/tools/llvm-profdata/suppl-instr-with-sample.test =================================================================== --- /dev/null +++ llvm/test/tools/llvm-profdata/suppl-instr-with-sample.test @@ -0,0 +1,82 @@ +Some basic tests for supplementing instrumentation profile with sample profile. + +Test the inputs with weighted-input being 1 (default value). 
+RUN: llvm-profdata merge -supplement-instr-with-sample \ +RUN: -early-inline-size-threshold=0 \ +RUN: %p/Inputs/mix_instr.proftext \ +RUN: %p/Inputs/mix_sample.proftext -o %t +RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=MIX1 + +MIX1: foo: +MIX1-NEXT: Hash: 0x0000000000000007 +MIX1-NEXT: Counters: 4 +MIX1-NEXT: Block counts: [2000, 3000, 9000, 4000] +MIX1: goo: +MIX1-NEXT: Hash: 0x0000000000000005 +MIX1-NEXT: Counters: 3 +MIX1-NEXT: Block counts: [1500, 0, 0] +MIX1: moo: +MIX1-NEXT: Hash: 0x0000000000000009 +MIX1-NEXT: Counters: 4 +MIX1-NEXT: Block counts: [3000, 1000, 2000, 500] + +Test the inputs with weighted-input other than 1. +RUN: llvm-profdata merge -supplement-instr-with-sample \ +RUN: -early-inline-size-threshold=0 \ +RUN: -weighted-input=2,%p/Inputs/mix_instr.proftext \ +RUN: -weighted-input=3,%p/Inputs/mix_sample.proftext -o %t +RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=MIX2 + +MIX2: foo: +MIX2-NEXT: Hash: 0x0000000000000007 +MIX2-NEXT: Counters: 4 +MIX2-NEXT: Block counts: [3000, 4500, 13500, 6000] +MIX2: goo: +MIX2-NEXT: Hash: 0x0000000000000005 +MIX2-NEXT: Counters: 3 +MIX2-NEXT: Block counts: [2250, 0, 0] +MIX2: moo: +MIX2-NEXT: Hash: 0x0000000000000009 +MIX2-NEXT: Counters: 4 +MIX2-NEXT: Block counts: [3000, 1000, 2000, 500] + +Test the flag -base-scale-function. 
+RUN: llvm-profdata merge -supplement-instr-with-sample \ +RUN: -early-inline-size-threshold=0 \ +RUN: -base-scale-function=moo \ +RUN: %p/Inputs/mix_instr.proftext \ +RUN: %p/Inputs/mix_sample.proftext -o %t +RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=MIX3 + +MIX3: foo: +MIX3-NEXT: Hash: 0x0000000000000007 +MIX3-NEXT: Counters: 4 +MIX3-NEXT: Block counts: [6000, 9000, 27000, 12000] +MIX3: goo: +MIX3-NEXT: Hash: 0x0000000000000005 +MIX3-NEXT: Counters: 3 +MIX3-NEXT: Block counts: [4500, 0, 0] +MIX3: moo: +MIX3-NEXT: Hash: 0x0000000000000009 +MIX3-NEXT: Counters: 4 +MIX3-NEXT: Block counts: [3000, 1000, 2000, 500] + +Test the flag -early-inline-size-threshold. +RUN: llvm-profdata merge -supplement-instr-with-sample \ +RUN: -early-inline-size-threshold=2 \ +RUN: %p/Inputs/mix_instr.proftext \ +RUN: %p/Inputs/mix_sample.proftext -o %t +RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=MIX4 + +MIX4: foo: +MIX4-NEXT: Hash: 0x0000000000000007 +MIX4-NEXT: Counters: 4 +MIX4-NEXT: Block counts: [2, 3, 9, 4] +MIX4: goo: +MIX4-NEXT: Hash: 0x0000000000000005 +MIX4-NEXT: Counters: 3 +MIX4-NEXT: Block counts: [1500, 0, 0] +MIX4: moo: +MIX4-NEXT: Hash: 0x0000000000000009 +MIX4-NEXT: Counters: 4 +MIX4-NEXT: Block counts: [3000, 1000, 2000, 500] Index: llvm/tools/llvm-profdata/llvm-profdata.cpp =================================================================== --- llvm/tools/llvm-profdata/llvm-profdata.cpp +++ llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -385,6 +385,192 @@ writeInstrProfile(OutputFilename, OutputFormat, Contexts[0]->Writer); } +/// Try to read \p Filename as an instr profile. +static bool tryReadInstrProf(std::string Filename, + std::unique_ptr &WC, + bool OutputSparse) { + std::mutex ErrorLock; + SmallSet WriterErrorCodes; + + // Initialize the writer contexts. 
+ WC = std::make_unique(OutputSparse, ErrorLock, + WriterErrorCodes); + + loadInput({Filename, 1}, nullptr, WC.get()); + if (WC->Errors.size() > 0) + exitWithError(std::move(WC->Errors[0].first), Filename); + return true; +} + +/// Try to read \p Filename as a sample profile. On error, exit the program +/// if \p ExitIfErr is true; otherwise return false to the caller instead +/// of exiting. +static bool +tryReadSampleProf(std::string Filename, + std::unique_ptr &Reader, + bool ExitIfErr) { + LLVMContext Context; + auto ReaderOrErr = sampleprof::SampleProfileReader::create(Filename, Context); + if (std::error_code EC = ReaderOrErr.getError()) { + if (ExitIfErr) + exitWithErrorCode(EC, Filename); + return false; + } + + Reader = std::move(ReaderOrErr.get()); + if (std::error_code EC = Reader->read()) { + if (ExitIfErr) + exitWithErrorCode(EC, Filename); + return false; + } + return true; +} + +/// The profile entry for a function in the instrumentation profile. +struct InstrProfileEntry { + uint64_t EntryCount; + InstrProfRecord *ProfRecord; +}; + +/// Scale up the instr profile entry \p IFE based on the entry count of +/// the function in the sample profile and \p ScaleFactor. +static void updateInstrProfileEntry(InstrProfileEntry &IFE, + uint64_t SampleEntryCount, + double ScaleFactor) { + uint64_t InstrEntryCount = IFE.EntryCount; + InstrProfRecord *ProfRecord = IFE.ProfRecord; + if (!InstrEntryCount) { + // If the function is never executed in the instrumentation profile, + // set its entry count from the sample profile and leave the other + // counters unchanged. + ProfRecord->Counts[0] = SampleEntryCount * ScaleFactor; + return; + } + // Scale up all the counters in the function equally. + uint64_t Numerator = (uint64_t)(SampleEntryCount * ScaleFactor); + uint64_t Denominator = InstrEntryCount; + // Don't scale down the Instr profile. 
+ if (Numerator <= Denominator) + return; + ProfRecord->scale(Numerator, Denominator, [&](instrprof_error E) { + warn(toString(make_error(E))); + }); +} + +const uint64_t ColdPercentileIdx = 15; + +/// Adjust the instr profile in \p WC based on the sample profile in \p Reader +/// and \p ScaleFactor. The \p ScaleFactor may be updated if a non-empty +/// \p BaseScaleFunction is given. Instr profiles for functions smaller than +/// \p EarlyInlineSizeThreshold are not adjusted, because a small function +/// mostly inlined by the early inliner can correctly show up as cold in +/// the instr profile but hot in the flattened sample profile. +static void +adjustInstrProfiles(std::unique_ptr &WC, + std::unique_ptr &Reader, + double ScaleFactor, unsigned EarlyInlineSizeThreshold, + const std::string &BaseScaleFunction) { + uint64_t BaseInstrCounter = 0; + uint64_t BaseSampleCounter = 0; + // Map from function to its entry in the instr profile. + StringMap InstrProfileMap; + InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs); + for (auto &PD : WC->Writer.getProfileData()) { + // Populate IPBuilder. + for (const auto &PDV : PD.getValue()) { + InstrProfRecord Record = PDV.second; + IPBuilder.addRecord(Record); + } + + // If a function has multiple entries in the instr profile, skip it. + if (PD.getValue().size() != 1) + continue; + + InstrProfRecord *R = &PD.getValue().begin()->second; + InstrProfileMap[PD.getKey()] = {R->Counts[0], R}; + + // Find the entry counter of BaseScaleFunction in the instr profile. + if (!BaseScaleFunction.empty() && PD.getKey() == BaseScaleFunction) + BaseInstrCounter = R->Counts[0]; + } + for (const auto &PD : Reader->getProfiles()) { + StringRef FName = PD.getKey(); + const sampleprof::FunctionSamples &FS = PD.getValue(); + + // Find the entry counter of BaseScaleFunction in the sample profile. 
+ if (!BaseScaleFunction.empty() && FName == BaseScaleFunction) + BaseSampleCounter = FS.getHeadSamples(); + } + + // If BaseScaleFunction has nonzero entry counters in both the instr and + // sample profiles, use their ratio (instr / sample) as ScaleFactor. + if (BaseInstrCounter != 0 && BaseSampleCounter != 0) + ScaleFactor = BaseInstrCounter / (double)BaseSampleCounter; + + ProfileSummary InstrPS = *IPBuilder.getSummary(); + ProfileSummary SamplePS = Reader->getSummary(); + + // Compute cold thresholds for the instr profile and the sample profile. + uint64_t ColdSampleThreshold = + ProfileSummaryBuilder::getEntryForPercentile( + SamplePS.getDetailedSummary(), + ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx]) + .MinCount; + uint64_t ColdInstrThreshold = + ProfileSummaryBuilder::getEntryForPercentile( + InstrPS.getDetailedSummary(), + ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx]) + .MinCount; + + for (const auto &PD : Reader->getProfiles()) { + StringRef FName = PD.getKey(); + const sampleprof::FunctionSamples &FS = PD.getValue(); + auto It = InstrProfileMap.find(FName); + // If an entry is hot/warm in the sample profile but cold in the instr + // profile (and not below the size threshold), scale up the instr entry. + if (FS.getHeadSamples() > ColdSampleThreshold && + It != InstrProfileMap.end() && + It->second.EntryCount <= ColdInstrThreshold && + FS.getBodySamples().size() >= EarlyInlineSizeThreshold) { + updateInstrProfileEntry(It->second, FS.getHeadSamples(), ScaleFactor); + } + } +} + +/// The main function to supplement instr profile with sample profile. 
+static void supplementInstrProfile(const WeightedFileVector &Inputs, + StringRef OutputFilename, + ProfileFormat OutputFormat, + bool OutputSparse, + unsigned EarlyInlineSizeThreshold, + const std::string &BaseScaleFunction) { + if (OutputFilename.compare("-") == 0) + exitWithError("Cannot write indexed profdata format to stdout."); + if (Inputs.size() != 2) + exitWithError("Expect two inputs when merging profiles in mixed mode."); + + std::unique_ptr Reader; + std::unique_ptr WC; + // Make sure Inputs[i] is the sample profile and Inputs[1 - i] is the + // instrumentation profile. + int i = 1; + if (!tryReadSampleProf(Inputs[i].Filename, Reader, false)) + i = 0; + + tryReadSampleProf(Inputs[i].Filename, Reader, true); + tryReadInstrProf(Inputs[1 - i].Filename, WC, OutputSparse); + + if (!BaseScaleFunction.empty() && + (Inputs[i].Weight != 1 || Inputs[1 - i].Weight != 1)) + exitWithError("Don't use -base-scale-function and weighted inputs " + "together. "); + + adjustInstrProfiles(WC, Reader, + Inputs[i].Weight / (double)Inputs[1 - i].Weight, + EarlyInlineSizeThreshold, BaseScaleFunction); + writeInstrProfile(OutputFilename, OutputFormat, WC->Writer); +} + /// Make a copy of the given function samples with all symbol names remapped /// by the provided symbol remapper. static sampleprof::FunctionSamples @@ -679,6 +865,20 @@ cl::opt GenPartialProfile( "gen-partial-profile", cl::init(false), cl::Hidden, cl::desc("Generate a partial profile (only meaningful for -extbinary)")); + cl::opt SupplInstrWithSample( + "supplement-instr-with-sample", cl::init(false), cl::Hidden, + cl::desc("Supplement an instrumentation profile with sample profile, and " + "output in instrumentation format (only works with -instr)")); + cl::opt BaseScaleFunction( + "base-scale-function", cl::init(""), cl::Hidden, + cl::desc("When supplementing an instrumentation profile with sample " + "profile, use entry count of the given function to compute " + "the ScaleFactor. 
")); + cl::opt EarlyInlineSizeThreshold( + "early-inline-size-threshold", cl::init(10), cl::Hidden, + cl::desc("If a function is smaller than the threshold, assume it can " + "be inlined by PGO early inliner and don't scale it up using " + "sample profile. ")); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); @@ -707,6 +907,17 @@ if (!RemappingFile.empty()) Remapper = SymbolRemapper::create(RemappingFile); + if (SupplInstrWithSample) { + if (ProfileKind != instr) + exitWithError( + "-supplement-instr-with-sample can only work with -instr. "); + + supplementInstrProfile(WeightedInputs, OutputFilename, OutputFormat, + OutputSparse, EarlyInlineSizeThreshold, + BaseScaleFunction); + return 0; + } + if (ProfileKind == instr) mergeInstrProfile(WeightedInputs, Remapper.get(), OutputFilename, OutputFormat, OutputSparse, NumThreads, FailureMode);