diff --git a/llvm/docs/CommandGuide/llvm-profdata.rst b/llvm/docs/CommandGuide/llvm-profdata.rst --- a/llvm/docs/CommandGuide/llvm-profdata.rst +++ b/llvm/docs/CommandGuide/llvm-profdata.rst @@ -161,6 +161,30 @@ coverage for the optimized target. This option can only be used with sample-based profile in extbinary format. +.. option:: -supplement-instr-with-sample=path_to_sample_profile + + Supplement an instrumentation profile with sample profile. The sample profile + is the input of the flag. Output will be in instrumentation format (only works + with -instr). + +.. option:: -zero-counter-threshold=threshold_float_number + + For the function which is cold in instr profile but hot in sample profile, if + the ratio of the number of zero counters divided by the total number of + counters is above the threshold, the profile of the function will be regarded + as being harmful for performance and will be dropped. + +.. option:: -instr-prof-cold-threshold=threshold_int_number + + User specified cold threshold for instr profile which will override the cold + threshold derived from the profile summary. + +.. option:: -suppl-min-size-threshold=threshold_int_number + + If the size of a function is smaller than the threshold, assume it can be + inlined by PGO early inliner and it will not be adjusted based on sample + profile. + EXAMPLES ^^^^^^^^ Basic Usage diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -678,8 +678,8 @@ /// Optionally scale merged counts by \p Weight. void merge(InstrProfValueSiteRecord &Input, uint64_t Weight, function_ref Warn); - /// Scale up value profile data counts. - void scale(uint64_t Weight, function_ref Warn); + /// Scale up value profile data counts by N (Numerator) / D (Denominator). + void scale(uint64_t N, uint64_t D, function_ref Warn); /// Compute the overlap b/w this record and Input record. 
void overlap(InstrProfValueSiteRecord &Input, uint32_t ValueKind, @@ -753,8 +753,8 @@ function_ref Warn); /// Scale up profile counts (including value profile data) by - /// \p Weight. - void scale(uint64_t Weight, function_ref Warn); + /// a factor of (N / D). + void scale(uint64_t N, uint64_t D, function_ref Warn); /// Sort value profile data (per site) by count. void sortValueData() { @@ -839,8 +839,8 @@ uint64_t Weight, function_ref Warn); - // Scale up value profile data count. - void scaleValueProfData(uint32_t ValueKind, uint64_t Weight, + // Scale up value profile data count by N (Numerator) / D (Denominator). + void scaleValueProfData(uint32_t ValueKind, uint64_t N, uint64_t D, function_ref Warn); }; diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -48,6 +48,8 @@ InstrProfWriter(bool Sparse = false, bool InstrEntryBBEnabled = false); ~InstrProfWriter(); + StringMap &getProfileData() { return FunctionData; } + /// Add function counts for the given function. If there are already counts /// for this function and the hash and number of counts match, each counter is /// summed. Optionally scale counts by \p Weight. 
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -625,11 +625,11 @@ } } -void InstrProfValueSiteRecord::scale(uint64_t Weight, +void InstrProfValueSiteRecord::scale(uint64_t N, uint64_t D, function_ref Warn) { for (auto I = ValueData.begin(), IE = ValueData.end(); I != IE; ++I) { bool Overflowed; - I->Count = SaturatingMultiply(I->Count, Weight, &Overflowed); + I->Count = SaturatingMultiply(I->Count, N, &Overflowed) / D; if (Overflowed) Warn(instrprof_error::counter_overflow); } @@ -678,22 +678,23 @@ } void InstrProfRecord::scaleValueProfData( - uint32_t ValueKind, uint64_t Weight, + uint32_t ValueKind, uint64_t N, uint64_t D, function_ref Warn) { for (auto &R : getValueSitesForKind(ValueKind)) - R.scale(Weight, Warn); + R.scale(N, D, Warn); } -void InstrProfRecord::scale(uint64_t Weight, +void InstrProfRecord::scale(uint64_t N, uint64_t D, function_ref Warn) { + assert(D != 0 && "D cannot be 0"); for (auto &Count : this->Counts) { bool Overflowed; - Count = SaturatingMultiply(Count, Weight, &Overflowed); + Count = SaturatingMultiply(Count, N, &Overflowed) / D; if (Overflowed) Warn(instrprof_error::counter_overflow); } for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) - scaleValueProfData(Kind, Weight, Warn); + scaleValueProfData(Kind, N, D, Warn); } // Map indirect call target name hash to name string. diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -241,7 +241,7 @@ // We've never seen a function with this name and hash, add it. Dest = std::move(I); if (Weight > 1) - Dest.scale(Weight, MapWarn); + Dest.scale(Weight, 1, MapWarn); } else { // We're updating a function we've seen before. 
Dest.merge(I, Weight, MapWarn); diff --git a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp --- a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp +++ b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp @@ -119,13 +119,22 @@ } void InstrProfSummaryBuilder::addEntryCount(uint64_t Count) { - addCount(Count); NumFunctions++; + + // Skip invalid count. + if (Count == (uint64_t)-1) + return; + + addCount(Count); if (Count > MaxFunctionCount) MaxFunctionCount = Count; } void InstrProfSummaryBuilder::addInternalCount(uint64_t Count) { + // Skip invalid count. + if (Count == (uint64_t)-1) + return; + addCount(Count); if (Count > MaxInternalBlockCount) MaxInternalBlockCount = Count; diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -1020,7 +1020,8 @@ FreqAttr(FFA_Normal), IsCS(IsCS) {} // Read counts for the instrumented BB from profile. - bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros); + bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, + bool &AllMinusOnes); // Populate the counts for all BBs. void populateCounters(); @@ -1203,7 +1204,8 @@ // Read the profile from ProfileFileName and assign the value to the // instrumented BB and the edges. This function also updates ProgramMaxCount. // Return true if the profile are successfully read, and false on errors. -bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros) { +bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, + bool &AllMinusOnes) { auto &Ctx = M->getContext(); Expected Result = PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); @@ -1246,10 +1248,13 @@ IsCS ? 
NumOfCSPGOFunc++ : NumOfPGOFunc++; LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); + AllMinusOnes = (CountFromProfile.size() > 0); uint64_t ValueSum = 0; for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); ValueSum += CountFromProfile[I]; + if (CountFromProfile[I] != (uint64_t)-1) + AllMinusOnes = false; } AllZeros = (ValueSum == 0); @@ -1657,8 +1662,13 @@ SplitIndirectBrCriticalEdges(F, BPI, BFI); PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, InstrumentFuncEntry); + // When AllMinusOnes is true, it means the profile for the function + // is unrepresentative and this function is actually hot. Set the + // entry count of the function to be multiple times of hot threshold + // and drop all its internal counters. + bool AllMinusOnes = false; bool AllZeros = false; - if (!Func.readCounters(PGOReader.get(), AllZeros)) + if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes)) continue; if (AllZeros) { F.setEntryCount(ProfileCount(0, Function::PCT_Real)); @@ -1666,6 +1676,15 @@ ColdFunctions.push_back(&F); continue; } + const unsigned MultiplyFactor = 3; + if (AllMinusOnes) { + uint64_t HotThreshold = PSI->getHotCountThreshold(); + if (HotThreshold) + F.setEntryCount( + ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real)); + HotFunctions.push_back(&F); + continue; + } Func.populateCounters(); Func.setBranchWeights(); Func.annotateValueSites(); diff --git a/llvm/test/Transforms/PGOProfile/Inputs/sample-profile.proftext b/llvm/test/Transforms/PGOProfile/Inputs/sample-profile.proftext new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/Inputs/sample-profile.proftext @@ -0,0 +1,12 @@ +test_simple_for:4000:4000 + 1: 1000 + 2: 1000 + 3: 1000 + 4: 1000 + +moo:10:10 + 1: 2 + 2: 2 + 3: 2 + 4: 2 + 5: 2 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/suppl-profile.proftext 
b/llvm/test/Transforms/PGOProfile/Inputs/suppl-profile.proftext new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/Inputs/suppl-profile.proftext @@ -0,0 +1,15 @@ +# :ir is the flag to indicate this is IR level profile. +:ir +test_simple_for +34137660316 +2 +0 +0 + +foo +2582734 +4 +1000 +270 +180 +760 diff --git a/llvm/test/Transforms/PGOProfile/suppl-profile.ll b/llvm/test/Transforms/PGOProfile/suppl-profile.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/suppl-profile.ll @@ -0,0 +1,37 @@ +; Supplement instr profile suppl-profile.proftext with sample profile +; sample-profile.proftext. +; RUN: llvm-profdata merge -instr -suppl-min-size-threshold=0 \ +; RUN: -supplement-instr-with-sample=%p/Inputs/sample-profile.proftext \ +; RUN: %S/Inputs/suppl-profile.proftext -o %t.profdata +; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Check test_simple_for has a non-zero entry count and doesn't have any other +; prof metadata. +; CHECK: @test_simple_for(i32 %n) {{.*}} !prof ![[ENTRY_COUNT:[0-9]+]] +; CHECK-NOT: !prof ! 
+; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 540} +define i32 @test_simple_for(i32 %n) { +entry: + br label %for.cond + +for.cond: + %i = phi i32 [ 0, %entry ], [ %inc1, %for.inc ] + %sum = phi i32 [ 1, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i, %n + br i1 %cmp, label %for.body, label %for.end + +for.body: + %inc = add nsw i32 %sum, 1 + br label %for.inc + +for.inc: + %inc1 = add nsw i32 %i, 1 + br label %for.cond + +for.end: + ret i32 %sum +} diff --git a/llvm/test/tools/llvm-profdata/Inputs/mix_instr.proftext b/llvm/test/tools/llvm-profdata/Inputs/mix_instr.proftext new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/mix_instr.proftext @@ -0,0 +1,25 @@ +:ir +foo +7 +5 +12 +13 +0 +0 +0 + +goo +5 +3 +0 +0 +0 + +moo +9 +4 +3000 +1000 +2000 +500 + diff --git a/llvm/test/tools/llvm-profdata/Inputs/mix_sample.proftext b/llvm/test/tools/llvm-profdata/Inputs/mix_sample.proftext new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/mix_sample.proftext @@ -0,0 +1,17 @@ +foo:2000:2000 + 1: 2000 +goo:3000:1500 + 1: 1200 + 2: 800 + 3: 1000 +moo:1000:1000 + 1: 1000 +hoo:50:1 + 1: 1 + 2: 2 + 3: 3 + 4: 4 + 5: 5 + 6: 6 + 7: 7 + 8: 8 diff --git a/llvm/test/tools/llvm-profdata/overflow-instr.test b/llvm/test/tools/llvm-profdata/overflow-instr.test --- a/llvm/test/tools/llvm-profdata/overflow-instr.test +++ b/llvm/test/tools/llvm-profdata/overflow-instr.test @@ -2,16 +2,14 @@ 1- Merge profile having maximum counts with itself and verify overflow detected and saturation occurred RUN: llvm-profdata merge -instr %p/Inputs/overflow-instr.proftext %p/Inputs/overflow-instr.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=MERGE_OVERFLOW -RUN: llvm-profdata show -instr %t.out | FileCheck %s --check-prefix=SHOW_OVERFLOW +RUN: llvm-profdata show -instr -all-functions -counts %t.out | FileCheck %s --check-prefix=SHOW_OVERFLOW MERGE_OVERFLOW: {{.*}}: overflow: Counter overflow -SHOW_OVERFLOW: Total functions: 1 
-SHOW_OVERFLOW-NEXT: Maximum function count: 18446744073709551615 -SHOW_OVERFLOW-NEXT: Maximum internal block count: 18446744073709551615 +SHOW_OVERFLOW: Function count: 18446744073709551615 +SHOW_OVERFLOW-NEXT: Block counts: [18446744073709551615, 18446744073709551615] 2- Merge profile having maximum counts by itself and verify no overflow RUN: llvm-profdata merge -instr %p/Inputs/overflow-instr.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=MERGE_NO_OVERFLOW -allow-empty -RUN: llvm-profdata show -instr %t.out | FileCheck %s --check-prefix=SHOW_NO_OVERFLOW +RUN: llvm-profdata show -instr -all-functions -counts %t.out | FileCheck %s --check-prefix=SHOW_NO_OVERFLOW MERGE_NO_OVERFLOW-NOT: {{.*}}: overflow: Counter overflow -SHOW_NO_OVERFLOW: Total functions: 1 -SHOW_NO_OVERFLOW-NEXT: Maximum function count: 18446744073709551615 -SHOW_NO_OVERFLOW-NEXT: Maximum internal block count: 18446744073709551615 +SHOW_NO_OVERFLOW: Function count: 18446744073709551615 +SHOW_NO_OVERFLOW-NEXT: Block counts: [9223372036854775808, 18446744073709551615] diff --git a/llvm/test/tools/llvm-profdata/suppl-instr-with-sample.test b/llvm/test/tools/llvm-profdata/suppl-instr-with-sample.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/suppl-instr-with-sample.test @@ -0,0 +1,102 @@ +Some basic tests for supplementing instrumentation profile with sample profile. + +Test all of goo's counters will be set to -1. 
+RUN: llvm-profdata merge \ +RUN: -supplement-instr-with-sample=%p/Inputs/mix_sample.proftext \ +RUN: -suppl-min-size-threshold=0 %p/Inputs/mix_instr.proftext -o %t +RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=MIX1 + +MIX1: foo: +MIX1-NEXT: Hash: 0x0000000000000007 +MIX1-NEXT: Counters: 5 +MIX1-NEXT: Block counts: [12, 13, 0, 0, 0] +MIX1: goo: +MIX1-NEXT: Hash: 0x0000000000000005 +MIX1-NEXT: Counters: 3 +MIX1-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615] +MIX1: moo: +MIX1-NEXT: Hash: 0x0000000000000009 +MIX1-NEXT: Counters: 4 +MIX1-NEXT: Block counts: [3000, 1000, 2000, 500] + +Test when the zero counter ratio of foo is higher than zero-counter-threshold. +RUN: llvm-profdata merge \ +RUN: -supplement-instr-with-sample=%p/Inputs/mix_sample.proftext \ +RUN: -suppl-min-size-threshold=0 -zero-counter-threshold=0.5 \ +RUN: -instr-prof-cold-threshold=30 %p/Inputs/mix_instr.proftext -o %t +RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=MIX2 + +MIX2: foo: +MIX2-NEXT: Hash: 0x0000000000000007 +MIX2-NEXT: Counters: 5 +MIX2-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615, 18446744073709551615, 18446744073709551615] +MIX2: goo: +MIX2-NEXT: Hash: 0x0000000000000005 +MIX2-NEXT: Counters: 3 +MIX2-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615] +MIX2: moo: +MIX2-NEXT: Hash: 0x0000000000000009 +MIX2-NEXT: Counters: 4 +MIX2-NEXT: Block counts: [3000, 1000, 2000, 500] + +Test when the zero counter ratio of foo is lower than zero-counter-threshold. 
+RUN: llvm-profdata merge \ +RUN: -supplement-instr-with-sample=%p/Inputs/mix_sample.proftext \ +RUN: -suppl-min-size-threshold=0 -zero-counter-threshold=0.7 \ +RUN: -instr-prof-cold-threshold=30 %p/Inputs/mix_instr.proftext -o %t +RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=MIX3 + +MIX3: foo: +MIX3-NEXT: Hash: 0x0000000000000007 +MIX3-NEXT: Counters: 5 +MIX3-NEXT: Block counts: [1384, 1500, 0, 0, 0] +MIX3: goo: +MIX3-NEXT: Hash: 0x0000000000000005 +MIX3-NEXT: Counters: 3 +MIX3-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615] +MIX3: moo: +MIX3-NEXT: Hash: 0x0000000000000009 +MIX3-NEXT: Counters: 4 +MIX3-NEXT: Block counts: [3000, 1000, 2000, 500] + +Test foo's profile won't be adjusted because its size is smaller +than suppl-min-size-threshold. +RUN: llvm-profdata merge \ +RUN: -supplement-instr-with-sample=%p/Inputs/mix_sample.proftext \ +RUN: -suppl-min-size-threshold=2 -zero-counter-threshold=0.7 \ +RUN: -instr-prof-cold-threshold=30 %p/Inputs/mix_instr.proftext -o %t +RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=MIX4 + +MIX4: foo: +MIX4-NEXT: Hash: 0x0000000000000007 +MIX4-NEXT: Counters: 5 +MIX4-NEXT: Block counts: [12, 13, 0, 0, 0] +MIX4: goo: +MIX4-NEXT: Hash: 0x0000000000000005 +MIX4-NEXT: Counters: 3 +MIX4-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615] +MIX4: moo: +MIX4-NEXT: Hash: 0x0000000000000009 +MIX4-NEXT: Counters: 4 +MIX4-NEXT: Block counts: [3000, 1000, 2000, 500] + +Test profile summary won't be affected by -1 counter. 
+RUN: llvm-profdata merge \ +RUN: -supplement-instr-with-sample=%p/Inputs/mix_sample.proftext \ +RUN: -suppl-min-size-threshold=0 %p/Inputs/mix_instr.proftext -o %t +RUN: llvm-profdata show %t -detailed-summary | FileCheck %s --check-prefix=MIX5 + +MIX5: Instrumentation level: IR +MIX5-NEXT: Total functions: 3 +MIX5-NEXT: Maximum function count: 3000 +MIX5-NEXT: Maximum internal block count: 2000 +MIX5-NEXT: Total number of blocks: 9 +MIX5-NEXT: Total count: 6525 +MIX5-NEXT: Detailed summary: +MIX5-NEXT: 3 blocks with count >= 1000 account for 80 percentage of the total counts. +MIX5-NEXT: 3 blocks with count >= 1000 account for 90 percentage of the total counts. +MIX5-NEXT: 4 blocks with count >= 500 account for 95 percentage of the total counts. +MIX5-NEXT: 4 blocks with count >= 500 account for 99 percentage of the total counts. +MIX5-NEXT: 6 blocks with count >= 12 account for 99.9 percentage of the total counts. +MIX5-NEXT: 6 blocks with count >= 12 account for 99.99 percentage of the total counts. +MIX5-NEXT: 6 blocks with count >= 12 account for 99.999 percentage of the total counts. diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -386,6 +386,172 @@ writeInstrProfile(OutputFilename, OutputFormat, Contexts[0]->Writer); } +/// The profile entry for a function in instrumentation profile. 
+struct InstrProfileEntry { + uint64_t MaxCount = 0; + float ZeroCounterRatio = 0.0; + InstrProfRecord *ProfRecord; + InstrProfileEntry(InstrProfRecord *Record); + InstrProfileEntry() = default; +}; + +InstrProfileEntry::InstrProfileEntry(InstrProfRecord *Record) { + ProfRecord = Record; + uint64_t CntNum = Record->Counts.size(); + uint64_t ZeroCntNum = 0; + for (size_t I = 0; I < CntNum; ++I) { + MaxCount = std::max(MaxCount, Record->Counts[I]); + ZeroCntNum += !Record->Counts[I]; + } + ZeroCounterRatio = (float)ZeroCntNum / CntNum; +} + +/// Either set all the counters in the instr profile entry \p IFE to -1 +/// in order to drop the profile or scale up the counters in \p IFE to +/// be above hot threshold. We use the ratio of zero counters in the +/// profile of a function to decide whether the profile is helpful or harmful +/// for performance, and to choose whether to scale up or drop it. +static void updateInstrProfileEntry(InstrProfileEntry &IFE, + uint64_t HotInstrThreshold, + float ZeroCounterThreshold) { + InstrProfRecord *ProfRecord = IFE.ProfRecord; + if (!IFE.MaxCount || IFE.ZeroCounterRatio > ZeroCounterThreshold) { + // If all or most of the counters of the function are zero, the + // profile is unaccountable and should be dropped. Reset all the + // counters to be -1 and PGO profile-use will drop the profile. + // All counters being -1 also implies that the function is hot so + // PGO profile-use will also set the entry count metadata to be + // above hot threshold. + for (size_t I = 0; I < ProfRecord->Counts.size(); ++I) + ProfRecord->Counts[I] = -1; + return; + } + + // Scale up the MaxCount to be multiple times above hot threshold. 
+ const unsigned MultiplyFactor = 3; + uint64_t Numerator = HotInstrThreshold * MultiplyFactor; + uint64_t Denominator = IFE.MaxCount; + ProfRecord->scale(Numerator, Denominator, [&](instrprof_error E) { + warn(toString(make_error(E))); + }); +} + +const uint64_t ColdPercentileIdx = 15; +const uint64_t HotPercentileIdx = 11; + +/// Adjust the instr profile in \p WC based on the sample profile in +/// \p Reader. +static void +adjustInstrProfile(std::unique_ptr &WC, + std::unique_ptr &Reader, + unsigned SupplMinSizeThreshold, float ZeroCounterThreshold, + unsigned InstrProfColdThreshold) { + // Function to its entry in instr profile. + StringMap InstrProfileMap; + InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs); + for (auto &PD : WC->Writer.getProfileData()) { + // Populate IPBuilder. + for (const auto &PDV : PD.getValue()) { + InstrProfRecord Record = PDV.second; + IPBuilder.addRecord(Record); + } + + // If a function has multiple entries in instr profile, skip it. + if (PD.getValue().size() != 1) + continue; + + // Initialize InstrProfileMap. + InstrProfRecord *R = &PD.getValue().begin()->second; + InstrProfileMap[PD.getKey()] = InstrProfileEntry(R); + } + + ProfileSummary InstrPS = *IPBuilder.getSummary(); + ProfileSummary SamplePS = Reader->getSummary(); + + // Compute cold thresholds for instr profile and sample profile. + uint64_t ColdSampleThreshold = + ProfileSummaryBuilder::getEntryForPercentile( + SamplePS.getDetailedSummary(), + ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx]) + .MinCount; + uint64_t HotInstrThreshold = + ProfileSummaryBuilder::getEntryForPercentile( + InstrPS.getDetailedSummary(), + ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx]) + .MinCount; + uint64_t ColdInstrThreshold = + InstrProfColdThreshold + ? 
InstrProfColdThreshold + : ProfileSummaryBuilder::getEntryForPercentile( + InstrPS.getDetailedSummary(), + ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx]) + .MinCount; + + // Find hot/warm functions in sample profile which are cold in instr profile + // and adjust the profiles of those functions in the instr profile. + for (const auto &PD : Reader->getProfiles()) { + StringRef FName = PD.getKey(); + const sampleprof::FunctionSamples &FS = PD.getValue(); + auto It = InstrProfileMap.find(FName); + if (FS.getHeadSamples() > ColdSampleThreshold && + It != InstrProfileMap.end() && + It->second.MaxCount <= ColdInstrThreshold && + FS.getBodySamples().size() >= SupplMinSizeThreshold) { + updateInstrProfileEntry(It->second, HotInstrThreshold, + ZeroCounterThreshold); + } + } +} + +/// The main function to supplement instr profile with sample profile. +/// \p Inputs contains the instr profile. \p SampleFilename specifies the +/// sample profile. \p OutputFilename specifies the output profile name. +/// \p OutputFormat specifies the output profile format. \p OutputSparse +/// specifies whether to generate sparse profile. \p SupplMinSizeThreshold +/// specifies the minimal size for the functions whose profile will be +/// adjusted. \p ZeroCounterThreshold is the threshold to check whether +/// a function contains too many zero counters and whether its profile +/// should be dropped. \p InstrProfColdThreshold is the user specified +/// cold threshold which will override the cold threshold derived from the +/// instr profile summary. 
+static void supplementInstrProfile( + const WeightedFileVector &Inputs, StringRef SampleFilename, + StringRef OutputFilename, ProfileFormat OutputFormat, bool OutputSparse, + unsigned SupplMinSizeThreshold, float ZeroCounterThreshold, + unsigned InstrProfColdThreshold) { + if (OutputFilename.compare("-") == 0) + exitWithError("Cannot write indexed profdata format to stdout."); + if (Inputs.size() != 1) + exitWithError("Expect one input to be an instr profile."); + if (Inputs[0].Weight != 1) + exitWithError("Expect instr profile doesn't have weight."); + + StringRef InstrFilename = Inputs[0].Filename; + + // Read sample profile. + LLVMContext Context; + auto ReaderOrErr = + sampleprof::SampleProfileReader::create(SampleFilename.str(), Context); + if (std::error_code EC = ReaderOrErr.getError()) + exitWithErrorCode(EC, SampleFilename); + auto Reader = std::move(ReaderOrErr.get()); + if (std::error_code EC = Reader->read()) + exitWithErrorCode(EC, SampleFilename); + + // Read instr profile. + std::mutex ErrorLock; + SmallSet WriterErrorCodes; + auto WC = std::make_unique(OutputSparse, ErrorLock, + WriterErrorCodes); + loadInput(Inputs[0], nullptr, WC.get()); + if (WC->Errors.size() > 0) + exitWithError(std::move(WC->Errors[0].first), InstrFilename); + + adjustInstrProfile(WC, Reader, SupplMinSizeThreshold, ZeroCounterThreshold, + InstrProfColdThreshold); + writeInstrProfile(OutputFilename, OutputFormat, WC->Writer); +} + /// Make a copy of the given function samples with all symbol names remapped /// by the provided symbol remapper. static sampleprof::FunctionSamples @@ -680,6 +846,28 @@ cl::opt GenPartialProfile( "gen-partial-profile", cl::init(false), cl::Hidden, cl::desc("Generate a partial profile (only meaningful for -extbinary)")); + cl::opt SupplInstrWithSample( + "supplement-instr-with-sample", cl::init(""), cl::Hidden, + cl::desc("Supplement an instr profile with sample profile, to correct " + "the profile unrepresentativeness issue. 
The sample " + "profile is the input of the flag. Output will be in instr " + "format (The flag only works with -instr)")); + cl::opt ZeroCounterThreshold( + "zero-counter-threshold", cl::init(0.7), cl::Hidden, + cl::desc("For the function which is cold in instr profile but hot in " + "sample profile, if the ratio of the number of zero counters " + "divided by the the total number of counters is above the " + "threshold, the profile of the function will be regarded as " + "being harmful for performance and will be dropped. ")); + cl::opt SupplMinSizeThreshold( + "suppl-min-size-threshold", cl::init(10), cl::Hidden, + cl::desc("If the size of a function is smaller than the threshold, " + "assume it can be inlined by PGO early inliner and it won't " + "be adjusted based on sample profile. ")); + cl::opt InstrProfColdThreshold( + "instr-prof-cold-threshold", cl::init(0), cl::Hidden, + cl::desc("User specified cold threshold for instr profile which will " + "override the cold threshold got from profile summary. ")); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); @@ -708,6 +896,17 @@ if (!RemappingFile.empty()) Remapper = SymbolRemapper::create(RemappingFile); + if (!SupplInstrWithSample.empty()) { + if (ProfileKind != instr) + exitWithError( + "-supplement-instr-with-sample can only work with -instr. "); + + supplementInstrProfile(WeightedInputs, SupplInstrWithSample, OutputFilename, + OutputFormat, OutputSparse, SupplMinSizeThreshold, + ZeroCounterThreshold, InstrProfColdThreshold); + return 0; + } + if (ProfileKind == instr) mergeInstrProfile(WeightedInputs, Remapper.get(), OutputFilename, OutputFormat, OutputSparse, NumThreads, FailureMode); @@ -904,6 +1103,8 @@ uint64_t FuncMax = 0; uint64_t FuncSum = 0; for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) { + if (Func.Counts[I] == (uint64_t)-1) + continue; FuncMax = std::max(FuncMax, Func.Counts[I]); FuncSum += Func.Counts[I]; }