diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -60,6 +60,11 @@ #include "llvm/ProfileData/InstrProfData.inc" }; +/// Return the max count value. We reserve a few large values for special use. +inline uint64_t getInstrMaxCountValue() { + return std::numeric_limits<uint64_t>::max() - 2; +} + /// Return the name of the profile section corresponding to \p IPSK. /// /// The name of the section depends on the object format type \p OF. If @@ -819,6 +824,35 @@ OverlapStats &Overlap, OverlapStats &FuncLevelOverlap); + /// Kinds of pseudo count a record may carry in its first counter. + enum CountPseudoKind { + NotPseudo = 0, + PseudoHot, + PseudoWarm, + }; + /// Reserved first-counter values; cast to uint64_t they are the two + /// values above getInstrMaxCountValue(). + enum PseudoCountVal { + HotFunctionVal = -1, + WarmFunctionVal = -2, + }; + /// Classify this record by its first counter. A record without any + /// counters is never pseudo. + CountPseudoKind getCountPseudoKind() const { + if (Counts.empty()) + return NotPseudo; + uint64_t FirstCount = Counts[0]; + if (FirstCount == (uint64_t)HotFunctionVal) + return PseudoHot; + if (FirstCount == (uint64_t)WarmFunctionVal) + return PseudoWarm; + return NotPseudo; + } + /// Overwrite the first counter with the marker for \p Kind. Does nothing + /// for NotPseudo or when the record has no counters. + void setPseudoCount(CountPseudoKind Kind) { + if (Counts.empty()) + return; + if (Kind == PseudoHot) + Counts[0] = (uint64_t)HotFunctionVal; + else if (Kind == PseudoWarm) + Counts[0] = (uint64_t)WarmFunctionVal; + } + private: struct ValueProfData { std::vector<InstrProfValueSiteRecord> IndirectCallSites; diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -714,10 +714,33 @@ return; } + // Special handling of the first count as the PseudoCount. + CountPseudoKind OtherKind = Other.getCountPseudoKind(); + CountPseudoKind ThisKind = getCountPseudoKind(); + if (OtherKind != NotPseudo || ThisKind != NotPseudo) { + // We don't allow the merge of a profile with pseudo counts and + // a normal profile (i.e. without pseudo counts). + // Profile supplementation should be done after the profile merge. 
+ if (OtherKind == NotPseudo || ThisKind == NotPseudo) { + Warn(instrprof_error::count_mismatch); + return; + } + if (OtherKind == PseudoHot || ThisKind == PseudoHot) + setPseudoCount(PseudoHot); + else + setPseudoCount(PseudoWarm); + return; + } + for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) { bool Overflowed; - Counts[I] = + uint64_t Value = SaturatingMultiplyAdd(Other.Counts[I], Weight, Counts[I], &Overflowed); + if (Value > getInstrMaxCountValue()) { + Value = getInstrMaxCountValue(); + Overflowed = true; + } + Counts[I] = Value; if (Overflowed) Warn(instrprof_error::counter_overflow); } @@ -739,6 +762,10 @@ for (auto &Count : this->Counts) { bool Overflowed; Count = SaturatingMultiply(Count, N, &Overflowed) / D; + if (Count > getInstrMaxCountValue()) { + Count = getInstrMaxCountValue(); + Overflowed = true; + } if (Overflowed) Warn(instrprof_error::counter_overflow); } diff --git a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp --- a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp +++ b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp @@ -93,6 +93,10 @@ // instrumentation profiles. // Eventually MaxFunctionCount will become obsolete and this can be // removed. + + if (R.getCountPseudoKind() != InstrProfRecord::NotPseudo) + return; + addEntryCount(R.Counts[0]); for (size_t I = 1, E = R.Counts.size(); I < E; ++I) addInternalCount(R.Counts[I]); @@ -220,22 +224,17 @@ } void InstrProfSummaryBuilder::addEntryCount(uint64_t Count) { + assert(Count <= getInstrMaxCountValue() && + "Count value should be less than the max count value."); NumFunctions++; - - // Skip invalid count. - if (Count == (uint64_t)-1) - return; - addCount(Count); if (Count > MaxFunctionCount) MaxFunctionCount = Count; } void InstrProfSummaryBuilder::addInternalCount(uint64_t Count) { - // Skip invalid count. 
- if (Count == (uint64_t)-1) - return; - + assert(Count <= getInstrMaxCountValue() && + "Count value should be less than the max count value."); addCount(Count); if (Count > MaxInternalBlockCount) MaxInternalBlockCount = Count; diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -1019,7 +1019,7 @@ // Read counts for the instrumented BB from profile. bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, - bool &AllMinusOnes); + InstrProfRecord::CountPseudoKind &PseudoKind); // Populate the counts for all BBs. void populateCounters(); @@ -1225,7 +1225,7 @@ // instrumented BB and the edges. This function also updates ProgramMaxCount. // Return true if the profile are successfully read, and false on errors. bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, - bool &AllMinusOnes) { + InstrProfRecord::CountPseudoKind &PseudoKind) { auto &Ctx = M->getContext(); uint64_t MismatchedFuncSum = 0; Expected Result = PGOReader->getInstrProfRecord( @@ -1270,17 +1270,19 @@ return false; } ProfileRecord = std::move(Result.get()); + PseudoKind = ProfileRecord.getCountPseudoKind(); + if (PseudoKind != InstrProfRecord::NotPseudo) { + return true; + } std::vector &CountFromProfile = ProfileRecord.Counts; IsCS ? 
NumOfCSPGOFunc++ : NumOfPGOFunc++; LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); - AllMinusOnes = (CountFromProfile.size() > 0); + uint64_t ValueSum = 0; for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); ValueSum += CountFromProfile[I]; - if (CountFromProfile[I] != (uint64_t)-1) - AllMinusOnes = false; } AllZeros = (ValueSum == 0); @@ -1818,13 +1820,13 @@ SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, InstrumentFuncEntry); - // When AllMinusOnes is true, it means the profile for the function - // is unrepresentative and this function is actually hot. Set the - // entry count of the function to be multiple times of hot threshold - // and drop all its internal counters. - bool AllMinusOnes = false; + // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo, + // it means the profile for the function is unrepresentative and this + // function is actually hot / warm. We will reset the function hot / cold + // attribute and drop all the profile counters. + InstrProfRecord::CountPseudoKind PseudoKind = InstrProfRecord::NotPseudo; bool AllZeros = false; - if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes)) + if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind)) continue; if (AllZeros) { F.setEntryCount(ProfileCount(0, Function::PCT_Real)); @@ -1832,13 +1834,13 @@ ColdFunctions.push_back(&F); continue; } - const unsigned MultiplyFactor = 3; - if (AllMinusOnes) { - uint64_t HotThreshold = PSI->getHotCountThreshold(); - if (HotThreshold) - F.setEntryCount( - ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real)); - HotFunctions.push_back(&F); + if (PseudoKind != InstrProfRecord::NotPseudo) { + // Clear function attribute cold. 
+ if (F.hasFnAttribute(Attribute::Cold)) + F.removeFnAttr(Attribute::Cold); + // Set function attribute as hot. + if (PseudoKind == InstrProfRecord::PseudoHot) + F.addFnAttr(Attribute::Hot); continue; } Func.populateCounters(); diff --git a/llvm/test/Transforms/PGOProfile/Inputs/sample-profile.proftext b/llvm/test/Transforms/PGOProfile/Inputs/sample-profile-hot.proftext rename from llvm/test/Transforms/PGOProfile/Inputs/sample-profile.proftext rename to llvm/test/Transforms/PGOProfile/Inputs/sample-profile-hot.proftext diff --git a/llvm/test/Transforms/PGOProfile/Inputs/sample-profile-warm.proftext b/llvm/test/Transforms/PGOProfile/Inputs/sample-profile-warm.proftext new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/Inputs/sample-profile-warm.proftext @@ -0,0 +1,13 @@ +test_simple_for:40:40 + 1: 10 + 2: 10 + 3: 10 + 4: 10 + +moo:1000:1000 + 1: 2000 + 2: 2000 + 3: 2000 + 4: 2000 + 5: 2000 + diff --git a/llvm/test/Transforms/PGOProfile/Inputs/suppl-profile.proftext b/llvm/test/Transforms/PGOProfile/Inputs/suppl-profile.proftext --- a/llvm/test/Transforms/PGOProfile/Inputs/suppl-profile.proftext +++ b/llvm/test/Transforms/PGOProfile/Inputs/suppl-profile.proftext @@ -13,3 +13,12 @@ 270 180 760 + +boo +2582734 +4 +100 +27 +10 +70 + diff --git a/llvm/test/Transforms/PGOProfile/suppl-profile.ll b/llvm/test/Transforms/PGOProfile/suppl-profile.ll --- a/llvm/test/Transforms/PGOProfile/suppl-profile.ll +++ b/llvm/test/Transforms/PGOProfile/suppl-profile.ll @@ -1,19 +1,30 @@ ; Supplement instr profile suppl-profile.proftext with sample profile ; sample-profile.proftext. 
+; For hot functions: ; RUN: llvm-profdata merge -instr -suppl-min-size-threshold=0 \ -; RUN: -supplement-instr-with-sample=%p/Inputs/sample-profile.proftext \ +; RUN: -supplement-instr-with-sample=%p/Inputs/sample-profile-hot.proftext \ ; RUN: %S/Inputs/suppl-profile.proftext -o %t.profdata -; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=HOT +; For warm functions: +; RUN: llvm-profdata merge -instr -suppl-min-size-threshold=0 \ +; RUN: -supplement-instr-with-sample=%p/Inputs/sample-profile-warm.proftext \ +; RUN: %S/Inputs/suppl-profile.proftext -o %t1.profdata +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t1.profdata -S | FileCheck %s --check-prefix=WARM target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; Check test_simple_for has a non-zero entry count and doesn't have any other -; prof metadata. -; CHECK: @test_simple_for(i32 %n) {{.*}} !prof ![[ENTRY_COUNT:[0-9]+]] -; CHECK-NOT: !prof ! -; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 540} -define i32 @test_simple_for(i32 %n) { +; Check test_simple_for has proper hot/cold attribute and no profile counts. 
+; HOT: @test_simple_for(i32 %n) +; HOT-SAME: #[[ATTRIBTE:[0-9]*]] +; HOT-NOT: !prof !{{.*}} +; HOT-SAME: { +; HOT: attributes #[[ATTRIBTE]] = { hot } +; WARM: @test_simple_for(i32 %n) +; WARM-NOT: #{{.*}} +; WARM-NOT: !prof !{{.*}} +; WARM-SAME: { +define i32 @test_simple_for(i32 %n) #0 { entry: br label %for.cond @@ -34,3 +45,5 @@ for.end: ret i32 %sum } + +attributes #0 = { cold } diff --git a/llvm/test/tools/llvm-profdata/Inputs/overflow-instr.proftext b/llvm/test/tools/llvm-profdata/Inputs/overflow-instr.proftext --- a/llvm/test/tools/llvm-profdata/Inputs/overflow-instr.proftext +++ b/llvm/test/tools/llvm-profdata/Inputs/overflow-instr.proftext @@ -1,6 +1,6 @@ overflow 1 3 -18446744073709551615 +18446744073709551613 9223372036854775808 -18446744073709551615 +18446744073709551613 diff --git a/llvm/test/tools/llvm-profdata/Inputs/pseudo-count-hot.proftext b/llvm/test/tools/llvm-profdata/Inputs/pseudo-count-hot.proftext new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/pseudo-count-hot.proftext @@ -0,0 +1,6 @@ +overflow +1 +3 +18446744073709551615 +0 +0 diff --git a/llvm/test/tools/llvm-profdata/Inputs/pseudo-count-warm.proftext b/llvm/test/tools/llvm-profdata/Inputs/pseudo-count-warm.proftext new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/pseudo-count-warm.proftext @@ -0,0 +1,6 @@ +overflow +1 +3 +18446744073709551614 +0 +0 diff --git a/llvm/test/tools/llvm-profdata/overflow-instr.test b/llvm/test/tools/llvm-profdata/overflow-instr.test --- a/llvm/test/tools/llvm-profdata/overflow-instr.test +++ b/llvm/test/tools/llvm-profdata/overflow-instr.test @@ -4,12 +4,13 @@ RUN: llvm-profdata merge -instr %p/Inputs/overflow-instr.proftext %p/Inputs/overflow-instr.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=MERGE_OVERFLOW RUN: llvm-profdata show -instr -all-functions -counts %t.out | FileCheck %s --check-prefix=SHOW_OVERFLOW MERGE_OVERFLOW: {{.*}}.proftext: overflow: counter overflow -SHOW_OVERFLOW: 
Function count: 18446744073709551615 -SHOW_OVERFLOW-NEXT: Block counts: [18446744073709551615, 18446744073709551615] +SHOW_OVERFLOW: Function count: 18446744073709551613 +SHOW_OVERFLOW-NEXT: Block counts: [18446744073709551613, 18446744073709551613] 2- Merge profile having maximum counts by itself and verify no overflow RUN: llvm-profdata merge -instr %p/Inputs/overflow-instr.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=MERGE_NO_OVERFLOW -allow-empty RUN: llvm-profdata show -instr -all-functions -counts %t.out | FileCheck %s --check-prefix=SHOW_NO_OVERFLOW MERGE_NO_OVERFLOW-NOT: {{.*}}.proftext: overflow: counter overflow -SHOW_NO_OVERFLOW: Function count: 18446744073709551615 -SHOW_NO_OVERFLOW-NEXT: Block counts: [9223372036854775808, 18446744073709551615] +SHOW_NO_OVERFLOW: Function count: 18446744073709551613 +SHOW_NO_OVERFLOW-NEXT: Block counts: [9223372036854775808, 18446744073709551613] + diff --git a/llvm/test/tools/llvm-profdata/pseudo_count.test b/llvm/test/tools/llvm-profdata/pseudo_count.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/pseudo_count.test @@ -0,0 +1,22 @@ +Tests for merging instrumented profiles with pseudo counts. 
+ +RUN: llvm-profdata merge -instr %p/Inputs/pseudo-count-warm.proftext -o %t1.out 2>&1 | FileCheck %s -check-prefix=MERGE_OVERFLOW -allow-empty +RUN: llvm-profdata show -instr -all-functions -counts %t1.out | FileCheck %s --check-prefix=MERGE_WARM +RUN: llvm-profdata merge -instr %p/Inputs/pseudo-count-warm.proftext %p/Inputs/pseudo-count-warm.proftext -o %t2.out 2>&1 | FileCheck %s -check-prefix=MERGE_OVERFLOW -allow-empty +RUN: llvm-profdata show -instr -all-functions -counts %t2.out | FileCheck %s --check-prefix=MERGE_WARM +RUN: llvm-profdata merge -instr %p/Inputs/pseudo-count-hot.proftext -o %t3.out 2>&1 | FileCheck %s -check-prefix=MERGE_OVERFLOW -allow-empty +RUN: llvm-profdata show -instr -all-functions -counts %t3.out | FileCheck %s --check-prefix=MERGE_HOT +RUN: llvm-profdata merge -instr %p/Inputs/pseudo-count-hot.proftext %p/Inputs/pseudo-count-hot.proftext -o %t4.out 2>&1 | FileCheck %s -check-prefix=MERGE_OVERFLOW -allow-empty +RUN: llvm-profdata show -instr -all-functions -counts %t4.out | FileCheck %s --check-prefix=MERGE_HOT +RUN: llvm-profdata merge -instr %p/Inputs/pseudo-count-hot.proftext %p/Inputs/pseudo-count-warm.proftext -o %t5.out 2>&1 | FileCheck %s -check-prefix=MERGE_OVERFLOW -allow-empty +RUN: llvm-profdata show -instr -all-functions -counts %t5.out | FileCheck %s --check-prefix=MERGE_HOT +RUN: llvm-profdata merge -instr %p/Inputs/overflow-instr.proftext %p/Inputs/pseudo-count-warm.proftext -o %t6.out 2>&1 | FileCheck %s -check-prefix=MERGE_WARNING +RUN: llvm-profdata merge -instr %p/Inputs/overflow-instr.proftext %p/Inputs/pseudo-count-hot.proftext -o %t7.out 2>&1 | FileCheck %s -check-prefix=MERGE_WARNING + +MERGE_OVERFLOW-NOT: {{.*}}.proftext: overflow: counter overflow +MERGE_WARM: Counters: 3 +MERGE_WARM-NEXT: Instrumentation level: Front-end +MERGE_HOT: Counters: 3 +MERGE_HOT-NEXT: Instrumentation level: Front-end +MERGE_WARNING: {{.*}}.proftext: overflow: function basic block count change detected (counter mismatch) + diff 
--git a/llvm/test/tools/llvm-profdata/suppl-instr-with-sample-static-func.test b/llvm/test/tools/llvm-profdata/suppl-instr-with-sample-static-func.test --- a/llvm/test/tools/llvm-profdata/suppl-instr-with-sample-static-func.test +++ b/llvm/test/tools/llvm-profdata/suppl-instr-with-sample-static-func.test @@ -12,4 +12,5 @@ RUN: llvm-profdata merge -supplement-instr-with-sample=%p/Inputs/FUnique.afdotext -suppl-min-size-threshold=2 %p/Inputs/NoFUnique.proftext -o %t4 RUN: llvm-profdata show -function=foo -counts %t4 | FileCheck %s -CHECK: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615] +CHECK: Counters: 3 +CHECK-NEXT: Instrumentation level: diff --git a/llvm/test/tools/llvm-profdata/suppl-instr-with-sample.test b/llvm/test/tools/llvm-profdata/suppl-instr-with-sample.test --- a/llvm/test/tools/llvm-profdata/suppl-instr-with-sample.test +++ b/llvm/test/tools/llvm-profdata/suppl-instr-with-sample.test @@ -1,6 +1,6 @@ Some basic tests for supplementing instrumentation profile with sample profile. -Test all of goo's counters will be set to -1. +Test all of goo's counters will be set to PseudoHot. 
RUN: llvm-profdata merge \ RUN: -supplement-instr-with-sample=%p/Inputs/mix_sample.proftext \ RUN: -suppl-min-size-threshold=0 %p/Inputs/mix_instr.proftext -o %t @@ -13,7 +13,8 @@ MIX1: goo: MIX1-NEXT: Hash: 0x0000000000000005 MIX1-NEXT: Counters: 3 -MIX1-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615] +MIX1-NOT: Block counts: +MIX1-SAME: <PseudoHot> MIX1: moo: MIX1-NEXT: Hash: 0x0000000000000009 MIX1-NEXT: Counters: 4 @@ -29,11 +30,13 @@ MIX2: foo: MIX2-NEXT: Hash: 0x0000000000000007 MIX2-NEXT: Counters: 5 -MIX2-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615, 18446744073709551615, 18446744073709551615] +MIX2-NOT: Block counts: +MIX2-SAME: <PseudoHot> MIX2: goo: MIX2-NEXT: Hash: 0x0000000000000005 MIX2-NEXT: Counters: 3 -MIX2-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615] +MIX2-NOT: Block counts: +MIX2-SAME: <PseudoHot> MIX2: moo: MIX2-NEXT: Hash: 0x0000000000000009 MIX2-NEXT: Counters: 4 @@ -53,7 +56,8 @@ MIX3: goo: MIX3-NEXT: Hash: 0x0000000000000005 MIX3-NEXT: Counters: 3 -MIX3-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615] +MIX3-NOT: Block counts: +MIX3-SAME: <PseudoHot> MIX3: moo: MIX3-NEXT: Hash: 0x0000000000000009 MIX3-NEXT: Counters: 4 @@ -74,20 +78,21 @@ MIX4: goo: MIX4-NEXT: Hash: 0x0000000000000005 MIX4-NEXT: Counters: 3 -MIX4-NEXT: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615] +MIX4-NOT: Block counts: +MIX4-SAME: <PseudoHot> MIX4: moo: MIX4-NEXT: Hash: 0x0000000000000009 MIX4-NEXT: Counters: 1 MIX4-NEXT: Block counts: [0] -Test profile summary won't be affected by -1 counter. +Test profile summary won't be affected by pseudo counters. 
RUN: llvm-profdata merge \ RUN: -supplement-instr-with-sample=%p/Inputs/mix_sample.proftext \ RUN: -suppl-min-size-threshold=0 %p/Inputs/mix_instr.proftext -o %t RUN: llvm-profdata show %t -detailed-summary | FileCheck %s --check-prefix=MIX5 MIX5: Instrumentation level: IR -MIX5-NEXT: Total functions: 3 +MIX5-NEXT: Total functions: 2 MIX5-NEXT: Maximum function count: 3000 MIX5-NEXT: Maximum internal block count: 2000 MIX5-NEXT: Total number of blocks: 9 diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -485,31 +485,46 @@ NumEdgeCounters = CntNum; } -// Either set all the counters in the instr profile entry \p IFE to -1 -/// in order to drop the profile or scale up the counters in \p IFP to -/// be above hot threshold. We use the ratio of zero counters in the -/// profile of a function to decide the profile is helpful or harmful -/// for performance, and to choose whether to scale up or drop it. -static void updateInstrProfileEntry(InstrProfileEntry &IFE, +/// Either set the first counter in the instr profile entry \p IFE to +/// -1 / -2 in order to drop the profile or scale up the +/// counters in \p IFE to be above hot / cold threshold. We use +/// the ratio of zero counters in the profile of a function to +/// decide the profile is helpful or harmful for performance, +/// and to choose whether to scale up or drop it. +static void updateInstrProfileEntry(InstrProfileEntry &IFE, bool SetToHot, uint64_t HotInstrThreshold, + uint64_t ColdInstrThreshold, float ZeroCounterThreshold) { InstrProfRecord *ProfRecord = IFE.ProfRecord; if (!IFE.MaxCount || IFE.ZeroCounterRatio > ZeroCounterThreshold) { // If all or most of the counters of the function are zero, the - // profile is unaccountable and shuld be dropped. Reset all the - // counters to be -1 and PGO profile-use will drop the profile. 
+ // profile is unaccountable and should be dropped. Set the first + // counter to -1 / -2 and PGO profile-use will drop the profile. - // All counters being -1 also implies that the function is hot so - // PGO profile-use will also set the entry count metadata to be - // above hot threshold. - for (size_t I = 0; I < ProfRecord->Counts.size(); ++I) - ProfRecord->Counts[I] = -1; + // A first counter of -1 implies that the function is hot, so PGO + // profile-use clears the cold attribute and sets the hot + // attribute. + // A first counter of -2 implies that the function is warm, so + // PGO profile-use only clears the cold attribute and does not + // mark the function hot. + auto Kind = + (SetToHot ? InstrProfRecord::PseudoHot : InstrProfRecord::PseudoWarm); + ProfRecord->setPseudoCount(Kind); return; } - // Scale up the MaxCount to be multiple times above hot threshold. + // Scale up the MaxCount to be multiple times above hot / cold threshold. const unsigned MultiplyFactor = 3; - uint64_t Numerator = HotInstrThreshold * MultiplyFactor; + uint64_t Threshold = (SetToHot ? HotInstrThreshold : ColdInstrThreshold); + uint64_t Numerator = Threshold * MultiplyFactor; + + // Keep the scaled count of warm functions below HotInstrThreshold. + if (!SetToHot && Numerator >= HotInstrThreshold) { + Numerator = (HotInstrThreshold + ColdInstrThreshold) / 2; + } + uint64_t Denominator = IFE.MaxCount; + if (Numerator <= Denominator) + return; ProfRecord->scale(Numerator, Denominator, [&](instrprof_error E) { warn(toString(make_error<InstrProfError>(E))); }); @@ -635,6 +650,11 @@ ProfileSummary SamplePS = Reader->getSummary(); // Compute cold thresholds for instr profile and sample profile. + uint64_t HotSampleThreshold = + ProfileSummaryBuilder::getEntryForPercentile( + SamplePS.getDetailedSummary(), + ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx]) + .MinCount; uint64_t ColdSampleThreshold = ProfileSummaryBuilder::getEntryForPercentile( SamplePS.getDetailedSummary(), @@ -657,7 +677,8 @@ // and adjust the profiles of those functions in the instr profile. 
for (const auto &PD : Reader->getProfiles()) { const sampleprof::FunctionSamples &FS = PD.second; - if (FS.getMaxCountInside() <= ColdSampleThreshold) + uint64_t SampleMaxCount = FS.getMaxCountInside(); + if (SampleMaxCount < ColdSampleThreshold) continue; auto &FContext = PD.first; auto It = InstrProfileMap.find(FContext.toString()); @@ -676,8 +697,9 @@ It->second.MaxCount > ColdInstrThreshold || It->second.NumEdgeCounters < SupplMinSizeThreshold) continue; - updateInstrProfileEntry(It->second, HotInstrThreshold, - ZeroCounterThreshold); + bool SetToHot = SampleMaxCount >= HotSampleThreshold; + updateInstrProfileEntry(It->second, SetToHot, HotInstrThreshold, + ColdInstrThreshold, ZeroCounterThreshold); } } @@ -2294,9 +2316,29 @@ uint64_t FuncMax = 0; uint64_t FuncSum = 0; + + // For a pseudo record, print (if shown) only the header and a + // hot/warm marker, then skip the counter processing below. + auto PseudoKind = Func.getCountPseudoKind(); + if (PseudoKind != InstrProfRecord::NotPseudo) { + if (Show) { + if (!ShownFunctions) + OS << "Counters:\n"; + ++ShownFunctions; + OS << " " << Func.Name << ":\n" + << " Hash: " << format("0x%016" PRIx64, Func.Hash) << "\n" + << " Counters: " << Func.Counts.size(); + if (PseudoKind == InstrProfRecord::PseudoHot) + OS << " <PseudoHot>\n"; + else if (PseudoKind == InstrProfRecord::PseudoWarm) + OS << " <PseudoWarm>\n"; + else + llvm_unreachable("Unknown PseudoKind"); + } + continue; + } + for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) { - if (Func.Counts[I] == (uint64_t)-1) - continue; FuncMax = std::max(FuncMax, Func.Counts[I]); FuncSum += Func.Counts[I]; } diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -766,7 +766,8 @@ TEST_P(MaybeSparseInstrProfTest, get_icall_data_merge1_saturation) { static const char bar[] = "bar"; - const uint64_t Max = std::numeric_limits<uint64_t>::max(); + const uint64_t MaxValCount = std::numeric_limits<uint64_t>::max(); + const uint64_t MaxEdgeCount = getInstrMaxCountValue(); instrprof_error 
Result; auto Err = [&](Error E) { Result = InstrProfError::take(std::move(E)); }; @@ -776,7 +777,7 @@ // Verify counter overflow. Result = instrprof_error::success; - Writer.addRecord({"foo", 0x1234, {Max}}, Err); + Writer.addRecord({"foo", 0x1234, {MaxEdgeCount}}, Err); ASSERT_EQ(Result, instrprof_error::counter_overflow); Result = instrprof_error::success; @@ -794,7 +795,7 @@ // Verify value data counter overflow. NamedInstrProfRecord Record5("baz", 0x5678, {5, 6}); Record5.reserveSites(IPVK_IndirectCallTarget, 1); - InstrProfValueData VD5[] = {{uint64_t(bar), Max}}; + InstrProfValueData VD5[] = {{uint64_t(bar), MaxValCount}}; Record5.addValueData(IPVK_IndirectCallTarget, 0, VD5, 1, nullptr); Result = instrprof_error::success; Writer.addRecord(std::move(Record5), Err); @@ -807,7 +808,7 @@ Expected ReadRecord1 = Reader->getInstrProfRecord("foo", 0x1234); EXPECT_THAT_ERROR(ReadRecord1.takeError(), Succeeded()); - ASSERT_EQ(Max, ReadRecord1->Counts[0]); + ASSERT_EQ(MaxEdgeCount, ReadRecord1->Counts[0]); Expected ReadRecord2 = Reader->getInstrProfRecord("baz", 0x5678); @@ -816,7 +817,7 @@ std::unique_ptr VD = ReadRecord2->getValueForSite(IPVK_IndirectCallTarget, 0); ASSERT_EQ(StringRef("bar"), StringRef((const char *)VD[0].Value, 3)); - ASSERT_EQ(Max, VD[0].Count); + ASSERT_EQ(MaxValCount, VD[0].Count); } // This test tests that when there are too many values