Index: llvm/lib/ProfileData/ProfileSummaryBuilder.cpp =================================================================== --- llvm/lib/ProfileData/ProfileSummaryBuilder.cpp +++ llvm/lib/ProfileData/ProfileSummaryBuilder.cpp @@ -223,7 +223,7 @@ NumFunctions++; // Skip invalid count. - if (Count == (uint64_t)-1) + if (Count == (uint64_t)-1 || Count == (uint64_t)-2) return; addCount(Count); @@ -233,7 +233,7 @@ void InstrProfSummaryBuilder::addInternalCount(uint64_t Count) { // Skip invalid count. - if (Count == (uint64_t)-1) + if (Count == (uint64_t)-1 || Count == (uint64_t)-2) return; addCount(Count); Index: llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp =================================================================== --- llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -1019,7 +1019,7 @@ // Read counts for the instrumented BB from profile. bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, - bool &AllMinusOnes); + int &AllMinus); // Populate the counts for all BBs. void populateCounters(); @@ -1225,7 +1225,7 @@ // instrumented BB and the edges. This function also updates ProgramMaxCount. // Return true if the profile are successfully read, and false on errors. bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, - bool &AllMinusOnes) { + int &AllMinus) { auto &Ctx = M->getContext(); uint64_t MismatchedFuncSum = 0; Expected Result = PGOReader->getInstrProfRecord( @@ -1274,15 +1274,26 @@ IsCS ? 
NumOfCSPGOFunc++ : NumOfPGOFunc++; LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); - AllMinusOnes = (CountFromProfile.size() > 0); uint64_t ValueSum = 0; + bool AllMinusOne = true; + bool AllMinusTwo = true; for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); ValueSum += CountFromProfile[I]; if (CountFromProfile[I] != (uint64_t)-1) - AllMinusOnes = false; - } + AllMinusOne = false; + if (CountFromProfile[I] != (uint64_t)-2) + AllMinusTwo = false; + } + if (AllMinusOne) + AllMinus = -1; + else if (AllMinusTwo) + AllMinus = -2; + else + AllMinus = 0; AllZeros = (ValueSum == 0); + LLVM_DEBUG(dbgs() << "AllMinus = " << AllMinus << " AllZeros = " << AllZeros + << "\n"); LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); @@ -1818,13 +1829,13 @@ SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, InstrumentFuncEntry); - // When AllMinusOnes is true, it means the profile for the function - // is unrepresentative and this function is actually hot. Set the + // When AllMinus is not zero, it means the profile for the function + // is unrepresentative and this function is actually hot / warm. Set the // entry count of the function to be multiple times of hot threshold // and drop all its internal counters. - bool AllMinusOnes = false; + int AllMinus = 0; bool AllZeros = false; - if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes)) + if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinus)) continue; if (AllZeros) { F.setEntryCount(ProfileCount(0, Function::PCT_Real)); @@ -1833,12 +1844,14 @@ continue; } const unsigned MultiplyFactor = 3; - if (AllMinusOnes) { - uint64_t HotThreshold = PSI->getHotCountThreshold(); - if (HotThreshold) + if (AllMinus == -1 || AllMinus == -2) { + uint64_t Threshold = AllMinus == -1 ? 
PSI->getHotCountThreshold() + : PSI->getColdCountThreshold(); + if (Threshold) F.setEntryCount( - ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real)); - HotFunctions.push_back(&F); + ProfileCount(Threshold * MultiplyFactor, Function::PCT_Real)); + if (AllMinus == -1) + HotFunctions.push_back(&F); continue; } Func.populateCounters(); Index: llvm/test/Transforms/PGOProfile/Inputs/sample-profile-warm.proftext =================================================================== --- /dev/null +++ llvm/test/Transforms/PGOProfile/Inputs/sample-profile-warm.proftext @@ -0,0 +1,13 @@ +test_simple_for:40:40 + 1: 10 + 2: 10 + 3: 10 + 4: 10 + +moo:1000:1000 + 1: 2000 + 2: 2000 + 3: 2000 + 4: 2000 + 5: 2000 + Index: llvm/test/Transforms/PGOProfile/Inputs/suppl-profile.proftext =================================================================== --- llvm/test/Transforms/PGOProfile/Inputs/suppl-profile.proftext +++ llvm/test/Transforms/PGOProfile/Inputs/suppl-profile.proftext @@ -13,3 +13,12 @@ 270 180 760 + +boo +2582734 +4 +100 +27 +10 +70 + Index: llvm/test/Transforms/PGOProfile/suppl-profile.ll =================================================================== --- llvm/test/Transforms/PGOProfile/suppl-profile.ll +++ llvm/test/Transforms/PGOProfile/suppl-profile.ll @@ -1,18 +1,27 @@ ; Supplement instr profile suppl-profile.proftext with sample profile ; sample-profile.proftext. 
+; For hot functions: ; RUN: llvm-profdata merge -instr -suppl-min-size-threshold=0 \ -; RUN: -supplement-instr-with-sample=%p/Inputs/sample-profile.proftext \ +; RUN: -supplement-instr-with-sample=%p/Inputs/sample-profile-hot.proftext \ ; RUN: %S/Inputs/suppl-profile.proftext -o %t.profdata -; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=HOT +; For warm functions: +; RUN: llvm-profdata merge -instr -suppl-min-size-threshold=0 \ +; RUN: -supplement-instr-with-sample=%p/Inputs/sample-profile-warm.proftext \ +; RUN: %S/Inputs/suppl-profile.proftext -o %t1.profdata +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t1.profdata -S | FileCheck %s --check-prefix=WARM target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" ; Check test_simple_for has a non-zero entry count and doesn't have any other ; prof metadata. -; CHECK: @test_simple_for(i32 %n) {{.*}} !prof ![[ENTRY_COUNT:[0-9]+]] -; CHECK-NOT: !prof ! -; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 540} +; HOT: @test_simple_for(i32 %n) {{.*}} !prof ![[ENTRY_COUNT:[0-9]+]] +; HOT-NOT: !prof ! +; HOT: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 81} +; WARM: @test_simple_for(i32 %n) !prof ![[ENTRY_COUNT:[0-9]+]] +; WARM-NOT: !prof ! 
+; WARM: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 30} define i32 @test_simple_for(i32 %n) { entry: br label %for.cond Index: llvm/tools/llvm-profdata/llvm-profdata.cpp =================================================================== --- llvm/tools/llvm-profdata/llvm-profdata.cpp +++ llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -478,31 +478,39 @@ ZeroCounterRatio = (float)ZeroCntNum / CntNum; } -/// Either set all the counters in the instr profile entry \p IFE to -1 -/// in order to drop the profile or scale up the counters in \p IFP to -/// be above hot threshold. We use the ratio of zero counters in the -/// profile of a function to decide the profile is helpful or harmful -/// for performance, and to choose whether to scale up or drop it. -static void updateInstrProfileEntry(InstrProfileEntry &IFE, +/// Either set all the counters in the instr profile entry \p IFE to +/// -1 / -2 in order to drop the profile or scale up the +/// counters in \p IFE to be above hot / cold threshold. We use +/// the ratio of zero counters in the profile of a function to +/// decide the profile is helpful or harmful for performance, +/// and to choose whether to scale up or drop it. +static void updateInstrProfileEntry(InstrProfileEntry &IFE, bool SetToHot, uint64_t HotInstrThreshold, + uint64_t ColdInstrThreshold, float ZeroCounterThreshold) { InstrProfRecord *ProfRecord = IFE.ProfRecord; if (!IFE.MaxCount || IFE.ZeroCounterRatio > ZeroCounterThreshold) { // If all or most of the counters of the function are zero, the // profile is unaccountable and shuld be dropped. Reset all the - // counters to be -1 and PGO profile-use will drop the profile. + // counters to be -1 / -2 and PGO profile-use will drop the profile. // All counters being -1 also implies that the function is hot so // PGO profile-use will also set the entry count metadata to be // above hot threshold. 
+ // All counters being -2 implies that the function is warm so + // PGO profile-use will also set the entry count metadata to be + // above cold threshold. for (size_t I = 0; I < ProfRecord->Counts.size(); ++I) - ProfRecord->Counts[I] = -1; + ProfRecord->Counts[I] = (SetToHot ? -1 : -2); return; } // Scale up the MaxCount to be multiple times above hot threshold. const unsigned MultiplyFactor = 3; - uint64_t Numerator = HotInstrThreshold * MultiplyFactor; + uint64_t Threshold = (SetToHot ? HotInstrThreshold : ColdInstrThreshold); + uint64_t Numerator = Threshold * MultiplyFactor; uint64_t Denominator = IFE.MaxCount; + if (Numerator <= Denominator) + return; ProfRecord->scale(Numerator, Denominator, [&](instrprof_error E) { warn(toString(make_error(E))); }); @@ -560,6 +568,11 @@ ProfileSummary SamplePS = Reader->getSummary(); // Compute cold thresholds for instr profile and sample profile. + uint64_t HotSampleThreshold = + ProfileSummaryBuilder::getEntryForPercentile( + SamplePS.getDetailedSummary(), + ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx]) + .MinCount; uint64_t ColdSampleThreshold = ProfileSummaryBuilder::getEntryForPercentile( SamplePS.getDetailedSummary(), @@ -584,12 +597,17 @@ auto &FContext = PD.first; const sampleprof::FunctionSamples &FS = PD.second; auto It = InstrProfileMap.find(FContext.toString()); - if (FS.getHeadSamples() > ColdSampleThreshold && - It != InstrProfileMap.end() && + + uint64_t SampleMaxCount = FS.getMaxCountInside(); + if (SampleMaxCount < ColdSampleThreshold) + continue; + + if (It != InstrProfileMap.end() && It->second.MaxCount <= ColdInstrThreshold && FS.getBodySamples().size() >= SupplMinSizeThreshold) { - updateInstrProfileEntry(It->second, HotInstrThreshold, - ZeroCounterThreshold); + bool SetToHot = SampleMaxCount >= HotSampleThreshold; + updateInstrProfileEntry(It->second, SetToHot, HotInstrThreshold, + ColdInstrThreshold, ZeroCounterThreshold); } } }