diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -252,6 +252,10 @@
     "pgo-instrument-entry", cl::init(false), cl::Hidden,
     cl::desc("Force to instrument function entry basicblock."));
 
+static cl::opt<bool>
+    PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
+                     cl::desc("Fix function entry count in profile use."));
+
 static cl::opt<bool> PGOVerifyHotBFI(
     "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
     cl::desc("Print out the non-match BFI count if a hot raw profile count "
@@ -1640,6 +1644,69 @@
   return PreservedAnalyses::none();
 }
 
+// Rescale the function entry count so that, in aggregate, the block counts
+// reported by BFI agree with the raw profile counts.
+//
+// A BlockFrequencyInfo is built from NBPI and LI. For each block of the
+// function that has a BBInfo in Func, the raw profile count and the count
+// reported by that BFI are added to two running sums. The raw count of the
+// first block is then multiplied by (profile sum / BFI sum), rounded to the
+// nearest integer, raised to 1 if it came out as 0, and stored as the new
+// PCT_Real entry count.
+//
+// The entry count is left untouched when the profile sum is zero, when the
+// sums are equal or differ by less than 0.1%, or when the rescaled value
+// equals the first block's raw count. Builds with assertions enabled also
+// check that the function already has a positive entry count.
+static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
+                              BranchProbabilityInfo &NBPI) {
+  Function &F = Func.getFunc();
+  BlockFrequencyInfo NBFI(F, NBPI, LI);
+#ifndef NDEBUG
+  auto BFIEntryCount = F.getEntryCount();
+  assert(BFIEntryCount.hasValue() && (BFIEntryCount.getCount() > 0) &&
+         "Invalid BFI Entrycount");
+#endif
+  auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
+  auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
+  for (auto &BBI : F) {
+    uint64_t CountValue = 0;
+    uint64_t BFICountValue = 0;
+    // Blocks without a BBInfo contribute to neither sum.
+    if (!Func.findBBInfo(&BBI))
+      continue;
+    auto BFICount = NBFI.getBlockProfileCount(&BBI);
+    CountValue = Func.getBBInfo(&BBI).CountValue;
+    BFICountValue = BFICount.getValue();
+    SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
+    SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
+  }
+  if (SumCount.isZero())
+    return;
+
+  assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
+         "Incorrect sum of BFI counts");
+  if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
+    return;
+  double Scale = (SumCount / SumBFICount).convertToDouble();
+  // Treat a mismatch below 0.1% as already matching.
+  if (Scale < 1.001 && Scale > 0.999)
+    return;
+
+  uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue;
+  // Adding 0.5 before the truncating conversion rounds to nearest.
+  uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
+  // Keep the entry count non-zero.
+  if (NewEntryCount == 0)
+    NewEntryCount = 1;
+  if (NewEntryCount != FuncEntryCount) {
+    F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
+    LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
+                      << ", entry_count " << FuncEntryCount << " --> "
+                      << NewEntryCount << "\n");
+  }
+}
+
 // Compare the profile count values with BFI count values, and print out
 // the non-matching ones.
 static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
@@ -1842,10 +1909,15 @@
       }
     }
 
-    // Verify BlockFrequency information.
-    if (PGOVerifyBFI || PGOVerifyHotBFI) {
+    if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) {
       LoopInfo LI{DominatorTree(F)};
       BranchProbabilityInfo NBPI(F, LI);
+
+      // Fix func entry count.
+      if (PGOFixEntryCount)
+        fixFuncEntryCount(Func, LI, NBPI);
+
+      // Verify BlockFrequency information.
       uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
       if (PGOVerifyHotBFI) {
         HotCountThreshold = PSI->getOrCompHotCountThreshold();
diff --git a/llvm/test/Transforms/PGOProfile/Inputs/fix_bfi.proftext b/llvm/test/Transforms/PGOProfile/Inputs/fix_bfi.proftext
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/Inputs/fix_bfi.proftext
@@ -0,0 +1,16 @@
+# IR level Instrumentation Flag
+:ir
+sort_basket
+# Func Hash:
+948827210500800754
+# Num Counters:
+7
+# Counter Values:
+41017879
+31616738
+39637749
+32743703
+13338888
+6990942
+6013544
+
diff --git a/llvm/test/Transforms/PGOProfile/bfi_verification.ll b/llvm/test/Transforms/PGOProfile/bfi_verification.ll
--- a/llvm/test/Transforms/PGOProfile/bfi_verification.ll
+++ b/llvm/test/Transforms/PGOProfile/bfi_verification.ll
@@ -1,7 +1,7 @@
 ; Note: Verify bfi counter after loading the profile.
 ; RUN: llvm-profdata merge %S/Inputs/bfi_verification.proftext -o %t.profdata
-; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S -pgo-verify-bfi-ratio=2 -pgo-verify-bfi=true -pass-remarks-analysis=pgo 2>&1 | FileCheck %s --check-prefix=THRESHOLD-CHECK
-; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S -pgo-verify-hot-bfi=true -pass-remarks-analysis=pgo 2>&1 | FileCheck %s --check-prefix=HOTONLY-CHECK
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S -pgo-verify-bfi-ratio=2 -pgo-verify-bfi=true -pgo-fix-entry-count=false -pass-remarks-analysis=pgo 2>&1 | FileCheck %s --check-prefix=THRESHOLD-CHECK
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S -pgo-verify-hot-bfi=true -pgo-fix-entry-count=false -pass-remarks-analysis=pgo 2>&1 | FileCheck %s --check-prefix=HOTONLY-CHECK
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/PGOProfile/bfi_verification.ll b/llvm/test/Transforms/PGOProfile/fix_bfi.ll
copy from llvm/test/Transforms/PGOProfile/bfi_verification.ll
copy to llvm/test/Transforms/PGOProfile/fix_bfi.ll
--- a/llvm/test/Transforms/PGOProfile/bfi_verification.ll
+++ b/llvm/test/Transforms/PGOProfile/fix_bfi.ll
@@ -1,7 +1,6 @@
-; Note: Verify bfi counter after loading the profile.
-; RUN: llvm-profdata merge %S/Inputs/bfi_verification.proftext -o %t.profdata
-; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S -pgo-verify-bfi-ratio=2 -pgo-verify-bfi=true -pass-remarks-analysis=pgo 2>&1 | FileCheck %s --check-prefix=THRESHOLD-CHECK
-; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S -pgo-verify-hot-bfi=true -pass-remarks-analysis=pgo 2>&1 | FileCheck %s --check-prefix=HOTONLY-CHECK
+; Note: Scaling the func entry count (using the sum of count value) so that BFI counter value is close to raw profile counter values.
+; RUN: llvm-profdata merge %S/Inputs/fix_bfi.proftext -o %t.profdata
+; RUN: opt -pgo-instr-use -pgo-test-profile-file=%t.profdata -S -pgo-fix-entry-count=true < %s 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -97,15 +96,6 @@
 if.end26:
   ret void
 }
-; THRESHOLD-CHECK: remark: <unknown>:0:0: BB do.body Count=39637749 BFI_Count=40801304
-; THRESHOLD-CHECK: remark: <unknown>:0:0: BB while.cond Count=80655628 BFI_Count=83956530
-; THRESHOLD-CHECK: remark: <unknown>:0:0: BB while.body Count=41017879 BFI_Count=42370585
-; THRESHOLD-CHECK: remark: <unknown>:0:0: BB while.cond3 Count=71254487 BFI_Count=73756204
-; THRESHOLD-CHECK: remark: <unknown>:0:0: BB while.body7 Count=31616738 BFI_Count=32954900
-; THRESHOLD-CHECK: remark: <unknown>:0:0: BB while.end8 Count=39637749 BFI_Count=40801304
-; THRESHOLD-CHECK: remark: <unknown>:0:0: BB if.then Count=32743703 BFI_Count=33739540
-; THRESHOLD-CHECK: remark: <unknown>:0:0: BB if.end Count=39637749 BFI_Count=40801304
-; THRESHOLD-CHECK: remark: <unknown>:0:0: BB if.then25 Count=6013544 BFI_Count=6277124
-; THRESHOLD-CHECK: remark: <unknown>:0:0: In Func sort_basket: Num_of_BB=14, Num_of_non_zerovalue_BB=14, Num_of_mis_matching_BB=9
-; HOTONLY-CHECK: remark: <unknown>:0:0: BB if.then25 Count=6013544 BFI_Count=6277124 (raw-Cold to BFI-Hot)
-; HOTONLY-CHECK: remark: <unknown>:0:0: In Func sort_basket: Num_of_BB=14, Num_of_non_zerovalue_BB=14, Num_of_mis_matching_BB=1
+
+; CHECK: define dso_local void @sort_basket(i64 %min, i64 %max) #0 !prof [[ENTRY_COUNT:![0-9]+]]
+; CHECK: [[ENTRY_COUNT]] = !{!"function_entry_count", i64 12949310}