Index: llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp =================================================================== --- llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -252,6 +252,28 @@ "pgo-instrument-entry", cl::init(false), cl::Hidden, cl::desc("Force to instrument function entry basicblock.")); +static cl::opt PGOVerifyHotBFI( + "pgo-verify-hot-bfi", cl::init(false), cl::Hidden, + cl::desc("Print out the non-match BFI count if a hot raw profile count " + "becomes non-hot, or a cold raw profile count becomes hot.")); + +static cl::opt PGOVerifyBFI( + "pgo-verify-bfi", cl::init(false), cl::Hidden, + cl::desc("Print out non-match BFI counts after setting profile metadata.")); + +static cl::opt PGOVerifyBFIRatio( + "pgo-verify-bfi-ratio", cl::init(5), cl::Hidden, + cl::desc("Print out non-match BFI if the difference percentage is greater " + "than this value (in percentage).")); + +static cl::opt PGOVerifyBFICutoff( + "pgo-verify-bfi-cutoff", cl::init(1), cl::Hidden, + cl::desc("Skip the counts whose profile count value is below.")); + +static cl::opt + PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, + cl::desc("Fix function entry count in profile use.")); + // Command line option to turn on CFG dot dump after profile annotation. // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts extern cl::opt PGOViewCounts; @@ -1616,6 +1638,124 @@ return PreservedAnalyses::none(); } +// Using the ratio b/w sums of profile count values and BFI count values to +// adjust the func entry count. +static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, + BranchProbabilityInfo &NBPI) { + Function &F = Func.getFunc(); + BlockFrequencyInfo NBFI(F, NBPI, LI); + auto BFIEntryCount = F.getEntryCount(); + assert(BFIEntryCount.hasValue() && (BFIEntryCount.getCount() > 0) && + "Invalid BFI Entrycount"); + auto SumCount = APFloat::getZero(APFloat::IEEEdouble()); + auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble()); + for (auto &BBI : F) { + uint64_t CountValue = 0; + uint64_t BFICountValue = 0; + if (!Func.findBBInfo(&BBI)) + continue; + auto BFICount = NBFI.getBlockProfileCount(&BBI); + CountValue = Func.getBBInfo(&BBI).CountValue; + BFICountValue = BFICount.getValue(); + SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven); + SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven); + } + if (SumCount.isZero()) + return; + + assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan && + "Incorrect sum of BFI counts"); + if (SumBFICount.compare(SumCount) == APFloat::cmpEqual) + return; + double Scale = (SumCount / SumBFICount).convertToDouble(); + if (Scale < 1.001 && Scale > 0.999) + return; + + uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue; + uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale; + if (NewEntryCount == 0) + NewEntryCount = 1; + if (NewEntryCount != FuncEntryCount) { + F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real)); + LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName() + << ", entry_count " << FuncEntryCount << " --> " + << NewEntryCount << "\n"); + } +} + +// Compare the profile count values with BFI count values, and print out +// the non-matching ones. +static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, + BranchProbabilityInfo &NBPI, + uint64_t HotCountThreshold, + uint64_t ColdCountThreshold) { + Function &F = Func.getFunc(); + BlockFrequencyInfo NBFI(F, NBPI, LI); + bool PrintFunc = false; + bool HotBBOnly = PGOVerifyHotBFI; + std::string Remark; + + unsigned BBNum = 0, BBNoMatchNum = 0, NonZeroBBNum = 0; + for (auto &BBI : F) { + uint64_t CountValue = 0; + uint64_t BFICountValue = 0; + + if (Func.getBBInfo(&BBI).CountValid) + CountValue = Func.getBBInfo(&BBI).CountValue; + + BBNum++; + if (CountValue) + NonZeroBBNum++; + auto BFICount = NBFI.getBlockProfileCount(&BBI); + if (BFICount) + BFICountValue = BFICount.getValue(); + + if (HotBBOnly) { + bool rawIsHot = CountValue >= HotCountThreshold; + bool BFIIsHot = BFICountValue >= HotCountThreshold; + bool rawIsCold = CountValue <= ColdCountThreshold; + bool ShowCount = false; + if (rawIsHot && !BFIIsHot) { + Remark = "raw-Hot to BFI-nonHot"; + ShowCount = true; + } else if (rawIsCold && BFIIsHot) { + Remark = "raw-Cold to BFI-Hot"; + ShowCount = true; + } + if (!ShowCount) + continue; + } else { + if ((CountValue < PGOVerifyBFICutoff) && + (BFICountValue < PGOVerifyBFICutoff)) + continue; + uint64_t Diff = (BFICountValue >= CountValue) + ? BFICountValue - CountValue + : CountValue - BFICountValue; + if (Diff < CountValue / 100 * PGOVerifyBFIRatio) + continue; + } + + BBNoMatchNum++; + if (!PrintFunc) { + dbgs() << "VerifyFuncBFI in Func " << F.getName() << ":"; + if (HotBBOnly) + dbgs() << " HotThreshold=" << HotCountThreshold + << " ColdThreshold=" << ColdCountThreshold; + dbgs() << "\n"; + PrintFunc = true; + } + dbgs() << " BB " << BBI.getName() << ": Count=" << CountValue + << " BFI Count=" << BFICountValue; + if (!Remark.empty()) + dbgs() << " (" << Remark << ")"; + dbgs() << "\n"; + } + if (BBNoMatchNum) + dbgs() << "In function " << F.getName() << ": Num_of_BB = " << BBNum + << " Num_of_non_zerovalue_BB = " << NonZeroBBNum + << " Num_of_non_matching_BB = " << BBNoMatchNum << "\n"; +} + static bool annotateAllFunctions( Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, function_ref LookupTLI, @@ -1741,6 +1881,22 @@ Func.dumpInfo(); } } + + // Verify BlockFrequency and fix entry func entry count. + if (PGOVerifyBFI || PGOFixEntryCount || PGOVerifyHotBFI) { + LoopInfo LI{DominatorTree(F)}; + BranchProbabilityInfo NBPI(F, LI); + if (PGOFixEntryCount) + fixFuncEntryCount(Func, LI, NBPI); + if (PGOVerifyBFI || PGOVerifyHotBFI) { + uint64_t HotCountThreshold = 0, ColdCountThreshold = 0; + if (PGOVerifyHotBFI) { + HotCountThreshold = PSI->getOrCompHotCountThreshold(); + ColdCountThreshold = PSI->getOrCompColdCountThreshold(); + } + verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold); + } + } } // Set function hotness attribute from the profile. Index: llvm/test/Transforms/PGOProfile/Inputs/fix_bfi.proftext =================================================================== --- /dev/null +++ llvm/test/Transforms/PGOProfile/Inputs/fix_bfi.proftext @@ -0,0 +1,16 @@ +# IR level Instrumentation Flag +:ir +sort_basket +# Func Hash: +948827210500800754 +# Num Counters: +7 +# Counter Values: +41017879 +31616738 +39637749 +32743703 +13338888 +6990942 +6013544 + Index: llvm/test/Transforms/PGOProfile/fix_bfi.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PGOProfile/fix_bfi.ll @@ -0,0 +1,124 @@ +; Note: Scaling the func entry count (using the sum of count value) so that BFI counter value is close to raw profile counter values. +; RUN: llvm-profdata merge %S/Inputs/fix_bfi.proftext -o %t.profdata +; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S -pgo-fix-entry-count=true -pgo-verify-hot-bfi=false -pgo-verify-bfi-ratio=2 -pgo-verify-bfi=true 2>&1 | FileCheck %s --check-prefix=FIX-CHECK +; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S -pgo-fix-entry-count=false -pgo-verify-hot-bfi=false -pgo-verify-bfi-ratio=2 -pgo-verify-bfi=true 2>&1 | FileCheck %s --check-prefix=NOFIX-CHECK +; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S -pgo-fix-entry-count=true -pgo-verify-hot-bfi=true 2>&1 | FileCheck %s --check-prefix=FIX-HOT-CHECK +; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S -pgo-fix-entry-count=false -pgo-verify-hot-bfi=true 2>&1 | FileCheck %s --check-prefix=NOFIX-HOT-CHECK + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.basket = type { %struct.arc*, i64, i64 } +%struct.arc = type { i64, %struct.node*, %struct.node*, i32, %struct.arc*, %struct.arc*, i64, i64 } +%struct.node = type { i64, i32, %struct.node*, %struct.node*, %struct.node*, %struct.node*, %struct.arc*, %struct.arc*, %struct.arc*, %struct.arc*, i64, i64, i32, i32 } + +@perm = internal unnamed_addr global [351 x %struct.basket*] zeroinitializer, align 16 + +define dso_local void @sort_basket(i64 %min, i64 %max) { +entry: + %add = add nsw i64 %min, %max + %div = sdiv i64 %add, 2 + %arrayidx = getelementptr inbounds [351 x %struct.basket*], [351 x %struct.basket*]* @perm, i64 0, i64 %div + %0 = load %struct.basket*, %struct.basket** %arrayidx, align 8 + %abs_cost = getelementptr inbounds %struct.basket, %struct.basket* %0, i64 0, i32 2 + %1 = load i64, i64* %abs_cost, align 8 + br label %do.body + +do.body: + %r.0 = phi i64 [ %max, %entry ], [ %r.2, %if.end ] + %l.0 = phi i64 [ %min, %entry ], [ %l.2, %if.end ] + br label %while.cond + +while.cond: + %l.1 = phi i64 [ %l.0, %do.body ], [ %inc, %while.body ] + %arrayidx1 = getelementptr inbounds [351 x %struct.basket*], [351 x %struct.basket*]* @perm, i64 0, i64 %l.1 + %2 = load %struct.basket*, %struct.basket** %arrayidx1, align 8 + %abs_cost2 = getelementptr inbounds %struct.basket, %struct.basket* %2, i64 0, i32 2 + %3 = load i64, i64* %abs_cost2, align 8 + %cmp = icmp sgt i64 %3, %1 + br i1 %cmp, label %while.body, label %while.cond3 + +while.body: + %inc = add nsw i64 %l.1, 1 + br label %while.cond + +while.cond3: + %r.1 = phi i64 [ %r.0, %while.cond ], [ %dec, %while.body7 ] + %arrayidx4 = getelementptr inbounds [351 x %struct.basket*], [351 x %struct.basket*]* @perm, i64 0, i64 %r.1 + %4 = load %struct.basket*, %struct.basket** %arrayidx4, align 8 + %abs_cost5 = getelementptr inbounds %struct.basket, %struct.basket* %4, i64 0, i32 2 + %5 = load i64, i64* %abs_cost5, align 8 + %cmp6 = icmp sgt i64 %1, %5 + br i1 %cmp6, label %while.body7, label %while.end8 + +while.body7: + %dec = add nsw i64 %r.1, -1 + br label %while.cond3 + +while.end8: + %cmp9 = icmp slt i64 %l.1, %r.1 + br i1 %cmp9, label %if.then, label %if.end + +if.then: + %6 = bitcast %struct.basket** %arrayidx1 to i64* + %7 = load i64, i64* %6, align 8 + store %struct.basket* %4, %struct.basket** %arrayidx1, align 8 + %8 = bitcast %struct.basket** %arrayidx4 to i64* + store i64 %7, i64* %8, align 8 + br label %if.end + +if.end: + %cmp14 = icmp sgt i64 %l.1, %r.1 + %not.cmp14 = xor i1 %cmp14, true + %9 = zext i1 %not.cmp14 to i64 + %r.2 = sub i64 %r.1, %9 + %not.cmp1457 = xor i1 %cmp14, true + %inc16 = zext i1 %not.cmp1457 to i64 + %l.2 = add nsw i64 %l.1, %inc16 + %cmp19 = icmp sgt i64 %l.2, %r.2 + br i1 %cmp19, label %do.end, label %do.body + +do.end: + %cmp20 = icmp sgt i64 %r.2, %min + br i1 %cmp20, label %if.then21, label %if.end22 + +if.then21: + call void @sort_basket(i64 %min, i64 %r.2) + br label %if.end22 + +if.end22: + %cmp23 = icmp slt i64 %l.2, %max + %cmp24 = icmp slt i64 %l.2, 51 + %or.cond = and i1 %cmp23, %cmp24 + br i1 %or.cond, label %if.then25, label %if.end26 + +if.then25: + call void @sort_basket(i64 %l.2, i64 %max) + br label %if.end26 + +if.end26: + ret void +} +; FIX-CHECK: VerifyFuncBFI in Func sort_basket: +; FIX-CHECK: BB entry: Count=13338888 BFI Count=12949310 +; FIX-CHECK: BB do.end: Count=13338888 BFI Count=12949310 +; FIX-CHECK: BB if.end22: Count=13338888 BFI Count=12949310 +; FIX-CHECK: BB if.end26: Count=13338888 BFI Count=12949310 +; FIX-CHECK: In function sort_basket: Num_of_BB = 14 Num_of_non_zerovalue_BB = 14 Num_of_non_matching_BB = 4 +; NOFIX-CHECK: VerifyFuncBFI in Func sort_basket: +; NOFIX-CHECK: BB do.body: Count=39637749 BFI Count=40801304 +; NOFIX-CHECK: BB while.cond: Count=80655628 BFI Count=83956530 +; NOFIX-CHECK: BB while.body: Count=41017879 BFI Count=42370585 +; NOFIX-CHECK: BB while.cond3: Count=71254487 BFI Count=73756204 +; NOFIX-CHECK: BB while.body7: Count=31616738 BFI Count=32954900 +; NOFIX-CHECK: BB while.end8: Count=39637749 BFI Count=40801304 +; NOFIX-CHECK: BB if.then: Count=32743703 BFI Count=33739540 +; NOFIX-CHECK: BB if.end: Count=39637749 BFI Count=40801304 +; NOFIX-CHECK: BB if.then25: Count=6013544 BFI Count=6277124 +; NOFIX-CHECK: In function sort_basket: Num_of_BB = 14 Num_of_non_zerovalue_BB = 14 Num_of_non_matching_BB = 9 +; FIX-HOT-CHECK: VerifyFuncBFI in Func sort_basket: HotThreshold=6013544 ColdThreshold=6013544 +; FIX-HOT-CHECK: BB if.then25: Count=6013544 BFI Count=6093793 (raw-Cold to BFI-Hot) +; FIX-HOT-CHECK: In function sort_basket: Num_of_BB = 14 Num_of_non_zerovalue_BB = 14 Num_of_non_matching_BB = 1 +; NOFIX-HOT-CHECK: VerifyFuncBFI in Func sort_basket: HotThreshold=6013544 ColdThreshold=6013544 +; NOFIX-HOT-CHECK: BB if.then25: Count=6013544 BFI Count=6277124 (raw-Cold to BFI-Hot) +; NOFIX-HOT-CHECK: In function sort_basket: Num_of_BB = 14 Num_of_non_zerovalue_BB = 14 Num_of_non_matching_BB = 1