Index: llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp =================================================================== --- llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -245,6 +245,23 @@ "optimization remarks: -{Rpass|" "pass-remarks}=pgo-instrumentation")); +static cl::opt PGOVerifyBFI( + "pgo-verify-bfi", cl::init(false), cl::Hidden, + cl::desc("Print out non-match BFI counts after setting profile metadata.")); + +static cl::opt PGOVerifyBFIRatio( + "pgo-verify-bfi-ratio", cl::init(5), cl::Hidden, + cl::desc("Print out non-match BFI if the difference percentage is greater " + "than this value (in percentage).")); + +static cl::opt PGOVerifyBFICutoff( + "pgo-verify-bfi-cutoff", cl::init(1), cl::Hidden, + cl::desc("Skip the counts whose profile count value is below.")); + +static cl::opt + PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden, + cl::desc("Fix function entry count in profile use.")); + // Command line option to turn on CFG dot dump after profile annotation. // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts extern cl::opt PGOViewCounts; @@ -1254,6 +1271,7 @@ continue; FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); } + markFunctionAttributes(FuncEntryCount, FuncMaxCount); // Now annotate select instructions @@ -1536,6 +1554,97 @@ return PreservedAnalyses::none(); } +// Using the ratio b/w sums of profile count values and BFI count values to +// adjust the func entry count. +static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, + BranchProbabilityInfo &NBPI) { + Function &F = Func.getFunc(); + BlockFrequencyInfo NBFI(F, NBPI, LI); + auto SumCount = APFloat::getZero(APFloat::IEEEdouble()); + auto SumBFCount = APFloat::getZero(APFloat::IEEEdouble()); + for (auto &BBI : F) { + uint64_t CountValue = 0; + uint64_t BFICountValue = 0; + if (!Func.getBBInfo(&BBI).CountValid) + continue; + auto BFICount = NBFI.getBlockProfileCount(&BBI); + if (!BFICount) + continue; + + CountValue = Func.getBBInfo(&BBI).CountValue; + BFICountValue = BFICount.getValue(); + SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven); + SumBFCount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven); + } + if (!SumCount.isZero()) { + double Scale = 1.0; + if (SumBFCount.compare(APFloat(1.0)) == APFloat::cmpGreaterThan && + SumBFCount.compare(SumCount) != APFloat::cmpEqual) + Scale = (SumCount / SumBFCount).convertToDouble(); + + if (Scale >= 1.01 || Scale <= 0.99) { + uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue; + uint64_t NewEntryCount = FuncEntryCount; + if (NewEntryCount == 0) + NewEntryCount = 1; + NewEntryCount = (0.5 + NewEntryCount * Scale); + if (NewEntryCount == 0) + NewEntryCount = 1; + if (NewEntryCount != FuncEntryCount) { + F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real)); + LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName() + << ", entry_count " << FuncEntryCount << " --> " + << NewEntryCount << "\n"); + } + } + return; + } + F.setEntryCount(ProfileCount(0, Function::PCT_Real)); +} + +// Compare the profile count values with BFI count values, and print out +// the non-matching ones. +static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, + BranchProbabilityInfo &NBPI) { + Function &F = Func.getFunc(); + BlockFrequencyInfo NBFI(F, NBPI, LI); + bool PrintFunc = false; + unsigned BBNum = 0, BBNoMatchNum = 0, NonC0BBNum = 0; + for (auto &BBI : F) { + uint64_t CountValue = 0; + uint64_t BFICountValue = 0; + + if (Func.getBBInfo(&BBI).CountValid) + CountValue = Func.getBBInfo(&BBI).CountValue; + + BBNum++; + if (CountValue) + NonC0BBNum++; + auto BFICount = NBFI.getBlockProfileCount(&BBI); + if (BFICount) + BFICountValue = BFICount.getValue(); + + if (CountValue < PGOVerifyBFICutoff && BFICountValue < PGOVerifyBFICutoff) + continue; + + uint64_t Diff = (BFICountValue >= CountValue) ? BFICountValue - CountValue + : CountValue - BFICountValue; + if (Diff != 0 && Diff >= CountValue / 100 * PGOVerifyBFIRatio) { + BBNoMatchNum++; + if (!PrintFunc) { + dbgs() << "VerifyFuncBFI: in " << F.getName() << "\n"; + PrintFunc = true; + } + dbgs() << " BB " << BBI.getName() << ": Count=" << CountValue + << " BFI Count=" << BFICountValue << "\n"; + } + } + if (BBNoMatchNum) + dbgs() << "In function " << F.getName() << ": Num_of_BB = " << BBNum + << " Num_of_zerovalue_BB = " << NonC0BBNum + << " Num_of_non_matching_BB = " << BBNoMatchNum << "\n"; +} + static bool annotateAllFunctions( Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, function_ref LookupBPI, @@ -1570,6 +1679,9 @@ return false; } + M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), + IsCS ? ProfileSummary::PSK_CSInstr + : ProfileSummary::PSK_Instr); std::unordered_multimap ComdatMembers; collectComdatMembers(M, ComdatMembers); std::vector HotFunctions; @@ -1629,10 +1741,17 @@ Func.dumpInfo(); } } + + // Verify BlockFrequency and fix entry func entry count. + if (PGOVerifyBFI || PGOFixEntryCount) { + LoopInfo LI{DominatorTree(F)}; + BranchProbabilityInfo NBPI(F, LI); + if (PGOFixEntryCount) + fixFuncEntryCount(Func, LI, NBPI); + if (PGOVerifyBFI) + verifyFuncBFI(Func, LI, NBPI); + } } - M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), - IsCS ? ProfileSummary::PSK_CSInstr - : ProfileSummary::PSK_Instr); // Set function hotness attribute from the profile. // We have to apply these attributes at the end because their presence Index: llvm/test/Transforms/PGOProfile/Inputs/fix_bfi.proftext =================================================================== --- /dev/null +++ llvm/test/Transforms/PGOProfile/Inputs/fix_bfi.proftext @@ -0,0 +1,16 @@ +# IR level Instrumentation Flag +:ir +sort_basket +# Func Hash: +91588919538 +# Num Counters: +7 +# Counter Values: +41017879 +31616738 +39637749 +32743703 +13338888 +6990942 +6013544 + Index: llvm/test/Transforms/PGOProfile/fix_bfi.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PGOProfile/fix_bfi.ll @@ -0,0 +1,116 @@ +; RUN: llvm-profdata merge %S/Inputs/fix_bfi.proftext -o %t.profdata +; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S -pgo-fix-entry-count=true -pgo-verify-bfi-ratio=2 -pgo-verify-bfi=true 2>&1 | FileCheck %s --check-prefix=FIX-CHECK +; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S -pgo-fix-entry-count=false -pgo-verify-bfi-ratio=2 -pgo-verify-bfi=true 2>&1 | FileCheck %s --check-prefix=NOFIX-CHECK + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.basket = type { %struct.arc*, i64, i64 } +%struct.arc = type { i64, %struct.node*, %struct.node*, i32, %struct.arc*, %struct.arc*, i64, i64 } +%struct.node = type { i64, i32, %struct.node*, %struct.node*, %struct.node*, %struct.node*, %struct.arc*, %struct.arc*, %struct.arc*, %struct.arc*, i64, i64, i32, i32 } + +@perm = internal unnamed_addr global [351 x %struct.basket*] zeroinitializer, align 16 + +define dso_local void @sort_basket(i64 %min, i64 %max) { +entry: + %add = add nsw i64 %min, %max + %div = sdiv i64 %add, 2 + %arrayidx = getelementptr inbounds [351 x %struct.basket*], [351 x %struct.basket*]* @perm, i64 0, i64 %div + %0 = load %struct.basket*, %struct.basket** %arrayidx, align 8 + %abs_cost = getelementptr inbounds %struct.basket, %struct.basket* %0, i64 0, i32 2 + %1 = load i64, i64* %abs_cost, align 8 + br label %do.body + +do.body: + %r.0 = phi i64 [ %max, %entry ], [ %r.2, %if.end ] + %l.0 = phi i64 [ %min, %entry ], [ %l.2, %if.end ] + br label %while.cond + +while.cond: + %l.1 = phi i64 [ %l.0, %do.body ], [ %inc, %while.body ] + %arrayidx1 = getelementptr inbounds [351 x %struct.basket*], [351 x %struct.basket*]* @perm, i64 0, i64 %l.1 + %2 = load %struct.basket*, %struct.basket** %arrayidx1, align 8 + %abs_cost2 = getelementptr inbounds %struct.basket, %struct.basket* %2, i64 0, i32 2 + %3 = load i64, i64* %abs_cost2, align 8 + %cmp = icmp sgt i64 %3, %1 + br i1 %cmp, label %while.body, label %while.cond3 + +while.body: + %inc = add nsw i64 %l.1, 1 + br label %while.cond + +while.cond3: + %r.1 = phi i64 [ %r.0, %while.cond ], [ %dec, %while.body7 ] + %arrayidx4 = getelementptr inbounds [351 x %struct.basket*], [351 x %struct.basket*]* @perm, i64 0, i64 %r.1 + %4 = load %struct.basket*, %struct.basket** %arrayidx4, align 8 + %abs_cost5 = getelementptr inbounds %struct.basket, %struct.basket* %4, i64 0, i32 2 + %5 = load i64, i64* %abs_cost5, align 8 + %cmp6 = icmp sgt i64 %1, %5 + br i1 %cmp6, label %while.body7, label %while.end8 + +while.body7: + %dec = add nsw i64 %r.1, -1 + br label %while.cond3 + +while.end8: + %cmp9 = icmp slt i64 %l.1, %r.1 + br i1 %cmp9, label %if.then, label %if.end + +if.then: + %6 = bitcast %struct.basket** %arrayidx1 to i64* + %7 = load i64, i64* %6, align 8 + store %struct.basket* %4, %struct.basket** %arrayidx1, align 8 + %8 = bitcast %struct.basket** %arrayidx4 to i64* + store i64 %7, i64* %8, align 8 + br label %if.end + +if.end: + %cmp14 = icmp sgt i64 %l.1, %r.1 + %not.cmp14 = xor i1 %cmp14, true + %9 = zext i1 %not.cmp14 to i64 + %r.2 = sub i64 %r.1, %9 + %not.cmp1457 = xor i1 %cmp14, true + %inc16 = zext i1 %not.cmp1457 to i64 + %l.2 = add nsw i64 %l.1, %inc16 + %cmp19 = icmp sgt i64 %l.2, %r.2 + br i1 %cmp19, label %do.end, label %do.body + +do.end: + %cmp20 = icmp sgt i64 %r.2, %min + br i1 %cmp20, label %if.then21, label %if.end22 + +if.then21: + call void @sort_basket(i64 %min, i64 %r.2) + br label %if.end22 + +if.end22: + %cmp23 = icmp slt i64 %l.2, %max + %cmp24 = icmp slt i64 %l.2, 51 + %or.cond = and i1 %cmp23, %cmp24 + br i1 %or.cond, label %if.then25, label %if.end26 + +if.then25: + call void @sort_basket(i64 %l.2, i64 %max) + br label %if.end26 + +if.end26: + ret void +} +; FIX-CHECK: VerifyFuncBFI: in sort_basket +; FIX-CHECK: BB entry: Count=13338888 BFI Count=12949310 +; FIX-CHECK: BB do.end: Count=13338888 BFI Count=12949310 +; FIX-CHECK: BB if.end22: Count=13338888 BFI Count=12949310 +; FIX-CHECK: BB if.end26: Count=13338888 BFI Count=12949310 +; FIX-CHECK: In function sort_basket: Num_of_BB = 14 Num_of_zerovalue_BB = 14 Num_of_non_matching_BB = 4 +; NOFIX-CHECK: VerifyFuncBFI: in sort_basket +; NOFIX-CHECK: BB do.body: Count=39637749 BFI Count=40801304 +; NOFIX-CHECK: BB while.cond: Count=80655628 BFI Count=83956530 +; NOFIX-CHECK: BB while.body: Count=41017879 BFI Count=42370585 +; NOFIX-CHECK: BB while.cond3: Count=71254487 BFI Count=73756204 +; NOFIX-CHECK: BB while.body7: Count=31616738 BFI Count=32954900 +; NOFIX-CHECK: BB while.end8: Count=39637749 BFI Count=40801304 +; NOFIX-CHECK: BB if.then: Count=32743703 BFI Count=33739540 +; NOFIX-CHECK: BB if.end: Count=39637749 BFI Count=40801304 +; NOFIX-CHECK: BB if.then25: Count=6013544 BFI Count=6277124 +; NOFIX-CHECK: In function sort_basket: Num_of_BB = 14 Num_of_zerovalue_BB = 14 Num_of_non_matching_BB = 9 +