diff --git a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
--- a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -40,13 +40,20 @@
 
 // Set the maximum number of targets to promote for a single indirect-call
 // callsite.
-static cl::opt<unsigned>
+cl::opt<unsigned>
     MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden, cl::ZeroOrMore,
                      cl::desc("Max number of promotions for a single indirect "
                               "call callsite"));
 
+// Set the maximum number of targets to fetch profile counts for a single
+// indirect-call callsite.
+static cl::opt<unsigned>
+    MaxNumTargets("icp-max-target", cl::init(6), cl::Hidden, cl::ZeroOrMore,
+                  cl::desc("Max number of targets to fetch profile counts for "
+                           "a single indirect call callsite"));
+
 ICallPromotionAnalysis::ICallPromotionAnalysis() {
-  ValueDataArray = std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
+  ValueDataArray = std::make_unique<InstrProfValueData[]>(MaxNumTargets);
 }
 
 bool ICallPromotionAnalysis::isPromotionProfitable(uint64_t Count,
@@ -88,7 +95,7 @@
     const Instruction *I, uint32_t &NumVals, uint64_t &TotalCount,
     uint32_t &NumCandidates) {
   bool Res =
-      getValueProfDataFromInst(*I, IPVK_IndirectCallTarget, MaxNumPromotions,
+      getValueProfDataFromInst(*I, IPVK_IndirectCallTarget, MaxNumTargets,
                                ValueDataArray.get(), NumVals, TotalCount);
   if (!Res) {
     NumCandidates = 0;
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -65,6 +65,13 @@
     ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::ZeroOrMore,
               cl::desc("Max number of promotions for this compilation"));
 
+extern cl::opt<unsigned> MaxNumPromotions;
+
+static cl::opt<unsigned> ICPMissingPercentThreshold(
+    "icp-missing-percent-threshold", cl::init(50), cl::Hidden, cl::ZeroOrMore,
+    cl::desc("The percentage threshold of unpromoted indirect call counts to "
+             "print missed remark"));
+
 // If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
 // For debug use only.
 static cl::opt<unsigned>
@@ -354,8 +361,64 @@
     uint64_t TotalCount;
     auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
         CB, NumVals, TotalCount, NumCandidates);
-    if (!NumCandidates ||
-        (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
+    // Skip call sites whose total count is not hot; emit the "cold call"
+    // remark only when the call site actually has profile data.
+    if (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)) {
+      if (!ICallProfDataRef.empty()) {
+        ORE.emit([&]() {
+          return OptimizationRemarkMissed(DEBUG_TYPE, "ColdCall", CB)
+                 << " Not promoted: cold call";
+        });
+      }
+      continue;
+    }
+    // Count left over after subtracting the candidates chosen for promotion.
+    uint64_t RemainingCount = TotalCount;
+    for (uint32_t I = 0; I < NumCandidates; I++) {
+      RemainingCount -= ICallProfDataRef[I].Count;
+    }
+    // Emit a missed remark when unpromoted targets remain and their combined
+    // count exceeds ICPMissingPercentThreshold percent of the total count.
+    if (ICallProfDataRef.size() > NumCandidates &&
+        RemainingCount * 100 > TotalCount * ICPMissingPercentThreshold) {
+      ORE.emit([&]() {
+        OptimizationRemarkMissed R(DEBUG_TYPE, "NotMeetPercentThreshold", CB);
+        if (NumCandidates == MaxNumPromotions) {
+          R << "icp-max-prom (="
+            << ore::NV("MaxNumPromotions", MaxNumPromotions)
+            << ") candidates were chosen, ";
+        } else {
+          R << ore::NV("NumCandidates", NumCandidates)
+            << " candidates were chosen and remaining are not profitable, ";
+        }
+        R << "but hasn't reached desired coverage "
+          << ore::NV("ICPMissingPercentThreshold",
+                     100 - ICPMissingPercentThreshold)
+          << "% for a hot indirect call, and "
+          << ore::NV("Candidates", ICallProfDataRef.size() - NumCandidates)
+          << " candidates left with remaining count "
+          << ore::NV("RemainingCount", RemainingCount) << " out of total count "
+          << ore::NV("TotalCount", TotalCount) << " (";
+        for (uint32_t I = NumCandidates; I < ICallProfDataRef.size(); I++) {
+          if (I != NumCandidates) {
+            R << ", ";
+          }
+          uint64_t Target = ICallProfDataRef[I].Value;
+          StringRef Name = Symtab->getFuncName(Target);
+          // Fall back to the raw profile value when the symbol table has no
+          // name for the target.
+          if (Name.empty()) {
+            R << ore::NV("Target md5sum", Target);
+          } else {
+            R << ore::NV("TargetName", Name);
+          }
+          R << ": " << ore::NV("Target count", ICallProfDataRef[I].Count);
+        }
+        R << ")";
+        return R;
+      });
+    }
+    if (!NumCandidates)
       continue;
     auto PromotionCandidates = getPromotionCandidatesForCallSite(
         *CB, ICallProfDataRef, TotalCount, NumCandidates);
diff --git a/llvm/test/Transforms/PGOProfile/icp_missed_msg.ll b/llvm/test/Transforms/PGOProfile/icp_missed_msg.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/icp_missed_msg.ll
@@ -0,0 +1,83 @@
+; RUN: opt < %s -pgo-icall-prom -pass-remarks-missed=pgo-icall-prom -S -icp-missing-percent-threshold=1 -profile-summary-hot-count=10 2>& 1 | FileCheck %s
+; RUN: opt < %s -passes=pgo-icall-prom -pass-remarks-missed=pgo-icall-prom -S -icp-missing-percent-threshold=1 -profile-summary-hot-count=10 2>& 1 | FileCheck %s
+
+; CHECK: remark: <unknown>:0:0: Not promoted: cold call
+; CHECK: remark: <unknown>:0:0: 1 candidates were chosen and remaining are not profitable, but hasn't reached desired coverage 99% for a hot indirect call, and 1 candidates left with remaining count 100 out of total count 1000 (func2: 10)
+; CHECK: remark: <unknown>:0:0: icp-max-prom (=3) candidates were chosen, but hasn't reached desired coverage 99% for a hot indirect call, and 1 candidates left with remaining count 100 out of total count 1000 (func1: 10)
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@foo = common global i32 ()* null, align 8
+@foo2 = common global i32 ()* null, align 8
+@foo3 = common global i32 ()* null, align 8
+
+define i32 @func1() {
+entry:
+  ret i32 0
+}
+
+define i32 @func2() {
+entry:
+  ret i32 1
+}
+
+define i32 @func3() {
+entry:
+  ret i32 2
+}
+
+define i32 @func4() {
+entry:
+  ret i32 3
+}
+
+define i32 @bar() {
+entry:
+  %tmp = load i32 ()*, i32 ()** @foo, align 8
+  %call = call i32 %tmp(), !prof !33
+  %tmp2 = load i32 ()*, i32 ()** @foo2, align 8
+  %call1 = call i32 %tmp2(), !prof !34
+  %add = add nsw i32 %call1, %call
+  %tmp3 = load i32 ()*, i32 ()** @foo3, align 8
+  %call2 = call i32 %tmp3(), !prof !35
+  %add2 = add nsw i32 %add, %call2
+  ret i32 %add2
+}
+
+!llvm.module.flags = !{!3}
+
+!3 = !{i32 1, !"ProfileSummary", !4}
+!4 = !{!5, !6, !7, !8, !9, !10, !11, !12}
+!5 = !{!"ProfileFormat", !"SampleProfile"}
+!6 = !{!"TotalCount", i64 0}
+!7 = !{!"MaxCount", i64 0}
+!8 = !{!"MaxInternalCount", i64 0}
+!9 = !{!"MaxFunctionCount", i64 0}
+!10 = !{!"NumCounts", i64 1}
+!11 = !{!"NumFunctions", i64 1}
+!12 = !{!"DetailedSummary", !13}
+!13 = !{!14, !15, !16, !17, !18, !19, !19, !20, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29}
+!14 = !{i32 10000, i64 0, i32 0}
+!15 = !{i32 100000, i64 0, i32 0}
+!16 = !{i32 200000, i64 0, i32 0}
+!17 = !{i32 300000, i64 0, i32 0}
+!18 = !{i32 400000, i64 0, i32 0}
+!19 = !{i32 500000, i64 0, i32 0}
+!20 = !{i32 600000, i64 0, i32 0}
+!21 = !{i32 700000, i64 0, i32 0}
+!22 = !{i32 800000, i64 0, i32 0}
+!23 = !{i32 900000, i64 0, i32 0}
+!24 = !{i32 950000, i64 0, i32 0}
+!25 = !{i32 990000, i64 0, i32 0}
+!26 = !{i32 999000, i64 0, i32 0}
+!27 = !{i32 999900, i64 0, i32 0}
+!28 = !{i32 999990, i64 0, i32 0}
+!29 = !{i32 999999, i64 0, i32 0}
+
+; !33: total count 5 is below -profile-summary-hot-count=10 (cold call remark).
+; !34: the first CHECK on a hot call expects 1 chosen candidate, func2 left.
+; !35: the last CHECK expects icp-max-prom (=3) chosen candidates, func1 left.
+!33 = !{!"VP", i32 0, i64 5, i64 -2545542355363006406, i64 5}
+!34 = !{!"VP", i32 0, i64 1000, i64 -2545542355363006406, i64 900, i64 -4377547752858689819, i64 10}
+!35 = !{!"VP", i32 0, i64 1000, i64 7651369219802541373, i64 500, i64 -4377547752858689819, i64 300, i64 -6929281286627296573, i64 100, i64 -2545542355363006406, i64 10}