diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -260,7 +260,8 @@ InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], - uint32_t &ActualNumValueData, uint64_t &TotalC); + uint32_t &ActualNumValueData, uint64_t &TotalC, + bool GetZeroCntValue = false); inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; } diff --git a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp --- a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp +++ b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp @@ -45,7 +45,7 @@ // Set the maximum number of targets to promote for a single indirect-call // callsite. -static cl::opt +cl::opt MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of promotions for a single indirect " "call callsite")); diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -982,7 +982,8 @@ InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], - uint32_t &ActualNumValueData, uint64_t &TotalC) { + uint32_t &ActualNumValueData, uint64_t &TotalC, + bool GetZeroCntValue) { MDNode *MD = Inst.getMetadata(LLVMContext::MD_prof); if (!MD) return false; @@ -1009,7 +1010,7 @@ // Get total count ConstantInt *TotalCInt = mdconst::dyn_extract(MD->getOperand(2)); - if (!TotalCInt) + if (!TotalCInt && !GetZeroCntValue) return false; TotalC = TotalCInt->getZExtValue(); @@ -1021,7 +1022,7 @@ ConstantInt *Value = mdconst::dyn_extract(MD->getOperand(I)); ConstantInt *Count = mdconst::dyn_extract(MD->getOperand(I + 1)); - if (!Value || !Count) + if (!Value || (!Count && !GetZeroCntValue)) return false; ValueData[ActualNumValueData].Value = Value->getZExtValue(); ValueData[ActualNumValueData].Count = Count->getZExtValue(); diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -182,6 +182,8 @@ "by inlining from sample profile loader."), cl::Hidden); +extern cl::opt MaxNumPromotions; + namespace { using BlockWeightMap = DenseMap; @@ -1053,6 +1055,75 @@ } } +/// If the profile count for the promotion candidate \p Candidate is 0, +/// it means \p Candidate has already been promoted for \p Inst. +static bool isPromotedBefore(const Instruction &Inst, StringRef Candidate) { + uint32_t NumVals = 0; + uint64_t TotalCount = 0; + std::unique_ptr ValueData = + std::make_unique(MaxNumPromotions); + bool Valid = + getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions, + ValueData.get(), NumVals, TotalCount, true); + if (Valid) { + for (uint32_t I = 0; I < NumVals; I++) { + // If the promotion candidate has 0 count in the metadata, it + // means the candidate has been promoted for this indirect call. + if (ValueData[I].Value == Function::getGUID(Candidate) && + ValueData[I].Count == 0) + return true; + } + } + return false; +} + +/// Update indirect call target profile metadata for \p Inst. If \p Total +/// is given, set TotalCount of call targets counts to \p Total, otherwise +/// keep the original value in metadata. +static void +updateIDTMetaData(Instruction &Inst, + const SmallVectorImpl &CallTargets, + uint64_t Total = 0) { + DenseMap ValueCountMap; + + uint32_t NumVals = 0; + uint64_t TotalCount = 0; + std::unique_ptr ValueData = + std::make_unique(MaxNumPromotions); + bool Valid = + getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions, + ValueData.get(), NumVals, TotalCount, true); + if (Valid) { + for (uint32_t I = 0; I < NumVals; I++) + ValueCountMap[ValueData[I].Value] = ValueData[I].Count; + } + + for (const auto &Data : CallTargets) { + auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count); + if (Pair.second) + continue; + // Update existing profile count of the call target if it is not 0. + // If it is 0, the call target has been promoted so keep it as 0. + if (Pair.first->second != 0) + Pair.first->second = Data.Count; + } + + SmallVector NewCallTargets; + for (const auto &ValueCount : ValueCountMap) { + NewCallTargets.emplace_back( + InstrProfValueData{ValueCount.first, ValueCount.second}); + } + llvm::sort(NewCallTargets, + [](const InstrProfValueData &L, const InstrProfValueData &R) { + if (L.Count != R.Count) + return L.Count > R.Count; + return L.Value > R.Value; + }); + annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst, + NewCallTargets, Total ? Total : TotalCount, + IPVK_IndirectCallTarget, NewCallTargets.size()); +} + /// Iteratively inline hot callsites of a function. /// /// Iteratively traverse all callsites of the function \p F, and find if @@ -1129,6 +1200,14 @@ if (!callsiteIsHot(FS, PSI)) continue; + auto CalleeFunctionName = FS->getFuncName(); + auto R = SymbolMap.find(CalleeFunctionName); + if (R == SymbolMap.end() || !R->getValue()) + continue; + + if (isPromotedBefore(*I, R->getValue()->getName())) + continue; + const char *Reason = "Callee function not available"; // R->getValue() != &F is to prevent promoting a recursive call. // If it is a recursive call, we do not inline it as it could bloat @@ -1136,14 +1215,15 @@ // clone the caller first, and inline the cloned caller if it is // recursive. As llvm does not inline recursive calls, we will // simply ignore it instead of handling it explicitly. - auto CalleeFunctionName = FS->getFuncName(); - auto R = SymbolMap.find(CalleeFunctionName); - if (R != SymbolMap.end() && R->getValue() && - !R->getValue()->isDeclaration() && + if (!R->getValue()->isDeclaration() && R->getValue()->getSubprogram() && R->getValue()->hasFnAttribute("use-sample-profile") && R->getValue() != &F && isLegalToPromote(*I, R->getValue(), &Reason)) { + SmallVector SortedCallTargets = { + InstrProfValueData{Function::getGUID(R->getValue()->getName()), + 0}}; + updateIDTMetaData(*I, SortedCallTargets); uint64_t C = FS->getEntrySamples(); auto &DI = pgo::promoteIndirectCall(*I, R->getValue(), C, Sum, false, ORE); @@ -1656,11 +1736,9 @@ continue; SmallVector SortedCallTargets = GetSortedValueDataFromCallTargets(T.get()); - uint64_t Sum; + uint64_t Sum = 0; findIndirectCallFunctionSamples(I, Sum); - annotateValueSite(*I.getParent()->getParent()->getParent(), I, - SortedCallTargets, Sum, IPVK_IndirectCallTarget, - SortedCallTargets.size()); + updateIDTMetaData(I, SortedCallTargets, Sum); } else if (!isa(&I)) { I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights( diff --git a/llvm/test/Transforms/SampleProfile/Inputs/norepeated-icp.prof b/llvm/test/Transforms/SampleProfile/Inputs/norepeated-icp.prof new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/norepeated-icp.prof @@ -0,0 +1,13 @@ +_Z3foov:225715:1 + 2: 5553 + 3: 5391 + 1: _Z3goov:5860 + 1: 5279 + 2: 5279 + 1: _Z3hoov:5860 + 1: 5000 +_Z3goov:5860:1 + 1: 5279 + 2: 5279 + 1: _Z3hoov:5860 + 1: 5000 diff --git a/llvm/test/Transforms/SampleProfile/indirect-call.ll b/llvm/test/Transforms/SampleProfile/indirect-call.ll --- a/llvm/test/Transforms/SampleProfile/indirect-call.ll +++ b/llvm/test/Transforms/SampleProfile/indirect-call.ll @@ -202,7 +202,7 @@ ; CHECK: ![[PROF]] = !{!"VP", i32 0, i64 3457, i64 9191153033785521275, i64 2059, i64 -1069303473483922844, i64 1398} ; CHECK: ![[BR1]] = !{!"branch_weights", i32 4000, i32 4000} ; CHECK: ![[BR2]] = !{!"branch_weights", i32 3000, i32 1000} -; CHECK: ![[VP]] = !{!"VP", i32 0, i64 8000, i64 -6391416044382067764, i64 1000} +; CHECK: ![[VP]] = !{!"VP", i32 0, i64 8000, i64 -6391416044382067764, i64 1000, i64 7476224446746900038, i64 0, i64 925324185419832389, i64 0} ; CHECK: ![[BR3]] = !{!"branch_weights", i32 1, i32 0} !6 = distinct !DISubprogram(name: "test_inline", scope: !1, file: !1, line: 6, unit: !0) !7 = !DILocation(line: 7, scope: !6) diff --git a/llvm/test/Transforms/SampleProfile/norepeated-icp.ll b/llvm/test/Transforms/SampleProfile/norepeated-icp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/norepeated-icp.ll @@ -0,0 +1,74 @@ +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/norepeated-icp.prof -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@.str = private unnamed_addr constant [5 x i8] c"hoo\0A\00", align 1 +@p = dso_local global void ()* null, align 8 +@str = private unnamed_addr constant [4 x i8] c"hoo\00", align 1 + +; Function Attrs: uwtable mustprogress +define dso_local void @_Z3hoov() #0 !dbg !7 { +entry: + %puts = call i32 @puts(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @str, i64 0, i64 0)), !dbg !9 + ret void, !dbg !10 +} + +; Function Attrs: nofree nounwind +declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) #1 + +; Function Attrs: uwtable mustprogress +define dso_local void @_Z3goov() #0 !dbg !11 { +entry: + %0 = load void ()*, void ()** @p, align 8, !dbg !12, !tbaa !13 + call void %0(), !dbg !17 + ret void, !dbg !18 +} + +; Check the indirect call in _Z3goov inlined into _Z3foov won't be indirect +; call promoted for _Z3hoov twice in _Z3foov. +; CHECK-LABEL: @_Z3foov( +; CHECK: icmp eq void ()* {{.*}} @_Z3hoov +; CHECK-NOT: icmp eq void ()* {{.*}} @_Z3hoov +; CHECK: ret void + +; Function Attrs: uwtable mustprogress +define dso_local void @_Z3foov() #0 !dbg !19 { +entry: + call void @_Z3goov(), !dbg !20 + ret void, !dbg !21 +} + +; Function Attrs: nofree nounwind +declare noundef i32 @puts(i8* nocapture noundef readonly) #2 + +attributes #0 = { uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" } +attributes #1 = { nofree nounwind "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nofree nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "1.cc", directory: "") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!""} +!7 = distinct !DISubprogram(name: "hoo", linkageName: "_Z3hoov", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !2) +!9 = !DILocation(line: 2, column: 3, scope: !7) +!10 = !DILocation(line: 3, column: 1, scope: !7) +!11 = distinct !DISubprogram(name: "goo", linkageName: "_Z3goov", scope: !1, file: !1, line: 6, type: !8, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!12 = !DILocation(line: 7, column: 5, scope: !11) +!13 = !{!14, !14, i64 0} +!14 = !{!"any pointer", !15, i64 0} +!15 = !{!"omnipotent char", !16, i64 0} +!16 = !{!"Simple C++ TBAA"} +!17 = !DILocation(line: 7, column: 3, scope: !11) +!18 = !DILocation(line: 8, column: 1, scope: !11) +!19 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 10, type: !8, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!20 = !DILocation(line: 11, column: 3, scope: !19) +!21 = !DILocation(line: 12, column: 3, scope: !19)