diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -755,14 +755,8 @@ updateIDTMetaData(Instruction &Inst, const SmallVectorImpl &CallTargets, uint64_t Sum) { - assert((Sum != 0 || (CallTargets.size() == 1 && - CallTargets[0].Count == NOMORE_ICP_MAGICNUM)) && - "If sum is 0, assume only one element in CallTargets with count " - "being NOMORE_ICP_MAGICNUM"); - uint32_t NumVals = 0; // OldSum is the existing total count in the value profile data. - // It will be replaced by Sum if Sum is not 0. uint64_t OldSum = 0; std::unique_ptr ValueData = std::make_unique(MaxNumPromotions); @@ -771,34 +765,44 @@ ValueData.get(), NumVals, OldSum, true); DenseMap ValueCountMap; - // Initialize ValueCountMap with existing value profile data. - if (Valid) { - for (uint32_t I = 0; I < NumVals; I++) - ValueCountMap[ValueData[I].Value] = ValueData[I].Count; - } - - for (const auto &Data : CallTargets) { - auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count); - if (Pair.second) - continue; - // Whenever the count is NOMORE_ICP_MAGICNUM for a value, keep it - // in the ValueCountMap. If both the count in CallTargets and the - // count in ValueCountMap is not NOMORE_ICP_MAGICNUM, keep the - // count in CallTargets. - if (Pair.first->second != NOMORE_ICP_MAGICNUM && - Data.Count == NOMORE_ICP_MAGICNUM) { + if (Sum == 0) { + assert((CallTargets.size() == 1 && + CallTargets[0].Count == NOMORE_ICP_MAGICNUM) && + "If sum is 0, assume only one element in CallTargets " + "with count being NOMORE_ICP_MAGICNUM"); + // Initialize ValueCountMap with existing value profile data. 
+ if (Valid) { + for (uint32_t I = 0; I < NumVals; I++) + ValueCountMap[ValueData[I].Value] = ValueData[I].Count; + } + auto Pair = + ValueCountMap.try_emplace(CallTargets[0].Value, CallTargets[0].Count); + // If the target already exists in value profile, decrease the total + // count OldSum and reset the target's count to NOMORE_ICP_MAGICNUM. + if (!Pair.second) { OldSum -= Pair.first->second; Pair.first->second = NOMORE_ICP_MAGICNUM; - } else if (Pair.first->second == NOMORE_ICP_MAGICNUM && - Data.Count != NOMORE_ICP_MAGICNUM) { + } + Sum = OldSum; + } else { + // Initialize ValueCountMap with existing NOMORE_ICP_MAGICNUM + // counts in the value profile. + if (Valid) { + for (uint32_t I = 0; I < NumVals; I++) { + if (ValueData[I].Count == NOMORE_ICP_MAGICNUM) + ValueCountMap[ValueData[I].Value] = ValueData[I].Count; + } + } + + for (const auto &Data : CallTargets) { + auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count); + if (Pair.second) + continue; + // The target represented by Data.Value has already been promoted. + // Keep the count as NOMORE_ICP_MAGICNUM in the profile and decrease + // Sum by Data.Count. assert(Sum >= Data.Count && "Sum should never be less than Data.Count"); Sum -= Data.Count; - } else if (Pair.first->second != NOMORE_ICP_MAGICNUM && - Data.Count != NOMORE_ICP_MAGICNUM) { - // Sum will be used in this case. Although the existing count - // for the current value in value profile will be overriden, - // no need to update OldSum. - Pair.first->second = Data.Count; } } @@ -818,8 +822,7 @@ uint32_t MaxMDCount = std::min(NewCallTargets.size(), static_cast(MaxNumPromotions)); annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst, - NewCallTargets, Sum ? Sum : OldSum, IPVK_IndirectCallTarget, - MaxMDCount); + NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount); } /// Attempt to promote indirect call and also inline the promoted call. 
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/norepeated-icp-3.prof b/llvm/test/Transforms/SampleProfile/Inputs/norepeated-icp-3.prof new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/norepeated-icp-3.prof @@ -0,0 +1,6 @@ +_Z3foov:225715:1 + 2: 5553 + 3: 5391 + 1: _Z3goov:5860 + 1: 5279 _Z3hoov:5860 _Z3moov:210 + 2: 5279 diff --git a/llvm/test/Transforms/SampleProfile/norepeated-icp-3.ll b/llvm/test/Transforms/SampleProfile/norepeated-icp-3.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/norepeated-icp-3.ll @@ -0,0 +1,71 @@ +; RUN: opt < %s -passes=sample-profile -sample-profile-icp-max-prom=4 -sample-profile-file=%S/Inputs/norepeated-icp-3.prof -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@.str = private unnamed_addr constant [5 x i8] c"hoo\0A\00", align 1 +@p = dso_local global void ()* null, align 8 +@str = private unnamed_addr constant [4 x i8] c"hoo\00", align 1 + +; Function Attrs: nofree nounwind +declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) #1 + +; Function Attrs: uwtable mustprogress +define dso_local void @_Z3goov() #0 !dbg !11 { +entry: + %0 = load void ()*, void ()** @p, align 8, !dbg !12, !tbaa !13 + call void %0(), !dbg !17, !prof !22 + ret void, !dbg !18 +} + +; After the indirect call in _Z3goov is inlined into _Z3foov, it will be +; annotated with new inline instance profile. The existing value profile +; associated with the indirect call should be dropped except those values +; with NOMORE_ICP_MAGICNUM magic number indicating promoted targets. 
+; CHECK-LABEL: @_Z3foov( +; CHECK: call void %0(), {{.*}} !prof ![[PROF_ID:[0-9]+]] +; CHECK-NEXT: ret void + +; Function Attrs: uwtable mustprogress +define dso_local void @_Z3foov() #0 !dbg !19 { +entry: + call void @_Z3goov(), !dbg !20 + ret void, !dbg !21 +} + +; Function Attrs: nofree nounwind +declare noundef i32 @puts(i8* nocapture noundef readonly) #2 + +attributes #0 = { uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-sample-profile" "use-soft-float"="false" } +attributes #1 = { nofree nounwind "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nofree nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "1.cc", directory: "") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!""} +!8 = !DISubroutineType(types: !2) +!11 = distinct !DISubprogram(name: "goo", linkageName: "_Z3goov", scope: !1, file: !1, line: 6, type: !8, scopeLine: 6, flags: DIFlagPrototyped 
| DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!12 = !DILocation(line: 7, column: 5, scope: !11) +!13 = !{!14, !14, i64 0} +!14 = !{!"any pointer", !15, i64 0} +!15 = !{!"omnipotent char", !16, i64 0} +!16 = !{!"Simple C++ TBAA"} +!17 = !DILocation(line: 7, column: 3, scope: !11) +!18 = !DILocation(line: 8, column: 1, scope: !11) +!19 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 10, type: !8, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!20 = !DILocation(line: 11, column: 3, scope: !19) +!21 = !DILocation(line: 12, column: 3, scope: !19) +; The original value 125292384912345234234 and its count 8000 should +; be dropped when the indirect call is annotated with new profile. +; The original value -7383239051784516332 and its count -1 should be kept +; because -1 is NOMORE_ICP_MAGICNUM. +; CHECK: ![[PROF_ID]] = !{!"VP", i32 0, i64 5860, i64 -7383239051784516332, i64 -1, i64 -7701940972712279918, i64 5860} +!22 = !{!"VP", i32 0, i64 8000, i64 -7383239051784516332, i64 -1, i64 125292384912345234234, i64 8000}