Index: lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- lib/Transforms/IPO/SampleProfile.cpp +++ lib/Transforms/IPO/SampleProfile.cpp @@ -1658,7 +1658,7 @@ // unsampled will be treated as cold. uint64_t initialEntryCount = (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) - ? 0 + ? 1 : -1; F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real)); std::unique_ptr OwnedORE; Index: lib/Transforms/Utils/InlineFunction.cpp =================================================================== --- lib/Transforms/Utils/InlineFunction.cpp +++ lib/Transforms/Utils/InlineFunction.cpp @@ -1464,9 +1464,9 @@ uint64_t newEntryCount = priorEntryCount; // Since CallSiteCount is an estimate, it could exceed the original callee - // count and has to be set to 0 so guard against underflow. + // count and has to be set to 1 so guard against underflow. if (entryDelta < 0 && static_cast(-entryDelta) > priorEntryCount) - newEntryCount = 0; + newEntryCount = 1; else newEntryCount = priorEntryCount + entryDelta; Index: test/Transforms/SampleProfile/Inputs/accurate-samplepgo.prof =================================================================== --- test/Transforms/SampleProfile/Inputs/accurate-samplepgo.prof +++ test/Transforms/SampleProfile/Inputs/accurate-samplepgo.prof @@ -0,0 +1,5 @@ +foo:202:200 + 1: bar:2 + 1: 2 + 2: baz:2 + 2: 200 Index: test/Transforms/SampleProfile/accurate-samplepgo.ll =================================================================== --- test/Transforms/SampleProfile/accurate-samplepgo.ll +++ test/Transforms/SampleProfile/accurate-samplepgo.ll @@ -0,0 +1,97 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/accurate-samplepgo.prof -profile-sample-accurate -S | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/accurate-samplepgo.prof -profile-sample-accurate -S | FileCheck %s +; +; function entry count needs to be set to 1 
instead of 0, whether during +; initialization or profile update; otherwise we may see a "div 0" error +; in the profile update later. In the bug we saw that an inline instance +; in the profile was not inlined by the sample profile loader and triggered +; profile update for the outline copy, then the "div 0" error happened. +; The test is to make sure no "div 0" error will happen when such a profile +; update happens. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@i = common dso_local local_unnamed_addr global i32 0, align 4 + +declare dso_local void @baz() local_unnamed_addr #1 + +; Function Attrs: noinline nounwind uwtable +define dso_local void @bar() local_unnamed_addr #0 !dbg !40 !prof !41 { +entry: + store i32 0, i32* @i, align 4, !dbg !42, !tbaa !43 + tail call void @baz(), !dbg !47, !prof !38 + ret void, !dbg !48 +} + +; Function Attrs: noinline nounwind uwtable +define dso_local void @foo() local_unnamed_addr #0 !dbg !49 !prof !36 { +entry: +; Although bar is inlined in the profile, bar is not inlined here. It will +; trigger profile update for the outline copy. Make sure the profile update +; will not cause a "div 0" error.
+; CHECK: call void @bar + tail call void @bar(), !dbg !50, !prof !38 + ret void, !dbg !51 +} + +attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6} +!llvm.ident = !{!33} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 9.0.0 (trunk 356814)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "1.c", directory: "/usr/local/google/home/wmi/workarea/llvm-r356814/src") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{i32 1, !"ProfileSummary", !7} +!7 = !{!8, !9, !10, !11, !12, !13, !14, !15} +!8 = !{!"ProfileFormat", !"SampleProfile"} +!9 = !{!"TotalCount", i64 200} +!10 = !{!"MaxCount", i64 200} +!11 = !{!"MaxInternalCount", i64 0} +!12 = !{!"MaxFunctionCount", i64 200} +!13 = !{!"NumCounts", i64 1} +!14 = !{!"NumFunctions", i64 2} +!15 = !{!"DetailedSummary", !16} +!16 = 
!{!17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32} +!17 = !{i32 10000, i64 200, i32 1} +!18 = !{i32 100000, i64 200, i32 1} +!19 = !{i32 200000, i64 200, i32 1} +!20 = !{i32 300000, i64 200, i32 1} +!21 = !{i32 400000, i64 200, i32 1} +!22 = !{i32 500000, i64 200, i32 1} +!23 = !{i32 600000, i64 200, i32 1} +!24 = !{i32 700000, i64 200, i32 1} +!25 = !{i32 800000, i64 200, i32 1} +!26 = !{i32 900000, i64 200, i32 1} +!27 = !{i32 950000, i64 200, i32 1} +!28 = !{i32 990000, i64 200, i32 1} +!29 = !{i32 999000, i64 200, i32 1} +!30 = !{i32 999900, i64 200, i32 1} +!31 = !{i32 999990, i64 200, i32 1} +!32 = !{i32 999999, i64 200, i32 1} +!33 = !{!"clang version 9.0.0 (trunk 356814)"} +!34 = distinct !DISubprogram(name: "baz", scope: !1, file: !1, line: 3, type: !35, scopeLine: 3, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!35 = !DISubroutineType(types: !2) +!36 = !{!"function_entry_count", i64 2} +!37 = !DILocation(line: 4, column: 3, scope: !34) +!38 = !{!"branch_weights", i32 2} +!39 = !DILocation(line: 5, column: 1, scope: !34) +!40 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 8, type: !35, scopeLine: 8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!41 = !{!"function_entry_count", i64 3} +!42 = !DILocation(line: 9, column: 5, scope: !40) +!43 = !{!44, !44, i64 0} +!44 = !{!"int", !45, i64 0} +!45 = !{!"omnipotent char", !46, i64 0} +!46 = !{!"Simple C/C++ TBAA"} +!47 = !DILocation(line: 10, column: 3, scope: !40) +!48 = !DILocation(line: 11, column: 1, scope: !40) +!49 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 13, type: !35, scopeLine: 13, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!50 = !DILocation(line: 14, column: 3, scope: !49) +!51 = !DILocation(line: 15, column: 1, scope: !49) Index: test/Transforms/SampleProfile/section-accurate-samplepgo.ll 
=================================================================== --- test/Transforms/SampleProfile/section-accurate-samplepgo.ll +++ test/Transforms/SampleProfile/section-accurate-samplepgo.ll @@ -31,9 +31,9 @@ attributes #0 = { "profile-sample-accurate" } ; CHECK: ![[UNKNOWN_ID]] = !{!"function_entry_count", i64 -1} -; CHECK: ![[ZERO_ID]] = !{!"function_entry_count", i64 0} +; CHECK: ![[ZERO_ID]] = !{!"function_entry_count", i64 1} ; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"} -; ACCURATE: ![[ZERO_ID]] = !{!"function_entry_count", i64 0} +; ACCURATE: ![[ZERO_ID]] = !{!"function_entry_count", i64 1} ; ACCURATE: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"} !llvm.module.flags = !{!1} !1 = !{i32 1, !"ProfileSummary", !2}