diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1305,14 +1305,8 @@ if (Optional Probe = extractProbe(*CB)) Factor = Probe->Factor; - uint64_t CallsiteCount = 0; - ErrorOr Weight = getBlockWeight(CB->getParent()); - if (Weight) - CallsiteCount = Weight.get(); - if (CalleeSamples) - CallsiteCount = std::max( - CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor)); - + uint64_t CallsiteCount = + CalleeSamples ? CalleeSamples->getEntrySamples() * Factor : 0; *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor}; return true; } diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-context-order-scc.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-context-order-scc.prof --- a/llvm/test/Transforms/SampleProfile/Inputs/profile-context-order-scc.prof +++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-context-order-scc.prof @@ -1,9 +1,9 @@ -[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]:1467299:11 +[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]:1467299:287864 0: 6 1: 6 3: 287884 15: 23 -[main:3.1 @ _Z5funcBi:1 @ _Z8funcLeafi]:500853:20 +[main:3.1 @ _Z5funcBi:1 @ _Z8funcLeafi]:500853:287864 0: 15 1: 15 3: 74946 @@ -19,7 +19,7 @@ 3.1: 10 _Z5funcBi:11 [main:3.1 @ _Z5funcBi]:120:19 0: 19 - 1: 19 _Z8funcLeafi:20 + 1: 287864 _Z8funcLeafi:287864 3: 12 [externalA:17 @ _Z5funcBi]:120:3 0: 3 @@ -29,7 +29,7 @@ 1: 10 [main:3 @ _Z5funcAi]:99:11 0: 10 - 1: 10 _Z8funcLeafi:11 + 1: 287864 _Z8funcLeafi:287864 2: 287864 _Z3fibi:315608 3: 24 [main:3 @ _Z5funcAi:2 @ _Z3fibi]:287864:315608 @@ -39,5 +39,4 @@ [main:3 @ _Z5funcAi:1 @ _Z8funcLeafi:1 @ _Z5funcBi]:1467299:6 0: 6 1: 6 - 3: 287884 - 15: 23 \ No newline at end of file + 3: 6 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-context-order.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-context-order.prof --- a/llvm/test/Transforms/SampleProfile/Inputs/profile-context-order.prof +++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-context-order.prof @@ -1,4 +1,4 @@ -[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]:1467299:11 +[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]:1467299:287864 0: 6 1: 6 3: 287884 @@ -29,10 +29,10 @@ 1: 10 [main:3 @ _Z5funcAi]:99:11 0: 10 - 1: 10 _Z8funcLeafi:11 + 1: 287864 _Z8funcLeafi:287864 2: 287864 _Z3fibi:315608 3: 24 [main:3 @ _Z5funcAi:2 @ _Z3fibi]:287864:315608 0: 362839 1: 6 - 3: 287884 \ No newline at end of file + 3: 287884 diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll @@ -57,9 +57,9 @@ !11 = distinct !DISubprogram(name: "zoo", linkageName: "_Z3zoov", scope: !1, file: !1, line: 24, unit: !0) -; ICP-ALL: remark: test.cc:5:0: '_Z3bazv' inlined into 'test' -; ICP-ALL-NEXT: remark: test.cc:4:0: '_Z3foov' inlined into 'test' +; ICP-ALL: remark: test.cc:4:0: '_Z3foov' inlined into 'test' ; ICP-ALL-NEXT: remark: test.cc:4:0: '_Z3barv' inlined into 'test' +; ICP-ALL-NEXT: remark: test.cc:5:0: '_Z3bazv' inlined into 'test' ; ICP-ALL-NOT: remark ; ICP-HOT: remark: test.cc:4:0: '_Z3foov' inlined into 'test' diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll @@ -18,14 +18,14 @@ ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -sample-profile-prioritized-inline -sample-profile-inline-size -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW ; ; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, tuning hot cutoff can get us the same inlining -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999990 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE ; ; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, tuning cold sample profile inline threshold can get us the same inlining ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE ; ; With new FDO early inliner and tuned cutoff, we can control inlining through size growth tuning knob. -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -sample-profile-inline-limit-min=0 -sample-profile-inline-growth-limit=1 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --allow-empty --check-prefix=INLINE-NEW-LIMIT1 -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -sample-profile-inline-limit-min=10 -sample-profile-inline-growth-limit=1 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW-LIMIT2 +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999990 -sample-profile-inline-limit-min=0 -sample-profile-inline-growth-limit=1 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --allow-empty --check-prefix=INLINE-NEW-LIMIT1 +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999990 -sample-profile-inline-limit-min=10 -sample-profile-inline-growth-limit=1 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW-LIMIT2 ; INLINE-BASE: remark: merged.cpp:14:10: '_Z5funcAi' inlined into 'main' to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite main:3:10 @@ -38,6 +38,7 @@ ; INLINE-NEW-LIMIT1-NOT: remark ; INLINE-NEW-LIMIT2: remark: merged.cpp:33:11: '_Z8funcLeafi' inlined into '_Z5funcBi' to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcBi:1:11 +; INLINE-NEW-LIMIT2: remark: merged.cpp:27:11: '_Z8funcLeafi' inlined into '_Z5funcAi' to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcAi:1:11; ; INLINE-NEW-LIMIT2-NOT: remark @factor = dso_local global i32 3, align 4, !dbg !0