diff --git a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h --- a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h +++ b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h @@ -31,6 +31,10 @@ bool EmitRemarks); std::unique_ptr getAdviceImpl(CallBase &CB) override; bool areReplayRemarksLoaded() const { return HasReplayRemarks; } + bool hasRemarksForFunction(Function &F) const { + return (Scope == ReplayInlineScope::Module) || + CallersToReplay.contains(F.getName()); + } private: std::unique_ptr OriginalAdvisor; diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -45,6 +45,7 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/ReplayInlineAdvisor.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" @@ -488,7 +489,7 @@ bool ProfAccForSymsInList; // External inline advisor used to replay inline decision from remarks. - std::unique_ptr ExternalInlineAdvisor; + std::unique_ptr ExternalInlineAdvisor; // A pseudo probe helper to correlate the imported sample counts. std::unique_ptr ProbeManager; @@ -1073,16 +1074,21 @@ for (auto &I : BB.getInstList()) { const FunctionSamples *FS = nullptr; if (auto *CB = dyn_cast(&I)) { - if (!isa(I) && (FS = findCalleeFunctionSamples(*CB))) { - assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) && - "GUIDToFuncNameMap has to be populated"); - AllCandidates.push_back(CB); - if (FS->getEntrySamples() > 0 || ProfileIsCS) - LocalNotInlinedCallSites.try_emplace(CB, FS); - if (callsiteIsHot(FS, PSI, ProfAccForSymsInList)) - Hot = true; - else if (shouldInlineColdCallee(*CB)) - ColdCandidates.push_back(CB); + if (!isa(I)) { + if ((FS = findCalleeFunctionSamples(*CB))) { + assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) && + "GUIDToFuncNameMap has to be populated"); + AllCandidates.push_back(CB); + if (FS->getEntrySamples() > 0 || ProfileIsCS) + LocalNotInlinedCallSites.try_emplace(CB, FS); + if (callsiteIsHot(FS, PSI, ProfAccForSymsInList)) + Hot = true; + else if (shouldInlineColdCallee(*CB)) + ColdCandidates.push_back(CB); + } else if (ExternalInlineAdvisor && + ExternalInlineAdvisor->hasRemarksForFunction(F)) { + AllCandidates.push_back(CB); + } } } } @@ -1129,9 +1135,18 @@ LocalChanged = true; } } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { - findExternalInlineCandidate(findCalleeFunctionSamples(*I), InlinedGUIDs, - SymbolMap, - PSI->getOrCompHotCountThreshold()); + // If replay wants to inline out of module function, make sure + // it is imported + if (ExternalInlineAdvisor && + ExternalInlineAdvisor->hasRemarksForFunction(F)) { + if (shouldInlineCandidate(Candidate)) + InlinedGUIDs.insert( + FunctionSamples::getGUID(I->getCalledFunction()->getName())); + } else { + findExternalInlineCandidate(findCalleeFunctionSamples(*I), + InlinedGUIDs, SymbolMap, + PSI->getOrCompHotCountThreshold()); + } } } Changed |= LocalChanged; @@ -1846,7 +1861,7 @@ } if (FAM && !ProfileInlineReplayFile.empty()) { - ExternalInlineAdvisor = getReplayInlineAdvisor( + ExternalInlineAdvisor = std::make_unique( M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr, ProfileInlineReplayFile, ProfileInlineReplayScope, /*EmitRemarks=*/false); } diff --git a/llvm/test/Transforms/SampleProfile/Inputs/function_metadata_replay.txt b/llvm/test/Transforms/SampleProfile/Inputs/function_metadata_replay.txt new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/function_metadata_replay.txt @@ -0,0 +1 @@ +remark: calls.cc:8:0: 'foo' inlined into 'test_liveness' to match profiling context with (cost=0, threshold=337) at callsite test_liveness:1:0; \ No newline at end of file diff --git a/llvm/test/Transforms/SampleProfile/function_metadata.ll b/llvm/test/Transforms/SampleProfile/function_metadata.ll --- a/llvm/test/Transforms/SampleProfile/function_metadata.ll +++ b/llvm/test/Transforms/SampleProfile/function_metadata.ll @@ -2,6 +2,13 @@ ; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/function_metadata.compact.afdo -S | FileCheck %s ; RUN: opt < %s -passes='pseudo-probe,thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/pseudo-probe-func-metadata.prof -S | FileCheck %s +; Validate that with replay in effect, we import call sites even if they are below the threshold +; Baseline import decisions +; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -profile-summary-hot-count=2000 -profile-file=%S/Inputs/function_metadata.prof -S | FileCheck -check-prefix=THRESHOLD %s +; With replay decisions +; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -profile-summary-hot-count=2000 -profile-file=%S/Inputs/function_metadata.prof -S -sample-profile-inline-replay-scope=Module -sample-profile-inline-replay=%S/Inputs/function_metadata_replay.txt | FileCheck -check-prefix=THRESHOLD-REPLAY %s + + ; Tests whether the functions in the inline stack are added to the ; function_entry_count metadata. @@ -16,6 +23,8 @@ } ; CHECK: define void @test({{.*}} !prof ![[ENTRY_TEST:[0-9]+]] +; THRESHOLD: define void @test({{.*}} !prof ![[ENTRY_THRESHOLD:[0-9]+]] +; THRESHOLD-REPLAY: define void @test({{.*}} !prof ![[ENTRY_REPLAY_TEST:[0-9]+]] define void @test(void ()*) #0 !dbg !7 { %2 = alloca void ()* store void ()* %0, void ()** %2 @@ -27,6 +36,8 @@ } ; CHECK: define void @test_liveness({{.*}} !prof ![[ENTRY_TEST_LIVENESS:[0-9]+]] +; THRESHOLD: define void @test_liveness({{.*}} !prof ![[ENTRY_THRESHOLD:[0-9]+]] +; THRESHOLD-REPLAY: define void @test_liveness({{.*}} !prof ![[ENTRY_REPLAY_TEST_LIVENESS:[0-9]+]] define void @test_liveness() #0 !dbg !12 { call void @foo(), !dbg !20 ret void @@ -43,6 +54,13 @@ ; to bar. bar_available should not be included as it's within the same module. ; CHECK: ![[ENTRY_TEST_LIVENESS]] = !{!"function_entry_count", i64 1, i64 6699318081062747564, i64 -2012135647395072713, i64 -1522495160813492905} +; With high threshold, nothing should be imported +; THRESHOLD: ![[ENTRY_THRESHOLD]] = !{!"function_entry_count", i64 1} + +; With high threshold and replay, sites that are in the replay should be imported +; THRESHOLD-REPLAY: ![[ENTRY_REPLAY_TEST]] = !{!"function_entry_count", i64 1} +; THRESHOLD-REPLAY: ![[ENTRY_REPLAY_TEST_LIVENESS]] = !{!"function_entry_count", i64 1, i64 6699318081062747564} + attributes #0 = {"use-sample-profile"} !llvm.dbg.cu = !{!0}