diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -294,6 +294,9 @@
   void setEntryCount(uint64_t Count, ProfileCountType Type = PCT_Real,
                      const DenseSet<GlobalValue::GUID> *Imports = nullptr);
 
+  /// A convenience wrapper for updating entry count while keeping ImportGUIDs
+  void updateEntryCount(uint64_t Count, ProfileCountType Type = PCT_Real);
+
   /// Get the entry count for this function.
   ///
   /// Entry count is the number of times the function was executed.
diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -177,9 +177,11 @@
                                   *GetAssumptionCache = nullptr,
                               ProfileSummaryInfo *PSI = nullptr,
                               BlockFrequencyInfo *CallerBFI = nullptr,
-                              BlockFrequencyInfo *CalleeBFI = nullptr)
+                              BlockFrequencyInfo *CalleeBFI = nullptr,
+                              bool UpdateProfile = true)
       : CG(cg), GetAssumptionCache(GetAssumptionCache), PSI(PSI),
-        CallerBFI(CallerBFI), CalleeBFI(CalleeBFI) {}
+        CallerBFI(CallerBFI), CalleeBFI(CalleeBFI),
+        UpdateProfile(UpdateProfile) {}
 
   /// If non-null, InlineFunction will update the callgraph to reflect the
   /// changes it makes.
@@ -203,6 +205,10 @@
   /// `InlinedCalls` above is used.
   SmallVector<CallSite, 8> InlinedCallSites;
 
+  /// Update profile for callee as well as cloned version. We need to do this
+  /// for regular inlining, but not for inlining from sample profile loader.
+  bool UpdateProfile;
+
   void reset() {
     StaticAllocas.clear();
     InlinedCalls.clear();
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -1544,6 +1544,12 @@
   setEntryCount(ProfileCount(Count, Type), Imports);
 }
 
+void Function::updateEntryCount(uint64_t Count, ProfileCountType Type) {
+  auto ImportGUIDs = getImportGUIDs();
+  setEntryCount(ProfileCount(Count, Type),
+                ImportGUIDs.size() ? &ImportGUIDs : nullptr);
+}
+
 ProfileCount Function::getEntryCount(bool AllowSynthetic) const {
   MDNode *MD = getMetadata(LLVMContext::MD_prof);
   if (MD && MD->getOperand(0))
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -873,7 +873,7 @@
               << "incompatible inlining");
     return false;
   }
-  InlineFunctionInfo IFI(nullptr, &GetAC);
+  InlineFunctionInfo IFI(nullptr, &GetAC, nullptr, nullptr, nullptr, false);
   if (InlineFunction(CS, IFI)) {
     // The call to InlineFunction erases I, so we can't pass it here.
     ORE->emit(OptimizationRemark(DEBUG_TYPE, "HotInline", DLoc, BB)
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1508,8 +1508,6 @@
   else
     newEntryCount = priorEntryCount + entryDelta;
 
-  Callee->setEntryCount(newEntryCount);
-
   // During inlining ?
   if (VMap) {
     uint64_t cloneEntryCount = priorEntryCount - newEntryCount;
@@ -1518,12 +1516,17 @@
         if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
           CI->updateProfWeight(cloneEntryCount, priorEntryCount);
   }
-  for (BasicBlock &BB : *Callee)
-    // No need to update the callsite if it is pruned during inlining.
-    if (!VMap || VMap->count(&BB))
-      for (Instruction &I : BB)
-        if (CallInst *CI = dyn_cast<CallInst>(&I))
-          CI->updateProfWeight(newEntryCount, priorEntryCount);
+
+  if (entryDelta) {
+    Callee->updateEntryCount(newEntryCount);
+
+    for (BasicBlock &BB : *Callee)
+      // No need to update the callsite if it is pruned during inlining.
+      if (!VMap || VMap->count(&BB))
+        for (Instruction &I : BB)
+          if (CallInst *CI = dyn_cast<CallInst>(&I))
+            CI->updateProfWeight(newEntryCount, priorEntryCount);
+  }
 }
 
 /// This function inlines the called function into the basic block of the
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-callee-update.prof b/llvm/test/Transforms/SampleProfile/Inputs/inline-callee-update.prof
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-callee-update.prof
@@ -0,0 +1,14 @@
+test_sample_loader_inline:63067:0
+ 1: sample_loader_inlinee:40000
+sample_loader_inlinee:3000:0
+ 1: direct_leaf_func:3000
+  11: 3000
+ 2: indirect_leaf_func:35000
+  1: 3000
+test_cgscc_inline:63067:0
+ 1: sample_loader_inlinee:1
+cgscc_inlinee:3000:0
+ 1: direct_leaf_func:3000
+  11: 3000
+ 2: indirect_leaf_func:35000
+  1: 3000
\ No newline at end of file
diff --git a/llvm/test/Transforms/SampleProfile/inline-callee-update.ll b/llvm/test/Transforms/SampleProfile/inline-callee-update.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/inline-callee-update.ll
@@ -0,0 +1,80 @@
+; facebook T56498894: Make sure the Import GUID list for ThinLTO is properly maintained while updating a function's entry count for inlining
+
+; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/inline-callee-update.prof -S | FileCheck %s
+
+define i32* @sample_loader_inlinee() !dbg !6{
+  %1 = call i32* @direct_leaf_func(i32* null), !dbg !7
+  %cmp = icmp ne i32* %1, null
+  br i1 %cmp, label %then, label %else
+
+then:
+  %2 = load i32* ()*, i32* ()** @z, align 8, !dbg !8
+  %3 = call i32* %2(), !dbg !8
+  ret i32* %3
+
+else:
+  ret i32* null
+}
+
+define i32* @cgscc_inlinee() !dbg !14{
+  %1 = call i32* @direct_leaf_func(i32* null), !dbg !15
+  %cmp = icmp ne i32* %1, null
+  br i1 %cmp, label %then, label %else
+
+then:
+  %2 = load i32* ()*, i32* ()** @y, align 8, !dbg !16
+  %3 = call i32* %2(), !dbg !16
+  ret i32* %3
+
+else:
+  ret i32* null
+}
+
+define i32* @test_sample_loader_inline(void ()*) !dbg !3 {
+  %2 = call i32* @sample_loader_inlinee(), !dbg !4
+  ret i32* %2
+}
+
+define i32* @test_cgscc_inline(void ()*) !dbg !11 {
+  %2 = call i32* @cgscc_inlinee(), !dbg !12
+  ret i32* %2
+}
+
+@y = global i32* ()* null, align 8
+@z = global i32* ()* null, align 8
+
+define i32* @direct_leaf_func(i32* %x) !dbg !9 {
+  ret i32* %x
+}
+
+define i32* @indirect_leaf_func() !dbg !17{
+  ret i32* null
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1)
+!1 = !DIFile(filename: "test.cc", directory: "/")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = distinct !DISubprogram(name: "test_sample_loader_inline", scope: !1, file: !1, line: 3, unit: !0)
+!4 = !DILocation(line: 4, scope: !3)
+!5 = !DILocation(line: 6, scope: !3)
+!6 = distinct !DISubprogram(name: "sample_loader_inlinee", scope: !1, file: !1, line: 11, unit: !0)
+!7 = !DILocation(line: 12, scope: !6)
+!8 = !DILocation(line: 13, scope: !6)
+!9 = distinct !DISubprogram(name: "direct_leaf_func", scope: !1, file: !1, line: 11, unit: !0)
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = distinct !DISubprogram(name: "test_cgscc_inline", scope: !1, file: !1, line: 20, unit: !0)
+!12 = !DILocation(line: 21, scope: !11)
+!13 = !DILocation(line: 23, scope: !11)
+!14 = distinct !DISubprogram(name: "cgscc_inlinee", scope: !1, file: !1, line: 31, unit: !0)
+!15 = !DILocation(line: 32, scope: !14)
+!16 = !DILocation(line: 33, scope: !14)
+!17 = distinct !DISubprogram(name: "indirect_leaf_func", scope: !1, file: !1, line: 41, unit: !0)
+
+; Make sure the ImportGUID stays with entry count metadata for ThinLTO-PreLink
+; CHECK: distinct !DISubprogram(name: "sample_loader_inlinee"
+; CHECK-NEXT: {!"function_entry_count", i64 1, i64 -1383577875507729398}
+; CHECK: distinct !DISubprogram(name: "cgscc_inlinee"
+; CHECK-NEXT: !{!"function_entry_count", i64 0, i64 -1383577875507729398}