diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -1533,6 +1533,13 @@
   auto PrevCount = getEntryCount();
   assert(!PrevCount.hasValue() || PrevCount.getType() == Count.getType());
 #endif
+
+  // When the caller passes no import GUID set, fall back to the set returned
+  // by getImportGUIDs() so the rebuilt entry-count metadata keeps it.
+  auto ImportGUIDs = getImportGUIDs();
+  if (S == nullptr && !ImportGUIDs.empty())
+    S = &ImportGUIDs;
+
   MDBuilder MDB(getContext());
   setMetadata(
       LLVMContext::MD_prof,
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1508,8 +1508,6 @@
   else
     newEntryCount = priorEntryCount + entryDelta;
 
-  Callee->setEntryCount(newEntryCount);
-
   // During inlining ?
   if (VMap) {
     uint64_t cloneEntryCount = priorEntryCount - newEntryCount;
@@ -1518,12 +1516,19 @@
         if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
           CI->updateProfWeight(cloneEntryCount, priorEntryCount);
   }
-  for (BasicBlock &BB : *Callee)
-    // No need to update the callsite if it is pruned during inlining.
-    if (!VMap || VMap->count(&BB))
-      for (Instruction &I : BB)
-        if (CallInst *CI = dyn_cast<CallInst>(&I))
-          CI->updateProfWeight(newEntryCount, priorEntryCount);
+
+  // A zero delta needs no update: skip rewriting the callee's entry count and
+  // rescaling its call sites.
+  if (entryDelta) {
+    Callee->setEntryCount(newEntryCount);
+
+    for (BasicBlock &BB : *Callee)
+      // No need to update the callsite if it is pruned during inlining.
+      if (!VMap || VMap->count(&BB))
+        for (Instruction &I : BB)
+          if (CallInst *CI = dyn_cast<CallInst>(&I))
+            CI->updateProfWeight(newEntryCount, priorEntryCount);
+  }
 }
 
 /// This function inlines the called function into the basic block of the
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-callee-update.prof b/llvm/test/Transforms/SampleProfile/Inputs/inline-callee-update.prof
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-callee-update.prof
@@ -0,0 +1,10 @@
+test_sample_loader_inline:63067:0
+ 1: sample_loader_inlinee:40000
+sample_loader_inlinee:3000:0
+ 1: direct_leaf_func:35000
+ 11: 3000
+test_cgscc_inline:63067:0
+ 1: sample_loader_inlinee:1
+cgscc_inlinee:3000:0
+ 1: direct_leaf_func:35000
+ 11: 3000
\ No newline at end of file
diff --git a/llvm/test/Transforms/SampleProfile/inline-callee-update.ll b/llvm/test/Transforms/SampleProfile/inline-callee-update.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/inline-callee-update.ll
@@ -0,0 +1,73 @@
+; Make sure the import GUID list for ThinLTO is properly maintained when a function's entry count is updated during inlining.
+
+; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/inline-callee-update.prof -S | FileCheck %s
+
+@y = global i32* ()* null, align 8
+@z = global i32* ()* null, align 8
+
+define i32* @sample_loader_inlinee() !dbg !3 {
+bb:
+  %tmp = call i32* @direct_leaf_func(i32* null), !dbg !4
+  %cmp = icmp ne i32* %tmp, null
+  br i1 %cmp, label %then, label %else
+
+then:                                             ; preds = %bb
+  %tmp1 = load i32* ()*, i32* ()** @z, align 8, !dbg !5
+  %tmp2 = call i32* %tmp1(), !dbg !5
+  ret i32* %tmp2
+
+else:                                             ; preds = %bb
+  ret i32* null
+}
+
+define i32* @cgscc_inlinee() !dbg !6 {
+bb:
+  %tmp = call i32* @direct_leaf_func(i32* null), !dbg !7
+  %cmp = icmp ne i32* %tmp, null
+  br i1 %cmp, label %then, label %else
+
+then:                                             ; preds = %bb
+  %tmp1 = load i32* ()*, i32* ()** @y, align 8, !dbg !8
+  %tmp2 = call i32* %tmp1(), !dbg !8
+  ret i32* %tmp2
+
+else:                                             ; preds = %bb
+  ret i32* null
+}
+
+define i32* @test_sample_loader_inline(void ()* %arg) !dbg !9 {
+bb:
+  %tmp = call i32* @sample_loader_inlinee(), !dbg !10
+  ret i32* %tmp
+}
+
+define i32* @test_cgscc_inline(void ()* %arg) !dbg !11 {
+bb:
+  %tmp = call i32* @cgscc_inlinee(), !dbg !12
+  ret i32* %tmp
+}
+
+declare i32* @direct_leaf_func(i32*)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug)
+!1 = !DIFile(filename: "test.cc", directory: "/")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = distinct !DISubprogram(name: "sample_loader_inlinee", scope: !1, file: !1, line: 11, spFlags: DISPFlagDefinition, unit: !0)
+!4 = !DILocation(line: 12, scope: !3)
+!5 = !DILocation(line: 13, scope: !3)
+!6 = distinct !DISubprogram(name: "cgscc_inlinee", scope: !1, file: !1, line: 31, spFlags: DISPFlagDefinition, unit: !0)
+!7 = !DILocation(line: 32, scope: !6)
+!8 = !DILocation(line: 33, scope: !6)
+!9 = distinct !DISubprogram(name: "test_sample_loader_inline", scope: !1, file: !1, line: 3, spFlags: DISPFlagDefinition, unit: !0)
+!10 = !DILocation(line: 4, scope: !9)
+!11 = distinct !DISubprogram(name: "test_cgscc_inline", scope: !1, file: !1, line: 20, spFlags: DISPFlagDefinition, unit: !0)
+!12 = !DILocation(line: 21, scope: !11)
+
+; Make sure the ImportGUID stays with entry count metadata for ThinLTO-PreLink
+; CHECK: distinct !DISubprogram(name: "sample_loader_inlinee"
+; CHECK-NEXT: {!"function_entry_count", i64 1, i64 -9171813444624716006}
+; CHECK: distinct !DISubprogram(name: "cgscc_inlinee"
+; CHECK-NEXT: !{!"function_entry_count", i64 0, i64 -9171813444624716006}