Index: include/llvm/ProfileData/SampleProf.h =================================================================== --- include/llvm/ProfileData/SampleProf.h +++ include/llvm/ProfileData/SampleProf.h @@ -352,17 +352,15 @@ return Result; } - /// Recursively traverses all children, if the corresponding function is - /// not defined in module \p M, and its total sample is no less than - /// \p Threshold, add its corresponding GUID to \p S. Also traverse the - /// BodySamples to add hot CallTarget's GUID to \p S. - void findImportedFunctions(DenseSet &S, const Module *M, - uint64_t Threshold) const { + /// If the total sample count of this function is greater than \p Threshold, + /// adds its GUID to \p S, adds the GUIDs of hot CallTargets found in the + /// BodySamples, and then recursively does the same for every entry in + /// CallsiteSamples. + void findInlinedFunctions(DenseSet &S, const Module *M, + uint64_t Threshold) const { if (TotalSamples <= Threshold) return; - Function *F = M->getFunction(Name); - if (!F || !F->getSubprogram()) - S.insert(Function::getGUID(Name)); + S.insert(Function::getGUID(Name)); // Import hot CallTargets, which may not be available in IR because full // profile annotation cannot be done until backend compilation in ThinLTO. for (const auto &BS : BodySamples) @@ -374,7 +372,7 @@ } for (const auto &CS : CallsiteSamples) for (const auto &NameFS : CS.second) - NameFS.second.findImportedFunctions(S, M, Threshold); + NameFS.second.findInlinedFunctions(S, M, Threshold); } /// Set the name of the function. 
Index: lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- lib/Transforms/IPO/SampleProfile.cpp +++ lib/Transforms/IPO/SampleProfile.cpp @@ -200,7 +200,7 @@ const FunctionSamples *findFunctionSamples(const Instruction &I) const; bool inlineCallInstruction(Instruction *I); bool inlineHotFunctions(Function &F, - DenseSet &ImportGUIDs); + DenseSet &InlinedGUIDs); void printEdgeWeight(raw_ostream &OS, Edge E); void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const; void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB); @@ -766,12 +766,12 @@ /// it to direct call. Each indirect call is limited with a single target. /// /// \param F function to perform iterative inlining. -/// \param ImportGUIDs a set to be updated to include all GUIDs that come -/// from a different module but inlined in the profiled binary. +/// \param InlinedGUIDs a set to be updated to include all GUIDs that are +/// inlined in the profiled binary. /// /// \returns True if there is any inline happened. 
bool SampleProfileLoader::inlineHotFunctions( - Function &F, DenseSet &ImportGUIDs) { + Function &F, DenseSet &InlinedGUIDs) { DenseSet PromotedInsns; bool Changed = false; while (true) { @@ -804,9 +804,9 @@ uint64_t Sum; for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) { if (IsThinLTOPreLink) { - FS->findImportedFunctions(ImportGUIDs, F.getParent(), - Samples->getTotalSamples() * - SampleProfileHotThreshold / 100); + FS->findInlinedFunctions(InlinedGUIDs, F.getParent(), + Samples->getTotalSamples() * + SampleProfileHotThreshold / 100); continue; } auto CalleeFunctionName = FS->getName(); @@ -844,8 +844,8 @@ if (inlineCallInstruction(I)) LocalChanged = true; } else if (IsThinLTOPreLink) { - findCalleeFunctionSamples(*I)->findImportedFunctions( - ImportGUIDs, F.getParent(), + findCalleeFunctionSamples(*I)->findInlinedFunctions( + InlinedGUIDs, F.getParent(), Samples->getTotalSamples() * SampleProfileHotThreshold / 100); } } @@ -1455,18 +1455,19 @@ DEBUG(dbgs() << "Line number for the first instruction in " << F.getName() << ": " << getFunctionLoc(F) << "\n"); - DenseSet ImportGUIDs; - Changed |= inlineHotFunctions(F, ImportGUIDs); + DenseSet InlinedGUIDs; + Changed |= inlineHotFunctions(F, InlinedGUIDs); // Compute basic block weights. Changed |= computeBlockWeights(F); if (Changed) { // Add an entry count to the function using the samples gathered at the - // function entry. Also sets the GUIDs that comes from a different - // module but inlined in the profiled binary. This is aiming at making - // the IR match the profiled binary before annotation. - F.setEntryCount(Samples->getHeadSamples() + 1, &ImportGUIDs); + // function entry. + // Also record the GUIDs of the functions inlined in the profiled binary. + // ThinLink uses them for correct liveness analysis, and they help make + // the IR match the profiled binary before annotation. + F.setEntryCount(Samples->getHeadSamples() + 1, &InlinedGUIDs); // Compute dominance and loop info needed for propagation. 
computeDominanceAndLoopInfo(F); Index: test/Transforms/SampleProfile/Inputs/function_metadata.prof =================================================================== --- test/Transforms/SampleProfile/Inputs/function_metadata.prof +++ test/Transforms/SampleProfile/Inputs/function_metadata.prof @@ -6,3 +6,6 @@ 1: 1000 4: foo2:1000 1: 1000 foo3:1000 +test_liveness:10000:0 + 1: foo:1000 + 1: foo_available:1000 Index: test/Transforms/SampleProfile/Inputs/import.prof =================================================================== --- test/Transforms/SampleProfile/Inputs/import.prof +++ test/Transforms/SampleProfile/Inputs/import.prof @@ -1,8 +0,0 @@ -test:10000:0 - 3: foo:1000 - 3: bar:200 - 4: baz:10 - 4: foo1:1000 - 1: 1000 - 4: foo2:1000 - 1: 1000 foo3:1000 Index: test/Transforms/SampleProfile/function_metadata.ll =================================================================== --- test/Transforms/SampleProfile/function_metadata.ll +++ test/Transforms/SampleProfile/function_metadata.ll @@ -1,10 +1,15 @@ -; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=new-pm-pgo-sample-use-pipeline -profile-file=%S/Inputs/import.prof -S | FileCheck %s +; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=new-pm-pgo-sample-use-pipeline -profile-file=%S/Inputs/function_metadata.prof -S | FileCheck %s ; Tests whether the functions in the inline stack are added to the ; function_entry_count metadata. declare void @foo() +define void @foo_available() !dbg !11 { + ret void +} + +; CHECK: define void @test({{.*}} !prof ![[ENTRY_TEST:[0-9]+]] define void @test(void ()*) !dbg !7 { %2 = alloca void ()* store void ()* %0, void ()** %2 @@ -15,9 +20,20 @@ ret void } +; CHECK: define void @test_liveness({{.*}} !prof ![[ENTRY_TEST_LIVENESS:[0-9]+]] +define void @test_liveness() !dbg !12 { + call void @foo(), !dbg !20 + ret void +} + ; GUIDs of foo, bar, foo1, foo2 and foo3 should be included in the metadata to ; make sure hot inline stacks are imported. 
-; CHECK: !{!"function_entry_count", i64 1, i64 2494702099028631698, i64 6699318081062747564, i64 7682762345278052905, i64 -7908226060800700466, i64 -2012135647395072713} +; CHECK: ![[ENTRY_TEST]] = !{!"function_entry_count", i64 1, i64 2494702099028631698, i64 6699318081062747564, i64 7682762345278052905, i64 -7908226060800700466, i64 -2012135647395072713} + +; Check GUIDs for both foo and foo_available are included in the metadata to +; make sure the liveness analysis can capture the dependency from test_liveness +; to foo_available. +; CHECK: ![[ENTRY_TEST_LIVENESS]] = !{!"function_entry_count", i64 1, i64 4005816710939881937, i64 6699318081062747564} !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!8, !9} @@ -31,7 +47,10 @@ !8 = !{i32 2, !"Dwarf Version", i32 4} !9 = !{i32 1, !"Debug Info Version", i32 3} !10 = !{!"clang version 3.5 "} +!11 = distinct !DISubprogram(name: "foo_available", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !1, type: !6, variables: !2) +!12 = distinct !DISubprogram(name: "test_liveness", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !1, type: !6, variables: !2) !15 = !DILexicalBlockFile(discriminator: 1, file: !1, scope: !7) !17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7) !18 = !DILocation(line: 10, scope: !17) !19 = !DILocation(line: 11, scope: !17) +!20 = !DILocation(line: 8, scope: !12) Index: test/Transforms/SampleProfile/import.ll =================================================================== --- test/Transforms/SampleProfile/import.ll +++ test/Transforms/SampleProfile/import.ll @@ -1,37 +0,0 @@ -; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=new-pm-pgo-sample-use-pipeline -profile-file=%S/Inputs/import.prof -S | FileCheck %s - -; Tests whether the functions in the inline stack are added to the -; 
function_entry_count metadata. - -declare void @foo() - -define void @test(void ()*) !dbg !7 { - %2 = alloca void ()* - store void ()* %0, void ()** %2 - %3 = load void ()*, void ()** %2 - ; CHECK: call {{.*}}, !prof ![[PROF:[0-9]+]] - call void @foo(), !dbg !18 - call void %3(), !dbg !19 - ret void -} - -; GUIDs of foo, bar, foo1, foo2 and foo3 should be included in the metadata to -; make sure hot inline stacks are imported. -; CHECK: !{!"function_entry_count", i64 1, i64 2494702099028631698, i64 6699318081062747564, i64 7682762345278052905, i64 -7908226060800700466, i64 -2012135647395072713} - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!8, !9} -!llvm.ident = !{!10} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2) -!1 = !DIFile(filename: "calls.cc", directory: ".") -!2 = !{} -!6 = !DISubroutineType(types: !2) -!7 = distinct !DISubprogram(name: "test", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !1, type: !6, variables: !2) -!8 = !{i32 2, !"Dwarf Version", i32 4} -!9 = !{i32 1, !"Debug Info Version", i32 3} -!10 = !{!"clang version 3.5 "} -!15 = !DILexicalBlockFile(discriminator: 1, file: !1, scope: !7) -!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7) -!18 = !DILocation(line: 10, scope: !17) -!19 = !DILocation(line: 11, scope: !17)