Index: lib/Transforms/IPO/FunctionImport.cpp =================================================================== --- lib/Transforms/IPO/FunctionImport.cpp +++ lib/Transforms/IPO/FunctionImport.cpp @@ -187,6 +187,21 @@ using EdgeInfo = std::tuple; +static ValueInfo +updateValueInfoForIndirectCalls(const ModuleSummaryIndex &Index, ValueInfo VI) { + if (!VI.getSummaryList().empty()) + return VI; + // For SamplePGO, the indirect call targets for local functions will + // have its original name annotated in profile. We try to find the + // corresponding PGOFuncName as the GUID. + // FIXME: Consider updating the edges in the graph after building + // it, rather than needing to perform this mapping on each walk. + auto GUID = Index.getGUIDFromOriginalID(VI.getGUID()); + if (GUID == 0) + return nullptr; + return Index.getValueInfo(GUID); +} + /// Compute the list of functions to import for a given caller. Mark these /// imported functions and the symbols they reference in their source module as /// exported from their source module. @@ -201,17 +216,9 @@ DEBUG(dbgs() << " edge -> " << VI.getGUID() << " Threshold:" << Threshold << "\n"); - if (VI.getSummaryList().empty()) { - // For SamplePGO, the indirect call targets for local functions will - // have its original name annotated in profile. We try to find the - // corresponding PGOFuncName as the GUID. - auto GUID = Index.getGUIDFromOriginalID(VI.getGUID()); - if (GUID == 0) - continue; - VI = Index.getValueInfo(GUID); - if (!VI) - continue; - } + VI = updateValueInfoForIndirectCalls(Index, VI); + if (!VI) + continue; if (DefinedGVSummaries.count(VI.getGUID())) { DEBUG(dbgs() << "ignored! Target already in destination module.\n"); @@ -461,6 +468,17 @@ for (auto &S : VI.getSummaryList()) if (S->isLive()) return; + // FIXME: If we knew which edges were created for indirect call profiles, + // we could skip them here. Any that are live should be reached via + // other edges, e.g. reference edges. Otherwise, using a profile collected + // on a slightly different binary might provoke preserving, importing + // and ultimately promoting calls to functions not linked into this + // binary, which increases the binary size unnecessarily. Note that + // if this code changes, the importer needs to change so that edges + // to functions marked dead are skipped. + VI = updateValueInfoForIndirectCalls(Index, VI); + if (!VI) + return; for (auto &S : VI.getSummaryList()) S->setLive(true); ++LiveSymbols; Index: test/Transforms/PGOProfile/Inputs/thinlto_samplepgo_icp2a.ll =================================================================== --- /dev/null +++ test/Transforms/PGOProfile/Inputs/thinlto_samplepgo_icp2a.ll @@ -0,0 +1,21 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: norecurse nounwind readnone uwtable +define internal void @_ZL3foov() #1 { +entry: + call void @_ZL3barv() + ret void +} + +declare void @_ZL3barv() + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3} +!llvm.ident = !{!31} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 5.0.0 (trunk 297016)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2) +!1 = !DIFile(filename: "b.cc", directory: "/ssd/llvm/abc/small") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!31 = !{!"clang version 5.0.0 (trunk 297016)"} Index: test/Transforms/PGOProfile/Inputs/thinlto_samplepgo_icp2b.ll =================================================================== --- /dev/null +++ test/Transforms/PGOProfile/Inputs/thinlto_samplepgo_icp2b.ll @@ -0,0 +1,28 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @_ZL3barv() #1 { +entry: + call void @dummy() + call void @dummy() + call void @dummy() + call void @dummy() + call void @dummy() + call void @dummy() + ret void +} + +define internal void @dummy() { +entry: + ret void +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3} +!llvm.ident = !{!31} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 5.0.0 (trunk 297016)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2) +!1 = !DIFile(filename: "c.cc", directory: "/ssd/llvm/abc/small") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!31 = !{!"clang version 5.0.0 (trunk 297016)"} Index: test/Transforms/PGOProfile/thinlto_samplepgo_icp2.ll =================================================================== --- /dev/null +++ test/Transforms/PGOProfile/thinlto_samplepgo_icp2.ll @@ -0,0 +1,76 @@ +; Checks if indirect calls to static target functions that are actually +; dead in the new binary target (due to a profile collected from a slightly +; different binary) are properly traversed during ThinLTO liveness analysis. +; If the liveness analysis is changed to ignore indirect edges and the +; importer is changed to check liveness before importing, this test will +; need adjustment (in that case _ZL3foov should not be imported/promoted, +; and _ZL3barv can be internalized/removed). + +; Do setup work for all below tests: generate bitcode and combined index +; RUN: opt -module-summary %s -o %t.bc +; RUN: opt -module-summary %p/Inputs/thinlto_samplepgo_icp2a.ll -o %t2a.bc +; RUN: opt -module-summary %p/Inputs/thinlto_samplepgo_icp2b.ll -o %t2b.bc + +; Use -import-instr-limit=5 so that we don't import _ZL3barv, which would +; hide the problem. +; RUN: llvm-lto2 run -save-temps -import-instr-limit=5 -o %t3 %t.bc %t2a.bc %t2b.bc -r %t.bc,fptr,plx -r %t.bc,main,plx -r %t2a.bc,_ZL3barv,l -r %t2b.bc,_ZL3barv,pl -print-imports 2>&1 | FileCheck %s --check-prefix=IMPORTS2 +; IMPORTS2-NOT: Import _ZL3barv +; IMPORTS2: Import _ZL3foov.llvm.0 +; IMPORTS2-NOT: Import _ZL3barv +; RUN: llvm-nm %t3.2 | FileCheck %s --check-prefix=NM +; NM: _ZL3barv +; RUN: llvm-dis < %t3.2.2.internalize.bc | FileCheck %s --check-prefix=INTERNALIZE +; INTERNALIZE: define void @_ZL3barv + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@fptr = local_unnamed_addr global void ()* null, align 8 + +; Function Attrs: norecurse uwtable +define i32 @main() local_unnamed_addr #0 !prof !34 { +entry: + %0 = load void ()*, void ()** @fptr, align 8 +; ICALL-PROM: br i1 %{{[0-9]+}}, label %if.true.direct_targ, label %if.false.orig_indirect + tail call void %0(), !prof !40 + ret i32 0 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3,!4} +!llvm.ident = !{!31} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 5.0.0 (trunk 297016)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2) +!1 = !DIFile(filename: "main.cc", directory: ".") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"ProfileSummary", !5} +!5 = !{!6, !7, !8, !9, !10, !11, !12, !13} +!6 = !{!"ProfileFormat", !"SampleProfile"} +!7 = !{!"TotalCount", i64 3003} +!8 = !{!"MaxCount", i64 3000} +!9 = !{!"MaxInternalCount", i64 0} +!10 = !{!"MaxFunctionCount", i64 0} +!11 = !{!"NumCounts", i64 3} +!12 = !{!"NumFunctions", i64 1} +!13 = !{!"DetailedSummary", !14} +!14 = !{!15, !16, !17, !18, !19, !20, !20, !21, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30} +!15 = !{i32 10000, i64 3000, i32 1} +!16 = !{i32 100000, i64 3000, i32 1} +!17 = !{i32 200000, i64 3000, i32 1} +!18 = !{i32 300000, i64 3000, i32 1} +!19 = !{i32 400000, i64 3000, i32 1} +!20 = !{i32 500000, i64 3000, i32 1} +!21 = !{i32 600000, i64 3000, i32 1} +!22 = !{i32 700000, i64 3000, i32 1} +!23 = !{i32 800000, i64 3000, i32 1} +!24 = !{i32 900000, i64 3000, i32 1} +!25 = !{i32 950000, i64 3000, i32 1} +!26 = !{i32 990000, i64 3000, i32 1} +!27 = !{i32 999000, i64 3000, i32 1} +!28 = !{i32 999900, i64 2, i32 2} +!29 = !{i32 999990, i64 2, i32 2} +!30 = !{i32 999999, i64 2, i32 2} +!31 = !{!"clang version 5.0.0 (trunk 297016)"} +!34 = !{!"function_entry_count", i64 1} +!40 = !{!"VP", i32 0, i64 3000, i64 -8789629626369651636, i64 3000}