Index: include/llvm/LTO/LTO.h =================================================================== --- include/llvm/LTO/LTO.h +++ include/llvm/LTO/LTO.h @@ -56,12 +56,19 @@ /// /// This is done for correctness (if value exported, ensure we always /// emit a copy), and compile-time optimization (allow drop of duplicates). +/// +/// If \p PreserveNonPrevailing is true, then we conservatively prevent +/// dropping of non-prevailing copies. It is used in the case when +/// the ThinLTO backends will be a separate process, and the subsequent +/// final native object link might select a different copy as prevailing. void thinLTOResolveWeakForLinkerInIndex( ModuleSummaryIndex &Index, function_ref isPrevailing, + function_ref isExported, function_ref - recordNewLinkage); + recordNewLinkage, + bool PreserveNonPrevailing = false); /// Update the linkages in the given \p Index to mark exported values /// as external and non-exported values as internal. The ThinLTO backends Index: lib/LTO/LTO.cpp =================================================================== --- lib/LTO/LTO.cpp +++ lib/LTO/LTO.cpp @@ -45,8 +45,10 @@ DenseSet &GlobalInvolvedWithAlias, function_ref isPrevailing, + function_ref isExported, function_ref - recordNewLinkage) { + recordNewLinkage, + bool PreserveNonPrevailing) { for (auto &S : GVSummaryList) { if (GlobalInvolvedWithAlias.count(S.get())) continue; @@ -55,13 +57,16 @@ continue; // We need to emit only one of these. The prevailing module will keep it, // but turned into a weak, while the others will drop it when possible. - if (isPrevailing(GUID, S.get())) { + // If PreserveNonPrevailing is true, we need to do the conversion for + // exported non-prevailing copies as well. + if (isPrevailing(GUID, S.get()) || + (PreserveNonPrevailing && isExported(S->modulePath(), GUID))) { if (GlobalValue::isLinkOnceLinkage(OriginalLinkage)) S->setLinkage(GlobalValue::getWeakLinkage( GlobalValue::isLinkOnceODRLinkage(OriginalLinkage))); } // Alias can't be turned into available_externally. - else if (!isa(S.get()) && + else if (!PreserveNonPrevailing && !isa(S.get()) && (GlobalValue::isLinkOnceODRLinkage(OriginalLinkage) || GlobalValue::isWeakODRLinkage(OriginalLinkage))) S->setLinkage(GlobalValue::AvailableExternallyLinkage); @@ -80,8 +85,10 @@ ModuleSummaryIndex &Index, function_ref isPrevailing, + function_ref isExported, function_ref - recordNewLinkage) { + recordNewLinkage, + bool PreserveNonPrevailingODRs) { // We won't optimize the globals that are referenced by an alias for now // Ideally we should turn the alias into a global and duplicate the definition // when needed. @@ -93,7 +100,8 @@ for (auto &I : Index) thinLTOResolveWeakForLinkerGUID(I.second, I.first, GlobalInvolvedWithAlias, - isPrevailing, recordNewLinkage); + isPrevailing, isExported, recordNewLinkage, + PreserveNonPrevailingODRs); } static void thinLTOInternalizeAndPromoteGUID( Index: lib/LTO/ThinLTOCodeGenerator.cpp =================================================================== --- lib/LTO/ThinLTOCodeGenerator.cpp +++ lib/LTO/ThinLTOCodeGenerator.cpp @@ -393,6 +393,8 @@ /// copies when possible). static void resolveWeakForLinkerInIndex( ModuleSummaryIndex &Index, + const StringMap &ExportLists, + const DenseSet &GUIDPreservedSymbols, StringMap> &ResolvedODR) { @@ -407,13 +409,21 @@ return Prevailing->second == S; }; + auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { + const auto &ExportList = ExportLists.find(ModuleIdentifier); + return (ExportList != ExportLists.end() && + ExportList->second.count(GUID)) || + GUIDPreservedSymbols.count(GUID); + }; + auto recordNewLinkage = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID, GlobalValue::LinkageTypes NewLinkage) { ResolvedODR[ModuleIdentifier][GUID] = NewLinkage; }; - thinLTOResolveWeakForLinkerInIndex(Index, isPrevailing, recordNewLinkage); + thinLTOResolveWeakForLinkerInIndex(Index, isPrevailing, isExported, + recordNewLinkage); } // Initialize the TargetMachine builder for a given Triple @@ -527,9 +537,14 @@ ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, ExportLists); + // Convert the preserved symbols set from string to GUID + auto GUIDPreservedSymbols = + computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); + // Resolve LinkOnce/Weak symbols. StringMap> ResolvedODR; - resolveWeakForLinkerInIndex(Index, ResolvedODR); + resolveWeakForLinkerInIndex(Index, ExportLists, GUIDPreservedSymbols, + ResolvedODR); thinLTOResolveWeakForLinkerModule( TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]); @@ -735,7 +750,8 @@ // Resolve LinkOnce/Weak symbols, this has to be computed early because it // impacts the caching. - resolveWeakForLinkerInIndex(*Index, ResolvedODR); + resolveWeakForLinkerInIndex(*Index, ExportLists, GUIDPreservedSymbols, + ResolvedODR); auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { const auto &ExportList = ExportLists.find(ModuleIdentifier); Index: lib/Transforms/IPO/FunctionImport.cpp =================================================================== --- lib/Transforms/IPO/FunctionImport.cpp +++ lib/Transforms/IPO/FunctionImport.cpp @@ -770,6 +770,23 @@ return false; } + // Collect for each module the list of function it defines (GUID -> + // Summary). + StringMap> + ModuleToDefinedGVSummaries; + Index->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); + + // Apply summary-based internalization decisions. Skip if there are no + // defined globals from the summary since not only is it unnecessary, but + // if this module did not have a summary section the internalizer will + // assert if it finds any definitions in this module that aren't in the + // DefinedGlobals set. + std::map &DefinedGlobals = + ModuleToDefinedGVSummaries[M.getModuleIdentifier()]; + + // Apply summary-based LinkOnce/Weak resolution decisions. + thinLTOResolveWeakForLinkerModule(M, DefinedGlobals); + // Perform the import now. auto ModuleLoader = [&M](StringRef Identifier) { return loadFile(Identifier, M.getContext()); Index: test/tools/gold/X86/Inputs/thinlto_preserve_nonprevailodr-1.ll =================================================================== --- /dev/null +++ test/tools/gold/X86/Inputs/thinlto_preserve_nonprevailodr-1.ll @@ -0,0 +1,27 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @foobar() { +entry: + ret void +} + +define void @baz() { +entry: + call void @f() + ret void +} + +define linkonce_odr void @x() { + ret void +} + +define linkonce_odr void @f() { +entry: + call void @x() + call void @x() + call void @x() + call void @x() + call void @x() + ret void +} Index: test/tools/gold/X86/Inputs/thinlto_preserve_nonprevailodr-2.ll =================================================================== --- /dev/null +++ test/tools/gold/X86/Inputs/thinlto_preserve_nonprevailodr-2.ll @@ -0,0 +1,19 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define linkonce_odr void @x() { + ret void +} + +define void @bar() { +entry: + call void @x() + call void @x() + call void @x() + call void @x() + call void @x() + call void (...) @foo() + ret void +} + +declare void @foo(...) Index: test/tools/gold/X86/Inputs/thinlto_preserve_nonprevailodr-3.ll =================================================================== --- /dev/null +++ test/tools/gold/X86/Inputs/thinlto_preserve_nonprevailodr-3.ll @@ -0,0 +1,22 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @foo() { +entry: + call void @f() + ret void +} + +define linkonce_odr void @x() { + ret void +} + +define linkonce_odr void @f() { +entry: + call void @x() + call void @x() + call void @x() + call void @x() + call void @x() + ret void +} Index: test/tools/gold/X86/thinlto_preserve_nonprevailodr.ll =================================================================== --- /dev/null +++ test/tools/gold/X86/thinlto_preserve_nonprevailodr.ll @@ -0,0 +1,101 @@ +; This test ensures that linkonce symbols are converted to weak more +; aggressively when we have separate ThinLTO backend processes, and therefore +; multiple invocations of the gold linker (the ThinLink and then the final +; native object link). Specifically, when linking within --start-lib/--end-lib +; which treats the final object files as being in separate libraries, we can +; run into trouble if the prevailing linkonce from the first link is no +; longer selected by the linker after importing/inlining. The more aggressive +; conversion to weak is needed to avoid undefined references in the final +; link. See the comments before the two link steps that describe this +; scenario. + +; First generate bitcode with a module summary index for each file +; RUN: opt -module-summary %s -o %t.o +; RUN: opt -module-summary %p/Inputs/thinlto_preserve_nonprevailodr-1.ll -o %t2.o +; RUN: opt -module-summary %p/Inputs/thinlto_preserve_nonprevailodr-2.ll -o %t3.o +; RUN: opt -module-summary %p/Inputs/thinlto_preserve_nonprevailodr-3.ll -o %t4.o + +; Next do the ThinLink step, specifying thinlto-index-only so that the gold +; plugin exits after generating individual indexes. Also, that flag should +; be used indicate to the index-based weak/linkonce symbol resolution that +; we need to preserve (by converting to weak) any linkonce that are exported, +; even if they are not the prevailing copy (see below for how this affects f()). +; Also, -import-instr-limit=4 is used to prevent importing of some of +; the functions, chiefly the linkonce_odr f(). +; Finally, although it doesn't impact the ThinLink step, we use +; --start-lib/--end-lib to match the final link step further down, +; where we want archive library linking behavior. +; Note that because at this point there is a strong reference from +; %t.o to %t2.o (foobar), all symbols from %t2.o are selected +; including the linkonce symbols, and therefore the %t2.o copy of f() +; is prevailing. At this point the %t4.o copy of f() is preempted. +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=thinlto \ +; RUN: --plugin-opt=thinlto-index-only \ +; RUN: --plugin-opt=-import-instr-limit=4 \ +; RUN: -o %t5 \ +; RUN: %t.o \ +; RUN: --start-lib %t2.o --end-lib \ +; RUN: --start-lib %t3.o --end-lib \ +; RUN: --start-lib %t4.o --end-lib + +; Simulate the separate ThinLTO backend processes which will do function +; importing from the individual indexes followed by the optimization pipeline +; including inlining. +; RUN: opt -disable-force-link-odr -function-import -O2 -summary-file %t.o.thinlto.bc %t.o -o %t.opt.bc +; RUN: opt -disable-force-link-odr -function-import -O2 -summary-file %t2.o.thinlto.bc %t2.o -o %t2.opt.bc +; RUN: opt -disable-force-link-odr -function-import -O2 -summary-file %t3.o.thinlto.bc %t3.o -o %t3.opt.bc +; RUN: opt -disable-force-link-odr -function-import -O2 -summary-file %t4.o.thinlto.bc %t4.o -o %t4.opt.bc + +; The first copy of f() was prevailing and should have been made weak_odr +; as a result +; RUN: llvm-dis %t2.opt.bc -o - | FileCheck %s --check-prefix=CHECK-OBJ1 +; CHECK-OBJ1: define weak_odr void @f() + +; The use of f() should have been inlined but not its definition, due to the +; import-instr-limit. +; RUN: llvm-dis %t3.opt.bc -o - | FileCheck %s --check-prefix=CHECK-OBJ2 +; CHECK-OBJ2: declare void @f() + +; The second copy of f() should have been made weak_odr since it is exported +; and with thinlto-index-only we indicate that linkonce_odr should be preserved. +; RUN: llvm-dis %t4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-OBJ3 +; CHECK-OBJ3: define weak_odr void @f() + +; Generate native object files for the final link +; RUN: llc %t.opt.bc -filetype=obj -o %t.opt.o +; RUN: llc %t2.opt.bc -filetype=obj -o %t2.opt.o +; RUN: llc %t3.opt.bc -filetype=obj -o %t3.opt.o +; RUN: llc %t4.opt.bc -filetype=obj -o %t4.opt.o + +; Final link. Here the --start-lib/--end-lib provoke archive library linking +; behavior. The relevant effect of this is that the reference of f() in +; %t3.opt.o will not be resolved to the definition in the earlier %t2.opt.o +; library, which no longer is selected as prevailing (since the only strong +; reference to the %t2.opt.o library was inlined into %t.opt.o). Therefore, +; it needs to see the definition in the subsequent %t4.opt.o library or we +; would get an undefined reference error. Note that the gold-plugin does +; not know whether --start-lib/--end-lib were used. +; RUN: %gold \ +; RUN: -o %t6 \ +; RUN: %t.opt.o \ +; RUN: --start-lib %t2.opt.o --end-lib \ +; RUN: --start-lib %t3.opt.o --end-lib \ +; RUN: --start-lib %t4.opt.o --end-lib + +; Sanity check that f() is defined and weak in final binary. +; RUN: llvm-nm %t6 | FileCheck %s --check-prefix=CHECK-FINAL +; CHECK-FINAL: W f + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() { +entry: + call void (...) @foobar() + call void (...) @bar() + ret i32 0 +} + +declare void @bar(...) +declare void @foobar(...) Index: tools/gold/gold-plugin.cpp =================================================================== --- tools/gold/gold-plugin.cpp +++ tools/gold/gold-plugin.cpp @@ -1397,10 +1397,21 @@ Preserve.count(GUID); }; + // Since we don't know whether the object files were from archive + // libraries (potentially formed via --start-lib/--end-lib pairs), + // we must conservatively ensure that linkonce/weak symbols are + // preserved even when they are not prevailing in the thinlto_index_only + // case. In that case there will be a separate link process to link the + // native objects, and depending on the intervening importing/inlining + // the current prevailing copy may no longer be selected out of its + // library. This is because symbols are not selected out of (archive) + // libraries unless there is a strong reference to a symbol defined in it, + // and that strong reference may go away after importing. thinLTOResolveWeakForLinkerInIndex( - CombinedIndex, isPrevailing, + CombinedIndex, isPrevailing, isExported, [](StringRef ModuleIdentifier, GlobalValue::GUID GUID, - GlobalValue::LinkageTypes NewLinkage) {}); + GlobalValue::LinkageTypes NewLinkage) {}, + /* PreserveNonPrevailing = */ options::thinlto_index_only); // Use global summary-based analysis to identify symbols that can be // internalized (because they aren't exported or preserved as per callback).