Index: llvm/trunk/include/llvm/LTO/LTO.h =================================================================== --- llvm/trunk/include/llvm/LTO/LTO.h +++ llvm/trunk/include/llvm/LTO/LTO.h @@ -320,9 +320,14 @@ bool UnnamedAddr = true; - /// True if IR contains the prevailing definition. + /// True if module contains the prevailing definition. bool Prevailing = false; + /// Returns true if module contains the prevailing definition and symbol is + /// an IR symbol. For example when module-level inline asm block is used, + /// symbol can be prevailing in module but have no IR name. + bool isPrevailingIRSymbol() const { return Prevailing && !IRName.empty(); } + /// This field keeps track of the partition number of this global. The /// regular LTO object is partition 0, while each ThinLTO object has its own /// partition number from 1 onwards. Index: llvm/trunk/include/llvm/Transforms/IPO/FunctionImport.h =================================================================== --- llvm/trunk/include/llvm/Transforms/IPO/FunctionImport.h +++ llvm/trunk/include/llvm/Transforms/IPO/FunctionImport.h @@ -107,12 +107,23 @@ StringRef ModulePath, const ModuleSummaryIndex &Index, FunctionImporter::ImportMapTy &ImportList); +/// PrevailingType enum used as a return type of callback passed +/// to computeDeadSymbols. Yes and No values used when status explicitly +/// set by symbols resolution, otherwise status is Unknown. +enum class PrevailingType { Yes, No, Unknown }; + /// Compute all the symbols that are "dead": i.e these that can't be reached /// in the graph from any of the given symbols listed in -/// \p GUIDPreservedSymbols. +/// \p GUIDPreservedSymbols. Non-prevailing symbols are symbols without a +/// prevailing copy anywhere in IR and are normally dead, \p isPrevailing +/// predicate returns status of symbol. 
void computeDeadSymbols( ModuleSummaryIndex &Index, - const DenseSet &GUIDPreservedSymbols); + const DenseSet &GUIDPreservedSymbols, + function_ref isPrevailing); + +/// Converts value \p GV to declaration. +void convertToDeclaration(GlobalValue &GV); /// Compute the set of summaries needed for a ThinLTO backend compilation of /// \p ModulePath. Index: llvm/trunk/lib/LTO/LTO.cpp =================================================================== --- llvm/trunk/lib/LTO/LTO.cpp +++ llvm/trunk/lib/LTO/LTO.cpp @@ -423,6 +423,14 @@ "Multiple prevailing defs are not allowed"); GlobalRes.Prevailing = true; GlobalRes.IRName = Sym.getIRName(); + } else if (!GlobalRes.Prevailing && GlobalRes.IRName.empty()) { + // Sometimes there can be two copies of a symbol in a module and the + // prevailing symbol can have no IR name. That might happen if it is defined + // in a module-level inline asm block. In case we have multiple modules with + // the same symbol we want to use the IR name of the prevailing symbol. + // Otherwise, if we haven't seen a prevailing symbol, set the name so that + // we can later use it to check if there is any prevailing copy in IR. + GlobalRes.IRName = Sym.getIRName(); } // Set the partition to external if we know it is re-defined by the linker @@ -747,12 +755,31 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) { // Compute "dead" symbols, we don't want to import/export these! DenseSet GUIDPreservedSymbols; - for (auto &Res : GlobalResolutions) + DenseMap GUIDPrevailingResolutions; + for (auto &Res : GlobalResolutions) { + // Normally a resolution has the IR name of the symbol. We can do nothing + // here otherwise. See comments in GlobalResolution struct for more details. 
+ if (Res.second.IRName.empty()) + continue; + + GlobalValue::GUID GUID = GlobalValue::getGUID( + GlobalValue::dropLLVMManglingEscape(Res.second.IRName)); + if (Res.second.VisibleOutsideSummary && Res.second.Prevailing) GUIDPreservedSymbols.insert(GlobalValue::getGUID( GlobalValue::dropLLVMManglingEscape(Res.second.IRName))); - computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols); + GUIDPrevailingResolutions[GUID] = + Res.second.Prevailing ? PrevailingType::Yes : PrevailingType::No; + } + + auto isPrevailing = [&](GlobalValue::GUID G) { + auto It = GUIDPrevailingResolutions.find(G); + if (It == GUIDPrevailingResolutions.end()) + return PrevailingType::Unknown; + return It->second; + }; + computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols, isPrevailing); if (auto E = runRegularLTO(AddStream)) return E; @@ -800,7 +827,7 @@ if (!Conf.CodeGenOnly) { for (const auto &R : GlobalResolutions) { - if (!R.second.Prevailing) + if (!R.second.isPrevailingIRSymbol()) continue; if (R.second.Partition != 0 && R.second.Partition != GlobalResolution::External) @@ -1114,7 +1141,7 @@ // If the symbol does not have external references or it is not prevailing, // then not need to mark it as exported from a ThinLTO partition. 
if (Res.second.Partition != GlobalResolution::External || - !Res.second.Prevailing) + !Res.second.isPrevailingIRSymbol()) continue; auto GUID = GlobalValue::getGUID( GlobalValue::dropLLVMManglingEscape(Res.second.IRName)); Index: llvm/trunk/lib/LTO/LTOBackend.cpp =================================================================== --- llvm/trunk/lib/LTO/LTOBackend.cpp +++ llvm/trunk/lib/LTO/LTOBackend.cpp @@ -399,6 +399,17 @@ return Error::success(); } +static void dropDeadSymbols(Module &Mod, const GVSummaryMapTy &DefinedGlobals, + const ModuleSummaryIndex &Index) { + for (auto &GV : Mod) { + auto It = DefinedGlobals.find(GV.getGUID()); + if (It == DefinedGlobals.end()) + continue; + if (!Index.isGlobalValueLive(It->second)) + convertToDeclaration(GV); + } +} + Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream, Module &Mod, const ModuleSummaryIndex &CombinedIndex, const FunctionImporter::ImportMapTy &ImportList, @@ -420,6 +431,8 @@ renameModuleForThinLTO(Mod, CombinedIndex); + dropDeadSymbols(Mod, DefinedGlobals, CombinedIndex); + thinLTOResolveWeakForLinkerModule(Mod, DefinedGlobals); if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod)) Index: llvm/trunk/lib/LTO/ThinLTOCodeGenerator.cpp =================================================================== --- llvm/trunk/lib/LTO/ThinLTOCodeGenerator.cpp +++ llvm/trunk/lib/LTO/ThinLTOCodeGenerator.cpp @@ -621,6 +621,18 @@ thinLTOInternalizeAndPromoteInIndex(Index, isExported); } +static void computeDeadSymbolsInIndex( + ModuleSummaryIndex &Index, + const DenseSet &GUIDPreservedSymbols) { + // We have no symbol resolution available, so we can't do any better now in + // the case where the prevailing symbol is in a native object. It can be + // refined with linker information in the future. 
+ auto isPrevailing = [&](GlobalValue::GUID G) { + return PrevailingType::Unknown; + }; + computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing); +} + /** * Perform promotion and renaming of exported internal functions. * Index is updated to reflect linkage changes from weak resolution. @@ -639,7 +651,7 @@ PreservedSymbols, Triple(TheModule.getTargetTriple())); // Compute "dead" symbols, we don't want to import/export these! - computeDeadSymbols(Index, GUIDPreservedSymbols); + computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols); // Generate import/export list StringMap ImportLists(ModuleCount); @@ -678,7 +690,7 @@ PreservedSymbols, Triple(TheModule.getTargetTriple())); // Compute "dead" symbols, we don't want to import/export these! - computeDeadSymbols(Index, GUIDPreservedSymbols); + computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols); // Generate import/export list StringMap ImportLists(ModuleCount); @@ -755,7 +767,7 @@ Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); // Compute "dead" symbols, we don't want to import/export these! - computeDeadSymbols(Index, GUIDPreservedSymbols); + computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols); // Generate import/export list StringMap ImportLists(ModuleCount); @@ -901,7 +913,7 @@ computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); // Compute "dead" symbols, we don't want to import/export these! - computeDeadSymbols(*Index, GUIDPreservedSymbols); + computeDeadSymbolsInIndex(*Index, GUIDPreservedSymbols); // Collect the import/export lists for all modules from the call-graph in the // combined index. 
Index: llvm/trunk/lib/Transforms/IPO/FunctionImport.cpp =================================================================== --- llvm/trunk/lib/Transforms/IPO/FunctionImport.cpp +++ llvm/trunk/lib/Transforms/IPO/FunctionImport.cpp @@ -163,6 +163,9 @@ CalleeSummaryList, [&](const std::unique_ptr &SummaryPtr) { auto *GVSummary = SummaryPtr.get(); + if (!Index.isGlobalValueLive(GVSummary)) + return false; + // For SamplePGO, in computeImportForFunction the OriginalId // may have been used to locate the callee summary list (See // comment there). @@ -495,7 +498,8 @@ void llvm::computeDeadSymbols( ModuleSummaryIndex &Index, - const DenseSet &GUIDPreservedSymbols) { + const DenseSet &GUIDPreservedSymbols, + function_ref isPrevailing) { assert(!Index.withGlobalValueDeadStripping()); if (!ComputeDead) return; @@ -524,7 +528,6 @@ } // Make value live and add it to the worklist if it was not live before. - // FIXME: we should only make the prevailing copy live here auto visit = [&](ValueInfo VI) { // FIXME: If we knew which edges were created for indirect call profiles, // we could skip them here. Any that are live should be reached via @@ -540,6 +543,11 @@ for (auto &S : VI.getSummaryList()) if (S->isLive()) return; + + // We do not keep live symbols that are known to be non-prevailing. + if (isPrevailing(VI.getGUID()) == PrevailingType::No) + return; + for (auto &S : VI.getSummaryList()) S->setLive(true); ++LiveSymbols; @@ -550,6 +558,8 @@ auto VI = Worklist.pop_back_val(); for (auto &Summary : VI.getSummaryList()) { GlobalValueSummary *Base = Summary->getBaseObject(); + // Set base value live in case it is an alias. 
+ Base->setLive(true); for (auto Ref : Base->refs()) visit(Ref); if (auto *FS = dyn_cast(Base)) @@ -603,26 +613,26 @@ return std::error_code(); } +void llvm::convertToDeclaration(GlobalValue &GV) { + DEBUG(dbgs() << "Converting to a declaration: `" << GV.getName() << "\n"); + if (Function *F = dyn_cast(&GV)) { + F->deleteBody(); + F->clearMetadata(); + } else if (GlobalVariable *V = dyn_cast(&GV)) { + V->setInitializer(nullptr); + V->setLinkage(GlobalValue::ExternalLinkage); + V->clearMetadata(); + } else + // For now we don't resolve or drop aliases. Once we do we'll + // need to add support here for creating either a function or + // variable declaration, and return the new GlobalValue* for + // the caller to use. + llvm_unreachable("Expected function or variable"); +} + /// Fixup WeakForLinker linkages in \p TheModule based on summary analysis. void llvm::thinLTOResolveWeakForLinkerModule( Module &TheModule, const GVSummaryMapTy &DefinedGlobals) { - auto ConvertToDeclaration = [](GlobalValue &GV) { - DEBUG(dbgs() << "Converting to a declaration: `" << GV.getName() << "\n"); - if (Function *F = dyn_cast(&GV)) { - F->deleteBody(); - F->clearMetadata(); - } else if (GlobalVariable *V = dyn_cast(&GV)) { - V->setInitializer(nullptr); - V->setLinkage(GlobalValue::ExternalLinkage); - V->clearMetadata(); - } else - // For now we don't resolve or drop aliases. Once we do we'll - // need to add support here for creating either a function or - // variable declaration, and return the new GlobalValue* for - // the caller to use. - llvm_unreachable("Expected function or variable"); - }; - auto updateLinkage = [&](GlobalValue &GV) { // See if the global summary analysis computed a new resolved linkage. const auto &GS = DefinedGlobals.find(GV.getGUID()); @@ -653,7 +663,7 @@ // the definition in that case. 
if (GlobalValue::isAvailableExternallyLinkage(NewLinkage) && GlobalValue::isInterposableLinkage(GV.getLinkage())) - ConvertToDeclaration(GV); + convertToDeclaration(GV); else { DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName() << "` from " << GV.getLinkage() << " to " << NewLinkage << "\n"); Index: llvm/trunk/test/LTO/Resolution/X86/Inputs/not-prevailing.ll =================================================================== --- llvm/trunk/test/LTO/Resolution/X86/Inputs/not-prevailing.ll +++ llvm/trunk/test/LTO/Resolution/X86/Inputs/not-prevailing.ll @@ -0,0 +1,6 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @bar() { + ret void +} Index: llvm/trunk/test/LTO/Resolution/X86/not-prevailing.ll =================================================================== --- llvm/trunk/test/LTO/Resolution/X86/not-prevailing.ll +++ llvm/trunk/test/LTO/Resolution/X86/not-prevailing.ll @@ -0,0 +1,37 @@ +; RUN: opt -module-summary %s -o %t1.o +; RUN: opt -module-summary -o %t2.o %S/Inputs/not-prevailing.ll +; RUN: llvm-lto2 run -o %t3.o %t1.o %t2.o -r %t1.o,foo,x -r %t1.o,zed,px -r %t1.o,bar,x \ +; RUN: -r %t2.o,bar,x -save-temps + +; Check that 'foo' and 'bar' were not inlined. +; CHECK: zed: +; CHECK-NEXT: {{.*}} pushq %rbx +; CHECK-NEXT: {{.*}} callq 0 +; CHECK-NEXT: {{.*}} movl %eax, %ebx +; CHECK-NEXT: {{.*}} callq 0 +; CHECK-NEXT: {{.*}} movl %ebx, %eax +; CHECK-NEXT: {{.*}} popq %rbx +; CHECK-NEXT: {{.*}} retq + +; RUN: llvm-objdump -d %t3.o.1 | FileCheck %s +; RUN: llvm-readelf --symbols %t3.o.1 | FileCheck %s --check-prefix=SYMBOLS + +; Check that 'foo' and 'bar' were produced as undefined. 
+; SYMBOLS: NOTYPE GLOBAL DEFAULT UND bar +; SYMBOLS: NOTYPE GLOBAL DEFAULT UND foo +; SYMBOLS: FUNC GLOBAL DEFAULT 2 zed + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define weak i32 @foo() { + ret i32 65 +} + +declare void @bar() + +define i32 @zed() { + %1 = tail call i32 @foo() + call void @bar() + ret i32 %1 +} Index: llvm/trunk/test/ThinLTO/X86/deadstrip.ll =================================================================== --- llvm/trunk/test/ThinLTO/X86/deadstrip.ll +++ llvm/trunk/test/ThinLTO/X86/deadstrip.ll @@ -50,7 +50,7 @@ ; LTO2: define internal void @_GLOBAL__I_a() ; LTO2: define internal void @bar() { ; LTO2: define internal void @bar_internal() -; LTO2: define internal void @dead_func() { +; LTO2: declare dso_local void @dead_func() ; LTO2-NOT: available_externally {{.*}} @baz() ; Make sure we didn't internalize @boo, which is reachable via Index: llvm/trunk/test/ThinLTO/X86/internalize.ll =================================================================== --- llvm/trunk/test/ThinLTO/X86/internalize.ll +++ llvm/trunk/test/ThinLTO/X86/internalize.ll @@ -24,6 +24,7 @@ target triple = "x86_64-apple-macosx10.11.0" define void @foo() { + call void @bar() ret void } define void @bar() { Index: llvm/trunk/test/tools/gold/X86/global_with_section.ll =================================================================== --- llvm/trunk/test/tools/gold/X86/global_with_section.ll +++ llvm/trunk/test/tools/gold/X86/global_with_section.ll @@ -22,7 +22,7 @@ ; RUN: --plugin-opt=save-temps \ ; RUN: -o %t3.o %t.o %t2.o ; Check results of internalization -; RUN: llvm-dis %t.o.2.internalize.bc -o - | FileCheck %s +; RUN: llvm-dis %t.o.2.internalize.bc -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-THINLTO ; RUN: llvm-dis %t2.o.2.internalize.bc -o - | FileCheck %s --check-prefix=CHECK2-THINLTO ; SYMTAB: deadfunc_with_section @@ -57,7 +57,9 @@ ; Confirm via a function with a non-C identifier 
section that we are getting ; the expected internalization. -; CHECK-DAG: define internal void @deadfunc_with_nonC_section() section ".nonCsection" +; CHECK2-REGULARLTO-DAG: define internal void @deadfunc_with_nonC_section() section ".nonCsection" +; Check that the dead function was converted to a declaration. +; CHECK-THINLTO-DAG: declare dso_local void @deadfunc_with_nonC_section() section ".nonCsection" define void @deadfunc_with_nonC_section() section ".nonCsection" { call void @deadfunc2_called_from_nonC_section() ret void @@ -75,5 +77,6 @@ ; Confirm when called from a function with a non-C identifier section that we ; are getting the expected internalization. ; CHECK2-REGULARLTO: define internal void @deadfunc2_called_from_nonC_section -; CHECK2-THINLTO: define internal void @deadfunc2_called_from_nonC_section +; Check that the dead function was converted to a declaration. +; CHECK2-THINLTO: declare dso_local void @deadfunc2_called_from_nonC_section declare void @deadfunc2_called_from_nonC_section()