Index: include/llvm/IR/ModuleSummaryIndex.h =================================================================== --- include/llvm/IR/ModuleSummaryIndex.h +++ include/llvm/IR/ModuleSummaryIndex.h @@ -392,6 +392,7 @@ const_gvsummary_iterator begin() const { return GlobalValueMap.begin(); } gvsummary_iterator end() { return GlobalValueMap.end(); } const_gvsummary_iterator end() const { return GlobalValueMap.end(); } + size_t size() const { return GlobalValueMap.size(); } /// Get the list of global value summary objects for a given value name. const GlobalValueSummaryList &getGlobalValueSummaryList(StringRef ValueName) { Index: include/llvm/LTO/LTO.h =================================================================== --- include/llvm/LTO/LTO.h +++ include/llvm/LTO/LTO.h @@ -382,6 +382,9 @@ /// The unmangled name of the global. std::string IRName; + /// Keep track if the symbol is visible outside of LTO. + bool VisibleToRegularObj = false; + bool UnnamedAddr = true; /// This field keeps track of the partition number of this global. The Index: include/llvm/Transforms/IPO/FunctionImport.h =================================================================== --- include/llvm/Transforms/IPO/FunctionImport.h +++ include/llvm/Transforms/IPO/FunctionImport.h @@ -91,11 +91,15 @@ /// \p ExportLists contains for each Module the set of globals (GUID) that will /// be imported by another module, or referenced by such a function. I.e. this /// is the set of globals that need to be promoted/renamed appropriately. +/// +/// \p DeadSymbols (optional) contains a list of GUID that are deemed "dead" and +/// will be ignored for the purpose of importing. void ComputeCrossModuleImport( const ModuleSummaryIndex &Index, const StringMap &ModuleToDefinedGVSummaries, StringMap &ImportLists, - StringMap &ExportLists); + StringMap &ExportLists, + const DenseSet *DeadSymbols = nullptr); /// Compute all the imports for the given module using the Index. /// @@ -105,6 +109,13 @@ StringRef ModulePath, const ModuleSummaryIndex &Index, FunctionImporter::ImportMapTy &ImportList); +/// Compute all the symbols that are "dead": i.e these that can't be reached +/// in the graph from any of the given symbols listed in +/// \p GUIDPreservedSymbols. +DenseSet +computeDeadSymbols(const ModuleSummaryIndex &Index, + const DenseSet &GUIDPreservedSymbols); + /// Compute the set of summaries needed for a ThinLTO backend compilation of /// \p ModulePath. // Index: lib/LTO/LTO.cpp =================================================================== --- lib/LTO/LTO.cpp +++ lib/LTO/LTO.cpp @@ -332,9 +332,10 @@ } if (Res.VisibleToRegularObj || (GV && Used.count(GV)) || (GlobalRes.Partition != GlobalResolution::Unknown && - GlobalRes.Partition != Partition)) + GlobalRes.Partition != Partition)) { GlobalRes.Partition = GlobalResolution::External; - else + GlobalRes.VisibleToRegularObj |= Res.VisibleToRegularObj; + } else GlobalRes.Partition = Partition; } @@ -840,6 +841,16 @@ if (!ModuleToDefinedGVSummaries.count(Mod.first)) ModuleToDefinedGVSummaries.try_emplace(Mod.first); + // Compute "dead" symbols, we don't want to import/export these! + DenseSet GUIDPreservedSymbols; + for (auto &Res : GlobalResolutions) { + if (Res.second.VisibleToRegularObj && !Res.second.IRName.empty()) + GUIDPreservedSymbols.insert(GlobalValue::getGUID(Res.second.IRName)); + } + + auto DeadSymbols = + computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols); + StringMap ImportLists( ThinLTO.ModuleMap.size()); StringMap ExportLists( @@ -848,7 +859,7 @@ if (Conf.OptLevel > 0) { ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - ImportLists, ExportLists); + ImportLists, ExportLists, &DeadSymbols); std::set ExportedGUIDs; for (auto &Res : GlobalResolutions) { @@ -865,7 +876,7 @@ const auto &ExportList = ExportLists.find(ModuleIdentifier); return (ExportList != ExportLists.end() && ExportList->second.count(GUID)) || - ExportedGUIDs.count(GUID); + (!DeadSymbols.count(GUID) && ExportedGUIDs.count(GUID)); }; thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported); Index: lib/LTO/ThinLTOCodeGenerator.cpp =================================================================== --- lib/LTO/ThinLTOCodeGenerator.cpp +++ lib/LTO/ThinLTOCodeGenerator.cpp @@ -578,11 +578,18 @@ StringMap ModuleToDefinedGVSummaries; Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); + // Convert the preserved symbols set from string to GUID + auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( + PreservedSymbols, Triple(TheModule.getTargetTriple())); + + // Compute "dead" symbols, we don't want to import/export these! + auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols); + // Generate import/export list StringMap ImportLists(ModuleCount); StringMap ExportLists(ModuleCount); ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, - ExportLists); + ExportLists, &DeadSymbols); // Resolve LinkOnce/Weak symbols. StringMap> ResolvedODR; @@ -591,17 +598,13 @@ thinLTOResolveWeakForLinkerModule( TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]); - // Convert the preserved symbols set from string to GUID - auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( - PreservedSymbols, Triple(TheModule.getTargetTriple())); - // Promote the exported values in the index, so that they are promoted // in the module. auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { const auto &ExportList = ExportLists.find(ModuleIdentifier); return (ExportList != ExportLists.end() && ExportList->second.count(GUID)) || - GUIDPreservedSymbols.count(GUID); + (!DeadSymbols.count(GUID) && GUIDPreservedSymbols.count(GUID)); }; thinLTOInternalizeAndPromoteInIndex(Index, isExported); @@ -620,11 +623,18 @@ StringMap ModuleToDefinedGVSummaries(ModuleCount); Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); + // Convert the preserved symbols set from string to GUID + auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( + PreservedSymbols, Triple(TheModule.getTargetTriple())); + + // Compute "dead" symbols, we don't want to import/export these! + auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols); + // Generate import/export list StringMap ImportLists(ModuleCount); StringMap ExportLists(ModuleCount); ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, - ExportLists); + ExportLists, &DeadSymbols); auto &ImportList = ImportLists[TheModule.getModuleIdentifier()]; crossImportIntoModule(TheModule, Index, ModuleMap, ImportList); @@ -694,11 +704,14 @@ StringMap ModuleToDefinedGVSummaries(ModuleCount); Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); + // Compute "dead" symbols, we don't want to import/export these! + auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols); + // Generate import/export list StringMap ImportLists(ModuleCount); StringMap ExportLists(ModuleCount); ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, - ExportLists); + ExportLists, &DeadSymbols); auto &ExportList = ExportLists[ModuleIdentifier]; // Be friendly and don't nuke totally the module when the client didn't @@ -711,7 +724,7 @@ const auto &ExportList = ExportLists.find(ModuleIdentifier); return (ExportList != ExportLists.end() && ExportList->second.count(GUID)) || - GUIDPreservedSymbols.count(GUID); + (!DeadSymbols.count(GUID) && GUIDPreservedSymbols.count(GUID)); }; thinLTOInternalizeAndPromoteInIndex(Index, isExported); thinLTOInternalizeModule(TheModule, @@ -832,17 +845,20 @@ StringMap ModuleToDefinedGVSummaries(ModuleCount); Index->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); + // Convert the preserved symbols set from string to GUID, this is needed for + // computing the caching hash and the internalization. + auto GUIDPreservedSymbols = + computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); + + // Compute "dead" symbols, we don't want to import/export these! + auto DeadSymbols = computeDeadSymbols(*Index, GUIDPreservedSymbols); + // Collect the import/export lists for all modules from the call-graph in the // combined index. StringMap ImportLists(ModuleCount); StringMap ExportLists(ModuleCount); ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries, ImportLists, - ExportLists); - - // Convert the preserved symbols set from string to GUID, this is needed for - // computing the caching hash and the internalization. - auto GUIDPreservedSymbols = - computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); + ExportLists, &DeadSymbols); // We use a std::map here to be able to have a defined ordering when // producing a hash for the cache entry. @@ -858,7 +874,7 @@ const auto &ExportList = ExportLists.find(ModuleIdentifier); return (ExportList != ExportLists.end() && ExportList->second.count(GUID)) || - GUIDPreservedSymbols.count(GUID); + (!DeadSymbols.count(GUID) && GUIDPreservedSymbols.count(GUID)); }; // Use global summary-based analysis to identify symbols that can be Index: lib/Transforms/IPO/FunctionImport.cpp =================================================================== --- lib/Transforms/IPO/FunctionImport.cpp +++ lib/Transforms/IPO/FunctionImport.cpp @@ -334,6 +334,7 @@ } } + DEBUG(dbgs() << "Imported!\n"); // Insert the newly imported function to the worklist. Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold, GUID); } @@ -345,7 +346,8 @@ static void ComputeImportForModule( const GVSummaryMapTy &DefinedGVSummaries, const ModuleSummaryIndex &Index, FunctionImporter::ImportMapTy &ImportList, - StringMap *ExportLists = nullptr) { + StringMap *ExportLists = nullptr, + const DenseSet *DeadSymbols = nullptr) { // Worklist contains the list of function imported in this module, for which // we will analyse the callees and may import further down the callgraph. SmallVector Worklist; @@ -353,6 +355,10 @@ // Populate the worklist with the import for the functions in the current // module for (auto &GVSummary : DefinedGVSummaries) { + if (DeadSymbols && DeadSymbols->count(GVSummary.first)) { + DEBUG(dbgs() << "Ignores Dead GUID: " << GVSummary.first << "\n"); + continue; + } auto *Summary = GVSummary.second; if (auto *AS = dyn_cast(Summary)) Summary = &AS->getAliasee(); @@ -392,14 +398,15 @@ const ModuleSummaryIndex &Index, const StringMap &ModuleToDefinedGVSummaries, StringMap &ImportLists, - StringMap &ExportLists) { + StringMap &ExportLists, + const DenseSet *DeadSymbols) { // For each module that has function defined, compute the import/export lists. for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) { auto &ImportList = ImportLists[DefinedGVSummaries.first()]; DEBUG(dbgs() << "Computing import for Module '" << DefinedGVSummaries.first() << "'\n"); ComputeImportForModule(DefinedGVSummaries.second, Index, ImportList, - &ExportLists); + &ExportLists, DeadSymbols); } // When computing imports we added all GUIDs referenced by anything @@ -461,6 +468,70 @@ #endif } +DenseSet llvm::computeDeadSymbols( + const ModuleSummaryIndex &Index, + const DenseSet &GUIDPreservedSymbols) { + if (GUIDPreservedSymbols.empty()) + // Don't do anything when nothing is live, this is friendly with tests. + return DenseSet(); + DenseSet LiveSymbols = GUIDPreservedSymbols; + SmallVector Worklist; + Worklist.reserve(LiveSymbols.size() * 2); + for (auto GUID : LiveSymbols) { + DEBUG(dbgs() << "Live root: " << GUID << "\n"); + Worklist.push_back(GUID); + } + + while (!Worklist.empty()) { + auto GUID = Worklist.pop_back_val(); + auto It = Index.findGlobalValueSummaryList(GUID); + if (It == Index.end()) { + DEBUG(dbgs() << "Not in index: " << GUID << "\n"); + continue; + } + + // FIXME: we should only make the prevailing copy live here + for (auto &Summary : It->second) { + for (auto Ref : Summary->refs()) { + auto RefGUID = Ref.getGUID(); + if (LiveSymbols.insert(RefGUID).second) { + DEBUG(dbgs() << "Marking live (ref): " << RefGUID << "\n"); + Worklist.push_back(RefGUID); + } + } + if (auto *FS = dyn_cast(Summary.get())) { + for (auto Call : FS->calls()) { + auto CallGUID = Call.first.getGUID(); + if (LiveSymbols.insert(CallGUID).second) { + DEBUG(dbgs() << "Marking live (call): " << CallGUID << "\n"); + Worklist.push_back(CallGUID); + } + } + } + if (auto *AS = dyn_cast(Summary.get())) { + auto AliaseeGUID = AS->getAliasee().getOriginalName(); + if (LiveSymbols.insert(AliaseeGUID).second) { + DEBUG(dbgs() << "Marking live (alias): " << AliaseeGUID << "\n"); + Worklist.push_back(AliaseeGUID); + } + } + } + } + DenseSet DeadSymbols; + DeadSymbols.reserve( + std::min(Index.size(), Index.size() - LiveSymbols.size())); + for (auto &Entry : Index) { + auto GUID = Entry.first; + if (!LiveSymbols.count(GUID)) { + DEBUG(dbgs() << "Marking dead: " << GUID << "\n"); + DeadSymbols.insert(GUID); + } + } + DEBUG(dbgs() << LiveSymbols.size() << " symbols Live, and " + << DeadSymbols.size() << " symbols Dead \n"); + return DeadSymbols; +} + /// Compute the set of summaries needed for a ThinLTO backend compilation of /// \p ModulePath. void llvm::gatherImportedSummariesForModule( Index: test/ThinLTO/X86/Inputs/deadstrip.ll =================================================================== --- /dev/null +++ test/ThinLTO/X86/Inputs/deadstrip.ll @@ -0,0 +1,13 @@ +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +; Called from a @dead_func() in the other file, should not be imported there +define void @baz() { + ret void +} + +declare void @dead_func() +define void @another_dead_func() { + call void @dead_func() + ret void +} Index: test/ThinLTO/X86/deadstrip.ll =================================================================== --- /dev/null +++ test/ThinLTO/X86/deadstrip.ll @@ -0,0 +1,61 @@ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/deadstrip.ll -o %t2.bc +; RUN: llvm-lto -thinlto-action=thinlink -o %t.index.bc %t1.bc %t2.bc + +; RUN: llvm-lto -exported-symbol=_main -thinlto-action=promote %t1.bc -thinlto-index=%t.index.bc -o - | llvm-lto -exported-symbol=_main -thinlto-action=internalize -thinlto-index %t.index.bc -thinlto-module-id=%t1.bc - -o - | llvm-dis -o - | FileCheck %s + +; RUN: llvm-lto -exported-symbol=_main -thinlto-action=run %t1.bc %t2.bc +; RUN: llvm-nm %t1.bc.thinlto.o | FileCheck %s --check-prefix=CHECK-NM + +; RUN: llvm-lto2 %t1.bc %t2.bc -o %t.out -save-temps \ +; RUN: -r %t1.bc,_main,plx \ +; RUN: -r %t1.bc,_bar,pl \ +; RUN: -r %t1.bc,_dead_func,pl \ +; RUN: -r %t1.bc,_baz,l \ +; RUN: -r %t2.bc,_baz,pl \ +; RUN: -r %t2.bc,_dead_func,pl \ +; RUN: -r %t2.bc,_another_dead_func,pl +; RUN: llvm-dis < %t.out.0.3.import.bc | FileCheck %s + +; Dead-stripping on the index allows to internalize these, +; and limit the import of @baz thanks to early pruning. +; CHECK-NOT: available_externally {{.*}} @baz() +; CHECK: define internal void @bar() { +; CHECK: define internal void @bar_internal() +; CHECK: define internal void @dead_func() { +; CHECK-NOT: available_externally {{.*}} @baz() + +; The final binary should not contain any of the dead functions, +; only main is expected because bar is expected to be inlined and stripped out. +; CHECK-NM-NOT: bar +; CHECK-NM-NOT: dead +; CHECK-NM: T _main +; CHECK-NM-NOT: bar +; CHECK-NM-NOT: dead + + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +declare void @baz() + +define void @bar() { + ret void +} + +define internal void @bar_internal() { + ret void +} + +define void @dead_func() { + call void @bar() + call void @baz() + call void @bar_internal() + ret void +} + +define void @main() { + call void @bar() + call void @bar_internal() + ret void +}