Index: llvm/trunk/include/llvm/Transforms/IPO/FunctionImport.h =================================================================== --- llvm/trunk/include/llvm/Transforms/IPO/FunctionImport.h +++ llvm/trunk/include/llvm/Transforms/IPO/FunctionImport.h @@ -33,11 +33,17 @@ /// based on the provided summary informations. class FunctionImporter { public: - /// Set of functions to import from a source module. Each entry is a map - /// containing all the functions to import for a source module. - /// The keys is the GUID identifying a function to import, and the value - /// is the threshold applied when deciding to import it. - using FunctionsToImportTy = std::map; + /// Set of functions to import from a source module. Each entry is a set + /// containing all the GUIDs of all functions to import for a source module. + using FunctionsToImportTy = std::unordered_set; + + /// Map of callee GUID considered for import into a given module to a pair + /// consisting of the largest threshold applied when deciding whether to + /// import it and, if we decided to import, a pointer to the summary instance + /// imported. If we decided not to import, the summary will be nullptr. + using ImportThresholdsTy = + DenseMap>; /// The map contains an entry for every module to import from, the key being /// the module identifier to pass to the ModuleLoader. The value is the set of Index: llvm/trunk/lib/LTO/LTO.cpp =================================================================== --- llvm/trunk/lib/LTO/LTO.cpp +++ llvm/trunk/lib/LTO/LTO.cpp @@ -156,7 +156,7 @@ AddUint64(Entry.second.size()); for (auto &Fn : Entry.second) - AddUint64(Fn.first); + AddUint64(Fn); } // Include the hash for the resolved ODR. @@ -221,7 +221,7 @@ // so we need to collect their used resolutions as well. for (auto &ImpM : ImportList) for (auto &ImpF : ImpM.second) - AddUsedThings(Index.findSummaryInModule(ImpF.first, ImpM.first())); + AddUsedThings(Index.findSummaryInModule(ImpF, ImpM.first())); auto AddTypeIdSummary = [&](StringRef TId, const TypeIdSummary &S) { AddString(TId); Index: llvm/trunk/lib/Transforms/IPO/FunctionImport.cpp =================================================================== --- llvm/trunk/lib/Transforms/IPO/FunctionImport.cpp +++ llvm/trunk/lib/Transforms/IPO/FunctionImport.cpp @@ -262,7 +262,7 @@ !RefSummary->modulePath().empty() && !GlobalValue::isInterposableLinkage(RefSummary->linkage()) && RefSummary->refs().empty()) { - ImportList[RefSummary->modulePath()][VI.getGUID()] = 1; + ImportList[RefSummary->modulePath()].insert(VI.getGUID()); if (ExportLists) (*ExportLists)[RefSummary->modulePath()].insert(VI.getGUID()); break; @@ -278,7 +278,8 @@ const unsigned Threshold, const GVSummaryMapTy &DefinedGVSummaries, SmallVectorImpl &Worklist, FunctionImporter::ImportMapTy &ImportList, - StringMap *ExportLists = nullptr) { + StringMap *ExportLists, + FunctionImporter::ImportThresholdsTy &ImportThresholds) { computeImportForReferencedGlobals(Summary, DefinedGVSummaries, ImportList, ExportLists); static int ImportCount = 0; @@ -315,19 +316,85 @@ const auto NewThreshold = Threshold * GetBonusMultiplier(Edge.second.getHotness()); - auto *CalleeSummary = selectCallee(Index, VI.getSummaryList(), NewThreshold, - Summary.modulePath()); - if (!CalleeSummary) { - LLVM_DEBUG( - dbgs() << "ignored! No qualifying callee with summary found.\n"); - continue; - } + auto IT = ImportThresholds.insert( + std::make_pair(VI.getGUID(), std::make_pair(NewThreshold, nullptr))); + bool PreviouslyVisited = !IT.second; + auto &ProcessedThreshold = IT.first->second.first; + auto &CalleeSummary = IT.first->second.second; + + const FunctionSummary *ResolvedCalleeSummary = nullptr; + if (CalleeSummary) { + assert(PreviouslyVisited); + // Since the traversal of the call graph is DFS, we can revisit a function + // a second time with a higher threshold. In this case, it is added back + // to the worklist with the new threshold (so that its own callee chains + // can be considered with the higher threshold). + if (NewThreshold <= ProcessedThreshold) { + LLVM_DEBUG( + dbgs() << "ignored! Target was already imported with Threshold " + << ProcessedThreshold << "\n"); + continue; + } + // Update with new larger threshold. + ProcessedThreshold = NewThreshold; + ResolvedCalleeSummary = cast(CalleeSummary); + } else { + // If we already rejected importing a callee at the same or higher + // threshold, don't waste time calling selectCallee. + if (PreviouslyVisited && NewThreshold <= ProcessedThreshold) { + LLVM_DEBUG( + dbgs() << "ignored! Target was already rejected with Threshold " + << ProcessedThreshold << "\n"); + continue; + } - // "Resolve" the summary - const auto *ResolvedCalleeSummary = cast(CalleeSummary->getBaseObject()); + CalleeSummary = selectCallee(Index, VI.getSummaryList(), NewThreshold, + Summary.modulePath()); + if (!CalleeSummary) { + // Update with new larger threshold if this was a retry (otherwise + // we would have already inserted with NewThreshold above). + if (PreviouslyVisited) + ProcessedThreshold = NewThreshold; + LLVM_DEBUG( + dbgs() << "ignored! No qualifying callee with summary found.\n"); + continue; + } - assert(ResolvedCalleeSummary->instCount() <= NewThreshold && - "selectCallee() didn't honor the threshold"); + // "Resolve" the summary + CalleeSummary = CalleeSummary->getBaseObject(); + ResolvedCalleeSummary = cast(CalleeSummary); + + assert(ResolvedCalleeSummary->instCount() <= NewThreshold && + "selectCallee() didn't honor the threshold"); + + auto ExportModulePath = ResolvedCalleeSummary->modulePath(); + auto ILI = ImportList[ExportModulePath].insert(VI.getGUID()); + // We previously decided to import this GUID definition if it was already + // inserted in the set of imports from the exporting module. + bool PreviouslyImported = !ILI.second; + + // Make exports in the source module. + if (ExportLists) { + auto &ExportList = (*ExportLists)[ExportModulePath]; + ExportList.insert(VI.getGUID()); + if (!PreviouslyImported) { + // This is the first time this function was exported from its source + // module, so mark all functions and globals it references as exported + // to the outside if they are defined in the same source module. + // For efficiency, we unconditionally add all the referenced GUIDs + // to the ExportList for this module, and will prune out any not + // defined in the module later in a single pass. + for (auto &Edge : ResolvedCalleeSummary->calls()) { + auto CalleeGUID = Edge.first.getGUID(); + ExportList.insert(CalleeGUID); + } + for (auto &Ref : ResolvedCalleeSummary->refs()) { + auto GUID = Ref.getGUID(); + ExportList.insert(GUID); + } + } + } + } auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) { // Adjust the threshold for next level of imported functions. @@ -342,44 +409,8 @@ Edge.second.getHotness() == CalleeInfo::HotnessType::Hot; const auto AdjThreshold = GetAdjustedThreshold(Threshold, IsHotCallsite); - auto ExportModulePath = ResolvedCalleeSummary->modulePath(); - auto &ProcessedThreshold = ImportList[ExportModulePath][VI.getGUID()]; - /// Since the traversal of the call graph is DFS, we can revisit a function - /// a second time with a higher threshold. In this case, it is added back to - /// the worklist with the new threshold. - if (ProcessedThreshold && ProcessedThreshold >= AdjThreshold) { - LLVM_DEBUG(dbgs() << "ignored! Target was already seen with Threshold " - << ProcessedThreshold << "\n"); - continue; - } - bool PreviouslyImported = ProcessedThreshold != 0; - // Mark this function as imported in this module, with the current Threshold - ProcessedThreshold = AdjThreshold; - ImportCount++; - // Make exports in the source module. - if (ExportLists) { - auto &ExportList = (*ExportLists)[ExportModulePath]; - ExportList.insert(VI.getGUID()); - if (!PreviouslyImported) { - // This is the first time this function was exported from its source - // module, so mark all functions and globals it references as exported - // to the outside if they are defined in the same source module. - // For efficiency, we unconditionally add all the referenced GUIDs - // to the ExportList for this module, and will prune out any not - // defined in the module later in a single pass. - for (auto &Edge : ResolvedCalleeSummary->calls()) { - auto CalleeGUID = Edge.first.getGUID(); - ExportList.insert(CalleeGUID); - } - for (auto &Ref : ResolvedCalleeSummary->refs()) { - auto GUID = Ref.getGUID(); - ExportList.insert(GUID); - } - } - } - // Insert the newly imported function to the worklist. Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold, VI.getGUID()); } @@ -395,6 +426,7 @@ // Worklist contains the list of function imported in this module, for which // we will analyse the callees and may import further down the callgraph. SmallVector Worklist; + FunctionImporter::ImportThresholdsTy ImportThresholds; // Populate the worklist with the import for the functions in the current // module @@ -416,7 +448,7 @@ LLVM_DEBUG(dbgs() << "Initialize import for " << VI << "\n"); computeImportForFunction(*FuncSummary, Index, ImportInstrLimit, DefinedGVSummaries, Worklist, ImportList, - ExportLists); + ExportLists, ImportThresholds); } // Process the newly imported functions and add callees to the worklist. @@ -424,17 +456,10 @@ auto FuncInfo = Worklist.pop_back_val(); auto *Summary = std::get<0>(FuncInfo); auto Threshold = std::get<1>(FuncInfo); - auto GUID = std::get<2>(FuncInfo); - - // Check if we later added this summary with a higher threshold. - // If so, skip this entry. - auto ExportModulePath = Summary->modulePath(); - auto &LatestProcessedThreshold = ImportList[ExportModulePath][GUID]; - if (LatestProcessedThreshold > Threshold) - continue; computeImportForFunction(*Summary, Index, Threshold, DefinedGVSummaries, - Worklist, ImportList, ExportLists); + Worklist, ImportList, ExportLists, + ImportThresholds); } } @@ -451,11 +476,6 @@ static GlobalValue::GUID getGUID(GlobalValue::GUID G) { return G; } -static GlobalValue::GUID -getGUID(const std::pair &P) { - return P.first; -} - template static unsigned numGlobalVarSummaries(const ModuleSummaryIndex &Index, T &Cont) { @@ -574,9 +594,8 @@ // e.g. record required linkage changes. if (Summary->modulePath() == ModulePath) continue; - // Doesn't matter what value we plug in to the map, just needs an entry - // to provoke importing by thinBackend. - ImportList[Summary->modulePath()][GUID] = 1; + // Add an entry to provoke importing by thinBackend. + ImportList[Summary->modulePath()].insert(GUID); } #ifndef NDEBUG dumpImportListForModule(Index, ModulePath, ImportList); @@ -698,10 +717,10 @@ const auto &DefinedGVSummaries = ModuleToDefinedGVSummaries.lookup(ILI.first()); for (auto &GI : ILI.second) { - const auto &DS = DefinedGVSummaries.find(GI.first); + const auto &DS = DefinedGVSummaries.find(GI); assert(DS != DefinedGVSummaries.end() && "Expected a defined summary for imported global value"); - SummariesForIndex[GI.first] = DS->second; + SummariesForIndex[GI] = DS->second; } } } Index: llvm/trunk/test/Transforms/FunctionImport/Inputs/funcimport_resolved1.ll =================================================================== --- llvm/trunk/test/Transforms/FunctionImport/Inputs/funcimport_resolved1.ll +++ llvm/trunk/test/Transforms/FunctionImport/Inputs/funcimport_resolved1.ll @@ -0,0 +1,34 @@ +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +define void @foo() { + call void @linkonceodrfunc() + call void @linkonceodrfunc2() + ret void +} + +define linkonce_odr void @linkonceodrfunc() { + call void @f() + call void @f() + call void @f() + call void @f() + call void @f() + call void @f() + call void @f() + ret void +} + +define linkonce_odr void @linkonceodrfunc2() { + call void @f() + call void @f() + call void @f() + call void @f() + call void @f() + call void @f() + call void @f() + ret void +} + +define internal void @f() { + ret void +} Index: llvm/trunk/test/Transforms/FunctionImport/Inputs/funcimport_resolved2.ll =================================================================== --- llvm/trunk/test/Transforms/FunctionImport/Inputs/funcimport_resolved2.ll +++ llvm/trunk/test/Transforms/FunctionImport/Inputs/funcimport_resolved2.ll @@ -0,0 +1,6 @@ +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +define linkonce_odr void @linkonceodrfunc() { + ret void +} Index: llvm/trunk/test/Transforms/FunctionImport/funcimport_resolved.ll =================================================================== --- llvm/trunk/test/Transforms/FunctionImport/funcimport_resolved.ll +++ llvm/trunk/test/Transforms/FunctionImport/funcimport_resolved.ll @@ -0,0 +1,50 @@ +; Test to ensure that we always select the same copy of a linkonce function +; when it is encountered with different thresholds. When we encounter the +; copy in funcimport_resolved1.ll with a higher threshold via the direct call +; from main(), it will be selected for importing. When we encounter it with a +; lower threshold by reaching it from the deeper call chain via foo(), it +; won't be selected for importing. We don't want to select both the copy from +; funcimport_resolved1.ll and the smaller one from funcimport_resolved2.ll, +; leaving it up to the backend to figure out which one to actually import. +; The linkonce_odr may have different instruction counts in practice due to +; different inlines in the compile step. + +; Require asserts so we can use -debug-only +; REQUIRES: asserts + +; RUN: opt -module-summary %s -o %t.bc +; RUN: opt -module-summary %p/Inputs/funcimport_resolved1.ll -o %t2.bc +; RUN: opt -module-summary %p/Inputs/funcimport_resolved2.ll -o %t3.bc + +; First do a sanity check that all callees are imported with the default +; instruction limit +; RUN: llvm-lto2 run %t.bc %t2.bc %t3.bc -o %t4 -r=%t.bc,_main,pl -r=%t.bc,_linkonceodrfunc,l -r=%t.bc,_foo,l -r=%t2.bc,_foo,pl -r=%t2.bc,_linkonceodrfunc,pl -r=%t2.bc,_linkonceodrfunc2,pl -r=%t3.bc,_linkonceodrfunc,l -thinlto-threads=1 -debug-only=function-import 2>&1 | FileCheck %s --check-prefix=INSTLIMDEFAULT +; INSTLIMDEFAULT: Is importing function {{.*}} foo from {{.*}}funcimport_resolved1.ll +; INSTLIMDEFAULT: Is importing function {{.*}} linkonceodrfunc from {{.*}}funcimport_resolved1.ll +; INSTLIMDEFAULT: Is importing function {{.*}} linkonceodrfunc2 from {{.*}}funcimport_resolved1.ll +; INSTLIMDEFAULT: Is importing function {{.*}} f from {{.*}}funcimport_resolved1.ll +; INSTLIMDEFAULT-NOT: Is importing function {{.*}} linkonceodrfunc from {{.*}}funcimport_resolved2.ll + +; Now run with the lower threshold that will only allow linkonceodrfunc to be +; imported from funcimport_resolved1.ll when encountered via the direct call +; from main(). Ensure we don't also select the copy in funcimport_resolved2.ll +; when it is encountered via the deeper call chain. +; RUN: llvm-lto2 run %t.bc %t2.bc %t3.bc -o %t4 -r=%t.bc,_main,pl -r=%t.bc,_linkonceodrfunc,l -r=%t.bc,_foo,l -r=%t2.bc,_foo,pl -r=%t2.bc,_linkonceodrfunc,pl -r=%t2.bc,_linkonceodrfunc2,pl -r=%t3.bc,_linkonceodrfunc,l -thinlto-threads=1 -debug-only=function-import -import-instr-limit=8 2>&1 | FileCheck %s --check-prefix=INSTLIM8 +; INSTLIM8: Is importing function {{.*}} foo from {{.*}}funcimport_resolved1.ll +; INSTLIM8: Is importing function {{.*}} linkonceodrfunc from {{.*}}funcimport_resolved1.ll +; INSTLIM8: Not importing function {{.*}} linkonceodrfunc2 from {{.*}}funcimport_resolved1.ll +; INSTLIM8: Is importing function {{.*}} f from {{.*}}funcimport_resolved1.ll +; INSTLIM8-NOT: Is importing function {{.*}} linkonceodrfunc from {{.*}}funcimport_resolved2.ll + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +define i32 @main() #0 { +entry: + call void (...) @foo() + call void (...) @linkonceodrfunc() + ret i32 0 +} + +declare void @foo(...) #1 +declare void @linkonceodrfunc(...) #1 Index: llvm/trunk/tools/llvm-link/llvm-link.cpp =================================================================== --- llvm/trunk/tools/llvm-link/llvm-link.cpp +++ llvm/trunk/tools/llvm-link/llvm-link.cpp @@ -262,7 +262,7 @@ errs() << "Importing " << FunctionName << " from " << FileName << "\n"; auto &Entry = ImportList[FileName]; - Entry.insert(std::make_pair(F->getGUID(), /* (Unused) threshold */ 1.0)); + Entry.insert(F->getGUID()); } auto CachedModuleLoader = [&](StringRef Identifier) { return ModuleLoaderCache.takeModule(Identifier);