diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -91,6 +91,10 @@ Expected> setupStatsFile(StringRef StatsFilename); +/// Produce a re-ordered container for optimal multi-threaded processing. +/// Returns indices to elements into the input array. +std::vector generateModulesOrdering(ArrayRef R); + class LTO; struct SymbolResolution; class ThinBackendProc; diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1443,10 +1443,17 @@ auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; + std::vector ModulesVec; + ModulesVec.reserve(ModuleMap.size()); + for (auto &Mod : ModuleMap) + ModulesVec.push_back(&Mod.second); + std::vector ModulesOrdering = generateModulesOrdering(ModulesVec); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for combined // module and parallel code generation partitions. unsigned Task = RegularLTO.ParallelCodeGenParallelismLevel; - for (auto &Mod : ModuleMap) { + for (auto IndexCount : ModulesOrdering) { + auto &Mod = *(ModuleMap.begin() + IndexCount); if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first], ExportLists[Mod.first], ResolvedODR[Mod.first], ThinLTO.ModuleMap)) @@ -1495,3 +1502,18 @@ StatsFile->keep(); return std::move(StatsFile); } + +// Compute the ordering we will process the inputs: the rough heuristic here +// is to sort them per size so that the largest module get schedule as soon as +// possible. This is purely a compile-time optimization. +std::vector lto::generateModulesOrdering(ArrayRef R) { + std::vector ModulesOrdering; + ModulesOrdering.resize(R.size()); + std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0); + llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) { + auto LSize = R[LeftIndex]->getBuffer().size(); + auto RSize = R[RightIndex]->getBuffer().size(); + return LSize > RSize; + }); + return ModulesOrdering; +} diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp --- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -1054,19 +1054,11 @@ ModuleToDefinedGVSummaries[ModuleIdentifier]; } - // Compute the ordering we will process the inputs: the rough heuristic here - // is to sort them per size so that the largest module get schedule as soon as - // possible. This is purely a compile-time optimization. - std::vector ModulesOrdering; - ModulesOrdering.resize(Modules.size()); - std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0); - llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) { - auto LSize = - Modules[LeftIndex]->getSingleBitcodeModule().getBuffer().size(); - auto RSize = - Modules[RightIndex]->getSingleBitcodeModule().getBuffer().size(); - return LSize > RSize; - }); + std::vector ModulesVec; + ModulesVec.reserve(Modules.size()); + for (auto &Mod : Modules) + ModulesVec.push_back(&Mod->getSingleBitcodeModule()); + std::vector ModulesOrdering = lto::generateModulesOrdering(ModulesVec); // Parallel optimizer + codegen {