Index: lib/LTO/LTO.cpp
===================================================================
--- lib/LTO/LTO.cpp
+++ lib/LTO/LTO.cpp
@@ -47,6 +47,7 @@
 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
 #include "llvm/Transforms/Utils/SplitModule.h"
+#include <numeric>
 #include <set>
 
 using namespace llvm;
@@ -1295,17 +1296,30 @@
   ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
                   AddStream, Cache);
 
-  // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for combined
-  // module and parallel code generation partitions.
-  unsigned Task = RegularLTO.ParallelCodeGenParallelismLevel;
-  for (auto &Mod : ThinLTO.ModuleMap) {
-    if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first],
-                                     ExportLists[Mod.first],
-                                     ResolvedODR[Mod.first], ThinLTO.ModuleMap))
+  // Compute the order in which to process the inputs: the rough heuristic is
+  // to sort them by size so that the largest modules are scheduled as early
+  // as possible. This is purely a compile-time optimization.
+  std::vector<int> ModulesOrdering;
+  ModulesOrdering.resize(ThinLTO.ModuleMap.size());
+  std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
+  auto ModuleIter = ThinLTO.ModuleMap.begin();
+  llvm::sort(ModulesOrdering, [ModuleIter](int LeftIndex, int RightIndex) {
+    auto LSize = ModuleIter[LeftIndex].second.getBuffer().size();
+    auto RSize = ModuleIter[RightIndex].second.getBuffer().size();
+    return LSize > RSize;
+  });
+
+  // Task numbers start at ParallelCodeGenParallelismLevel if an LTO
+  // module is present, as tasks 0 through ParallelCodeGenParallelismLevel-1
+  // are reserved for parallel code generation partitions.
+  unsigned FirstTask = RegularLTO.ParallelCodeGenParallelismLevel;
+  for (unsigned Task : ModulesOrdering) {
+    auto &Mod = ModuleIter[Task];
+    if (Error E = BackendProc->start(
+            FirstTask + Task, Mod.second, ImportLists[Mod.first],
+            ExportLists[Mod.first], ResolvedODR[Mod.first], ThinLTO.ModuleMap))
       return E;
-    ++Task;
   }
-
   return BackendProc->wait();
 }
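
For reference, below is a minimal standalone sketch of the index-sorting idiom the hunk introduces (build an identity permutation with std::iota, then sort the indices by decreasing input size). The Modules vector of strings is a placeholder standing in for ThinLTO.ModuleMap and its memory buffers; none of these names come from LTO.cpp.

    #include <algorithm>
    #include <iostream>
    #include <numeric>
    #include <string>
    #include <vector>

    int main() {
      // Hypothetical inputs; in LTO.cpp the size comes from the module's
      // underlying memory buffer.
      std::vector<std::string> Modules = {"tiny", "a much larger module body",
                                          "medium sized"};

      // Identity permutation 0, 1, ..., N-1 over the inputs.
      std::vector<int> ModulesOrdering(Modules.size());
      std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);

      // Sort the indices, not the inputs, so each input keeps its original
      // position (and therefore its task number); largest first.
      std::sort(ModulesOrdering.begin(), ModulesOrdering.end(),
                [&](int LeftIndex, int RightIndex) {
                  return Modules[LeftIndex].size() > Modules[RightIndex].size();
                });

      for (int Index : ModulesOrdering)
        std::cout << Index << ": " << Modules[Index].size() << " bytes\n";
      return 0;
    }

As the patch's comment notes, this is purely a compile-time optimization: kicking off the largest backend jobs first tends to shorten the tail of the parallel run, while the task numbering (FirstTask + Task) stays tied to each module's original position rather than to the processing order.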