Index: lib/LTO/LTO.cpp
===================================================================
--- lib/LTO/LTO.cpp
+++ lib/LTO/LTO.cpp
@@ -1295,15 +1295,33 @@
       ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
                       AddStream, Cache);
 
-  // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for combined
-  // module and parallel code generation partitions.
+  // Task numbers start at ParallelCodeGenParallelismLevel if an LTO
+  // module is present, as tasks 0 through ParallelCodeGenParallelismLevel-1
+  // are reserved for parallel code generation partitions.
   unsigned Task = RegularLTO.ParallelCodeGenParallelismLevel;
-  for (auto &Mod : ThinLTO.ModuleMap) {
+
+  // Use buffer size as a proxy for expected compile time and sort
+  // modules in terms of decreasing size to provide better compile-time
+  // load-balance.
+  using ModuleMapElt = std::pair<StringRef, BitcodeModule>;
+  using ModuleVecElt = std::pair<unsigned, const ModuleMapElt *>;
+  std::vector<ModuleVecElt> ModulesSortedBySize;
+  ModulesSortedBySize.reserve(ThinLTO.ModuleMap.size());
+  for (auto &Mod : ThinLTO.ModuleMap)
+    ModulesSortedBySize.emplace_back(Task++, &Mod);
+  std::sort(ModulesSortedBySize.begin(), ModulesSortedBySize.end(),
+            [](const ModuleVecElt &Left, const ModuleVecElt &Right) {
+              return Left.second->second.getBuffer().size() >
+                     Right.second->second.getBuffer().size();
+            });
+
+  for (auto Pair : ModulesSortedBySize) {
+    unsigned Task = Pair.first;
+    const ModuleMapElt &Mod = *Pair.second;
     if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first],
                                      ExportLists[Mod.first],
                                      ResolvedODR[Mod.first], ThinLTO.ModuleMap))
       return E;
-    ++Task;
   }
 
   return BackendProc->wait();