Index: lib/LTO/LTO.cpp
===================================================================
--- lib/LTO/LTO.cpp
+++ lib/LTO/LTO.cpp
@@ -47,6 +47,7 @@
 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
 #include "llvm/Transforms/Utils/SplitModule.h"
+#include <numeric>
 #include <set>
 
 using namespace llvm;
@@ -1295,17 +1296,30 @@
   ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
                   AddStream, Cache);
 
-  // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for combined
-  // module and parallel code generation partitions.
-  unsigned Task = RegularLTO.ParallelCodeGenParallelismLevel;
-  for (auto &Mod : ThinLTO.ModuleMap) {
-    if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first],
-                                     ExportLists[Mod.first],
-                                     ResolvedODR[Mod.first], ThinLTO.ModuleMap))
+  // Compute the order in which to process the inputs: the rough heuristic is
+  // to sort them by size so that the largest modules are scheduled as early
+  // as possible. This is purely a compile-time optimization.
+  std::vector<int> ModulesOrdering;
+  ModulesOrdering.resize(ThinLTO.ModuleMap.size());
+  std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
+  auto ModuleIter = ThinLTO.ModuleMap.begin();
+  llvm::sort(ModulesOrdering, [ModuleIter](int LeftIndex, int RightIndex) {
+    auto LSize = ModuleIter[LeftIndex].second.getBuffer().size();
+    auto RSize = ModuleIter[RightIndex].second.getBuffer().size();
+    return LSize > RSize;
+  });
+
+  // Task numbers start at ParallelCodeGenParallelismLevel if an LTO
+  // module is present, as tasks 0 through ParallelCodeGenParallelismLevel-1
+  // are reserved for parallel code generation partitions.
+  unsigned FirstTask = RegularLTO.ParallelCodeGenParallelismLevel;
+  for (unsigned Task : ModulesOrdering) {
+    auto &Mod = ModuleIter[Task];
+    if (Error E = BackendProc->start(
+            FirstTask + Task, Mod.second, ImportLists[Mod.first],
+            ExportLists[Mod.first], ResolvedODR[Mod.first], ThinLTO.ModuleMap))
       return E;
-    ++Task;
   }
-
   return BackendProc->wait();
 }
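
For reference, below is a minimal standalone sketch of the index-sorting idiom the hunk introduces (build an identity permutation with std::iota, then sort the indices by decreasing input size). The Modules vector of strings is a placeholder standing in for ThinLTO.ModuleMap and its memory buffers; none of these names come from LTO.cpp.

    #include <algorithm>
    #include <iostream>
    #include <numeric>
    #include <string>
    #include <vector>

    int main() {
      // Hypothetical inputs; in LTO.cpp the size comes from the module's
      // underlying memory buffer.
      std::vector<std::string> Modules = {"tiny", "a much larger module body",
                                          "medium sized"};

      // Identity permutation 0, 1, ..., N-1 over the inputs.
      std::vector<int> ModulesOrdering(Modules.size());
      std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);

      // Sort the indices, not the inputs, so each input keeps its original
      // position (and therefore its task number); largest first.
      std::sort(ModulesOrdering.begin(), ModulesOrdering.end(),
                [&](int LeftIndex, int RightIndex) {
                  return Modules[LeftIndex].size() > Modules[RightIndex].size();
                });

      for (int Index : ModulesOrdering)
        std::cout << Index << ": " << Modules[Index].size() << " bytes\n";
      return 0;
    }

As the patch's comment notes, this is purely a compile-time optimization: kicking off the largest backend jobs first tends to shorten the tail of the parallel run, while the task numbering (FirstTask + Task) stays tied to each module's original position rather than to the processing order.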