Index: llvm/lib/LTO/LTO.cpp
===================================================================
--- llvm/lib/LTO/LTO.cpp
+++ llvm/lib/LTO/LTO.cpp
@@ -36,6 +36,7 @@
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
 #include "llvm/Transforms/Utils/SplitModule.h"
 
+#include <numeric>
 #include <set>
 
 using namespace llvm;
@@ -743,15 +744,32 @@
   // ParallelCodeGenParallelismLevel if an LTO module is present, as tasks 0
   // through ParallelCodeGenParallelismLevel-1 are reserved for parallel code
   // generation partitions.
-  unsigned Task = RegularLTO.CombinedModule
-                      ? RegularLTO.ParallelCodeGenParallelismLevel
-                      : 0;
+  unsigned FirstTask = RegularLTO.CombinedModule
+                           ? RegularLTO.ParallelCodeGenParallelismLevel
+                           : 0;
   unsigned Partition = 1;
 
-  for (auto &Mod : ThinLTO.ModuleMap) {
-    if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first],
-                                     ExportLists[Mod.first],
-                                     ResolvedODR[Mod.first], ThinLTO.ModuleMap))
+  // Compute the order in which we will process the inputs: the rough heuristic
+  // here is to sort them by size so that the largest modules get scheduled as
+  // soon as possible. This is purely a compile-time optimization.
+  std::vector<unsigned> ModulesOrdering;
+  ModulesOrdering.resize(ThinLTO.ModuleMap.size());
+  std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), FirstTask);
+  std::sort(
+      ModulesOrdering.begin(), ModulesOrdering.end(),
+      [&](int LeftIndex, int RightIndex) {
+        auto LSize =
+            (ThinLTO.ModuleMap.begin() + LeftIndex)->second.getBufferSize();
+        auto RSize =
+            (ThinLTO.ModuleMap.begin() + RightIndex)->second.getBufferSize();
+        return LSize > RSize;
+      });
+
+  for (auto &Task : ModulesOrdering) {
+    auto Mod = ThinLTO.ModuleMap.begin() + Task;
+    if (Error E = BackendProc->start(
+            Task, Mod->second, ImportLists[Mod->first], ExportLists[Mod->first],
+            ResolvedODR[Mod->first], ThinLTO.ModuleMap))
       return E;
 
     ++Task;