Index: llvm/lib/Transforms/Scalar/LICM.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LICM.cpp
+++ llvm/lib/Transforms/Scalar/LICM.cpp
@@ -96,6 +96,10 @@
     DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
                      cl::desc("Disable memory promotion in LICM pass"));
 
+static cl::opt<unsigned> MaxNumPromotions(
+    "licm-max-num-promotions", cl::Hidden, cl::init(0),
+    cl::desc("The maximum number of memory instructions to be hoisted"));
+
 static cl::opt<bool> ControlFlowHoisting(
     "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
     cl::desc("Enable control flow (and PHI) hoisting in LICM"));
@@ -328,6 +332,20 @@
   }
 }
 
+static void SetMaxNumPromotions(TargetTransformInfo *TTI) {
+  // TODO: if the maximum number of promotions isn't user specified, we
+  // set it to the number of available scalar registers. This is a very
+  // first step to not let the register pressure explode for some cases.
+  // For now, some instructions are not considered for hoisting when the
+  // maximum is reached, but better would be to come up with a register
+  // pressure estimator.
+  if (!MaxNumPromotions)
+    MaxNumPromotions =
+        TTI->getNumberOfRegisters(TTI->getRegisterClassForType(false));
+  LLVM_DEBUG(dbgs() << "LICM: Using promotion maximum: " << MaxNumPromotions
+                    << "\n");
+}
+
 /// Hoist expressions out of the specified loop. Note, alias info for inner
 /// loop is not preserved so it is not a good idea to run LICM multiple
 /// times on one loop.
@@ -345,6 +363,8 @@
     return false;
   }
 
+  SetMaxNumPromotions(TTI);
+
   std::unique_ptr<AliasSetTracker> CurAST;
   std::unique_ptr<MemorySSAUpdater> MSSAU;
   std::unique_ptr<SinkAndHoistLICMFlags> Flags;
@@ -425,6 +445,7 @@
       if (!CurAST.get())
         CurAST = collectAliasInfoForLoopWithMSSA(L, AA, MSSAU.get());
 
+      unsigned NumPromoted = 0;
       // Loop over all of the alias sets in the tracker object.
       for (AliasSet &AS : *CurAST) {
         // We can promote this alias set if it has a store, if it is a "Must"
@@ -442,9 +463,17 @@
         for (const auto &ASI : AS)
           PointerMustAliases.insert(ASI.getValue());
 
-        Promoted |= promoteLoopAccessesToScalars(
+        if (NumPromoted == MaxNumPromotions) {
+          LLVM_DEBUG(dbgs() << "LICM: promotion maximum reached, not hoisting\n");
+          break;
+        }
+
+        if (promoteLoopAccessesToScalars(
             PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
-            DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
+            DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE)) {
+          Promoted = true;
+          NumPromoted++;
+        }
       }
 
       // Once we have promoted values across the loop body we have to
@@ -2158,6 +2187,7 @@
            << "Moving accesses to memory location out of the loop";
       });
   ++NumPromoted;
+  ++NumHoisted;
 
   // Look at all the loop uses, and try to merge their locations.
   std::vector<const DILocation *> LoopUsesLocs;
Index: llvm/test/Transforms/LICM/max-num-hoist.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LICM/max-num-hoist.ll
@@ -0,0 +1,96 @@
+; RUN: opt -S -licm < %s | FileCheck %s --check-prefix=DEFAULT8
+; RUN: opt -S -licm -licm-max-num-promotions=2 < %s | FileCheck %s --check-prefix=MAX2
+
+target datalayout = "E-m:e-p:32:32-i8:8:8-i16:16:16-i64:32:32-f64:32:32-v64:32:32-v128:32:32-a0:0:32-n32"
+
+@test.F = internal global [8 x [8 x i64]] zeroinitializer, align 8
+@test.f = internal global [8 x [8 x i8]] zeroinitializer, align 1
+
+; MAX2: @test
+; MAX2: entry:
+; MAX2-NEXT: %A = alloca [8 x [8 x i64]], align 8
+; MAX2-NEXT: %i = bitcast [8 x [8 x i64]]* %A to i8*
+; MAX2-NEXT: %.promoted = load i64, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 0), align 8
+; MAX2-NEXT: %.promoted2 = load i64, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 1), align 8
+; MAX2-NEXT: br label %for.cond16.preheader
+
+; DEFAULT8: @test
+; DEFAULT8: entry:
+; DEFAULT8-NEXT: %A = alloca [8 x [8 x i64]], align 8
+; DEFAULT8-NEXT: %i = bitcast [8 x [8 x i64]]* %A to i8*
+; DEFAULT8-NEXT: %.promoted = load i64, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 0), align 8
+; DEFAULT8-NEXT: %.promoted2 = load i64, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 1), align 8
+; DEFAULT8-NEXT: %.promoted4 = load i64, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 2), align 8
+; DEFAULT8-NEXT: %.promoted6 = load i64, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 3), align 8
+; DEFAULT8-NEXT: %.promoted8 = load i64, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 4), align 8
+; DEFAULT8-NEXT: %.promoted10 = load i64, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 5), align 8
+; DEFAULT8-NEXT: %.promoted12 = load i64, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 6), align 8
+; DEFAULT8-NEXT: %.promoted14 = load i64, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 7), align 8
+; DEFAULT8-NEXT: br label %for.cond16.preheader
+
+define void @test() {
+entry:
+  %A = alloca [8 x [8 x i64]], align 8
+  %i = bitcast [8 x [8 x i64]]* %A to i8*
+  br label %for.cond16.preheader
+
+for.cond16.preheader:                             ; preds = %for.cond16.preheader, %entry
+  %storemerge3844 = phi i64 [ 0, %entry ], [ %inc35, %for.cond16.preheader ]
+  %arrayidx25 = getelementptr inbounds [8 x [8 x i64]], [8 x [8 x i64]]* %A, i64 0, i64 %storemerge3844, i64 0
+  %arrayidx23 = getelementptr inbounds [8 x [8 x i8]], [8 x [8 x i8]]* @test.f, i64 0, i64 0, i64 %storemerge3844
+  %i7 = load i8, i8* %arrayidx23, align 1
+  %conv = sext i8 %i7 to i64
+  %i8 = load i64, i64* %arrayidx25, align 8
+  %mul = mul nsw i64 %i8, %conv
+  %i9 = load i64, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 0), align 8
+  %add = add nsw i64 %i9, %mul
+  store i64 %add, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 0), align 8
+  %arrayidx25.1 = getelementptr inbounds [8 x [8 x i64]], [8 x [8 x i64]]* %A, i64 0, i64 %storemerge3844, i64 1
+  %i10 = load i64, i64* %arrayidx25.1, align 8
+  %mul.1 = mul nsw i64 %i10, %conv
+  %i11 = load i64, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 1), align 8
+  %add.1 = add nsw i64 %i11, %mul.1
+  store i64 %add.1, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 1), align 8
+  %arrayidx25.2 = getelementptr inbounds [8 x [8 x i64]], [8 x [8 x i64]]* %A, i64 0, i64 %storemerge3844, i64 2
+  %i12 = load i64, i64* %arrayidx25.2, align 8
+  %mul.2 = mul nsw i64 %i12, %conv
+  %i13 = load i64, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 2), align 8
+  %add.2 = add nsw i64 %i13, %mul.2
+  store i64 %add.2, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 2), align 8
+  %arrayidx25.3 = getelementptr inbounds [8 x [8 x i64]], [8 x [8 x i64]]* %A, i64 0, i64 %storemerge3844, i64 3
+  %i14 = load i64, i64* %arrayidx25.3, align 8
+  %mul.3 = mul nsw i64 %i14, %conv
+  %i15 = load i64, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 3), align 8
+  %add.3 = add nsw i64 %i15, %mul.3
+  store i64 %add.3, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 3), align 8
+  %arrayidx25.4 = getelementptr inbounds [8 x [8 x i64]], [8 x [8 x i64]]* %A, i64 0, i64 %storemerge3844, i64 4
+  %i16 = load i64, i64* %arrayidx25.4, align 8
+  %mul.4 = mul nsw i64 %i16, %conv
+  %i17 = load i64, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 4), align 8
+  %add.4 = add nsw i64 %i17, %mul.4
+  store i64 %add.4, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 4), align 8
+  %arrayidx25.5 = getelementptr inbounds [8 x [8 x i64]], [8 x [8 x i64]]* %A, i64 0, i64 %storemerge3844, i64 5
+  %i18 = load i64, i64* %arrayidx25.5, align 8
+  %mul.5 = mul nsw i64 %i18, %conv
+  %i19 = load i64, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 5), align 8
+  %add.5 = add nsw i64 %i19, %mul.5
+  store i64 %add.5, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 5), align 8
+  %arrayidx25.6 = getelementptr inbounds [8 x [8 x i64]], [8 x [8 x i64]]* %A, i64 0, i64 %storemerge3844, i64 6
+  %i20 = load i64, i64* %arrayidx25.6, align 8
+  %mul.6 = mul nsw i64 %i20, %conv
+  %i21 = load i64, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 6), align 8
+  %add.6 = add nsw i64 %i21, %mul.6
+  store i64 %add.6, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 6), align 8
+  %arrayidx25.7 = getelementptr inbounds [8 x [8 x i64]], [8 x [8 x i64]]* %A, i64 0, i64 %storemerge3844, i64 7
+  %i22 = load i64, i64* %arrayidx25.7, align 8
+  %mul.7 = mul nsw i64 %i22, %conv
+  %i23 = load i64, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 7), align 8
+  %add.7 = add nsw i64 %i23, %mul.7
+  store i64 %add.7, i64* getelementptr inbounds ([8 x [8 x i64]], [8 x [8 x i64]]* @test.F, i64 0, i64 0, i64 7), align 8
+  %inc35 = add nuw nsw i64 %storemerge3844, 1
+  %exitcond47.not = icmp eq i64 %inc35, 8
+  br i1 %exitcond47.not, label %for.end36, label %for.cond16.preheader
+
+for.end36:
+  ret void
+}
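
Note (not part of the patch): for readers skimming the diff, here is a minimal
standalone sketch of the default-cap heuristic that SetMaxNumPromotions applies
when -licm-max-num-promotions is left at its default of 0. The helper name
computePromotionCap is illustrative only; the TargetTransformInfo calls are the
same ones the patch itself uses.

  #include "llvm/Analysis/TargetTransformInfo.h"

  using namespace llvm;

  // Hypothetical helper mirroring SetMaxNumPromotions: with no user-supplied
  // cap (UserMax == 0), fall back to the target's scalar register count, a
  // crude stand-in for a real register-pressure estimate.
  static unsigned computePromotionCap(const TargetTransformInfo &TTI,
                                      unsigned UserMax) {
    if (UserMax)
      return UserMax;
    // getRegisterClassForType(false) selects the scalar register class;
    // getNumberOfRegisters reports how many registers that class contains.
    return TTI.getNumberOfRegisters(TTI.getRegisterClassForType(false));
  }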