Index: llvm/lib/Transforms/Scalar/LICM.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LICM.cpp
+++ llvm/lib/Transforms/Scalar/LICM.cpp
@@ -96,6 +96,10 @@
     DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
                      cl::desc("Disable memory promotion in LICM pass"));
 
+static cl::opt<unsigned> MaxNumPromotions(
+    "licm-max-num-promotions", cl::Hidden, cl::init(0),
+    cl::desc("Maximum number of instructions LICM hoists out of a loop"));
+
 static cl::opt<bool> ControlFlowHoisting(
     "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
     cl::desc("Enable control flow (and PHI) hoisting in LICM"));
@@ -328,6 +332,20 @@
       }
 }
 
+/// Cap hoisting at the user's -licm-max-num-promotions value (an explicit 0
+/// disables hoisting) or, if not given, at \p TTI's scalar register count.
+/// A crude stand-in for a real register pressure estimator.
+static void SetMaxNumPromotions(TargetTransformInfo *TTI) {
+  // Test occurrences, not the value: the value is overwritten below and must
+  // be re-derived on every call, since a later call may pass a different TTI.
+  if (MaxNumPromotions.getNumOccurrences() == 0) {
+    unsigned ScalarRC = TTI->getRegisterClassForType(/*Vector=*/false);
+    MaxNumPromotions = TTI->getNumberOfRegisters(ScalarRC);
+  }
+  LLVM_DEBUG(dbgs() << "LICM: Using promotion maximum: " << MaxNumPromotions
+                    << "\n");
+}
+
 /// Hoist expressions out of the specified loop. Note, alias info for inner
 /// loop is not preserved so it is not a good idea to run LICM multiple
 /// times on one loop.
@@ -345,6 +363,8 @@
     return false;
   }
 
+  SetMaxNumPromotions(TTI);
+
   std::unique_ptr<AliasSetTracker> CurAST;
   std::unique_ptr<MemorySSAUpdater> MSSAU;
   std::unique_ptr<SinkAndHoistLICMFlags> Flags;
@@ -838,6 +858,7 @@
   LoopBlocksRPO Worklist(CurLoop);
   Worklist.perform(LI);
   bool Changed = false;
+  unsigned Hoisted = 0;
   for (BasicBlock *BB : Worklist) {
     // Only need to process the contents of this block if it is not part of a
     // subloop (which would already have been processed).
@@ -863,6 +884,11 @@
         continue;
       }
 
+      if (Hoisted == MaxNumPromotions) {
+        LLVM_DEBUG(dbgs() << "LICM: maximum reached, not hoisting: "; I.dump());
+        continue;
+      }
+
       // Try hoisting the instruction out to the preheader.  We can only do
       // this if all of the operands of the instruction are loop invariant and
       // if it is safe to hoist the instruction. We also check block frequency
@@ -881,6 +907,7 @@
               MSSAU, SE, ORE);
         HoistedInstructions.push_back(&I);
         Changed = true;
+        Hoisted++;
         continue;
       }
 
@@ -907,6 +934,7 @@
               SafetyInfo, MSSAU, SE, ORE);
         HoistedInstructions.push_back(ReciprocalDivisor);
         Changed = true;
+        Hoisted++;
         continue;
       }
 
@@ -926,6 +954,7 @@
               MSSAU, SE, ORE);
         HoistedInstructions.push_back(&I);
         Changed = true;
+        Hoisted++;
         continue;
       }
 
@@ -940,6 +969,7 @@
                 MSSAU, SE, ORE);
           assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
           Changed = true;
+          Hoisted++;
           continue;
         }
       }
Index: llvm/test/Transforms/LICM/hoist-round.ll
===================================================================
--- llvm/test/Transforms/LICM/hoist-round.ll
+++ llvm/test/Transforms/LICM/hoist-round.ll
@@ -1,6 +1,8 @@
-; RUN: opt -S -licm < %s | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s
-; RUN: opt -S -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -licm  < %s | FileCheck %s --check-prefix=DEFAULT
+; RUN: opt -S -licm -licm-max-num-promotions=14 < %s | FileCheck %s --check-prefix=MAX
+; RUN: opt -licm-max-num-promotions=14 -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' -S %s | FileCheck %s --check-prefix=MAX
+; RUN: opt -S -licm -licm-max-num-promotions=14 -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s --check-prefix=MAX
 
 target datalayout = "E-m:e-p:32:32-i8:8:8-i16:16:16-i64:32:32-f64:32:32-v64:32:32-v128:32:32-a0:0:32-n32"
 
@@ -8,22 +10,63 @@
 ; copysign, minnum, maxnum, minimum, maximum, and fabs intrinsics are
 ; considered safe to speculate.
 
-; CHECK-LABEL: @test
-; CHECK: call float @llvm.ceil.f32
-; CHECK: call float @llvm.floor.f32
-; CHECK: call float @llvm.nearbyint.f32
-; CHECK: call float @llvm.rint.f32
-; CHECK: call float @llvm.round.f32
-; CHECK: call float @llvm.trunc.f32
-; CHECK: call float @llvm.fabs.f32
-; CHECK: call float @llvm.copysign.f32
-; CHECK: call float @llvm.minnum.f32
-; CHECK: call float @llvm.maxnum.f32
-; CHECK: call float @llvm.powi.f32
-; CHECK: call float @llvm.roundeven.f32
-; CHECK: for.body:
-
 define void @test(float %arg1, float %arg2) {
+; DEFAULT-LABEL: @test(
+; DEFAULT-NEXT:  entry:
+; DEFAULT-NEXT:    [[TMP_1:%.*]] = call float @llvm.ceil.f32(float [[ARG1:%.*]])
+; DEFAULT-NEXT:    [[TMP_2:%.*]] = call float @llvm.floor.f32(float [[TMP_1]])
+; DEFAULT-NEXT:    [[TMP_3:%.*]] = call float @llvm.nearbyint.f32(float [[TMP_2]])
+; DEFAULT-NEXT:    [[TMP_4:%.*]] = call float @llvm.rint.f32(float [[TMP_3]])
+; DEFAULT-NEXT:    [[TMP_5:%.*]] = call float @llvm.round.f32(float [[TMP_4]])
+; DEFAULT-NEXT:    [[TMP_6:%.*]] = call float @llvm.trunc.f32(float [[TMP_5]])
+; DEFAULT-NEXT:    [[TMP_7:%.*]] = call float @llvm.fabs.f32(float [[TMP_6]])
+; DEFAULT-NEXT:    [[TMP_8:%.*]] = call float @llvm.copysign.f32(float [[TMP_7]], float [[ARG2:%.*]])
+; DEFAULT-NEXT:    br label [[FOR_HEAD:%.*]]
+; DEFAULT:       for.head:
+; DEFAULT-NEXT:    [[IND:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IND_NEW:%.*]], [[FOR_BODY:%.*]] ]
+; DEFAULT-NEXT:    [[CMP:%.*]] = icmp slt i32 [[IND]], 10
+; DEFAULT-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT:%.*]]
+; DEFAULT:       for.body:
+; DEFAULT-NEXT:    [[TMP_9:%.*]] = call float @llvm.minnum.f32(float [[TMP_8]], float [[ARG2]])
+; DEFAULT-NEXT:    [[TMP_10:%.*]] = call float @llvm.maxnum.f32(float [[TMP_9]], float [[ARG2]])
+; DEFAULT-NEXT:    [[TMP_11:%.*]] = call float @llvm.minimum.f32(float [[TMP_10]], float [[ARG2]])
+; DEFAULT-NEXT:    [[TMP_12:%.*]] = call float @llvm.maximum.f32(float [[TMP_11]], float [[ARG2]])
+; DEFAULT-NEXT:    [[TMP_13:%.*]] = call float @llvm.powi.f32(float [[TMP_12]], i32 4)
+; DEFAULT-NEXT:    [[TMP_14:%.*]] = call float @llvm.roundeven.f32(float [[TMP_13]])
+; DEFAULT-NEXT:    call void @consume(float [[TMP_14]])
+; DEFAULT-NEXT:    [[IND_NEW]] = add i32 [[IND]], 1
+; DEFAULT-NEXT:    br label [[FOR_HEAD]]
+; DEFAULT:       exit:
+; DEFAULT-NEXT:    ret void
+;
+; MAX-LABEL: @test(
+; MAX-NEXT:  entry:
+; MAX-NEXT:    [[TMP_1:%.*]] = call float @llvm.ceil.f32(float [[ARG1:%.*]])
+; MAX-NEXT:    [[TMP_2:%.*]] = call float @llvm.floor.f32(float [[TMP_1]])
+; MAX-NEXT:    [[TMP_3:%.*]] = call float @llvm.nearbyint.f32(float [[TMP_2]])
+; MAX-NEXT:    [[TMP_4:%.*]] = call float @llvm.rint.f32(float [[TMP_3]])
+; MAX-NEXT:    [[TMP_5:%.*]] = call float @llvm.round.f32(float [[TMP_4]])
+; MAX-NEXT:    [[TMP_6:%.*]] = call float @llvm.trunc.f32(float [[TMP_5]])
+; MAX-NEXT:    [[TMP_7:%.*]] = call float @llvm.fabs.f32(float [[TMP_6]])
+; MAX-NEXT:    [[TMP_8:%.*]] = call float @llvm.copysign.f32(float [[TMP_7]], float [[ARG2:%.*]])
+; MAX-NEXT:    [[TMP_9:%.*]] = call float @llvm.minnum.f32(float [[TMP_8]], float [[ARG2]])
+; MAX-NEXT:    [[TMP_10:%.*]] = call float @llvm.maxnum.f32(float [[TMP_9]], float [[ARG2]])
+; MAX-NEXT:    [[TMP_11:%.*]] = call float @llvm.minimum.f32(float [[TMP_10]], float [[ARG2]])
+; MAX-NEXT:    [[TMP_12:%.*]] = call float @llvm.maximum.f32(float [[TMP_11]], float [[ARG2]])
+; MAX-NEXT:    [[TMP_13:%.*]] = call float @llvm.powi.f32(float [[TMP_12]], i32 4)
+; MAX-NEXT:    [[TMP_14:%.*]] = call float @llvm.roundeven.f32(float [[TMP_13]])
+; MAX-NEXT:    br label [[FOR_HEAD:%.*]]
+; MAX:       for.head:
+; MAX-NEXT:    [[IND:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IND_NEW:%.*]], [[FOR_BODY:%.*]] ]
+; MAX-NEXT:    [[CMP:%.*]] = icmp slt i32 [[IND]], 10
+; MAX-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT:%.*]]
+; MAX:       for.body:
+; MAX-NEXT:    call void @consume(float [[TMP_14]])
+; MAX-NEXT:    [[IND_NEW]] = add i32 [[IND]], 1
+; MAX-NEXT:    br label [[FOR_HEAD]]
+; MAX:       exit:
+; MAX-NEXT:    ret void
+;
 entry:
   br label %for.head