Index: llvm/include/llvm/Transforms/Scalar/GVN.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar/GVN.h
+++ llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -370,7 +370,7 @@
   void replaceInstruction(Instruction *I, Instruction *Repl);
   std::pair<bool, bool> hoistPair(BasicBlock *DestBB, BasicBlock *ThenBB,
                                   BasicBlock *ElseBB, Instruction *ThenI,
-                                  Instruction *ElseI);
+                                  Instruction *ElseI, uint32_t DepLength);
   Value *findLeader(const BasicBlock *BB, uint32_t num);
   void cleanupGlobalSets();
   void verifyRemoved(const Instruction *I) const;
Index: llvm/lib/Transforms/Scalar/GVN.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/GVN.cpp
+++ llvm/lib/Transforms/Scalar/GVN.cpp
@@ -128,6 +128,13 @@
 
 static cl::opt<bool> GVNEnableSimpleGVNHoist("enable-simple-gvn-hoist",
                                              cl::init(true));
+static cl::opt<uint32_t> MaxBlockDepth(
+    "simple-gvn-hoist-max-block-depth", cl::Hidden, cl::init(100),
+    cl::desc("Limit hoist candidates to the first N instructions in a block"));
+
+static cl::opt<uint32_t> MaxChainLength(
+    "simple-gvn-hoist-max-chain-length", cl::Hidden, cl::init(10),
+    cl::desc("Limit the length of dependency chains"));
 
 struct llvm::GVN::Expression {
   uint32_t opcode;
@@ -3120,8 +3127,11 @@
 }
 
 void GVN::collectHoistCandidates(BasicBlock *BB) {
+  uint32_t Depth = 0;
   bool HasHoistBarrier = false;
   for (Instruction &I : *BB) {
+    if (++Depth > MaxBlockDepth)
+      break;
     if (I.isTerminator())
       break;
     if (isa<PHINode>(I))
@@ -3137,8 +3147,11 @@
 }
 
 void GVN::matchHoistCandidates(BasicBlock *BB) {
+  uint32_t Depth = 0;
   bool HasHoistBarrier = false;
   for (Instruction &I : *BB) {
+    if (++Depth > MaxBlockDepth)
+      break;
     if (I.isTerminator())
       break;
     if (isa<PHINode>(I))
@@ -3198,11 +3211,15 @@
 // block.
 std::pair<bool, bool> GVN::hoistPair(BasicBlock *DestBB, BasicBlock *ThenBB,
                                      BasicBlock *ElseBB, Instruction *ThenI,
-                                     Instruction *ElseI) {
+                                     Instruction *ElseI, uint32_t DepLength) {
   // If the instruction is moved out of the "then" block there's nothing to do.
   if (ThenI->getParent() != ThenBB)
     return {false, false};
 
+  // Check if we have exceeded dependency chain length limit.
+  if (DepLength > MaxChainLength)
+    return {false, true};
+
   // Instruction must have already been selected for hoisting and matched with
   // another instruction.
   auto It = HoistPairs.find(VN.lookupOrAdd(ThenI));
@@ -3228,7 +3245,7 @@
       continue;
     bool OpChange, CantHoist;
     std::tie(OpChange, CantHoist) =
-        hoistPair(DestBB, ThenBB, ElseBB, Op, nullptr);
+        hoistPair(DestBB, ThenBB, ElseBB, Op, nullptr, DepLength + 1);
     Change |= OpChange;
     if (CantHoist)
       return {Change, true};
@@ -3250,9 +3267,9 @@
     MemoryAccess *ThenMA = Walker->getClobberingMemoryAccess(ThenI);
     while (!MSSA->isLiveOnEntryDef(ThenMA) && ThenMA->getBlock() == ThenBB) {
       bool DepChange, CantHoist;
-      std::tie(DepChange, CantHoist) =
-          hoistPair(DestBB, ThenBB, ElseBB,
-                    cast<MemoryDef>(ThenMA)->getMemoryInst(), nullptr);
+      std::tie(DepChange, CantHoist) = hoistPair(
+          DestBB, ThenBB, ElseBB, cast<MemoryDef>(ThenMA)->getMemoryInst(),
+          nullptr, DepLength + 1);
       Change |= DepChange;
       if (CantHoist)
         return {Change, true};
@@ -3283,7 +3300,7 @@
       for (Instruction *I : MemoryUses) {
         bool DepChange, CantHoist;
         std::tie(DepChange, CantHoist) =
-            hoistPair(DestBB, ThenBB, ElseBB, I, nullptr);
+            hoistPair(DestBB, ThenBB, ElseBB, I, nullptr, DepLength + 1);
         Change |= DepChange;
         if (CantHoist)
           return {Change, true};
@@ -3351,8 +3368,12 @@
     // Hoist matched pairs.
     for (const auto &P : HoistPairs) {
       bool LocalChange, _;
-      std::tie(LocalChange, _) =
-          hoistPair(BB, Then, Else, P.second.first, P.second.second);
+      Instruction *ThenI = P.second.first, *ElseI = P.second.second;
+      // Don't initiate hoisting from GEPs (they still can be hoisted by their
+      // users).
+      if (isa<GetElementPtrInst>(ThenI))
+        continue;
+      std::tie(LocalChange, _) = hoistPair(BB, Then, Else, ThenI, ElseI, 0);
       Change |= LocalChange;
     }
   }
Index: llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
===================================================================
--- llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
+++ llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
@@ -163,16 +163,16 @@
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4, !alias.scope !8
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 20, i32 20, i32 20, i32 20>
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP7]], align 4, !alias.scope !11
-; CHECK-NEXT:    [[TMP8:%.*]] = fmul <4 x float> [[WIDE_LOAD14]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP5]] to <4 x float>*
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[TMP5]] to <4 x float>*
+; CHECK-NEXT:    [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4, !alias.scope !11
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul <4 x float> [[WIDE_LOAD14]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
 ; CHECK-NEXT:    [[WIDE_LOAD15:%.*]] = load <4 x float>, <4 x float>* [[TMP9]], align 4, !alias.scope !13, !noalias !15
-; CHECK-NEXT:    [[TMP10:%.*]] = fadd <4 x float> [[TMP8]], [[WIDE_LOAD15]]
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[TMP8]], <4 x float> [[TMP10]]
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast float* [[TMP5]] to <4 x float>*
+; CHECK-NEXT:    [[TMP10:%.*]] = fadd <4 x float> [[TMP7]], [[WIDE_LOAD15]]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[TMP7]], <4 x float> [[TMP10]]
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
 ; CHECK-NEXT:    store <4 x float> [[PREDPHI]], <4 x float>* [[TMP11]], align 4, !alias.scope !13, !noalias !15
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
@@ -182,18 +182,18 @@
 ; CHECK-NEXT:    [[C_GEP:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[IV1]]
 ; CHECK-NEXT:    [[C_LV:%.*]] = load i32, i32* [[C_GEP]], align 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C_LV]], 20
-; CHECK-NEXT:    [[B_GEP_0:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[IV1]]
 ; CHECK-NEXT:    [[A_GEP_0:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IV1]]
 ; CHECK-NEXT:    [[A_LV_0:%.*]] = load float, float* [[A_GEP_0]], align 4
 ; CHECK-NEXT:    [[MUL2_I81_I:%.*]] = fmul float [[A_LV_0]], [[X]]
+; CHECK-NEXT:    [[B_GEP_0:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[IV1]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP_LATCH]], label [[ELSE:%.*]]
 ; CHECK:       else:
 ; CHECK-NEXT:    [[B_LV:%.*]] = load float, float* [[B_GEP_0]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[MUL2_I81_I]], [[B_LV]]
 ; CHECK-NEXT:    br label [[LOOP_LATCH]]
 ; CHECK:       loop.latch:
-; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi float [ [[ADD]], [[ELSE]] ], [ [[MUL2_I81_I]], [[LOOP_BODY]] ]
-; CHECK-NEXT:    store float [[STOREMERGE]], float* [[B_GEP_0]], align 4
+; CHECK-NEXT:    [[ADD_SINK:%.*]] = phi float [ [[ADD]], [[ELSE]] ], [ [[MUL2_I81_I]], [[LOOP_BODY]] ]
+; CHECK-NEXT:    store float [[ADD_SINK]], float* [[B_GEP_0]], align 4
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1
 ; CHECK-NEXT:    [[CMP_0:%.*]] = icmp ult i64 [[IV1]], 9999
 ; CHECK-NEXT:    br i1 [[CMP_0]], label [[LOOP_BODY]], label [[EXIT]], !llvm.loop [[LOOP17:![0-9]+]]