Index: llvm/include/llvm/Transforms/Scalar/GVN.h =================================================================== --- llvm/include/llvm/Transforms/Scalar/GVN.h +++ llvm/include/llvm/Transforms/Scalar/GVN.h @@ -240,6 +240,13 @@ DenseMap LeaderTable; BumpPtrAllocator TableAllocator; + /// A mapping from value numbers to a pair of instructions. This map + /// stores pairs of instructions with the same value number, from two blocks + /// having a single common predecessor, for the duration of a single top level + /// iteration in `performHoist`. + using HoistMap = DenseMap>; + HoistMap HoistPairs; + // Block-local map of equivalent values to their leader, does not // propagate to any successors. Entries added mid-block are applied // to the remaining instructions in the block. @@ -354,6 +361,13 @@ bool performScalarPRE(Instruction *I); bool performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, BasicBlock *Curr, unsigned int ValNo); + bool performHoist(Function &F); + void collectHoistCandidates(BasicBlock *ThenBB); + void matchHoistCandidates(BasicBlock *ElseBB); + void replaceInstruction(Instruction *I, Instruction *Repl); + std::pair hoistPair(BasicBlock *DestBB, BasicBlock *ThenBB, + BasicBlock *ElseBB, Instruction *ThenI, + Instruction *ElseI); Value *findLeader(const BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; Index: llvm/lib/Transforms/Scalar/GVN.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GVN.cpp +++ llvm/lib/Transforms/Scalar/GVN.cpp @@ -126,6 +126,9 @@ "into) when deducing if a value is fully available or not in GVN " "(default = 600)")); +static cl::opt GVNEnableSimpleGVNHoist("enable-simple-gvn-hoist", + cl::init(true)); + struct llvm::GVN::Expression { uint32_t opcode; bool commutative = false; @@ -2503,6 +2506,11 @@ } } + if (GVNEnableSimpleGVNHoist) { + LeaderTable.clear(); + Changed |= performHoist(F); + } + // FIXME: Should perform GVN again after PRE does something. PRE can move // computations into blocks where they become fully redundant. Note that // we can't do this until PRE's critical edge splitting updates memdep. @@ -3049,6 +3057,170 @@ } } +// Won't reorder above these instructions. +static bool isHoistBarrier(const Instruction &I) { + return !isGuaranteedToTransferExecutionToSuccessor(&I); +} + +void GVN::collectHoistCandidates(BasicBlock *BB) { + bool HasHoistBarrier = false; + for (Instruction &I : *BB) { + if (I.isTerminator()) + break; + if (isa(I)) + continue; + + if (!HasHoistBarrier || isSafeToSpeculativelyExecute(&I)) + HoistPairs[VN.lookupOrAdd(&I)].first = &I; + + if ((HasHoistBarrier |= isHoistBarrier(I))) + LLVM_DEBUG(dbgs() << "Simple GVNHoist: reached hoist barrier" << I + << '\n';); + } +} + +void GVN::matchHoistCandidates(BasicBlock *BB) { + bool HasHoistBarrier = false; + for (Instruction &I : *BB) { + if (I.isTerminator()) + break; + if (isa(I)) + continue; + + if (!HasHoistBarrier || isSafeToSpeculativelyExecute(&I)) { + uint32_t N = VN.lookupOrAdd(&I); + auto It = HoistPairs.find(N); + if (It != HoistPairs.end()) + It->second.second = &I; + } + + if ((HasHoistBarrier |= isHoistBarrier(I))) + LLVM_DEBUG(dbgs() << "Simple GVNHoist: reached hoist barrier" << I + << '\n';); + } +} + +void GVN::replaceInstruction(Instruction *I, Instruction *Repl) { + LLVM_DEBUG(dbgs() << "Simple GVNHoist: replacing" << *I << " by" << *Repl + << '\n';); + patchReplacementInstruction(I, Repl); + ICF->removeUsersOf(I); + I->replaceAllUsesWith(Repl); + salvageKnowledge(I, AC); + salvageDebugInfo(*I); + if (MD) + MD->removeInstruction(I); + if (MSSAU) + MSSAU->removeMemoryAccess(I); + VN.erase(I); + ICF->removeInstruction(I); + LLVM_DEBUG(verifyRemoved(I)); + I->eraseFromParent(); +} + +// Only hoist instructions from the "then" block. +// Each hoisted instructions must be paired with an instruction from the "else" +// block. +std::pair GVN::hoistPair(BasicBlock *DestBB, BasicBlock *ThenBB, + BasicBlock *ElseBB, Instruction *ThenI, + Instruction *ElseI) { + // If the instruction is moved out of the "then" block there's nothing to do. + if (ThenI->getParent() != ThenBB) + return {false, false}; + + // Instruction must have already been selected for hoisting and matched with + // another instruction. + auto It = HoistPairs.find(VN.lookupOrAdd(ThenI)); + if (It == HoistPairs.end()) + return {false, true}; + + assert((ElseI == nullptr || ElseI == It->second.second) && + "Attempt to hoist two not paired instructions."); + + if (LLVM_LIKELY(ElseI == nullptr) && (ElseI = It->second.second) == nullptr) + return {false, true}; + It->second.second = nullptr; + + assert(ElseI->getParent() == ElseBB && "Instruction already removed"); + assert(!ThenI->mayReadOrWriteMemory() && !ElseI->mayReadOrWriteMemory() && + "Memory read/write instructions must not be hoisted."); + bool Change = false; + + // Hoist operands. Begin by hoisting all of the operands of the "then" + // instruction, then check all of the operands of the "else" instruction + // dominate its block. + for (unsigned I = 0, N = ThenI->getNumOperands(); I < N; ++I) { + auto *Op = dyn_cast(ThenI->getOperand(I)); + if (Op == nullptr) + continue; + bool OpChange, CantHoist; + std::tie(OpChange, CantHoist) = + hoistPair(DestBB, ThenBB, ElseBB, Op, nullptr); + Change |= OpChange; + if (CantHoist) + return {Change, true}; + } + + for (unsigned I = 0, N = ElseI->getNumOperands(); I < N; ++I) { + auto *Op = dyn_cast(ElseI->getOperand(I)); + if (Op == nullptr) + continue; + if (Op->getParent() == ElseBB) + return {Change, true}; + } + + // Hoist one of the instructions and replace all uses of the other with it. + ICF->removeInstruction(ThenI); + ICF->insertInstructionTo(ThenI, DestBB); + ThenI->moveBefore(DestBB->getTerminator()); + replaceInstruction(ElseI, ThenI); + + return {true, false}; +} + +// Perform trivial hoisting of values from two blocks to their common +// predecessor. +bool GVN::performHoist(Function &F) { + LLVM_DEBUG(dbgs() << "Simple GVNHoist: running on function " << F.getName() + << '\n';); + bool Change = false; + ReversePostOrderTraversal RPOT(&F); + for (BasicBlock *BB : RPOT) { + // Check we have a block of the desired shape. + auto *BI = dyn_cast(BB->getTerminator()); + if (!BI || !BI->isConditional()) + continue; + + BasicBlock *Then = BI->getSuccessor(0); + BasicBlock *Else = BI->getSuccessor(1); + + if (!Then->getSinglePredecessor() || !Else->getSinglePredecessor()) + continue; + + LLVM_DEBUG(dbgs() << "Simple GVNHoist: looking at block " << BB->getName() + << '\n'); + + // Collect all hoistable instructions from the smaller block, then match + // them by value number with the instructions from the other block. + if (Then->size() > Else->size()) + std::swap(Then, Else); + + HoistPairs.clear(); + collectHoistCandidates(Then); + matchHoistCandidates(Else); + + // Hoist matched pairs. + for (const auto &P : HoistPairs) { + bool LocalChange, _; + std::tie(LocalChange, _) = + hoistPair(BB, Then, Else, P.second.first, P.second.second); + Change |= LocalChange; + } + } + + return Change; +} + class llvm::gvn::GVNLegacyPass : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid Index: llvm/test/Transforms/GVN/PRE/local-pre.ll =================================================================== --- llvm/test/Transforms/GVN/PRE/local-pre.ll +++ llvm/test/Transforms/GVN/PRE/local-pre.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -gvn -enable-pre -S | FileCheck %s -; RUN: opt < %s -passes="gvn
" -enable-pre=false -S | FileCheck %s
+; RUN: opt < %s -gvn -enable-pre -enable-simple-gvn-hoist=false -S | FileCheck %s
+; RUN: opt < %s -passes="gvn
" -enable-pre=false -enable-simple-gvn-hoist=false -S | FileCheck %s
 
 declare void @may_exit() nounwind
 
Index: llvm/test/Transforms/GVN/PRE/phi-translate.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/phi-translate.ll
+++ llvm/test/Transforms/GVN/PRE/phi-translate.ll
@@ -1,4 +1,4 @@
-; RUN: opt -basic-aa -gvn -S < %s | FileCheck %s
+; RUN: opt -basic-aa -gvn -enable-simple-gvn-hoist=false -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64"
 
Index: llvm/test/Transforms/GVN/PRE/pre-basic-add.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/pre-basic-add.ll
+++ llvm/test/Transforms/GVN/PRE/pre-basic-add.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -gvn -enable-pre -S | FileCheck %s
-; RUN: opt < %s -passes="gvn
" -enable-pre=false -S | FileCheck %s
+; RUN: opt < %s -gvn -enable-pre  -enable-simple-gvn-hoist=false -S | FileCheck %s
+; RUN: opt < %s -passes="gvn
" -enable-pre=false -enable-simple-gvn-hoist=false -S | FileCheck %s
 
 @H = common global i32 0		;  [#uses=2]
 @G = common global i32 0		;  [#uses=1]
Index: llvm/test/Transforms/GVN/PRE/pre-load.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/pre-load.ll
+++ llvm/test/Transforms/GVN/PRE/pre-load.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -basic-aa -gvn -enable-load-pre -S | FileCheck %s
-; RUN: opt < %s -aa-pipeline=basic-aa -passes="gvn" -enable-load-pre=false -S | FileCheck %s
+; RUN: opt < %s -basic-aa -gvn -enable-load-pre -enable-simple-gvn-hoist=false -S | FileCheck %s
+; RUN: opt < %s -aa-pipeline=basic-aa -passes="gvn" -enable-load-pre=false -enable-simple-gvn-hoist=false -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
 define i32 @test1(i32* %p, i1 %C) {
Index: llvm/test/Transforms/GVN/PRE/pre-no-cost-phi.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/pre-no-cost-phi.ll
+++ llvm/test/Transforms/GVN/PRE/pre-no-cost-phi.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -S | FileCheck %s
+; RUN: opt < %s -gvn -enable-simple-gvn-hoist=false -S | FileCheck %s
 ; This testcase tests insertion of no-cost phis.  That is,
 ; when the value is already available in every predecessor,
 ; and we just need to insert a phi node to merge the available values.
Index: llvm/test/Transforms/GVN/PRE/pre-poison-add.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/pre-poison-add.ll
+++ llvm/test/Transforms/GVN/PRE/pre-poison-add.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -gvn -enable-pre -S | FileCheck %s
+; RUN: opt < %s -gvn -enable-pre -enable-simple-gvn-hoist=false -S | FileCheck %s
+; RUN: opt < %s --passes="gvn
" -S | FileCheck %s --check-prefix=CHECK-HOIST
 
 @H = common global i32 0
 @G = common global i32 0
@@ -29,9 +30,12 @@
 
 define i32 @test2(i1 %cond, i32 %v) nounwind {
 ; CHECK-LABEL: @test2
+; CHECK-HOIST-LABEL: @test2
 entry:
     br i1 %cond, label %bb, label %bb1
 
+; CHECK-HOIST: entry:
+; CHECK-HOIST: %.pre = add i32 %v, 42
 bb:
     %add.1 = add i32 %v, 42
 ; CHECK: %add.1 = add i32 %v, 42
Index: llvm/test/Transforms/GVN/freeze.ll
===================================================================
--- llvm/test/Transforms/GVN/freeze.ll
+++ llvm/test/Transforms/GVN/freeze.ll
@@ -1,11 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -gvn -S | FileCheck %s
-; RUN: opt < %s -passes=gvn -S | FileCheck %s
+; RUN: opt < %s -gvn -enable-simple-gvn-hoist=false -S | FileCheck %s
+; RUN: opt < %s -passes=gvn -enable-simple-gvn-hoist=false -S | FileCheck %s
+; RUN: opt < %s -passes=gvn -S | FileCheck %s --check-prefix=CHECK-HOIST
 
 define i1 @f(i1 %a) {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:    [[B:%.*]] = freeze i1 [[A:%.*]]
 ; CHECK-NEXT:    ret i1 [[B]]
+;
+; CHECK-HOIST-LABEL: @f(
+; CHECK-HOIST-NEXT:    [[B:%.*]] = freeze i1 [[A:%.*]]
+; CHECK-HOIST-NEXT:    ret i1 [[B]]
 ;
   %b = freeze i1 %a
   %c = freeze i1 %a
@@ -20,6 +25,13 @@
 ; CHECK-NEXT:    call void @use1(i1 [[B]])
 ; CHECK-NEXT:    call void @use1(i1 [[B]])
 ; CHECK-NEXT:    ret void
+;
+; CHECK-HOIST-LABEL: @f_multipleuses(
+; CHECK-HOIST-NEXT:    [[B:%.*]] = freeze i1 [[A:%.*]]
+; CHECK-HOIST-NEXT:    call void @use1(i1 [[B]])
+; CHECK-HOIST-NEXT:    call void @use1(i1 [[B]])
+; CHECK-HOIST-NEXT:    call void @use1(i1 [[B]])
+; CHECK-HOIST-NEXT:    ret void
 ;
   %b = freeze i1 %a
   %c = freeze i1 %a
@@ -40,6 +52,16 @@
 ; CHECK-NEXT:    [[Y:%.*]] = freeze i1 [[A]]
 ; CHECK-NEXT:    call void @use2(i1 [[Y]])
 ; CHECK-NEXT:    ret void
+;
+; CHECK-HOIST-LABEL: @f_dom(
+; CHECK-HOIST-NEXT:    [[X:%.*]] = freeze i1 [[A:%.*]]
+; CHECK-HOIST-NEXT:    br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK-HOIST:       BB1:
+; CHECK-HOIST-NEXT:    call void @use1(i1 [[X]])
+; CHECK-HOIST-NEXT:    ret void
+; CHECK-HOIST:       BB2:
+; CHECK-HOIST-NEXT:    call void @use2(i1 [[X]])
+; CHECK-HOIST-NEXT:    ret void
 ;
   br i1 %cond, label %BB1, label %BB2
 BB1:
Index: llvm/test/Transforms/GVN/gc_relocate.ll
===================================================================
--- llvm/test/Transforms/GVN/gc_relocate.ll
+++ llvm/test/Transforms/GVN/gc_relocate.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -basic-aa -gvn -S < %s | FileCheck %s
+; RUN: opt -basic-aa -gvn --enable-simple-gvn-hoist=false -S < %s | FileCheck %s
+; RUN: opt -basic-aa -gvn                                 -S < %s | FileCheck %s --check-prefix=HOIST
 
 declare void @func()
 declare i32 @"personality_function"()
@@ -12,6 +13,13 @@
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 addrspace(1)* [[A]], null
 ; CHECK-NEXT:    ret i1 [[CMP1]]
 ;
+; HOIST-LABEL: @test_trivial(
+; HOIST-NEXT:  entry:
+; HOIST-NEXT:    [[SAFEPOINT_TOKEN:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(i32 addrspace(1)* [[IN:%.*]]) ]
+; HOIST-NEXT:    [[A:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 0)
+; HOIST-NEXT:    [[CMP1:%.*]] = icmp eq i32 addrspace(1)* [[A]], null
+; HOIST-NEXT:    ret i1 [[CMP1]]
+;
 entry:
   %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) ["gc-live"(i32 addrspace(1)* %in)]
   %a = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 0, i32 0)
@@ -33,6 +41,14 @@
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 addrspace(1)* [[A]], null
 ; CHECK-NEXT:    ret i1 [[CMP1]]
 ;
+; HOIST-LABEL: @test_readnone(
+; HOIST-NEXT:  entry:
+; HOIST-NEXT:    [[SAFEPOINT_TOKEN:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(i32 addrspace(1)* [[IN:%.*]]) ]
+; HOIST-NEXT:    [[A:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 0)
+; HOIST-NEXT:    store i32 0, i32* @G, align 4
+; HOIST-NEXT:    [[CMP1:%.*]] = icmp eq i32 addrspace(1)* [[A]], null
+; HOIST-NEXT:    ret i1 [[CMP1]]
+;
 entry:
   %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) ["gc-live"(i32 addrspace(1)* %in)]
   %a = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 0, i32 0)
@@ -54,6 +70,15 @@
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 addrspace(1)* [[BASE_RELOC]], null
 ; CHECK-NEXT:    ret i1 [[CMP1]]
 ;
+; HOIST-LABEL: @test_call(
+; HOIST-NEXT:  entry:
+; HOIST-NEXT:    [[SAFEPOINT_TOKEN:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(i32 addrspace(1)* [[IN:%.*]], i32 addrspace(1)* [[IN]]) ]
+; HOIST-NEXT:    [[BASE:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 0)
+; HOIST-NEXT:    [[SAFEPOINT_TOKEN2:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(i32 addrspace(1)* [[BASE]], i32 addrspace(1)* [[BASE]]) ]
+; HOIST-NEXT:    [[BASE_RELOC:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN2]], i32 0, i32 0)
+; HOIST-NEXT:    [[CMP1:%.*]] = icmp eq i32 addrspace(1)* [[BASE_RELOC]], null
+; HOIST-NEXT:    ret i1 [[CMP1]]
+;
 entry:
   %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) ["gc-live"(i32 addrspace(1)* %in, i32 addrspace(1)* %in)]
   %base = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 0, i32 0)
@@ -82,6 +107,16 @@
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 addrspace(1)* [[IN1_RELOC2]], null
 ; CHECK-NEXT:    ret i1 [[CMP1]]
 ;
+; HOIST-LABEL: @test_two_calls(
+; HOIST-NEXT:  entry:
+; HOIST-NEXT:    [[SAFEPOINT_TOKEN:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(i32 addrspace(1)* [[IN1:%.*]], i32 addrspace(1)* [[IN2:%.*]]) ]
+; HOIST-NEXT:    [[IN1_RELOC1:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 0)
+; HOIST-NEXT:    [[IN2_RELOC1:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 1, i32 1)
+; HOIST-NEXT:    [[SAFEPOINT_TOKEN2:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(i32 addrspace(1)* [[IN1_RELOC1]], i32 addrspace(1)* [[IN2_RELOC1]]) ]
+; HOIST-NEXT:    [[IN1_RELOC2:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN2]], i32 0, i32 1)
+; HOIST-NEXT:    [[CMP1:%.*]] = icmp eq i32 addrspace(1)* [[IN1_RELOC2]], null
+; HOIST-NEXT:    ret i1 [[CMP1]]
+;
 entry:
   %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) ["gc-live"(i32 addrspace(1)* %in1, i32 addrspace(1)* %in2)]
   %in1.reloc1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token,  i32 0, i32 0)
@@ -108,6 +143,18 @@
 ; CHECK-NEXT:    cleanup
 ; CHECK-NEXT:    [[OUT1:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[LANDING_PAD]], i32 0, i32 0)
 ; CHECK-NEXT:    ret i32 addrspace(1)* [[OUT1]]
+;
+; HOIST-LABEL: @test_invoke(
+; HOIST-NEXT:    [[SAFEPOINT_TOKEN:%.*]] = invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(i32 addrspace(1)* [[IN:%.*]]) ]
+; HOIST-NEXT:    to label [[INVOKE_NORMAL_DEST:%.*]] unwind label [[EXCEPTIONAL_RETURN:%.*]]
+; HOIST:       invoke_normal_dest:
+; HOIST-NEXT:    [[OUT:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 0)
+; HOIST-NEXT:    ret i32 addrspace(1)* [[OUT]]
+; HOIST:       exceptional_return:
+; HOIST-NEXT:    [[LANDING_PAD:%.*]] = landingpad token
+; HOIST-NEXT:    cleanup
+; HOIST-NEXT:    [[OUT1:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[LANDING_PAD]], i32 0, i32 0)
+; HOIST-NEXT:    ret i32 addrspace(1)* [[OUT1]]
 ;
   %safepoint_token = invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) ["gc-live"(i32 addrspace(1)* %in)]
   to label %invoke_normal_dest unwind label %exceptional_return
@@ -139,6 +186,17 @@
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 addrspace(1)* [[B]], null
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;
+; HOIST-LABEL: @test_non_dominating(
+; HOIST-NEXT:  entry:
+; HOIST-NEXT:    [[SAFEPOINT_TOKEN:%.*]] = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(i32 addrspace(1)* [[IN:%.*]]) ]
+; HOIST-NEXT:    [[A:%.*]] = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token [[SAFEPOINT_TOKEN]], i32 0, i32 0)
+; HOIST-NEXT:    [[CMP:%.*]] = icmp eq i32 addrspace(1)* [[A]], null
+; HOIST-NEXT:    br i1 [[C:%.*]], label [[TAKEN:%.*]], label [[UNTAKEN:%.*]]
+; HOIST:       taken:
+; HOIST-NEXT:    ret i1 [[CMP]]
+; HOIST:       untaken:
+; HOIST-NEXT:    ret i1 [[CMP]]
+;
 entry:
   %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) ["gc-live"(i32 addrspace(1)* %in)]
   br i1 %c, label %taken, label %untaken
Index: llvm/test/Transforms/GVN/simple-gvnhoist-scalars.ll
===================================================================
--- llvm/test/Transforms/GVN/simple-gvnhoist-scalars.ll
+++ llvm/test/Transforms/GVN/simple-gvnhoist-scalars.ll
@@ -7,17 +7,15 @@
 define dso_local i32 @everything_hoisted(i1 %cc, i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: @everything_hoisted(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-; CHECK:       if.then:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i32 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = sdiv i32 [[C:%.*]], [[TMP0]]
+; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i32 [[A]], [[B]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sdiv i32 [[C]], [[TMP2]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP1]], [[IF_THEN]] ], [ [[TMP3]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP1]], [[IF_THEN]] ], [ [[TMP1]], [[IF_ELSE]] ]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
@@ -42,19 +40,18 @@
 define dso_local i32 @spec_barrier0(i1 %cc, i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: @spec_barrier0(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i32 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    call void @h()
-; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i32 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = sdiv i32 [[C:%.*]], [[TMP0]]
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else:
 ; CHECK-NEXT:    call void @h()
-; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i32 [[A]], [[B]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sdiv i32 [[C]], [[TMP2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = sdiv i32 [[C]], [[TMP0]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP1]], [[IF_THEN]] ], [ [[TMP3]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP1]], [[IF_THEN]] ], [ [[TMP2]], [[IF_ELSE]] ]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
@@ -81,19 +78,18 @@
 define dso_local i32 @spec_barrier1(i1 %cc, i32 %a, i32 %b, i32 %c, i32* %p) {
 ; CHECK-LABEL: @spec_barrier1(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i32 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    store volatile i32 0, i32* [[P:%.*]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i32 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = sdiv i32 [[C:%.*]], [[TMP0]]
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else:
 ; CHECK-NEXT:    store volatile i32 0, i32* [[P]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i32 [[A]], [[B]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sdiv i32 [[C]], [[TMP2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = sdiv i32 [[C]], [[TMP0]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP1]], [[IF_THEN]] ], [ [[TMP3]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP1]], [[IF_THEN]] ], [ [[TMP2]], [[IF_ELSE]] ]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
@@ -159,19 +155,17 @@
 define dso_local i32 @no_spec_barrier0(i1 %cc, i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: @no_spec_barrier0(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = sdiv i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[C:%.*]], [[TMP0]]
 ; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    call void @will_return()
-; CHECK-NEXT:    [[TMP0:%.*]] = sdiv i32 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[C:%.*]], [[TMP0]]
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else:
 ; CHECK-NEXT:    call void @will_return()
-; CHECK-NEXT:    [[TMP2:%.*]] = sdiv i32 [[A]], [[B]]
-; CHECK-NEXT:    [[TMP3:%.*]] = add nsw i32 [[C]], [[TMP2]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP1]], [[IF_THEN]] ], [ [[TMP3]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP1]], [[IF_THEN]] ], [ [[TMP1]], [[IF_ELSE]] ]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
@@ -198,21 +192,19 @@
 define dso_local i32 @no_spec_barrier1(i1 %cc, i32 %a, i32 %b, i32 %c, i32* %p) {
 ; CHECK-LABEL: @no_spec_barrier1(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = sdiv i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[C:%.*]], [[TMP0]]
 ; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP0:%.*]] = load atomic volatile i32, i32* [[P:%.*]] acquire, align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = sdiv i32 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i32 [[C:%.*]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = mul nsw i32 [[TMP0]], [[TMP2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load atomic volatile i32, i32* [[P:%.*]] acquire, align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = mul nsw i32 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else:
 ; CHECK-NEXT:    [[TMP4:%.*]] = load atomic volatile i32, i32* [[P]] acquire, align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = sdiv i32 [[A]], [[B]]
-; CHECK-NEXT:    [[TMP6:%.*]] = add nsw i32 [[C]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = mul nsw i32 [[TMP4]], [[TMP6]]
+; CHECK-NEXT:    [[TMP5:%.*]] = mul nsw i32 [[TMP4]], [[TMP1]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP3]], [[IF_THEN]] ], [ [[TMP7]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP3]], [[IF_THEN]] ], [ [[TMP5]], [[IF_ELSE]] ]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
@@ -241,19 +233,16 @@
 define dso_local i32 @multiple_use(i1 %cc, i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: @multiple_use(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-; CHECK:       if.then:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i32 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw i32 [[TMP0]], [[C:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    [[TMP3:%.*]] = add nsw i32 [[A]], [[B]]
-; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw i32 [[TMP3]], [[C]]
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i32 [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP2]], [[IF_THEN]] ], [ [[TMP5]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP2]], [[IF_THEN]] ], [ [[TMP2]], [[IF_ELSE]] ]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
@@ -280,19 +269,16 @@
 define dso_local i32 @commutative_ops(i1 %cc, i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: @commutative_ops(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-; CHECK:       if.then:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i32 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[TMP0]], [[C:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = sdiv i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    [[TMP3:%.*]] = add nsw i32 [[A]], [[B]]
-; CHECK-NEXT:    [[TMP4:%.*]] = add nsw i32 [[C]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = sdiv i32 [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP2]], [[IF_THEN]] ], [ [[TMP5]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP2]], [[IF_THEN]] ], [ [[TMP2]], [[IF_ELSE]] ]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
Index: llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll
===================================================================
--- llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll
+++ llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll
@@ -2,7 +2,7 @@
 ; RUN: opt -passes=instcombine,verify -S < %s | FileCheck %s --check-prefixes=ALL,INSTCOMBINE
 
 ; Make sure GVN won't undo the transformation:
-; RUN: opt -passes=instcombine,gvn -S < %s | FileCheck %s --check-prefixes=ALL,INSTCOMBINEGVN
+; RUN: opt -passes=instcombine,gvn --enable-simple-gvn-hoist=false -S < %s | FileCheck %s --check-prefixes=ALL,INSTCOMBINEGVN
 
 declare i8* @get_ptr.i8()
 declare i32* @get_ptr.i32()
Index: llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
===================================================================
--- llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
+++ llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
@@ -163,16 +163,16 @@
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4, !alias.scope !8
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], 
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[TMP5]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4, !alias.scope !11
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul <4 x float> [[WIDE_LOAD14]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
+; CHECK-NEXT:    [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP7]], align 4, !alias.scope !11
+; CHECK-NEXT:    [[TMP8:%.*]] = fmul <4 x float> [[WIDE_LOAD14]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP5]] to <4 x float>*
 ; CHECK-NEXT:    [[WIDE_LOAD15:%.*]] = load <4 x float>, <4 x float>* [[TMP9]], align 4, !alias.scope !13, !noalias !15
-; CHECK-NEXT:    [[TMP10:%.*]] = fadd <4 x float> [[TMP7]], [[WIDE_LOAD15]]
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[TMP7]], <4 x float> [[TMP10]]
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
+; CHECK-NEXT:    [[TMP10:%.*]] = fadd <4 x float> [[TMP8]], [[WIDE_LOAD15]]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[TMP8]], <4 x float> [[TMP10]]
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast float* [[TMP5]] to <4 x float>*
 ; CHECK-NEXT:    store <4 x float> [[PREDPHI]], <4 x float>* [[TMP11]], align 4, !alias.scope !13, !noalias !15
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
@@ -182,18 +182,18 @@
 ; CHECK-NEXT:    [[C_GEP:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[IV1]]
 ; CHECK-NEXT:    [[C_LV:%.*]] = load i32, i32* [[C_GEP]], align 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C_LV]], 20
+; CHECK-NEXT:    [[B_GEP_0:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[IV1]]
 ; CHECK-NEXT:    [[A_GEP_0:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IV1]]
 ; CHECK-NEXT:    [[A_LV_0:%.*]] = load float, float* [[A_GEP_0]], align 4
 ; CHECK-NEXT:    [[MUL2_I81_I:%.*]] = fmul float [[A_LV_0]], [[X]]
-; CHECK-NEXT:    [[B_GEP_0:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[IV1]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP_LATCH]], label [[ELSE:%.*]]
 ; CHECK:       else:
 ; CHECK-NEXT:    [[B_LV:%.*]] = load float, float* [[B_GEP_0]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[MUL2_I81_I]], [[B_LV]]
 ; CHECK-NEXT:    br label [[LOOP_LATCH]]
 ; CHECK:       loop.latch:
-; CHECK-NEXT:    [[ADD_SINK:%.*]] = phi float [ [[ADD]], [[ELSE]] ], [ [[MUL2_I81_I]], [[LOOP_BODY]] ]
-; CHECK-NEXT:    store float [[ADD_SINK]], float* [[B_GEP_0]], align 4
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi float [ [[ADD]], [[ELSE]] ], [ [[MUL2_I81_I]], [[LOOP_BODY]] ]
+; CHECK-NEXT:    store float [[STOREMERGE]], float* [[B_GEP_0]], align 4
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1
 ; CHECK-NEXT:    [[CMP_0:%.*]] = icmp ult i64 [[IV1]], 9999
 ; CHECK-NEXT:    br i1 [[CMP_0]], label [[LOOP_BODY]], label [[EXIT]], !llvm.loop [[LOOP17:![0-9]+]]