Index: llvm/include/llvm/Transforms/Scalar/GVN.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar/GVN.h
+++ llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -351,6 +351,11 @@
   bool performScalarPRE(Instruction *I);
   bool performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
                                  BasicBlock *Curr, unsigned int ValNo);
+  std::pair<bool, bool> performHoist(Function &F);
+  bool hoistPair(BasicBlock *DestBB, BasicBlock *ThenBB, BasicBlock *ElseBB,
+                 Instruction *ThenI, Instruction *ElseI);
+  void replaceInstruction(Instruction *, Instruction *);
+  void eraseInstruction(Instruction *);
   Value *findLeader(const BasicBlock *BB, uint32_t num);
   void cleanupGlobalSets();
   void verifyRemoved(const Instruction *I) const;
Index: llvm/lib/Transforms/Scalar/GVN.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/GVN.cpp
+++ llvm/lib/Transforms/Scalar/GVN.cpp
@@ -126,6 +126,7 @@
     "into) when deducing if a value is fully available or not in GVN "
     "(default = 600)"));
 
+static cl::opt<bool> GVNEnableSimpleGVNHoist("enable-simple-gvnhoist", cl::init(true));
 struct llvm::GVN::Expression {
   uint32_t opcode;
   bool commutative = false;
@@ -2487,6 +2488,17 @@
     }
   }
 
+  if (GVNEnableSimpleGVNHoist && MD) {
+    LeaderTable.clear();
+    bool RunAgain = true;
+    while (RunAgain) {
+      bool HoistedAny;
+      std::tie(HoistedAny, RunAgain) = performHoist(F);
+      Changed |= HoistedAny;
+      VN.clear();
+    }
+  }
+
   // FIXME: Should perform GVN again after PRE does something. PRE can move
   // computations into blocks where they become fully redundant. Note that
   // we can't do this until PRE's critical edge splitting updates memdep.
@@ -2543,14 +2555,7 @@
   for (auto *I : InstrsToErase) {
     assert(I->getParent() == BB && "Removing instruction from wrong block?");
     LLVM_DEBUG(dbgs() << "GVN removed: " << *I << '\n');
-    salvageKnowledge(I, AC);
-    salvageDebugInfo(*I);
-    if (MD) MD->removeInstruction(I);
-    if (MSSAU)
-      MSSAU->removeMemoryAccess(I);
-    LLVM_DEBUG(verifyRemoved(I));
-    ICF->removeInstruction(I);
-    I->eraseFromParent();
+    eraseInstruction(I);
   }
   InstrsToErase.clear();
 
@@ -2822,6 +2827,227 @@
   return Changed;
 }
 
+struct HoistCandidate {
+  HoistCandidate(uint32_t N, Instruction *I) : VN(N), Inst(I) {}
+  // For stores: combine the value numbers of the pointer operand and the
+  // stored value into a single 64-bit key.
+  HoistCandidate(uint32_t N, uint32_t M, Instruction *I)
+      : VN((uint64_t(N) << 32) | M), Inst(I) {}
+  uint64_t VN;
+  Instruction *Inst;
+};
+
+// Won't reorder across these instructions, but still allow them to be
+// hoisted.
+static bool isHoistBarrier(const Instruction &I) {
+  return I.isVolatile() || I.isAtomic() ||
+         !isGuaranteedToTransferExecutionToSuccessor(&I);
+}
+
+static void collectHoistCandidates(MemoryDependenceResults *MD,
+                                   GVN::ValueTable &VN, BasicBlock *BB,
+                                   std::vector<HoistCandidate> &Candidates,
+                                   bool Loads) {
+  bool HasHoistBarrier = false;
+  for (Instruction &I : *BB) {
+    if (HasHoistBarrier || I.isTerminator())
+      break;
+
+    if ((HasHoistBarrier = isHoistBarrier(I)))
+      LLVM_DEBUG(dbgs() << "Simple GVNHoist: reached hoist barrier " << I
+                        << '\n';);
+
+    // Don't initiate hoisting from GEPs (but they can still be hoisted if
+    // they appear as operands of hoisted instructions).
+    if (isa<GetElementPtrInst>(&I))
+      continue;
+
+    // Load and non-load instructions are collected separately.
+    if (Loads != isa<LoadInst>(I))
+      continue;
+
+    // Move loads and stores only if they depend on a memory access in some
+    // other block.
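+    // E.g., two loads of the same pointer whose nearest dependency is in the
+    // common predecessor can be paired, whereas a load fed by a store earlier
+    // in its own block is skipped.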
+    if (I.mayReadOrWriteMemory() && !MD->getDependency(&I).isNonLocal())
+      continue;
+
+    if (auto *LI = dyn_cast<LoadInst>(&I))
+      Candidates.emplace_back(VN.lookupOrAdd(LI->getPointerOperand()), &I);
+    else if (auto *SI = dyn_cast<StoreInst>(&I))
+      Candidates.emplace_back(VN.lookupOrAdd(SI->getPointerOperand()),
+                              VN.lookupOrAdd(SI->getValueOperand()), &I);
+    else
+      Candidates.emplace_back(VN.lookupOrAdd(&I), &I);
+  }
+}
+
+static void findMatchingPairs(
+    std::vector<HoistCandidate> &ThenC, std::vector<HoistCandidate> &ElseC,
+    SmallVectorImpl<std::pair<Instruction *, Instruction *>> &Match) {
+  auto Order = [](const HoistCandidate &A, const HoistCandidate &B) {
+    return A.VN < B.VN;
+  };
+  llvm::sort(ThenC, Order);
+  llvm::sort(ElseC, Order);
+  for (auto ThenI = ThenC.begin(), ThenE = ThenC.end(), ElseI = ElseC.begin(),
+            ElseE = ElseC.end();
+       ThenI != ThenE && ElseI != ElseE;
+       /* empty */) {
+    if (ThenI->VN < ElseI->VN) {
+      LLVM_DEBUG(dbgs() << "Simple GVNHoist: Skipping VN " << ThenI->VN << " "
+                        << *ThenI->Inst << '\n');
+      ++ThenI;
+    } else if (ElseI->VN < ThenI->VN) {
+      LLVM_DEBUG(dbgs() << "Simple GVNHoist: Skipping VN " << ElseI->VN << " "
+                        << *ElseI->Inst << '\n');
+      ++ElseI;
+    } else {
+      LLVM_DEBUG(dbgs() << "Simple GVNHoist: Found a matching pair:"
+                        << "\nSimple GVNHoist: " << *ThenI->Inst
+                        << "\nSimple GVNHoist: " << *ElseI->Inst << '\n');
+      Match.emplace_back(ThenI->Inst, ElseI->Inst);
+      ++ThenI;
+      ++ElseI;
+    }
+  }
+}
+
+void GVN::replaceInstruction(Instruction *I, Instruction *Repl) {
+  Repl->andIRFlags(I);
+
+  if (auto *LI = dyn_cast<LoadInst>(Repl))
+    LI->setAlignment(std::min(LI->getAlign(), cast<LoadInst>(I)->getAlign()));
+  else if (auto *SI = dyn_cast<StoreInst>(Repl))
+    SI->setAlignment(std::min(SI->getAlign(), cast<StoreInst>(I)->getAlign()));
+  else if (auto *AI = dyn_cast<AllocaInst>(Repl))
+    AI->setAlignment(std::max(AI->getAlign(), cast<AllocaInst>(I)->getAlign()));
+
+  combineMetadata(Repl, I,
+                  {LLVMContext::MD_access_group, LLVMContext::MD_alias_scope,
+                   LLVMContext::MD_fpmath, LLVMContext::MD_invariant_group,
+                   LLVMContext::MD_invariant_load, LLVMContext::MD_noalias,
+                   LLVMContext::MD_range, LLVMContext::MD_tbaa},
+                  true);
+
+  I->replaceAllUsesWith(Repl);
+  MD->invalidateCachedPointerInfo(I);
+  markInstructionForDeletion(I);
+}
+
+void GVN::eraseInstruction(Instruction *I) {
+  salvageKnowledge(I, AC);
+  salvageDebugInfo(*I);
+  if (MD)
+    MD->removeInstruction(I);
+  if (MSSAU)
+    MSSAU->removeMemoryAccess(I);
+  LLVM_DEBUG(verifyRemoved(I));
+  ICF->removeInstruction(I);
+  I->eraseFromParent();
+}
+
+bool GVN::hoistPair(BasicBlock *DestBB, BasicBlock *ThenBB, BasicBlock *ElseBB,
+                    Instruction *ThenI, Instruction *ElseI) {
+  // If one of the instructions is (already) in a dominating block, replace all
+  // users of the other one with it.
+  if (ThenI->getParent() != ThenBB) {
+    if (ElseI->getParent() == ElseBB) {
+      replaceInstruction(ElseI, ThenI);
+      return true;
+    }
+    return false;
+  }
+
+  if (ElseI->getParent() != ElseBB) {
+    assert(ThenI->getParent() == ThenBB);
+    replaceInstruction(ThenI, ElseI);
+    return true;
+  }
+
+  // Hoist operands.
+  for (unsigned I = 0, N = ThenI->getNumOperands(); I < N; ++I) {
+    auto *ThenOp = dyn_cast<Instruction>(ThenI->getOperand(I));
+    auto *ElseOp = dyn_cast<Instruction>(ElseI->getOperand(I));
+    if (ThenOp == nullptr || ElseOp == nullptr) {
+      assert(ThenOp == nullptr && ElseOp == nullptr);
+      continue;
+    }
+    (void)hoistPair(DestBB, ThenBB, ElseBB, ThenOp, ElseOp);
+  }
+
+  // Remove the instruction from the memory dependence cache, as the cache
+  // might keep a reference to the old block.
+  MD->removeInstruction(ThenI);
+
+  // Hoist one of the instructions and replace all uses of the other with it.
+  ThenI->moveBefore(DestBB->getTerminator());
+  if (MSSAU) {
+    if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(ThenI))
+      MSSAU->moveToPlace(MA, DestBB,
+                         MemorySSA::InsertionPlace::BeforeTerminator);
+  }
+  replaceInstruction(ElseI, ThenI);
+
+  return true;
+}
+
+// Perform trivial hoisting of values from two blocks to their common
+// predecessor.
+std::pair<bool, bool> GVN::performHoist(Function &F) {
+  LLVM_DEBUG(dbgs() << "Simple GVNHoist: running on function " << F.getName()
+                    << '\n';);
+  bool RunAgain = false;
+  bool HoistedAny = false;
+  for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
+    // Check that we have a block of the desired shape.
+    LLVM_DEBUG(dbgs() << "Simple GVNHoist: looking at block " << BB->getName()
+                      << '\n');
+    auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
+    if (!BI || !BI->isConditional())
+      continue;
+
+    BasicBlock *Then = BI->getSuccessor(0);
+    BasicBlock *Else = BI->getSuccessor(1);
+
+    if (!Then->getSinglePredecessor() || !Else->getSinglePredecessor())
+      continue;
+
+    // Collect pairs of instructions from each block with matching value
+    // numbers. Handle load instructions separately, as they don't have their
+    // own value number (they reuse the value number of the address operand).
+    std::vector<HoistCandidate> ThenCandidates, ElseCandidates;
+    collectHoistCandidates(MD, VN, Then, ThenCandidates, /* Loads = */ false);
+    collectHoistCandidates(MD, VN, Else, ElseCandidates, /* Loads = */ false);
+
+    SmallVector<std::pair<Instruction *, Instruction *>, 8> Match;
+    findMatchingPairs(ThenCandidates, ElseCandidates, Match);
+
+    ThenCandidates.clear();
+    ElseCandidates.clear();
+    collectHoistCandidates(MD, VN, Then, ThenCandidates, /* Loads = */ true);
+    collectHoistCandidates(MD, VN, Else, ElseCandidates, /* Loads = */ true);
+    findMatchingPairs(ThenCandidates, ElseCandidates, Match);
+
+    // Hoist all pairs.
+    for (const auto &P : Match) {
+      if (hoistPair(BB, Then, Else, P.first, P.second)) {
+        HoistedAny = true;
+        // Hoisting these creates opportunities for more hoisting.
+        if (P.first->mayReadOrWriteMemory() || isHoistBarrier(*P.first))
+          RunAgain = true;
+      }
+    }
+
+    // Remove dead instructions.
+    for (auto *I : InstrsToErase) {
+      assert((I->getParent() == Then || I->getParent() == Else) &&
+             "Removing instruction from the wrong block");
+      LLVM_DEBUG(dbgs() << "Simple GVNHoist: remove " << *I << '\n');
+      eraseInstruction(I);
+    }
+    InstrsToErase.clear();
+  }
+
+  return {HoistedAny, RunAgain};
+}
 
 /// Split the critical edge connecting the given two blocks, and return
 /// the block inserted to the critical edge.
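For illustration, the transformation this implements (mirroring the
@basic_hoisting case in the new test file below): given a diamond whose two
arms compute the same value number,

  br i1 %cmp, label %if.then, label %if.else
if.then:
  %mul = mul nsw i32 %b, %c
  ...
if.else:
  %mul1 = mul nsw i32 %b, %c
  ...

performHoist moves %mul above the branch in the common predecessor and
rewrites all uses of %mul1 to %mul, so both arms share one multiply.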
Index: llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
===================================================================
--- llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
+++ llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
@@ -1,4 +1,4 @@
-; RUN: opt -tbaa -basic-aa -gvn -S < %s | FileCheck %s
+; RUN: opt -tbaa -basic-aa -gvn -enable-simple-gvnhoist=false -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64"
 
Index: llvm/test/Transforms/GVN/PRE/load-pre-align.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/load-pre-align.ll
+++ llvm/test/Transforms/GVN/PRE/load-pre-align.ll
@@ -1,5 +1,6 @@
-; RUN: opt < %s -gvn -S | FileCheck %s
-; RUN: opt < %s -passes="gvn" -S | FileCheck %s
+; RUN: opt < %s -gvn -enable-simple-gvnhoist=false -S | FileCheck %s
+; RUN: opt < %s -passes="gvn" -enable-simple-gvnhoist=false -S | FileCheck %s
+; RUN: opt < %s -passes="gvn" -S | FileCheck %s -check-prefix=CHECK-HOIST
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
 
@@ -10,10 +11,13 @@
 entry:
   br label %for.cond
 
-; loads aligned greater than the memory should not be moved past conditionals
+; loads aligned greater than the memory should not be moved past conditionals ...
 ; CHECK-NOT: load
 ; CHECK: br i1
+; ... unless the same load is on both sides of the conditional.
+; CHECK-HOIST: load
+; CHECK-HOIST: br i1
 
 for.cond:
   %i.0 = phi i32 [ 0, %entry ], [ %indvar.next, %for.inc ]
   %cmp = icmp slt i32 %i.0, %n
Index: llvm/test/Transforms/GVN/PRE/local-pre.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/local-pre.ll
+++ llvm/test/Transforms/GVN/PRE/local-pre.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -gvn -enable-pre -S | FileCheck %s
-; RUN: opt < %s -passes="gvn<pre>" -enable-pre=false -S | FileCheck %s
+; RUN: opt < %s -gvn -enable-pre -enable-simple-gvnhoist=false -S | FileCheck %s
+; RUN: opt < %s -passes="gvn<pre>" -enable-pre=false -enable-simple-gvnhoist=false -S | FileCheck %s
 
 declare void @may_exit() nounwind
 
Index: llvm/test/Transforms/GVN/PRE/phi-translate.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/phi-translate.ll
+++ llvm/test/Transforms/GVN/PRE/phi-translate.ll
@@ -1,4 +1,4 @@
-; RUN: opt -basic-aa -gvn -S < %s | FileCheck %s
+; RUN: opt -basic-aa -gvn -enable-simple-gvnhoist=false -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64"
 
Index: llvm/test/Transforms/GVN/PRE/pre-basic-add.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/pre-basic-add.ll
+++ llvm/test/Transforms/GVN/PRE/pre-basic-add.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -gvn -enable-pre -S | FileCheck %s
-; RUN: opt < %s -passes="gvn<pre>" -enable-pre=false -S | FileCheck %s
+; RUN: opt < %s -gvn -enable-pre -enable-simple-gvnhoist=false -S | FileCheck %s
+; RUN: opt < %s -passes="gvn<pre>" -enable-pre=false -enable-simple-gvnhoist=false -S | FileCheck %s
 
 @H = common global i32 0		; <i32*> [#uses=2]
 @G = common global i32 0		; <i32*> [#uses=1]
Index: llvm/test/Transforms/GVN/PRE/pre-load.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/pre-load.ll
+++ llvm/test/Transforms/GVN/PRE/pre-load.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -basic-aa -gvn -enable-load-pre -S | FileCheck %s
-; RUN: opt < %s -aa-pipeline=basic-aa -passes="gvn" -enable-load-pre=false -S | FileCheck %s
+; RUN: opt < %s -basic-aa -gvn -enable-load-pre -enable-simple-gvnhoist=false -S | FileCheck %s
+; RUN: opt < %s -aa-pipeline=basic-aa -passes="gvn" -enable-load-pre=false -enable-simple-gvnhoist=false -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
 define i32 @test1(i32* %p, i1 %C) {
Index: llvm/test/Transforms/GVN/PRE/pre-no-cost-phi.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/pre-no-cost-phi.ll
+++ llvm/test/Transforms/GVN/PRE/pre-no-cost-phi.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -S | FileCheck %s
+; RUN: opt < %s -gvn -enable-simple-gvnhoist=false -S | FileCheck %s
 ; This testcase tests insertion of no-cost phis.  That is,
 ; when the value is already available in every predecessor,
 ; and we just need to insert a phi node to merge the available values.
Index: llvm/test/Transforms/GVN/PRE/pre-poison-add.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/pre-poison-add.ll
+++ llvm/test/Transforms/GVN/PRE/pre-poison-add.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -gvn -enable-pre -S | FileCheck %s
+; RUN: opt < %s -gvn -enable-pre -enable-simple-gvnhoist=false -S | FileCheck %s
+; RUN: opt < %s --passes="gvn
" -S | FileCheck %s --check-prefix=CHECK-HOIST
 
 @H = common global i32 0
 @G = common global i32 0
@@ -29,9 +30,12 @@
 
 define i32 @test2(i1 %cond, i32 %v) nounwind {
 ; CHECK-LABEL: @test2
+; CHECK-HOIST-LABEL: @test2
 entry:
     br i1 %cond, label %bb, label %bb1
 
+; CHECK-HOIST: entry:
+; CHECK-HOIST: %add.1 = add i32 %v, 42
 bb:
     %add.1 = add i32 %v, 42
 ; CHECK: %add.1 = add i32 %v, 42
Index: llvm/test/Transforms/GVN/PRE/pre-single-pred.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/pre-single-pred.ll
+++ llvm/test/Transforms/GVN/PRE/pre-single-pred.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -gvn -enable-load-pre -S | FileCheck %s
-; RUN: opt < %s -passes="gvn" -enable-load-pre=false -S | FileCheck %s
+; RUN: opt < %s -gvn -enable-load-pre -enable-simple-gvnhoist=false -S | FileCheck %s
+; RUN: opt < %s -passes="gvn" -enable-load-pre=false -enable-simple-gvnhoist=false -S | FileCheck %s
 ; This testcase assumed we'll PRE the load into %for.cond, but we don't actually
 ; verify that doing so is safe.  If there didn't _happen_ to be a load in
 ; %for.end, we would actually be lengthening the execution on some paths, and
Index: llvm/test/Transforms/GVN/PRE/volatile.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/volatile.ll
+++ llvm/test/Transforms/GVN/PRE/volatile.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; Tests that check our handling of volatile instructions encountered
 ; when scanning for dependencies
-; RUN: opt -basic-aa -gvn -S < %s | FileCheck %s
+; RUN: opt -basic-aa -gvn --enable-simple-gvnhoist=false -S < %s | FileCheck %s
 
 ; Check that we can bypass a volatile load when searching
 ; for dependencies of a non-volatile load
Index: llvm/test/Transforms/GVN/condprop.ll
===================================================================
--- llvm/test/Transforms/GVN/condprop.ll
+++ llvm/test/Transforms/GVN/condprop.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -basic-aa -gvn -S | FileCheck %s
+; RUN: opt < %s -basic-aa -gvn --enable-simple-gvnhoist=false -S | FileCheck %s
 
 @a = external global i32		; <i32*> [#uses=7]
 
Index: llvm/test/Transforms/GVN/freeze.ll
===================================================================
--- llvm/test/Transforms/GVN/freeze.ll
+++ llvm/test/Transforms/GVN/freeze.ll
@@ -1,11 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -gvn -S | FileCheck %s
-; RUN: opt < %s -passes=gvn -S | FileCheck %s
+; RUN: opt < %s -gvn -enable-simple-gvnhoist=false -S | FileCheck %s
+; RUN: opt < %s -passes=gvn -enable-simple-gvnhoist=false -S | FileCheck %s
+; RUN: opt < %s -passes=gvn -S | FileCheck %s --check-prefix=CHECK-HOIST
 
 define i1 @f(i1 %a) {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:    [[B:%.*]] = freeze i1 [[A:%.*]]
 ; CHECK-NEXT:    ret i1 [[B]]
+;
+; CHECK-HOIST-LABEL: @f(
+; CHECK-HOIST-NEXT:    [[B:%.*]] = freeze i1 [[A:%.*]]
+; CHECK-HOIST-NEXT:    ret i1 [[B]]
 ;
   %b = freeze i1 %a
   %c = freeze i1 %a
@@ -20,6 +25,13 @@
 ; CHECK-NEXT:    call void @use1(i1 [[B]])
 ; CHECK-NEXT:    call void @use1(i1 [[B]])
 ; CHECK-NEXT:    ret void
+;
+; CHECK-HOIST-LABEL: @f_multipleuses(
+; CHECK-HOIST-NEXT:    [[B:%.*]] = freeze i1 [[A:%.*]]
+; CHECK-HOIST-NEXT:    call void @use1(i1 [[B]])
+; CHECK-HOIST-NEXT:    call void @use1(i1 [[B]])
+; CHECK-HOIST-NEXT:    call void @use1(i1 [[B]])
+; CHECK-HOIST-NEXT:    ret void
 ;
   %b = freeze i1 %a
   %c = freeze i1 %a
@@ -40,6 +52,16 @@
 ; CHECK-NEXT:    [[Y:%.*]] = freeze i1 [[A]]
 ; CHECK-NEXT:    call void @use2(i1 [[Y]])
 ; CHECK-NEXT:    ret void
+;
+; CHECK-HOIST-LABEL: @f_dom(
+; CHECK-HOIST-NEXT:    [[X:%.*]] = freeze i1 [[A:%.*]]
+; CHECK-HOIST-NEXT:    br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK-HOIST:       BB1:
+; CHECK-HOIST-NEXT:    call void @use1(i1 [[X]])
+; CHECK-HOIST-NEXT:    ret void
+; CHECK-HOIST:       BB2:
+; CHECK-HOIST-NEXT:    call void @use2(i1 [[X]])
+; CHECK-HOIST-NEXT:    ret void
 ;
   br i1 %cond, label %BB1, label %BB2
 BB1:
Index: llvm/test/Transforms/GVN/gc_relocate.ll
===================================================================
--- llvm/test/Transforms/GVN/gc_relocate.ll
+++ llvm/test/Transforms/GVN/gc_relocate.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -basic-aa -gvn -S < %s | FileCheck %s
+; RUN: opt -basic-aa -gvn --enable-simple-gvnhoist=false -S < %s | FileCheck %s
 
 declare void @func()
 declare i32 @"personality_function"()
Index: llvm/test/Transforms/GVN/simple-gvnhoist.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/GVN/simple-gvnhoist.ll
@@ -0,0 +1,337 @@
+; RUN: opt -S --passes=gvn %s | FileCheck %s
+
+; Basic hoisting
+define i32 @basic_hoisting(i32 %a, i32 %b, i32 %c) {
+entry:
+  %cmp = icmp sgt i32 %a, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %mul = mul nsw i32 %b, %c
+  %add = add nsw i32 %a, %mul
+  br label %return
+
+if.else:
+  %sub = sub nsw i32 0, %a
+  %mul1 = mul nsw i32 %b, %c
+  %add2 = add nsw i32 %sub, %mul1
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ %add, %if.then ], [ %add2, %if.else ]
+  ret i32 %retval.0
+}
+; CHECK-LABEL: @basic_hoisting
+; CHECK: entry:
+; CHECK:   %mul = mul nsw i32 %b, %c
+; CHECK: if.then:
+; CHECK:   %add = add nsw i32 %a, %mul
+; CHECK: if.else:
+; CHECK:   %add2 = add nsw i32 %sub, %mul
+
+; No reorder across volatile instructions (a bit too conservative).
+define i32 @no_reorder_across_volatile(i32 %a, i32 %b, i32 %c, i32* %v) {
+entry:
+  %cmp = icmp sgt i32 %a, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %0 = load volatile i32, i32* %v, align 4
+  %mul = mul nsw i32 %b, %c
+  %add = add nsw i32 %a, %mul
+  br label %return
+
+if.else:
+  %sub = sub nsw i32 0, %a
+  %mul1 = mul nsw i32 %b, %c
+  %add2 = add nsw i32 %sub, %mul1
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ %add, %if.then ], [ %add2, %if.else ]
+  ret i32 %retval.0
+}
+; CHECK-LABEL: @no_reorder_across_volatile
+; CHECK: entry:
+; CHECK-NOT: load
+; CHECK-NOT: add
+; CHECK-NOT: sub
+; CHECK-NOT: mul
+; CHECK: if.then:
+; CHECK: if.else:
+
+; Volatile operations themselves can be hoisted.
+define i32 @can_hoist_volatile(i32 %a, i32 %b, i32* %v) {
+entry:
+  %cmp = icmp sgt i32 %a, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %0 = load volatile i32, i32* %v, align 4
+  %mul = mul nsw i32 %b, %0
+  %add = add nsw i32 %a, %mul
+  br label %return
+
+if.else:
+  %1 = load volatile i32, i32* %v, align 4
+  %sub = sub nsw i32 0, %a
+  %mul1 = mul nsw i32 %b, %1
+  %add2 = add nsw i32 %sub, %mul1
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ %add, %if.then ], [ %add2, %if.else ]
+  ret i32 %retval.0
+}
+; CHECK-LABEL: @can_hoist_volatile
+; CHECK: entry:
+; CHECK:   %0 = load volatile i32, i32* %v, align 4
+; CHECK:   %mul = mul nsw i32 %b, %0
+; CHECK: if.then:
+; CHECK:   %add = add nsw i32 %a, %mul
+; CHECK: if.else:
+; CHECK:   %add2 = add nsw i32 %sub, %mul
+
+; Don't reorder across implicit control flow.
+define i32 @no_reorder_across_icf(i32 %a, i32 %b, i32 %c) {
+entry:
+  %cmp = icmp sgt i32 %a, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %0 = call i32 @g()
+  %mul = mul nsw i32 %b, %c
+  %add = add nsw i32 %a, %mul
+  br label %return
+
+if.else:
+  %sub = sub nsw i32 0, %a
+  %mul1 = mul nsw i32 %b, %c
+  %add2 = add nsw i32 %sub, %mul1
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ %add, %if.then ], [ %add2, %if.else ]
+  ret i32 %retval.0
+}
+
+declare i32 @g() readnone
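+; @g is readnone but not nounwind/willreturn, so it is not guaranteed to
+; transfer execution to its successor and acts as a hoist barrier.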
+; CHECK-LABEL: @no_reorder_across_icf
+; CHECK-NOT: call
+; CHECK-NOT: add
+; CHECK-NOT: sub
+; CHECK-NOT: mul
+; CHECK: if.then:
+; CHECK: if.else:
+
+; Can hoist above some calls
+define i32 @can_reorder_across_calls(i32 %a, i32 %b, i32 %c) {
+entry:
+  %cmp = icmp sgt i32 %a, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %0 = call i32 @h()
+  %mul = mul nsw i32 %b, %c
+  %add = add nsw i32 %a, %mul
+  br label %return
+
+if.else:
+  %sub = sub nsw i32 0, %a
+  %mul1 = mul nsw i32 %b, %c
+  %add2 = add nsw i32 %sub, %mul1
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ %add, %if.then ], [ %add2, %if.else ]
+  ret i32 %retval.0
+}
+
+declare i32 @h() readnone nounwind willreturn
+; CHECK-LABEL: @can_reorder_across_calls
+; CHECK: entry:
+; CHECK:   %mul = mul nsw i32 %b, %c
+; CHECK: if.then:
+; CHECK:   %0 = call i32 @h()
+; CHECK:   %add = add nsw i32 %a, %mul
+; CHECK: if.else:
+; CHECK:   %add2 = add nsw i32 %sub, %mul
+
+; Some calls themselves can be hoisted and merged
+define i32 @can_hoist_calls(i32 %a, i32 %b) {
+entry:
+  %cmp = icmp sgt i32 %a, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %0 = call i32 @h()
+  %mul = mul nsw i32 %b, %0
+  %add = add nsw i32 %a, %mul
+  br label %return
+
+if.else:
+  %sub = sub nsw i32 0, %a
+  %1 = call i32 @h()
+  %mul1 = mul nsw i32 %b, %1
+  %add2 = add nsw i32 %sub, %mul1
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ %add, %if.then ], [ %add2, %if.else ]
+  ret i32 %retval.0
+}
+; CHECK-LABEL: @can_hoist_calls
+; CHECK: entry:
+; CHECK:   %0 = call i32 @h()
+; CHECK:   %mul = mul nsw i32 %b, %0
+; CHECK: if.then:
+; CHECK:   %add = add nsw i32 %a, %mul
+; CHECK: if.else:
+; CHECK:   %add2 = add nsw i32 %sub, %mul
+
+; Merge instruction attributes
+define i32 @merge_instr_attr(i32 %a, i32 %b, i32* %c) {
+entry:
+  %cmp = icmp sgt i32 %a, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %0 = load i32, i32* %c, align 4
+  %mul = mul nsw i32 %b, %0
+  %add = add nsw i32 %a, %mul
+  br label %return
+
+if.else:
+  %sub = sub nsw i32 0, %a
+  %1 = load i32, i32* %c, align 8
+  %mul1 = mul i32 %b, %1
+  %add2 = add nsw i32 %sub, %mul1
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ %add, %if.then ], [ %add2, %if.else ]
+  ret i32 %retval.0
+}
+
+; CHECK-LABEL: @merge_instr_attr
+; CHECK: entry:
+; Merged alignment is the minimum of the incoming ones.
+; CHECK:   %0 = load i32, i32* %c, align 4
+; The mul loses the `nsw` flag.
+; CHECK:   %mul = mul i32 %b, %0
+; CHECK:   br i1 %cmp, label %if.then, label %if.else
+; CHECK: if.then:
+; CHECK:   %add = add nsw i32 %a, %mul
+; CHECK: if.else:
+; CHECK:   %add2 = add nsw i32 %sub, %mul
+
+; Do not merge loads of different sizes/types.
+define i32 @load_size_mismatch(i32 %a, i32 %b, i32* %c) {
+entry:
+  %cmp = icmp sgt i32 %a, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %0 = load i32, i32* %c, align 4
+  %mul = mul nsw i32 %b, %0
+  %add = add nsw i32 %a, %mul
+  br label %return
+
+if.else:
+  %sub = sub nsw i32 0, %a
+  %1 = bitcast i32* %c to i16*
+  %2 = load i16, i16* %1, align 4
+  %3 = zext i16 %2 to i32
+  %mul1 = mul nsw i32 %b, %3
+  %add2 = add nsw i32 %sub, %mul1
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ %add, %if.then ], [ %add2, %if.else ]
+  ret i32 %retval.0
+}
+; CHECK-LABEL: @load_size_mismatch
+; CHECK: entry:
+; CHECK-NOT: load
+; CHECK: if.then:
+; CHECK: load
+; CHECK: if.else:
+; CHECK: load
+
+; Don't reorder loads/stores if they alias.
+define i32 @aliasing_load_store(i32 %a, i32 %b, i32* %c, i32* %d) {
+entry:
+  %cmp = icmp sgt i32 %a, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %0 = load i32, i32* %c, align 4
+  %mul = mul nsw i32 %b, %0
+  store i32 0, i32* %d, align 4
+  %add = add nsw i32 %a, %mul
+  br label %return
+
+if.else:
+  %sub = sub nsw i32 0, %a
+  %1 = bitcast i32* %c to i16*
+  %2 = load i16, i16* %1, align 4
+  %3 = zext i16 %2 to i32
+  %mul1 = mul nsw i32 %b, %3
+  store i32 0, i32* %d, align 4
+  %add2 = add nsw i32 %sub, %mul1
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ %add, %if.then ], [ %add2, %if.else ]
+  ret i32 %retval.0
+}
+; CHECK-LABEL: @aliasing_load_store
+; CHECK: entry:
+; CHECK-NOT: load
+; CHECK-NOT: store
+; CHECK: if.then:
+; CHECK: load
+; CHECK: store
+; CHECK: if.else:
+; CHECK: load
+; CHECK: store
+
+; Can reorder non-aliasing loads/stores.
+define i32 @non_aliasing_load_store(i32 %a, i32 %b, i32* %ca, i32* %cb, i32* %d) {
+entry:
+  %cmp = icmp sgt i32 %a, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %0 = load i32, i32* %ca, align 4, !tbaa !3
+  %mul = mul nsw i32 %b, %0
+  store i32 0, i32* %d, align 4, !tbaa !4
+  %add = add nsw i32 %a, %mul
+  br label %return
+
+if.else:
+  %sub = sub nsw i32 0, %a
+  %1 = load i32, i32* %cb, align 4, !tbaa !3
+  %mul1 = mul nsw i32 %b, %1
+  store i32 0, i32* %d, align 4, !tbaa !4
+  %add2 = add nsw i32 %sub, %mul1
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ %add, %if.then ], [ %add2, %if.else ]
+  ret i32 %retval.0
+}
+
+!0 = !{!"Simple TBAA"}
+!1 = !{!"int", !0, i64 0}
+!2 = !{!"S", !1, i64 0, !1, i64 4}
+!3 = !{!2, !1, i64 0}
+!4 = !{!2, !1, i64 4}
+
+; CHECK-LABEL: @non_aliasing_load_store
+; CHECK: entry:
+; CHECK: store i32 0, i32* %d, align 4, !tbaa
+; CHECK: if.then:
+; CHECK: load
+; CHECK: if.else:
+; CHECK: load
Index: llvm/test/Transforms/GVNHoist/hoist-inline.ll
===================================================================
--- llvm/test/Transforms/GVNHoist/hoist-inline.ll
+++ llvm/test/Transforms/GVNHoist/hoist-inline.ll
@@ -4,7 +4,6 @@
 ; CHECK-LABEL: define i32 @fun(
 ; CHECK-LABEL: entry:
 ; CHECK: load i32, i32* @A
-; CHECK: if.then:
 
 @A = external global i32
 @B = external global i32
Index: llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll
===================================================================
--- llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll
+++ llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll
@@ -2,7 +2,7 @@
 ; RUN: opt -passes=instcombine,verify -S < %s | FileCheck %s --check-prefixes=ALL,INSTCOMBINE
 
 ; Make sure GVN won't undo the transformation:
-; RUN: opt -passes=instcombine,gvn -S < %s | FileCheck %s --check-prefixes=ALL,INSTCOMBINEGVN
+; RUN: opt -passes=instcombine,gvn --enable-simple-gvnhoist=false -S < %s | FileCheck %s --check-prefixes=ALL,INSTCOMBINEGVN
 
 declare i8* @get_ptr.i8()
 declare i32* @get_ptr.i32()
Index: llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
===================================================================
--- llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
+++ llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
@@ -2,16 +2,26 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-; RUN: opt -O2 -S < %s | FileCheck %s
+; RUN: opt -O2 -S --enable-simple-gvnhoist=false < %s | FileCheck %s
+; RUN: opt -O2 -S                                < %s | FileCheck %s --check-prefix=CHECK-HOIST
 
 ; CHECK-LABEL: main
 ; CHECK: if.end
 ; CHECK: store
 ; CHECK: memset
 ; CHECK: if.then
+; CHECK: load
 ; CHECK: store
 ; CHECK: memset
 
+; CHECK-HOIST-LABEL: main
+; CHECK-HOIST: entry:
+; CHECK-HOIST: store
+; CHECK-HOIST: if.end
+; CHECK-HOIST: memset
+; CHECK-HOIST: if.then
+; CHECK-HOIST: load
+; CHECK-HOIST: memset
 @d = common global i32 0, align 4
 @b = common global i32 0, align 4
 @f = common global [1 x [3 x i8]] zeroinitializer, align 1