Index: llvm/include/llvm/Transforms/Scalar/GVN.h =================================================================== --- llvm/include/llvm/Transforms/Scalar/GVN.h +++ llvm/include/llvm/Transforms/Scalar/GVN.h @@ -240,6 +240,13 @@ DenseMap LeaderTable; BumpPtrAllocator TableAllocator; + /// A mapping from value numbers to a pair of instructions. This map + /// stores pairs of instructions with the same value number, from two blocks + /// having a single common predecessor, for the duration of a single top level + /// iteration in `performHoist`. + using HoistMap = DenseMap>; + HoistMap HoistPairs; + // Block-local map of equivalent values to their leader, does not // propagate to any successors. Entries added mid-block are applied // to the remaining instructions in the block. @@ -354,6 +361,13 @@ bool performScalarPRE(Instruction *I); bool performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, BasicBlock *Curr, unsigned int ValNo); + bool performHoist(Function &F); + void collectHoistCandidates(BasicBlock *ThenBB); + void matchHoistCandidates(BasicBlock *ElseBB); + void replaceInstruction(Instruction *, Instruction *); + std::pair hoistPair(BasicBlock *DestBB, BasicBlock *ThenBB, + BasicBlock *ElseBB, Instruction *ThenI, + Instruction *ElseI); Value *findLeader(const BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; Index: llvm/lib/Transforms/Scalar/GVN.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GVN.cpp +++ llvm/lib/Transforms/Scalar/GVN.cpp @@ -126,6 +126,9 @@ "into) when deducing if a value is fully available or not in GVN " "(default = 600)")); +static cl::opt GVNEnableSimpleGVNHoist("enable-simple-gvnhoist", + cl::init(true)); + struct llvm::GVN::Expression { uint32_t opcode; bool commutative = false; @@ -2503,6 +2506,11 @@ } } + if (GVNEnableSimpleGVNHoist) { + LeaderTable.clear(); + Changed |= performHoist(F); + } + // FIXME: Should perform GVN again after PRE does something. PRE can move // computations into blocks where they become fully redundant. Note that // we can't do this until PRE's critical edge splitting updates memdep. @@ -3049,6 +3057,167 @@ } } +// Won't reorder above these instructions. +static bool isHoistBarrier(const Instruction &I) { + return !isGuaranteedToTransferExecutionToSuccessor(&I); +} + +void GVN::collectHoistCandidates(BasicBlock *BB) { + bool HasHoistBarrier = false; + for (Instruction &I : *BB) { + // FIXME(chill): Limit how deep we go into the block. + if (I.isTerminator()) + break; + + if (!HasHoistBarrier || isSafeToSpeculativelyExecute(&I)) + HoistPairs[VN.lookupOrAdd(&I)].first = &I; + + if ((HasHoistBarrier |= isHoistBarrier(I))) + LLVM_DEBUG(dbgs() << "Simple GVNHoist: reached hoist barrier" << I + << '\n';); + } +} + +void GVN::matchHoistCandidates(BasicBlock *BB) { + bool HasHoistBarrier = false; + for (Instruction &I : *BB) { + // FIXME(chill): Limit how deep we go into the block. + if (I.isTerminator()) + break; + + if (!HasHoistBarrier || isSafeToSpeculativelyExecute(&I)) { + uint32_t N = VN.lookupOrAdd(&I); + auto It = HoistPairs.find(N); + if (It != HoistPairs.end()) + It->second.second = &I; + } + + if ((HasHoistBarrier |= isHoistBarrier(I))) + LLVM_DEBUG(dbgs() << "Simple GVNHoist: reached hoist barrier" << I + << '\n';); + } +} + +void GVN::replaceInstruction(Instruction *I, Instruction *Repl) { + LLVM_DEBUG(dbgs() << "Simple GVNHoist: replacing" << *I << " by" << *Repl + << '\n';); + patchReplacementInstruction(I, Repl); + ICF->removeUsersOf(I); + I->replaceAllUsesWith(Repl); + salvageKnowledge(I, AC); + salvageDebugInfo(*I); + if (MD) + MD->removeInstruction(I); + if (MSSAU) + MSSAU->removeMemoryAccess(I); + VN.erase(I); + ICF->removeInstruction(I); + LLVM_DEBUG(verifyRemoved(I)); + I->eraseFromParent(); +} + +// Only hoist instructions from the "then" block. +// Each hoisted instructions must be paired with an instruction from the "else" +// block. +std::pair GVN::hoistPair(BasicBlock *DestBB, BasicBlock *ThenBB, + BasicBlock *ElseBB, Instruction *ThenI, + Instruction *ElseI) { + // If the instruction is moved out of the "then" block there's nothing to do. + if (ThenI->getParent() != ThenBB) + return {false, false}; + + // Instruction must have already been selected for hoisting and matched with + // another instruction. + auto It = HoistPairs.find(VN.lookupOrAdd(ThenI)); + if (It == HoistPairs.end()) + return {false, true}; + + assert((ElseI == nullptr || ElseI == It->second.second) && + "Attempt to hoist two not paired instructions."); + + if (LLVM_LIKELY(ElseI == nullptr) && (ElseI = It->second.second) == nullptr) + return {false, true}; + It->second.second = nullptr; + + assert(ElseI->getParent() == ElseBB && "Instruction already removed"); + + bool Change = false; + + // Hoist operands. Begin by hoisting all of the operands of the "then" + // instruction, then check all of the operands of the "else" instruction + // dominate its block. + for (unsigned I = 0, N = ThenI->getNumOperands(); I < N; ++I) { + auto *Op = dyn_cast(ThenI->getOperand(I)); + if (Op == nullptr) + continue; + bool OpChange, CantHoist; + std::tie(OpChange, CantHoist) = + hoistPair(DestBB, ThenBB, ElseBB, Op, nullptr); + Change |= OpChange; + if (CantHoist) + return {Change, true}; + } + + for (unsigned I = 0, N = ElseI->getNumOperands(); I < N; ++I) { + auto *Op = dyn_cast(ElseI->getOperand(I)); + if (Op == nullptr) + continue; + if (Op->getParent() == ElseBB) + return {Change, true}; + } + + // Hoist one of the instructions and replace all uses of the other with it. + ICF->removeInstruction(ThenI); + ICF->insertInstructionTo(ThenI, DestBB); + ThenI->moveBefore(DestBB->getTerminator()); + replaceInstruction(ElseI, ThenI); + + return {true, false}; +} + +// Perform trivial hoisting of values from two blocks to their common +// predecessor. +bool GVN::performHoist(Function &F) { + LLVM_DEBUG(dbgs() << "Simple GVNHoist: running on function " << F.getName() + << '\n';); + bool Change = false; + ReversePostOrderTraversal RPOT(&F); + for (BasicBlock *BB : RPOT) { + // Check we have a block of the desired shape. + auto *BI = dyn_cast(BB->getTerminator()); + if (!BI || !BI->isConditional()) + continue; + + BasicBlock *Then = BI->getSuccessor(0); + BasicBlock *Else = BI->getSuccessor(1); + + if (!Then->getSinglePredecessor() || !Else->getSinglePredecessor()) + continue; + + LLVM_DEBUG(dbgs() << "Simple GVNHoist: looking at block " << BB->getName() + << '\n'); + + // Collect all hoistable instructions from the smaller block, then match + // them by value number with the instructions from the other block. + if (Then->size() > Else->size()) + std::swap(Then, Else); + + HoistPairs.clear(); + collectHoistCandidates(Then); + matchHoistCandidates(Else); + + // Hoist matched pairs. + for (const auto &P : HoistPairs) { + bool LocalChange, _; + std::tie(LocalChange, _) = + hoistPair(BB, Then, Else, P.second.first, P.second.second); + Change |= LocalChange; + } + } + + return Change; +} + class llvm::gvn::GVNLegacyPass : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid Index: llvm/test/Transforms/GVN/PRE/local-pre.ll =================================================================== --- llvm/test/Transforms/GVN/PRE/local-pre.ll +++ llvm/test/Transforms/GVN/PRE/local-pre.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -gvn -enable-pre -S | FileCheck %s -; RUN: opt < %s -passes="gvn
" -enable-pre=false -S | FileCheck %s
+; RUN: opt < %s -gvn -enable-pre -enable-simple-gvnhoist=false -S | FileCheck %s
+; RUN: opt < %s -passes="gvn
" -enable-pre=false -enable-simple-gvnhoist=false -S | FileCheck %s
 
 declare void @may_exit() nounwind
 
Index: llvm/test/Transforms/GVN/PRE/phi-translate.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/phi-translate.ll
+++ llvm/test/Transforms/GVN/PRE/phi-translate.ll
@@ -1,4 +1,4 @@
-; RUN: opt -basic-aa -gvn -S < %s | FileCheck %s
+; RUN: opt -basic-aa -gvn -enable-simple-gvnhoist=false -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64"
 
Index: llvm/test/Transforms/GVN/PRE/pre-basic-add.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/pre-basic-add.ll
+++ llvm/test/Transforms/GVN/PRE/pre-basic-add.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -gvn -enable-pre -S | FileCheck %s
-; RUN: opt < %s -passes="gvn
" -enable-pre=false -S | FileCheck %s
+; RUN: opt < %s -gvn -enable-pre  -enable-simple-gvnhoist=false -S | FileCheck %s
+; RUN: opt < %s -passes="gvn
" -enable-pre=false -enable-simple-gvnhoist=false -S | FileCheck %s
 
 @H = common global i32 0		;  [#uses=2]
 @G = common global i32 0		;  [#uses=1]
Index: llvm/test/Transforms/GVN/PRE/pre-load.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/pre-load.ll
+++ llvm/test/Transforms/GVN/PRE/pre-load.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -basic-aa -gvn -enable-load-pre -S | FileCheck %s
-; RUN: opt < %s -aa-pipeline=basic-aa -passes="gvn" -enable-load-pre=false -S | FileCheck %s
+; RUN: opt < %s -basic-aa -gvn -enable-load-pre -enable-simple-gvnhoist=false -S | FileCheck %s
+; RUN: opt < %s -aa-pipeline=basic-aa -passes="gvn" -enable-load-pre=false -enable-simple-gvnhoist=false -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
 define i32 @test1(i32* %p, i1 %C) {
Index: llvm/test/Transforms/GVN/PRE/pre-no-cost-phi.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/pre-no-cost-phi.ll
+++ llvm/test/Transforms/GVN/PRE/pre-no-cost-phi.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -S | FileCheck %s
+; RUN: opt < %s -gvn -enable-simple-gvnhoist=false -S | FileCheck %s
 ; This testcase tests insertion of no-cost phis.  That is,
 ; when the value is already available in every predecessor,
 ; and we just need to insert a phi node to merge the available values.
Index: llvm/test/Transforms/GVN/PRE/pre-poison-add.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/pre-poison-add.ll
+++ llvm/test/Transforms/GVN/PRE/pre-poison-add.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -gvn -enable-pre -S | FileCheck %s
+; RUN: opt < %s -gvn -enable-pre -enable-simple-gvnhoist=false -S | FileCheck %s
+; RUN: opt < %s --passes="gvn
" -S | FileCheck %s --check-prefix=CHECK-HOIST
 
 @H = common global i32 0
 @G = common global i32 0
@@ -29,9 +30,12 @@
 
 define i32 @test2(i1 %cond, i32 %v) nounwind {
 ; CHECK-LABEL: @test2
+; CHECK-HOIST-LABEL: @test2
 entry:
     br i1 %cond, label %bb, label %bb1
 
+; CHECK-HOIST: entry:
+; CHECK-HOIST: %.pre = add i32 %v, 42
 bb:
     %add.1 = add i32 %v, 42
 ; CHECK: %add.1 = add i32 %v, 42
Index: llvm/test/Transforms/GVN/freeze.ll
===================================================================
--- llvm/test/Transforms/GVN/freeze.ll
+++ llvm/test/Transforms/GVN/freeze.ll
@@ -1,11 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -gvn -S | FileCheck %s
-; RUN: opt < %s -passes=gvn -S | FileCheck %s
+; RUN: opt < %s -gvn -enable-simple-gvnhoist=false -S | FileCheck %s
+; RUN: opt < %s -passes=gvn -enable-simple-gvnhoist=false -S | FileCheck %s
+; RUN: opt < %s -passes=gvn -S | FileCheck %s --check-prefix=CHECK-HOIST
 
 define i1 @f(i1 %a) {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:    [[B:%.*]] = freeze i1 [[A:%.*]]
 ; CHECK-NEXT:    ret i1 [[B]]
+;
+; CHECK-HOIST-LABEL: @f(
+; CHECK-HOIST-NEXT:    [[B:%.*]] = freeze i1 [[A:%.*]]
+; CHECK-HOIST-NEXT:    ret i1 [[B]]
 ;
   %b = freeze i1 %a
   %c = freeze i1 %a
@@ -20,6 +25,13 @@
 ; CHECK-NEXT:    call void @use1(i1 [[B]])
 ; CHECK-NEXT:    call void @use1(i1 [[B]])
 ; CHECK-NEXT:    ret void
+;
+; CHECK-HOIST-LABEL: @f_multipleuses(
+; CHECK-HOIST-NEXT:    [[B:%.*]] = freeze i1 [[A:%.*]]
+; CHECK-HOIST-NEXT:    call void @use1(i1 [[B]])
+; CHECK-HOIST-NEXT:    call void @use1(i1 [[B]])
+; CHECK-HOIST-NEXT:    call void @use1(i1 [[B]])
+; CHECK-HOIST-NEXT:    ret void
 ;
   %b = freeze i1 %a
   %c = freeze i1 %a
@@ -40,6 +52,16 @@
 ; CHECK-NEXT:    [[Y:%.*]] = freeze i1 [[A]]
 ; CHECK-NEXT:    call void @use2(i1 [[Y]])
 ; CHECK-NEXT:    ret void
+;
+; CHECK-HOIST-LABEL: @f_dom(
+; CHECK-HOIST-NEXT:    [[X:%.*]] = freeze i1 [[A:%.*]]
+; CHECK-HOIST-NEXT:    br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK-HOIST:       BB1:
+; CHECK-HOIST-NEXT:    call void @use1(i1 [[X]])
+; CHECK-HOIST-NEXT:    ret void
+; CHECK-HOIST:       BB2:
+; CHECK-HOIST-NEXT:    call void @use2(i1 [[X]])
+; CHECK-HOIST-NEXT:    ret void
 ;
   br i1 %cond, label %BB1, label %BB2
 BB1:
Index: llvm/test/Transforms/GVN/gc_relocate.ll
===================================================================
--- llvm/test/Transforms/GVN/gc_relocate.ll
+++ llvm/test/Transforms/GVN/gc_relocate.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -basic-aa -gvn -S < %s | FileCheck %s
+; RUN: opt -basic-aa -gvn --enable-simple-gvnhoist=false -S < %s | FileCheck %s
 
 declare void @func()
 declare i32 @"personality_function"()
Index: llvm/test/Transforms/GVN/simple-gvnhoist-scalars.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/GVN/simple-gvnhoist-scalars.ll
@@ -0,0 +1,262 @@
+; RUN: opt --passes=gvn -S %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux"
+
+; everything hoisted
+define dso_local i32 @everything_hoisted(i1 %cc, i32 %a, i32 %b, i32 %c) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  %0 = add nsw i32 %a, %b
+  %1 = sdiv i32 %c, %0
+  br label %if.end
+
+if.else:
+  %2 = add nsw i32 %a, %b
+  %3 = sdiv i32 %c, %2
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%1, %if.then], [%3, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @everything_hoisted
+; CHECK:     entry:
+; CHECK:       %0 = add nsw i32 %a, %b
+; CHECK:       %1 = sdiv i32 %c, %0
+; CHECK:     if.then:
+; CHECK-NEXT:   br label %if.end
+; CHECK:     if.else:
+; CHECK-NEXT:   br label %if.end
+; CHECK:     if.end:
+; CHECK:       %r = phi i32 [ %1, %if.then ], [ %1, %if.else ]
+
+; speculation barrier
+define dso_local i32 @spec_barrier0(i1 %cc, i32 %a, i32 %b, i32 %c) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  call void @h()
+  %0 = add nsw i32 %a, %b
+  %1 = sdiv i32 %c, %0
+  br label %if.end
+
+if.else:
+  call void @h()
+  %2 = add nsw i32 %a, %b
+  %3 = sdiv i32 %c, %2
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%1, %if.then], [%3, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @spec_barrier0
+; CHECK: %0 = add nsw i32 %a, %b
+; CHECK-NOT: sdiv
+; CHECK: if.then:
+; CHECK:   %1 = sdiv i32 %c, %0
+; CHECK: if.else:
+; CHECK:   %2 = sdiv i32 %c, %0
+; CHECK: if.end:
+; CHECK:   %r = phi i32 [ %1, %if.then ], [ %2, %if.else ]
+
+; speculation barrier
+define dso_local i32 @spec_barrier1(i1 %cc, i32 %a, i32 %b, i32 %c, i32* %p) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  store volatile i32 0, i32* %p
+  %0 = add nsw i32 %a, %b
+  %1 = sdiv i32 %c, %0
+  br label %if.end
+
+if.else:
+  store volatile i32 0, i32* %p
+  %2 = add nsw i32 %a, %b
+  %3 = sdiv i32 %c, %2
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%1, %if.then], [%3, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @spec_barrier1
+; CHECK:      entry:
+; CHECK:        %0 = add nsw i32 %a, %b
+; CHECK-NOT:    sdiv
+; CHECK:      if.then:
+; CHECK:        %1 = sdiv i32 %c, %0
+; CHECK:      if.else:
+; CHECK:        %2 = sdiv i32 %c, %0
+; CHECK:      if.end:
+; CHECK-NEXT:   %r = phi i32 [ %1, %if.then ], [ %2, %if.else ]
+
+; speculation barrier
+define dso_local i32 @spec_barrier2(i1 %cc, i32 %a, i32 %b, i32 %c) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  call void @h()
+  %0 = sdiv i32 %a, %b
+  %1 = add nsw i32 %c, %0
+  br label %if.end
+
+if.else:
+  call void @h()
+  %2 = sdiv i32 %a, %b
+  %3 = add nsw i32 %c, %2
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%1, %if.then], [%3, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @spec_barrier2
+; CHECK-NOT: add
+; CHECK-NOT: sdiv
+; CHECK:     if.then:
+
+; no speculation barrier
+define dso_local i32 @no_spec_barrier0(i1 %cc, i32 %a, i32 %b, i32 %c) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  call void @will_return()
+  %0 = sdiv i32 %a, %b
+  %1 = add nsw i32 %c, %0
+  br label %if.end
+
+if.else:
+  call void @will_return()
+  %2 = sdiv i32 %a, %b
+  %3 = add nsw i32 %c, %2
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%1, %if.then], [%3, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @no_spec_barrier0
+; CHECK:     entry:
+; CHECK:       %0 = sdiv i32 %a, %b
+; CHECK:       %1 = add nsw i32 %c, %0
+; CHECK:     if.then:
+; CHECK-NEXT:   call void @will_return()
+; CHECK-NEXT:   br label %if.end
+; CHECK:     if.else:
+; CHECK-NEXT:   call void @will_return()
+; CHECK-NEXT:   br label %if.end
+; CHECK:     if.end:
+; CHECK:       %r = phi i32 [ %1, %if.then ], [ %1, %if.else ]
+
+
+; no speculation barrier
+define dso_local i32 @no_spec_barrier1(i1 %cc, i32 %a, i32 %b, i32 %c, i32* %p) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  %0 = load atomic volatile i32, i32* %p acquire, align 4
+  %1 = sdiv i32 %a, %b
+  %2 = add nsw i32 %c, %1
+  %3 = mul nsw i32 %0, %2
+  br label %if.end
+
+if.else:
+  %4 = load atomic volatile i32, i32* %p acquire, align 4
+  %5 = sdiv i32 %a, %b
+  %6 = add nsw i32 %c, %5
+  %7 = mul nsw i32 %4, %6
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%3, %if.then], [%7, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @no_spec_barrier1
+; CHECK: entry:
+; CHECK:   %0 = sdiv i32 %a, %b
+; CHECK:   %1 = add nsw i32 %c, %0
+; CHECK: if.then:
+; CHECK:   %2 = load atomic volatile i32, i32* %p acquire, align 4
+; CHECK:   %3 = mul nsw i32 %2, %1
+; CHECK: if.else:
+; CHECK:   %4 = load atomic volatile i32, i32* %p acquire, align 4
+; CHECK:   %5 = mul nsw i32 %4, %1
+; CHECK: if.end:
+; CHECK:   %r = phi i32 [ %3, %if.then ], [ %5, %if.else ]
+
+; multiple uses of a hoisted instruction
+define dso_local i32 @multiple_use(i1 %cc, i32 %a, i32 %b, i32 %c) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  %0 = add nsw i32 %a, %b
+  %1 = mul nsw i32 %0, %c
+  %2 = add nsw i32 %0, %1
+  br label %if.end
+
+if.else:
+  %3 = add nsw i32 %a, %b
+  %4 = mul nsw i32 %3, %c
+  %5 = add nsw i32 %3, %4
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%2, %if.then], [%5, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @multiple_use
+; CHECK:      entry:
+; CHECK:        %0 = add nsw i32 %a, %b
+; CHECK:        %1 = mul nsw i32 %0, %c
+; CHECK:        %2 = add nsw i32 %0, %1
+; CHECK:      if.then:
+; CHECK-NEXT:   br label %if.end
+; CHECK:      if.else:
+; CHECK-NEXT:   br label %if.end
+; CHECK:      if.end:
+; CHECK-NEXT:   %r = phi i32 [ %2, %if.then ], [ %2, %if.else ]
+
+; Different operand order in commutative operations
+define dso_local i32 @commutative_ops(i1 %cc, i32 %a, i32 %b, i32 %c) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  %0 = add nsw i32 %a, %b
+  %1 = add nsw i32 %0, %c
+  %2 = sdiv i32 %0, %1
+  br label %if.end
+
+if.else:
+  %3 = add nsw i32 %a, %b
+  %4 = add nsw i32 %c, %3
+  %5 = sdiv i32 %3, %4
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%2, %if.then], [%5, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @commutative_ops
+; CHECK:      entry:
+; CHECK:        %0 = add nsw i32 %a, %b
+; CHECK:        %1 = add nsw i32 %0, %c
+; CHECK:        %2 = sdiv i32 %0, %1
+; CHECK:      if.then:
+; CHECK-NEXT:   br label %if.end
+; CHECK:      if.else:
+; CHECK-NEXT:   br label %if.end
+; CHECK:      if.end:
+; CHECK-NEXT:   %r = phi i32 [ %2, %if.then ], [ %2, %if.else ]
+
+declare void @h()
+declare void @will_return() nounwind willreturn
Index: llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll
===================================================================
--- llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll
+++ llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll
@@ -2,7 +2,7 @@
 ; RUN: opt -passes=instcombine,verify -S < %s | FileCheck %s --check-prefixes=ALL,INSTCOMBINE
 
 ; Make sure GVN won't undo the transformation:
-; RUN: opt -passes=instcombine,gvn -S < %s | FileCheck %s --check-prefixes=ALL,INSTCOMBINEGVN
+; RUN: opt -passes=instcombine,gvn --enable-simple-gvnhoist=false -S < %s | FileCheck %s --check-prefixes=ALL,INSTCOMBINEGVN
 
 declare i8* @get_ptr.i8()
 declare i32* @get_ptr.i32()
Index: llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
===================================================================
--- llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
+++ llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
@@ -163,16 +163,16 @@
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4, !alias.scope !8
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], 
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[TMP5]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4, !alias.scope !11
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul <4 x float> [[WIDE_LOAD14]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
+; CHECK-NEXT:    [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP7]], align 4, !alias.scope !11
+; CHECK-NEXT:    [[TMP8:%.*]] = fmul <4 x float> [[WIDE_LOAD14]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP5]] to <4 x float>*
 ; CHECK-NEXT:    [[WIDE_LOAD15:%.*]] = load <4 x float>, <4 x float>* [[TMP9]], align 4, !alias.scope !13, !noalias !15
-; CHECK-NEXT:    [[TMP10:%.*]] = fadd <4 x float> [[TMP7]], [[WIDE_LOAD15]]
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[TMP7]], <4 x float> [[TMP10]]
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
+; CHECK-NEXT:    [[TMP10:%.*]] = fadd <4 x float> [[TMP8]], [[WIDE_LOAD15]]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[TMP8]], <4 x float> [[TMP10]]
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast float* [[TMP5]] to <4 x float>*
 ; CHECK-NEXT:    store <4 x float> [[PREDPHI]], <4 x float>* [[TMP11]], align 4, !alias.scope !13, !noalias !15
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
@@ -182,18 +182,18 @@
 ; CHECK-NEXT:    [[C_GEP:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[IV1]]
 ; CHECK-NEXT:    [[C_LV:%.*]] = load i32, i32* [[C_GEP]], align 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C_LV]], 20
+; CHECK-NEXT:    [[B_GEP_0:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[IV1]]
 ; CHECK-NEXT:    [[A_GEP_0:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IV1]]
 ; CHECK-NEXT:    [[A_LV_0:%.*]] = load float, float* [[A_GEP_0]], align 4
 ; CHECK-NEXT:    [[MUL2_I81_I:%.*]] = fmul float [[A_LV_0]], [[X]]
-; CHECK-NEXT:    [[B_GEP_0:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[IV1]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP_LATCH]], label [[ELSE:%.*]]
 ; CHECK:       else:
 ; CHECK-NEXT:    [[B_LV:%.*]] = load float, float* [[B_GEP_0]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[MUL2_I81_I]], [[B_LV]]
 ; CHECK-NEXT:    br label [[LOOP_LATCH]]
 ; CHECK:       loop.latch:
-; CHECK-NEXT:    [[ADD_SINK:%.*]] = phi float [ [[ADD]], [[ELSE]] ], [ [[MUL2_I81_I]], [[LOOP_BODY]] ]
-; CHECK-NEXT:    store float [[ADD_SINK]], float* [[B_GEP_0]], align 4
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi float [ [[ADD]], [[ELSE]] ], [ [[MUL2_I81_I]], [[LOOP_BODY]] ]
+; CHECK-NEXT:    store float [[STOREMERGE]], float* [[B_GEP_0]], align 4
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1
 ; CHECK-NEXT:    [[CMP_0:%.*]] = icmp ult i64 [[IV1]], 9999
 ; CHECK-NEXT:    br i1 [[CMP_0]], label [[LOOP_BODY]], label [[EXIT]], !llvm.loop [[LOOP17:![0-9]+]]