Index: llvm/trunk/include/llvm/Transforms/Scalar/GVN.h
===================================================================
--- llvm/trunk/include/llvm/Transforms/Scalar/GVN.h
+++ llvm/trunk/include/llvm/Transforms/Scalar/GVN.h
@@ -18,6 +18,7 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -27,6 +28,7 @@
 #include "llvm/IR/PassManager.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Transforms/Utils/OrderedInstructions.h"
 #include <cstdint>
 #include <utility>
 #include <vector>
@@ -156,7 +158,11 @@
   AssumptionCache *AC;
   SetVector<BasicBlock *> DeadBlocks;
   OptimizationRemarkEmitter *ORE;
+  // Maps a block to the topmost instruction with implicit control flow in it.
+  DenseMap<const BasicBlock *, const Instruction *>
+      FirstImplicitControlFlowInsts;
 
+  OrderedInstructions *OI;
   ValueTable VN;
 
   /// A mapping from value numbers to lists of Value*'s that
@@ -268,6 +274,7 @@
                                  BasicBlock *Curr, unsigned int ValNo);
   Value *findLeader(const BasicBlock *BB, uint32_t num);
   void cleanupGlobalSets();
+  void fillImplicitControlFlowInfo(BasicBlock *BB);
   void verifyRemoved(const Instruction *I) const;
   bool splitCriticalEdges();
   BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
Index: llvm/trunk/lib/Transforms/Scalar/GVN.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/GVN.cpp
+++ llvm/trunk/lib/Transforms/Scalar/GVN.cpp
@@ -38,6 +38,7 @@
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/PHITransAddr.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CallSite.h"
@@ -1048,7 +1049,32 @@
   // backwards through predecessors if needed.
   BasicBlock *LoadBB = LI->getParent();
   BasicBlock *TmpBB = LoadBB;
+  bool IsSafeToSpeculativelyExecute = isSafeToSpeculativelyExecute(LI);
 
+  // Check that there is no implicit control flow instructions above our load in
+  // its block. If there is an instruction that doesn't always pass the
+  // execution to the following instruction, then moving through it may become
+  // invalid. For example:
+  //
+  // int arr[LEN];
+  // int index = ???;
+  // ...
+  // guard(0 <= index && index < LEN);
+  // use(arr[index]);
+  //
+  // It is illegal to move the array access to any point above the guard,
+  // because if the index is out of bounds we should deoptimize rather than
+  // access the array.
+  // Check that there is no guard in this block above our intruction.
+  if (!IsSafeToSpeculativelyExecute) {
+    auto It = FirstImplicitControlFlowInsts.find(TmpBB);
+    if (It != FirstImplicitControlFlowInsts.end()) {
+      assert(It->second->getParent() == TmpBB &&
+             "Implicit control flow map broken?");
+      if (OI->dominates(It->second, LI))
+        return false;
+    }
+  }
   while (TmpBB->getSinglePredecessor()) {
     TmpBB = TmpBB->getSinglePredecessor();
     if (TmpBB == LoadBB) // Infinite (unreachable) loop.
@@ -1063,6 +1089,11 @@
     // which it was not previously executed.
     if (TmpBB->getTerminator()->getNumSuccessors() != 1)
       return false;
+
+    // Check that there is no implicit control flow in a block above.
+    if (!IsSafeToSpeculativelyExecute &&
+        FirstImplicitControlFlowInsts.count(TmpBB))
+      return false;
   }
 
   assert(TmpBB);
@@ -1982,6 +2013,8 @@
   TLI = &RunTLI;
   VN.setAliasAnalysis(&RunAA);
   MD = RunMD;
+  OrderedInstructions OrderedInstrs(DT);
+  OI = &OrderedInstrs;
   VN.setMemDep(MD);
   ORE = RunORE;
 
@@ -2064,14 +2097,26 @@
     if (!AtStart)
       --BI;
 
+    bool InvalidateImplicitCF = false;
+    const Instruction *MaybeFirstICF = FirstImplicitControlFlowInsts.lookup(BB);
     for (auto *I : InstrsToErase) {
       assert(I->getParent() == BB && "Removing instruction from wrong block?");
       DEBUG(dbgs() << "GVN removed: " << *I << '\n');
       if (MD) MD->removeInstruction(I);
       DEBUG(verifyRemoved(I));
+      if (MaybeFirstICF == I) {
+        // We have erased the first ICF in block. The map needs to be updated.
+        InvalidateImplicitCF = true;
+        // Do not keep dangling pointer on the erased instruction.
+        MaybeFirstICF = nullptr;
+      }
       I->eraseFromParent();
     }
+
+    OI->invalidateBlock(BB);
     InstrsToErase.clear();
+    if (InvalidateImplicitCF)
+      fillImplicitControlFlowInfo(BB);
 
     if (AtStart)
       BI = BB->begin();
@@ -2265,7 +2310,14 @@
   if (MD)
     MD->removeInstruction(CurInst);
   DEBUG(verifyRemoved(CurInst));
+  bool InvalidateImplicitCF =
+      FirstImplicitControlFlowInsts.lookup(CurInst->getParent()) == CurInst;
+  // FIXME: Intended to be markInstructionForDeletion(CurInst), but it causes
+  // some assertion failures.
+  OI->invalidateBlock(CurrentBlock);
   CurInst->eraseFromParent();
+  if (InvalidateImplicitCF)
+    fillImplicitControlFlowInfo(CurrentBlock);
   ++NumGVNInstr;
 
   return true;
@@ -2332,6 +2384,9 @@
   // RPOT walks the graph in its constructor and will not be invalidated during
   // processBlock.
   ReversePostOrderTraversal<Function *> RPOT(&F);
+
+  for (BasicBlock *BB : RPOT)
+    fillImplicitControlFlowInfo(BB);
   for (BasicBlock *BB : RPOT)
     Changed |= processBlock(BB);
 
@@ -2343,6 +2398,48 @@
   LeaderTable.clear();
   BlockRPONumber.clear();
   TableAllocator.Reset();
+  FirstImplicitControlFlowInsts.clear();
+}
+
+void
+GVN::fillImplicitControlFlowInfo(BasicBlock *BB) {
+  // Make sure that all marked instructions are actually deleted by this point,
+  // so that we don't need to care about omitting them.
+  assert(InstrsToErase.empty() && "Filling before removed all marked insns?");
+  auto MayNotTransferExecutionToSuccessor = [&](const Instruction *I) {
+    // If a block's instruction doesn't always pass the control to its successor
+    // instruction, mark the block as having implicit control flow. We use them
+    // to avoid wrong assumptions of sort "if A is executed and B post-dominates
+    // A, then B is also executed". This is not true is there is an implicit
+    // control flow instruction (e.g. a guard) between them.
+    //
+    // TODO: Currently, isGuaranteedToTransferExecutionToSuccessor returns false
+    // for volatile stores and loads because they can trap. The discussion on
+    // whether or not it is correct is still ongoing. We might want to get rid
+    // of this logic in the future. Anyways, trapping instructions shouldn't
+    // introduce implicit control flow, so we explicitly allow them here. This
+    // must be removed once isGuaranteedToTransferExecutionToSuccessor is fixed.
+    if (isGuaranteedToTransferExecutionToSuccessor(I))
+      return false;
+    if (auto *LI = dyn_cast<LoadInst>(I)) {
+      assert(LI->isVolatile() && "Non-volatile load should transfer execution"
+                                 " to successor!");
+      return false;
+    }
+    if (auto *SI = dyn_cast<StoreInst>(I)) {
+      assert(SI->isVolatile() && "Non-volatile store should transfer execution"
+                                 " to successor!");
+      return false;
+    }
+    return true;
+  };
+  FirstImplicitControlFlowInsts.erase(BB);
+
+  for (auto &I : *BB)
+    if (MayNotTransferExecutionToSuccessor(&I)) {
+      FirstImplicitControlFlowInsts[BB] = &I;
+      break;
+    }
 }
 
 /// Verify that the specified instruction does not occur in our
Index: llvm/trunk/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll
===================================================================
--- llvm/trunk/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll
+++ llvm/trunk/test/Transforms/GVN/PRE/2017-10-16-LoadPRECrash.ll
@@ -17,6 +17,13 @@
 define hidden void @wrapon_fn173() {
 
 ; CHECK-LABEL: @wrapon_fn173
+; CHECK:       entry:
+; CHECK-NEXT:    call %ArrayImpl* @getaddr_ArrayImpl(%ArrayImpl addrspace(100)* undef)
+; CHECK-NEXT:    %.pre = load i64 addrspace(100)*, i64 addrspace(100)** null, align 8
+; CHECK-NEXT:    br label %loop
+; CHECK:       loop:
+; CHECK-NEXT:    call i64* @getaddr_i64(i64 addrspace(100)* %.pre)
+; CHECK-NEXT:    br label %loop
 
 entry:
   %0 = call %ArrayImpl* @getaddr_ArrayImpl(%ArrayImpl addrspace(100)* undef)
Index: llvm/trunk/test/Transforms/GVN/PRE/pre-load-guards.ll
===================================================================
--- llvm/trunk/test/Transforms/GVN/PRE/pre-load-guards.ll
+++ llvm/trunk/test/Transforms/GVN/PRE/pre-load-guards.ll
@@ -0,0 +1,146 @@
+; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+declare void @llvm.experimental.guard(i1, ...)
+
+; This is a motivating example on why we prohibit hoisting through guards.
+; In the bottom block, we check that the index is within bounds and only access
+; the element in this case and deoptimize otherwise. If we hoist the load to a
+; place above the guard, it will may lead to out-of-bound array access.
+define i32 @test_motivation(i32* %p, i32* %q, i1 %C, i32 %index, i32 %len) {
+; CHECK-LABEL: @test_motivation(
+block1:
+  %el1 = getelementptr inbounds i32, i32* %q, i32 %index
+  %el2 = getelementptr inbounds i32, i32* %p, i32 %index
+	br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK:        block2:
+; CHECK-NEXT:     br
+; CHECK-NOT:      load
+; CHECK-NOT:      sge
+; CHECK-NOT:      slt
+; CHECK-NOT:      and
+  br label %block4
+
+block3:
+  store i32 0, i32* %el1
+  br label %block4
+
+block4:
+
+; CHECK:        block4:
+; CHECK:          %cond1 = icmp sge i32 %index, 0
+; CHECK-NEXT:     %cond2 = icmp slt i32 %index, %len
+; CHECK-NEXT:     %in.bounds = and i1 %cond1, %cond2
+; CHECK:          call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds)
+; CHECK-NEXT:     %PRE = load i32, i32* %P2
+; CHECK:          ret i32 %PRE
+
+  %P2 = phi i32* [%el2, %block3], [%el1, %block2]
+  %cond1 = icmp sge i32 %index, 0
+  %cond2 = icmp slt i32 %index, %len
+  %in.bounds = and i1 %cond1, %cond2
+  call void (i1, ...) @llvm.experimental.guard(i1 %in.bounds) [ "deopt"() ]
+  %PRE = load i32, i32* %P2
+  ret i32 %PRE
+}
+
+; Guard in load's block that is above the load should prohibit the PRE.
+define i32 @test_guard_01(i32* %p, i32* %q, i1 %C, i1 %G) {
+; CHECK-LABEL: @test_guard_01(
+block1:
+	br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK:        block2:
+; CHECK-NEXT:     br
+; CHECK-NOT:      load
+
+ br label %block4
+
+block3:
+  store i32 0, i32* %p
+  br label %block4
+
+block4:
+
+; CHECK:        block4:
+; CHECK:          call void (i1, ...) @llvm.experimental.guard(i1 %G)
+; CHECK-NEXT:     load
+; CHECK:          ret i32
+
+  %P2 = phi i32* [%p, %block3], [%q, %block2]
+  call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
+  %PRE = load i32, i32* %P2
+  ret i32 %PRE
+}
+
+; Guard in load's block that is below the load should not prohibit the PRE.
+define i32 @test_guard_02(i32* %p, i32* %q, i1 %C, i1 %G) {
+; CHECK-LABEL: @test_guard_02(
+block1:
+	br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK:        block2:
+; CHECK-NEXT:     load i32, i32* %q
+
+ br label %block4
+
+block3:
+  store i32 0, i32* %p
+  br label %block4
+
+block4:
+
+; CHECK:        block4:
+; CHECK-NEXT:     phi i32 [
+; CHECK-NEXT:     phi i32* [
+; CHECK-NEXT:     call void (i1, ...) @llvm.experimental.guard(i1 %G)
+; CHECK-NOT:      load
+; CHECK:          ret i32
+
+  %P2 = phi i32* [%p, %block3], [%q, %block2]
+  %PRE = load i32, i32* %P2
+  call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
+  ret i32 %PRE
+}
+
+; Guard above the load's block should prevent PRE from hoisting through it.
+define i32 @test_guard_03(i32* %p, i32* %q, i1 %C, i1 %G) {
+; CHECK-LABEL: @test_guard_03(
+block1:
+	br i1 %C, label %block2, label %block3
+
+block2:
+
+; CHECK:        block2:
+; CHECK-NEXT:     br
+; CHECK-NOT:      load
+
+ br label %block4
+
+block3:
+  store i32 0, i32* %p
+  br label %block4
+
+block4:
+
+; CHECK:        block4:
+; CHECK-NEXT:     phi i32*
+; CHECK-NEXT:     call void (i1, ...) @llvm.experimental.guard(i1 %G)
+; CHECK-NEXT:     load
+; CHECK-NEXT:     ret i32
+
+  %P2 = phi i32* [%p, %block3], [%q, %block2]
+  call void (i1, ...) @llvm.experimental.guard(i1 %G) [ "deopt"() ]
+  br label %block5
+
+block5:
+  %PRE = load i32, i32* %P2
+  ret i32 %PRE
+}
Index: llvm/trunk/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll
===================================================================
--- llvm/trunk/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll
+++ llvm/trunk/test/Transforms/GVN/PRE/pre-load-implicit-cf-updates.ll
@@ -0,0 +1,118 @@
+; RUN: opt -S -gvn -enable-load-pre < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; These tests exercise situations when instructions that were first instructions
+; with implicit control flow get removed. We make sure that after that we don't
+; face crashes and are still able to perform PRE correctly.
+
+declare i32 @foo(i32 %arg) #0
+
+define hidden void @test_01(i32 %x, i32 %y) {
+
+; c2 only throws if c1 throws, so it can be safely removed and then PRE can
+; hoist the load out of loop.
+
+; CHECK-LABEL: @test_01
+; CHECK:       entry:
+; CHECK-NEXT:    %c1 = call i32 @foo(i32 %x)
+; CHECK-NEXT:    %val.pre = load i32, i32* null, align 8
+; CHECK-NEXT:    br label %loop
+; CHECK:       loop:
+; CHECK-NEXT:    %c3 = call i32 @foo(i32 %val.pre)
+; CHECK-NEXT:    br label %loop
+
+entry:
+  %c1 = call i32 @foo(i32 %x)
+  br label %loop
+
+loop:
+  %c2 = call i32 @foo(i32 %x)
+  %val = load i32, i32* null, align 8
+  %c3 = call i32 @foo(i32 %val)
+  br label %loop
+}
+
+define hidden void @test_02(i32 %x, i32 %y) {
+
+; PRE is not allowed because c2 may throw.
+
+; CHECK-LABEL: @test_02
+; CHECK:       entry:
+; CHECK-NEXT:    %c1 = call i32 @foo(i32 %x)
+; CHECK-NEXT:    br label %loop
+; CHECK:       loop:
+; CHECK-NEXT:    %c2 = call i32 @foo(i32 %y)
+; CHECK-NEXT:    %val = load i32, i32* null, align 8
+; CHECK-NEXT:    %c3 = call i32 @foo(i32 %val)
+; CHECK-NEXT:    br label %loop
+
+entry:
+  %c1 = call i32 @foo(i32 %x)
+  br label %loop
+
+loop:
+  %c2 = call i32 @foo(i32 %y)
+  %val = load i32, i32* null, align 8
+  %c3 = call i32 @foo(i32 %val)
+  br label %loop
+}
+
+define hidden void @test_03(i32 %x, i32 %y) {
+
+; PRE of load is allowed because c2 only throws if c1 throws. c3 should
+; not be eliminated. c4 is eliminated because it only throws if c3 throws.
+
+; CHECK-LABEL: @test_03
+; CHECK:       entry:
+; CHECK-NEXT:    %c1 = call i32 @foo(i32 %x)
+; CHECK-NEXT:    %val.pre = load i32, i32* null, align 8
+; CHECK-NEXT:    br label %loop
+; CHECK:       loop:
+; CHECK-NEXT:    %c3 = call i32 @foo(i32 %y)
+; CHECK-NEXT:    %c5 = call i32 @foo(i32 %val.pre)
+; CHECK-NEXT:    br label %loop
+
+entry:
+  %c1 = call i32 @foo(i32 %x)
+  br label %loop
+
+loop:
+  %c2 = call i32 @foo(i32 %x)
+  %val = load i32, i32* null, align 8
+  %c3 = call i32 @foo(i32 %y)
+  %val2 = load i32, i32* null, align 8
+  %c4 = call i32 @foo(i32 %y)
+  %c5 = call i32 @foo(i32 %val)
+  br label %loop
+}
+
+define hidden void @test_04(i32 %x, i32 %y) {
+
+; PRE is not allowed even after we remove c2 because now c3 prevents us from it.
+
+; CHECK-LABEL: @test_04
+; CHECK:       entry:
+; CHECK-NEXT:    %c1 = call i32 @foo(i32 %x)
+; CHECK-NEXT:    br label %loop
+; CHECK:       loop:
+; CHECK-NEXT:    %c3 = call i32 @foo(i32 %y)
+; CHECK-NEXT:    %val = load i32, i32* null, align 8
+; CHECK-NEXT:    %c5 = call i32 @foo(i32 %val)
+; CHECK-NEXT:    br label %loop
+
+entry:
+  %c1 = call i32 @foo(i32 %x)
+  br label %loop
+
+loop:
+  %c2 = call i32 @foo(i32 %x)
+  %c3 = call i32 @foo(i32 %y)
+  %val = load i32, i32* null, align 8
+  %c4 = call i32 @foo(i32 %y)
+  %c5 = call i32 @foo(i32 %val)
+  br label %loop
+}
+
+attributes #0 = { readnone }
Index: llvm/trunk/test/Transforms/GVN/PRE/pre-load.ll
===================================================================
--- llvm/trunk/test/Transforms/GVN/PRE/pre-load.ll
+++ llvm/trunk/test/Transforms/GVN/PRE/pre-load.ll
@@ -430,3 +430,161 @@
   call void @g(i32 %NOTPRE)
   cleanupret from %c2 unwind to caller
 }
+
+; Don't PRE load across potentially throwing calls.
+
+define i32 @test13(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) {
+
+; CHECK-LABEL: @test13(
+; CHECK: entry:
+; CHECK-NEXT: icmp eq
+; CHECK-NEXT: br i1
+
+entry:
+  %tobool = icmp eq i32 %a, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: if.then:
+; CHECK-NEXT: load i32
+; CHECK-NEXT: store i32
+
+if.then:
+  %uu = load i32, i32* %x, align 4
+  store i32 %uu, i32* %r, align 4
+  br label %if.end
+
+; CHECK: if.end:
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: load i32
+
+if.end:
+  call void @f()
+  %vv = load i32, i32* %x, align 4
+  ret i32 %vv
+}
+
+; Same as test13, but now the blocking function is not immediately in load's
+; block.
+
+define i32 @test14(i32* noalias nocapture readonly %x, i32* noalias nocapture %r, i32 %a) {
+
+; CHECK-LABEL: @test14(
+; CHECK: entry:
+; CHECK-NEXT: icmp eq
+; CHECK-NEXT: br i1
+
+entry:
+  %tobool = icmp eq i32 %a, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: if.then:
+; CHECK-NEXT: load i32
+; CHECK-NEXT: store i32
+
+if.then:
+  %uu = load i32, i32* %x, align 4
+  store i32 %uu, i32* %r, align 4
+  br label %if.end
+
+; CHECK: if.end:
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: load i32
+
+if.end:
+  call void @f()
+  br label %follow_1
+
+follow_1:
+  br label %follow_2
+
+follow_2:
+  %vv = load i32, i32* %x, align 4
+  ret i32 %vv
+}
+
+; Same as test13, but %x here is dereferenceable. A pointer that is
+; dereferenceable can be loaded from speculatively without a risk of trapping.
+; Since it is OK to speculate, PRE is allowed.
+
+define i32 @test15(i32* noalias nocapture readonly dereferenceable(8) %x, i32* noalias nocapture %r, i32 %a) {
+
+; CHECK-LABEL: @test15
+; CHECK: entry:
+; CHECK-NEXT: icmp eq
+; CHECK-NEXT: br i1
+
+entry:
+  %tobool = icmp eq i32 %a, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: entry.if.end_crit_edge:
+; CHECK-NEXT: %vv.pre = load i32, i32* %x, align 4
+; CHECK-NEXT: br label %if.end
+
+if.then:
+  %uu = load i32, i32* %x, align 4
+  store i32 %uu, i32* %r, align 4
+  br label %if.end
+
+; CHECK: if.then:
+; CHECK-NEXT: %uu = load i32, i32* %x, align 4
+; CHECK-NEXT: store i32 %uu, i32* %r, align 4
+; CHECK-NEXT: br label %if.end
+
+if.end:
+  call void @f()
+  %vv = load i32, i32* %x, align 4
+  ret i32 %vv
+
+; CHECK: if.end:
+; CHECK-NEXT: %vv = phi i32 [ %vv.pre, %entry.if.end_crit_edge ], [ %uu, %if.then ]
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: ret i32 %vv
+
+}
+
+; Same as test14, but %x here is dereferenceable. A pointer that is
+; dereferenceable can be loaded from speculatively without a risk of trapping.
+; Since it is OK to speculate, PRE is allowed.
+
+define i32 @test16(i32* noalias nocapture readonly dereferenceable(8) %x, i32* noalias nocapture %r, i32 %a) {
+
+; CHECK-LABEL: @test16(
+; CHECK: entry:
+; CHECK-NEXT: icmp eq
+; CHECK-NEXT: br i1
+
+entry:
+  %tobool = icmp eq i32 %a, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: entry.if.end_crit_edge:
+; CHECK-NEXT: %vv.pre = load i32, i32* %x, align 4
+; CHECK-NEXT: br label %if.end
+
+if.then:
+  %uu = load i32, i32* %x, align 4
+  store i32 %uu, i32* %r, align 4
+  br label %if.end
+
+; CHECK: if.then:
+; CHECK-NEXT: %uu = load i32, i32* %x, align 4
+; CHECK-NEXT: store i32 %uu, i32* %r, align 4
+; CHECK-NEXT: br label %if.end
+
+if.end:
+  call void @f()
+  br label %follow_1
+
+; CHECK: if.end:
+; CHECK-NEXT: %vv = phi i32 [ %vv.pre, %entry.if.end_crit_edge ], [ %uu, %if.then ]
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: ret i32 %vv
+
+follow_1:
+  br label %follow_2
+
+follow_2:
+  %vv = load i32, i32* %x, align 4
+  ret i32 %vv
+}