Index: include/llvm/Transforms/Utils/PredicateInfo.h
===================================================================
--- include/llvm/Transforms/Utils/PredicateInfo.h
+++ include/llvm/Transforms/Utils/PredicateInfo.h
@@ -53,6 +53,7 @@
 #define LLVM_TRANSFORMS_UTILS_PREDICATEINFO_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/ilist.h"
@@ -196,8 +197,8 @@
   typedef SmallVectorImpl<ValueDFS> ValueDFSStack;
   void convertUsesToDFSOrdered(Value *, SmallVectorImpl<ValueDFS> &);
   Value *materializeStack(unsigned int &, ValueDFSStack &, Value *);
-  bool stackIsInScope(const ValueDFSStack &, int DFSIn, int DFSOut) const;
-  void popStackUntilDFSScope(ValueDFSStack &, int DFSIn, int DFSOut);
+  bool stackIsInScope(const ValueDFSStack &, const ValueDFS &) const;
+  void popStackUntilDFSScope(ValueDFSStack &, const ValueDFS &);
   ValueInfo &getOrCreateValueInfo(Value *);
   const ValueInfo &getValueInfo(Value *) const;
   Function &F;
@@ -217,6 +218,9 @@
   DenseMap<Value *, unsigned int> ValueInfoNums;
   // OrderedBasicBlocks used during sorting uses
   DenseMap<const BasicBlock *, std::unique_ptr<OrderedBasicBlock>> OBBMap;
+  // The set of edges along which we can only handle phi uses, due to critical
+  // edges.
+  DenseSet<BasicBlockEdge> PhiUsesOnly;
 };
 
 // This pass does eager building and then printing of PredicateInfo. It is used
Index: lib/Transforms/Utils/PredicateInfo.cpp
===================================================================
--- lib/Transforms/Utils/PredicateInfo.cpp
+++ lib/Transforms/Utils/PredicateInfo.cpp
@@ -66,10 +66,12 @@
   int DFSIn = 0;
   int DFSOut = 0;
   unsigned int LocalNum = LN_Middle;
-  PredicateBase *PInfo = nullptr;
   // Only one of Def or Use will be set.
   Value *Def = nullptr;
   Use *Use = nullptr;
+  // Neither PInfo nor PhiOnly participate in the ordering
+  PredicateBase *PInfo = nullptr;
+  bool PhiOnly = false;
 };
 
 // This compares ValueDFS structures, creating OrderedBasicBlocks where
@@ -90,11 +92,48 @@
 
     bool SameBlock = std::tie(A.DFSIn, A.DFSOut) == std::tie(B.DFSIn, B.DFSOut);
 
+    // We want to put the def that will get used for a given set of phi uses,
+    // before those phi uses.
+    // So we sort by edge, then by def.
+    // Note that only phi nodes uses and defs can come last.
+    if (SameBlock && A.LocalNum == LN_Last && B.LocalNum == LN_Last)
+      return comparePHIRelated(A, B);
+
     if (!SameBlock || A.LocalNum != LN_Middle || B.LocalNum != LN_Middle)
       return std::tie(A.DFSIn, A.DFSOut, A.LocalNum, A.Def, A.Use) <
              std::tie(B.DFSIn, B.DFSOut, B.LocalNum, B.Def, B.Use);
     return localComesBefore(A, B);
   }
+  bool comparePHIRelated(const ValueDFS &A, const ValueDFS &B) const {
+    std::pair<const BasicBlock *, const BasicBlock *> ABlockEdge;
+    std::pair<const BasicBlock *, const BasicBlock *> BBlockEdge;
+    if (!A.Def && A.Use) {
+      auto *PHI = cast<PHINode>(A.Use->getUser());
+      ABlockEdge.first = PHI->getIncomingBlock(*A.Use);
+      ABlockEdge.second = PHI->getParent();
+    } else {
+      // This is really a non-materialized def.
+      auto *PBranch = cast<PredicateBranch>(A.PInfo);
+      ABlockEdge.first = PBranch->BranchBB;
+      ABlockEdge.second = PBranch->SplitBB;
+    }
+    if (!B.Def && B.Use) {
+      auto *PHI = cast<PHINode>(B.Use->getUser());
+      BBlockEdge.first = PHI->getIncomingBlock(*B.Use);
+      BBlockEdge.second = PHI->getParent();
+    } else {
+      // This is really a non-materialized def.
+      auto *PBranch = cast<PredicateBranch>(B.PInfo);
+      BBlockEdge.first = PBranch->BranchBB;
+      BBlockEdge.second = PBranch->SplitBB;
+    }
+    if (ABlockEdge < BBlockEdge)
+      return true;
+    if (ABlockEdge > BBlockEdge)
+      return false;
+    // Now sort defs before uses
+    return std::tie(A.Def, A.Use) < std::tie(B.Def, B.Use);
+  }
 
   // This performs the necessary local basic block ordering checks to tell
   // whether A comes before B, where both are in the same basic block.
@@ -163,16 +202,37 @@
 
 } // namespace PredicateInfoClasses
 
-bool PredicateInfo::stackIsInScope(const ValueDFSStack &Stack, int DFSIn,
-                                   int DFSOut) const {
+bool PredicateInfo::stackIsInScope(const ValueDFSStack &Stack,
+                                   const ValueDFS &VDUse) const {
   if (Stack.empty())
     return false;
-  return DFSIn >= Stack.back().DFSIn && DFSOut <= Stack.back().DFSOut;
+  // If it's a phi only use, make sure it's for this phi node edge, and that the
+  // use is in a phi node.  If it's anything else, and the top of the stack is
+  // phionly, we need to pop the stack.  We deliberately sort phi uses next to
+  // the defs they must go with so that we can know it's time to pop the stack
+  // when we hit the end of the phi uses for a given def.
+  if (Stack.back().PhiOnly) {
+    if (!VDUse.Use)
+      return false;
+    auto *PHI = dyn_cast<PHINode>(VDUse.Use->getUser());
+    if (!PHI)
+      return false;
+    // The only phionly defs should be branch info.
+    auto *PBranch = dyn_cast<PredicateBranch>(Stack.back().PInfo);
+    assert(PBranch && "Only branches should have PHIOnly defs");
+    // Check edge
+    BasicBlock *EdgePred = PHI->getIncomingBlock(*VDUse.Use);
+    if (EdgePred != PBranch->BranchBB)
+      return false;
+  }
+
+  return (VDUse.DFSIn >= Stack.back().DFSIn &&
+          VDUse.DFSOut <= Stack.back().DFSOut);
 }
 
-void PredicateInfo::popStackUntilDFSScope(ValueDFSStack &Stack, int DFSIn,
-                                          int DFSOut) {
-  while (!Stack.empty() && !stackIsInScope(Stack, DFSIn, DFSOut))
+void PredicateInfo::popStackUntilDFSScope(ValueDFSStack &Stack,
+                                          const ValueDFS &VD) {
+  while (!Stack.empty() && !stackIsInScope(Stack, VD))
     Stack.pop_back();
 }
 
@@ -274,20 +334,11 @@
   SmallVector<Value *, 8> CmpOperands;
   BasicBlock *FirstBB = BI->getSuccessor(0);
   BasicBlock *SecondBB = BI->getSuccessor(1);
-  bool FirstSinglePred = FirstBB->getSinglePredecessor();
-  bool SecondSinglePred = SecondBB->getSinglePredecessor();
   SmallVector<BasicBlock *, 2> SuccsToProcess;
   bool isAnd = false;
   bool isOr = false;
-  // First make sure we have single preds for these successors, as we can't
-  // usefully propagate true/false info to them if there are multiple paths to
-  // them.
-  if (FirstSinglePred)
-    SuccsToProcess.push_back(FirstBB);
-  if (SecondSinglePred)
-    SuccsToProcess.push_back(SecondBB);
-  if (SuccsToProcess.empty())
-    return;
+  SuccsToProcess.push_back(FirstBB);
+  SuccsToProcess.push_back(SecondBB);
   // Second, see if we have a comparison we support
   SmallVector<Value *, 2> ComparisonsToProcess;
   CmpInst::Predicate Pred;
@@ -324,6 +375,8 @@
               new PredicateBranch(Op, BranchBB, Succ, Cmp, TakenEdge);
           AllInfos.push_back(PB);
           OperandInfo.Infos.push_back(PB);
+          if (!Succ->getSinglePredecessor())
+            PhiUsesOnly.insert({BranchBB, Succ});
         }
       }
       CmpOperands.clear();
@@ -371,29 +424,14 @@
         RenameIter == RenameStack.begin() ? OrigOp : (RenameIter - 1)->Def;
     ValueDFS &Result = *RenameIter;
     auto *ValInfo = Result.PInfo;
-    // For branches, we can just place the operand in the split block.  For
-    // assume, we have to place it right before the assume to ensure we dominate
-    // all of our uses.
+    // For branches, we can just place the operand in the branch block before
+    // the terminator.  For assume, we have to place it right before the assume
+    // to ensure we dominate all of our uses.  Always insert right before the
+    // relevant instruction (terminator, assume), so that we insert in proper
+    // order in the case of multiple predicateinfo in the same block.
     if (isa<PredicateBranch>(ValInfo)) {
       auto *PBranch = cast<PredicateBranch>(ValInfo);
-      // It's possible we are trying to insert multiple predicateinfos in the
-      // same block at the beginning of the block.  When we do this, we need to
-      // insert them one after the other, not one before the other. To see if we
-      // have already inserted predicateinfo into this block, we see if Op !=
-      // OrigOp && Op->getParent() == PBranch->SplitBB.  Op must be an
-      // instruction we inserted if it's not the original op.
-      BasicBlock::iterator InsertPt;
-      if (Op == OrigOp ||
-          cast<Instruction>(Op)->getParent() != PBranch->SplitBB) {
-        InsertPt = PBranch->SplitBB->begin();
-        // Insert after last phi node.
-        while (isa<PHINode>(InsertPt))
-          ++InsertPt;
-      } else {
-        // Insert after op.
-        InsertPt = ++(cast<Instruction>(Op)->getIterator());
-      }
-      IRBuilder<> B(PBranch->SplitBB, InsertPt);
+      IRBuilder<> B(PBranch->BranchBB->getTerminator());
       Function *IF = Intrinsic::getDeclaration(
           F.getParent(), Intrinsic::ssa_copy, Op->getType());
       Value *PIC = B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++));
@@ -403,12 +441,9 @@
       auto *PAssume = dyn_cast<PredicateAssume>(ValInfo);
       assert(PAssume &&
              "Should not have gotten here without it being an assume");
-      // Unlike above, this should already insert in the right order when we
-      // insert multiple predicateinfos in the same block.  Because we are
-      // always inserting right before the assume (instead of the beginning of a
-      // block), newer insertions will end up after older ones.
-      IRBuilder<> B(PAssume->AssumeInst->getParent(),
-                    PAssume->AssumeInst->getIterator());
+      IRBuilder<> B(PAssume->AssumeInst);
+      // PAssume->AssumeInst->getParent(),
+      //            PAssume->AssumeInst->getIterator());
       Function *IF = Intrinsic::getDeclaration(
           F.getParent(), Intrinsic::ssa_copy, Op->getType());
       Value *PIC = B.CreateCall(IF, Op);
@@ -450,28 +485,49 @@
     // created otherwise.
     for (auto &PossibleCopy : ValueInfo.Infos) {
       ValueDFS VD;
-      BasicBlock *CopyBB = nullptr;
       // Determine where we are going to place the copy by the copy type.
       // The predicate info for branches always come first, they will get
       // materialized in the split block at the top of the block.
       // The predicate info for assumes will be somewhere in the middle,
       // it will get materialized in front of the assume.
-      if (const auto *PBranch = dyn_cast<PredicateBranch>(PossibleCopy)) {
-        CopyBB = PBranch->SplitBB;
-        VD.LocalNum = LN_First;
-      } else if (const auto *PAssume =
-                     dyn_cast<PredicateAssume>(PossibleCopy)) {
-        CopyBB = PAssume->AssumeInst->getParent();
+      if (const auto *PAssume = dyn_cast<PredicateAssume>(PossibleCopy)) {
         VD.LocalNum = LN_Middle;
-      } else
-        llvm_unreachable("Unhandled predicate info type");
-      DomTreeNode *DomNode = DT.getNode(CopyBB);
-      if (!DomNode)
-        continue;
-      VD.DFSIn = DomNode->getDFSNumIn();
-      VD.DFSOut = DomNode->getDFSNumOut();
-      VD.PInfo = PossibleCopy;
-      OrderedUses.push_back(VD);
+        DomTreeNode *DomNode = DT.getNode(PAssume->AssumeInst->getParent());
+        if (!DomNode)
+          continue;
+        VD.DFSIn = DomNode->getDFSNumIn();
+        VD.DFSOut = DomNode->getDFSNumOut();
+        VD.PInfo = PossibleCopy;
+        OrderedUses.push_back(VD);
+      } else if (const auto *PBranch =
+                     dyn_cast<PredicateBranch>(PossibleCopy)) {
+        // If we can only do phi uses, we treat it like it's in the branch
+        // block, and handle it specially. We know that it goes last, and only
+        // dominate phi uses.
+        if (PhiUsesOnly.count({PBranch->BranchBB, PBranch->SplitBB})) {
+          VD.LocalNum = LN_Last;
+          auto *DomNode = DT.getNode(PBranch->BranchBB);
+          if (DomNode) {
+            VD.DFSIn = DomNode->getDFSNumIn();
+            VD.DFSOut = DomNode->getDFSNumOut();
+            VD.PInfo = PossibleCopy;
+            VD.PhiOnly = true;
+            OrderedUses.push_back(VD);
+          }
+        } else {
+          // Otherwise, we are in the split block (even though we perform
+          // insertion in the branch block).
+          // Insert a possible copy at the split block and before the branch.
+          VD.LocalNum = LN_First;
+          auto *DomNode = DT.getNode(PBranch->SplitBB);
+          if (DomNode) {
+            VD.DFSIn = DomNode->getDFSNumIn();
+            VD.DFSOut = DomNode->getDFSNumOut();
+            VD.PInfo = PossibleCopy;
+            OrderedUses.push_back(VD);
+          }
+        }
+      }
     }
 
     convertUsesToDFSOrdered(Op, OrderedUses);
@@ -495,10 +551,10 @@
                    << VD.DFSOut << ")\n");
 
       bool ShouldPush = (VD.Def || PossibleCopy);
-      bool OutOfScope = !stackIsInScope(RenameStack, VD.DFSIn, VD.DFSOut);
+      bool OutOfScope = !stackIsInScope(RenameStack, VD);
       if (OutOfScope || ShouldPush) {
         // Sync to our current scope.
-        popStackUntilDFSScope(RenameStack, VD.DFSIn, VD.DFSOut);
+        popStackUntilDFSScope(RenameStack, VD);
         ShouldPush |= (VD.Def || PossibleCopy);
         if (ShouldPush) {
           RenameStack.push_back(VD);
Index: test/Transforms/Util/PredicateInfo/condprop.ll
===================================================================
--- test/Transforms/Util/PredicateInfo/condprop.ll
+++ test/Transforms/Util/PredicateInfo/condprop.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -print-predicateinfo -S | FileCheck %s
+; RUN: opt  -print-predicateinfo -S < %s 2>&1 | FileCheck %s
 
 @a = external global i32		; <i32*> [#uses=7]
 
@@ -99,12 +99,12 @@
 ; CHECK-NEXT:    [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0
 ; CHECK-NEXT:    [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0
 ; CHECK-NEXT:    [[Z:%.*]] = and i1 [[XZ]], [[YZ]]
+; CHECK-NEXT:    [[XZ_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[XZ]])
+; CHECK-NEXT:    [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; CHECK-NEXT:    [[YZ_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[YZ]])
+; CHECK-NEXT:    [[Y_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]])
 ; CHECK-NEXT:    br i1 [[Z]], label [[BOTH_ZERO:%.*]], label [[NOPE:%.*]]
 ; CHECK:       both_zero:
-; CHECK-NEXT:    [[Y_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]])
-; CHECK-NEXT:    [[YZ_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[YZ]])
-; CHECK-NEXT:    [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
-; CHECK-NEXT:    [[XZ_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[XZ]])
 ; CHECK-NEXT:    call void @foo(i1 [[XZ_0]])
 ; CHECK-NEXT:    call void @foo(i1 [[YZ_0]])
 ; CHECK-NEXT:    call void @bar(i32 [[X_0]])
@@ -181,15 +181,15 @@
 define i1 @test5(i32 %x, i32 %y) {
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; CHECK-NEXT:    [[X_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; CHECK-NEXT:    [[Y_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]])
+; CHECK-NEXT:    [[Y_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]])
 ; CHECK-NEXT:    br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]]
 ; CHECK:       same:
-; CHECK-NEXT:    [[Y_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]])
-; CHECK-NEXT:    [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i32 [[X_0]], [[Y_0]]
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ; CHECK:       different:
-; CHECK-NEXT:    [[Y_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]])
-; CHECK-NEXT:    [[X_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
 ; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[X_1]], [[Y_1]]
 ; CHECK-NEXT:    ret i1 [[CMP3]]
 ;
@@ -257,15 +257,15 @@
 define i1 @test7(i32 %x, i32 %y) {
 ; CHECK-LABEL: @test7(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; CHECK-NEXT:    [[X_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; CHECK-NEXT:    [[Y_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]])
+; CHECK-NEXT:    [[Y_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]])
 ; CHECK-NEXT:    br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]]
 ; CHECK:       same:
-; CHECK-NEXT:    [[Y_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]])
-; CHECK-NEXT:    [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[X_0]], [[Y_0]]
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ; CHECK:       different:
-; CHECK-NEXT:    [[Y_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]])
-; CHECK-NEXT:    [[X_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
 ; CHECK-NEXT:    [[CMP3:%.*]] = icmp sgt i32 [[X_1]], [[Y_1]]
 ; CHECK-NEXT:    ret i1 [[CMP3]]
 ;
@@ -285,15 +285,15 @@
 define i1 @test7_fp(float %x, float %y) {
 ; CHECK-LABEL: @test7_fp(
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[X_0:%.*]] = call float @llvm.ssa.copy.f32(float [[X]])
+; CHECK-NEXT:    [[X_1:%.*]] = call float @llvm.ssa.copy.f32(float [[X]])
+; CHECK-NEXT:    [[Y_0:%.*]] = call float @llvm.ssa.copy.f32(float [[Y]])
+; CHECK-NEXT:    [[Y_1:%.*]] = call float @llvm.ssa.copy.f32(float [[Y]])
 ; CHECK-NEXT:    br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]]
 ; CHECK:       same:
-; CHECK-NEXT:    [[Y_0:%.*]] = call float @llvm.ssa.copy.f32(float [[Y]])
-; CHECK-NEXT:    [[X_0:%.*]] = call float @llvm.ssa.copy.f32(float [[X]])
 ; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ule float [[X_0]], [[Y_0]]
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ; CHECK:       different:
-; CHECK-NEXT:    [[Y_1:%.*]] = call float @llvm.ssa.copy.f32(float [[Y]])
-; CHECK-NEXT:    [[X_1:%.*]] = call float @llvm.ssa.copy.f32(float [[X]])
 ; CHECK-NEXT:    [[CMP3:%.*]] = fcmp ogt float [[X_1]], [[Y_1]]
 ; CHECK-NEXT:    ret i1 [[CMP3]]
 ;
@@ -362,10 +362,10 @@
 define i32 @test9(i32 %i, i32 %j) {
 ; CHECK-LABEL: @test9(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I:%.*]], [[J:%.*]]
+; CHECK-NEXT:    [[I_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[I]])
+; CHECK-NEXT:    [[J_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[J]])
 ; CHECK-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[RET:%.*]]
 ; CHECK:       cond_true:
-; CHECK-NEXT:    [[J_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[J]])
-; CHECK-NEXT:    [[I_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[I]])
 ; CHECK-NEXT:    [[DIFF:%.*]] = sub i32 [[I_0]], [[J_0]]
 ; CHECK-NEXT:    ret i32 [[DIFF]]
 ; CHECK:       ret:
@@ -387,10 +387,10 @@
 define i32 @test10(i32 %j, i32 %i) {
 ; CHECK-LABEL: @test10(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I:%.*]], [[J:%.*]]
+; CHECK-NEXT:    [[I_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[I]])
+; CHECK-NEXT:    [[J_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[J]])
 ; CHECK-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[RET:%.*]]
 ; CHECK:       cond_true:
-; CHECK-NEXT:    [[J_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[J]])
-; CHECK-NEXT:    [[I_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[I]])
 ; CHECK-NEXT:    [[DIFF:%.*]] = sub i32 [[I_0]], [[J_0]]
 ; CHECK-NEXT:    ret i32 [[DIFF]]
 ; CHECK:       ret:
@@ -415,16 +415,16 @@
 ; CHECK-NEXT:    [[V0:%.*]] = call i32 @yogibar()
 ; CHECK-NEXT:    [[V1:%.*]] = call i32 @yogibar()
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[V0]], [[V1]]
+; CHECK-NEXT:    [[V0_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[V0]])
+; CHECK-NEXT:    [[V1_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[V1]])
 ; CHECK-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[NEXT:%.*]]
 ; CHECK:       cond_true:
-; CHECK-NEXT:    [[V1_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[V1]])
 ; CHECK-NEXT:    ret i32 [[V1_0]]
 ; CHECK:       next:
-; CHECK-NEXT:    [[V0_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[V0]])
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[X:%.*]], [[V0_0]]
+; CHECK-NEXT:    [[V0_0_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[V0_0]])
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[COND_TRUE2:%.*]], label [[NEXT2:%.*]]
 ; CHECK:       cond_true2:
-; CHECK-NEXT:    [[V0_0_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[V0_0]])
 ; CHECK-NEXT:    ret i32 [[V0_0_1]]
 ; CHECK:       next2:
 ; CHECK-NEXT:    ret i32 0
@@ -452,12 +452,12 @@
 define i32 @test12(i32 %x) {
 ; CHECK-LABEL: @test12(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; CHECK-NEXT:    [[X_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
 ; CHECK-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
 ; CHECK:       cond_true:
-; CHECK-NEXT:    [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
 ; CHECK-NEXT:    br label [[RET:%.*]]
 ; CHECK:       cond_false:
-; CHECK-NEXT:    [[X_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
 ; CHECK-NEXT:    br label [[RET]]
 ; CHECK:       ret:
 ; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[X_0]], [[COND_TRUE]] ], [ [[X_1]], [[COND_FALSE]] ]
Index: test/Transforms/Util/PredicateInfo/diamond.ll
===================================================================
--- /dev/null
+++ test/Transforms/Util/PredicateInfo/diamond.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -print-predicateinfo < %s 2>&1 | FileCheck %s
+define i1 @f(i32 %x, i1 %y) {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:    br i1 [[Y:%.*]], label [[BB0:%.*]], label [[BB1:%.*]]
+; CHECK:       bb0:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sge i32 [[X:%.*]], 0
+; CHECK:         [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; CHECK-NEXT:    br i1 [[CMP]], label [[BB2:%.*]], label [[BB3:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[X2:%.*]] = add nuw nsw i32 [[X]], 1
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sge i32 [[X2]], 2
+; CHECK:         [[X2_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X2]])
+; CHECK-NEXT:    br i1 [[CMP2]], label [[BB2]], label [[BB3]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[X3:%.*]] = phi i32 [ [[X_0]], [[BB0]] ], [ [[X2_0]], [[BB1]] ]
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    ret i1 false
+;
+  br i1 %y, label %bb0, label %bb1
+  bb0:
+  %cmp = icmp sge i32 %x, 0  ; x > 0
+  br i1 %cmp, label %bb2, label %bb3
+  bb1:
+  %x2 = add nsw nuw i32 %x, 1
+  %cmp2 = icmp sge i32 %x2, 2     ; x+1 > 2 / x > 1
+  br i1 %cmp2, label %bb2, label %bb3
+  bb2:
+  %x3 = phi i32 [ %x, %bb0 ], [ %x2, %bb1 ]
+  br label %bb3
+  bb3:
+  ret i1 0
+}
+
+define i1 @g(i32 %x, i1 %y) {
+; CHECK-LABEL: @g(
+; CHECK-NEXT:    br i1 [[Y:%.*]], label [[BB0:%.*]], label [[BB1:%.*]]
+; CHECK:       bb0:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sge i32 [[X:%.*]], 0
+; CHECK:         [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; CHECK-NEXT:    br i1 [[CMP]], label [[BB3:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[X2:%.*]] = add nuw nsw i32 [[X]], 1
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sge i32 [[X2]], 2
+; CHECK:         [[X2_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X2]])
+; CHECK-NEXT:    br i1 [[CMP2]], label [[BB3]], label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[X3:%.*]] = phi i32 [ [[X_0]], [[BB0]] ], [ [[X2_0]], [[BB1]] ]
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    ret i1 false
+;
+  br i1 %y, label %bb0, label %bb1
+  bb0:
+  %cmp = icmp sge i32 %x, 0  ; x > 0
+  br i1 %cmp, label %bb3, label %bb2
+  bb1:
+  %x2 = add nsw nuw i32 %x, 1
+  %cmp2 = icmp sge i32 %x2, 2     ; x+1 > 2 / x > 1
+  br i1 %cmp2, label %bb3, label %bb2
+  bb2:
+  %x3 = phi i32 [ %x, %bb0 ], [ %x2, %bb1 ]
+  br label %bb3
+  bb3:
+  ret i1 0
+}
+
Index: test/Transforms/Util/PredicateInfo/testandor.ll
===================================================================
--- test/Transforms/Util/PredicateInfo/testandor.ll
+++ test/Transforms/Util/PredicateInfo/testandor.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -print-predicateinfo -analyze < %s 2>&1 | FileCheck %s
+; RUN: opt -print-predicateinfo < %s 2>&1 | FileCheck %s
 
 declare void @foo(i1)
 declare void @bar(i32)