Index: include/llvm/Transforms/Utils/PredicateInfo.h
===================================================================
--- include/llvm/Transforms/Utils/PredicateInfo.h
+++ include/llvm/Transforms/Utils/PredicateInfo.h
@@ -53,6 +53,7 @@
 #define LLVM_TRANSFORMS_UTILS_PREDICATEINFO_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/ilist.h"
@@ -192,8 +193,8 @@
   typedef SmallVectorImpl<ValueDFS> ValueDFSStack;
   void convertUsesToDFSOrdered(Value *, SmallVectorImpl<ValueDFS> &);
   Value *materializeStack(unsigned int &, ValueDFSStack &, Value *);
-  bool stackIsInScope(const ValueDFSStack &, int DFSIn, int DFSOut) const;
-  void popStackUntilDFSScope(ValueDFSStack &, int DFSIn, int DFSOut);
+  bool stackIsInScope(const ValueDFSStack &, const ValueDFS &) const;
+  void popStackUntilDFSScope(ValueDFSStack &, const ValueDFS &);
   ValueInfo &getOrCreateValueInfo(Value *);
   const ValueInfo &getValueInfo(Value *) const;
   Function &F;
@@ -213,6 +214,9 @@
   DenseMap<Value *, unsigned int> ValueInfoNums;
   // OrderedBasicBlocks used during sorting uses
   DenseMap<const BasicBlock *, std::unique_ptr<OrderedBasicBlock>> OBBMap;
+  // The set of edges along which we can only handle phi uses, due to critical
+  // edges.
+  DenseSet<BasicBlockEdge> PhiUsesOnly;
 };
 
 // This pass does eager building and then printing of PredicateInfo. It is used
Index: lib/Transforms/Utils/PredicateInfo.cpp
===================================================================
--- lib/Transforms/Utils/PredicateInfo.cpp
+++ lib/Transforms/Utils/PredicateInfo.cpp
@@ -66,10 +66,12 @@
   int DFSIn = 0;
   int DFSOut = 0;
   unsigned int LocalNum = LN_Middle;
-  PredicateBase *PInfo = nullptr;
   // Only one of Def or Use will be set.
   Value *Def = nullptr;
   Use *Use = nullptr;
+  // Neither PInfo nor PhiOnly participate in the ordering
+  PredicateBase *PInfo = nullptr;
+  bool PhiOnly = false;
 };
 
 // This compares ValueDFS structures, creating OrderedBasicBlocks where
@@ -90,12 +92,43 @@
 
     bool SameBlock = std::tie(A.DFSIn, A.DFSOut) == std::tie(B.DFSIn, B.DFSOut);
 
+    // We want to put the def that will get used for a given set of phi uses,
+    // before those phi uses.
+    // So we sort by edge, then by def.
+    // Note that only phi nodes uses and defs can come last.
+    if (SameBlock && A.LocalNum == LN_Last && B.LocalNum == LN_Last)
+      return comparePHIRelated(A, B);
+
     if (!SameBlock || A.LocalNum != LN_Middle || B.LocalNum != LN_Middle)
       return std::tie(A.DFSIn, A.DFSOut, A.LocalNum, A.Def, A.Use) <
              std::tie(B.DFSIn, B.DFSOut, B.LocalNum, B.Def, B.Use);
     return localComesBefore(A, B);
   }
 
+  // For a phi use, or a non-materialized def, return the edge it represents.
+  const std::pair<const BasicBlock *, const BasicBlock *>
+  getBlockEdge(const ValueDFS &VD) const {
+    if (!VD.Def && VD.Use) {
+      auto *PHI = cast<PHINode>(VD.Use->getUser());
+      return std::make_pair(PHI->getIncomingBlock(*VD.Use), PHI->getParent());
+    }
+    // This is really a non-materialized def.
+    auto *PBranch = cast<PredicateBranch>(VD.PInfo);
+    return std::make_pair(PBranch->BranchBB, PBranch->SplitBB);
+  }
+
+  // For two phi related values, return the ordering.
+  bool comparePHIRelated(const ValueDFS &A, const ValueDFS &B) const {
+    auto &ABlockEdge = getBlockEdge(A);
+    auto &BBlockEdge = getBlockEdge(B);
+    if (ABlockEdge < BBlockEdge)
+      return true;
+    if (ABlockEdge > BBlockEdge)
+      return false;
+    // Now sort defs before uses
+    return std::tie(A.Def, A.Use) < std::tie(B.Def, B.Use);
+  }
+
   // Get the definition of an instruction that occurs in the middle of a block.
   Value *getMiddleDef(const ValueDFS &VD) const {
     if (VD.Def)
@@ -160,16 +193,37 @@
 
 } // namespace PredicateInfoClasses
 
-bool PredicateInfo::stackIsInScope(const ValueDFSStack &Stack, int DFSIn,
-                                   int DFSOut) const {
+bool PredicateInfo::stackIsInScope(const ValueDFSStack &Stack,
+                                   const ValueDFS &VDUse) const {
   if (Stack.empty())
     return false;
-  return DFSIn >= Stack.back().DFSIn && DFSOut <= Stack.back().DFSOut;
+  // If it's a phi only use, make sure it's for this phi node edge, and that the
+  // use is in a phi node.  If it's anything else, and the top of the stack is
+  // phionly, we need to pop the stack.  We deliberately sort phi uses next to
+  // the defs they must go with so that we can know it's time to pop the stack
+  // when we hit the end of the phi uses for a given def.
+  if (Stack.back().PhiOnly) {
+    if (!VDUse.Use)
+      return false;
+    auto *PHI = dyn_cast<PHINode>(VDUse.Use->getUser());
+    if (!PHI)
+      return false;
+    // The only phionly defs should be branch info.
+    auto *PBranch = dyn_cast<PredicateBranch>(Stack.back().PInfo);
+    assert(PBranch && "Only branches should have PHIOnly defs");
+    // Check edge
+    BasicBlock *EdgePred = PHI->getIncomingBlock(*VDUse.Use);
+    if (EdgePred != PBranch->BranchBB)
+      return false;
+  }
+
+  return (VDUse.DFSIn >= Stack.back().DFSIn &&
+          VDUse.DFSOut <= Stack.back().DFSOut);
 }
 
-void PredicateInfo::popStackUntilDFSScope(ValueDFSStack &Stack, int DFSIn,
-                                          int DFSOut) {
-  while (!Stack.empty() && !stackIsInScope(Stack, DFSIn, DFSOut))
+void PredicateInfo::popStackUntilDFSScope(ValueDFSStack &Stack,
+                                          const ValueDFS &VD) {
+  while (!Stack.empty() && !stackIsInScope(Stack, VD))
     Stack.pop_back();
 }
 
@@ -271,20 +325,11 @@
   SmallVector<Value *, 8> CmpOperands;
   BasicBlock *FirstBB = BI->getSuccessor(0);
   BasicBlock *SecondBB = BI->getSuccessor(1);
-  bool FirstSinglePred = FirstBB->getSinglePredecessor();
-  bool SecondSinglePred = SecondBB->getSinglePredecessor();
   SmallVector<BasicBlock *, 2> SuccsToProcess;
   bool isAnd = false;
   bool isOr = false;
-  // First make sure we have single preds for these successors, as we can't
-  // usefully propagate true/false info to them if there are multiple paths to
-  // them.
-  if (FirstSinglePred)
-    SuccsToProcess.push_back(FirstBB);
-  if (SecondSinglePred)
-    SuccsToProcess.push_back(SecondBB);
-  if (SuccsToProcess.empty())
-    return;
+  SuccsToProcess.push_back(FirstBB);
+  SuccsToProcess.push_back(SecondBB);
   // Second, see if we have a comparison we support
   SmallVector<Value *, 2> ComparisonsToProcess;
   CmpInst::Predicate Pred;
@@ -321,6 +366,8 @@
               new PredicateBranch(Op, BranchBB, Succ, Cmp, TakenEdge);
           AllInfos.push_back(PB);
           OperandInfo.Infos.push_back(PB);
+          if (!Succ->getSinglePredecessor())
+            PhiUsesOnly.insert({BranchBB, Succ});
         }
       }
       CmpOperands.clear();
@@ -368,29 +415,14 @@
         RenameIter == RenameStack.begin() ? OrigOp : (RenameIter - 1)->Def;
     ValueDFS &Result = *RenameIter;
     auto *ValInfo = Result.PInfo;
-    // For branches, we can just place the operand in the split block.  For
-    // assume, we have to place it right before the assume to ensure we dominate
-    // all of our uses.
+    // For branches, we can just place the operand in the branch block before
+    // the terminator.  For assume, we have to place it right before the assume
+    // to ensure we dominate all of our uses.  Always insert right before the
+    // relevant instruction (terminator, assume), so that we insert in proper
+    // order in the case of multiple predicateinfo in the same block.
     if (isa<PredicateBranch>(ValInfo)) {
       auto *PBranch = cast<PredicateBranch>(ValInfo);
-      // It's possible we are trying to insert multiple predicateinfos in the
-      // same block at the beginning of the block.  When we do this, we need to
-      // insert them one after the other, not one before the other. To see if we
-      // have already inserted predicateinfo into this block, we see if Op !=
-      // OrigOp && Op->getParent() == PBranch->SplitBB.  Op must be an
-      // instruction we inserted if it's not the original op.
-      BasicBlock::iterator InsertPt;
-      if (Op == OrigOp ||
-          cast<Instruction>(Op)->getParent() != PBranch->SplitBB) {
-        InsertPt = PBranch->SplitBB->begin();
-        // Insert after last phi node.
-        while (isa<PHINode>(InsertPt))
-          ++InsertPt;
-      } else {
-        // Insert after op.
-        InsertPt = ++(cast<Instruction>(Op)->getIterator());
-      }
-      IRBuilder<> B(PBranch->SplitBB, InsertPt);
+      IRBuilder<> B(PBranch->BranchBB->getTerminator());
       Function *IF = Intrinsic::getDeclaration(
           F.getParent(), Intrinsic::ssa_copy, Op->getType());
       Value *PIC = B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++));
@@ -400,12 +432,7 @@
       auto *PAssume = dyn_cast<PredicateAssume>(ValInfo);
       assert(PAssume &&
              "Should not have gotten here without it being an assume");
-      // Unlike above, this should already insert in the right order when we
-      // insert multiple predicateinfos in the same block.  Because we are
-      // always inserting right before the assume (instead of the beginning of a
-      // block), newer insertions will end up after older ones.
-      IRBuilder<> B(PAssume->AssumeInst->getParent(),
-                    PAssume->AssumeInst->getIterator());
+      IRBuilder<> B(PAssume->AssumeInst);
       Function *IF = Intrinsic::getDeclaration(
           F.getParent(), Intrinsic::ssa_copy, Op->getType());
       Value *PIC = B.CreateCall(IF, Op);
@@ -447,28 +474,49 @@
     // created otherwise.
     for (auto &PossibleCopy : ValueInfo.Infos) {
       ValueDFS VD;
-      BasicBlock *CopyBB = nullptr;
       // Determine where we are going to place the copy by the copy type.
       // The predicate info for branches always come first, they will get
       // materialized in the split block at the top of the block.
       // The predicate info for assumes will be somewhere in the middle,
       // it will get materialized in front of the assume.
-      if (const auto *PBranch = dyn_cast<PredicateBranch>(PossibleCopy)) {
-        CopyBB = PBranch->SplitBB;
-        VD.LocalNum = LN_First;
-      } else if (const auto *PAssume =
-                     dyn_cast<PredicateAssume>(PossibleCopy)) {
-        CopyBB = PAssume->AssumeInst->getParent();
+      if (const auto *PAssume = dyn_cast<PredicateAssume>(PossibleCopy)) {
         VD.LocalNum = LN_Middle;
-      } else
-        llvm_unreachable("Unhandled predicate info type");
-      DomTreeNode *DomNode = DT.getNode(CopyBB);
-      if (!DomNode)
-        continue;
-      VD.DFSIn = DomNode->getDFSNumIn();
-      VD.DFSOut = DomNode->getDFSNumOut();
-      VD.PInfo = PossibleCopy;
-      OrderedUses.push_back(VD);
+        DomTreeNode *DomNode = DT.getNode(PAssume->AssumeInst->getParent());
+        if (!DomNode)
+          continue;
+        VD.DFSIn = DomNode->getDFSNumIn();
+        VD.DFSOut = DomNode->getDFSNumOut();
+        VD.PInfo = PossibleCopy;
+        OrderedUses.push_back(VD);
+      } else if (const auto *PBranch =
+                     dyn_cast<PredicateBranch>(PossibleCopy)) {
+        // If we can only do phi uses, we treat it like it's in the branch
+        // block, and handle it specially. We know that it goes last, and only
+        // dominate phi uses.
+        if (PhiUsesOnly.count({PBranch->BranchBB, PBranch->SplitBB})) {
+          VD.LocalNum = LN_Last;
+          auto *DomNode = DT.getNode(PBranch->BranchBB);
+          if (DomNode) {
+            VD.DFSIn = DomNode->getDFSNumIn();
+            VD.DFSOut = DomNode->getDFSNumOut();
+            VD.PInfo = PossibleCopy;
+            VD.PhiOnly = true;
+            OrderedUses.push_back(VD);
+          }
+        } else {
+          // Otherwise, we are in the split block (even though we perform
+          // insertion in the branch block).
+          // Insert a possible copy at the split block and before the branch.
+          VD.LocalNum = LN_First;
+          auto *DomNode = DT.getNode(PBranch->SplitBB);
+          if (DomNode) {
+            VD.DFSIn = DomNode->getDFSNumIn();
+            VD.DFSOut = DomNode->getDFSNumOut();
+            VD.PInfo = PossibleCopy;
+            OrderedUses.push_back(VD);
+          }
+        }
+      }
     }
 
     convertUsesToDFSOrdered(Op, OrderedUses);
@@ -492,10 +540,10 @@
                    << VD.DFSOut << ")\n");
 
       bool ShouldPush = (VD.Def || PossibleCopy);
-      bool OutOfScope = !stackIsInScope(RenameStack, VD.DFSIn, VD.DFSOut);
+      bool OutOfScope = !stackIsInScope(RenameStack, VD);
       if (OutOfScope || ShouldPush) {
         // Sync to our current scope.
-        popStackUntilDFSScope(RenameStack, VD.DFSIn, VD.DFSOut);
+        popStackUntilDFSScope(RenameStack, VD);
         ShouldPush |= (VD.Def || PossibleCopy);
         if (ShouldPush) {
           RenameStack.push_back(VD);
Index: test/Transforms/Util/PredicateInfo/diamond.ll
===================================================================
--- /dev/null
+++ test/Transforms/Util/PredicateInfo/diamond.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -print-predicateinfo < %s 2>&1 | FileCheck %s
+define i1 @f(i32 %x, i1 %y) {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:    br i1 [[Y:%.*]], label [[BB0:%.*]], label [[BB1:%.*]]
+; CHECK:       bb0:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sge i32 [[X:%.*]], 0
+; CHECK:         [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; CHECK-NEXT:    br i1 [[CMP]], label [[BB2:%.*]], label [[BB3:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[X2:%.*]] = add nuw nsw i32 [[X]], 1
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sge i32 [[X2]], 2
+; CHECK:         [[X2_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X2]])
+; CHECK-NEXT:    br i1 [[CMP2]], label [[BB2]], label [[BB3]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[X3:%.*]] = phi i32 [ [[X_0]], [[BB0]] ], [ [[X2_0]], [[BB1]] ]
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    ret i1 false
+;
+  br i1 %y, label %bb0, label %bb1
+  bb0:
+  %cmp = icmp sge i32 %x, 0  ; x > 0
+  br i1 %cmp, label %bb2, label %bb3
+  bb1:
+  %x2 = add nsw nuw i32 %x, 1
+  %cmp2 = icmp sge i32 %x2, 2     ; x+1 > 2 / x > 1
+  br i1 %cmp2, label %bb2, label %bb3
+  bb2:
+  %x3 = phi i32 [ %x, %bb0 ], [ %x2, %bb1 ]
+  br label %bb3
+  bb3:
+  ret i1 0
+}
+
+define i1 @g(i32 %x, i1 %y) {
+; CHECK-LABEL: @g(
+; CHECK-NEXT:    br i1 [[Y:%.*]], label [[BB0:%.*]], label [[BB1:%.*]]
+; CHECK:       bb0:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sge i32 [[X:%.*]], 0
+; CHECK:         [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; CHECK-NEXT:    br i1 [[CMP]], label [[BB3:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[X2:%.*]] = add nuw nsw i32 [[X]], 1
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sge i32 [[X2]], 2
+; CHECK:         [[X2_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X2]])
+; CHECK-NEXT:    br i1 [[CMP2]], label [[BB3]], label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[X3:%.*]] = phi i32 [ [[X_0]], [[BB0]] ], [ [[X2_0]], [[BB1]] ]
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    ret i1 false
+;
+  br i1 %y, label %bb0, label %bb1
+  bb0:
+  %cmp = icmp sge i32 %x, 0  ; x > 0
+  br i1 %cmp, label %bb3, label %bb2
+  bb1:
+  %x2 = add nsw nuw i32 %x, 1
+  %cmp2 = icmp sge i32 %x2, 2     ; x+1 > 2 / x > 1
+  br i1 %cmp2, label %bb3, label %bb2
+  bb2:
+  %x3 = phi i32 [ %x, %bb0 ], [ %x2, %bb1 ]
+  br label %bb3
+  bb3:
+  ret i1 0
+}
+
Index: test/Transforms/Util/PredicateInfo/testandor.ll
===================================================================
--- test/Transforms/Util/PredicateInfo/testandor.ll
+++ test/Transforms/Util/PredicateInfo/testandor.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -print-predicateinfo -analyze < %s 2>&1 | FileCheck %s
+; RUN: opt -print-predicateinfo < %s 2>&1 | FileCheck %s
 
 declare void @foo(i1)
 declare void @bar(i32)