Index: lib/Transforms/InstCombine/InstCombinePHI.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -24,6 +24,52 @@
 
 #define DEBUG_TYPE "instcombine"
 
+/// Verify that if we fold the PHI arguments into a single operation with
+/// the PHI node as input, the original operations become unused.  This
+/// is the case if the only user(s) of the original operations are either
+/// this PHI node, or another PHI node with the same set of arguments
+/// (because the operation will be performed on all such nodes if it is
+/// performed on any such node).  In the case of multiple PHI nodes, we
+/// only want to perform the transformation if the number of PHI nodes
+/// (i.e. the number of new operations that will be introduced) is not
+/// larger than the number of original operations.
+static bool IsFoldPHIArgProfitable(PHINode &PN) {
+  SmallPtrSet<PHINode*, 8> PHIs;
+  SmallPtrSet<Value*, 8> Values;
+
+  PHIs.insert(&PN);
+  for (Value *IncValue : PN.incoming_values())
+    Values.insert(IncValue);
+
+  for (Value *IncValue : Values)
+    if (!isa<Constant>(IncValue))
+      for (auto &U : IncValue->uses()) {
+        // All uses must be PHI nodes.
+        auto *P = dyn_cast<PHINode>(U.getUser());
+        if (!P)
+          return false;
+        // If we already know this PHI node, we're good.  Otherwise,
+        // add it to our set.  If the set is now so large that the
+        // operation would become unprofitable, bail.
+        if (!PHIs.insert(P).second)
+          continue;
+        if (PHIs.size() > Values.size())
+          return false;
+        // Verify that the new PHI node has exactly the same set of
+        // argument values as the original one.
+        SmallPtrSet<Value*, 8> OtherValues;
+        for (Value *OtherValue : P->incoming_values()) {
+          OtherValues.insert(OtherValue);
+          if (!Values.count(OtherValue))
+            return false;
+        }
+        if (OtherValues.size() != Values.size())
+          return false;
+      }
+
+  return true;
+}
+
 /// The PHI arguments will be folded into a single operation with a PHI node
 /// as input. The debug location of the single operation will be the merged
 /// locations of the original PHI node arguments.
@@ -51,10 +97,10 @@
   Type *LHSType = LHSVal->getType();
   Type *RHSType = RHSVal->getType();
 
-  // Scan to see if all operands are the same opcode, and all have one use.
+  // Scan to see if all operands are the same opcode.
   for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
     Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
-    if (!I || I->getOpcode() != Opc || !I->hasOneUse() ||
+    if (!I || I->getOpcode() != Opc ||
         // Verify type of the LHS matches so we don't fold cmp's of different
         // types.
         I->getOperand(0)->getType() != LHSType ||
@@ -78,6 +124,10 @@
   if (!LHSVal && !RHSVal)
     return nullptr;
 
+  // Verify that folding is profitable (the original operands are dead).
+  if (!IsFoldPHIArgProfitable(PN))
+    return nullptr;
+
   // Otherwise, this is safe to transform!
 
   Value *InLHS = FirstInst->getOperand(0);
@@ -150,10 +200,10 @@
 
   bool AllInBounds = true;
 
-  // Scan to see if all operands are the same opcode, and all have one use.
+  // Scan to see if all operands are the same opcode.
   for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
     GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
-    if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() ||
+    if (!GEP || GEP->getType() != FirstInst->getType() ||
       GEP->getNumOperands() != FirstInst->getNumOperands())
       return nullptr;
 
@@ -203,6 +253,10 @@
   if (AllBasePointersAreAllocas)
     return nullptr;
 
+  // Verify that folding is profitable (the original operands are dead).
+  if (!IsFoldPHIArgProfitable(PN))
+    return nullptr;
+
   // Otherwise, this is safe to transform.  Insert PHI nodes for each operand
   // that is variable.
   SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
@@ -322,7 +376,7 @@
   // Check to see if all arguments are the same operation.
   for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
     LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
-    if (!LI || !LI->hasOneUse())
+    if (!LI)
       return nullptr;
 
     // We can't sink the load if the loaded value could be modified between
@@ -348,6 +402,10 @@
       return nullptr;
   }
 
+  // Verify that folding is profitable (the original operands are dead).
+  if (!IsFoldPHIArgProfitable(PN))
+    return nullptr;
+
   // Okay, they are all the same operation.  Create a new PHI node of the
   // correct type, and PHI together all of the LHS's of the instructions.
   PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(),
@@ -438,8 +496,8 @@
   unsigned NumConsts = 0;
   for (Value *V : Phi.incoming_values()) {
     if (auto *Zext = dyn_cast<ZExtInst>(V)) {
-      // All zexts must be identical and have one use.
-      if (Zext->getSrcTy() != NarrowType || !Zext->hasOneUse())
+      // All zexts must be identical.
+      if (Zext->getSrcTy() != NarrowType)
         return nullptr;
       NewIncoming.push_back(Zext->getOperand(0));
       NumZexts++;
@@ -465,6 +523,10 @@
   if (NumConsts == 0 || NumZexts < 2)
     return nullptr;
 
+  // Verify that folding is profitable (the original operands are dead).
+  if (!IsFoldPHIArgProfitable(Phi))
+    return nullptr;
+
   // All incoming values are zexts or constants that are safe to truncate.
   // Create a new phi node of the narrow type, phi together all of the new
   // operands, and zext the result back to the original type.
@@ -523,7 +585,7 @@
   // Check to see if all arguments are the same operation.
   for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
     Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
-    if (!I || !I->hasOneUse() || !I->isSameOperationAs(FirstInst))
+    if (!I || !I->isSameOperationAs(FirstInst))
       return nullptr;
     if (CastSrcTy) {
       if (I->getOperand(0)->getType() != CastSrcTy)
@@ -533,6 +595,10 @@
     }
   }
 
+  // Verify that folding is profitable (the original operands are dead).
+  if (!IsFoldPHIArgProfitable(PN))
+    return nullptr;
+
   // Okay, they are all the same operation.  Create a new PHI node of the
   // correct type, and PHI together all of the LHS's of the instructions.
   PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(),
@@ -891,10 +957,7 @@
   if (isa<Instruction>(PN.getIncomingValue(0)) &&
       isa<Instruction>(PN.getIncomingValue(1)) &&
       cast<Instruction>(PN.getIncomingValue(0))->getOpcode() ==
-      cast<Instruction>(PN.getIncomingValue(1))->getOpcode() &&
-      // FIXME: The hasOneUse check will fail for PHIs that use the value more
-      // than themselves more than once.
-      PN.getIncomingValue(0)->hasOneUse())
+      cast<Instruction>(PN.getIncomingValue(1))->getOpcode())
     if (Instruction *Result = FoldPHIArgOpIntoPHI(PN))
       return Result;
 
Index: test/Transforms/InstCombine/phi.ll
===================================================================
--- test/Transforms/InstCombine/phi.ll
+++ test/Transforms/InstCombine/phi.ll
@@ -879,3 +879,56 @@
   %cmp1 = icmp ne i32 %a.0, 0
   ret i1  %cmp1
 }
+
+
+; Verify that we fold arguments through multiple identical PHI nodes
+
+; CHECK-LABEL: @phi_multi
+; CHECK: loop_start:
+; CHECK:   %[[RES2:.*]] = phi i8 [ %valB, %entry ], [ %valB.next, %loop_next ]
+; CHECK: exit1:
+; CHECK-NEXT:  %[[RES1:.*]] = phi i8 [ %valB, %entry ], [ %valB.next, %loop_next ]
+; CHECK-NEXT:  %[[RES1A:.*]] = zext i8 %[[RES1]] to i64
+; CHECK-NEXT:  %[[RES1B:.*]] = or i64 %[[RES1A]], %c
+; CHECK-NEXT:  ret i64 %[[RES1B]]
+; CHECK: exit2:
+; CHECK-NEXT:  %[[RES2A:.*]] = zext i8 %[[RES2]] to i64
+; CHECK-NEXT:  %[[RES2B:.*]] = or i64 %[[RES2A]], %c
+; CHECK-NEXT:  ret i64 %[[RES2B]]
+
+define i64 @phi_multi(i8* %ptrA, i8* %ptrB, i64 %c) {
+entry:
+  %valA = load i8, i8* %ptrA
+  %valB = load i8, i8* %ptrB
+  %valB.ext = zext i8 %valB to i64
+  %valB.res = or i64 %c, %valB.ext
+  %cmp1 = icmp eq i8 %valA, %valB
+  br i1 %cmp1, label %exit1, label %loop_start
+
+loop_start:
+  %ptrA.0 = phi i8* [ %ptrA, %entry ], [ %ptrA.next, %loop_next ]
+  %ptrB.0 = phi i8* [ %ptrB, %entry ], [ %ptrB.next, %loop_next ]
+  %valA.0 = phi i8 [ %valA, %entry ], [ %valA.next, %loop_next ]
+  %valB.res.0 = phi i64 [ %valB.res, %entry ], [ %valB.next.res, %loop_next ]
+  %cmp2 = icmp eq i8 %valA.0, 0
+  br i1 %cmp2, label %exit2, label %loop_next
+
+loop_next:
+  %ptrA.next = getelementptr inbounds i8, i8* %ptrA.0, i64 1
+  %ptrB.next = getelementptr inbounds i8, i8* %ptrB.0, i64 1
+  %valA.next = load i8, i8* %ptrA.next
+  %valB.next = load i8, i8* %ptrB.next
+  %valB.next.ext = zext i8 %valB.next to i64
+  %valB.next.res = or i64 %c, %valB.next.ext
+  %cmp3 = icmp eq i8 %valA.next, %valB.next
+  br i1 %cmp3, label %loop_start, label %exit1
+
+exit1:
+  %valB.res.1 = phi i64 [ %valB.res, %entry ], [ %valB.next.res, %loop_next ]
+  ret i64 %valB.res.1
+
+exit2:
+  ret i64 %valB.res.0
+}
+
+
Index: test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
===================================================================
--- test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
+++ test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
@@ -280,26 +280,26 @@
 define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG: test5(
 ; EPILOG:   exit1.loopexit:
-; EPILOG-NEXT:      %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ],
+; EPILOG-NEXT:      %iv.pn = phi i64 [ %iv, %loop_exiting ], [ %iv, %loop_exiting ], [ %iv_next, %loop_exiting.1 ], [ %iv_next, %loop_exiting.1 ], [ %iv_next.1, %loop_exiting.2 ],
 ; EPILOG-NEXT:      br label %exit1
 ; EPILOG:   exit1.loopexit2:
-; EPILOG-NEXT:      %ivy.epil = add i64 %iv.epil, %add
 ; EPILOG-NEXT:      br label %exit1
 ; EPILOG:   exit1:
-; EPILOG-NEXT:      %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %ivy.epil, %exit1.loopexit2 ]
+; EPILOG-NEXT:      %iv.pn.pn = phi i64 [ %iv.pn, %exit1.loopexit ], [ %iv.epil, %exit1.loopexit2 ]
+; EPILOG-NEXT:      %result = add i64 %iv.pn.pn, %add
 ; EPILOG-NEXT:      ret i64 %result
 ; EPILOG:   loop_latch.7:
 ; EPILOG:      %niter.nsub.7 = add i64 %niter, -8
 
 ; PROLOG: test5(
 ; PROLOG:   exit1.loopexit:
-; PROLOG-NEXT:      %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ],
+; PROLOG-NEXT:      %iv.pn = phi i64 [ %iv, %loop_exiting ], [ %iv, %loop_exiting ], [ %iv_next, %loop_exiting.1 ], [ %iv_next, %loop_exiting.1 ], [ %iv_next.1, %loop_exiting.2 ],
 ; PROLOG-NEXT:      br label %exit1
 ; PROLOG:   exit1.loopexit1:
-; PROLOG-NEXT:      %ivy.prol = add i64 %iv.prol, %add
 ; PROLOG-NEXT:      br label %exit1
 ; PROLOG:   exit1:
-; PROLOG-NEXT:      %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %ivy.prol, %exit1.loopexit1 ]
+; PROLOG-NEXT:      %iv.pn.pn = phi i64 [ %iv.pn, %exit1.loopexit ], [ %iv.prol, %exit1.loopexit1 ]
+; PROLOG-NEXT:      %result = add i64 %iv.pn.pn, %add
 ; PROLOG-NEXT:      ret i64 %result
 ; PROLOG:   loop_latch.7:
 ; PROLOG:      %iv_next.7 = add nsw i64 %iv, 8