Index: lib/Transforms/InstCombine/InstCombinePHI.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -14,12 +14,18 @@
 #include "InstCombineInternal.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "instcombine"
 
+STATISTIC(NumPhiNodeUsersFullySimplified,
+          "Number of phi node users simplified into the phi inputs");
+STATISTIC(NumPhiNodeUsersHoisted,
+          "Number of phi node users hoisted into one of the inputs");
+
 /// If we have something like phi [add (a,b), add(a,c)] and if a/b/c and the
 /// adds all have a single use, turn this into a phi and a single binop.
 Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
@@ -875,6 +881,237 @@
   return ReplaceInstUsesWith(FirstPhi, Undef);
 }
 
+/// Take an instruction and a replacement for one of its operands and try to
+/// simplify that to a constant or some existing instruction.
+// FIXME: Generalize this and move it to InstructionSimplify.h.
+static Value *simplifyInstWithReplacedOperand(
+    Instruction &I, Value &Operand, Value &ReplacementOperand,
+    const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr,
+    AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr) {
+  const DataLayout &DL = I.getModule()->getDataLayout();
+  unsigned Opcode = I.getOpcode();
+  SmallVector<Value *, 2> Operands(I.value_op_begin(), I.value_op_end());
+  std::replace(Operands.begin(), Operands.end(), &Operand, &ReplacementOperand);
+  switch (Opcode) {
+  case Instruction::FAdd:
+  case Instruction::FSub:
+  case Instruction::FMul:
+  case Instruction::FDiv:
+  case Instruction::FRem:
+    return SimplifyFPBinOp(Opcode, Operands[0], Operands[1],
+                           I.getFastMathFlags(), DL, TLI, DT, AC, CxtI);
+
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::Mul:
+  case Instruction::SDiv:
+  case Instruction::UDiv:
+  case Instruction::SRem:
+  case Instruction::URem:
+  case Instruction::Shl:
+  case Instruction::LShr:
+  case Instruction::AShr:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+    return SimplifyBinOp(Opcode, Operands[0], Operands[1], DL, TLI, DT, AC,
+                         CxtI);
+
+  case Instruction::ICmp:
+    return SimplifyICmpInst(cast<ICmpInst>(I).getPredicate(), Operands[0],
+                            Operands[1], DL, TLI, DT, AC, CxtI);
+
+  case Instruction::FCmp:
+    return SimplifyFCmpInst(cast<FCmpInst>(I).getPredicate(), Operands[0],
+                            Operands[1], I.getFastMathFlags(), DL, TLI, DT, AC,
+                            CxtI);
+
+  case Instruction::Select:
+    return SimplifySelectInst(Operands[0], Operands[1], Operands[2], DL, TLI,
+                              DT, AC, CxtI);
+
+  case Instruction::GetElementPtr:
+    return SimplifyGEPInst(Operands, DL, TLI, DT, AC, CxtI);
+
+  case Instruction::InsertValue:
+    return SimplifyInsertValueInst(Operands[0], Operands[1],
+                                   cast<InsertValueInst>(I).getIndices(), DL,
+                                   TLI, DT, AC, CxtI);
+
+  case Instruction::ExtractValue:
+    return SimplifyExtractValueInst(Operands[0],
+                                    cast<ExtractValueInst>(I).getIndices(), DL,
+                                    TLI, DT, AC, CxtI);
+
+  case Instruction::ExtractElement:
+    return SimplifyExtractElementInst(Operands[0], Operands[1], DL, TLI, DT, AC,
+                                      CxtI);
+
+  default: {
+    SmallVector<Constant *, 2> ConstantOperands;
+    ConstantOperands.reserve(Operands.size());
+    for (auto *Op : Operands) {
+      if (auto *C = dyn_cast<Constant>(Op))
+        ConstantOperands.push_back(C);
+      else
+        return nullptr; // Can't constant fold.
+    }
+    return ConstantFoldInstOperands(Opcode, I.getType(), ConstantOperands, DL,
+                                    TLI);
+  }
+  }
+}
+
+/// See if there is a single user of a phi node that simplifies with all but
+/// one of the phi operands. If so, hoist the user and use a phi of the
+/// simplified values. Returns true if it was able to simplify the phi.
+static bool simplifyOrHoistPhiUsers(InstCombiner &IC, PHINode &PN) {
+  // Currently we only handle single-use phi nodes. This actually generalizes
+  // cleanly to multi-use phi nodes, but in that case we will grow the total
+  // number of phi nodes which might not be a clear win.
+  // TODO: We could extend this with a cost model to handle multi-use nodes as
+  // well.
+  if (!PN.hasOneUse())
+    return false;
+  Instruction &User = *cast<Instruction>(PN.user_back());
+  // Skip phi nodes which just use themselves.
+  if (&User == &PN)
+    return false;
+  // If the user isn't in the same basic block as the phi node, even though we
+  // *can* speculate it, we don't have a realistic cost model for whether this
+  // is necessary. This restricts the combine to hoisting operations out of
+  // straight line code.
+  // FIXME: This isn't terribly principled. We should have a better model for
+  // what is "canonical" here.
+  if (User.getParent() != PN.getParent())
+    return false;
+
+  auto *AC = IC.getAssumptionCache();
+  auto *DT = IC.getDominatorTree();
+  auto *TLI = IC.getTargetLibraryInfo();
+
+  // The goal of this is to speculate the user around the phi node. However, if
+  // there is a function call between the phi node and the user and that
+  // function call might never return, then doing this can change observable
+  // behavior unless the user is safe to speculate. So we model this as
+  // speculative execution.
+  if (!isSafeToSpeculativelyExecute(&User, /*Context*/nullptr, DT, TLI))
+    return false;
+
+  // We also don't handle any instruction which read or write memory so we
+  // don't have to scan for interfering instructions.
+  // TODO: We could potentially do a short scan to see if it happens to be safe
+  // even in the face of memory operations.
+  if (User.mayReadOrWriteMemory())
+    return false;
+
+  // Ok, this is a user that can be trivially speculated. Look to see if all
+  // but one of the phi's operands will trivially simplify away. We actually
+  // use this to build up the list of operands for the new phi node we'll
+  // create if everything goes well.
+  int NumOperands = PN.getNumIncomingValues();
+  SmallVector<Value *, 4> NewOperands;
+  NewOperands.reserve(NumOperands);
+  int HoistOpIdx = -1;
+  for (int i = 0; i < NumOperands; ++i) {
+    Value &IV = *PN.getIncomingValue(i);
+    BasicBlock &IB = *PN.getIncomingBlock(i);
+    // See if we can just simplify the user with the operand.
+    if (Value *SimplifiedOp = simplifyInstWithReplacedOperand(
+            User, PN, IV, TLI, DT, AC, IB.getTerminator())) {
+      // If this didn't simplify to an instruction, dominance is trivially satisfied.
+      Instruction *SimplifiedOpI = dyn_cast<Instruction>(SimplifiedOp);
+      if (!SimplifiedOpI) {
+        NewOperands.push_back(SimplifiedOp);
+        continue;
+      }
+
+      // If we simplified an operand back into the original phi node, we won't
+      // actually elimanet the use of this phi, so bail out.
+      if (SimplifiedOpI == &PN)
+        return false;
+
+      // Otherwise we have to see if the simplified instruction is available in
+      // the predecessor block.
+      if (DT->dominates(SimplifiedOpI->getParent(), &IB)) {
+        NewOperands.push_back(SimplifiedOpI);
+        continue;
+      }
+    }
+
+    // If we cannot simplify the user with the operand, and this is the first
+    // such operand, we will try to speculate the user into the incoming block.
+    // Record this and put an undef into the new phi operands temporarily.
+    if (HoistOpIdx != -1)
+      // Already needed to hoist one operand, give up now that we have a second.
+      return false;
+    // Make sure user doesn't come from the same basic block as the incoming
+    // value. If so, they may be related to each other and we could flip/flop
+    // between two values in that basic block endlessly. The whole goal of
+    // hoisting the user is to get it into the basic block with the incoming
+    // value to enable further combining.
+    if (User.getParent() == &IB)
+      return false;
+    // Also make sure that the incoming value isn't the
+    // terminator of the predecessor -- if it is we won't have a place to hoist
+    // the user.
+    if (&IV == IB.getTerminator())
+      return false;
+    // Make sure that the incoming block has a single successor (this
+    // block). Otherwise, we might be speculating code into a loop body.
+    // FIXME: We can do much better than this when LoopInfo is present.
+    BasicBlock *Succ = IB.getSingleSuccessor();
+    if (!Succ)
+      return false;
+    assert(Succ == PN.getParent() && "PN's basic block isn't a successor?!");
+    // Check that all of the other operands to the user are available in the
+    // incoming block.
+    for (Value *UserOp : User.operands())
+      if (Instruction *UserOpI = dyn_cast<Instruction>(UserOp))
+        if (!DT->dominates(UserOpI->getParent(), &IB))
+          return false;
+    HoistOpIdx = i;
+    NewOperands.push_back(UndefValue::get(User.getType()));
+  }
+  assert(NumOperands == (int)NewOperands.size() &&
+         "Unexpected number of operands!");
+
+  // Create the new phi node and RAUW from the old user to the new phi.
+  IC.Builder->SetInsertPoint(&PN);
+  auto *NewPN = IC.Builder->CreatePHI(User.getType(), NumOperands);
+  for (int i = 0; i < NumOperands; ++i)
+    NewPN->addIncoming(NewOperands[i], PN.getIncomingBlock(i));
+  IC.ReplaceInstUsesWith(User, NewPN);
+
+  // Check if we need to hoist the user into one of the incoming blocks. This
+  // may not be necessary if, for instance, every incoming value folded away.
+  if (HoistOpIdx != -1) {
+    ++NumPhiNodeUsersHoisted;
+
+    // Replace every use of the phi node with this index's incoming value.
+    for (Use &U : User.operands())
+      if (U.get() == &PN)
+        U = PN.getIncomingValue(HoistOpIdx);
+    assert(PN.use_empty() && "Stale uses of the phi node left!");
+
+    // Now splice the user into the incoming block just befor ethe terminator.
+    User.moveBefore(PN.getIncomingBlock(HoistOpIdx)->getTerminator());
+
+    // And wire it up to the new phi node's incoming value.
+    NewPN->setIncomingValue(HoistOpIdx, &User);
+  } else {
+    ++NumPhiNodeUsersFullySimplified;
+
+    // Everything folded away, erase the user completely.
+    IC.EraseInstFromFunction(User);
+  }
+
+  // Now erase the PN node which is dead, and return that we have successfully
+  // simplified it.
+  IC.EraseInstFromFunction(PN);
+  return true;
+}
+
 // PHINode simplification
 //
 Instruction *InstCombiner::visitPHINode(PHINode &PN) {
@@ -980,6 +1217,9 @@
       }
     }
 
+  if (simplifyOrHoistPhiUsers(*this, PN))
+    return nullptr;
+
   // If this is an integer PHI and we know that it has an illegal type, see if
   // it is only used by trunc or trunc(lshr) operations.  If so, we split the
   // PHI into the various pieces being extracted.  This sort of thing is
Index: test/Transforms/InstCombine/select.ll
===================================================================
--- test/Transforms/InstCombine/select.ll
+++ test/Transforms/InstCombine/select.ll
@@ -412,8 +412,8 @@
   %b = select i1 %a, i32 10, i32 20
   ret i32 %b
 ; CHECK-LABEL: @test25(
-; CHECK: %a = phi i32 [ 10, %jump ], [ 20, %entry ]
-; CHECK-NEXT: ret i32 %a
+; CHECK: %[[a:.*]] = phi i32 [ 10, %jump ], [ 20, %entry ]
+; CHECK-NEXT: ret i32 %[[a]]
 }
 
 define i32 @test26(i1 %cond)  {
@@ -427,8 +427,8 @@
   %b = select i1 %a, i32 20, i32 10
   ret i32 %b
 ; CHECK-LABEL: @test26(
-; CHECK: %a = phi i32 [ 20, %entry ], [ 10, %jump ]
-; CHECK-NEXT: ret i32 %a
+; CHECK: %[[a:.*]] = phi i32 [ 20, %entry ], [ 10, %jump ]
+; CHECK-NEXT: ret i32 %[[a]]
 }
 
 define i32 @test27(i1 %c, i32 %A, i32 %B)  {
@@ -441,8 +441,8 @@
   %b = select i1 %a, i32 %A, i32 %B
   ret i32 %b
 ; CHECK-LABEL: @test27(
-; CHECK: %a = phi i32 [ %A, %jump ], [ %B, %entry ]
-; CHECK-NEXT: ret i32 %a
+; CHECK: %[[a:.*]] = phi i32 [ %A, %jump ], [ %B, %entry ]
+; CHECK-NEXT: ret i32 %[[a]]
 }
 
 define i32 @test28(i1 %cond, i32 %A, i32 %B)  {
Index: test/Transforms/InstCombine/shift-sra.ll
===================================================================
--- test/Transforms/InstCombine/shift-sra.ll
+++ test/Transforms/InstCombine/shift-sra.ll
@@ -34,8 +34,8 @@
   ret i64 %S
   
 ; CHECK-LABEL: @test3(
-; CHECK: %P = phi i64
-; CHECK-NEXT: ret i64 %P
+; CHECK: %[[P:.*]] = phi i64
+; CHECK-NEXT: ret i64 %[[P]]
 }
 
 define i64 @test4(i1 %X, i64 %Y, i1 %Cond) {