diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -52,14 +52,16 @@
     const auto [Value, IsOffset] = ValuesToInspect.pop_back_val();
     for (auto &U : Value->uses()) {
       auto *I = cast<Instruction>(U.getUser());
-
       if (auto *LI = dyn_cast<LoadInst>(I)) {
         // Ignore non-volatile loads, they are always ok.
         if (!LI->isSimple()) return false;
         continue;
       }
-
-      if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
+      if (isa<PHINode>(I)) {
+        ValuesToInspect.emplace_back(I, true);
+        continue;
+      }
+      if (isa<BitCastInst, AddrSpaceCastInst>(I)) {
         // If uses of the bitcast are ok, we are ok.
         ValuesToInspect.emplace_back(I, IsOffset);
         continue;
@@ -106,7 +108,8 @@
       }
 
-      // If this is isn't our memcpy/memmove, reject it as something we can't
-      // handle.
+      // If this isn't our memcpy/memmove, reject it as something we can't
+      // handle. If a PHI was already defined, then this MemTransferInst is a
+      // descendant of the PHI. Reject this case as well.
       MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
       if (!MI)
         return false;
@@ -241,7 +244,7 @@
 public:
   PointerReplacer(InstCombinerImpl &IC) : IC(IC) {}
 
-  bool collectUsers(Instruction &I);
+  bool collectUsers(Instruction &I, MemTransferInst *Copy);
   void replacePointer(Instruction &I, Value *V);
 
 private:
@@ -254,16 +257,34 @@
 };
 } // end anonymous namespace
 
-bool PointerReplacer::collectUsers(Instruction &I) {
+/// Recursively add the users of \p I (loads, PHIs, GEPs, bitcasts) to Worklist
+/// and return false on a user that blocks the replacement (volatile load).
+/// NOTE(review): nothing stops re-visiting a PHI that is part of a cycle.
+bool PointerReplacer::collectUsers(Instruction &I, MemTransferInst *Copy) {
   for (auto *U : I.users()) {
     auto *Inst = cast<Instruction>(&*U);
     if (auto *Load = dyn_cast<LoadInst>(Inst)) {
       if (Load->isVolatile())
         return false;
       Worklist.insert(Load);
-    } else if (isa<GetElementPtrInst>(Inst) || isa<BitCastInst>(Inst)) {
+    } else if (auto *PHI = dyn_cast<PHINode>(Inst)) {
+      // The destination of Copy must not flow directly into a PHI whose
+      // address space differs from the address space Copy reads from.
+      bool AddrSpaceDiffers = Copy->getSourceAddressSpace() !=
+                              PHI->getType()->getPointerAddressSpace();
+      for (Value *Incoming : PHI->incoming_values()) {
+        if (AddrSpaceDiffers && Incoming == Copy->getDest())
+          return false;
+        if (auto *IncomingInst = dyn_cast<Instruction>(Incoming))
+          Worklist.insert(IncomingInst);
+      }
+
+      Worklist.insert(PHI);
+      if (!collectUsers(*PHI, Copy))
+        return false;
+    } else if (isa<GetElementPtrInst, BitCastInst>(Inst)) {
       Worklist.insert(Inst);
-      if (!collectUsers(*Inst))
+      if (!collectUsers(*Inst, Copy))
         return false;
     } else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
       if (MI->isVolatile())
@@ -298,6 +319,21 @@
     IC.InsertNewInstWith(NewI, *LT);
     IC.replaceInstUsesWith(*LT, NewI);
     WorkMap[LT] = NewI;
+  } else if (auto *PHI = dyn_cast<PHINode>(I)) {
+    // Rebuild the PHI over the replaced pointers. An incoming value without a
+    // replacement (e.g. a function argument) is carried over unchanged.
+    // NOTE(review): assumes getReplacement yields null when nothing is mapped.
+    auto ReplacementOrSelf = [&](Value *V) {
+      Value *NewV = getReplacement(V);
+      return NewV ? NewV : V;
+    };
+    Type *NewTy = ReplacementOrSelf(PHI->getIncomingValue(0))->getType();
+    auto *NewPHI = PHINode::Create(NewTy, PHI->getNumIncomingValues(),
+                                   PHI->getName(), PHI);
+    for (unsigned Idx = 0, E = PHI->getNumIncomingValues(); Idx != E; ++Idx)
+      NewPHI->addIncoming(ReplacementOrSelf(PHI->getIncomingValue(Idx)),
+                          PHI->getIncomingBlock(Idx));
+    WorkMap[PHI] = NewPHI;
   } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
     auto *V = getReplacement(GEP->getPointerOperand());
     assert(V && "Operand not replaced");
@@ -432,10 +468,9 @@
       }
 
       PointerReplacer PtrReplacer(*this);
-      if (PtrReplacer.collectUsers(AI)) {
+      if (PtrReplacer.collectUsers(AI, Copy)) {
         for (Instruction *Delete : ToDelete)
           eraseInstFromFunction(*Delete);
-
         Value *Cast = Builder.CreateBitCast(TheSrc, DestTy);
         PtrReplacer.replacePointer(AI, Cast);
         ++NumGlobalCopies;
diff --git a/llvm/test/Transforms/InstCombine/replace-alloca-phi.ll b/llvm/test/Transforms/InstCombine/replace-alloca-phi.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/replace-alloca-phi.ll
@@ -0,0 +1,258 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=instcombine -S -o - %s | FileCheck %s
+
+target datalayout="p5:32:32-A5"
+
+@g1 = constant [32 x i8] zeroinitializer
+@g2 = addrspace(1) constant [32 x i8] zeroinitializer
+
+define i8 @remove_alloca_use_arg(i1 %cond) {
+; CHECK-LABEL: @remove_alloca_use_arg(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    br label [[SINK:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    br label [[SINK]]
+; CHECK:       sink:
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 2), [[IF]] ], [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 1), [[ELSE]] ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[PTR]], align 1
+; CHECK-NEXT:    ret i8 [[LOAD]]
+;
+entry:
+  %alloca = alloca [32 x i8], align 4, addrspace(1) ; only written by the memcpy from constant @g1
+  call void @llvm.memcpy.p1i8.p0i8.i64(ptr addrspace(1) %alloca, ptr @g1, i64 32, i1 false) ; copy exactly the 32 bytes that %alloca and @g1 hold
+  br i1 %cond, label %if, label %else
+
+if:
+  %val.if = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 2
+  br label %sink
+
+else:
+  %val.else = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 1
+  br label %sink
+
+sink:
+  %ptr = phi ptr addrspace(1) [ %val.if, %if ], [ %val.else, %else ] ; both incoming values are GEPs into %alloca
+  %load = load i8, ptr addrspace(1) %ptr
+  ret i8 %load
+}
+
+define i8 @volatile_load_keep_alloca(i1 %cond) {
+; CHECK-LABEL: @volatile_load_keep_alloca(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(1)
+; CHECK-NEXT:    call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef align 4 dereferenceable(32) [[ALLOCA]], ptr noundef nonnull align 16 dereferenceable(32) @g1, i64 32, i1 false)
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    [[VAL_IF:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(1) [[ALLOCA]], i64 0, i64 1
+; CHECK-NEXT:    br label [[SINK:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[VAL_ELSE:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(1) [[ALLOCA]], i64 0, i64 2
+; CHECK-NEXT:    br label [[SINK]]
+; CHECK:       sink:
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ [[VAL_IF]], [[IF]] ], [ [[VAL_ELSE]], [[ELSE]] ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load volatile i8, ptr addrspace(1) [[PTR]], align 1
+; CHECK-NEXT:    ret i8 [[LOAD]]
+;
+entry:
+  %alloca = alloca [32 x i8], align 4, addrspace(1)
+  call void @llvm.memcpy.p1i8.p0i8.i64(ptr addrspace(1) %alloca, ptr @g1, i64 32, i1 false) ; copy exactly the 32 bytes that %alloca and @g1 hold
+  br i1 %cond, label %if, label %else
+
+if:
+  %val.if = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 1
+  br label %sink
+
+else:
+  %val.else = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 2
+  br label %sink
+
+sink:
+  %ptr = phi ptr addrspace(1) [ %val.if, %if ], [ %val.else, %else ]
+  %load = load volatile i8, ptr addrspace(1) %ptr ; volatile user: the alloca and its memcpy are expected to stay
+  ret i8 %load
+}
+
+
+define i8 @no_memcpy_keep_alloca(i1 %cond) {
+; CHECK-LABEL: @no_memcpy_keep_alloca(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(1)
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    [[VAL_IF:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(1) [[ALLOCA]], i64 0, i64 1
+; CHECK-NEXT:    br label [[SINK:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[VAL_ELSE:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(1) [[ALLOCA]], i64 0, i64 2
+; CHECK-NEXT:    br label [[SINK]]
+; CHECK:       sink:
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ [[VAL_IF]], [[IF]] ], [ [[VAL_ELSE]], [[ELSE]] ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load volatile i8, ptr addrspace(1) [[PTR]], align 1
+; CHECK-NEXT:    ret i8 [[LOAD]]
+;
+entry:
+  %alloca = alloca [32 x i8], align 4, addrspace(1) ; no memcpy ever writes to this alloca
+  br i1 %cond, label %if, label %else
+
+if:
+  %val.if = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 1
+  br label %sink
+
+else:
+  %val.else = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 2
+  br label %sink
+
+sink:
+  %ptr = phi ptr addrspace(1) [ %val.if, %if ], [ %val.else, %else ] ; both incoming values are GEPs into %alloca
+  %load = load volatile i8, ptr addrspace(1) %ptr ; expected output keeps the alloca, the PHI and this load unchanged
+  ret i8 %load
+}
+
+define i8 @loop_phi_remove_alloca(i1 %cond) {
+; CHECK-LABEL: @loop_phi_remove_alloca(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[BB_0:%.*]]
+; CHECK:       bb.0:
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 1), [[ENTRY:%.*]] ], [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 2), [[BB_1:%.*]] ]
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[BB_1]], label [[EXIT:%.*]]
+; CHECK:       bb.1:
+; CHECK-NEXT:    br label [[BB_0]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[PTR]], align 1
+; CHECK-NEXT:    ret i8 [[LOAD]]
+;
+entry:
+  %alloca = alloca [32 x i8], align 4, addrspace(1)
+  call void @llvm.memcpy.p1i8.p0i8.i64(ptr addrspace(1) %alloca, ptr @g1, i64 32, i1 false) ; copy exactly the 32 bytes that %alloca and @g1 hold
+  %val1 = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 1
+  br label %bb.0
+
+bb.0:
+  %ptr = phi ptr addrspace(1) [ %val1, %entry ], [ %val2, %bb.1 ] ; loop-header PHI: %val2 is defined later, in bb.1
+  br i1 %cond, label %bb.1, label %exit
+
+bb.1:
+  %val2 = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 2
+  br label %bb.0
+
+exit:
+  %load = load i8, ptr addrspace(1) %ptr
+  ret i8 %load
+}
+
+define i32 @remove_alloca_ptr_arg(i1 %c, ptr %ptr) {
+; CHECK-LABEL: @remove_alloca_ptr_arg(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[JOIN:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    br label [[JOIN]]
+; CHECK:       join:
+; CHECK-NEXT:    [[PHI:%.*]] = phi ptr [ @g1, [[IF]] ], [ [[PTR:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[PHI]], align 4
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %alloca = alloca [32 x i8]
+  call void @llvm.memcpy.p0.p0.i64(ptr %alloca, ptr @g1, i64 32, i1 false) ; the only write to %alloca, copying constant @g1
+  br i1 %c, label %if, label %join
+
+if:
+  br label %join
+
+join:
+  %phi = phi ptr [ %alloca, %if ], [ %ptr, %entry ] ; second incoming value is an argument, not derived from %alloca
+  %v = load i32, ptr %phi
+  ret i32 %v
+}
+
+define i8 @loop_phi_late_memtransfer(i1 %cond) {
+; CHECK-LABEL: @loop_phi_late_memtransfer(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[BB_0:%.*]]
+; CHECK:       bb.0:
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 1), [[ENTRY:%.*]] ], [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 2), [[BB_1:%.*]] ]
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[BB_1]], label [[EXIT:%.*]]
+; CHECK:       bb.1:
+; CHECK-NEXT:    br label [[BB_0]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[PTR]], align 1
+; CHECK-NEXT:    ret i8 [[LOAD]]
+;
+entry:
+  %alloca = alloca [32 x i8], align 4, addrspace(1)
+  %val1 = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 1
+  br label %bb.0
+
+bb.0:
+  %ptr = phi ptr addrspace(1) [ %val1, %entry ], [ %val2, %bb.1 ]
+  br i1 %cond, label %bb.1, label %exit
+
+bb.1:
+  %val2 = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 2
+  call void @llvm.memcpy.p1i8.p0i8.i64(ptr addrspace(1) %alloca, ptr @g1, i64 32, i1 false) ; in the loop body, after %ptr is formed; copies exactly 32 bytes
+  br label %bb.0
+
+exit:
+  %load = load i8, ptr addrspace(1) %ptr
+  ret i8 %load
+}
+
+define i32 @test_memcpy_after_phi(i1 %cond, ptr %ptr) {
+; CHECK-LABEL: @test_memcpy_after_phi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca [32 x i8], align 1
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[JOIN:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    br label [[JOIN]]
+; CHECK:       join:
+; CHECK-NEXT:    [[PHI:%.*]] = phi ptr [ [[A]], [[IF]] ], [ [[PTR:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(32) [[PHI]], ptr noundef nonnull align 16 dereferenceable(32) @g1, i64 32, i1 false)
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[PHI]], align 4
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %a = alloca [32 x i8]
+  br i1 %cond, label %if, label %join
+
+if:
+  br label %join
+
+join:
+  %phi = phi ptr [ %a, %if ], [ %ptr, %entry ] ; merges the alloca with the pointer argument
+  call void @llvm.memcpy.p0.p0.i64(ptr %phi, ptr @g1, i64 32, i1 false) ; writes through the PHI, so the destination may be %ptr instead of %a
+  %v = load i32, ptr %phi
+  ret i32 %v
+}
+
+; The memcpy reads from addrspace(1) while the PHI using %a is in addrspace(0), so the alloca is expected to stay.
+define i32 @addrspace_diff_keep_alloca(i1 %cond, ptr %x) {
+; CHECK-LABEL: @addrspace_diff_keep_alloca(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca [32 x i8], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p1.i64(ptr noundef nonnull align 1 dereferenceable(32) [[A]], ptr addrspace(1) noundef align 16 dereferenceable(32) @g2, i64 32, i1 false)
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[JOIN:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    br label [[JOIN]]
+; CHECK:       join:
+; CHECK-NEXT:    [[PHI:%.*]] = phi ptr [ [[A]], [[IF]] ], [ [[X:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[PHI]], align 4
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %a = alloca [32 x i8]
+  call void @llvm.memcpy.p0.p1.i64(ptr %a, ptr addrspace(1) @g2, i64 32, i1 false)
+  br i1 %cond, label %if, label %join
+
+if:
+  br label %join
+
+join:
+  %phi = phi ptr [ %a, %if ], [ %x, %entry ]
+  %v = load i32, ptr %phi
+  ret i32 %v
+}
+
+declare void @llvm.memcpy.p1i8.p0i8.i64(ptr addrspace(1), ptr, i64, i1)
+declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
+declare void @llvm.memcpy.p0.p1.i64(ptr, ptr addrspace(1), i64, i1)