diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -318,38 +318,63 @@
   LLVM_DEBUG(dbgs() << "markDefs " << PrintState(Flag) << ": " << UseMI);
   LiveQueryResult UseLRQ = LR.Query(LIS->getInstructionIndex(UseMI));
-  if (!UseLRQ.valueIn())
+  const VNInfo *Value = UseLRQ.valueIn();
+  if (!Value)
     return;
 
   // Note: this code assumes that lane masks on AMDGPU completely
   // cover registers.
+  const LaneBitmask UseLanes =
+      SubReg ? TRI->getSubRegIndexLaneMask(SubReg)
+             : (Reg.isVirtual() ? MRI->getMaxLaneMaskForVReg(Reg)
+                                : LaneBitmask::getNone());
+
+  // Perform a depth-first iteration of the LiveRange graph marking defs.
+  // Stop processing of a given branch when all use lanes have been defined.
+  // The first definition stops processing for a physical register.
+  struct PhiEntry {
+    const VNInfo *Phi;
+    unsigned PredIdx;
+    unsigned VisitIdx;
+    LaneBitmask DefinedLanes;
+
+    PhiEntry(const VNInfo *Phi, unsigned PredIdx, unsigned VisitIdx,
+             LaneBitmask DefinedLanes)
+        : Phi(Phi), PredIdx(PredIdx), VisitIdx(VisitIdx),
+          DefinedLanes(DefinedLanes) {}
+  };
+  SmallSetVector<const VNInfo *, 4> Visited;
+  SmallVector<PhiEntry, 2> PhiStack;
   LaneBitmask DefinedLanes;
-  LaneBitmask UseLanes;
-  if (SubReg) {
-    UseLanes = TRI->getSubRegIndexLaneMask(SubReg);
-  } else if (Reg.isVirtual()) {
-    UseLanes = MRI->getMaxLaneMaskForVReg(Reg);
-  }
-
-  SmallPtrSet<const VNInfo *, 4> Visited;
-  SmallVector<const VNInfo *, 4> ToProcess;
-  ToProcess.push_back(UseLRQ.valueIn());
+  unsigned NextPredIdx; // Only used for processing phi nodes
 
   do {
-    const VNInfo *Value = ToProcess.pop_back_val();
-    Visited.insert(Value);
+    const VNInfo *NextValue = nullptr;
+
+    if (!Visited.count(Value)) {
+      Visited.insert(Value);
+      // On first visit to a phi, start processing at the first predecessor
+      NextPredIdx = 0;
+    }
 
     if (Value->isPHIDef()) {
-      // Need to mark all defs used in the PHI node
+      // Each predecessor node in the phi must be processed as a subgraph
      const MachineBasicBlock *MBB = LIS->getMBBFromIndex(Value->def);
      assert(MBB && "Phi-def has no defining MBB");
-      for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
-                                                  PE = MBB->pred_end();
-           PI != PE; ++PI) {
+
+      // Find the next predecessor to process
+      unsigned Idx = NextPredIdx;
+      auto PI = MBB->pred_begin() + Idx;
+      auto PE = MBB->pred_end();
+      for (; PI != PE && !NextValue; ++PI, ++Idx) {
         if (const VNInfo *VN = LR.getVNInfoBefore(LIS->getMBBEndIdx(*PI))) {
           if (!Visited.count(VN))
-            ToProcess.push_back(VN);
+            NextValue = VN;
         }
       }
+
+      // If there are more predecessors to process, add the phi to the stack
+      if (PI != PE)
+        PhiStack.emplace_back(Value, Idx, Visited.size(), DefinedLanes);
     } else {
       MachineInstr *MI = LIS->getInstructionFromIndex(Value->def);
       assert(MI && "Def has no defining instruction");
@@ -370,17 +395,20 @@
           // Record if this instruction defined any of use
           HasDef |= Overlap.any();
 
-          // Check if all lanes of use have been defined
+          // Mark any lanes defined
           DefinedLanes |= OpLanes;
-          if ((DefinedLanes & UseLanes) != UseLanes) {
-            // Definition not complete; need to process input value
-            LiveQueryResult LRQ = LR.Query(LIS->getInstructionIndex(*MI));
-            if (const VNInfo *VN = LRQ.valueIn()) {
-              if (!Visited.count(VN))
-                ToProcess.push_back(VN);
-            }
+        }
+
+        // Check if all lanes of use have been defined
+        if ((DefinedLanes & UseLanes) != UseLanes) {
+          // Definition not complete; need to process input value
+          LiveQueryResult LRQ = LR.Query(LIS->getInstructionIndex(*MI));
+          if (const VNInfo *VN = LRQ.valueIn()) {
+            if (!Visited.count(VN))
+              NextValue = VN;
           }
         }
+
         // Only mark the instruction if it defines some part of the use
         if (HasDef)
           markInstruction(*MI, Flag, Worklist);
@@ -389,9 +417,21 @@
         markInstruction(*MI, Flag, Worklist);
       }
     }
-  } while (!ToProcess.empty());
+
+    if (!NextValue && !PhiStack.empty()) {
+      // Reached end of chain; revert to processing the last phi
+      PhiEntry &Entry = PhiStack.back();
+      NextValue = Entry.Phi;
+      NextPredIdx = Entry.PredIdx;
+      DefinedLanes = Entry.DefinedLanes;
+      // Rewind visited set to correct state
+      while (Visited.size() > Entry.VisitIdx)
+        Visited.pop_back();
+      PhiStack.pop_back();
+    }
 
-  assert(!Reg.isVirtual() || ((DefinedLanes & UseLanes) == UseLanes));
+    Value = NextValue;
+  } while (Value);
 }
 
 void SIWholeQuadMode::markOperand(const MachineInstr &MI,
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll
--- a/llvm/test/CodeGen/AMDGPU/wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -859,6 +859,10 @@
 ; CHECK-NEXT: ; %entry
 ; CHECK-NEXT: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
 ; CHECK: s_wqm_b64 exec, exec
+; CHECK: v_mov
+; CHECK: v_mov
+; CHECK: v_mov
+; CHECK: v_mov
 ; CHECK: s_and_b64 exec, exec, [[LIVE]]
 ; CHECK: image_store
 ; CHECK: s_wqm_b64 exec, exec
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.mir b/llvm/test/CodeGen/AMDGPU/wqm.mir
--- a/llvm/test/CodeGen/AMDGPU/wqm.mir
+++ b/llvm/test/CodeGen/AMDGPU/wqm.mir
@@ -259,3 +259,42 @@
     $vgpr1 = STRICT_WWM %3.sub1:vreg_64, implicit $exec
     SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
 ...
+
+---
+# Check that WQM marking occurs correctly through phi nodes in the live range graph.
+# If not, the initial V_MOV will not be in WQM.
+#
+#CHECK-LABEL: name: test_wqm_lr_phi
+#CHECK: COPY $exec
+#CHECK-NEXT: S_WQM
+#CHECK-NEXT: V_MOV_B32_e32 -10
+#CHECK-NEXT: V_MOV_B32_e32 0
+name: test_wqm_lr_phi
+tracksRegLiveness: true
+body: |
+  bb.0:
+    undef %0.sub0:vreg_64 = V_MOV_B32_e32 -10, implicit $exec
+    %0.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+    %1:sreg_64 = S_GETPC_B64
+    %2:sgpr_256 = S_LOAD_DWORDX8_IMM %1:sreg_64, 32, 0, 0
+
+  bb.1:
+    $vcc = V_CMP_LT_U32_e64 4, 4, implicit $exec
+    S_CBRANCH_VCCNZ %bb.3, implicit $vcc
+    S_BRANCH %bb.2
+
+  bb.2:
+    %0.sub0:vreg_64 = V_ADD_U32_e32 1, %0.sub1, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.3:
+    %0.sub1:vreg_64 = V_ADD_U32_e32 1, %0.sub1, implicit $exec
+    S_BRANCH %bb.4
+
+  bb.4:
+    %3:sgpr_128 = IMPLICIT_DEF
+    %4:vreg_128 = IMAGE_SAMPLE_V4_V2 %0:vreg_64, %2:sgpr_256, %3:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
+    $vgpr0 = COPY %4.sub0:vreg_128
+    $vgpr1 = COPY %4.sub1:vreg_128
+    SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
+...
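
Note for reviewers (illustration only; not part of the patch): markDefs now walks the LiveRange value graph depth-first instead of draining an unordered worklist, so that lanes accumulated along one phi predecessor are not wrongly credited to another. Each PhiEntry snapshots the next predecessor index, the visited-set depth, and the lanes defined so far; when a def chain ends, the walk backtracks to the most recent phi, rewinds that state, and descends into the next predecessor. The standalone C++ sketch below mirrors that pattern on a toy value graph; the Node type, integer node ids, and uint32_t lane masks are invented for the example, and nothing here uses LLVM.

// toy_markdefs.cpp - compile with: c++ -std=c++14 toy_markdefs.cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Toy stand-in for a LiveRange value: either a phi joining several
// incoming values, or a def that provides some lanes and may chain to
// an earlier input value (a partial subregister def).
struct Node {
  bool IsPhi = false;
  uint32_t Lanes = 0;     // lanes this (non-phi) def provides
  std::vector<int> Preds; // incoming values (phi only)
  int Input = -1;         // earlier input value (non-phi only), -1 if none
};

// Mark every def reachable from Start until UseLanes is fully covered,
// using the same explicit-stack DFS shape as the patched markDefs.
void markDefs(const std::vector<Node> &Graph, int Start, uint32_t UseLanes) {
  struct PhiEntry {
    int Phi;
    size_t PredIdx;        // next predecessor to try on backtrack
    size_t VisitIdx;       // visited-stack depth to rewind to
    uint32_t DefinedLanes; // lanes accumulated before entering the phi
  };
  std::vector<int> Visited; // insertion-ordered, like SmallSetVector
  std::vector<PhiEntry> PhiStack;
  uint32_t DefinedLanes = 0;
  size_t NextPredIdx = 0;

  auto Seen = [&Visited](int N) {
    for (int V : Visited)
      if (V == N)
        return true;
    return false;
  };

  int Value = Start;
  do {
    int NextValue = -1;
    if (!Seen(Value)) {
      Visited.push_back(Value);
      NextPredIdx = 0; // first visit to a phi starts at predecessor 0
    }

    const Node &N = Graph[Value];
    if (N.IsPhi) {
      // Descend into the next unvisited predecessor subgraph.
      size_t Idx = NextPredIdx;
      for (; Idx < N.Preds.size() && NextValue < 0; ++Idx)
        if (!Seen(N.Preds[Idx]))
          NextValue = N.Preds[Idx];
      // More predecessors remain: remember where to resume.
      if (Idx < N.Preds.size())
        PhiStack.push_back({Value, Idx, Visited.size(), DefinedLanes});
    } else {
      std::cout << "mark def %" << Value << "\n";
      DefinedLanes |= N.Lanes;
      // Use not fully covered yet: follow the chained input value.
      if ((DefinedLanes & UseLanes) != UseLanes && N.Input >= 0 &&
          !Seen(N.Input))
        NextValue = N.Input;
    }

    if (NextValue < 0 && !PhiStack.empty()) {
      // End of a chain: backtrack to the last phi and rewind state.
      const PhiEntry E = PhiStack.back();
      PhiStack.pop_back();
      NextValue = E.Phi;
      NextPredIdx = E.PredIdx;
      DefinedLanes = E.DefinedLanes;
      Visited.resize(E.VisitIdx); // drop everything visited past the phi
    }
    Value = NextValue;
  } while (Value >= 0);
}

int main() {
  // %2 = phi(%0, %1): each incoming def covers only half the use lanes,
  // so both must be found and marked through the phi.
  std::vector<Node> G(3);
  G[0] = {false, 0b01, {}, -1};
  G[1] = {false, 0b10, {}, -1};
  G[2] = {true, 0, {0, 1}, -1};
  markDefs(G, 2, 0b11); // prints: mark def %0, then mark def %1
}

Resetting DefinedLanes on each backtrack matters because lanes defined along one incoming path of a phi say nothing about the other paths. Keeping Visited insertion-ordered is what makes the rewind a simple truncation, which is presumably why the patch swaps SmallPtrSet for SmallSetVector.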