diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -223,9 +223,9 @@ // uses all the registers that are livein to the successor blocks. for (const MachineBasicBlock *Succ : BB->successors()) { for (const auto &LI : Succ->liveins()) { - // TODO: Use LI.LaneMask to refine this. - for (MCRegUnit Unit : TRI->regunits(LI.PhysReg)) { - if (!Uses.contains(Unit)) + for (MCRegUnitMaskIterator U(LI.PhysReg, TRI); U.isValid(); ++U) { + auto [Unit, Mask] = *U; + if ((Mask & LI.LaneMask).any() && !Uses.contains(Unit)) Uses.insert(PhysRegSUOper(&ExitSU, -1, Unit)); } } diff --git a/llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll b/llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll --- a/llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll +++ b/llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll @@ -10,9 +10,9 @@ ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_add_f32_e32 v3, v2, v2 -; CHECK-NEXT: v_add_f32_e32 v0, v0, v0 ; CHECK-NEXT: s_mov_b64 s[4:5], 0 ; CHECK-NEXT: ; kill: killed $vgpr1 +; CHECK-NEXT: v_add_f32_e32 v0, v0, v0 ; CHECK-NEXT: .LBB0_1: ; %bb1 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: v_cmp_neq_f32_e32 vcc, 0, v2