Index: lib/CodeGen/ScheduleDAGInstrs.cpp =================================================================== --- lib/CodeGen/ScheduleDAGInstrs.cpp +++ lib/CodeGen/ScheduleDAGInstrs.cpp @@ -400,6 +400,18 @@ // earlier instruction. KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask; + if (MO.getSubReg() != 0 && MO.isUndef()) { + // There may be other subregister defs on the same instruction of the same + // register in later operands. The lanes of other defs will now be live + // after this instruction, so these should not be treated as killd by the + // instruction even though they appear to be killed in this one operand. + for (int I = OperIdx + 1, E = MI->getNumOperands(); I != E; ++I) { + const MachineOperand &OtherMO = MI->getOperand(I); + if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg) + KillLaneMask &= ~getLaneMaskForMO(OtherMO); + } + } + // Clear undef flag, we'll re-add it later once we know which subregister // Def is first. MO.setIsUndef(false); Index: test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir @@ -0,0 +1,86 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck %s + +# Deciding which lanes are killed needs to account for other defs in the +# instruction. +# +# addVRegDefDeps would encounter the %0.sub0 def and erase %0 from +# current vreg uses because it shared no lanes with %0.sub1 use on the +# nop. It then didn't see the lanemask when it reached the second +# subreg def, and failed to add the necessary dependency between the +# asm and S_NOP + +--- +name: no_live_subrange_at_use +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; CHECK-LABEL: name: no_live_subrange_at_use + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: %0.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3) + ; CHECK: INLINEASM &"", 1, 851978, def %0, 2147549193, %0(tied-def 3) + ; CHECK: INLINEASM &"", 1, 851977, [[DS_READ_B32_gfx9_]] + ; CHECK: INLINEASM &"", 1, 851978, def undef %0.sub0, 851978, def undef %0.sub1 + ; CHECK: S_NOP 0, implicit %0.sub1 + ; CHECK: $sgpr10 = S_MOV_B32 -1 + ; CHECK: S_BRANCH %bb.1 + bb.0: + undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec + %0.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + + bb.1: + %2:vgpr_32 = DS_READ_B32_gfx9 %1, 0, 0, implicit $exec :: (load 4, addrspace 3) + INLINEASM &"", 1, 851978, def %0, 2147549193, %0(tied-def 3) + INLINEASM &"", 1, 851977, %2 + INLINEASM &"", 1, 851978, def undef %0.sub0, 851978, def %0.sub1 + S_NOP 0, implicit %0.sub1 + $sgpr10 = S_MOV_B32 -1 + S_BRANCH %bb.1 + +... + +# Different operand order +--- +name: no_live_subrange_at_use_swap +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; CHECK-LABEL: name: no_live_subrange_at_use_swap + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: %0.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3) + ; CHECK: INLINEASM &"", 1, 851978, def %0, 2147549193, %0(tied-def 3) + ; CHECK: INLINEASM &"", 1, 851977, [[DS_READ_B32_gfx9_]] + ; CHECK: INLINEASM &"", 1, 851978, def undef %0.sub1, 851978, def undef %0.sub0 + ; CHECK: S_NOP 0, implicit %0.sub1 + ; CHECK: $sgpr10 = S_MOV_B32 -1 + ; CHECK: S_BRANCH %bb.1 + bb.0: + undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec + %0.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + + bb.1: + %2:vgpr_32 = DS_READ_B32_gfx9 %1, 0, 0, implicit $exec :: (load 4, addrspace 3) + INLINEASM &"", 1, 851978, def %0, 2147549193, %0(tied-def 3) + INLINEASM &"", 1, 851977, %2 + INLINEASM &"", 1, 851978, def %0.sub1, 851978, def undef %0.sub0 + S_NOP 0, implicit %0.sub1 + $sgpr10 = S_MOV_B32 -1 + S_BRANCH %bb.1 + +...