diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -159,6 +159,9 @@
     return false;
 
   Register SelReg = Op1->getReg();
+  if (SelReg.isPhysical())
+    return false;
+
   auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, *MRI, LIS);
   if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
     return false;
@@ -264,13 +267,11 @@
 
   // Try to remove v_cndmask_b32.
   if (SelLI) {
-    bool CanRemoveSel = SelLI->Query(CmpIdx.getRegSlot()).isKill();
-    if (!CanRemoveSel) {
-      // Try to shrink the live interval and check for dead def instead.
-      LIS->shrinkToUses(SelLI, nullptr);
-      CanRemoveSel = SelLI->Query(SelIdx.getRegSlot()).isDeadDef();
-    }
-    if (CanRemoveSel) {
+    // Kill status must be checked before shrinking the live range.
+    bool IsKill = SelLI->Query(CmpIdx.getRegSlot()).isKill();
+    LIS->shrinkToUses(SelLI);
+    bool IsDead = SelLI->Query(SelIdx.getRegSlot()).isDeadDef();
+    if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) {
       LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
 
       LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot());
diff --git a/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir b/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
--- a/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
+++ b/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
@@ -272,6 +272,7 @@
   ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.3(0x40000000)
   ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   dead %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[DEF]], implicit $exec
   ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def $scc
   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.4, implicit $vcc
   ; CHECK-NEXT:   S_BRANCH %bb.3
@@ -279,7 +280,7 @@
   ; CHECK-NEXT: bb.3:
   ; CHECK-NEXT:   successors: %bb.4(0x80000000)
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_NOP 0, implicit undef %1:vgpr_32
+  ; CHECK-NEXT:   S_NOP 0, implicit undef %1
   ; CHECK-NEXT:   S_BRANCH %bb.4
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4:
@@ -473,3 +474,215 @@
   bb.4:
 
 ...
+
+# We would need to extend the live range of %0 to be live out of %bb.2
+
+---
+name: register_not_marked_liveout
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  returnsVoid: true
+body: |
+  ; CHECK-LABEL: name: register_not_marked_liveout
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 0
+  ; CHECK-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[S_MOV_B64_]], implicit $exec
+  ; CHECK-NEXT:   [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_I32_e64 0, [[V_CNDMASK_B32_e64_]], implicit $exec
+  ; CHECK-NEXT:   $exec = S_MOV_B64_term [[V_CMP_GT_I32_e64_]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $exec = S_MOV_B64_term [[V_CMP_GT_I32_e64_]]
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.3, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, [[S_MOV_B64_]], implicit-def dead $scc
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  bb.0:
+    liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+
+    %0:sreg_64_xexec = S_MOV_B64 0
+    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+    %2:sreg_64 = V_CMP_GT_I32_e64 0, %1, implicit $exec
+    $exec = S_MOV_B64_term %2
+
+  bb.1:
+    $exec = S_MOV_B64_term %2
+    S_CBRANCH_EXECZ %bb.3, implicit $exec
+
+  bb.2:
+    %3:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+    $vcc = S_AND_B64 $exec, %3, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
+
+  bb.3:
+
+...
+
+# Can't delete V_CNDMASK_B32 when folding into the use in %bb.3 since
+# it's also used in %bb.0
+---
+name: cndmask_multiple_uses
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: cndmask_multiple_uses
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %1:sreg_64_xexec, implicit $exec
+  ; CHECK-NEXT:   [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_]], implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.5(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vcc = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.5, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.5(0x40000000), %bb.4(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, undef %1:sreg_64_xexec, implicit-def dead $scc
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.5, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.5:
+  bb.0:
+    liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+
+    %0:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %1:sreg_64_xexec, implicit $exec
+    %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %0, implicit $exec
+
+  bb.1:
+    S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
+
+  bb.2:
+    $vcc = S_AND_B64 $exec, %2, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.6, implicit killed $vcc
+
+  bb.3:
+    %3:sreg_64_xexec = V_CMP_NE_U32_e64 1, %0, implicit $exec
+    $vcc = S_AND_B64 $exec, %3, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.6, implicit killed $vcc
+
+  bb.5:
+
+  bb.6:
+
+...
+
+# The live segment of %1 from V_CMP_GT_I32 needs to be shrunk after the use in %bb.1 is deleted
+---
+name: leftover_use_of_selreg_extends_liverange
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: leftover_use_of_selreg_extends_liverange
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[DEF]], implicit $exec
+  ; CHECK-NEXT:   [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_I32_e64 0, [[V_CNDMASK_B32_e64_]], implicit $exec
+  ; CHECK-NEXT:   $exec = S_MOV_B64_term [[V_CMP_GT_I32_e64_]]
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def dead $scc
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.3, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  bb.0:
+    liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+
+    %0:sreg_64_xexec = IMPLICIT_DEF
+    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+    %2:sreg_64 = V_CMP_GT_I32_e64 0, %1, implicit $exec
+    $exec = S_MOV_B64_term %2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    %3:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+    $vcc = S_AND_B64 $exec, %3, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.3, implicit killed $vcc
+
+  bb.2:
+
+  bb.3:
+
+...
+
+---
+name: leftover_use_of_selreg_extends_liverange_subrange
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: leftover_use_of_selreg_extends_liverange_subrange
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; CHECK-NEXT:   undef %1.sub1:vreg_64 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[DEF]], implicit $exec
+  ; CHECK-NEXT:   [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_I32_e64 0, %1.sub1, implicit $exec
+  ; CHECK-NEXT:   %1.sub0:vreg_64 = V_MOV_B32_e32 123, implicit $exec
+  ; CHECK-NEXT:   $exec = S_MOV_B64_term [[V_CMP_GT_I32_e64_]]
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def dead $scc
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.3, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_NOP 0, implicit %1.sub0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  bb.0:
+    liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+
+    %0:sreg_64_xexec = IMPLICIT_DEF
+    undef %1.sub1:vreg_64 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+    %1.sub0 = V_MOV_B32_e32 123, implicit $exec
+    %2:sreg_64 = V_CMP_GT_I32_e64 0, %1.sub1, implicit $exec
+    $exec = S_MOV_B64_term %2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    %3:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1.sub1, implicit $exec
+    $vcc = S_AND_B64 $exec, %3, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.3, implicit killed $vcc
+
+  bb.2:
+    S_NOP 0, implicit %1.sub0
+
+  bb.3:
+
+...