Index: llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -221,14 +221,11 @@ LIS->RemoveMachineInstrFromMaps(*Cmp); Cmp->eraseFromParent(); - LiveInterval *SelLI = - SelReg.isVirtual() ? &LIS->getInterval(SelReg) : nullptr; // Try to remove v_cndmask_b32. - if (SelLI && SelLI->Query(CmpIdx.getRegSlot()).isKill()) { - LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n'); - - if (SelLI) - LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot()); + if (SelReg.isVirtual() && MRI->use_nodbg_empty(SelReg)) { + LLVM_DEBUG(dbgs() << "Erasing: " << *Sel); + LiveInterval &SelLI = LIS->getInterval(SelReg); + LIS->removeVRegDefAt(SelLI, SelIdx.getRegSlot()); LIS->RemoveMachineInstrFromMaps(*Sel); Sel->eraseFromParent(); } Index: llvm/test/CodeGen/AMDGPU/no-live-segment-at-use.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/no-live-segment-at-use.mir @@ -0,0 +1,64 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-regalloc -start-before=machine-scheduler -stop-after=greedy,0 -o - %s | FileCheck %s + + +# Can't delete V_CNDMASK_B32 when folding into the use in %bb.3 since +# it's also used in %bb.0 +--- +name: cndmask_multiple_uses +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: cndmask_multiple_uses + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %1:sreg_64_xexec, implicit $exec + ; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.5, implicit $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_]], implicit $exec + ; CHECK-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.5, implicit $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16 + + %0:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %1:sreg_64_xexec, implicit $exec + %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %0, implicit $exec + + bb.1: + S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc + + bb.2: + $vcc = S_AND_B64 $exec, %2, implicit-def dead $scc + S_CBRANCH_VCCNZ %bb.6, implicit killed $vcc + + bb.3: + %3:sreg_64_xexec = V_CMP_NE_U32_e64 1, %0, implicit $exec + $vcc = S_AND_B64 $exec, %3, implicit-def dead $scc + S_CBRANCH_VCCNZ %bb.6, implicit killed $vcc + + bb.5: + + bb.6: + +... Index: llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir +++ llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir @@ -182,7 +182,8 @@ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[DEF]], implicit $exec - ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def $scc + ; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_]], implicit $exec + ; CHECK-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def $scc ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit $vcc ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} @@ -272,14 +273,16 @@ ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[DEF]], implicit $exec + ; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_]], implicit $exec + ; CHECK-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def $scc ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit $vcc ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_NOP 0, implicit undef %1:vgpr_32 + ; CHECK-NEXT: S_NOP 0, implicit undef [[V_CNDMASK_B32_e64_]] ; CHECK-NEXT: S_BRANCH %bb.4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: @@ -355,3 +358,64 @@ bb.4: ... + +# Can't delete V_CNDMASK_B32 when folding into the use in %bb.3 since +# it's also used in %bb.0 +--- +name: cndmask_multiple_uses +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: cndmask_multiple_uses + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %1:sreg_64_xexec, implicit $exec + ; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.5, implicit $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_]], implicit $exec + ; CHECK-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.5, implicit $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16 + + %0:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %1:sreg_64_xexec, implicit $exec + %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %0, implicit $exec + + bb.1: + S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc + + bb.2: + $vcc = S_AND_B64 $exec, %2, implicit-def dead $scc + S_CBRANCH_VCCNZ %bb.6, implicit killed $vcc + + bb.3: + %3:sreg_64_xexec = V_CMP_NE_U32_e64 1, %0, implicit $exec + $vcc = S_AND_B64 $exec, %3, implicit-def dead $scc + S_CBRANCH_VCCNZ %bb.6, implicit killed $vcc + + bb.5: + + bb.6: + +...