Index: llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -39,7 +39,7 @@
   MCRegister CondReg;
   MCRegister ExecReg;
 
-  Register optimizeVcndVcmpPair(MachineBasicBlock &MBB);
+  bool optimizeVcndVcmpPair(MachineBasicBlock &MBB);
   bool optimizeElseBranch(MachineBasicBlock &MBB);
 
 public:
@@ -90,8 +90,8 @@
 static bool isDefBetween(const SIRegisterInfo &TRI, LiveIntervals *LIS,
                          Register Reg, const MachineInstr &Sel,
                          const MachineInstr &And) {
-  SlotIndex AndIdx = LIS->getInstructionIndex(And);
-  SlotIndex SelIdx = LIS->getInstructionIndex(Sel);
+  SlotIndex AndIdx = LIS->getInstructionIndex(And).getRegSlot();
+  SlotIndex SelIdx = LIS->getInstructionIndex(Sel).getRegSlot();
 
   if (Reg.isVirtual())
     return isDefBetween(LIS->getInterval(Reg), AndIdx, SelIdx);
@@ -119,21 +119,20 @@
 // required part of the pattern since V_CNDMASK_B32 writes zeroes for inactive
 // lanes.
 //
-// Returns %cc register on success.
-Register
-SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
+// Returns true on success.
+bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
   auto I = llvm::find_if(MBB.terminators(), [](const MachineInstr &MI) {
                            unsigned Opc = MI.getOpcode();
                            return Opc == AMDGPU::S_CBRANCH_VCCZ ||
                                   Opc == AMDGPU::S_CBRANCH_VCCNZ; });
   if (I == MBB.terminators().end())
-    return Register();
+    return false;
 
   auto *And = TRI->findReachingDef(CondReg, AMDGPU::NoSubRegister,
                                    *I, *MRI, LIS);
   if (!And || And->getOpcode() != AndOpc ||
       !And->getOperand(1).isReg() || !And->getOperand(2).isReg())
-    return Register();
+    return false;
 
   MachineOperand *AndCC = &And->getOperand(1);
   Register CmpReg = AndCC->getReg();
@@ -143,49 +142,49 @@
     CmpReg = AndCC->getReg();
     CmpSubReg = AndCC->getSubReg();
   } else if (And->getOperand(2).getReg() != Register(ExecReg)) {
-    return Register();
+    return false;
   }
 
   auto *Cmp = TRI->findReachingDef(CmpReg, CmpSubReg, *And, *MRI, LIS);
   if (!Cmp || !(Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e32 ||
                 Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e64) ||
       Cmp->getParent() != And->getParent())
-    return Register();
+    return false;
 
   MachineOperand *Op1 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src0);
   MachineOperand *Op2 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src1);
   if (Op1->isImm() && Op2->isReg())
     std::swap(Op1, Op2);
   if (!Op1->isReg() || !Op2->isImm() || Op2->getImm() != 1)
-    return Register();
+    return false;
 
   Register SelReg = Op1->getReg();
   auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, *MRI, LIS);
   if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
-    return Register();
+    return false;
 
   if (TII->hasModifiersSet(*Sel, AMDGPU::OpName::src0_modifiers) ||
       TII->hasModifiersSet(*Sel, AMDGPU::OpName::src1_modifiers))
-    return Register();
+    return false;
 
   Op1 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src0);
   Op2 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src1);
   MachineOperand *CC = TII->getNamedOperand(*Sel, AMDGPU::OpName::src2);
   if (!Op1->isImm() || !Op2->isImm() || !CC->isReg() ||
       Op1->getImm() != 0 || Op2->getImm() != 1)
-    return Register();
+    return false;
 
   Register CCReg = CC->getReg();
 
   // If there was a def between the select and the and, we would need to move it
   // to fold this.
   if (isDefBetween(*TRI, LIS, CCReg, *Sel, *And))
-    return Register();
+    return false;
 
+  // TODO: Guard against implicit def operands?
   LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t'
                     << *And);
 
-  LIS->RemoveMachineInstrFromMaps(*And);
   MachineInstr *Andn2 =
       BuildMI(MBB, *And, And->getDebugLoc(), TII->get(Andn2Opc),
               And->getOperand(0).getReg())
@@ -196,34 +195,75 @@
   MachineOperand &Andn2SCC = Andn2->getOperand(3);
   assert(Andn2SCC.getReg() == AMDGPU::SCC);
   Andn2SCC.setIsDead(AndSCC.isDead());
+
+  SlotIndex AndIdx = LIS->ReplaceMachineInstrInMaps(*And, *Andn2);
   And->eraseFromParent();
-  LIS->InsertMachineInstrInMaps(*Andn2);
 
   LLVM_DEBUG(dbgs() << "=>\n\t" << *Andn2 << '\n');
 
+  SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp);
+  SlotIndex SelIdx = LIS->getInstructionIndex(*Sel);
+
+  LiveInterval *CmpLI =
+      CmpReg.isVirtual() ? &LIS->getInterval(CmpReg) : nullptr;
+
   // Try to remove compare. Cmp value should not used in between of cmp
   // and s_and_b64 if VCC or just unused if any other register.
-  if ((CmpReg.isVirtual() && MRI->use_nodbg_empty(CmpReg)) ||
+  if ((CmpReg.isVirtual() && CmpLI->Query(AndIdx.getRegSlot()).isKill()) ||
       (CmpReg == Register(CondReg) &&
        std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(),
                     [&](const MachineInstr &MI) {
                       return MI.readsRegister(CondReg, TRI); }))) {
     LLVM_DEBUG(dbgs() << "Erasing: " << *Cmp << '\n');
-
+    if (CmpLI)
+      LIS->removeVRegDefAt(*CmpLI, CmpIdx.getRegSlot());
     LIS->RemoveMachineInstrFromMaps(*Cmp);
     Cmp->eraseFromParent();
 
+    LiveInterval *SelLI =
+        SelReg.isVirtual() ? &LIS->getInterval(SelReg) : nullptr;
+
     // Try to remove v_cndmask_b32.
-    if (SelReg.isVirtual() && MRI->use_nodbg_empty(SelReg)) {
+    if (SelLI && SelLI->Query(CmpIdx.getRegSlot()).isKill()) {
       LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
+      if (SelLI)
+        LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot());
       LIS->RemoveMachineInstrFromMaps(*Sel);
       Sel->eraseFromParent();
     }
   }
 
-  return CCReg;
+  if (CCReg.isVirtual()) {
+    LiveInterval &CCLI = LIS->getInterval(CCReg);
+    auto CCQ = CCLI.Query(SelIdx.getRegSlot());
+    if (CCQ.valueIn()) {
+      CCLI.addSegment(LiveRange::Segment(SelIdx.getRegSlot(),
+                                         AndIdx.getRegSlot(), CCQ.valueIn()));
+    }
+
+    if (CC->getSubReg()) {
+      LaneBitmask Mask = TRI->getSubRegIndexLaneMask(CC->getSubReg());
+      BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+      CCLI.refineSubRanges(
+          Allocator, Mask,
+          [=](LiveInterval::SubRange &SR) {
+            auto CCQS = SR.Query(SelIdx.getRegSlot());
+            if (CCQS.valueIn()) {
+              SR.addSegment(LiveRange::Segment(
+                  SelIdx.getRegSlot(), AndIdx.getRegSlot(), CCQS.valueIn()));
+            }
+          },
+          *LIS->getSlotIndexes(), *TRI);
+      CCLI.removeEmptySubRanges();
+
+      SmallVector<LiveInterval *> SplitLIs;
+      LIS->splitSeparateComponents(CCLI, SplitLIs);
+    }
+  } else
+    LIS->removeAllRegUnitsForPhysReg(CCReg);
+
+  return true;
 }
 
 // Optimize sequence
@@ -330,8 +370,7 @@
         Changed = true;
       }
 
-      if (Register Reg = optimizeVcndVcmpPair(MBB)) {
-        RecalcRegs.insert(Reg);
+      if (optimizeVcndVcmpPair(MBB)) {
         RecalcRegs.insert(AMDGPU::VCC_LO);
         RecalcRegs.insert(AMDGPU::VCC_HI);
         RecalcRegs.insert(AMDGPU::SCC);
Index: llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness-wave32.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness-wave32.mir
@@ -0,0 +1,172 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# REQUIRES: asserts
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-regalloc -misched-only-block=999 -start-before=machine-scheduler -stop-after=greedy,0 -o - %s | FileCheck %s
+
+# This run line is a total hack to get the live intervals to make it
+# to the verifier. This requires asserts to use
+# -misched-only-block. We use the scheduler only because -start-before
+# doesn't see si-optimize-exec-masking-pre-ra unless the scheduler is
+# part of the pass pipeline.
+
+---
+name: subreg_value_undef
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: subreg_value_undef
+  ; CHECK: bb.0:
+  ; CHECK-NEXT: successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: liveins: $sgpr0_sgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
+  ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (s128), align 8, addrspace 1)
+  ; CHECK-NEXT: undef %2.sub1:sgpr_128 = S_MOV_B32 -1
+  ; CHECK-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, undef %2.sub0, implicit-def dead $scc
+  ; CHECK-NEXT: %2.sub1:sgpr_128 = COPY [[S_LOAD_DWORDX4_IMM]].sub0
+  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT: S_NOP 0, implicit %2.sub1
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0, 0, 0 :: (load (s128), align 8, addrspace 1)
+    undef %2.sub1:sgpr_128 = S_MOV_B32 -1
+    %3:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %2.sub0, implicit $exec
+    %4:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %3, implicit $exec
+    $vcc_lo = S_AND_B32 $exec_lo, %4, implicit-def dead $scc
+    %2.sub1:sgpr_128 = COPY %1.sub0
+    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+
+  bb.1:
+    S_NOP 0, implicit %2.sub1
+...
+
+---
+name: needs_distribute_0
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: needs_distribute_0
+  ; CHECK: bb.0:
+  ; CHECK-NEXT: successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: liveins: $sgpr0_sgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
+  ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (s128), align 8, addrspace 1)
+  ; CHECK-NEXT: undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+  ; CHECK-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %2.sub0, implicit-def dead $scc
+  ; CHECK-NEXT: dead %2.sub1:sreg_64_xexec = COPY [[S_LOAD_DWORDX4_IMM]].sub0
+  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0, 0, 0 :: (load (s128), align 8, addrspace 1)
+    undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+    %3:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %2.sub0, implicit $exec
+    %4:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %3, implicit $exec
+    $vcc_lo = S_AND_B32 $exec_lo, %4, implicit-def dead $scc
+    %2.sub1:sreg_64_xexec = COPY %1.sub0
+    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+
+  bb.1:
+...
+
+---
+name: needs_distribute_1
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: needs_distribute_1
+  ; CHECK: bb.0:
+  ; CHECK-NEXT: successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: liveins: $sgpr0_sgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
+  ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (s128), align 8, addrspace 1)
+  ; CHECK-NEXT: undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+  ; CHECK-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %2.sub0, implicit-def dead $scc
+  ; CHECK-NEXT: %2.sub1:sreg_64_xexec = COPY [[S_LOAD_DWORDX4_IMM]].sub0
+  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT: S_NOP 0, implicit %2.sub1
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0, 0, 0 :: (load (s128), align 8, addrspace 1)
+    undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+    %3:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %2.sub0, implicit $exec
+    %4:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %3, implicit $exec
+    $vcc_lo = S_AND_B32 $exec_lo, %4, implicit-def dead $scc
+    %2.sub1:sreg_64_xexec = COPY %1.sub0
+    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+
+  bb.1:
+    S_NOP 0, implicit %2.sub1
+...
+
+---
+name: needs_distribute_2
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: needs_distribute_2
+  ; CHECK: bb.0:
+  ; CHECK-NEXT: successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: liveins: $sgpr0_sgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
+  ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (s128), align 8, addrspace 1)
+  ; CHECK-NEXT: undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+  ; CHECK-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %2.sub0, implicit-def dead $scc
+  ; CHECK-NEXT: %2.sub1:sreg_64_xexec = COPY [[S_LOAD_DWORDX4_IMM]].sub0
+  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT: S_NOP 0, implicit %2
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0, 0, 0 :: (load (s128), align 8, addrspace 1)
+    undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+    %3:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %2.sub0, implicit $exec
+    %4:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %3, implicit $exec
+    $vcc_lo = S_AND_B32 $exec_lo, %4, implicit-def dead $scc
+    %2.sub1:sreg_64_xexec = COPY %1.sub0
+    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+
+  bb.1:
+    S_NOP 0, implicit %2
+...
+
+---
+name: needs_distribute_3
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: needs_distribute_3
+  ; CHECK: bb.0:
+  ; CHECK-NEXT: successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: liveins: $sgpr0_sgpr1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
+  ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (s128), align 8, addrspace 1)
+  ; CHECK-NEXT: undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+  ; CHECK-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %2.sub0, implicit-def dead $scc
+  ; CHECK-NEXT: %2.sub1:sreg_64_xexec = COPY [[S_LOAD_DWORDX4_IMM]].sub0
+  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT: S_NOP 0, implicit %2.sub0
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0, 0, 0 :: (load (s128), align 8, addrspace 1)
+    undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+    %3:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %2.sub0, implicit $exec
+    %4:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %3, implicit $exec
+    $vcc_lo = S_AND_B32 $exec_lo, %4, implicit-def dead $scc
+    %2.sub1:sreg_64_xexec = COPY %1.sub0
+    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+
+  bb.1:
+    S_NOP 0, implicit %2.sub0
+...
Index: llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
@@ -0,0 +1,331 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-regalloc -start-before=machine-scheduler -stop-after=greedy,0 -o - %s | FileCheck %s
+
+# Make sure liveness is correctly updated when folding the cndmask and
+# compare.
+
+---
+name: cndmask_same_block
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: cndmask_same_block
+  ; CHECK: bb.0:
+  ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT: successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+  ; CHECK-NEXT: S_BRANCH %bb.4
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def $scc
+  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+  ; CHECK-NEXT: S_BRANCH %bb.3
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT: successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: S_BRANCH %bb.4
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.4:
+  bb.0:
+    %0:sreg_64_xexec = IMPLICIT_DEF
+    S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+
+  bb.1:
+    S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+    S_BRANCH %bb.4
+
+  bb.2:
+    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+    %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+    $vcc = S_AND_B64 $exec, %2, implicit-def $scc
+    S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+    S_BRANCH %bb.3
+
+  bb.3:
+    S_BRANCH %bb.4
+
+  bb.4:
+...
+
+---
+name: cndmask_separate_block
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: cndmask_separate_block
+  ; CHECK: bb.0:
+  ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT: successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+  ; CHECK-NEXT: S_BRANCH %bb.4
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def $scc
+  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+  ; CHECK-NEXT: S_BRANCH %bb.3
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT: successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: S_BRANCH %bb.4
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.4:
+  bb.0:
+    %0:sreg_64_xexec = IMPLICIT_DEF
+    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+    S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+
+  bb.1:
+    S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+    S_BRANCH %bb.4
+
+  bb.2:
+    %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+    $vcc = S_AND_B64 $exec, %2, implicit-def $scc
+    S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+    S_BRANCH %bb.3
+
+  bb.3:
+    S_BRANCH %bb.4
+
+  bb.4:
+...
+
+---
+name: cndmask_same_block_other_cmpreg_use
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: cndmask_same_block_other_cmpreg_use
+  ; CHECK: bb.0:
+  ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT: successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+  ; CHECK-NEXT: S_BRANCH %bb.4
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[DEF]], implicit $exec
+  ; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_]], implicit $exec
+  ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def $scc
+  ; CHECK-NEXT: S_NOP 0, implicit [[V_CMP_NE_U32_e64_]]
+  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+  ; CHECK-NEXT: S_BRANCH %bb.3
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT: successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: S_BRANCH %bb.4
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.4:
+  bb.0:
+    %0:sreg_64_xexec = IMPLICIT_DEF
+    S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+
+  bb.1:
+    S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+    S_BRANCH %bb.4
+
+  bb.2:
+    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+    %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+    S_NOP 0, implicit %2
+    $vcc = S_AND_B64 $exec, %2, implicit-def $scc
+    S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+    S_BRANCH %bb.3
+
+  bb.3:
+    S_BRANCH %bb.4
+
+  bb.4:
+...
+
+---
+name: cndmask_same_block_liveout_use
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: cndmask_same_block_liveout_use
+  ; CHECK: bb.0:
+  ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT: successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+  ; CHECK-NEXT: S_BRANCH %bb.4
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[DEF]], implicit $exec
+  ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def $scc
+  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+  ; CHECK-NEXT: S_BRANCH %bb.3
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT: successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_e64_]]
+  ; CHECK-NEXT: S_BRANCH %bb.4
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.4:
+  bb.0:
+    %0:sreg_64_xexec = IMPLICIT_DEF
+    S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+
+  bb.1:
+    S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+    S_BRANCH %bb.4
+
+  bb.2:
+    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+    %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+    $vcc = S_AND_B64 $exec, %2, implicit-def $scc
+    S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+    S_BRANCH %bb.3
+
+  bb.3:
+    S_NOP 0, implicit %1
+    S_BRANCH %bb.4
+
+  bb.4:
+...
+
+# The legality check for removing the compare used to rely on
+# use_nodbg_empty, which fails on the undef use of %2. We would then
+# fail to update the interval correctly.
+
+---
+name: cmp_reg_extra_undef_use
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: cmp_reg_extra_undef_use
+  ; CHECK: bb.0:
+  ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, undef %1:sreg_64_xexec, implicit-def dead $scc
+  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT: successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: $vcc = S_AND_B64 $exec, undef %2:sreg_64_xexec, implicit-def dead $scc
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.2:
+  bb.0:
+    %0:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %1:sreg_64_xexec, implicit $exec
+    %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %0, implicit $exec
+    $vcc = S_AND_B64 $exec, %2, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.3, implicit killed $vcc
+
+  bb.1:
+    $vcc = S_AND_B64 $exec, undef %2, implicit-def dead $scc
+
+  bb.3:
+
+...
+
+# use_nodbg_empty is insufficient for erasing %1's def when removing
+# V_CNDMASK_B32.
+
+---
+name: cndmask_undef_extra_use
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: cndmask_undef_extra_use
+  ; CHECK: bb.0:
+  ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT: successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+  ; CHECK-NEXT: S_BRANCH %bb.4
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def $scc
+  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+  ; CHECK-NEXT: S_BRANCH %bb.3
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT: successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: S_NOP 0, implicit undef %1:vgpr_32
+  ; CHECK-NEXT: S_BRANCH %bb.4
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.4:
+  bb.0:
+    %0:sreg_64_xexec = IMPLICIT_DEF
+    S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+
+  bb.1:
+    S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+    S_BRANCH %bb.4
+
+  bb.2:
+    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+    %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+    $vcc = S_AND_B64 $exec, %2, implicit-def $scc
+    S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+    S_BRANCH %bb.3
+
+  bb.3:
+    S_NOP 0, implicit undef %1
+    S_BRANCH %bb.4
+
+  bb.4:
+...
+
+---
+name: cndmask_is_undef
+tracksRegLiveness: true
+body: |
+  bb.0:
+    %0:sreg_64_xexec = S_MOV_B64 0
+    S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+
+  bb.1:
+    S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+    S_BRANCH %bb.4
+
+  bb.2:
+    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %0, implicit $exec
+    %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+    $vcc = S_AND_B64 $exec, %2, implicit-def $scc
+    S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+    S_BRANCH %bb.3
+
+  bb.3:
+    S_BRANCH %bb.4
+
+  bb.4:
+...
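For reference, a minimal sketch of the fold this pass performs, using the same virtual register numbers as the wave64 tests above (the exact block numbers are illustrative):

    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
    %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
    $vcc = S_AND_B64 $exec, %2, implicit-def $scc
    S_CBRANCH_VCCNZ %bb.4, implicit $vcc

becomes

    $vcc = S_ANDN2_B64 $exec, %0, implicit-def $scc
    S_CBRANCH_VCCNZ %bb.4, implicit $vcc

Once the V_CNDMASK_B32 and V_CMP_NE_U32 are erased, %0 is read at the new S_ANDN2 rather than at the old V_CNDMASK_B32, which is why the updated code extends %0's live interval (and its subranges) from the old V_CNDMASK_B32 slot to the S_ANDN2 slot in place, instead of returning the register to the caller's RecalcRegs set for recomputation.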