diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp --- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -541,7 +541,7 @@ MachineSSAUpdater SSAUpdater(*MF); LoopFinder LF(*DT, *PDT); PhiIncomingAnalysis PIA(*PDT); - SmallVector DeadPhis; + SmallVector Vreg1Phis; SmallVector IncomingBlocks; SmallVector IncomingRegs; SmallVector IncomingUpdated; @@ -550,118 +550,117 @@ #endif for (MachineBasicBlock &MBB : *MF) { - LF.initialize(MBB); - for (MachineInstr &MI : MBB.phis()) { - Register DstReg = MI.getOperand(0).getReg(); - if (!isVreg1(DstReg)) - continue; + if (isVreg1(MI.getOperand(0).getReg())) + Vreg1Phis.push_back(&MI); + } + } - LLVM_DEBUG(dbgs() << "Lower PHI: " << MI); + MachineBasicBlock *PrevMBB = nullptr; + for (MachineInstr *MI : Vreg1Phis) { + MachineBasicBlock &MBB = *MI->getParent(); + if (&MBB != PrevMBB) { + LF.initialize(MBB); + PrevMBB = &MBB; + } - MRI->setRegClass(DstReg, IsWave32 ? &AMDGPU::SReg_32RegClass - : &AMDGPU::SReg_64RegClass); + LLVM_DEBUG(dbgs() << "Lower PHI: " << *MI); - // Collect incoming values. - for (unsigned i = 1; i < MI.getNumOperands(); i += 2) { - assert(i + 1 < MI.getNumOperands()); - Register IncomingReg = MI.getOperand(i).getReg(); - MachineBasicBlock *IncomingMBB = MI.getOperand(i + 1).getMBB(); - MachineInstr *IncomingDef = MRI->getUniqueVRegDef(IncomingReg); - - if (IncomingDef->getOpcode() == AMDGPU::COPY) { - IncomingReg = IncomingDef->getOperand(1).getReg(); - assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg)); - assert(!IncomingDef->getOperand(1).getSubReg()); - } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) { - continue; - } else { - assert(IncomingDef->isPHI() || PhiRegisters.count(IncomingReg)); - } + Register DstReg = MI->getOperand(0).getReg(); + MRI->setRegClass(DstReg, IsWave32 ? &AMDGPU::SReg_32RegClass + : &AMDGPU::SReg_64RegClass); + + // Collect incoming values. + for (unsigned i = 1; i < MI->getNumOperands(); i += 2) { + assert(i + 1 < MI->getNumOperands()); + Register IncomingReg = MI->getOperand(i).getReg(); + MachineBasicBlock *IncomingMBB = MI->getOperand(i + 1).getMBB(); + MachineInstr *IncomingDef = MRI->getUniqueVRegDef(IncomingReg); - IncomingBlocks.push_back(IncomingMBB); - IncomingRegs.push_back(IncomingReg); + if (IncomingDef->getOpcode() == AMDGPU::COPY) { + IncomingReg = IncomingDef->getOperand(1).getReg(); + assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg)); + assert(!IncomingDef->getOperand(1).getSubReg()); + } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) { + continue; + } else { + assert(IncomingDef->isPHI() || PhiRegisters.count(IncomingReg)); } + IncomingBlocks.push_back(IncomingMBB); + IncomingRegs.push_back(IncomingReg); + } + #ifndef NDEBUG - PhiRegisters.insert(DstReg); + PhiRegisters.insert(DstReg); #endif - // Phis in a loop that are observed outside the loop receive a simple but - // conservatively correct treatment. - std::vector DomBlocks = {&MBB}; - for (MachineInstr &Use : MRI->use_instructions(DstReg)) - DomBlocks.push_back(Use.getParent()); + // Phis in a loop that are observed outside the loop receive a simple but + // conservatively correct treatment. + std::vector DomBlocks = {&MBB}; + for (MachineInstr &Use : MRI->use_instructions(DstReg)) + DomBlocks.push_back(Use.getParent()); - MachineBasicBlock *PostDomBound = - PDT->findNearestCommonDominator(DomBlocks); - unsigned FoundLoopLevel = LF.findLoop(PostDomBound); - - SSAUpdater.Initialize(DstReg); - - if (FoundLoopLevel) { - LF.addLoopEntries(FoundLoopLevel, SSAUpdater, IncomingBlocks); + MachineBasicBlock *PostDomBound = + PDT->findNearestCommonDominator(DomBlocks); + unsigned FoundLoopLevel = LF.findLoop(PostDomBound); - for (unsigned i = 0; i < IncomingRegs.size(); ++i) { - IncomingUpdated.push_back(createLaneMaskReg(*MF)); - SSAUpdater.AddAvailableValue(IncomingBlocks[i], - IncomingUpdated.back()); - } + SSAUpdater.Initialize(DstReg); - for (unsigned i = 0; i < IncomingRegs.size(); ++i) { - MachineBasicBlock &IMBB = *IncomingBlocks[i]; - buildMergeLaneMasks( - IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[i], - SSAUpdater.GetValueInMiddleOfBlock(&IMBB), IncomingRegs[i]); - } - } else { - // The phi is not observed from outside a loop. Use a more accurate - // lowering. - PIA.analyze(MBB, IncomingBlocks); - - for (MachineBasicBlock *MBB : PIA.predecessors()) - SSAUpdater.AddAvailableValue(MBB, insertUndefLaneMask(*MBB)); - - for (unsigned i = 0; i < IncomingRegs.size(); ++i) { - MachineBasicBlock &IMBB = *IncomingBlocks[i]; - if (PIA.isSource(IMBB)) { - IncomingUpdated.push_back(0); - SSAUpdater.AddAvailableValue(&IMBB, IncomingRegs[i]); - } else { - IncomingUpdated.push_back(createLaneMaskReg(*MF)); - SSAUpdater.AddAvailableValue(&IMBB, IncomingUpdated.back()); - } - } + if (FoundLoopLevel) { + LF.addLoopEntries(FoundLoopLevel, SSAUpdater, IncomingBlocks); - for (unsigned i = 0; i < IncomingRegs.size(); ++i) { - if (!IncomingUpdated[i]) - continue; + for (unsigned i = 0; i < IncomingRegs.size(); ++i) { + IncomingUpdated.push_back(createLaneMaskReg(*MF)); + SSAUpdater.AddAvailableValue(IncomingBlocks[i], + IncomingUpdated.back()); + } - MachineBasicBlock &IMBB = *IncomingBlocks[i]; - buildMergeLaneMasks( - IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[i], - SSAUpdater.GetValueInMiddleOfBlock(&IMBB), IncomingRegs[i]); + for (unsigned i = 0; i < IncomingRegs.size(); ++i) { + MachineBasicBlock &IMBB = *IncomingBlocks[i]; + buildMergeLaneMasks( + IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[i], + SSAUpdater.GetValueInMiddleOfBlock(&IMBB), IncomingRegs[i]); + } + } else { + // The phi is not observed from outside a loop. Use a more accurate + // lowering. + PIA.analyze(MBB, IncomingBlocks); + + for (MachineBasicBlock *MBB : PIA.predecessors()) + SSAUpdater.AddAvailableValue(MBB, insertUndefLaneMask(*MBB)); + + for (unsigned i = 0; i < IncomingRegs.size(); ++i) { + MachineBasicBlock &IMBB = *IncomingBlocks[i]; + if (PIA.isSource(IMBB)) { + IncomingUpdated.push_back(0); + SSAUpdater.AddAvailableValue(&IMBB, IncomingRegs[i]); + } else { + IncomingUpdated.push_back(createLaneMaskReg(*MF)); + SSAUpdater.AddAvailableValue(&IMBB, IncomingUpdated.back()); } } - unsigned NewReg = SSAUpdater.GetValueInMiddleOfBlock(&MBB); - if (NewReg != DstReg) { - MRI->replaceRegWith(NewReg, DstReg); + for (unsigned i = 0; i < IncomingRegs.size(); ++i) { + if (!IncomingUpdated[i]) + continue; - // Ensure that DstReg has a single def and mark the old PHI node for - // deletion. - MI.getOperand(0).setReg(NewReg); - DeadPhis.push_back(&MI); + MachineBasicBlock &IMBB = *IncomingBlocks[i]; + buildMergeLaneMasks( + IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[i], + SSAUpdater.GetValueInMiddleOfBlock(&IMBB), IncomingRegs[i]); } - - IncomingBlocks.clear(); - IncomingRegs.clear(); - IncomingUpdated.clear(); } - for (MachineInstr *MI : DeadPhis) + unsigned NewReg = SSAUpdater.GetValueInMiddleOfBlock(&MBB); + if (NewReg != DstReg) { + MRI->replaceRegWith(NewReg, DstReg); MI->eraseFromParent(); - DeadPhis.clear(); + } + + IncomingBlocks.clear(); + IncomingRegs.clear(); + IncomingUpdated.clear(); } } diff --git a/llvm/test/CodeGen/AMDGPU/i1_copy_phi_with_phi_incoming_value.mir b/llvm/test/CodeGen/AMDGPU/i1_copy_phi_with_phi_incoming_value.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/i1_copy_phi_with_phi_incoming_value.mir @@ -0,0 +1,140 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass=si-i1-copies %s -o - | FileCheck -check-prefix=GCN %s +--- + +name: kernel_i1_copy_phi_with_phi_incoming_value +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: kernel_i1_copy_phi_with_phi_incoming_value + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x40000000), %bb.5(0x40000000) + ; GCN: liveins: $vgpr0, $sgpr4_sgpr5 + ; GCN: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 0, 0, 0 :: (dereferenceable invariant load 4, align 16, addrspace 4) + ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORD_IMM]] + ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32) + ; GCN: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 [[COPY1]](s32), [[S_LOAD_DWORD_IMM]], implicit $exec + ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 + ; GCN: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_LT_I32_e64_]], %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GCN: S_BRANCH %bb.1 + ; GCN: bb.1: + ; GCN: successors: %bb.6(0x80000000) + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 + ; GCN: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY3]], killed [[S_MOV_B32_]], 0, implicit $exec + ; GCN: [[V_CMP_GE_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GE_I32_e64 [[V_ADD_U32_e64_]], [[COPY2]], implicit $exec + ; GCN: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 + ; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY [[V_CMP_GE_I32_e64_]] + ; GCN: S_BRANCH %bb.6 + ; GCN: bb.2: + ; GCN: successors: %bb.5(0x80000000) + ; GCN: [[PHI:%[0-9]+]]:sreg_64 = PHI %15, %bb.6 + ; GCN: SI_END_CF [[PHI]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GCN: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; GCN: [[COPY5:%[0-9]+]]:sreg_64 = COPY $exec + ; GCN: S_BRANCH %bb.5 + ; GCN: bb.3: + ; GCN: successors: %bb.4(0x40000000), %bb.7(0x40000000) + ; GCN: ATOMIC_FENCE 5, 2 + ; GCN: S_BARRIER + ; GCN: ATOMIC_FENCE 4, 2 + ; GCN: [[COPY6:%[0-9]+]]:sreg_64 = COPY %18 + ; GCN: [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF [[COPY6]], %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GCN: S_BRANCH %bb.4 + ; GCN: bb.4: + ; GCN: successors: %bb.7(0x80000000) + ; GCN: S_BRANCH %bb.7 + ; GCN: bb.5: + ; GCN: successors: %bb.3(0x80000000) + ; GCN: [[PHI1:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, [[COPY5]], %bb.2 + ; GCN: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GCN: S_BRANCH %bb.3 + ; GCN: bb.6: + ; GCN: successors: %bb.2(0x40000000), %bb.6(0x40000000) + ; GCN: [[PHI2:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_1]], %bb.1, %15, %bb.6 + ; GCN: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[COPY4]] + ; GCN: [[SI_IF_BREAK:%[0-9]+]]:sreg_64 = SI_IF_BREAK [[COPY7]], [[PHI2]], implicit-def dead $scc + ; GCN: SI_LOOP [[SI_IF_BREAK]], %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GCN: S_BRANCH %bb.2 + ; GCN: bb.7: + ; GCN: SI_END_CF [[SI_IF1]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GCN: S_ENDPGM 0 + bb.0: + successors: %bb.1, %bb.5 + liveins: $vgpr0, $sgpr4_sgpr5 + + %1:sgpr_64(p4) = COPY $sgpr4_sgpr5 + %2:vgpr_32(s32) = COPY $vgpr0 + %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %1:sgpr_64(p4), 0, 0, 0 :: (dereferenceable invariant load 4, align 16, addrspace 4) + %8:sreg_32 = COPY %3:sreg_32_xm0_xexec + %14:vgpr_32 = COPY %2:vgpr_32(s32) + %9:sreg_64 = V_CMP_LT_I32_e64 %2:vgpr_32(s32), %3:sreg_32_xm0_xexec, implicit $exec + %4:sreg_64 = S_MOV_B64 0 + %17:vreg_1 = COPY %4:sreg_64, implicit $exec + %16:sreg_64 = SI_IF killed %9:sreg_64, %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.1 + + bb.1: + ; predecessors: %bb.0 + successors: %bb.6 + + %10:sreg_32 = S_MOV_B32 16 + %18:vgpr_32 = V_ADD_U32_e64 %14:vgpr_32, killed %10:sreg_32, 0, implicit $exec + %11:sreg_64 = V_CMP_GE_I32_e64 %18:vgpr_32, %8:sreg_32, implicit $exec + %12:sreg_64 = S_MOV_B64 0 + %19:vreg_1 = COPY %11:sreg_64 + S_BRANCH %bb.6 + + bb.2: + ; predecessors: %bb.6 + successors: %bb.5 + + %20:sreg_64 = PHI %6:sreg_64, %bb.6 + SI_END_CF %20:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + %15:sreg_64 = S_MOV_B64 -1 + %21:vreg_1 = COPY %15:sreg_64, implicit $exec + S_BRANCH %bb.5 + + bb.3: + ; predecessors: %bb.5 + successors: %bb.4, %bb.7 + + %22:vreg_1 = PHI %7:vreg_1, %bb.5 + ATOMIC_FENCE 5, 2 + S_BARRIER + ATOMIC_FENCE 4, 2 + %23:sreg_64 = COPY %22:vreg_1 + %24:sreg_64 = SI_IF %23:sreg_64, %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.4 + + bb.4: + ; predecessors: %bb.3 + successors: %bb.7 + + S_BRANCH %bb.7 + + bb.5: + ; predecessors: %bb.0, %bb.2 + successors: %bb.3 + + %7:vreg_1 = PHI %17:vreg_1, %bb.0, %21:vreg_1, %bb.2 + SI_END_CF %16:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.3 + + bb.6: + ; predecessors: %bb.1, %bb.6 + successors: %bb.2, %bb.6 + + %5:sreg_64 = PHI %12:sreg_64, %bb.1, %6:sreg_64, %bb.6 + %13:sreg_64 = COPY %19:vreg_1 + %6:sreg_64 = SI_IF_BREAK %13:sreg_64, %5:sreg_64, implicit-def dead $scc + SI_LOOP %6:sreg_64, %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.2 + + bb.7: + ; predecessors: %bb.3, %bb.4 + + SI_END_CF %24:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_ENDPGM 0 + +...