diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -34,11 +34,9 @@
   bool Commuted;
 
   FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
-                bool Commuted_ = false,
-                int ShrinkOp = -1) :
-    UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
-    Kind(FoldOp->getType()),
-    Commuted(Commuted_) {
+                bool Commuted_ = false, int ShrinkOp = -1)
+      : UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
+        Kind(FoldOp->getType()), Commuted(Commuted_) {
     if (FoldOp->isImm()) {
       ImmToFold = FoldOp->getImm();
     } else if (FoldOp->isFI()) {
@@ -49,17 +47,11 @@
     }
   }
 
-  bool isFI() const {
-    return Kind == MachineOperand::MO_FrameIndex;
-  }
+  bool isFI() const { return Kind == MachineOperand::MO_FrameIndex; }
 
-  bool isImm() const {
-    return Kind == MachineOperand::MO_Immediate;
-  }
+  bool isImm() const { return Kind == MachineOperand::MO_Immediate; }
 
-  bool isReg() const {
-    return Kind == MachineOperand::MO_Register;
-  }
+  bool isReg() const { return Kind == MachineOperand::MO_Register; }
 
   bool isGlobal() const { return Kind == MachineOperand::MO_GlobalAddress; }
@@ -91,9 +83,7 @@
   bool tryToFoldACImm(const MachineOperand &OpToFold, MachineInstr *UseMI,
                       unsigned UseOpIdx,
                       SmallVectorImpl<FoldCandidate> &FoldList) const;
-  void foldOperand(MachineOperand &OpToFold,
-                   MachineInstr *UseMI,
-                   int UseOpIdx,
+  void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI, int UseOpIdx,
                    SmallVectorImpl<FoldCandidate> &FoldList,
                    SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
@@ -133,8 +123,7 @@
 
 } // End anonymous namespace.
 
-INITIALIZE_PASS(SIFoldOperands, DEBUG_TYPE,
-                "SI Fold Operands", false, false)
+INITIALIZE_PASS(SIFoldOperands, DEBUG_TYPE, "SI Fold Operands", false, false)
 
 char SIFoldOperands::ID = 0;
@@ -192,16 +181,13 @@
   return OpNo == VIdx && SIdx == -1;
 }
 
-FunctionPass *llvm::createSIFoldOperandsPass() {
-  return new SIFoldOperands();
-}
+FunctionPass *llvm::createSIFoldOperandsPass() { return new SIFoldOperands(); }
 
 bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
   MachineInstr *MI = Fold.UseMI;
   MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
   assert(Old.isReg());
-
   const uint64_t TSFlags = MI->getDesc().TSFlags;
 
   if (Fold.isImm()) {
     if (TSFlags & SIInstrFlags::IsPacked && !(TSFlags & SIInstrFlags::IsMAI) &&
@@ -274,7 +260,7 @@
     if (HaveNonDbgCarryUse) {
       BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::COPY),
               Dst1.getReg())
-        .addReg(AMDGPU::VCC, RegState::Kill);
+          .addReg(AMDGPU::VCC, RegState::Kill);
     }
 
     // Keep the old instruction around to avoid breaking iterators, but
@@ -402,7 +388,6 @@
     CommuteOpNo = CommuteIdx0;
   }
 
-
   // One of operands might be an Imm operand, and OpNo may refer to it after
   // the call of commuteInstruction() below. Such situations are avoided
   // here explicitly as OpNo must be a register operand to be a candidate
@@ -416,14 +401,14 @@
     return false;
 
   if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
-    if ((Opc == AMDGPU::V_ADD_CO_U32_e64 ||
-         Opc == AMDGPU::V_SUB_CO_U32_e64 ||
+    if ((Opc == AMDGPU::V_ADD_CO_U32_e64 || Opc == AMDGPU::V_SUB_CO_U32_e64 ||
          Opc == AMDGPU::V_SUBREV_CO_U32_e64) && // FIXME
         (OpToFold->isImm() || OpToFold->isFI() || OpToFold->isGlobal())) {
 
       // Verify the other operand is a VGPR, otherwise we would violate the
       // constant bus restriction.
-      unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
+      unsigned OtherIdx =
+          CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
       MachineOperand &OtherOp = MI->getOperand(OtherIdx);
       if (!OtherOp.isReg() ||
           !TII->getRegisterInfo().isVGPR(*MRI, OtherOp.getReg()))
@@ -551,7 +536,7 @@
     }
   }
 
-  SmallVector<std::pair<MachineOperand*, unsigned>, 32> Defs;
+  SmallVector<std::pair<MachineOperand *, unsigned>, 32> Defs;
   if (!getRegSeqInit(Defs, UseReg, OpTy))
     return false;
@@ -579,11 +564,9 @@
 }
 
 void SIFoldOperands::foldOperand(
-  MachineOperand &OpToFold,
-  MachineInstr *UseMI,
-  int UseOpIdx,
-  SmallVectorImpl<FoldCandidate> &FoldList,
-  SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
+    MachineOperand &OpToFold, MachineInstr *UseMI, int UseOpIdx,
+    SmallVectorImpl<FoldCandidate> &FoldList,
+    SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
   const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
 
   if (!isUseSafeToFold(*UseMI, UseOp))
@@ -601,7 +584,8 @@
     Register RegSeqDstReg = UseMI->getOperand(0).getReg();
     unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
 
-    for (auto &RSUse : make_early_inc_range(MRI->use_nodbg_operands(RegSeqDstReg))) {
+    for (auto &RSUse :
+         make_early_inc_range(MRI->use_nodbg_operands(RegSeqDstReg))) {
       MachineInstr *RSUseMI = RSUse.getParent();
 
       if (tryToFoldACImm(UseMI->getOperand(0), RSUseMI,
@@ -734,7 +718,7 @@
     // can only accept VGPR or inline immediate. Recreate a reg_sequence with
     // its initializers right here, so we will rematerialize immediates and
     // avoid copies via different reg classes.
-    SmallVector<std::pair<MachineOperand*, unsigned>, 32> Defs;
+    SmallVector<std::pair<MachineOperand *, unsigned>, 32> Defs;
     if (Size > 4 && TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) &&
         getRegSeqInit(Defs, UseReg, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
       const DebugLoc &DL = UseMI->getDebugLoc();
@@ -755,8 +739,9 @@
           int64_t Imm = Def->getImm();
 
           auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
-          BuildMI(MBB, UseMI, DL,
-                  TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp).addImm(Imm);
+          BuildMI(MBB, UseMI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
+                  Tmp)
+              .addImm(Imm);
           B.addReg(Tmp);
         } else if (Def->isReg() && TRI->isAGPR(*MRI, Def->getReg())) {
           auto Src = getRegSubRegPair(*Def);
@@ -797,8 +782,9 @@
             VGPRCopies[CopyToVGPR] = Vgpr;
           }
           auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
-          BuildMI(MBB, UseMI, DL,
-                  TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp).addReg(Vgpr);
+          BuildMI(MBB, UseMI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
+                  Tmp)
+              .addReg(Vgpr);
           B.addReg(Tmp);
         }
@@ -827,7 +813,7 @@
   if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 ||
       (UseOpc == AMDGPU::V_READLANE_B32 &&
        (int)UseOpIdx ==
-       AMDGPU::getNamedOperandIdx(UseOpc, AMDGPU::OpName::src0))) {
+           AMDGPU::getNamedOperandIdx(UseOpc, AMDGPU::OpName::src0))) {
     // %vgpr = V_MOV_B32 imm
     // %sgpr = V_READFIRSTLANE_B32 %vgpr
     // =>
@@ -835,8 +821,7 @@
     if (FoldingImmLike) {
       if (execMayBeModifiedBeforeUse(*MRI,
                                      UseMI->getOperand(UseOpIdx).getReg(),
-                                     *OpToFold.getParent(),
-                                     *UseMI))
+                                     *OpToFold.getParent(), *UseMI))
         return;
 
       UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
@@ -852,8 +837,7 @@
     if (OpToFold.isReg() && TRI->isSGPRReg(*MRI, OpToFold.getReg())) {
       if (execMayBeModifiedBeforeUse(*MRI,
                                      UseMI->getOperand(UseOpIdx).getReg(),
-                                     *OpToFold.getParent(),
-                                     *UseMI))
+                                     *OpToFold.getParent(), *UseMI))
         return;
 
       // %vgpr = COPY %sgpr0
@@ -902,7 +886,6 @@
     return;
   }
 
-
   const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
   const TargetRegisterClass *FoldRC =
       TRI->getRegClass(FoldDesc.operands()[0].RegClass);
@@ -1085,8 +1068,7 @@
   }
 
   int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
-  if (Opc == AMDGPU::V_OR_B32_e64 ||
-      Opc == AMDGPU::V_OR_B32_e32 ||
+  if (Opc == AMDGPU::V_OR_B32_e64 || Opc == AMDGPU::V_OR_B32_e32 ||
      Opc == AMDGPU::S_OR_B32) {
    if (Src1Val == 0) {
      // y = or x, 0 => y = copy x
@@ -1348,8 +1330,7 @@
   // Make sure sources are identical.
   const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
   const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
-  if (!Src0->isReg() || !Src1->isReg() ||
-      Src0->getReg() != Src1->getReg() ||
+  if (!Src0->isReg() || !Src1->isReg() || Src0->getReg() != Src1->getReg() ||
       Src0->getSubReg() != Src1->getSubReg() ||
       Src0->getSubReg() != AMDGPU::NoSubRegister)
     return nullptr;
@@ -1358,15 +1339,15 @@
   if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
     return nullptr;
 
-  unsigned Src0Mods
-    = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
-  unsigned Src1Mods
-    = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();
+  unsigned Src0Mods =
+      TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
+  unsigned Src1Mods =
+      TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();
 
   // Having a 0 op_sel_hi would require swizzling the output in the source
   // instruction, which we can't do.
-  unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1
-                                                    : 0u;
+  unsigned UnsetMods =
+      (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1 : 0u;
   if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
     return nullptr;
   return Src0;
@@ -1570,7 +1551,7 @@
       !MRI->hasOneNonDBGUse(Reg))
     return false;
 
-  SmallVector<std::pair<MachineOperand*, unsigned>, 32> Defs;
+  SmallVector<std::pair<MachineOperand *, unsigned>, 32> Defs;
   if (!getRegSeqInit(Defs, Reg, MCOI::OPERAND_REGISTER))
     return false;
@@ -1744,6 +1725,8 @@
   if (!ARC)
     return false;
 
+  bool IsAGPR32 = (ARC == &AMDGPU::AGPR_32RegClass);
+
   // Rewrite the PHI's incoming values to ARC.
   LLVM_DEBUG(dbgs() << "Folding AGPR copies into: " << PHI);
   for (unsigned K = 1; K < PHI.getNumExplicitOperands(); K += 2) {
@@ -1754,7 +1737,7 @@
     MachineBasicBlock *InsertMBB = nullptr;
 
     // Look at the def of Reg, ignoring all copies.
-    bool UseAccVGPRWrite = false;
+    unsigned CopyOpc = AMDGPU::COPY;
     if (MachineInstr *Def = MRI->getVRegDef(Reg)) {
 
       // Look at pre-existing COPY instructions from ARC: Steal the operand. If
@@ -1772,10 +1755,12 @@
         // GFX908 directly instead of a COPY. Otherwise, SIFoldOperand may try
         // to fold the sgpr -> vgpr -> agpr copy into a sgpr -> agpr copy which
        // is unlikely to be profitable.
+        //
+        // Note that V_ACCVGPR_WRITE is only used for AGPR_32.
         MachineOperand &CopyIn = Def->getOperand(1);
-        if (!ST->hasGFX90AInsts() && !MRI->hasOneNonDBGUse(Reg) &&
+        if (IsAGPR32 && !ST->hasGFX90AInsts() && !MRI->hasOneNonDBGUse(Reg) &&
             TRI->isSGPRReg(*MRI, CopyIn.getReg()))
-          UseAccVGPRWrite = true;
+          CopyOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
       }
 
       InsertPt = ++Def->getIterator();
@@ -1785,8 +1770,10 @@
       InsertPt = InsertMBB->getFirstTerminator();
     }
 
-    const unsigned CopyOpc =
-        UseAccVGPRWrite ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::COPY;
+    // We can't insert a COPY between PHI nodes so skip them.
+    while (InsertPt != InsertMBB->end() && InsertPt->isPHI())
+      ++InsertPt;
+
     Register NewReg = MRI->createVirtualRegister(ARC);
     MachineInstr *MI = BuildMI(*InsertMBB, InsertPt, PHI.getDebugLoc(),
                                TII->get(CopyOpc), NewReg)
@@ -1827,7 +1814,7 @@
   if (DefReg.isPhysical() || !TRI->isVGPR(*MRI, DefReg))
     return false;
 
-  SmallVector<const MachineInstr*, 4> Users;
+  SmallVector<const MachineInstr *, 4> Users;
   SmallVector<Register, 8> MoveRegs;
   for (const MachineInstr &I : MRI->use_nodbg_instructions(DefReg))
     Users.push_back(&I);
diff --git a/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir b/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir
--- a/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir
@@ -40,6 +40,7 @@
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
   ; GFX908-NEXT:   S_ENDPGM 0
+  ;
   ; GFX90A-LABEL: name: test_sgpr_init_multiuse
   ; GFX90A: bb.0:
   ; GFX90A-NEXT:   successors: %bb.1(0x80000000)
@@ -101,6 +102,90 @@
     S_ENDPGM 0
 ...
 
+---
+name: test_sgpr_init_multiuse_agprtuple
+tracksRegLiveness: true
+
+body: |
+  ; GFX908-LABEL: name: test_sgpr_init_multiuse_agprtuple
+  ; GFX908: bb.0:
+  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX908-NEXT:   liveins: $sgpr0_sgpr1, $scc
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
+  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]]
+  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:areg_64_align2 = COPY [[COPY1]]
+  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:areg_64_align2 = COPY [[COPY1]]
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT: bb.1:
+  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GFX908-NEXT:   liveins: $scc
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT:   [[PHI:%[0-9]+]]:areg_64_align2 = PHI [[COPY3]], %bb.0, %9.sub0_sub1, %bb.1
+  ; GFX908-NEXT:   [[PHI1:%[0-9]+]]:areg_64_align2 = PHI [[COPY2]], %bb.0, %9.sub2_sub3, %bb.1
+  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[PHI1]]
+  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[PHI]]
+  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY5]].sub0, %subreg.sub0, [[COPY5]].sub1, %subreg.sub1, [[COPY4]].sub0, %subreg.sub2, [[COPY4]].sub1, %subreg.sub3
+  ; GFX908-NEXT:   [[V_MOV_B64_e32_:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e32 1073741824, implicit $exec
+  ; GFX908-NEXT:   [[V_MOV_B64_e32_1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e32 1065353216, implicit $exec
+  ; GFX908-NEXT:   [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B64_e32_1]].sub0, [[V_MOV_B64_e32_]].sub1, [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec
+  ; GFX908-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT: bb.2:
+  ; GFX908-NEXT:   S_ENDPGM 0
+  ;
+  ; GFX90A-LABEL: name: test_sgpr_init_multiuse_agprtuple
+  ; GFX90A: bb.0:
+  ; GFX90A-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX90A-NEXT:   liveins: $sgpr0_sgpr1, $scc
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
+  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]]
+  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:areg_64_align2 = COPY [[COPY1]]
+  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:areg_64_align2 = COPY [[COPY1]]
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT: bb.1:
+  ; GFX90A-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GFX90A-NEXT:   liveins: $scc
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT:   [[PHI:%[0-9]+]]:areg_64_align2 = PHI [[COPY3]], %bb.0, %9.sub0_sub1, %bb.1
+  ; GFX90A-NEXT:   [[PHI1:%[0-9]+]]:areg_64_align2 = PHI [[COPY2]], %bb.0, %9.sub2_sub3, %bb.1
+  ; GFX90A-NEXT:   [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[PHI1]]
+  ; GFX90A-NEXT:   [[COPY5:%[0-9]+]]:vreg_64_align2 = COPY [[PHI]]
+  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY5]].sub0, %subreg.sub0, [[COPY5]].sub1, %subreg.sub1, [[COPY4]].sub0, %subreg.sub2, [[COPY4]].sub1, %subreg.sub3
+  ; GFX90A-NEXT:   [[V_MOV_B64_e32_:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e32 1073741824, implicit $exec
+  ; GFX90A-NEXT:   [[V_MOV_B64_e32_1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e32 1065353216, implicit $exec
+  ; GFX90A-NEXT:   [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B64_e32_1]].sub0, [[V_MOV_B64_e32_]].sub1, [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec
+  ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT: bb.2:
+  ; GFX90A-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1
+    liveins: $sgpr0_sgpr1, $scc
+
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:vreg_64_align2 = COPY %0:sgpr_64
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    liveins: $scc
+
+    %2:vreg_64_align2 = PHI %1, %bb.0, %3, %bb.1
+    %4:vreg_64_align2 = PHI %1, %bb.0, %5, %bb.1
+    %6:areg_128_align2 = REG_SEQUENCE %2.sub0, %subreg.sub0, %2.sub1, %subreg.sub1, %4.sub0, %subreg.sub2, %4.sub1, %subreg.sub3
+    %7:vreg_64_align2 = V_MOV_B64_e32 1073741824, implicit $exec
+    %8:vreg_64_align2 = V_MOV_B64_e32 1065353216, implicit $exec
+    %9:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 %8.sub0, %7.sub1, %6:areg_128_align2, 0, 0, 0, implicit $mode, implicit $exec
+    %3:vreg_64_align2 = COPY %9.sub0_sub1:areg_128_align2
+    %5:vreg_64_align2 = COPY %9.sub2_sub3:areg_128_align2
+    S_CBRANCH_SCC1 %bb.1, implicit $scc
+
+  bb.2:
+
+    S_ENDPGM 0
+...
+
 ---
 name: test_sgpr_init_singleuse
 tracksRegLiveness: true
@@ -141,6 +226,7 @@
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
   ; GFX908-NEXT:   S_ENDPGM 0
+  ;
   ; GFX90A-LABEL: name: test_sgpr_init_singleuse
   ; GFX90A: bb.0:
   ; GFX90A-NEXT:   successors: %bb.1(0x80000000)
@@ -244,6 +330,7 @@
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
   ; GFX908-NEXT:   S_ENDPGM 0
+  ;
   ; GFX90A-LABEL: name: test_vgpr_init
   ; GFX90A: bb.0:
   ; GFX90A-NEXT:   successors: %bb.1(0x80000000)
@@ -343,6 +430,7 @@
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
   ; GFX908-NEXT:   S_ENDPGM 0
+  ;
   ; GFX90A-LABEL: name: test_use_vgpr_temp
   ; GFX90A: bb.0:
   ; GFX90A-NEXT:   successors: %bb.1(0x80000000)
@@ -445,6 +533,7 @@
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.2:
   ; GFX908-NEXT:   S_ENDPGM 0
+  ;
   ; GFX90A-LABEL: name: test_vgpr_init_two_copies
   ; GFX90A: bb.0:
   ; GFX90A-NEXT:   successors: %bb.1(0x80000000)
@@ -504,3 +593,125 @@
   bb.2:
     S_ENDPGM 0
 ...
+
+---
+name: test_vgpr_init_skip_phis_insertpt
+tracksRegLiveness: true
+
+body: |
+  ; GFX908-LABEL: name: test_vgpr_init_skip_phis_insertpt
+  ; GFX908: bb.0:
+  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1, $scc
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT: bb.1:
+  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GFX908-NEXT:   liveins: $scc
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT:   [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, [[COPY1]], %bb.1
+  ; GFX908-NEXT:   [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, [[COPY1]], %bb.1
+  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:agpr_32 = COPY [[PHI]]
+  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:agpr_32 = COPY [[PHI]]
+  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:agpr_32 = COPY [[PHI]]
+  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:agpr_32 = COPY [[PHI]]
+  ; GFX908-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT: bb.2:
+  ; GFX908-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GFX908-NEXT:   liveins: $scc
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT:   [[PHI2:%[0-9]+]]:agpr_32 = PHI [[COPY5]], %bb.1, %15.sub0, %bb.2
+  ; GFX908-NEXT:   [[PHI3:%[0-9]+]]:agpr_32 = PHI [[COPY4]], %bb.1, %15.sub1, %bb.2
+  ; GFX908-NEXT:   [[PHI4:%[0-9]+]]:agpr_32 = PHI [[COPY3]], %bb.1, %15.sub2, %bb.2
+  ; GFX908-NEXT:   [[PHI5:%[0-9]+]]:agpr_32 = PHI [[COPY2]], %bb.1, %15.sub3, %bb.2
+  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI5]]
+  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[PHI4]]
+  ; GFX908-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[PHI3]]
+  ; GFX908-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[PHI2]]
+  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3
+  ; GFX908-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
+  ; GFX908-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
+  ; GFX908-NEXT:   [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec
+  ; GFX908-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
+  ; GFX908-NEXT: {{  $}}
+  ; GFX908-NEXT: bb.3:
+  ; GFX908-NEXT:   S_ENDPGM 0
+  ;
+  ; GFX90A-LABEL: name: test_vgpr_init_skip_phis_insertpt
+  ; GFX90A: bb.0:
+  ; GFX90A-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $scc
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT: bb.1:
+  ; GFX90A-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GFX90A-NEXT:   liveins: $scc
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT:   [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, [[COPY1]], %bb.1
+  ; GFX90A-NEXT:   [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, [[COPY1]], %bb.1
+  ; GFX90A-NEXT:   [[COPY2:%[0-9]+]]:agpr_32 = COPY [[PHI]]
+  ; GFX90A-NEXT:   [[COPY3:%[0-9]+]]:agpr_32 = COPY [[PHI]]
+  ; GFX90A-NEXT:   [[COPY4:%[0-9]+]]:agpr_32 = COPY [[PHI]]
+  ; GFX90A-NEXT:   [[COPY5:%[0-9]+]]:agpr_32 = COPY [[PHI]]
+  ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT: bb.2:
+  ; GFX90A-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GFX90A-NEXT:   liveins: $scc
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT:   [[PHI2:%[0-9]+]]:agpr_32 = PHI [[COPY5]], %bb.1, %15.sub0, %bb.2
+  ; GFX90A-NEXT:   [[PHI3:%[0-9]+]]:agpr_32 = PHI [[COPY4]], %bb.1, %15.sub1, %bb.2
+  ; GFX90A-NEXT:   [[PHI4:%[0-9]+]]:agpr_32 = PHI [[COPY3]], %bb.1, %15.sub2, %bb.2
+  ; GFX90A-NEXT:   [[PHI5:%[0-9]+]]:agpr_32 = PHI [[COPY2]], %bb.1, %15.sub3, %bb.2
+  ; GFX90A-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI5]]
+  ; GFX90A-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[PHI4]]
+  ; GFX90A-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[PHI3]]
+  ; GFX90A-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[PHI2]]
+  ; GFX90A-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3
+  ; GFX90A-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
+  ; GFX90A-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
+  ; GFX90A-NEXT:   [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec
+  ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
+  ; GFX90A-NEXT: {{  $}}
+  ; GFX90A-NEXT: bb.3:
+  ; GFX90A-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $scc
+    successors: %bb.1
+
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr0
+
+  bb.1:
+    liveins: $scc
+    successors: %bb.1, %bb.2
+
+    %6:vgpr_32 = PHI %0, %bb.0, %1, %bb.1
+    %7:vgpr_32 = PHI %0, %bb.0, %1, %bb.1
+    S_CBRANCH_SCC1 %bb.1, implicit $scc
+
+  bb.2:
+    liveins: $scc
+    successors: %bb.2, %bb.3
+    %8:vgpr_32 = PHI %6, %bb.1, %16, %bb.2
+    %9:vgpr_32 = PHI %6, %bb.1, %17, %bb.2
+    %10:vgpr_32 = PHI %6, %bb.1, %18, %bb.2
+    %11:vgpr_32 = PHI %6, %bb.1, %19, %bb.2
+    %12:areg_128_align2 = REG_SEQUENCE %8, %subreg.sub0, %9, %subreg.sub1, %10, %subreg.sub2, %11, %subreg.sub3
+    %13:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec
+    %14:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
+    %15:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 %14:vgpr_32, %13:vgpr_32, %12:areg_128_align2, 0, 0, 0, implicit $mode, implicit $exec
+    %16:vgpr_32 = COPY %15.sub0
+    %17:vgpr_32 = COPY %15.sub1
+    %18:vgpr_32 = COPY %15.sub2
+    %19:vgpr_32 = COPY %15.sub3
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+
+  bb.3:
+    S_ENDPGM 0
+...
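
Editorial note, not part of the patch: below is a minimal, self-contained C++ sketch of the two behavioral changes the PHI-folding hunks make. All types here (Opc, Inst, Block, the helper names) are toy stand-ins invented for illustration; only the two predicates mirror the diff.

// Toy model of the two rules added to the PHI rewriting above:
//   1. V_ACCVGPR_WRITE_B32_e64 writes a single 32-bit AGPR, so it may
//      replace COPY only when the destination class is AGPR_32.
//   2. Nothing may be inserted between PHI nodes, so the insertion point
//      is advanced past any leading PHIs before the copy is placed.
#include <cassert>
#include <vector>

enum class Opc { PHI, COPY, V_ACCVGPR_WRITE_B32_e64, S_ENDPGM };

struct Inst {
  Opc opc;
};
using Block = std::vector<Inst>;

// Rule 1: the opcode choice, gated on the 32-bit AGPR class exactly as the
// new `IsAGPR32 && !ST->hasGFX90AInsts() && ...` condition is in the diff.
Opc pickCopyOpc(bool isAGPR32, bool hasGFX90AInsts, bool hasOneUse,
                bool srcIsSGPR) {
  if (isAGPR32 && !hasGFX90AInsts && !hasOneUse && srcIsSGPR)
    return Opc::V_ACCVGPR_WRITE_B32_e64;
  return Opc::COPY; // wider AGPR tuples always fall back to a plain COPY
}

// Rule 2: skip the block's leading PHIs, then insert the copy.
void insertCopyAfterPHIs(Block &bb, Opc copyOpc) {
  auto insertPt = bb.begin();
  while (insertPt != bb.end() && insertPt->opc == Opc::PHI)
    ++insertPt;
  bb.insert(insertPt, Inst{copyOpc});
}

int main() {
  // A 64-bit AGPR tuple (isAGPR32 == false) keeps a plain COPY even on
  // GFX908, and the inserted copy lands after both PHIs, not between them.
  Block bb = {{Opc::PHI}, {Opc::PHI}, {Opc::S_ENDPGM}};
  Opc opc = pickCopyOpc(/*isAGPR32=*/false, /*hasGFX90AInsts=*/false,
                        /*hasOneUse=*/false, /*srcIsSGPR=*/true);
  insertCopyAfterPHIs(bb, opc);
  assert(opc == Opc::COPY);
  assert(bb[0].opc == Opc::PHI && bb[1].opc == Opc::PHI &&
         bb[2].opc == Opc::COPY);
  return 0;
}

The two asserts correspond to the two new tests: test_sgpr_init_multiuse_agprtuple checks that 64-bit tuples no longer get the 32-bit V_ACCVGPR_WRITE opcode, and test_vgpr_init_skip_phis_insertpt checks that copies inserted into a block with multiple PHIs land after the whole PHI sequence, which would otherwise be invalid MIR.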