diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -111,9 +111,11 @@ std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const; bool tryFoldOMod(MachineInstr &MI); bool tryFoldRegSequence(MachineInstr &MI); - bool tryFoldLCSSAPhi(MachineInstr &MI); + bool tryFoldPhiAGPR(MachineInstr &MI); bool tryFoldLoad(MachineInstr &MI); + bool tryOptimizeAGPRPhis(MachineBasicBlock &MBB); + public: SIFoldOperands() : MachineFunctionPass(ID) { initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry()); @@ -138,6 +140,18 @@ char &llvm::SIFoldOperandsID = SIFoldOperands::ID; +// Return the register class of MO's register, narrowed to the class TRI +// reports for MO's subregister index when it reports one. +static const TargetRegisterClass *getRegOpRC(const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const MachineOperand &MO) { + const TargetRegisterClass *RC = MRI.getRegClass(MO.getReg()); + if (const TargetRegisterClass *SubRC = + TRI.getSubRegisterClass(RC, MO.getSubReg())) + RC = SubRC; + return RC; +} + // Map multiply-accumulate opcode to corresponding multiply-add opcode if any. static unsigned macToMad(unsigned Opc) { switch (Opc) { @@ -1631,52 +1645,134 @@ return true; } -// Try to hoist an AGPR to VGPR copy out of the loop across a LCSSA PHI. +// Try to hoist an AGPR to VGPR copy across a PHI. // This should allow folding of an AGPR into a consumer which may support it. -// I.e.: // -// loop: // loop: -// %1:vreg = COPY %0:areg // exit: -// exit: => // %1:areg = PHI %0:areg, %loop -// %2:vreg = PHI %1:vreg, %loop // %2:vreg = COPY %1:areg -bool SIFoldOperands::tryFoldLCSSAPhi(MachineInstr &PHI) { +// Example 1: LCSSA PHI +// loop: +// %1:vreg = COPY %0:areg +// exit: +// %2:vreg = PHI %1:vreg, %loop +// => +// loop: +// exit: +// %1:areg = PHI %0:areg, %loop +// %2:vreg = COPY %1:areg +// +// Example 2: PHI with multiple incoming values: +// entry: +// %1:vreg = GLOBAL_LOAD(..)
+// loop: +// %2:vreg = PHI %1:vreg, %entry, %5:vreg, %loop +// %3:areg = COPY %2:vreg +// %4:areg = (instr using %3:areg) +// %5:vreg = COPY %4:areg +// => +// entry: +// %1:vreg = GLOBAL_LOAD(..) +// %2:areg = COPY %1:vreg +// loop: +// %3:areg = PHI %2:areg, %entry, %4:areg, %loop +// %4:areg = (instr using %3:areg) +bool SIFoldOperands::tryFoldPhiAGPR(MachineInstr &PHI) { assert(PHI.isPHI()); - if (PHI.getNumExplicitOperands() != 3) // Single input LCSSA PHI - return false; - - Register PhiIn = PHI.getOperand(1).getReg(); Register PhiOut = PHI.getOperand(0).getReg(); - if (PHI.getOperand(1).getSubReg() || - !TRI->isVGPR(*MRI, PhiIn) || !TRI->isVGPR(*MRI, PhiOut)) + if (!TRI->isVGPR(*MRI, PhiOut)) return false; - // A single use should not matter for correctness, but if it has another use - // inside the loop we may perform copy twice in a worst case. - if (!MRI->hasOneNonDBGUse(PhiIn)) - return false; + // Iterate once over all incoming values of the PHI to check if this PHI is + // eligible, and determine the exact AGPR RC we'll target.
+ const TargetRegisterClass *ARC = nullptr; + for (unsigned K = 1; K < PHI.getNumExplicitOperands(); K += 2) { + MachineOperand &MO = PHI.getOperand(K); - MachineInstr *Copy = MRI->getVRegDef(PhiIn); - if (!Copy || !Copy->isCopy()) - return false; + Register PhiIn = MO.getReg(); + if (MO.getSubReg() || !TRI->isVGPR(*MRI, PhiIn)) + return false; + + MachineInstr *Copy = MRI->getVRegDef(PhiIn); + if (!Copy || !Copy->isCopy()) + continue; - Register CopyIn = Copy->getOperand(1).getReg(); - if (!TRI->isAGPR(*MRI, CopyIn) || Copy->getOperand(1).getSubReg()) + Register CopyIn = Copy->getOperand(1).getReg(); + if (CopyIn.isVirtual() && TRI->isAGPR(*MRI, CopyIn)) { + const TargetRegisterClass *CopyInRC = + getRegOpRC(*MRI, *TRI, Copy->getOperand(1)); + if (ARC && !ARC->hasSubClassEq(CopyInRC)) + return false; + ARC = CopyInRC; + } + } + + if (!ARC) return false; - const TargetRegisterClass *ARC = MRI->getRegClass(CopyIn); + // Rewrite the PHI's incoming values to ARC. + LLVM_DEBUG(dbgs() << "Folding AGPR copies into: " << PHI); + for (unsigned K = 1; K < PHI.getNumExplicitOperands(); K += 2) { + MachineOperand &MO = PHI.getOperand(K); + Register Reg = MO.getReg(); + + MachineBasicBlock::iterator InsertPt; + MachineBasicBlock *InsertMBB = nullptr; + + // Look at the def of Reg, ignoring all copies. + bool UseAccVGPRWrite = false; + if (MachineInstr *Def = MRI->getVRegDef(Reg)) { + + // Look at pre-existing COPY instructions from ARC: Steal the operand. If + // the copy was single-use, it will be removed by DCE later. + if (Def->isCopy()) { + MachineOperand &CopyIn = Def->getOperand(1); + if (CopyIn.getReg().isVirtual() && + getRegOpRC(*MRI, *TRI, CopyIn)->hasSubClassEq(ARC)) { + MO.setReg(CopyIn.getReg()); + MO.setSubReg(CopyIn.getSubReg()); + continue; + } + + // If this is a multi-use SGPR -> VGPR copy, use V_ACCVGPR_WRITE on + // GFX908 directly instead of a COPY.
Otherwise, SIFoldOperands may try + // to fold the sgpr -> vgpr -> agpr copy into a sgpr -> agpr copy which + // is unlikely to be profitable. + if (!ST->hasGFX90AInsts() && !MRI->hasOneNonDBGUse(Reg) && + TRI->isSGPRReg(*MRI, CopyIn.getReg())) + UseAccVGPRWrite = true; + } + + InsertMBB = Def->getParent(); + // Def may itself be a PHI; never insert inside a PHI group. + InsertPt = InsertMBB->SkipPHIsLabelsAndDebug(++Def->getIterator()); + } else { + InsertMBB = PHI.getOperand(MO.getOperandNo() + 1).getMBB(); + InsertPt = InsertMBB->getFirstTerminator(); + } + + const unsigned CopyOpc = + UseAccVGPRWrite ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::COPY; + Register NewReg = MRI->createVirtualRegister(ARC); + MachineInstr *MI = BuildMI(*InsertMBB, InsertPt, PHI.getDebugLoc(), + TII->get(CopyOpc), NewReg) + .addReg(Reg); + MO.setReg(NewReg); + + (void)MI; + LLVM_DEBUG(dbgs() << " Created COPY: " << *MI); + } + + // Replace the PHI's result with a new register. Register NewReg = MRI->createVirtualRegister(ARC); - PHI.getOperand(1).setReg(CopyIn); PHI.getOperand(0).setReg(NewReg); + // COPY that new register back to the original PhiOut register. This COPY will + // usually be folded out later. MachineBasicBlock *MBB = PHI.getParent(); - BuildMI(*MBB, MBB->getFirstNonPHI(), Copy->getDebugLoc(), + BuildMI(*MBB, MBB->getFirstNonPHI(), PHI.getDebugLoc(), TII->get(AMDGPU::COPY), PhiOut) - .addReg(NewReg, RegState::Kill); - Copy->eraseFromParent(); // We know this copy had a single use. - - LLVM_DEBUG(dbgs() << "Folded " << PHI); + .addReg(NewReg); + LLVM_DEBUG(dbgs() << " Done: Folded " << PHI); return true; } @@ -1736,6 +1832,109 @@ return true; } +// tryFoldPhiAGPR will aggressively try to create AGPR PHIs. +// For GFX90A and later, this is pretty much always a good thing, but for GFX908 +// there are cases where it can create a lot more AGPR-AGPR copies, which are +// expensive on this architecture due to the lack of V_ACCVGPR_MOV. +// +// This function looks at all AGPR PHIs in a basic block and collects their +// operands.
Then, it checks for registers that are used more than once across +// all PHIs and caches them in a VGPR. This prevents ExpandPostRAPseudo from +// having to create one VGPR temporary per use, which can get very messy if +// these PHIs come from a broken-up large PHI (e.g. 32 AGPR phis, one per vector +// element). +// +// Example +// a: +// %in:agpr_256 = COPY %foo:vgpr_256 +// c: +// %x:agpr_32 = .. +// b: +// %0:areg = PHI %in.sub0:agpr_32, %a, %x, %c +// %1:areg = PHI %in.sub0:agpr_32, %a, %y, %c +// %2:areg = PHI %in.sub0:agpr_32, %a, %z, %c +// => +// a: +// %in:agpr_256 = COPY %foo:vgpr_256 +// %tmp:vgpr_32 = V_ACCVGPR_READ_B32_e64 %in.sub0:agpr_32 +// %tmp_agpr:agpr_32 = COPY %tmp +// c: +// %x:agpr_32 = .. +// b: +// %0:areg = PHI %tmp_agpr, %a, %x, %c +// %1:areg = PHI %tmp_agpr, %a, %y, %c +// %2:areg = PHI %tmp_agpr, %a, %z, %c +bool SIFoldOperands::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) { + // This is only really needed on GFX908 where AGPR-AGPR copies are + // unreasonably difficult. + if (ST->hasGFX90AInsts()) + return false; + + // Look at all AGPR Phis and collect the register + subregister used. + DenseMap<std::pair<Register, unsigned>, std::vector<MachineOperand *>> + RegToMO; + + for (auto &MI : MBB) { + if (!MI.isPHI()) + break; + + if (!TRI->isAGPR(*MRI, MI.getOperand(0).getReg())) + continue; + + for (unsigned K = 1; K < MI.getNumOperands(); K += 2) { + MachineOperand &PhiMO = MI.getOperand(K); + RegToMO[{PhiMO.getReg(), PhiMO.getSubReg()}].push_back(&PhiMO); + } + } + + // For all (Reg, SubReg) pair that are used more than once, cache the value in + // a VGPR. + bool Changed = false; + for (const auto &[Entry, MOs] : RegToMO) { + if (MOs.size() == 1) + continue; + + const auto [Reg, SubReg] = Entry; + MachineInstr *Def = MRI->getVRegDef(Reg); + // getVRegDef can return null (tryFoldPhiAGPR handles that too); skip. + if (!Def) + continue; + MachineBasicBlock *DefMBB = Def->getParent(); + // Def may itself be a PHI; never insert inside a PHI group. + MachineBasicBlock::iterator InsertPt = + DefMBB->SkipPHIsLabelsAndDebug(++Def->getIterator()); + + // Create a copy in a VGPR using V_ACCVGPR_READ_B32_e64 so it's not folded + // out.
+ // NOTE(review): V_ACCVGPR_READ_B32_e64 looks like a 32-bit read; confirm + // ARC is never a wider AGPR class here. + const TargetRegisterClass *ARC = getRegOpRC(*MRI, *TRI, *MOs.front()); + Register TempVGPR = + MRI->createVirtualRegister(TRI->getEquivalentVGPRClass(ARC)); + MachineInstr *VGPRCopy = + BuildMI(*DefMBB, InsertPt, Def->getDebugLoc(), + TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TempVGPR) + .addReg(Reg, /* flags */ 0, SubReg); + + // Copy back to an AGPR and use that instead of the AGPR subreg in all MOs. + Register TempAGPR = MRI->createVirtualRegister(ARC); + BuildMI(*DefMBB, ++VGPRCopy->getIterator(), Def->getDebugLoc(), + TII->get(AMDGPU::COPY), TempAGPR) + .addReg(TempVGPR); + + LLVM_DEBUG(dbgs() << "Caching AGPR into VGPR: " << *VGPRCopy); + for (MachineOperand *MO : MOs) { + MO->setReg(TempAGPR); + MO->setSubReg(AMDGPU::NoSubRegister); + LLVM_DEBUG(dbgs() << " Changed PHI Operand: " << *MO << "\n"); + } + + Changed = true; + } + + return Changed; +} + bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -1769,7 +1968,7 @@ continue; } - if (MI.isPHI() && tryFoldLCSSAPhi(MI)) { + if (MI.isPHI() && tryFoldPhiAGPR(MI)) { Changed = true; continue; } @@ -1794,6 +1993,8 @@ !tryFoldOMod(MI)) Changed |= tryFoldClamp(MI); } + + Changed |= tryOptimizeAGPRPhis(*MBB); } return Changed; diff --git a/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir b/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir @@ -0,0 +1,410 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck %s --check-prefixes=GFX908 +# RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck %s --check-prefixes=GFX90A +# RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck %s --check-prefixes=GFX90A + +--- +name:
test_sgpr_init_multiuse +tracksRegLiveness: true + +body: | + ; GFX908-LABEL: name: test_sgpr_init_multiuse + ; GFX908: bb.0: + ; GFX908-NEXT: successors: %bb.1(0x80000000) + ; GFX908-NEXT: liveins: $sgpr0, $scc + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY1]], implicit $exec + ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY1]], implicit $exec + ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY1]], implicit $exec + ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY1]], implicit $exec + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: bb.1: + ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GFX908-NEXT: liveins: $scc + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: [[PHI:%[0-9]+]]:agpr_32 = PHI [[V_ACCVGPR_WRITE_B32_e64_3]], %bb.0, %13.sub0, %bb.1 + ; GFX908-NEXT: [[PHI1:%[0-9]+]]:agpr_32 = PHI [[V_ACCVGPR_WRITE_B32_e64_2]], %bb.0, %13.sub1, %bb.1 + ; GFX908-NEXT: [[PHI2:%[0-9]+]]:agpr_32 = PHI [[V_ACCVGPR_WRITE_B32_e64_1]], %bb.0, %13.sub2, %bb.1 + ; GFX908-NEXT: [[PHI3:%[0-9]+]]:agpr_32 = PHI [[V_ACCVGPR_WRITE_B32_e64_]], %bb.0, %13.sub3, %bb.1 + ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI3]] + ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[PHI2]] + ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[PHI1]] + ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[PHI]] + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec + ; GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec + ; GFX908-NEXT: 
[[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX908-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: bb.2: + ; GFX908-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: test_sgpr_init_multiuse + ; GFX90A: bb.0: + ; GFX90A-NEXT: successors: %bb.1(0x80000000) + ; GFX90A-NEXT: liveins: $sgpr0, $scc + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY1]] + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[COPY1]] + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:agpr_32 = COPY [[COPY1]] + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:agpr_32 = COPY [[COPY1]] + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: bb.1: + ; GFX90A-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GFX90A-NEXT: liveins: $scc + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[PHI:%[0-9]+]]:agpr_32 = PHI [[COPY5]], %bb.0, %13.sub0, %bb.1 + ; GFX90A-NEXT: [[PHI1:%[0-9]+]]:agpr_32 = PHI [[COPY4]], %bb.0, %13.sub1, %bb.1 + ; GFX90A-NEXT: [[PHI2:%[0-9]+]]:agpr_32 = PHI [[COPY3]], %bb.0, %13.sub2, %bb.1 + ; GFX90A-NEXT: [[PHI3:%[0-9]+]]:agpr_32 = PHI [[COPY2]], %bb.0, %13.sub3, %bb.1 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI3]] + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[PHI2]] + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[PHI1]] + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[PHI]] + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3 + ; GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec + ; GFX90A-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec + ; GFX90A-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 
= V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: bb.2: + ; GFX90A-NEXT: S_ENDPGM 0 + bb.0: + liveins: $sgpr0, $scc + successors: %bb.1 + + %0:sgpr_32 = COPY $sgpr0 + %1:vgpr_32 = COPY %0 + + bb.1: + liveins: $scc + successors: %bb.1, %bb.2 + + %8:vgpr_32 = PHI %1, %bb.0, %16, %bb.1 + %9:vgpr_32 = PHI %1, %bb.0, %17, %bb.1 + %10:vgpr_32 = PHI %1, %bb.0, %18, %bb.1 + %11:vgpr_32 = PHI %1, %bb.0, %19, %bb.1 + %12:areg_128_align2 = REG_SEQUENCE %8, %subreg.sub0, %9, %subreg.sub1, %10, %subreg.sub2, %11, %subreg.sub3 + %13:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec + %14:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec + %15:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 %14:vgpr_32, %13:vgpr_32, %12:areg_128_align2, 0, 0, 0, implicit $mode, implicit $exec + %16:vgpr_32 = COPY %15.sub0 + %17:vgpr_32 = COPY %15.sub1 + %18:vgpr_32 = COPY %15.sub2 + %19:vgpr_32 = COPY %15.sub3 + S_CBRANCH_SCC1 %bb.1, implicit $scc + + bb.2: + S_ENDPGM 0 +... 
+ +--- +name: test_sgpr_init_singleuse +tracksRegLiveness: true + +body: | + ; GFX908-LABEL: name: test_sgpr_init_singleuse + ; GFX908: bb.0: + ; GFX908-NEXT: successors: %bb.1(0x80000000) + ; GFX908-NEXT: liveins: $sgpr0, $scc + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX908-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY1]] + ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX908-NEXT: [[COPY4:%[0-9]+]]:agpr_32 = COPY [[COPY3]] + ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX908-NEXT: [[COPY6:%[0-9]+]]:agpr_32 = COPY [[COPY5]] + ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX908-NEXT: [[COPY8:%[0-9]+]]:agpr_32 = COPY [[COPY7]] + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: bb.1: + ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GFX908-NEXT: liveins: $scc + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: [[PHI:%[0-9]+]]:agpr_32 = PHI [[COPY2]], %bb.0, %16.sub0, %bb.1 + ; GFX908-NEXT: [[PHI1:%[0-9]+]]:agpr_32 = PHI [[COPY4]], %bb.0, %16.sub1, %bb.1 + ; GFX908-NEXT: [[PHI2:%[0-9]+]]:agpr_32 = PHI [[COPY6]], %bb.0, %16.sub2, %bb.1 + ; GFX908-NEXT: [[PHI3:%[0-9]+]]:agpr_32 = PHI [[COPY8]], %bb.0, %16.sub3, %bb.1 + ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[PHI3]] + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[PHI2]] + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[PHI1]] + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[PHI]] + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY11]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY9]], %subreg.sub3 + ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec + ; GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec + ; GFX908-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_1]], 
[[V_MOV_B32_e32_]], [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX908-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: bb.2: + ; GFX908-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: test_sgpr_init_singleuse + ; GFX90A: bb.0: + ; GFX90A-NEXT: successors: %bb.1(0x80000000) + ; GFX90A-NEXT: liveins: $sgpr0, $scc + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY1]] + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:agpr_32 = COPY [[COPY3]] + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:agpr_32 = COPY [[COPY5]] + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:agpr_32 = COPY [[COPY7]] + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: bb.1: + ; GFX90A-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GFX90A-NEXT: liveins: $scc + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[PHI:%[0-9]+]]:agpr_32 = PHI [[COPY2]], %bb.0, %16.sub0, %bb.1 + ; GFX90A-NEXT: [[PHI1:%[0-9]+]]:agpr_32 = PHI [[COPY4]], %bb.0, %16.sub1, %bb.1 + ; GFX90A-NEXT: [[PHI2:%[0-9]+]]:agpr_32 = PHI [[COPY6]], %bb.0, %16.sub2, %bb.1 + ; GFX90A-NEXT: [[PHI3:%[0-9]+]]:agpr_32 = PHI [[COPY8]], %bb.0, %16.sub3, %bb.1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[PHI3]] + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[PHI2]] + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[PHI1]] + ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[PHI]] + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY11]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY9]], %subreg.sub3 + ; GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec + ; GFX90A-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, 
implicit $exec + ; GFX90A-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: bb.2: + ; GFX90A-NEXT: S_ENDPGM 0 + bb.0: + liveins: $sgpr0, $scc + successors: %bb.1 + + %0:sgpr_32 = COPY $sgpr0 + %1:vgpr_32 = COPY %0 + %2:vgpr_32 = COPY %0 + %3:vgpr_32 = COPY %0 + %4:vgpr_32 = COPY %0 + + bb.1: + liveins: $scc + successors: %bb.1, %bb.2 + + %8:vgpr_32 = PHI %1, %bb.0, %16, %bb.1 + %9:vgpr_32 = PHI %2, %bb.0, %17, %bb.1 + %10:vgpr_32 = PHI %3, %bb.0, %18, %bb.1 + %11:vgpr_32 = PHI %4, %bb.0, %19, %bb.1 + %12:areg_128_align2 = REG_SEQUENCE %8, %subreg.sub0, %9, %subreg.sub1, %10, %subreg.sub2, %11, %subreg.sub3 + %13:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec + %14:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec + %15:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 %14:vgpr_32, %13:vgpr_32, %12:areg_128_align2, 0, 0, 0, implicit $mode, implicit $exec + %16:vgpr_32 = COPY %15.sub0 + %17:vgpr_32 = COPY %15.sub1 + %18:vgpr_32 = COPY %15.sub2 + %19:vgpr_32 = COPY %15.sub3 + S_CBRANCH_SCC1 %bb.1, implicit $scc + + bb.2: + S_ENDPGM 0 +... 
+ +--- +name: test_vgpr_init +tracksRegLiveness: true + +body: | + ; GFX908-LABEL: name: test_vgpr_init + ; GFX908: bb.0: + ; GFX908-NEXT: successors: %bb.1(0x80000000) + ; GFX908-NEXT: liveins: $vgpr0, $scc + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX908-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[COPY]] + ; GFX908-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY]] + ; GFX908-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[COPY]] + ; GFX908-NEXT: [[COPY4:%[0-9]+]]:agpr_32 = COPY [[COPY]] + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: bb.1: + ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GFX908-NEXT: liveins: $scc + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: [[PHI:%[0-9]+]]:agpr_32 = PHI [[COPY4]], %bb.0, %12.sub0, %bb.1 + ; GFX908-NEXT: [[PHI1:%[0-9]+]]:agpr_32 = PHI [[COPY3]], %bb.0, %12.sub1, %bb.1 + ; GFX908-NEXT: [[PHI2:%[0-9]+]]:agpr_32 = PHI [[COPY2]], %bb.0, %12.sub2, %bb.1 + ; GFX908-NEXT: [[PHI3:%[0-9]+]]:agpr_32 = PHI [[COPY1]], %bb.0, %12.sub3, %bb.1 + ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[PHI3]] + ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI2]] + ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[PHI1]] + ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[PHI]] + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY5]], %subreg.sub3 + ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec + ; GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec + ; GFX908-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX908-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: bb.2: + ; GFX908-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: test_vgpr_init + ; GFX90A: bb.0: + 
; GFX90A-NEXT: successors: %bb.1(0x80000000) + ; GFX90A-NEXT: liveins: $vgpr0, $scc + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[COPY]] + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY]] + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[COPY]] + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:agpr_32 = COPY [[COPY]] + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: bb.1: + ; GFX90A-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GFX90A-NEXT: liveins: $scc + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[PHI:%[0-9]+]]:agpr_32 = PHI [[COPY4]], %bb.0, %12.sub0, %bb.1 + ; GFX90A-NEXT: [[PHI1:%[0-9]+]]:agpr_32 = PHI [[COPY3]], %bb.0, %12.sub1, %bb.1 + ; GFX90A-NEXT: [[PHI2:%[0-9]+]]:agpr_32 = PHI [[COPY2]], %bb.0, %12.sub2, %bb.1 + ; GFX90A-NEXT: [[PHI3:%[0-9]+]]:agpr_32 = PHI [[COPY1]], %bb.0, %12.sub3, %bb.1 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[PHI3]] + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI2]] + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[PHI1]] + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[PHI]] + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY5]], %subreg.sub3 + ; GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec + ; GFX90A-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec + ; GFX90A-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: bb.2: + ; GFX90A-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $scc + successors: %bb.1 + + %0:vgpr_32 = COPY $vgpr0 + + bb.1: + liveins: $scc + successors: %bb.1, %bb.2 + + %8:vgpr_32 = PHI %0, %bb.0, %16, %bb.1 + 
%9:vgpr_32 = PHI %0, %bb.0, %17, %bb.1 + %10:vgpr_32 = PHI %0, %bb.0, %18, %bb.1 + %11:vgpr_32 = PHI %0, %bb.0, %19, %bb.1 + %12:areg_128_align2 = REG_SEQUENCE %8, %subreg.sub0, %9, %subreg.sub1, %10, %subreg.sub2, %11, %subreg.sub3 + %13:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec + %14:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec + %15:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 %14:vgpr_32, %13:vgpr_32, %12:areg_128_align2, 0, 0, 0, implicit $mode, implicit $exec + %16:vgpr_32 = COPY %15.sub0 + %17:vgpr_32 = COPY %15.sub1 + %18:vgpr_32 = COPY %15.sub2 + %19:vgpr_32 = COPY %15.sub3 + S_CBRANCH_SCC1 %bb.1, implicit $scc + + bb.2: + S_ENDPGM 0 +... + +--- +name: test_use_vgpr_temp +tracksRegLiveness: true + +body: | + ; GFX908-LABEL: name: test_use_vgpr_temp + ; GFX908: bb.0: + ; GFX908-NEXT: successors: %bb.1(0x80000000) + ; GFX908-NEXT: liveins: $sgpr0, $scc + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0 + ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec + ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec + ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec + ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3 + ; GFX908-NEXT: [[V_ACCVGPR_READ_B32_e64_:%[0-9]+]]:vgpr_32 = V_ACCVGPR_READ_B32_e64 [[REG_SEQUENCE]].sub0, implicit $exec + ; GFX908-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[V_ACCVGPR_READ_B32_e64_]] + ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: bb.1: + ; 
GFX908-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GFX908-NEXT: liveins: $scc + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: [[PHI:%[0-9]+]]:agpr_32 = PHI [[COPY1]], %bb.0, %18.sub0, %bb.1 + ; GFX908-NEXT: [[PHI1:%[0-9]+]]:agpr_32 = PHI [[COPY1]], %bb.0, %18.sub1, %bb.1 + ; GFX908-NEXT: [[PHI2:%[0-9]+]]:agpr_32 = PHI [[COPY1]], %bb.0, %18.sub2, %bb.1 + ; GFX908-NEXT: [[PHI3:%[0-9]+]]:agpr_32 = PHI [[COPY1]], %bb.0, %18.sub3, %bb.1 + ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[PHI3]] + ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[PHI2]] + ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[PHI1]] + ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[PHI]] + ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec + ; GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec + ; GFX908-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX908-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: bb.2: + ; GFX908-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: test_use_vgpr_temp + ; GFX90A: bb.0: + ; GFX90A-NEXT: successors: %bb.1(0x80000000) + ; GFX90A-NEXT: liveins: $sgpr0, $scc + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec + ; GFX90A-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec + ; GFX90A-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec + ; GFX90A-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = 
V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: bb.1: + ; GFX90A-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GFX90A-NEXT: liveins: $scc + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[PHI:%[0-9]+]]:agpr_32 = PHI [[REG_SEQUENCE]].sub0, %bb.0, %18.sub0, %bb.1 + ; GFX90A-NEXT: [[PHI1:%[0-9]+]]:agpr_32 = PHI [[REG_SEQUENCE]].sub0, %bb.0, %18.sub1, %bb.1 + ; GFX90A-NEXT: [[PHI2:%[0-9]+]]:agpr_32 = PHI [[REG_SEQUENCE]].sub0, %bb.0, %18.sub2, %bb.1 + ; GFX90A-NEXT: [[PHI3:%[0-9]+]]:agpr_32 = PHI [[REG_SEQUENCE]].sub0, %bb.0, %18.sub3, %bb.1 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI3]] + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[PHI2]] + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[PHI1]] + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[PHI]] + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec + ; GFX90A-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec + ; GFX90A-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: bb.2: + ; GFX90A-NEXT: S_ENDPGM 0 + bb.0: + ; Tests that tryOptimizeAGPRPhis kicks in for GFX908. 
+ liveins: $sgpr0, $scc + successors: %bb.1 + + %1:vgpr_32 = COPY $sgpr0 + %2:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %1, implicit $exec + %3:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %1, implicit $exec + %4:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %1, implicit $exec + %5:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %1, implicit $exec + %6:areg_128_align2 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1, %4, %subreg.sub2, %5, %subreg.sub3 + %7:vgpr_32 = COPY %6.sub0 + bb.1: + liveins: $scc + successors: %bb.1, %bb.2 + + %8:vgpr_32 = PHI %7, %bb.0, %16, %bb.1 + %9:vgpr_32 = PHI %7, %bb.0, %17, %bb.1 + %10:vgpr_32 = PHI %7, %bb.0, %18, %bb.1 + %11:vgpr_32 = PHI %7, %bb.0, %19, %bb.1 + %12:areg_128_align2 = REG_SEQUENCE %8, %subreg.sub0, %9, %subreg.sub1, %10, %subreg.sub2, %11, %subreg.sub3 + %13:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec + %14:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec + %15:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 %14:vgpr_32, %13:vgpr_32, %12:areg_128_align2, 0, 0, 0, implicit $mode, implicit $exec + %16:vgpr_32 = COPY %15.sub0 + %17:vgpr_32 = COPY %15.sub1 + %18:vgpr_32 = COPY %15.sub2 + %19:vgpr_32 = COPY %15.sub3 + S_CBRANCH_SCC1 %bb.1, implicit $scc + bb.2: + S_ENDPGM 0 +...