AMDGPU: select AV spill pseudos for WWM register spills on MAI subtargets

Summary of the change carried by this patch:
  * SIInstructions.td: define the SI_SPILL_WWM_AV32 spill pseudos next to
    SI_SPILL_WWM_V32, both under isConvergent = 1.
  * SIInstrInfo.cpp: getWWMRegSpillSaveOpcode/getWWMRegSpillRestoreOpcode take
    a HasVectorSuperClass flag and return the WWM_AV32 pseudo when it is set;
    the two call sites compute the flag from the subtarget's hasMAIInsts().
  * SIInstrInfo.h: isWWMRegSpillOpcode also accepts the WWM_AV32 opcodes.
  * SIRegisterInfo.cpp: the WWM_AV32 opcodes join the existing WWM_V32 case
    labels (the 32-bit "return 1" group and the VGPR save/restore groups).
  * whole-wave-register-spill.ll: new RUN lines use --stop-after=greedy,1 and
    check for WWM_V32 spills on gfx906 and WWM_AV32 spills on gfx908.

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -662,7 +662,9 @@
 
   static bool isWWMRegSpillOpcode(uint16_t Opcode) {
     return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
-           Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE;
+           Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE ||
+           Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE ||
+           Opcode == AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
   }
 
   static bool isDPP(const MachineInstr &MI) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1587,11 +1587,16 @@
   }
 }
 
-static unsigned getWWMRegSpillSaveOpcode(unsigned Size) {
+static unsigned getWWMRegSpillSaveOpcode(unsigned Size,
+                                         bool HasVectorSuperClass) {
   // Currently, there is only 32-bit WWM register spills needed.
   if (Size != 4)
     llvm_unreachable("unknown wwm register spill size");
 
+  // Return the AV pseudo opcode for gfx908+.
+  if (HasVectorSuperClass)
+    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
+
   return AMDGPU::SI_SPILL_WWM_V32_SAVE;
 }
 
@@ -1599,10 +1604,11 @@
                                             const TargetRegisterClass *RC,
                                             unsigned Size,
                                             const SIRegisterInfo &TRI,
-                                            const SIMachineFunctionInfo &MFI) {
+                                            const SIMachineFunctionInfo &MFI,
+                                            bool HasVectorSuperClass) {
   // Choose the right opcode if spilling a WWM register.
   if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG))
-    return getWWMRegSpillSaveOpcode(Size);
+    return getWWMRegSpillSaveOpcode(Size, HasVectorSuperClass);
 
   if (TRI.isVectorSuperClass(RC))
     return getAVSpillSaveOpcode(Size);
@@ -1655,8 +1661,9 @@
     return;
   }
 
-  unsigned Opcode = getVectorRegSpillSaveOpcode(VReg ? VReg : SrcReg, RC,
-                                                SpillSize, RI, *MFI);
+  bool HasVectorSuperClass = MF->getSubtarget<GCNSubtarget>().hasMAIInsts();
+  unsigned Opcode = getVectorRegSpillSaveOpcode(
+      VReg ? VReg : SrcReg, RC, SpillSize, RI, *MFI, HasVectorSuperClass);
   MFI->setHasSpilledVGPRs();
 
   BuildMI(MBB, MI, DL, get(Opcode))
@@ -1807,21 +1814,28 @@
   }
 }
 
-static unsigned getWWMRegSpillRestoreOpcode(unsigned Size) {
+static unsigned getWWMRegSpillRestoreOpcode(unsigned Size,
+                                            bool HasVectorSuperClass) {
   // Currently, there is only 32-bit WWM register spills needed.
   if (Size != 4)
     llvm_unreachable("unknown wwm register spill size");
 
+  // Return the AV pseudo opcode for gfx908+.
+  if (HasVectorSuperClass)
+    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
+
   return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
 }
 
-static unsigned
-getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC,
-                               unsigned Size, const SIRegisterInfo &TRI,
-                               const SIMachineFunctionInfo &MFI) {
+static unsigned getVectorRegSpillRestoreOpcode(Register Reg,
+                                               const TargetRegisterClass *RC,
+                                               unsigned Size,
+                                               const SIRegisterInfo &TRI,
+                                               const SIMachineFunctionInfo &MFI,
+                                               bool HasVectorSuperClass) {
   // Choose the right opcode if restoring a WWM register.
   if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG))
-    return getWWMRegSpillRestoreOpcode(Size);
+    return getWWMRegSpillRestoreOpcode(Size, HasVectorSuperClass);
 
   if (TRI.isVectorSuperClass(RC))
     return getAVSpillRestoreOpcode(Size);
@@ -1873,8 +1887,9 @@
     return;
   }
 
-  unsigned Opcode = getVectorRegSpillRestoreOpcode(VReg ? VReg : DestReg, RC,
-                                                   SpillSize, RI, *MFI);
+  bool HasVectorSuperClass = MF->getSubtarget<GCNSubtarget>().hasMAIInsts();
+  unsigned Opcode = getVectorRegSpillRestoreOpcode(
+      VReg ? VReg : DestReg, RC, SpillSize, RI, *MFI, HasVectorSuperClass);
   BuildMI(MBB, MI, DL, get(Opcode), DestReg)
       .addFrameIndex(FrameIndex)           // vaddr
       .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -940,8 +940,10 @@
 defm SI_SPILL_AV512  : SI_SPILL_VGPR <AV_512, 1>;
 defm SI_SPILL_AV1024 : SI_SPILL_VGPR <AV_1024, 1>;
 
-let isConvergent = 1 in
-defm SI_SPILL_WWM_V32 : SI_SPILL_VGPR <VGPR_32>;
+let isConvergent = 1 in {
+  defm SI_SPILL_WWM_V32  : SI_SPILL_VGPR <VGPR_32>;
+  defm SI_SPILL_WWM_AV32 : SI_SPILL_VGPR <AV_32, 1>;
+}
 
 def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
   (outs SReg_64:$dst),
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1065,6 +1065,8 @@
   case AMDGPU::SI_SPILL_AV32_RESTORE:
   case AMDGPU::SI_SPILL_WWM_V32_SAVE:
   case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
+  case AMDGPU::SI_SPILL_WWM_AV32_SAVE:
+  case AMDGPU::SI_SPILL_WWM_AV32_RESTORE:
     return 1;
   default: llvm_unreachable("Invalid spill opcode");
   }
@@ -2144,7 +2146,8 @@
     case AMDGPU::SI_SPILL_AV96_SAVE:
     case AMDGPU::SI_SPILL_AV64_SAVE:
     case AMDGPU::SI_SPILL_AV32_SAVE:
-    case AMDGPU::SI_SPILL_WWM_V32_SAVE: {
+    case AMDGPU::SI_SPILL_WWM_V32_SAVE:
+    case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
       const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                          AMDGPU::OpName::vdata);
       assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
@@ -2211,7 +2214,8 @@
     case AMDGPU::SI_SPILL_AV384_RESTORE:
     case AMDGPU::SI_SPILL_AV512_RESTORE:
     case AMDGPU::SI_SPILL_AV1024_RESTORE:
-    case AMDGPU::SI_SPILL_WWM_V32_RESTORE: {
+    case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
+    case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: {
       const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                          AMDGPU::OpName::vdata);
       assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll
@@ -1,4 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --stop-after=greedy,1 --verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX906-GREEDY %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 --stop-after=greedy,1 --verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX908-GREEDY %s
+
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -O0 --verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN-O0 %s
 
@@ -13,6 +16,18 @@
 ; are whole-wave operations and hence the VGPRs involved in such operations require whole-wave spilling.
 
 define void @test() #0 {
+; GFX906-GREEDY-LABEL: name: test
+; GFX906-GREEDY: bb.0 (%ir-block.0):
+; GFX906-GREEDY: %[[WWMREG:[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr16, 0, %[[WWMREG]]
+; GFX906-GREEDY: SI_SPILL_WWM_V32_SAVE %[[WWMREG]], {{.*}}, $sgpr32, 0, implicit $exec :: (store (s32) into {{.*}}, addrspace 5)
+; GFX906-GREEDY: %{{[0-9]+}}:vgpr_32 = SI_SPILL_WWM_V32_RESTORE {{.*}}, $sgpr32, 0, implicit $exec :: (load (s32) from {{.*}}, addrspace 5)
+
+; GFX908-GREEDY-LABEL: name: test
+; GFX908-GREEDY: bb.0 (%ir-block.0):
+; GFX908-GREEDY: %[[WWMREG:[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr16, 0, %[[WWMREG]]
+; GFX908-GREEDY: SI_SPILL_WWM_AV32_SAVE %[[WWMREG]], {{.*}}, $sgpr32, 0, implicit $exec :: (store (s32) into {{.*}}, addrspace 5)
+; GFX908-GREEDY: %{{[0-9]+}}:vgpr_32 = SI_SPILL_WWM_AV32_RESTORE {{.*}}, $sgpr32, 0, implicit $exec :: (load (s32) from {{.*}}, addrspace 5)
+
 ; GCN-LABEL: test:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)