Patch summary: WWM register spill pseudos for the AV_32 vector superclass

What the hunks below do:
  * SIInstructions.td: defines SI_SPILL_WWM_AV32 next to SI_SPILL_WWM_V32,
    both inside a single braced `let isConvergent = 1 in { ... }`.
  * SIInstrInfo.h: isWWMRegSpillOpcode() also returns true for
    SI_SPILL_WWM_AV32_SAVE and SI_SPILL_WWM_AV32_RESTORE.
  * SIInstrInfo.cpp: getWWMRegSpillSaveOpcode() / getWWMRegSpillRestoreOpcode()
    take a new IsVectorSuperClass flag and return the AV32 pseudo when it is
    set, the V32 pseudo otherwise. getVectorRegSpillSaveOpcode() /
    getVectorRegSpillRestoreOpcode() evaluate TRI.isVectorSuperClass(RC) once
    and reuse the result for both the WWM path and the AV path.
  * SIRegisterInfo.cpp: the two new opcodes join the case list that returns 1
    and the case lists that already handle the V32 WWM save/restore pseudos.
  * wwm-spill-superclass-pseudo.mir: the test no longer expects llc to crash
    with a machine-verifier error; it now runs llc up to greedy,1 and up to
    virtregrewriter,1 and checks for SI_SPILL_WWM_AV32_RESTORE in each output.

Review notes:
  * The repeat-count checks use `-COUNT-2`. FileCheck's directive for "match
    this pattern N times" is `<PREFIX>-COUNT-<N>:`; a `-NUM-<N>` suffix is not
    a FileCheck directive, so check lines spelled that way are not enforced.
  * NOTE(review): the `defm ... : SI_SPILL_VGPR ;` lines in the
    SIInstructions.td hunk appear to have lost their angle-bracket template
    arguments in this copy of the patch. Restore them from the upstream file
    before applying; they are left exactly as received here.
  * NOTE(review): leading indentation and blank context lines inside the hunks
    were reconstructed from the hunk line counts; confirm against the target
    tree or apply with whitespace-insensitive matching.

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -670,7 +670,9 @@
 
   static bool isWWMRegSpillOpcode(uint16_t Opcode) {
     return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
-           Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE;
+           Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE ||
+           Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE ||
+           Opcode == AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
   }
 
   static bool isDPP(const MachineInstr &MI) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1589,11 +1589,15 @@
   }
 }
 
-static unsigned getWWMRegSpillSaveOpcode(unsigned Size) {
+static unsigned getWWMRegSpillSaveOpcode(unsigned Size,
+                                         bool IsVectorSuperClass) {
   // Currently, there is only 32-bit WWM register spills needed.
   if (Size != 4)
     llvm_unreachable("unknown wwm register spill size");
 
+  if (IsVectorSuperClass)
+    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
+
   return AMDGPU::SI_SPILL_WWM_V32_SAVE;
 }
 
@@ -1602,11 +1606,13 @@
                                             unsigned Size,
                                             const SIRegisterInfo &TRI,
                                             const SIMachineFunctionInfo &MFI) {
+  bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);
+
   // Choose the right opcode if spilling a WWM register.
   if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG))
-    return getWWMRegSpillSaveOpcode(Size);
+    return getWWMRegSpillSaveOpcode(Size, IsVectorSuperClass);
 
-  if (TRI.isVectorSuperClass(RC))
+  if (IsVectorSuperClass)
     return getAVSpillSaveOpcode(Size);
 
   return TRI.isAGPRClass(RC) ? getAGPRSpillSaveOpcode(Size)
@@ -1809,11 +1815,15 @@
   }
 }
 
-static unsigned getWWMRegSpillRestoreOpcode(unsigned Size) {
+static unsigned getWWMRegSpillRestoreOpcode(unsigned Size,
+                                            bool IsVectorSuperClass) {
   // Currently, there is only 32-bit WWM register spills needed.
   if (Size != 4)
     llvm_unreachable("unknown wwm register spill size");
 
+  if (IsVectorSuperClass)
+    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
+
   return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
 }
 
@@ -1821,11 +1831,13 @@
 getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC,
                                unsigned Size, const SIRegisterInfo &TRI,
                                const SIMachineFunctionInfo &MFI) {
+  bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);
+
   // Choose the right opcode if restoring a WWM register.
   if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG))
-    return getWWMRegSpillRestoreOpcode(Size);
+    return getWWMRegSpillRestoreOpcode(Size, IsVectorSuperClass);
 
-  if (TRI.isVectorSuperClass(RC))
+  if (IsVectorSuperClass)
     return getAVSpillRestoreOpcode(Size);
 
   return TRI.isAGPRClass(RC) ? getAGPRSpillRestoreOpcode(Size)
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -952,8 +952,10 @@
 defm SI_SPILL_AV512 : SI_SPILL_VGPR ;
 defm SI_SPILL_AV1024 : SI_SPILL_VGPR ;
 
-let isConvergent = 1 in
-defm SI_SPILL_WWM_V32 : SI_SPILL_VGPR ;
+let isConvergent = 1 in {
+  defm SI_SPILL_WWM_V32 : SI_SPILL_VGPR ;
+  defm SI_SPILL_WWM_AV32 : SI_SPILL_VGPR ;
+}
 
 def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
   (outs SReg_64:$dst),
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1065,6 +1065,8 @@
   case AMDGPU::SI_SPILL_AV32_RESTORE:
   case AMDGPU::SI_SPILL_WWM_V32_SAVE:
   case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
+  case AMDGPU::SI_SPILL_WWM_AV32_SAVE:
+  case AMDGPU::SI_SPILL_WWM_AV32_RESTORE:
     return 1;
   default: llvm_unreachable("Invalid spill opcode");
   }
@@ -2144,7 +2146,8 @@
     case AMDGPU::SI_SPILL_AV96_SAVE:
     case AMDGPU::SI_SPILL_AV64_SAVE:
     case AMDGPU::SI_SPILL_AV32_SAVE:
-    case AMDGPU::SI_SPILL_WWM_V32_SAVE: {
+    case AMDGPU::SI_SPILL_WWM_V32_SAVE:
+    case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
       const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                          AMDGPU::OpName::vdata);
       assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
@@ -2211,7 +2214,8 @@
     case AMDGPU::SI_SPILL_AV384_RESTORE:
     case AMDGPU::SI_SPILL_AV512_RESTORE:
     case AMDGPU::SI_SPILL_AV1024_RESTORE:
-    case AMDGPU::SI_SPILL_WWM_V32_RESTORE: {
+    case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
+    case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: {
       const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                          AMDGPU::OpName::vdata);
       assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-spill-superclass-pseudo.mir b/llvm/test/CodeGen/AMDGPU/wwm-spill-superclass-pseudo.mir
--- a/llvm/test/CodeGen/AMDGPU/wwm-spill-superclass-pseudo.mir
+++ b/llvm/test/CodeGen/AMDGPU/wwm-spill-superclass-pseudo.mir
@@ -1,12 +1,5 @@
-# RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -start-before=si-lower-sgpr-spills -stop-after=greedy,1 -verify-machineinstrs --stress-regalloc=2 %s -o /dev/null 2>&1 | FileCheck %s
-
-# This test would fail as there is no wwm-register spill pseudo instructions supported for the vector superclass (AV).
-# Currently there is only VGPR_32 regclass spilling allowed for wwm-registers.
-
-# CHECK: Bad machine code: Illegal virtual register for instruction
-# CHECK: instruction: {{.*}} [[AV_REG:%[0-9]+]]:av_32 = SI_SPILL_WWM_V32_RESTORE
-# CHECK-NEXT: - operand 0: [[AV_REG]]:av_32
-# CHECK-NEXT: Expected a VGPR_32 register, but got a AV_32 register
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -start-before=si-lower-sgpr-spills -stop-after=greedy,1 -verify-machineinstrs --stress-regalloc=2 -o - %s | FileCheck -check-prefix GCN-REGALLOC %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -start-before=si-lower-sgpr-spills -stop-after=virtregrewriter,1 -verify-machineinstrs --stress-regalloc=2 -o - %s | FileCheck -check-prefix GCN-REWRITER %s
 
 name: test_wwm_reg_superclass_spill
 tracksRegLiveness: true
@@ -19,6 +12,11 @@
   sgprForEXECCopy: '$sgpr100_sgpr101'
 body: |
   bb.0:
+    ; GCN-REGALLOC-COUNT-2: %{{[0-9]+}}:av_32 = SI_SPILL_WWM_AV32_RESTORE
+    ; GCN-REGALLOC: S_ENDPGM 0
+    ;
+    ; GCN-REWRITER-COUNT-2: renamable $vgpr0 = SI_SPILL_WWM_AV32_RESTORE
+    ; GCN-REWRITER: S_ENDPGM 0
     $vgpr0 = IMPLICIT_DEF
     $sgpr0_sgpr1 = IMPLICIT_DEF
     %temp0:vgpr_32(s32) = COPY $vgpr0