Index: lib/Target/AMDGPU/AMDGPU.td =================================================================== --- lib/Target/AMDGPU/AMDGPU.td +++ lib/Target/AMDGPU/AMDGPU.td @@ -118,6 +118,11 @@ "true", "Support flat address space">; +def FeatureXNACK : SubtargetFeature<"xnack", + "EnableXNACK", + "true", + "Enable XNACK support">; + def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling", "EnableVGPRSpilling", "true", Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -417,13 +417,13 @@ } } - if (VCCUsed || FlatUsed) + if (VCCUsed || FlatUsed || STM.isXNACKEnabled()) { MaxSGPR += 2; - if (FlatUsed) { - MaxSGPR += 2; - // 2 additional for VI+. - if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + if (FlatUsed) + MaxSGPR += 2; + + if (STM.isXNACKEnabled()) MaxSGPR += 2; } Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -76,6 +76,7 @@ bool EnableIfCvt; bool EnableLoadStoreOpt; bool EnableUnsafeDSOffsetFolding; + bool EnableXNACK; unsigned WavefrontSize; bool CFALUBug; int LocalMemorySize; @@ -290,6 +291,10 @@ } bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const; + bool isXNACKEnabled() const { + return EnableXNACK; + } + unsigned getMaxWavesPerCU() const { if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) return 10; Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -73,6 +73,7 @@ CaymanISA(false), FlatAddressSpace(false), FlatForGlobal(false), EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false), + EnableXNACK(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false), GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0), Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -37,13 +37,17 @@ const AMDGPUSubtarget &ST = MF.getSubtarget(); if (ST.hasSGPRInitBug()) { unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 4; + if (ST.isXNACKEnabled()) + BaseIdx -= 4; + unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx)); return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass); } if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - // 98/99 need to be reserved for flat_scr, and 100/101 for vcc. This is the - // next sgpr128 down. + // 98/99 need to be reserved for flat_scr or 96/97 for flat_scr and + // 98/99 for xnack_mask, and 100/101 for vcc. This is the next sgpr128 down + // either way. return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95; } @@ -54,13 +58,25 @@ const MachineFunction &MF) const { const AMDGPUSubtarget &ST = MF.getSubtarget(); if (ST.hasSGPRInitBug()) { - unsigned Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5; + unsigned Idx; + + if (!ST.isXNACKEnabled()) + Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5; + else + Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1; + return AMDGPU::SGPR_32RegClass.getRegister(Idx); } if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - // Next register before reservations for flat_scr and vcc. - return AMDGPU::SGPR97; + if (!ST.isXNACKEnabled()) { + // Next register before reservations for flat_scr and vcc. + return AMDGPU::SGPR97; + } else { + // Next register before reservations for flat_scr, xnack_mask, vcc, + // and scratch resource. + return AMDGPU::SGPR91; + } } return AMDGPU::SGPR95; @@ -86,6 +102,9 @@ // for VCC/FLAT_SCR. reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99); reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101); + + if (ST.isXNACKEnabled()) + reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97); } // Tonga and Iceland can only allocate a fixed number of SGPRs due @@ -93,9 +112,11 @@ if (ST.hasSGPRInitBug()) { unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs). - // Assume XNACK_MASK is unused. unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4; + if (ST.isXNACKEnabled()) + Limit -= 2; + for (unsigned i = Limit; i < NumSGPRs; ++i) { unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i); reserveRegisterTuples(Reserved, Reg); Index: test/CodeGen/AMDGPU/flat-scratch-reg.ll =================================================================== --- test/CodeGen/AMDGPU/flat-scratch-reg.ll +++ test/CodeGen/AMDGPU/flat-scratch-reg.ll @@ -1,8 +1,10 @@ -; RUN: llc < %s -march=amdgcn -mcpu=kaveri -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=CI -; RUN: llc < %s -march=amdgcn -mcpu=fiji -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=VI +; RUN: llc < %s -march=amdgcn -mcpu=kaveri -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=CI --check-prefix=NO-XNACK +; RUN: llc < %s -march=amdgcn -mcpu=fiji -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=VI --check-prefix=NO-XNACK +; RUN: llc < %s -march=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=XNACK ; GCN-LABEL: {{^}}no_vcc_no_flat: -; GCN: ; NumSgprs: 8 +; NO-XNACK: ; NumSgprs: 8 +; XNACK: ; NumSgprs: 12 define void @no_vcc_no_flat() { entry: call void asm sideeffect "", "~{SGPR7}"() @@ -10,7 +12,8 @@ } ; GCN-LABEL: {{^}}vcc_no_flat: -; GCN: ; NumSgprs: 10 +; NO-XNACK: ; NumSgprs: 10 +; XNACK: ; NumSgprs: 12 define void @vcc_no_flat() { entry: call void asm sideeffect "", "~{SGPR7},~{VCC}"() @@ -19,7 +22,8 @@ ; GCN-LABEL: {{^}}no_vcc_flat: ; CI: ; NumSgprs: 12 -; VI: ; NumSgprs: 14 +; VI: ; NumSgprs: 12 +; XNACK: ; NumSgprs: 14 define void @no_vcc_flat() { entry: call void asm sideeffect "", "~{SGPR7},~{FLAT_SCR}"() @@ -28,7 +32,8 @@ ; GCN-LABEL: {{^}}vcc_flat: ; CI: ; NumSgprs: 12 -; VI: ; NumSgprs: 14 +; VI: ; NumSgprs: 12 +; XNACK: ; NumSgprs: 14 define void @vcc_flat() { entry: call void asm sideeffect "", "~{SGPR7},~{VCC},~{FLAT_SCR}"()