Index: lib/Target/AMDGPU/AMDGPU.td =================================================================== --- lib/Target/AMDGPU/AMDGPU.td +++ lib/Target/AMDGPU/AMDGPU.td @@ -327,12 +327,6 @@ "Enable private/scratch buffer sizes greater than 128 GB" >; -def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling", - "EnableVGPRSpilling", - "true", - "Enable spilling of VGPRs to scratch memory" ->; - def FeatureDumpCode : SubtargetFeature <"DumpCode", "DumpCode", "true", Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -1008,7 +1008,6 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo) { - const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv()); @@ -1029,10 +1028,9 @@ OutStreamer->EmitIntValue(RsrcReg, 4); OutStreamer->EmitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) | S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4); - if (STM.isVGPRSpillingEnabled(MF.getFunction())) { - OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4); - OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4); - } + OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4); + OutStreamer->EmitIntValue( + S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4); } if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) { Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -744,8 +744,6 @@ void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override; - bool isVGPRSpillingEnabled(const Function &F) const; - unsigned getMaxNumUserSGPRs() const { return 16; } Index: 
lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -480,10 +480,6 @@ Policy.ShouldTrackLaneMasks = true; } -bool GCNSubtarget::isVGPRSpillingEnabled(const Function& F) const { - return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv()); -} - unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const { if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { if (SGPRs <= 80) Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -908,16 +908,6 @@ return; } - if (!ST.isVGPRSpillingEnabled(MF->getFunction())) { - LLVMContext &Ctx = MF->getFunction().getContext(); - Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to" - " spill register"); - BuildMI(MBB, MI, DL, get(AMDGPU::KILL)) - .addReg(SrcReg); - - return; - } - assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize); @@ -1010,15 +1000,6 @@ return; } - if (!ST.isVGPRSpillingEnabled(MF->getFunction())) { - LLVMContext &Ctx = MF->getFunction().getContext(); - Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to" - " restore register"); - BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg); - - return; - } - assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize); Index: lib/Target/AMDGPU/SIMachineFunctionInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -117,7 +117,6 @@ } const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); - bool MaySpill = ST.isVGPRSpillingEnabled(F); bool HasStackObjects = FrameInfo.hasStackObjects(); if (isEntryFunction()) { @@ 
-126,21 +125,18 @@ if (WorkItemIDZ) WorkItemIDY = true; - if (HasStackObjects || MaySpill) { - PrivateSegmentWaveByteOffset = true; + PrivateSegmentWaveByteOffset = true; // HS and GS always have the scratch wave offset in SGPR5 on GFX9. if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 && (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS)) - ArgInfo.PrivateSegmentWaveByteOffset - = ArgDescriptor::createRegister(AMDGPU::SGPR5); - } + ArgInfo.PrivateSegmentWaveByteOffset = + ArgDescriptor::createRegister(AMDGPU::SGPR5); } bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F); if (isAmdHsaOrMesa) { - if (HasStackObjects || MaySpill) - PrivateSegmentBuffer = true; + PrivateSegmentBuffer = true; if (F.hasFnAttribute("amdgpu-dispatch-ptr")) DispatchPtr = true; @@ -151,8 +147,7 @@ if (F.hasFnAttribute("amdgpu-dispatch-id")) DispatchID = true; } else if (ST.isMesaGfxShader(F)) { - if (HasStackObjects || MaySpill) - ImplicitBufferPtr = true; + ImplicitBufferPtr = true; } if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr")) Index: test/CodeGen/AMDGPU/local-stack-slot-offset.ll =================================================================== --- test/CodeGen/AMDGPU/local-stack-slot-offset.ll +++ test/CodeGen/AMDGPU/local-stack-slot-offset.ll @@ -1,5 +1,5 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -mattr=+vgpr-spilling -mattr=-promote-alloca -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -;RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=+vgpr-spilling -mattr=-promote-alloca -verify-machineinstrs | FileCheck %s -check-prefix=CHECK +;RUN: llc < %s -march=amdgcn -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs | FileCheck %s -check-prefix=CHECK +;RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca -verify-machineinstrs | FileCheck %s -check-prefix=CHECK ; Allocate two stack slots of 2052 bytes each requiring a total of 4104 bytes. 
; Extracting the last element of each does not fit into the offset field of Index: test/CodeGen/AMDGPU/noop-shader-O0.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/noop-shader-O0.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; Ensure NOOP shaders compile at OptNone. + +; Confirm registers reserved in SIMachineFunctionInfo are those expected during +; lowering, even when e.g. spilling is required due to being at OptNone. + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +target triple = "amdgcn-amd-amdpal" + +define amdgpu_vs void @noop_vs() { +; GCN-LABEL: noop_vs: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_endpgm +entry: + ret void +} + +define amdgpu_ls void @noop_ls() { +; GCN-LABEL: noop_ls: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_endpgm +entry: + ret void +} + +define amdgpu_hs void @noop_hs() { +; GCN-LABEL: noop_hs: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_endpgm +entry: + ret void +} + +define amdgpu_es void @noop_es() { +; GCN-LABEL: noop_es: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_endpgm +entry: + ret void +} + +define amdgpu_gs void @noop_gs() { +; GCN-LABEL: noop_gs: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_endpgm +entry: + ret void +} + +define amdgpu_ps void @noop_ps() { +; GCN-LABEL: noop_ps: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_endpgm +entry: + ret void +} + +define amdgpu_cs void @noop_cs() { +; GCN-LABEL: noop_cs: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_endpgm +entry: + ret void +} Index: test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll =================================================================== --- 
test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll +++ test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll @@ -1,5 +1,5 @@ -; RUN: llc -O0 -march=amdgcn -verify-machineinstrs -mattr=+vgpr-spilling < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -O0 -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=+vgpr-spilling < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -O0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -O0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare void @llvm.amdgcn.s.barrier() nounwind convergent Index: test/CodeGen/AMDGPU/scratch-simple.ll =================================================================== --- test/CodeGen/AMDGPU/scratch-simple.ll +++ test/CodeGen/AMDGPU/scratch-simple.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx803 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx803 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s ; This used to fail due to a v_add_i32 instruction with an illegal immediate ; operand that was created during Local Stack Slot Allocation. 
Test case derived Index: test/CodeGen/AMDGPU/selected-stack-object.ll =================================================================== --- test/CodeGen/AMDGPU/selected-stack-object.ll +++ /dev/null @@ -1,15 +0,0 @@ -; "Assertion failure" should be caught with both XFAIL * and +Asserts. -; XFAIL: * -; REQUIRES: asserts - -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s - -; See also local-stack-slot-bug.ll -; This fails because a stack object is created during instruction selection. - -; CHECK-LABEL: {{^}}main: -define amdgpu_ps float @main(i32 %idx) { -main_body: - %v1 = extractelement <81 x float> , i32 %idx - ret float %v1 -} Index: test/CodeGen/AMDGPU/si-sgpr-spill.ll =================================================================== --- test/CodeGen/AMDGPU/si-sgpr-spill.ll +++ test/CodeGen/AMDGPU/si-sgpr-spill.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=TOVGPR %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling,-mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=TOVGPR %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; These tests check that the compiler won't crash when it needs to spill ; SGPRs. 
Index: test/CodeGen/AMDGPU/spill-m0.ll =================================================================== --- test/CodeGen/AMDGPU/spill-m0.ll +++ test/CodeGen/AMDGPU/spill-m0.ll @@ -1,8 +1,8 @@ -; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s -; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -amdgpu-spill-sgpr-to-smem=0 -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s -; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s -; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -amdgpu-spill-sgpr-to-smem=0 -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s -; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -amdgpu-spill-sgpr-to-smem=1 -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOSMEM -check-prefix=GCN %s +; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s +; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -amdgpu-spill-sgpr-to-smem=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s +; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s +; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -amdgpu-spill-sgpr-to-smem=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s +; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -amdgpu-spill-sgpr-to-smem=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=TOSMEM -check-prefix=GCN %s ; XXX - Why does it like to use vcc? 
Index: test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll =================================================================== --- test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll +++ test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=SIMESA %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=VIMESA %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=GFX9MESA %s -; RUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA -check-prefix=HSA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=SIMESA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=VIMESA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=GFX9MESA %s +; RUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA -check-prefix=HSA %s ; RUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA -check-prefix=HSA %s ; This ends up using all 256 registers and requires register Index: 
test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll =================================================================== --- test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll +++ test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; This ends up using all 255 registers and requires register ; scavenging which will fail to find an unsued register.