diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -977,8 +977,10 @@ if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) { OutStreamer->emitInt32(R_00B02C_SPI_SHADER_PGM_RSRC2_PS); - OutStreamer->emitInt32( - S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks)); + unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11 + ? divideCeil(CurrentProgramInfo.LDSBlocks, 2) + : CurrentProgramInfo.LDSBlocks; + OutStreamer->emitInt32(S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize)); OutStreamer->emitInt32(R_0286CC_SPI_PS_INPUT_ENA); OutStreamer->emitInt32(MFI->getPSInputEnable()); OutStreamer->emitInt32(R_0286D0_SPI_PS_INPUT_ADDR); @@ -1022,7 +1024,10 @@ // ScratchSize is in bytes, 16 aligned. MD->setScratchSize(CC, alignTo(CurrentProgramInfo.ScratchSize, 16)); if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) { - MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks)); + unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11 + ? divideCeil(CurrentProgramInfo.LDSBlocks, 2) + : CurrentProgramInfo.LDSBlocks; + MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize)); MD->setSpiPsInputEna(MFI->getPSInputEnable()); MD->setSpiPsInputAddr(MFI->getPSInputAddr()); } diff --git a/llvm/test/CodeGen/AMDGPU/extra-lds-size.ll b/llvm/test/CodeGen/AMDGPU/extra-lds-size.ll --- a/llvm/test/CodeGen/AMDGPU/extra-lds-size.ll +++ b/llvm/test/CodeGen/AMDGPU/extra-lds-size.ll @@ -1,5 +1,7 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10-PAL %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10-MESA %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11-PAL %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11-MESA %s ; Check EXTRA_LDS_SIZE in SPI_SHADER_PGM_RSRC2_PS. @@ -8,6 +10,11 @@ ; GFX10-MESA: .long 45100 ; GFX10-MESA-NEXT: .long 2048 +; GFX11-PAL: 0x2c0b (SPI_SHADER_PGM_RSRC2_PS): 0x400 + +; GFX11-MESA: .long 45100 +; GFX11-MESA-NEXT: .long 1024 + @lds = internal addrspace(3) global [4096 x i8] undef define amdgpu_ps void @global_store_saddr_uniform_ptr_in_vgprs(i32 %voffset) {