Index: llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -65,7 +65,7 @@ unsigned getMaxKernArgAlign() const { return MaxKernArgAlign.value(); } - unsigned getLDSSize() const { + uint32_t getLDSSize() const { return LDSSize; } Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -270,7 +270,8 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo { uint64_t ExplicitKernArgSize = 0; unsigned MaxKernArgAlign = 0; - unsigned LDSSize = 0; + uint32_t LDSSize = 0; + uint32_t GDSSize = 0; Align DynLDSAlign; bool IsEntryFunction = false; bool NoSignedZerosFPMath = false; @@ -308,6 +309,7 @@ UINT64_C(0)); YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u); YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u); + YamlIO.mapOptional("gdsSize", MFI.GDSSize, 0u); YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align()); YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false); YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false); Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -578,6 +578,7 @@ const llvm::MachineFunction &MF) : ExplicitKernArgSize(MFI.getExplicitKernArgSize()), MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()), + GDSSize(MFI.getGDSSize()), DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()), NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()), MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()), @@ -607,6 +608,7 @@ ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize; MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign); LDSSize = YamlMFI.LDSSize; + GDSSize = YamlMFI.GDSSize; DynLDSAlign = YamlMFI.DynLDSAlign; HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress; Occupancy = YamlMFI.Occupancy; Index: llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll =================================================================== --- llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll +++ llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll @@ -7,6 +7,7 @@ ; AFTER-PEI-NEXT: explicitKernArgSize: 12 ; AFTER-PEI-NEXT: maxKernArgAlign: 8 ; AFTER-PEI-NEXT: ldsSize: 0 +; AFTER-PEI-NEXT: gdsSize: 0 ; AFTER-PEI-NEXT: dynLDSAlign: 1 ; AFTER-PEI-NEXT: isEntryFunction: true ; AFTER-PEI-NEXT: noSignedZerosFPMath: false Index: llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir =================================================================== --- llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir +++ llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir @@ -8,6 +8,7 @@ # FULL-NEXT: explicitKernArgSize: 128 # FULL-NEXT: maxKernArgAlign: 64 # FULL-NEXT: ldsSize: 2048 +# FULL-NEXT: gdsSize: 256 # FULL-NEXT: dynLDSAlign: 1 # FULL-NEXT: isEntryFunction: true # FULL-NEXT: noSignedZerosFPMath: false @@ -47,6 +48,7 @@ # SIMPLE-NEXT: explicitKernArgSize: 128 # SIMPLE-NEXT: maxKernArgAlign: 64 # SIMPLE-NEXT: ldsSize: 2048 +# SIMPLE-NEXT: gdsSize: 256 # SIMPLE-NEXT: isEntryFunction: true # SIMPLE-NEXT: memoryBound: true # SIMPLE-NEXT: waveLimiter: true @@ -74,6 +76,7 @@ explicitKernArgSize: 128 maxKernArgAlign: 64 ldsSize: 2048 + gdsSize: 256 isEntryFunction: true noSignedZerosFPMath: false memoryBound: true @@ -100,6 +103,7 @@ # FULL-NEXT: explicitKernArgSize: 0 # FULL-NEXT: maxKernArgAlign: 1 # FULL-NEXT: ldsSize: 0 +# FULL-NEXT: gdsSize: 0 # FULL-NEXT: dynLDSAlign: 1 # FULL-NEXT: isEntryFunction: false # FULL-NEXT: noSignedZerosFPMath: false @@ -163,6 +167,7 @@ # FULL-NEXT: explicitKernArgSize: 0 # FULL-NEXT: maxKernArgAlign: 1 # FULL-NEXT: ldsSize: 0 +# FULL-NEXT: gdsSize: 0 # FULL-NEXT: dynLDSAlign: 1 # FULL-NEXT: isEntryFunction: false # FULL-NEXT: noSignedZerosFPMath: false @@ -227,6 +232,7 @@ # FULL-NEXT: explicitKernArgSize: 0 # FULL-NEXT: maxKernArgAlign: 1 # FULL-NEXT: ldsSize: 0 +# FULL-NEXT: gdsSize: 0 # FULL-NEXT: dynLDSAlign: 1 # FULL-NEXT: isEntryFunction: true # FULL-NEXT: noSignedZerosFPMath: false @@ -387,6 +393,7 @@ # ALL-LABEL: name: dyn_lds_with_alignment # FULL: ldsSize: 0 +# FULL-NEXT: gdsSize: 0 # FULL-NEXT: dynLDSAlign: 8 # SIMPLE: dynLDSAlign: 8 Index: llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll =================================================================== --- llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll +++ llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll @@ -11,6 +11,7 @@ ; CHECK-NEXT: explicitKernArgSize: 128 ; CHECK-NEXT: maxKernArgAlign: 64 ; CHECK-NEXT: ldsSize: 2048 +; CHECK-NEXT: gdsSize: 0 ; CHECK-NEXT: dynLDSAlign: 1 ; CHECK-NEXT: isEntryFunction: true ; CHECK-NEXT: noSignedZerosFPMath: false @@ -43,11 +44,14 @@ ret void } +@gds = addrspace(2) global [128 x i32] undef, align 4 + ; CHECK-LABEL: {{^}}name: ps_shader ; CHECK: machineFunctionInfo: ; CHECK-NEXT: explicitKernArgSize: 0 ; CHECK-NEXT: maxKernArgAlign: 4 ; CHECK-NEXT: ldsSize: 0 +; CHECK-NEXT: gdsSize: 0 ; CHECK-NEXT: dynLDSAlign: 1 ; CHECK-NEXT: isEntryFunction: true ; CHECK-NEXT: noSignedZerosFPMath: false @@ -75,11 +79,18 @@ ret void } +; CHECK-LABEL: {{^}}name: gds_size_shader +; CHECK: gdsSize: 4096 +define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 { + ret void +} + ; CHECK-LABEL: {{^}}name: function ; CHECK: machineFunctionInfo: ; CHECK-NEXT: explicitKernArgSize: 0 ; CHECK-NEXT: maxKernArgAlign: 1 ; CHECK-NEXT: ldsSize: 0 +; CHECK-NEXT: gdsSize: 0 ; CHECK-NEXT: dynLDSAlign: 1 ; CHECK-NEXT: isEntryFunction: false ; CHECK-NEXT: noSignedZerosFPMath: false @@ -121,6 +132,7 @@ ; CHECK-NEXT: explicitKernArgSize: 0 ; CHECK-NEXT: maxKernArgAlign: 1 ; CHECK-NEXT: ldsSize: 0 +; CHECK-NEXT: gdsSize: 0 ; CHECK-NEXT: dynLDSAlign: 1 ; CHECK-NEXT: isEntryFunction: false ; CHECK-NEXT: noSignedZerosFPMath: true @@ -214,11 +226,12 @@ ret void } -declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #5 +declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #6 attributes #0 = { "no-signed-zeros-fp-math" = "true" } attributes #1 = { "amdgpu-dx10-clamp" = "false" } attributes #2 = { "amdgpu-ieee" = "false" } attributes #3 = { "amdgpu-dx10-clamp" = "false" "amdgpu-ieee" = "false" } attributes #4 = { "amdgpu-32bit-address-high-bits"="0xffff8000" } -attributes #5 = { convergent nounwind readnone willreturn } +attributes #5 = { "amdgpu-gds-size"="4096" } +attributes #6 = { convergent nounwind readnone willreturn }