Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -276,6 +276,10 @@ bool ReturnsVoid = true; std::optional ArgInfo; + + unsigned PSInputAddr = 0; + unsigned PSInputEnable = 0; + SIMode Mode; std::optional ScavengeFI; StringValue VGPRForAGPRCopy; @@ -312,6 +316,8 @@ YamlIO.mapOptional("bytesInStackArgArea", MFI.BytesInStackArgArea, 0u); YamlIO.mapOptional("returnsVoid", MFI.ReturnsVoid, true); YamlIO.mapOptional("argumentInfo", MFI.ArgInfo); + YamlIO.mapOptional("psInputAddr", MFI.PSInputAddr, 0u); + YamlIO.mapOptional("psInputEnable", MFI.PSInputEnable, 0u); YamlIO.mapOptional("mode", MFI.Mode, SIMode()); YamlIO.mapOptional("highBitsOf32BitAddress", MFI.HighBitsOf32BitAddress, 0u); Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -638,7 +638,10 @@ StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)), BytesInStackArgArea(MFI.getBytesInStackArgArea()), ReturnsVoid(MFI.returnsVoid()), - ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) { + ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), + PSInputAddr(MFI.getPSInputAddr()), + PSInputEnable(MFI.getPSInputEnable()), + Mode(MFI.getMode()) { for (Register Reg : MFI.getWWMReservedRegs()) WWMReservedRegs.push_back(regToString(Reg, TRI)); @@ -661,6 +664,8 @@ LDSSize = YamlMFI.LDSSize; GDSSize = YamlMFI.GDSSize; DynLDSAlign = YamlMFI.DynLDSAlign; + PSInputAddr = YamlMFI.PSInputAddr; + PSInputEnable = YamlMFI.PSInputEnable; HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress; Occupancy = YamlMFI.Occupancy; IsEntryFunction = YamlMFI.IsEntryFunction; Index: llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll =================================================================== --- llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll +++ llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll @@ -26,6 +26,8 @@ ; AFTER-PEI-NEXT: workGroupIDX: { reg: '$sgpr6' } ; AFTER-PEI-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' } ; AFTER-PEI-NEXT: workItemIDX: { reg: '$vgpr0' } +; AFTER-PEI-NEXT: psInputAddr: 0 +; AFTER-PEI-NEXT: psInputEnable: 0 ; AFTER-PEI-NEXT: mode: ; AFTER-PEI-NEXT: ieee: true ; AFTER-PEI-NEXT: dx10-clamp: true Index: llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir =================================================================== --- llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir +++ llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir @@ -36,6 +36,8 @@ # FULL-NEXT: workItemIDX: { reg: '$vgpr0' } # FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 } # FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } +# FULL-NEXT: psInputAddr: 0 +# FULL-NEXT: psInputEnable: 0 # FULL-NEXT: mode: # FULL-NEXT: ieee: true # FULL-NEXT: dx10-clamp: true @@ -134,6 +136,8 @@ # FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 } # FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 } # FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } +# FULL-NEXT: psInputAddr: 0 +# FULL-NEXT: psInputEnable: 0 # FULL-NEXT: mode: # FULL-NEXT: ieee: true # FULL-NEXT: dx10-clamp: true @@ -203,6 +207,8 @@ # FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 } # FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 } # FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } +# FULL-NEXT: psInputAddr: 0 +# FULL-NEXT: psInputEnable: 0 # FULL-NEXT: mode: # FULL-NEXT: ieee: true # FULL-NEXT: dx10-clamp: true @@ -273,6 +279,8 @@ # FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 } # FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 } # FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } +# FULL-NEXT: psInputAddr: 0 +# FULL-NEXT: psInputEnable: 0 # FULL-NEXT: mode: # FULL-NEXT: ieee: true # FULL-NEXT: dx10-clamp: true Index: llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll =================================================================== --- llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll +++ llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll @@ -30,6 +30,8 @@ ; CHECK-NEXT: workGroupIDX: { reg: '$sgpr6' } ; CHECK-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' } ; CHECK-NEXT: workItemIDX: { reg: '$vgpr0' } +; CHECK-NEXT: psInputAddr: 0 +; CHECK-NEXT: psInputEnable: 0 ; CHECK-NEXT: mode: ; CHECK-NEXT: ieee: true ; CHECK-NEXT: dx10-clamp: true @@ -70,6 +72,8 @@ ; CHECK-NEXT: argumentInfo: ; CHECK-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr3' } ; CHECK-NEXT: implicitBufferPtr: { reg: '$sgpr0_sgpr1' } +; CHECK-NEXT: psInputAddr: 1 +; CHECK-NEXT: psInputEnable: 1 ; CHECK-NEXT: mode: ; CHECK-NEXT: ieee: false ; CHECK-NEXT: dx10-clamp: true @@ -87,6 +91,16 @@ ret void } +; CHECK-LABEL: {{^}}name: ps_shader_ps_input_enable +; CHECK: machineFunctionInfo: +; CHECK: psInputAddr: 36983 +; CHECK-NEXT: psInputEnable: 1{{$}} +define amdgpu_ps void @ps_shader_ps_input_enable(i32 %arg0, i32 inreg %arg1) #7 { + %gep = getelementptr inbounds [128 x i32], ptr addrspace(2) @gds, i32 0, i32 %arg0 + atomicrmw add ptr addrspace(2) %gep, i32 8 seq_cst + ret void +} + ; CHECK-LABEL: {{^}}name: gds_size_shader ; CHECK: gdsSize: 4096 define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 { @@ -124,6 +138,8 @@ ; CHECK-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 } ; CHECK-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 } ; CHECK-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } +; CHECK-NEXT: psInputAddr: 0 +; CHECK-NEXT: psInputEnable: 0 ; CHECK-NEXT: mode: ; CHECK-NEXT: ieee: true ; CHECK-NEXT: dx10-clamp: true @@ -170,6 +186,8 @@ ; CHECK-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 } ; CHECK-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 } ; CHECK-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } +; CHECK-NEXT: psInputAddr: 0 +; CHECK-NEXT: psInputEnable: 0 ; CHECK-NEXT: mode: ; CHECK-NEXT: ieee: true ; CHECK-NEXT: dx10-clamp: true @@ -251,3 +269,4 @@ attributes #4 = { "amdgpu-32bit-address-high-bits"="0xffff8000" } attributes #5 = { "amdgpu-gds-size"="4096" } attributes #6 = { convergent nounwind readnone willreturn } +attributes #7 = { "InitialPSInputAddr"="36983" }