diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -752,6 +752,22 @@
   ET_INVALID = 255,
 };
 
+/// Format codes against which the values of the "PSColFormat" and "PSZFormat"
+/// function attributes are interpreted (see AMDGPU::getPSColFormat and
+/// AMDGPU::getPSZFormat). SPI_SHADER_ZERO is the value treated as "no export".
+enum Format : unsigned {
+  SPI_SHADER_ZERO = 0x0,
+  SPI_SHADER_32_R = 0x1,
+  SPI_SHADER_32_GR = 0x2,
+  SPI_SHADER_32_AR = 0x3,
+  SPI_SHADER_FP16_ABGR = 0x4,
+  SPI_SHADER_UNORM16_ABGR = 0x5,
+  SPI_SHADER_SNORM16_ABGR = 0x6,
+  SPI_SHADER_UINT16_ABGR = 0x7,
+  SPI_SHADER_SINT16_ABGR = 0x8,
+  SPI_SHADER_32_ABGR = 0x9,
+};
+
 } // namespace Exp
 
 namespace VOP3PEncoding {
diff --git a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
@@ -67,9 +67,15 @@
 
 static void generateEndPgm(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator I, DebugLoc DL,
-                           const SIInstrInfo *TII, bool IsPS) {
+                           const SIInstrInfo *TII, MachineFunction &MF) {
+  const Function &F = MF.getFunction();
+  bool IsPS = F.getCallingConv() == CallingConv::AMDGPU_PS;
+  // Either attribute being non-zero counts as the shader having exports; only
+  // then is the null export below emitted.
+  bool HasExports = AMDGPU::getPSColFormat(F) != AMDGPU::Exp::SPI_SHADER_ZERO ||
+                    AMDGPU::getPSZFormat(F) != AMDGPU::Exp::SPI_SHADER_ZERO;
   // "null export"
-  if (IsPS) {
+  if (IsPS && HasExports) {
     BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE))
         .addImm(AMDGPU::Exp::ET_NULL)
         .addReg(AMDGPU::VGPR0, RegState::Undef)
@@ -168,8 +174,7 @@
   BuildMI(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII->get(MovOpc), ExecReg)
       .addImm(0);
 
-  generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII,
-                 MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS);
+  generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII, MF);
 
   for (MachineInstr *Instr : EarlyTermInstrs) {
     // Early termination in GS does nothing
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -689,6 +689,14 @@
 
 unsigned getInitialPSInputAddr(const Function &F);
 
+/// \returns the integer value of the "PSColFormat" attribute of \p F, with
+/// Exp::SPI_SHADER_32_ABGR passed to getIntegerAttribute as the default.
+unsigned getPSColFormat(const Function &F);
+
+/// \returns the integer value of the "PSZFormat" attribute of \p F, with
+/// Exp::SPI_SHADER_ZERO passed to getIntegerAttribute as the default.
+unsigned getPSZFormat(const Function &F);
+
 LLVM_READNONE
 bool isShader(CallingConv::ID CC);
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1344,6 +1344,16 @@
   return getIntegerAttribute(F, "InitialPSInputAddr", 0);
 }
 
+unsigned getPSColFormat(const Function &F) {
+  // As a safe default always respond as if there is one export.
+  return getIntegerAttribute(F, "PSColFormat", Exp::SPI_SHADER_32_ABGR);
+}
+
+unsigned getPSZFormat(const Function &F) {
+  // SPI_SHADER_ZERO is the value generateEndPgm treats as "no export".
+  return getIntegerAttribute(F, "PSZFormat", Exp::SPI_SHADER_ZERO);
+}
+
 bool isShader(CallingConv::ID cc) {
   switch(cc) {
     case CallingConv::AMDGPU_VS:
diff --git a/llvm/test/CodeGen/AMDGPU/early-term.mir b/llvm/test/CodeGen/AMDGPU/early-term.mir
--- a/llvm/test/CodeGen/AMDGPU/early-term.mir
+++ b/llvm/test/CodeGen/AMDGPU/early-term.mir
@@ -21,6 +21,12 @@
   define amdgpu_cs void @early_term_scc0_cs() {
     ret void
   }
+
+  define amdgpu_ps void @early_term_no_export() #0 {
+    ret void
+  }
+
+  attributes #0 = { "PSColFormat"="0" "PSZFormat"="0" }
 ...
 
 ---
@@ -164,6 +170,9 @@
   ; GFX10: bb.1:
   ; GFX10:   liveins: $vgpr0
   ; GFX10:   S_ENDPGM 0
+  ; GFX10: bb.2:
+  ; GFX10:   $exec_lo = S_MOV_B32 0
+  ; GFX10:   S_ENDPGM 0
   bb.0:
     liveins: $sgpr0, $sgpr1
     successors: %bb.1
@@ -209,3 +218,38 @@
     liveins: $vgpr0
     S_ENDPGM 0
 ...
+# PS with "PSColFormat"="0" and "PSZFormat"="0": the early-exit block (bb.2) gets no null export.
+---
+name: early_term_no_export
+tracksRegLiveness: true
+liveins:
+  - { reg: '$sgpr0' }
+  - { reg: '$sgpr1' }
+body: |
+  ; GFX10-LABEL: name: early_term_no_export
+  ; GFX10: bb.0:
+  ; GFX10:   successors: %bb.1(0x80000000), %bb.2(0x00000000)
+  ; GFX10:   liveins: $sgpr0, $sgpr1
+  ; GFX10:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; GFX10:   dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+  ; GFX10:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GFX10: bb.1:
+  ; GFX10:   liveins: $vgpr0
+  ; GFX10:   EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+  ; GFX10:   S_ENDPGM 0
+  ; GFX10: bb.2:
+  ; GFX10:   $exec_lo = S_MOV_B32 0
+  ; GFX10:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    successors: %bb.1
+
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+    SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
+
+  bb.1:
+    liveins: $vgpr0
+    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+    S_ENDPGM 0
+...