diff --git a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp --- a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp @@ -67,9 +67,19 @@ static void generateEndPgm(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, - const SIInstrInfo *TII, bool IsPS) { - // "null export" - if (IsPS) { + const SIInstrInfo *TII, MachineFunction &MF) { + const Function &F = MF.getFunction(); + bool IsPS = F.getCallingConv() == CallingConv::AMDGPU_PS; + + // Check if hardware has been configured to expect color or depth exports. + bool HasExports = + AMDGPU::getHasColorExport(F) || AMDGPU::getHasDepthExport(F); + + // Prior to GFX10, hardware always expects at least one export for PS. + bool MustExport = !AMDGPU::isGFX10Plus(TII->getSubtarget()); + + if (IsPS && (HasExports || MustExport)) { + // Generate "null export" if hardware is expecting PS to export. BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE)) .addImm(AMDGPU::Exp::ET_NULL) .addReg(AMDGPU::VGPR0, RegState::Undef) @@ -80,6 +90,7 @@ .addImm(0) // compr .addImm(0); // en } + // s_endpgm BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0); } @@ -168,8 +179,7 @@ BuildMI(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII->get(MovOpc), ExecReg) .addImm(0); - generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII, - MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS); + generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII, MF); for (MachineInstr *Instr : EarlyTermInstrs) { // Early termination in GS does nothing diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -689,6 +689,10 @@ unsigned getInitialPSInputAddr(const Function &F); +bool getHasColorExport(const Function &F); + +bool getHasDepthExport(const Function &F); + LLVM_READNONE bool isShader(CallingConv::ID CC); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1344,6 +1344,17 @@ return getIntegerAttribute(F, "InitialPSInputAddr", 0); } +bool getHasColorExport(const Function &F) { + // As a safe default always respond as if PS has color exports. + return getIntegerAttribute( + F, "amdgpu-color-export", + F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0; +} + +bool getHasDepthExport(const Function &F) { + return getIntegerAttribute(F, "amdgpu-depth-export", 0) != 0; +} + bool isShader(CallingConv::ID cc) { switch(cc) { case CallingConv::AMDGPU_VS: diff --git a/llvm/test/CodeGen/AMDGPU/early-term.mir b/llvm/test/CodeGen/AMDGPU/early-term.mir --- a/llvm/test/CodeGen/AMDGPU/early-term.mir +++ b/llvm/test/CodeGen/AMDGPU/early-term.mir @@ -1,5 +1,5 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX10 %s --- | define amdgpu_ps void @early_term_scc0_end_block() { @@ -21,6 +21,12 @@ define amdgpu_cs void @early_term_scc0_cs() { ret void } + + define amdgpu_ps void @early_term_no_export() #0 { + ret void + } + + attributes #0 = { "amdgpu-color-export"="0" "amdgpu-depth-export"="0" } ... --- @@ -30,21 +36,21 @@ - { reg: '$sgpr0' } - { reg: '$sgpr1' } body: | - ; GFX10-LABEL: name: early_term_scc0_end_block - ; GFX10: bb.0: - ; GFX10: successors: %bb.1(0x80000000), %bb.2(0x00000000) - ; GFX10: liveins: $sgpr0, $sgpr1 - ; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc - ; GFX10: S_CBRANCH_SCC0 %bb.2, implicit $scc - ; GFX10: bb.1: - ; GFX10: liveins: $vgpr0 - ; GFX10: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec - ; GFX10: S_ENDPGM 0 - ; GFX10: bb.2: - ; GFX10: $exec_lo = S_MOV_B32 0 - ; GFX10: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec - ; GFX10: S_ENDPGM 0 + ; GCN-LABEL: name: early_term_scc0_end_block + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000), %bb.2(0x00000000) + ; GCN: liveins: $sgpr0, $sgpr1 + ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc + ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc + ; GCN: bb.1: + ; GCN: liveins: $vgpr0 + ; GCN: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec + ; GCN: S_ENDPGM 0 + ; GCN: bb.2: + ; GCN: $exec = S_MOV_B64 0 + ; GCN: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec + ; GCN: S_ENDPGM 0 bb.0: liveins: $sgpr0, $sgpr1 successors: %bb.1 @@ -66,25 +72,25 @@ - { reg: '$sgpr0' } - { reg: '$sgpr1' } body: | - ; GFX10-LABEL: name: early_term_scc0_next_terminator - ; GFX10: bb.0: - ; GFX10: successors: %bb.2(0x80000000), %bb.3(0x00000000) - ; GFX10: liveins: $sgpr0, $sgpr1 - ; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc - ; GFX10: S_CBRANCH_SCC0 %bb.3, implicit $scc - ; GFX10: S_BRANCH %bb.2 - ; GFX10: bb.1: - ; GFX10: successors: %bb.2(0x80000000) - ; GFX10: $vgpr0 = V_MOV_B32_e32 1, implicit $exec - ; GFX10: bb.2: - ; GFX10: liveins: $vgpr0 - ; GFX10: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec - ; GFX10: S_ENDPGM 0 - ; GFX10: bb.3: - ; GFX10: $exec_lo = S_MOV_B32 0 - ; GFX10: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec - ; GFX10: S_ENDPGM 0 + ; GCN-LABEL: name: early_term_scc0_next_terminator + ; GCN: bb.0: + ; GCN: successors: %bb.2(0x80000000), %bb.3(0x00000000) + ; GCN: liveins: $sgpr0, $sgpr1 + ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc + ; GCN: S_CBRANCH_SCC0 %bb.3, implicit $scc + ; GCN: S_BRANCH %bb.2 + ; GCN: bb.1: + ; GCN: successors: %bb.2(0x80000000) + ; GCN: $vgpr0 = V_MOV_B32_e32 1, implicit $exec + ; GCN: bb.2: + ; GCN: liveins: $vgpr0 + ; GCN: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec + ; GCN: S_ENDPGM 0 + ; GCN: bb.3: + ; GCN: $exec = S_MOV_B64 0 + ; GCN: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec + ; GCN: S_ENDPGM 0 bb.0: liveins: $sgpr0, $sgpr1 successors: %bb.2 @@ -112,26 +118,26 @@ - { reg: '$sgpr0' } - { reg: '$sgpr1' } body: | - ; GFX10-LABEL: name: early_term_scc0_in_block - ; GFX10: bb.0: - ; GFX10: successors: %bb.3(0x40000000), %bb.2(0x40000000) - ; GFX10: liveins: $sgpr0, $sgpr1 - ; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc - ; GFX10: S_CBRANCH_SCC0 %bb.2, implicit $scc - ; GFX10: bb.3: - ; GFX10: successors: %bb.1(0x80000000) - ; GFX10: liveins: $vgpr0, $scc - ; GFX10: $vgpr1 = V_MOV_B32_e32 1, implicit $exec - ; GFX10: bb.1: - ; GFX10: liveins: $vgpr0, $vgpr1 - ; GFX10: EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec - ; GFX10: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec - ; GFX10: S_ENDPGM 0 - ; GFX10: bb.2: - ; GFX10: $exec_lo = S_MOV_B32 0 - ; GFX10: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec - ; GFX10: S_ENDPGM 0 + ; GCN-LABEL: name: early_term_scc0_in_block + ; GCN: bb.0: + ; GCN: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; GCN: liveins: $sgpr0, $sgpr1 + ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc + ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc + ; GCN: bb.3: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0, $scc + ; GCN: $vgpr1 = V_MOV_B32_e32 1, implicit $exec + ; GCN: bb.1: + ; GCN: liveins: $vgpr0, $vgpr1 + ; GCN: EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec + ; GCN: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec + ; GCN: S_ENDPGM 0 + ; GCN: bb.2: + ; GCN: $exec = S_MOV_B64 0 + ; GCN: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec + ; GCN: S_ENDPGM 0 bb.0: liveins: $sgpr0, $sgpr1 successors: %bb.1 @@ -155,15 +161,18 @@ - { reg: '$sgpr0' } - { reg: '$sgpr1' } body: | - ; GFX10-LABEL: name: early_term_scc0_gs - ; GFX10: bb.0: - ; GFX10: successors: %bb.1(0x80000000) - ; GFX10: liveins: $sgpr0, $sgpr1 - ; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc - ; GFX10: bb.1: - ; GFX10: liveins: $vgpr0 - ; GFX10: S_ENDPGM 0 + ; GCN-LABEL: name: early_term_scc0_gs + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $sgpr0, $sgpr1 + ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc + ; GCN: bb.1: + ; GCN: liveins: $vgpr0 + ; GCN: S_ENDPGM 0 + ; GCN: bb.2: + ; GCN: $exec = S_MOV_B64 0 + ; GCN: S_ENDPGM 0 bb.0: liveins: $sgpr0, $sgpr1 successors: %bb.1 @@ -184,19 +193,55 @@ - { reg: '$sgpr0' } - { reg: '$sgpr1' } body: | - ; GFX10-LABEL: name: early_term_scc0_cs - ; GFX10: bb.0: - ; GFX10: successors: %bb.1(0x80000000), %bb.2(0x00000000) - ; GFX10: liveins: $sgpr0, $sgpr1 - ; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc - ; GFX10: S_CBRANCH_SCC0 %bb.2, implicit $scc - ; GFX10: bb.1: - ; GFX10: liveins: $vgpr0 - ; GFX10: S_ENDPGM 0 - ; GFX10: bb.2: - ; GFX10: $exec_lo = S_MOV_B32 0 - ; GFX10: S_ENDPGM 0 + ; GCN-LABEL: name: early_term_scc0_cs + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000), %bb.2(0x00000000) + ; GCN: liveins: $sgpr0, $sgpr1 + ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc + ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc + ; GCN: bb.1: + ; GCN: liveins: $vgpr0 + ; GCN: S_ENDPGM 0 + ; GCN: bb.2: + ; GCN: $exec = S_MOV_B64 0 + ; GCN: S_ENDPGM 0 + bb.0: + liveins: $sgpr0, $sgpr1 + successors: %bb.1 + + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc + SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec + + bb.1: + liveins: $vgpr0 + S_ENDPGM 0 +... + +--- +name: early_term_no_export +tracksRegLiveness: true +liveins: + - { reg: '$sgpr0' } + - { reg: '$sgpr1' } +body: | + ; GCN-LABEL: name: early_term_no_export + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000), %bb.2(0x00000000) + ; GCN: liveins: $sgpr0, $sgpr1 + ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc + ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc + ; GCN: bb.1: + ; GCN: liveins: $vgpr0 + ; GCN: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec + ; GCN: S_ENDPGM 0 + ; GCN: bb.2: + ; GCN: $exec = S_MOV_B64 0 + ; GFX9: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec + ; GFX10-NOT: EXP_DONE + ; GCN: S_ENDPGM 0 bb.0: liveins: $sgpr0, $sgpr1 successors: %bb.1 @@ -207,5 +252,6 @@ bb.1: liveins: $vgpr0 + EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec S_ENDPGM 0 ...