diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -840,9 +840,23 @@
       First = FirstWQM;
     }
 
+    // Whether we need to save SCC depends on start and end states.
+    // (s_or_saveexec/s_and_b64 etc. used for the transition clobber SCC,
+    // so tell prepareInsertion when a live SCC must be preserved.)
+    bool SaveSCC = false;
+    switch (State) {
+    case StateExact:
+      SaveSCC = (Needs & StateWWM) || ((Needs & StateWQM) && WQMFromExec);
+      break;
+    case StateWWM:
+      SaveSCC = (Needs != StateExact) && WQMFromExec;
+      break;
+    case StateWQM:
+      SaveSCC = (Needs != StateWQM);
+      break;
+    default:
+      llvm_unreachable("Unknown state");
+    }
     MachineBasicBlock::iterator Before =
-        prepareInsertion(MBB, First, II, Needs == StateWQM,
-                         Needs == StateExact || WQMFromExec);
+        prepareInsertion(MBB, First, II, Needs == StateWQM, SaveSCC);
 
     if (State == StateWWM) {
       assert(SavedNonWWMReg);
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.mir b/llvm/test/CodeGen/AMDGPU/wqm.mir
--- a/llvm/test/CodeGen/AMDGPU/wqm.mir
+++ b/llvm/test/CodeGen/AMDGPU/wqm.mir
@@ -49,6 +49,65 @@
 
 ...
 
+---
+# Second test for awareness that s_or_saveexec_b64 clobbers SCC,
+# because the entry block is treated differently.
+#
+#CHECK: %bb.1
+#CHECK: S_CMP_LT_I32
+#CHECK: COPY $scc
+#CHECK: ENTER_WWM
+#CHECK: $scc = COPY
+#CHECK: S_CSELECT_B32
+name: test_wwm_scc2
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sgpr_32, preferred-register: '' }
+  - { id: 1, class: sgpr_32, preferred-register: '' }
+  - { id: 2, class: sgpr_32, preferred-register: '' }
+  - { id: 3, class: vgpr_32, preferred-register: '' }
+  - { id: 4, class: vgpr_32, preferred-register: '' }
+  - { id: 5, class: sgpr_32, preferred-register: '' }
+  - { id: 6, class: vgpr_32, preferred-register: '' }
+  - { id: 7, class: vgpr_32, preferred-register: '' }
+  - { id: 8, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 9, class: sreg_32, preferred-register: '' }
+  - { id: 10, class: vgpr_32, preferred-register: '' }
+  - { id: 11, class: vgpr_32, preferred-register: '' }
+  - { id: 12, class: vgpr_32, preferred-register: '' }
+  - { id: 13, class: sgpr_128, preferred-register: '' }
liveins:
+  - { reg: '$sgpr0', virtual-reg: '%0' }
+  - { reg: '$sgpr1', virtual-reg: '%1' }
+  - { reg: '$sgpr2', virtual-reg: '%2' }
+  - { reg: '$vgpr0', virtual-reg: '%3' }
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0
+
+    %3 = COPY $vgpr0
+    %2 = COPY $sgpr2
+    %1 = COPY $sgpr1
+    %0 = COPY $sgpr0
+    %13 = IMPLICIT_DEF
+
+  bb.1:
+    S_CMP_LT_I32 0, %0, implicit-def $scc
+    %10 = BUFFER_LOAD_DWORD_OFFEN %3, %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    %12 = V_ADD_CO_U32_e32 %3, %3, implicit-def $vcc, implicit $exec
+    %5 = S_CSELECT_B32 %2, %1, implicit $scc
+    %11 = V_ADD_CO_U32_e32 %5, %12, implicit-def $vcc, implicit $exec
+    $vgpr0 = WWM %11, implicit $exec
+    $vgpr1 = COPY %10
+    SI_RETURN_TO_EPILOG $vgpr0, $vgpr1
+
+...
+
 ---
 # V_SET_INACTIVE, when its second operand is undef, is replaced by a
 # COPY by si-wqm. Ensure the instruction is removed.