diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -840,9 +840,26 @@ First = FirstWQM; } + // Whether we need to save SCC depends on start and end states + bool SaveSCC = false; + switch (State) { + case StateExact: + case StateWWM: + // Exact/WWM -> WWM: save SCC + // Exact/WWM -> WQM: save SCC if WQM mask is generated from exec + // Exact/WWM -> Exact: no save + SaveSCC = (Needs & StateWWM) || ((Needs & StateWQM) && WQMFromExec); + break; + case StateWQM: + // WQM -> Exact/WWM: save SCC + SaveSCC = !(Needs & StateWQM); + break; + default: + llvm_unreachable("Unknown state"); + break; + } MachineBasicBlock::iterator Before = - prepareInsertion(MBB, First, II, Needs == StateWQM, - Needs == StateExact || WQMFromExec); + prepareInsertion(MBB, First, II, Needs == StateWQM, SaveSCC); if (State == StateWWM) { assert(SavedNonWWMReg); diff --git a/llvm/test/CodeGen/AMDGPU/wqm.mir b/llvm/test/CodeGen/AMDGPU/wqm.mir --- a/llvm/test/CodeGen/AMDGPU/wqm.mir +++ b/llvm/test/CodeGen/AMDGPU/wqm.mir @@ -49,6 +49,40 @@ ... +--- +# Second test for awareness that s_or_saveexec_b64 clobbers SCC, +# needed because the entry block is treated differently. 
+# +#CHECK: %bb.1 +#CHECK: S_CMP_LT_I32 +#CHECK: COPY $scc +#CHECK: ENTER_WWM +#CHECK: $scc = COPY +#CHECK: S_CSELECT_B32 +name: test_wwm_scc2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0 + + %3:vgpr_32 = COPY $vgpr0 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = IMPLICIT_DEF + + bb.1: + S_CMP_LT_I32 0, %0:sgpr_32, implicit-def $scc + %10:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %3:vgpr_32, %13:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec + %12:vgpr_32 = V_ADD_CO_U32_e32 %3:vgpr_32, %3:vgpr_32, implicit-def $vcc, implicit $exec + %5:sgpr_32 = S_CSELECT_B32 %2:sgpr_32, %1:sgpr_32, implicit $scc + %11:vgpr_32 = V_ADD_CO_U32_e32 %5:sgpr_32, %12:vgpr_32, implicit-def $vcc, implicit $exec + $vgpr0 = WWM %11:vgpr_32, implicit $exec + $vgpr1 = COPY %10:vgpr_32 + SI_RETURN_TO_EPILOG $vgpr0, $vgpr1 + +... + --- # V_SET_INACTIVE, when its second operand is undef, is replaced by a # COPY by si-wqm. Ensure the instruction is removed.