diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -130,9 +130,31 @@ return false; } -static bool readsExecAsData(const MachineInstr &MI) { - if (MI.isCompare()) - return true; +// Returns true if the scalar result of a VALU instruction depends on exec. +static bool resultDependsOnExec(const MachineInstr &MI) { + // Ignore comparisons which are only used masked with exec. + // This allows some hoisting/sinking of VALU comparisons. + if (MI.isCompare()) { + const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + Register DstReg = MI.getOperand(0).getReg(); + if (!DstReg.isVirtual()) + return true; + for (MachineInstr &Use : MRI.use_nodbg_instructions(DstReg)) { + switch (Use.getOpcode()) { + case AMDGPU::S_AND_SAVEEXEC_B32: + case AMDGPU::S_AND_SAVEEXEC_B64: + break; + case AMDGPU::S_AND_B32: + case AMDGPU::S_AND_B64: + if (!Use.readsRegister(AMDGPU::EXEC)) + return true; + break; + default: + return true; + } + } + return false; + } switch (MI.getOpcode()) { default: @@ -147,7 +169,7 @@ bool SIInstrInfo::isIgnorableUse(const MachineOperand &MO) const { // Any implicit use of exec by VALU is not a real register read. return MO.getReg() == AMDGPU::EXEC && MO.isImplicit() && - isVALU(*MO.getParent()) && !readsExecAsData(*MO.getParent()); + isVALU(*MO.getParent()) && !resultDependsOnExec(*MO.getParent()); } bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, diff --git a/llvm/test/CodeGen/AMDGPU/licm-valu.mir b/llvm/test/CodeGen/AMDGPU/licm-valu.mir --- a/llvm/test/CodeGen/AMDGPU/licm-valu.mir +++ b/llvm/test/CodeGen/AMDGPU/licm-valu.mir @@ -47,7 +47,7 @@ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 1, 2, implicit $exec - ; GCN-NEXT: $exec = S_OR_B64 $exec, 1, implicit-def $scc + ; GCN-NEXT: $exec = S_OR_B64 $exec, [[V_CMP_EQ_U32_e64_]], implicit-def $scc ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} @@ -58,7 +58,39 @@ bb.1: %0:sreg_64 = V_CMP_EQ_U32_e64 1, 2, implicit $exec - $exec = S_OR_B64 $exec, 1, implicit-def $scc + $exec = S_OR_B64 $exec, %0:sreg_64, implicit-def $scc + S_CBRANCH_EXECNZ %bb.1, implicit $exec + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0 +... +--- +name: allowable_hoist_cmp +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: allowable_hoist_cmp + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 1, 2, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $exec = S_AND_B64 $exec, [[V_CMP_EQ_U32_e64_]], implicit-def $scc + ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: S_ENDPGM 0 + bb.0: + S_BRANCH %bb.1 + + bb.1: + %0:sreg_64 = V_CMP_EQ_U32_e64 1, 2, implicit $exec + $exec = S_AND_B64 $exec, %0:sreg_64, implicit-def $scc S_CBRANCH_EXECNZ %bb.1, implicit $exec S_BRANCH %bb.2