diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -322,26 +322,44 @@ return nullptr; } + // Determine if a register Reg is not re-defined and still in use -// in the range (Stop..BB.end]. +// in the range (Stop..Start]. // It does so by backwards calculating liveness from the end of the BB until // either Stop or the beginning of the BB is reached. // After liveness is calculated, we can determine if Reg is still in use and not // defined inbetween the instructions. -static bool isRegisterInUseAfter(MachineInstr &Stop, MCRegister Reg, - const SIRegisterInfo *TRI, - MachineRegisterInfo &MRI) { +static bool isRegisterInUseBetween(MachineInstr &Stop, MachineInstr &Start, + MCRegister Reg, const SIRegisterInfo *TRI, + MachineRegisterInfo &MRI, + bool useLiveOuts = false, + bool ignoreStart = false) { LivePhysRegs LR(*TRI); - LR.addLiveOuts(*Stop.getParent()); + if (useLiveOuts) + LR.addLiveOuts(*Stop.getParent()); + + MachineBasicBlock::reverse_iterator A(Start); + MachineBasicBlock::reverse_iterator E(Stop); + + if (ignoreStart) + ++A; - for (auto A = Stop.getParent()->rbegin(); - A != Stop.getParent()->rend() && A != Stop; ++A) { + for (; A != Stop.getParent()->rend() && A != Stop; ++A) { LR.stepBackward(*A); } return !LR.available(MRI, Reg); } +// Determine if a register Reg is not re-defined and still in use +// in the range (Stop..BB.end]. +static bool isRegisterInUseAfter(MachineInstr &Stop, MCRegister Reg, + const SIRegisterInfo *TRI, + MachineRegisterInfo &MRI) { + return isRegisterInUseBetween(Stop, *Stop.getParent()->rbegin(), Reg, TRI, + MRI, true); +} + // Tries to find a possibility to optimize a v_cmp ..., s_and_saveexec sequence // by looking at an instance of a s_and_saveexec instruction. Returns a pointer // to the v_cmp instruction if it is safe to replace the sequence (see the @@ -396,9 +414,11 @@ if (isLiveOut(*VCmp->getParent(), VCmpDest->getReg())) return nullptr; - // If the v_cmp target is in use after the s_and_saveexec, skip the - // optimization. - if (isRegisterInUseAfter(SaveExec, VCmpDest->getReg(), TRI, MRI)) + // If the v_cmp target is in use between v_cmp and s_and_saveexec or after the + // s_and_saveexec, skip the optimization. + if (isRegisterInUseBetween(*VCmp, SaveExec, VCmpDest->getReg(), TRI, MRI, + false, true) || + isRegisterInUseAfter(SaveExec, VCmpDest->getReg(), TRI, MRI)) return nullptr; // Try to determine if there is a write to any of the VCmp diff --git a/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.mir b/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.mir --- a/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.mir +++ b/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.mir @@ -25,6 +25,27 @@ ... --- + +# Ensure the transformation does not get applied when the v_cmp target is used before the s_and_saveexec instruction. + +# GCN-LABEL: name: vcmp_saveexec_to_mov_vcmpx_exec_intermediate_use +# GCN: V_CMP_LT_F32_e64 +# GCN: V_WRITELANE_B32 +# GCN: S_AND_SAVEEXEC_B64 +name: vcmp_saveexec_to_mov_vcmpx_exec_intermediate_use +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr2 + renamable $sgpr0_sgpr1 = V_CMP_LT_F32_e64 0, 953267991, 2, $vgpr1, 0, implicit $mode, implicit $exec + $vgpr0 = V_WRITELANE_B32 0, $sgpr0, $vgpr0 + $sgpr2_sgpr3 = COPY $exec, implicit-def $exec + $sgpr2_sgpr3 = S_AND_B64 killed renamable $sgpr2_sgpr3, killed renamable $sgpr0_sgpr1, implicit-def dead $scc + $exec = S_MOV_B64_term killed renamable $sgpr2_sgpr3 +... + +--- + # Check if the modifiers are preserved when generating the V_CMPX instruction. # GCN-LABEL: name: vcmp_saveexec_to_mov_vcmpx_check_abs diff --git a/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.s b/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.s new file mode 100644