diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -302,12 +302,15 @@ findInstrBackwards(MachineInstr &Origin, std::function Pred, ArrayRef NonModifiableRegs, - const SIRegisterInfo *TRI, unsigned MaxInstructions = 5) { + const SIRegisterInfo *TRI, unsigned MaxInstructions = 20) { MachineBasicBlock::reverse_iterator A = Origin.getReverseIterator(), E = Origin.getParent()->rend(); unsigned CurrentIteration = 0; for (++A; CurrentIteration < MaxInstructions && A != E; ++A) { + if (A->isDebugInstr()) + continue; + if (Pred(&*A)) return &*A; @@ -315,7 +318,7 @@ if (A->modifiesRegister(Reg, TRI)) return nullptr; } - + ++CurrentIteration; } diff --git a/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.mir b/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.mir --- a/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.mir +++ b/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.mir @@ -1,6 +1,5 @@ # RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck --check-prefixes=GCN,GFX1010 %s # RUN: llc -march=amdgcn -mcpu=gfx1030 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck --check-prefixes=GCN,GFX1030 %s - --- # After the Optimize exec masking (post-RA) pass, there's a change of having v_cmpx instructions @@ -62,3 +61,29 @@ $sgpr2_sgpr3 = COPY $exec, implicit-def $exec $sgpr2_sgpr3 = S_AND_B64 killed renamable $sgpr2_sgpr3, killed renamable $sgpr0_sgpr1, implicit-def dead $scc $exec = S_MOV_B64_term killed renamable $sgpr2_sgpr3 +... + +--- + +# Check if the sequence will be optimized even with more than 5 (unrelated) instructions in between the v_cmp and s_and_saveexec. 
+ +# GCN-LABEL: name: vcmp_saveexec_to_mov_vcmpx_check_many_instrs +# GFX1010: V_CMP_LT_F32_e64 +# GFX1010: S_AND_SAVEEXEC_B64 +# GFX1030: S_MOV_B64 +# GFX1030: V_CMPX_LT_F32_nosdst_e64 0, 953267991, 2 +name: vcmp_saveexec_to_mov_vcmpx_check_many_instrs +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $sgpr2, $vgpr1 + renamable $sgpr0_sgpr1 = V_CMP_LT_F32_e64 0, 953267991, 2, $vgpr0, 0, implicit $mode, implicit $exec + $vgpr1 = V_WRITELANE_B32 0, $sgpr2, $vgpr1 + $vgpr1 = V_WRITELANE_B32 0, $sgpr2, $vgpr1 + $vgpr1 = V_WRITELANE_B32 0, $sgpr2, $vgpr1 + $vgpr1 = V_WRITELANE_B32 0, $sgpr2, $vgpr1 + $vgpr1 = V_WRITELANE_B32 0, $sgpr2, $vgpr1 + $vgpr1 = V_WRITELANE_B32 0, $sgpr2, $vgpr1 + $sgpr2_sgpr3 = COPY $exec, implicit-def $exec + $sgpr2_sgpr3 = S_AND_B64 killed renamable $sgpr2_sgpr3, killed renamable $sgpr0_sgpr1, implicit-def dead $scc + $exec = S_MOV_B64_term killed renamable $sgpr2_sgpr3