Index: llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -224,15 +224,22 @@ const SIInstrInfo &TII, MachineBasicBlock &MBB) { MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend(); + + bool Seen = false; + MachineBasicBlock::reverse_iterator FirstNonTerm = I; for (; I != E; ++I) { if (!I->isTerminator()) - return I; + return FirstNonTerm; - if (removeTerminatorBit(TII, *I)) - return I; + if (removeTerminatorBit(TII, *I)) { + if (!Seen) { + FirstNonTerm = I; + Seen = true; + } + } } - return E; + return FirstNonTerm; } static MachineBasicBlock::reverse_iterator findExecCopy( @@ -291,6 +298,7 @@ if (I == E) continue; + // TODO: It's possible to see other terminator copies after the exec copy. Register CopyToExec = isCopyToExec(*I, ST); if (!CopyToExec.isValid()) continue; Index: llvm/test/CodeGen/AMDGPU/optimize-exec-masking-strip-terminator-bits.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/optimize-exec-masking-strip-terminator-bits.mir @@ -0,0 +1,44 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=si-optimize-exec-masking -o - %s | FileCheck %s + +# There are multiple _term pseudos here. Starting from the end of the +# block, they all need to be converted to regular copies in +# order to avoid making the verifier unhappy. 
+ +--- +name: multi_term_pseudos +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: multi_term_pseudos + ; CHECK: bb.0: + ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 + ; CHECK: $exec = COPY killed renamable $sgpr4_sgpr5 + ; CHECK: renamable $sgpr10_sgpr11 = COPY killed renamable $sgpr6_sgpr7, implicit $exec + ; CHECK: renamable $sgpr12_sgpr13 = COPY killed renamable $sgpr8_sgpr9, implicit $exec + ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.1: + ; CHECK: liveins: $sgpr12_sgpr13 + ; CHECK: S_ENDPGM 0, implicit $sgpr12_sgpr13 + ; CHECK: bb.2: + ; CHECK: liveins: $sgpr12_sgpr13 + ; CHECK: S_ENDPGM 0, implicit $sgpr12_sgpr13 + bb.0: + successors: %bb.2(0x40000000), %bb.1(0x40000000) + liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 + $exec = S_MOV_B64_term killed renamable $sgpr4_sgpr5 + renamable $sgpr10_sgpr11 = S_MOV_B64_term killed renamable $sgpr6_sgpr7, implicit $exec + renamable $sgpr12_sgpr13 = S_MOV_B64_term killed renamable $sgpr8_sgpr9, implicit $exec + S_CBRANCH_EXECZ %bb.1, implicit $exec + S_BRANCH %bb.2 + + bb.1: + liveins: $sgpr12_sgpr13 + S_ENDPGM 0, implicit $sgpr12_sgpr13 + + bb.2: + liveins: $sgpr12_sgpr13 + S_ENDPGM 0, implicit $sgpr12_sgpr13 + +...