Index: llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -176,8 +176,7 @@ } // These are only terminators to get correct spill code placement during -// register allocation, so turn them back into normal instructions. Only one of -// these is expected per block. +// register allocation, so turn them back into normal instructions. static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) { switch (MI.getOpcode()) { case AMDGPU::S_MOV_B64_term: @@ -220,19 +219,29 @@ } } +// Strip the terminator bit from every pseudo-terminator in the block, scanning +// backwards from its end. Returns a reverse iterator to the first instruction +// reached that is not a terminator afterwards, or MBB.rbegin() if none exists. static MachineBasicBlock::reverse_iterator fixTerminators( const SIInstrInfo &TII, MachineBasicBlock &MBB) { MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend(); + + bool Seen = false; + MachineBasicBlock::reverse_iterator FirstNonTerm = I; for (; I != E; ++I) { if (!I->isTerminator()) - return I; + return Seen ? FirstNonTerm : I; - if (removeTerminatorBit(TII, *I)) - return I; + if (removeTerminatorBit(TII, *I)) { + if (!Seen) { + FirstNonTerm = I; + Seen = true; + } + } } - return E; + return FirstNonTerm; } static MachineBasicBlock::reverse_iterator findExecCopy( @@ -291,6 +300,7 @@ if (I == E) continue; + // TODO: It's possible to see other terminator copies after the exec copy. 
Register CopyToExec = isCopyToExec(*I, ST); if (!CopyToExec.isValid()) continue; Index: llvm/test/CodeGen/AMDGPU/optimize-exec-masking-strip-terminator-bits.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/optimize-exec-masking-strip-terminator-bits.mir @@ -0,0 +1,44 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=si-optimize-exec-masking -o - %s | FileCheck %s + +# There are multiple _term pseudos here. Starting from the end of the +# block, they all need to be converted to regular copies in order to +# avoid making the verifier unhappy. + +--- +name: multi_term_pseudos +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: multi_term_pseudos + ; CHECK: bb.0: + ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 + ; CHECK: $exec = COPY killed renamable $sgpr4_sgpr5 + ; CHECK: renamable $sgpr10_sgpr11 = COPY killed renamable $sgpr6_sgpr7, implicit $exec + ; CHECK: renamable $sgpr12_sgpr13 = COPY killed renamable $sgpr8_sgpr9, implicit $exec + ; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.1: + ; CHECK: liveins: $sgpr12_sgpr13 + ; CHECK: S_ENDPGM 0, implicit $sgpr12_sgpr13 + ; CHECK: bb.2: + ; CHECK: liveins: $sgpr12_sgpr13 + ; CHECK: S_ENDPGM 0, implicit $sgpr12_sgpr13 + bb.0: + successors: %bb.2(0x40000000), %bb.1(0x40000000) + liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 + $exec = S_MOV_B64_term killed renamable $sgpr4_sgpr5 + renamable $sgpr10_sgpr11 = S_MOV_B64_term killed renamable $sgpr6_sgpr7, implicit $exec + renamable $sgpr12_sgpr13 = S_MOV_B64_term killed renamable $sgpr8_sgpr9, implicit $exec + S_CBRANCH_EXECZ %bb.1, implicit $exec + S_BRANCH %bb.2 + + bb.1: + liveins: $sgpr12_sgpr13 + S_ENDPGM 0, implicit $sgpr12_sgpr13 + + bb.2: + liveins: $sgpr12_sgpr13 + S_ENDPGM 
0, implicit $sgpr12_sgpr13 + +...