diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -753,6 +753,35 @@ continue; LLVM_DEBUG(dbgs() << "All defs dead: " << *MI); DeadDefs.push_back(MI); + // If MI is a bundle header, also try removing copies inside the bundle, + // otherwise the verifier would complain "live range continues after dead + // def flag". + if (MI->isBundledWithSucc() && !MI->isBundledWithPred()) { + MachineBasicBlock::instr_iterator BeginIt = MI->getIterator(), + EndIt = MI->getParent()->instr_end(); + ++BeginIt; // Skip MI that was already handled. + + bool OnlyDeadCopies = true; + for (MachineBasicBlock::instr_iterator It = BeginIt; + It != EndIt && It->isBundledWithPred(); ++It) { + + auto DestSrc = TII.isCopyInstr(*It); + bool IsCopyToDeadReg = + DestSrc && DestSrc->Destination->getReg() == Reg; + if (!IsCopyToDeadReg) { + OnlyDeadCopies = false; + break; + } + } + if (OnlyDeadCopies) { + for (MachineBasicBlock::instr_iterator It = BeginIt; + It != EndIt && It->isBundledWithPred(); ++It) { + It->addRegisterDead(Reg, &TRI); + LLVM_DEBUG(dbgs() << "All defs dead: " << *It); + DeadDefs.push_back(&*It); + } + } + } } } diff --git a/llvm/test/CodeGen/AMDGPU/dead_bundle.mir b/llvm/test/CodeGen/AMDGPU/dead_bundle.mir --- a/llvm/test/CodeGen/AMDGPU/dead_bundle.mir +++ b/llvm/test/CodeGen/AMDGPU/dead_bundle.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn--amdpal -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs=0 -start-before=greedy,0 -stop-after=virtregrewriter,0 -stress-regalloc=5 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn--amdpal -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs=1 -start-before=greedy,0 -stop-after=virtregrewriter,0 -stress-regalloc=5 %s -o - | FileCheck %s -# This test currently fails with verify-machineinstrs=1 due to dead bundle mishandling: "Live range continues after dead def flag". +# This test checks that dead bundles are handled correctly. --- name: psmain tracksRegLiveness: true @@ -20,7 +20,6 @@ ; CHECK-NEXT: renamable $sgpr1 = KILL undef $sgpr1 ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_BUFFER_LOAD_DWORDX8_IMM undef renamable $sgpr0_sgpr1_sgpr2_sgpr3, 416, 0 :: (dereferenceable invariant load (s256), align 4) ; CHECK-NEXT: dead [[V_CVT_U32_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_U32_F32_e64 0, $sgpr4, 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: SI_SPILL_S256_SAVE renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s256) into %stack.0, align 4, addrspace 5) ; CHECK-NEXT: dead renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_BUFFER_LOAD_DWORDX8_IMM undef renamable $sgpr0_sgpr1_sgpr2_sgpr3, 416, 0 :: (dereferenceable invariant load (s256), align 4) ; CHECK-NEXT: renamable $sgpr3 = COPY killed renamable $sgpr7