diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp --- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp @@ -366,7 +366,6 @@ MDT = &getAnalysis(); SkipThreshold = SkipThresholdFlag; - MachineBasicBlock *EmptyMBBAtEnd = nullptr; SmallVector KillInstrs; bool MadeChange = false; @@ -417,29 +416,6 @@ break; } - case AMDGPU::SI_RETURN_TO_EPILOG: - // FIXME: Should move somewhere else - assert(!MF.getInfo()->returnsVoid()); - - // Graphics shaders returning non-void shouldn't contain S_ENDPGM, - // because external bytecode will be appended at the end. - if (&MBB != &MF.back() || &MI != &MBB.back()) { - // SI_RETURN_TO_EPILOG is not the last instruction. Add an empty block at - // the end and jump there. - if (!EmptyMBBAtEnd) { - EmptyMBBAtEnd = MF.CreateMachineBasicBlock(); - MF.insert(MF.end(), EmptyMBBAtEnd); - } - - MBB.addSuccessor(EmptyMBBAtEnd); - BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH)) - .addMBB(EmptyMBBAtEnd); - MI.eraseFromParent(); - - MDT->getBase().insertEdge(&MBB, EmptyMBBAtEnd); - } - break; - default: break; } diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp --- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp @@ -148,6 +148,7 @@ TII = ST.getInstrInfo(); TRI = &TII->getRegisterInfo(); bool Changed = false; + MachineBasicBlock *EmptyMBBAtEnd = nullptr; for (MachineBasicBlock &MBB : MF) { MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); @@ -160,6 +161,28 @@ case AMDGPU::S_CBRANCH_VCCNZ: Changed |= optimizeVccBranch(MI); break; + + case AMDGPU::SI_RETURN_TO_EPILOG: + // FIXME: Should move somewhere else + assert(!MF.getInfo()->returnsVoid()); + + // Graphics shaders returning non-void shouldn't contain S_ENDPGM, + // because external bytecode will be appended at the end. + if (&MBB != &MF.back() || &MI != &MBB.back()) { + // SI_RETURN_TO_EPILOG is not the last instruction. Add an empty block + // at the end and jump there. + if (!EmptyMBBAtEnd) { + EmptyMBBAtEnd = MF.CreateMachineBasicBlock(); + MF.insert(MF.end(), EmptyMBBAtEnd); + } + + MBB.addSuccessor(EmptyMBBAtEnd); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH)) + .addMBB(EmptyMBBAtEnd); + MI.eraseFromParent(); + } + break; + default: break; }