Index: llvm/include/llvm/CodeGen/TargetInstrInfo.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1749,6 +1749,8 @@
     return false;
   }
 
+  /// Returns false if the register allocator should not split live intervals
+  /// across this basic block (a target-specific constraint); true by default.
+  virtual bool isValidForLISplit(MachineBasicBlock *MBB) const { return true; }
+
   /// During PHI eleimination lets target to make necessary checks and
   /// insert the copy to the PHI destination register in a target specific
   /// manner.
Index: llvm/lib/CodeGen/RegAllocGreedy.cpp
===================================================================
--- llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -1259,7 +1259,8 @@
   for (unsigned i = 0; i != Blocks.size(); ++i) {
     unsigned Number = Blocks[i];
     Intf.moveToBlock(Number);
-
+    if (!TII->isValidForLISplit(MF->getBlockNumbered(Number)))
+      return false;
     if (!Intf.hasInterference()) {
       assert(T < GroupSize && "Array overflow");
       TBS[T] = Number;
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -981,6 +981,8 @@
 
   bool isBasicBlockPrologue(const MachineInstr &MI) const override;
 
+  bool isValidForLISplit(MachineBasicBlock *MBB) const override;
+
   MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator InsPt,
                                          const DebugLoc &DL, Register Src,
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6803,6 +6803,33 @@
          MI.modifiesRegister(AMDGPU::EXEC, &RI);
 }
 
+// MBB is a bad candidate for live-interval splitting if a predecessor reaches
+// it through an exec-mask manipulation idiom (s_cbranch_execz %MBB, or
+// s_cbranch_execnz <other>; s_branch %MBB, following an instruction that
+// writes EXEC): code inserted at the top of such a block would run under the
+// wrong exec mask.
+bool SIInstrInfo::isValidForLISplit(MachineBasicBlock *MBB) const {
+  for (MachineBasicBlock *Pred : MBB->predecessors()) {
+    MachineBasicBlock::iterator I = Pred->getFirstTerminator();
+    // Only predecessors whose first terminator writes EXEC are of interest.
+    if (I == Pred->end() || !I->modifiesRegister(AMDGPU::EXEC, &RI))
+      continue;
+    while (++I != Pred->end()) {
+      // "s_cbranch_execz %MBB" jumps to MBB when exec is zero.
+      if (I->getOpcode() == AMDGPU::S_CBRANCH_EXECZ &&
+          I->getOperand(0).getMBB() == MBB)
+        return false;
+      // "s_cbranch_execnz <other>; s_branch %MBB" falls through to MBB
+      // likewise; guard against the cbranch being the last instruction.
+      if (I->getOpcode() == AMDGPU::S_CBRANCH_EXECNZ) {
+        if (++I == Pred->end())
+          break;
+        if (I->getOpcode() == AMDGPU::S_BRANCH &&
+            I->getOperand(0).getMBB() == MBB)
+          return false;
+      }
+    }
+  }
+  return true;
+}
+
 MachineInstrBuilder
 SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator I,
Index: llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
+++ llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -39,7 +39,7 @@
 ; GFX6-NEXT: buffer_load_dword v{{[0-9]+}}, off, s[{{[0-9:]+}}], s32
 ; GFX6-NEXT: s_sub_u32 s32, s32, 0x[[OFFSET:[0-9]+]]
 ; GFX6: NumSgprs: 48
-; GFX6: ScratchSize: 8624
+; GFX6: ScratchSize: 8656
 define amdgpu_kernel void @test_limited_sgpr(<64 x i32> addrspace(1)* %out, <64 x i32> addrspace(1)* %in) #0 {
 entry:
   %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)