Index: llvm/include/llvm/CodeGen/TargetInstrInfo.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -22,6 +22,7 @@
 #include "llvm/CodeGen/MachineCombinerPattern.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineOutliner.h"
@@ -1638,6 +1639,30 @@
     return false;
   }
 
+  /// During PHI elimination, lets the target make the necessary checks and
+  /// insert the copy to the PHI destination register in a target-specific
+  /// manner.
+  virtual void createPHIDestinationCopy(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator &InsPt,
+                                        const DebugLoc &DL, unsigned Src,
+                                        unsigned Dst) const {
+    BuildMI(MBB, InsPt, DL, get(TargetOpcode::COPY), Dst)
+        .addReg(Src);
+  }
+
+  /// During PHI elimination, lets the target make the necessary checks and
+  /// insert the copy of the PHI source register into \p Dst in a
+  /// target-specific manner.
+  virtual MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
+                                            MachineBasicBlock::iterator &InsPt,
+                                            const DebugLoc &DL, unsigned Src,
+                                            unsigned SrcSubReg,
+                                            unsigned Dst) const {
+    return BuildMI(MBB, InsPt, DL, get(TargetOpcode::COPY),
+                   Dst)
+        .addReg(Src, 0, SrcSubReg);
+  }
+
   /// Returns a \p outliner::OutlinedFunction struct containing target-specific
   /// information for a set of outlining candidates.
   virtual outliner::OutlinedFunction getOutliningCandidateInfo(
Index: llvm/lib/CodeGen/PHIElimination.cpp
===================================================================
--- llvm/lib/CodeGen/PHIElimination.cpp
+++ llvm/lib/CodeGen/PHIElimination.cpp
@@ -31,7 +31,9 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Pass.h"
@@ -273,9 +275,9 @@
       const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
       entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
     }
-    BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
-            TII->get(TargetOpcode::COPY), DestReg)
-      .addReg(IncomingReg);
+    // Give the target a chance to handle special cases, fall through otherwise.
+    TII->createPHIDestinationCopy(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+                                  IncomingReg, DestReg);
   }
 
   // Update live variable information if there is any.
@@ -406,9 +408,9 @@
           if (DefMI->isImplicitDef())
             ImpDefs.insert(DefMI);
       } else {
-        NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
-                            TII->get(TargetOpcode::COPY), IncomingReg)
-                        .addReg(SrcReg, 0, SrcSubReg);
+        NewSrcInstr =
+            TII->createPHISourceCopy(opBlock, InsertPos, MPhi->getDebugLoc(),
+                                     SrcReg, SrcSubReg, IncomingReg);
       }
     }
 
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -954,6 +954,26 @@
 
   bool isBasicBlockPrologue(const MachineInstr &MI) const override;
 
+  /// PHI elimination hook: insert the copy from \p Src to the PHI destination
+  /// \p Dst. The copy may be placed before an instruction preceding \p InsPt
+  /// that already reads \p Dst; \p InsPt is then updated to that instruction.
+  /// Otherwise the generic TargetInstrInfo implementation is used.
+  void createPHIDestinationCopy(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator &InsPt,
+                                const DebugLoc &DL, unsigned Src,
+                                unsigned Dst) const override;
+
+  /// PHI elimination hook: insert the copy of \p Src (\p SrcSubReg) into
+  /// \p Dst. If \p InsPt points at a pseudo instruction that defines \p Src,
+  /// the copy is emitted after that instruction with an implicit use of EXEC
+  /// and \p InsPt is advanced past it. Otherwise the generic TargetInstrInfo
+  /// implementation is used.
+  MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator &InsPt,
+                                    const DebugLoc &DL, unsigned Src,
+                                    unsigned SrcSubReg,
+                                    unsigned Dst) const override;
+
   /// Return a partially built integer add instruction without carry.
   /// Caller must add source operands.
   /// For pre-GFX9 it will generate unused carry destination operand.
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6393,3 +6393,45 @@
     return true;
   }
 }
+
+// PHI elimination hook: insert the copy from \p Src that defines the PHI
+// destination \p Dst. \p LastPHIIt is the insertion point chosen by the
+// caller. If a non-PHI instruction preceding it already reads \p Dst, the
+// copy is placed before the first such instruction and \p LastPHIIt is
+// updated to point at it; otherwise the generic implementation is used.
+void SIInstrInfo::createPHIDestinationCopy(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator &LastPHIIt,
+    const DebugLoc &DL, unsigned Src, unsigned Dst) const {
+  // Walk forward from the top of the block so that its first instruction is
+  // examined too and an empty block needs no iterator rewinding. The block
+  // is assumed to be well formed, with all remaining PHIs at the top.
+  for (MachineBasicBlock::iterator Cur = MBB.begin(), End = MBB.end();
+       Cur != End && Cur != LastPHIIt; ++Cur) {
+    if (Cur->isPHI() || !Cur->readsRegister(Dst))
+      continue;
+    BuildMI(MBB, Cur, DL, get(TargetOpcode::COPY), Dst).addReg(Src);
+    LastPHIIt = Cur;
+    return;
+  }
+  TargetInstrInfo::createPHIDestinationCopy(MBB, LastPHIIt, DL, Src, Dst);
+}
+
+// PHI elimination hook: insert the copy of \p Src (\p SrcSubReg) into \p Dst.
+// If \p InsPt points at a pseudo instruction that defines \p Src, the copy is
+// emitted right after that instruction with an implicit use of EXEC and
+// \p InsPt is advanced past it; otherwise the generic implementation is used.
+MachineInstr *SIInstrInfo::createPHISourceCopy(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator &InsPt,
+    const DebugLoc &DL, unsigned Src, unsigned SrcSubReg, unsigned Dst) const {
+  if (InsPt != MBB.end() && InsPt->isPseudo() && InsPt->definesRegister(Src)) {
+    // Step past the defining pseudo. InsPt may now be MBB.end(), so it must
+    // not be dereferenced for a debug location; use the caller-supplied DL,
+    // as the generic implementation does.
+    ++InsPt;
+    return BuildMI(MBB, InsPt, DL, get(TargetOpcode::COPY), Dst)
+        .addReg(Src, 0, SrcSubReg)
+        .addReg(AMDGPU::EXEC, RegState::Implicit);
+  }
+  return TargetInstrInfo::createPHISourceCopy(MBB, InsPt, DL, Src, SrcSubReg,
+                                              Dst);
+}
Index: llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -400,13 +400,17 @@
 
 void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
   MachineBasicBlock &MBB = *MI.getParent();
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  unsigned CFMask = MI.getOperand(0).getReg();
+  MachineInstr *Def = MRI.getUniqueVRegDef(CFMask);
   const DebugLoc &DL = MI.getDebugLoc();
 
-  MachineBasicBlock::iterator InsPt = MBB.begin();
-  MachineInstr *NewMI =
-      BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec)
-          .addReg(Exec)
-          .add(MI.getOperand(0));
+  MachineBasicBlock::iterator InsPt =
+      Def && Def->getParent() == &MBB ? std::next(MachineBasicBlock::iterator(Def))
+                               : MBB.begin();
+  MachineInstr *NewMI = BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec)
+                            .addReg(Exec)
+                            .add(MI.getOperand(0));
 
   if (LIS)
     LIS->ReplaceMachineInstrInMaps(MI, *NewMI);