Please use GitHub pull requests for new patches. Phabricator shutdown timeline
Changeset View
Changeset View
Standalone View
Standalone View
llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
Show All 33 Lines | class SIOptimizeExecMasking : public MachineFunctionPass { | ||||
SmallVector<std::pair<MachineInstr *, MachineInstr *>, 1> OrXors; | SmallVector<std::pair<MachineInstr *, MachineInstr *>, 1> OrXors; | ||||
Register isCopyFromExec(const MachineInstr &MI) const; | Register isCopyFromExec(const MachineInstr &MI) const; | ||||
Register isCopyToExec(const MachineInstr &MI) const; | Register isCopyToExec(const MachineInstr &MI) const; | ||||
bool removeTerminatorBit(MachineInstr &MI) const; | bool removeTerminatorBit(MachineInstr &MI) const; | ||||
MachineBasicBlock::reverse_iterator | MachineBasicBlock::reverse_iterator | ||||
fixTerminators(MachineBasicBlock &MBB) const; | fixTerminators(MachineBasicBlock &MBB) const; | ||||
MachineBasicBlock::reverse_iterator | MachineBasicBlock::reverse_iterator | ||||
findExecCopy(MachineBasicBlock &MBB, MachineBasicBlock::reverse_iterator I, | findExecCopy(MachineBasicBlock &MBB, | ||||
unsigned CopyToExec) const; | MachineBasicBlock::reverse_iterator I) const; | ||||
bool isRegisterInUseBetween(MachineInstr &Stop, MachineInstr &Start, | bool isRegisterInUseBetween(MachineInstr &Stop, MachineInstr &Start, | ||||
MCRegister Reg, bool UseLiveOuts = false, | MCRegister Reg, bool UseLiveOuts = false, | ||||
bool IgnoreStart = false) const; | bool IgnoreStart = false) const; | ||||
bool isRegisterInUseAfter(MachineInstr &Stop, MCRegister Reg) const; | bool isRegisterInUseAfter(MachineInstr &Stop, MCRegister Reg) const; | ||||
MachineInstr *findInstrBackwards(MachineInstr &Origin, | MachineInstr *findInstrBackwards(MachineInstr &Origin, | ||||
std::function<bool(MachineInstr *)> Pred, | std::function<bool(MachineInstr *)> Pred, | ||||
ArrayRef<MCRegister> NonModifiableRegs, | ArrayRef<MCRegister> NonModifiableRegs, | ||||
▲ Show 20 Lines • Show All 239 Lines • ▼ Show 20 Lines | if (removeTerminatorBit(*I)) { | ||||
Seen = true; | Seen = true; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
return FirstNonTerm; | return FirstNonTerm; | ||||
} | } | ||||
MachineBasicBlock::reverse_iterator | MachineBasicBlock::reverse_iterator SIOptimizeExecMasking::findExecCopy( | ||||
SIOptimizeExecMasking::findExecCopy(MachineBasicBlock &MBB, | MachineBasicBlock &MBB, MachineBasicBlock::reverse_iterator I) const { | ||||
MachineBasicBlock::reverse_iterator I, | |||||
unsigned CopyToExec) const { | |||||
const unsigned InstLimit = 25; | const unsigned InstLimit = 25; | ||||
auto E = MBB.rend(); | auto E = MBB.rend(); | ||||
for (unsigned N = 0; N <= InstLimit && I != E; ++I, ++N) { | for (unsigned N = 0; N <= InstLimit && I != E; ++I, ++N) { | ||||
Register CopyFromExec = isCopyFromExec(*I); | Register CopyFromExec = isCopyFromExec(*I); | ||||
if (CopyFromExec.isValid()) | if (CopyFromExec.isValid()) | ||||
return I; | return I; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Lines | bool SIOptimizeExecMasking::isRegisterInUseBetween(MachineInstr &Stop, | ||||
MCRegister Reg, | MCRegister Reg, | ||||
bool UseLiveOuts, | bool UseLiveOuts, | ||||
bool IgnoreStart) const { | bool IgnoreStart) const { | ||||
LivePhysRegs LR(*TRI); | LivePhysRegs LR(*TRI); | ||||
if (UseLiveOuts) | if (UseLiveOuts) | ||||
LR.addLiveOuts(*Stop.getParent()); | LR.addLiveOuts(*Stop.getParent()); | ||||
MachineBasicBlock::reverse_iterator A(Start); | MachineBasicBlock::reverse_iterator A(Start); | ||||
MachineBasicBlock::reverse_iterator E(Stop); | |||||
if (IgnoreStart) | if (IgnoreStart) | ||||
++A; | ++A; | ||||
for (; A != Stop.getParent()->rend() && A != Stop; ++A) { | for (; A != Stop.getParent()->rend() && A != Stop; ++A) { | ||||
LR.stepBackward(*A); | LR.stepBackward(*A); | ||||
} | } | ||||
Show All 38 Lines | while (I != E && SearchCount++ < SearchLimit) { | ||||
++I; | ++I; | ||||
} | } | ||||
if (!CopyToExec) | if (!CopyToExec) | ||||
continue; | continue; | ||||
// Scan backwards to find the def. | // Scan backwards to find the def. | ||||
auto *CopyToExecInst = &*I; | auto *CopyToExecInst = &*I; | ||||
auto CopyFromExecInst = findExecCopy(MBB, I, CopyToExec); | auto CopyFromExecInst = findExecCopy(MBB, I); | ||||
if (CopyFromExecInst == E) { | if (CopyFromExecInst == E) { | ||||
auto PrepareExecInst = std::next(I); | auto PrepareExecInst = std::next(I); | ||||
if (PrepareExecInst == E) | if (PrepareExecInst == E) | ||||
continue; | continue; | ||||
// Fold exec = COPY (S_AND_B64 reg, exec) -> exec = S_AND_B64 reg, exec | // Fold exec = COPY (S_AND_B64 reg, exec) -> exec = S_AND_B64 reg, exec | ||||
if (CopyToExecInst->getOperand(1).isKill() && | if (CopyToExecInst->getOperand(1).isKill() && | ||||
isLogicalOpOnExec(*PrepareExecInst) == CopyToExec) { | isLogicalOpOnExec(*PrepareExecInst) == CopyToExec) { | ||||
LLVM_DEBUG(dbgs() << "Fold exec copy: " << *PrepareExecInst); | LLVM_DEBUG(dbgs() << "Fold exec copy: " << *PrepareExecInst); | ||||
▲ Show 20 Lines • Show All 193 Lines • ▼ Show 20 Lines | void SIOptimizeExecMasking::tryRecordVCmpxAndSaveexecSequence( | ||||
if (!TRI->isSGPRReg(*MRI, SaveExecDest)) | if (!TRI->isSGPRReg(*MRI, SaveExecDest)) | ||||
return; | return; | ||||
MachineOperand *SaveExecSrc0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); | MachineOperand *SaveExecSrc0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); | ||||
if (!SaveExecSrc0->isReg()) | if (!SaveExecSrc0->isReg()) | ||||
return; | return; | ||||
// Tries to find a possibility to optimize a v_cmp ..., s_and_saveexec | // Tries to find a possibility to optimize a v_cmp ..., s_and_saveexec | ||||
// sequence by looking at an instance of a s_and_saveexec instruction. Returns | // sequence by looking at an instance of an s_and_saveexec instruction. Returns | ||||
// a pointer to the v_cmp instruction if it is safe to replace the sequence | // a pointer to the v_cmp instruction if it is safe to replace the sequence | ||||
// (see the conditions in the function body). This is after register | // (see the conditions in the function body). This is after register | ||||
// allocation, so some checks on operand dependencies need to be considered. | // allocation, so some checks on operand dependencies need to be considered. | ||||
MachineInstr *VCmp = nullptr; | MachineInstr *VCmp = nullptr; | ||||
// Try to find the last v_cmp instruction that defs the saveexec input | // Try to find the last v_cmp instruction that defs the saveexec input | ||||
// operand without any write to Exec or the saveexec input operand in between. | // operand without any write to Exec or the saveexec input operand in between. | ||||
VCmp = findInstrBackwards( | VCmp = findInstrBackwards( | ||||
Show All 17 Lines | if (Src0->isReg() && TRI->isSGPRReg(*MRI, Src0->getReg()) && | ||||
return; | return; | ||||
MachineOperand *Src1 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src1); | MachineOperand *Src1 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src1); | ||||
if (Src1->isReg() && TRI->isSGPRReg(*MRI, Src1->getReg()) && | if (Src1->isReg() && TRI->isSGPRReg(*MRI, Src1->getReg()) && | ||||
MI.modifiesRegister(Src1->getReg(), TRI)) | MI.modifiesRegister(Src1->getReg(), TRI)) | ||||
return; | return; | ||||
// Don't do the transformation if the destination operand is included in | // Don't do the transformation if the destination operand is included in | ||||
// its MBB Live-outs, meaning it's used in any of it's successors, leading | // its MBB Live-outs, meaning it's used in any of its successors, leading | ||||
// to incorrect code if the v_cmp and therefore the def of | // to incorrect code if the v_cmp and therefore the def of | ||||
// the dest operand is removed. | // the dest operand is removed. | ||||
if (isLiveOut(*VCmp->getParent(), VCmpDest->getReg())) | if (isLiveOut(*VCmp->getParent(), VCmpDest->getReg())) | ||||
return; | return; | ||||
// If the v_cmp target is in use between v_cmp and s_and_saveexec or after the | // If the v_cmp target is in use between v_cmp and s_and_saveexec or after the | ||||
// s_and_saveexec, skip the optimization. | // s_and_saveexec, skip the optimization. | ||||
if (isRegisterInUseBetween(*VCmp, MI, VCmpDest->getReg(), false, true) || | if (isRegisterInUseBetween(*VCmp, MI, VCmpDest->getReg(), false, true) || | ||||
▲ Show 20 Lines • Show All 135 Lines • Show Last 20 Lines |