Index: lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.h
+++ lib/Target/AMDGPU/SIInstrInfo.h
@@ -56,30 +56,30 @@
 
   void swapOperands(MachineInstr &Inst) const;
 
-  void lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist,
+  void lowerScalarAbs(SmallPtrSetImpl<MachineInstr *> &Worklist,
                       MachineInstr &Inst) const;
 
-  void splitScalar64BitUnaryOp(SmallVectorImpl<MachineInstr *> &Worklist,
+  void splitScalar64BitUnaryOp(SmallPtrSetImpl<MachineInstr *> &Worklist,
                                MachineInstr &Inst, unsigned Opcode) const;
 
-  void splitScalar64BitBinaryOp(SmallVectorImpl<MachineInstr *> &Worklist,
+  void splitScalar64BitBinaryOp(SmallPtrSetImpl<MachineInstr *> &Worklist,
                                 MachineInstr &Inst, unsigned Opcode) const;
 
-  void splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
+  void splitScalar64BitBCNT(SmallPtrSetImpl<MachineInstr *> &Worklist,
                             MachineInstr &Inst) const;
 
-  void splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
+  void splitScalar64BitBFE(SmallPtrSetImpl<MachineInstr *> &Worklist,
                            MachineInstr &Inst) const;
 
-  void movePackToVALU(SmallVectorImpl<MachineInstr *> &Worklist,
+  void movePackToVALU(SmallPtrSetImpl<MachineInstr *> &Worklist,
                       MachineRegisterInfo &MRI, MachineInstr &Inst) const;
 
   void addUsersToMoveToVALUWorklist(
     unsigned Reg, MachineRegisterInfo &MRI,
-    SmallVectorImpl<MachineInstr *> &Worklist) const;
+    SmallPtrSetImpl<MachineInstr *> &Worklist) const;
 
   void addSCCDefUsersToVALUWorklist(MachineInstr &SCCDefInst,
-                                    SmallVectorImpl<MachineInstr *> &Worklist) const;
+                                    SmallPtrSetImpl<MachineInstr *> &Worklist) const;
 
   const TargetRegisterClass *
   getDestEquivalentVGPRClass(const MachineInstr &Inst) const;
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3406,11 +3406,18 @@
 }
 
 void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
-  SmallVector<MachineInstr *, 128> Worklist;
-  Worklist.push_back(&TopInst);
+  typedef SmallPtrSetIterator<MachineInstr *> InstrSetIterator;
+  SmallPtrSet<MachineInstr *, 32> Worklist;
+  Worklist.insert(&TopInst);
 
   while (!Worklist.empty()) {
-    MachineInstr &Inst = *Worklist.pop_back_val();
+    // Pick last instruction entered
+    InstrSetIterator MIIterator = Worklist.begin();
+    for (InstrSetIterator I = MIIterator, E = Worklist.end(); I != E; ++I) {
+      MIIterator = I;
+    }
+    MachineInstr &Inst = **MIIterator;
+    Worklist.erase(&Inst);
     MachineBasicBlock *MBB = Inst.getParent();
     MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
 
@@ -3608,7 +3615,7 @@
   }
 }
 
-void SIInstrInfo::lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist,
+void SIInstrInfo::lowerScalarAbs(SmallPtrSetImpl<MachineInstr *> &Worklist,
                                  MachineInstr &Inst) const {
   MachineBasicBlock &MBB = *Inst.getParent();
   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -3633,7 +3640,7 @@
 }
 
 void SIInstrInfo::splitScalar64BitUnaryOp(
-    SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr &Inst,
+    SmallPtrSetImpl<MachineInstr *> &Worklist, MachineInstr &Inst,
     unsigned Opcode) const {
   MachineBasicBlock &MBB = *Inst.getParent();
   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -3684,7 +3691,7 @@
 }
 
 void SIInstrInfo::splitScalar64BitBinaryOp(
-    SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr &Inst,
+    SmallPtrSetImpl<MachineInstr *> &Worklist, MachineInstr &Inst,
     unsigned Opcode) const {
   MachineBasicBlock &MBB = *Inst.getParent();
   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -3751,7 +3758,7 @@
 }
 
 void SIInstrInfo::splitScalar64BitBCNT(
-    SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr &Inst) const {
+    SmallPtrSetImpl<MachineInstr *> &Worklist, MachineInstr &Inst) const {
   MachineBasicBlock &MBB = *Inst.getParent();
   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
 
@@ -3787,7 +3794,7 @@
   addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
 }
 
-void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
+void SIInstrInfo::splitScalar64BitBFE(SmallPtrSetImpl<MachineInstr *> &Worklist,
                                       MachineInstr &Inst) const {
   MachineBasicBlock &MBB = *Inst.getParent();
   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -3851,13 +3858,13 @@
 
 void SIInstrInfo::addUsersToMoveToVALUWorklist(
   unsigned DstReg, MachineRegisterInfo &MRI,
-  SmallVectorImpl<MachineInstr *> &Worklist) const {
+  SmallPtrSetImpl<MachineInstr *> &Worklist) const {
   for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
          E = MRI.use_end(); I != E;) {
     MachineInstr &UseMI = *I->getParent();
     if (!canReadVGPR(UseMI, I.getOperandNo())) {
-      Worklist.push_back(&UseMI);
-
+      // Should only add to worklist set once
+      Worklist.insert(&UseMI);
       do {
         ++I;
       } while (I != E && I->getParent() == &UseMI);
@@ -3867,7 +3874,7 @@
   }
 }
 
-void SIInstrInfo::movePackToVALU(SmallVectorImpl<MachineInstr *> &Worklist,
+void SIInstrInfo::movePackToVALU(SmallPtrSetImpl<MachineInstr *> &Worklist,
                                  MachineRegisterInfo &MRI,
                                  MachineInstr &Inst) const {
   unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -3930,7 +3937,7 @@
 }
 
 void SIInstrInfo::addSCCDefUsersToVALUWorklist(
-    MachineInstr &SCCDefInst, SmallVectorImpl<MachineInstr *> &Worklist) const {
+    MachineInstr &SCCDefInst, SmallPtrSetImpl<MachineInstr *> &Worklist) const {
   // This assumes that all the users of SCC are in the same block
   // as the SCC def.
   for (MachineInstr &MI :
@@ -3941,7 +3948,8 @@
       return;
 
     if (MI.findRegisterUseOperandIdx(AMDGPU::SCC) != -1)
-      Worklist.push_back(&MI);
+      // Should only add to worklist set once
+      Worklist.insert(&MI);
   }
 }
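
Note on the container swap (not part of the patch itself): SmallVectorImpl's pop_back_val() pops entries in LIFO order and tolerates duplicate pushes, while SmallPtrSetImpl deduplicates on insert() but only exposes forward iterators, which is why the patched moveToVALU() walks to the last iterator position before erasing. Below is a minimal standalone sketch of that pop pattern, assuming only llvm/ADT/SmallPtrSet.h; "Item" is a hypothetical stand-in for MachineInstr and does not appear in the patch.

// worklist_sketch.cpp -- hedged sketch of the SmallPtrSet worklist pattern
// used in the patched moveToVALU(); Item stands in for MachineInstr.
#include "llvm/ADT/SmallPtrSet.h"
#include <cstdio>

struct Item { int Id; };

int main() {
  Item A{0}, B{1}, C{2};

  llvm::SmallPtrSet<Item *, 32> Worklist;
  Worklist.insert(&A);
  Worklist.insert(&B);
  Worklist.insert(&B); // duplicate insert is a no-op, unlike push_back()
  Worklist.insert(&C);

  while (!Worklist.empty()) {
    // SmallPtrSet only provides forward iterators, so advance to the last
    // position by hand, mirroring the loop added to moveToVALU().
    auto Last = Worklist.begin();
    for (auto I = Last, E = Worklist.end(); I != E; ++I)
      Last = I;
    Item *Cur = *Last;
    Worklist.erase(Cur);

    std::printf("processing item %d\n", Cur->Id);
  }
  return 0;
}

One caveat worth keeping in mind: SmallPtrSet's iteration order is an implementation detail once the set grows past its inline storage, so "pick last instruction entered" is best-effort rather than a guarantee of strict LIFO order.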