Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h @@ -19,6 +19,7 @@ #include "AMDGPUInstrInfo.h" #include "SIDefines.h" #include "SIRegisterInfo.h" +#include "llvm/ADT/SetVector.h" namespace llvm { @@ -38,6 +39,8 @@ EXECZ = 3 }; + typedef SmallSetVector SetVectorType; + static unsigned getBranchOpcode(BranchPredicate Cond); static BranchPredicate getBranchPredicate(unsigned Opcode); @@ -56,30 +59,30 @@ void swapOperands(MachineInstr &Inst) const; - void lowerScalarAbs(SmallVectorImpl &Worklist, + void lowerScalarAbs(SetVectorType &Worklist, MachineInstr &Inst) const; - void splitScalar64BitUnaryOp(SmallVectorImpl &Worklist, + void splitScalar64BitUnaryOp(SetVectorType &Worklist, MachineInstr &Inst, unsigned Opcode) const; - void splitScalar64BitBinaryOp(SmallVectorImpl &Worklist, + void splitScalar64BitBinaryOp(SetVectorType &Worklist, MachineInstr &Inst, unsigned Opcode) const; - void splitScalar64BitBCNT(SmallVectorImpl &Worklist, + void splitScalar64BitBCNT(SetVectorType &Worklist, MachineInstr &Inst) const; - void splitScalar64BitBFE(SmallVectorImpl &Worklist, + void splitScalar64BitBFE(SetVectorType &Worklist, MachineInstr &Inst) const; - void movePackToVALU(SmallVectorImpl &Worklist, + void movePackToVALU(SetVectorType &Worklist, MachineRegisterInfo &MRI, MachineInstr &Inst) const; void addUsersToMoveToVALUWorklist( unsigned Reg, MachineRegisterInfo &MRI, - SmallVectorImpl &Worklist) const; + SetVectorType &Worklist) const; void addSCCDefUsersToVALUWorklist(MachineInstr &SCCDefInst, - SmallVectorImpl &Worklist) const; + SetVectorType &Worklist) const; const TargetRegisterClass * getDestEquivalentVGPRClass(const MachineInstr &Inst) const; Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3408,8 +3408,8 @@ } void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { - SmallVector Worklist; - Worklist.push_back(&TopInst); + SetVectorType Worklist; + Worklist.insert(&TopInst); while (!Worklist.empty()) { MachineInstr &Inst = *Worklist.pop_back_val(); @@ -3610,7 +3610,7 @@ } } -void SIInstrInfo::lowerScalarAbs(SmallVectorImpl &Worklist, +void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist, MachineInstr &Inst) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); @@ -3635,7 +3635,7 @@ } void SIInstrInfo::splitScalar64BitUnaryOp( - SmallVectorImpl &Worklist, MachineInstr &Inst, + SetVectorType &Worklist, MachineInstr &Inst, unsigned Opcode) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); @@ -3686,7 +3686,7 @@ } void SIInstrInfo::splitScalar64BitBinaryOp( - SmallVectorImpl &Worklist, MachineInstr &Inst, + SetVectorType &Worklist, MachineInstr &Inst, unsigned Opcode) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); @@ -3753,7 +3753,7 @@ } void SIInstrInfo::splitScalar64BitBCNT( - SmallVectorImpl &Worklist, MachineInstr &Inst) const { + SetVectorType &Worklist, MachineInstr &Inst) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); @@ -3789,7 +3789,7 @@ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); } -void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl &Worklist, +void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist, MachineInstr &Inst) const { MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); @@ -3853,12 +3853,12 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist( unsigned DstReg, MachineRegisterInfo &MRI, - SmallVectorImpl &Worklist) const { + SetVectorType &Worklist) const { for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg), E = MRI.use_end(); I != E;) { MachineInstr &UseMI = *I->getParent(); if (!canReadVGPR(UseMI, I.getOperandNo())) { - Worklist.push_back(&UseMI); + Worklist.insert(&UseMI); do { ++I; @@ -3869,7 +3869,7 @@ } } -void SIInstrInfo::movePackToVALU(SmallVectorImpl &Worklist, +void SIInstrInfo::movePackToVALU(SetVectorType &Worklist, MachineRegisterInfo &MRI, MachineInstr &Inst) const { unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); @@ -3932,7 +3932,7 @@ } void SIInstrInfo::addSCCDefUsersToVALUWorklist( - MachineInstr &SCCDefInst, SmallVectorImpl &Worklist) const { + MachineInstr &SCCDefInst, SetVectorType &Worklist) const { // This assumes that all the users of SCC are in the same block // as the SCC def. for (MachineInstr &MI : @@ -3943,7 +3943,7 @@ return; if (MI.findRegisterUseOperandIdx(AMDGPU::SCC) != -1) - Worklist.push_back(&MI); + Worklist.insert(&MI); } } Index: llvm/trunk/test/CodeGen/AMDGPU/move-to-valu-worklist.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/move-to-valu-worklist.ll +++ llvm/trunk/test/CodeGen/AMDGPU/move-to-valu-worklist.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s + +; In moveToVALU(), move to vector ALU is performed, all instrs in +; the use chain will be visited. We do not want the same node to be +; pushed to the visit worklist more than once. + +; GCN-LABEL: {{^}}in_worklist_once: +; GCN: buffer_load_dword +; GCN: BB0_1: +; GCN: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GCN-NEXT: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GCN: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GCN-NEXT: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +define amdgpu_kernel void @in_worklist_once() #0 { +bb: + %tmp = load i64, i64* undef +br label %bb1 + +bb1: ; preds = %bb1, %bb + %tmp2 = phi i64 [ undef, %bb ], [ %tmp16, %bb1 ] + %tmp3 = phi i64 [ %tmp, %bb ], [ undef, %bb1 ] + %tmp11 = shl i64 %tmp2, 14 + %tmp13 = xor i64 %tmp11, %tmp2 + %tmp15 = and i64 %tmp3, %tmp13 + %tmp16 = xor i64 %tmp15, %tmp3 +br label %bb1 +} + +attributes #0 = { nounwind }