Index: lib/CodeGen/ProcessImplicitDefs.cpp =================================================================== --- lib/CodeGen/ProcessImplicitDefs.cpp +++ lib/CodeGen/ProcessImplicitDefs.cpp @@ -69,7 +69,7 @@ !MI->isPHI()) return false; for (const MachineOperand &MO : MI->operands()) - if (MO.isReg() && MO.isUse() && MO.readsReg() && !MO.isImplicit()) + if (MO.isReg() && MO.isUse() && MO.readsReg()) return false; return true; } Index: lib/CodeGen/TwoAddressInstructionPass.cpp =================================================================== --- lib/CodeGen/TwoAddressInstructionPass.cpp +++ lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1723,31 +1723,20 @@ /// /// The instruction is turned into a sequence of sub-register copies: /// -/// %dst = REG_SEQUENCE %v1, ssub0, %v2, ssub1, [implicit uses] +/// %dst = REG_SEQUENCE %v1, ssub0, %v2, ssub1 /// /// Becomes: /// -/// %dst:ssub0 = COPY %v1, [implicit uses] -/// %dst:ssub1 = COPY %v2, [implicit uses] +/// %dst:ssub0 = COPY %v1 +/// %dst:ssub1 = COPY %v2 /// void TwoAddressInstructionPass:: eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { MachineInstr *MI = MBBI; unsigned DstReg = MI->getOperand(0).getReg(); - unsigned NumTrailingImplicit = 0; - - for (unsigned i = MI->getNumOperands(); i > 0; --i) { - const MachineOperand &MO = MI->getOperand(i - 1); - if (!MO.isReg() || !MO.isImplicit()) - break; - NumTrailingImplicit++; - } - - unsigned NumOperands = MI->getNumOperands() - NumTrailingImplicit; - if (MI->getOperand(0).getSubReg() || TargetRegisterInfo::isPhysicalRegister(DstReg) || - !(NumOperands & 1)) { + !(MI->getNumOperands() & 1)) { DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI); llvm_unreachable(nullptr); } @@ -1755,12 +1744,12 @@ SmallVector<unsigned, 4> OrigRegs; if (LIS) { OrigRegs.push_back(MI->getOperand(0).getReg()); - for (unsigned i = 1; i < NumOperands; i += 2) + for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) OrigRegs.push_back(MI->getOperand(i).getReg()); } bool DefEmitted = 
false; - for (unsigned i = 1; i < NumOperands; i += 2) { + for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { MachineOperand &UseMO = MI->getOperand(i); unsigned SrcReg = UseMO.getReg(); unsigned SubIdx = MI->getOperand(i+1).getImm(); @@ -1772,7 +1761,7 @@ // might insert a COPY that uses SrcReg after is was killed. bool isKill = UseMO.isKill(); if (isKill) - for (unsigned j = i + 2; j < NumOperands; j += 2) + for (unsigned j = i + 2; j < e; j += 2) if (MI->getOperand(j).getReg() == SrcReg) { MI->getOperand(j).setIsKill(); UseMO.setIsKill(false); @@ -1786,9 +1775,6 @@ .addReg(DstReg, RegState::Define, SubIdx) .addOperand(UseMO); - for (unsigned j = 0; j < NumTrailingImplicit; ++j) - CopyMI->addOperand(MI->getOperand(NumOperands + j)); - // The first def needs an <undef> flag because there is no live register // before it. if (!DefEmitted) { @@ -1811,7 +1797,7 @@ if (!DefEmitted) { DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF"); MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); - for (int j = NumOperands - 1, ee = 0; j > ee; --j) + for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) MI->RemoveOperand(j); } else { DEBUG(dbgs() << "Eliminated: " << *MI); Index: lib/Target/AMDGPU/AMDGPUInstrInfo.h =================================================================== --- lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -62,6 +62,9 @@ int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override; + bool isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const override; /// \brief Return a target-specific opcode if Opcode is a pseudo instruction. 
/// Return -1 if the target-specific opcode for the pseudo instruction does Index: lib/Target/AMDGPU/AMDGPUInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUInstrInfo.cpp +++ lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -63,6 +63,23 @@ return (NumLoads <= 16 && (Offset1 - Offset0) < 64); } +bool AMDGPUInstrInfo::isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const { + // Terminators and labels can't be scheduled around. + if (MI->isTerminator() || MI->isPosition()) + return true; + + // Target-independent instructions do not have an implicit-use of EXEC, even + // when they operate on VGPRs. Treating EXEC modifications as scheduling + // boundaries prevents incorrect movements of such instructions. + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (MI->modifiesRegister(AMDGPU::EXEC, TRI)) + return true; + + return false; +} + int AMDGPUInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const { const MachineRegisterInfo &MRI = MF.getRegInfo(); const MachineFrameInfo *MFI = MF.getFrameInfo(); Index: lib/Target/AMDGPU/SIWholeQuadMode.cpp =================================================================== --- lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -374,10 +374,6 @@ if (TRI->isSGPRReg(*MRI, Op.getReg())) { // SGPR instructions are not affected by EXEC continue; - } else { - // Generic instructions on VGPRs must be marked as implicitly using - // EXEC or subsequent passes might reschedule them incorrectly. - MI.addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); } } } @@ -453,21 +449,6 @@ // For a shader that needs only WQM, we can just set it once. 
BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC).addReg(AMDGPU::EXEC); - - for (MachineInstr &MI : Entry) { - if (TargetInstrInfo::isGenericOpcode(MI.getOpcode()) && - MI.getNumExplicitOperands() >= 1) { - const MachineOperand &Op = MI.getOperand(0); - if (Op.isReg()) { - if (!TRI->isSGPRReg(*MRI, Op.getReg())) { - // Generic instructions on VGPRs must be marked as implicitly using - // EXEC or subsequent passes might reschedule them incorrectly. - MI.addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); - } - } - } - } - return true; } Index: test/CodeGen/AMDGPU/si-scheduler.ll =================================================================== --- test/CodeGen/AMDGPU/si-scheduler.ll +++ test/CodeGen/AMDGPU/si-scheduler.ll @@ -3,9 +3,9 @@ ; The test checks the "si" machine scheduler pass works correctly. ; CHECK-LABEL: {{^}}main: +; CHECK: s_wqm ; CHECK: s_load_dwordx4 ; CHECK: s_load_dwordx8 -; CHECK: s_wqm ; CHECK: s_waitcnt lgkmcnt(0) ; CHECK: image_sample ; CHECK: s_waitcnt vmcnt(0)