diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -32,7 +32,7 @@
 
 class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
 public:
-  typedef function_ref<bool(MachineInstr *)> IsHazardFn;
+  typedef function_ref<bool(const MachineInstr &)> IsHazardFn;
 
 private:
   // Distinguish if we are called from scheduler or hazard recognizer
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -387,23 +387,22 @@
 // Helper Functions
 //===----------------------------------------------------------------------===//
 
-typedef function_ref<bool(MachineInstr *, int WaitStates)> IsExpiredFn;
+typedef function_ref<bool(const MachineInstr &, int WaitStates)> IsExpiredFn;
 
 // Returns a minimum wait states since \p I walking all predecessors.
 // Only scans until \p IsExpired does not return true.
 // Can only be run in a hazard recognizer mode.
 static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
-                              MachineBasicBlock *MBB,
-                              MachineBasicBlock::reverse_instr_iterator I,
-                              int WaitStates,
-                              IsExpiredFn IsExpired,
+                              const MachineBasicBlock *MBB,
+                              MachineBasicBlock::const_reverse_instr_iterator I,
+                              int WaitStates, IsExpiredFn IsExpired,
                               DenseSet<const MachineBasicBlock *> &Visited) {
   for (auto E = MBB->instr_rend(); I != E; ++I) {
     // Don't add WaitStates for parent BUNDLE instructions.
     if (I->isBundle())
       continue;
 
-    if (IsHazard(&*I))
+    if (IsHazard(*I))
       return WaitStates;
 
     if (I->isInlineAsm() || I->isMetaInstruction())
@@ -411,7 +410,7 @@
 
     WaitStates += SIInstrInfo::getNumWaitStates(*I);
 
-    if (IsExpired(&*I, WaitStates))
+    if (IsExpired(*I, WaitStates))
       return std::numeric_limits<int>::max();
   }
@@ -428,9 +427,6 @@
       continue;
 
     MinWaitStates = Found ? std::min(MinWaitStates, W) : W;
-    if (IsExpired(nullptr, MinWaitStates))
-      return MinWaitStates;
-
     Found = true;
   }
@@ -441,8 +437,7 @@
 }
 
 static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
-                              MachineInstr *MI,
-                              IsExpiredFn IsExpired) {
+                              const MachineInstr *MI, IsExpiredFn IsExpired) {
   DenseSet<const MachineBasicBlock *> Visited;
   return getWaitStatesSince(IsHazard, MI->getParent(),
                             std::next(MI->getReverseIterator()),
@@ -451,7 +446,7 @@
 
 int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
   if (IsHazardRecognizerMode) {
-    auto IsExpiredFn = [Limit] (MachineInstr *, int WaitStates) {
+    auto IsExpiredFn = [Limit](const MachineInstr &, int WaitStates) {
       return WaitStates >= Limit;
     };
     return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
@@ -460,7 +455,7 @@
   int WaitStates = 0;
   for (MachineInstr *MI : EmittedInstrs) {
     if (MI) {
-      if (IsHazard(MI))
+      if (IsHazard(*MI))
        return WaitStates;
 
      if (MI->isInlineAsm())
@@ -479,8 +474,8 @@
                                                int Limit) {
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
 
-  auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
-    return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI);
+  auto IsHazardFn = [IsHazardDef, TRI, Reg](const MachineInstr &MI) {
+    return IsHazardDef(MI) && MI.modifiesRegister(Reg, TRI);
   };
 
   return getWaitStatesSince(IsHazardFn, Limit);
@@ -488,8 +483,8 @@
 
 int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
                                                   int Limit) {
-  auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
-    return isSSetReg(MI->getOpcode()) && IsHazard(MI);
+  auto IsHazardFn = [IsHazard](const MachineInstr &MI) {
+    return isSSetReg(MI.getOpcode()) && IsHazard(MI);
   };
 
   return getWaitStatesSince(IsHazardFn, Limit);
@@ -588,8 +583,12 @@
   // A read of an SGPR by SMRD instruction requires 4 wait states when the
   // SGPR was written by a VALU instruction.
   int SmrdSgprWaitStates = 4;
-  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
-  auto IsBufferHazardDefFn = [this] (MachineInstr *MI) { return TII.isSALU(*MI); };
+  auto IsHazardDefFn = [this](const MachineInstr &MI) {
+    return TII.isVALU(MI);
+  };
+  auto IsBufferHazardDefFn = [this](const MachineInstr &MI) {
+    return TII.isSALU(MI);
+  };
 
   bool IsBufferSMRD = TII.isBufferSMRD(*SMRD);
@@ -629,7 +628,9 @@
   // A read of an SGPR by a VMEM instruction requires 5 wait states when the
   // SGPR was written by a VALU Instruction.
   const int VmemSgprWaitStates = 5;
-  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
+  auto IsHazardDefFn = [this](const MachineInstr &MI) {
+    return TII.isVALU(MI);
+  };
   for (const MachineOperand &Use : VMEM->uses()) {
     if (!Use.isReg() || TRI.isVectorRegister(MF.getRegInfo(), Use.getReg()))
       continue;
@@ -650,15 +651,18 @@
   int DppVgprWaitStates = 2;
   int DppExecWaitStates = 5;
   int WaitStatesNeeded = 0;
-  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
+  auto IsHazardDefFn = [TII](const MachineInstr &MI) {
+    return TII->isVALU(MI);
+  };
 
   for (const MachineOperand &Use : DPP->uses()) {
     if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
       continue;
     int WaitStatesNeededForUse =
-        DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
-                              [](MachineInstr *) { return true; },
-                              DppVgprWaitStates);
+        DppVgprWaitStates - getWaitStatesSinceDef(
+                                Use.getReg(),
+                                [](const MachineInstr &) { return true; },
+                                DppVgprWaitStates);
     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
   }
@@ -676,7 +680,9 @@
   // v_div_fmas requires 4 wait states after a write to vcc from a VALU
   // instruction.
   const int DivFMasWaitStates = 4;
-  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
+  auto IsHazardDefFn = [TII](const MachineInstr &MI) {
+    return TII->isVALU(MI);
+  };
   int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn,
                                                DivFMasWaitStates);
@@ -688,8 +694,8 @@
   unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);
   const int GetRegWaitStates = 2;
-  auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
-    return GetRegHWReg == getHWReg(TII, *MI);
+  auto IsHazardFn = [TII, GetRegHWReg](const MachineInstr &MI) {
+    return GetRegHWReg == getHWReg(TII, MI);
   };
   int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates);
@@ -701,8 +707,8 @@
   unsigned HWReg = getHWReg(TII, *SetRegInstr);
   const int SetRegWaitStates = ST.getSetRegWaitStates();
-  auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
-    return HWReg == getHWReg(TII, *MI);
+  auto IsHazardFn = [TII, HWReg](const MachineInstr &MI) {
+    return HWReg == getHWReg(TII, MI);
   };
   int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates);
   return SetRegWaitStates - WaitStatesNeeded;
@@ -770,10 +776,10 @@
   if (!TRI->isVectorRegister(MRI, Def.getReg()))
     return WaitStatesNeeded;
   Register Reg = Def.getReg();
-  auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
-    int DataIdx = createsVALUHazard(*MI);
+  auto IsHazardFn = [this, Reg, TRI](const MachineInstr &MI) {
+    int DataIdx = createsVALUHazard(MI);
     return DataIdx >= 0 &&
-           TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
+           TRI->regsOverlap(MI.getOperand(DataIdx).getReg(), Reg);
   };
   int WaitStatesNeededForDef =
     VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
@@ -836,9 +842,7 @@
     return 0;
 
   Register LaneSelectReg = LaneSelectOp->getReg();
-  auto IsHazardFn = [TII] (MachineInstr *MI) {
-    return TII->isVALU(*MI);
-  };
+  auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isVALU(MI); };
 
   const int RWLaneWaitStates = 4;
   int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn,
@@ -854,8 +858,8 @@
 
   const int RFEWaitStates = 1;
 
-  auto IsHazardFn = [TII] (MachineInstr *MI) {
-    return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS;
+  auto IsHazardFn = [TII](const MachineInstr &MI) {
+    return getHWReg(TII, MI) == AMDGPU::Hwreg::ID_TRAPSTS;
   };
   int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates);
   return RFEWaitStates - WaitStatesNeeded;
@@ -864,9 +868,7 @@
 int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
   const SIInstrInfo *TII = ST.getInstrInfo();
   const int SMovRelWaitStates = 1;
-  auto IsHazardFn = [TII] (MachineInstr *MI) {
-    return TII->isSALU(*MI);
-  };
+  auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isSALU(MI); };
   return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
                                                    SMovRelWaitStates);
 }
@@ -884,18 +886,12 @@
     return false;
 
   const SIInstrInfo *TII = ST.getInstrInfo();
-  auto IsHazardFn = [TII] (MachineInstr *MI) {
-    return TII->isVOPC(*MI);
-  };
+  auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isVOPC(MI); };
 
-  auto IsExpiredFn = [] (MachineInstr *MI, int) {
-    if (!MI)
-      return false;
-    unsigned Opc = MI->getOpcode();
-    return SIInstrInfo::isVALU(*MI) &&
-           Opc != AMDGPU::V_NOP_e32 &&
-           Opc != AMDGPU::V_NOP_e64 &&
-           Opc != AMDGPU::V_NOP_sdwa;
+  auto IsExpiredFn = [](const MachineInstr &MI, int) {
+    unsigned Opc = MI.getOpcode();
+    return SIInstrInfo::isVALU(MI) && Opc != AMDGPU::V_NOP_e32 &&
+           Opc != AMDGPU::V_NOP_e64 && Opc != AMDGPU::V_NOP_sdwa;
   };
 
   if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
@@ -928,13 +924,14 @@
 
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
 
-  auto IsHazardFn = [TRI, MI] (MachineInstr *I) {
-    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isDS(*I) &&
-        !SIInstrInfo::isFLAT(*I))
+  auto IsHazardFn = [TRI, MI](const MachineInstr &I) {
+    if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isDS(I) &&
+        !SIInstrInfo::isFLAT(I))
       return false;
 
     for (const MachineOperand &Def : MI->defs()) {
-      MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI);
+      const MachineOperand *Op =
+          I.findRegisterUseOperand(Def.getReg(), false, TRI);
       if (!Op)
         continue;
       return true;
     }
@@ -942,12 +939,12 @@
     return false;
   };
 
-  auto IsExpiredFn = [](MachineInstr *MI, int) {
-    return MI && (SIInstrInfo::isVALU(*MI) ||
-                  (MI->getOpcode() == AMDGPU::S_WAITCNT &&
-                   !MI->getOperand(0).getImm()) ||
-                  (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
-                   MI->getOperand(0).getImm() == 0xffe3));
+  auto IsExpiredFn = [](const MachineInstr &MI, int) {
+    return SIInstrInfo::isVALU(MI) ||
+           (MI.getOpcode() == AMDGPU::S_WAITCNT &&
+            !MI.getOperand(0).getImm()) ||
+           (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
+            MI.getOperand(0).getImm() == 0xffe3);
   };
 
   if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
@@ -996,43 +993,41 @@
     return false;
 
   const Register SDSTReg = SDST->getReg();
-  auto IsHazardFn = [SDSTReg, TRI] (MachineInstr *I) {
-    return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI);
+  auto IsHazardFn = [SDSTReg, TRI](const MachineInstr &I) {
+    return SIInstrInfo::isSMRD(I) && I.readsRegister(SDSTReg, TRI);
   };
 
-  auto IsExpiredFn = [TII, IV] (MachineInstr *MI, int) {
-    if (MI) {
-      if (TII->isSALU(*MI)) {
-        switch (MI->getOpcode()) {
-        case AMDGPU::S_SETVSKIP:
-        case AMDGPU::S_VERSION:
-        case AMDGPU::S_WAITCNT_VSCNT:
-        case AMDGPU::S_WAITCNT_VMCNT:
-        case AMDGPU::S_WAITCNT_EXPCNT:
-          // These instructions cannot not mitigate the hazard.
+  auto IsExpiredFn = [TII, IV](const MachineInstr &MI, int) {
+    if (TII->isSALU(MI)) {
+      switch (MI.getOpcode()) {
+      case AMDGPU::S_SETVSKIP:
+      case AMDGPU::S_VERSION:
+      case AMDGPU::S_WAITCNT_VSCNT:
+      case AMDGPU::S_WAITCNT_VMCNT:
+      case AMDGPU::S_WAITCNT_EXPCNT:
+        // These instructions cannot not mitigate the hazard.
+        return false;
+      case AMDGPU::S_WAITCNT_LGKMCNT:
+        // Reducing lgkmcnt count to 0 always mitigates the hazard.
+        return (MI.getOperand(1).getImm() == 0) &&
+               (MI.getOperand(0).getReg() == AMDGPU::SGPR_NULL);
+      case AMDGPU::S_WAITCNT: {
+        const int64_t Imm = MI.getOperand(0).getImm();
+        AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm);
+        return (Decoded.LgkmCnt == 0);
+      }
+      default:
+        // SOPP instructions cannot mitigate the hazard.
+        if (TII->isSOPP(MI))
          return false;
-        case AMDGPU::S_WAITCNT_LGKMCNT:
-          // Reducing lgkmcnt count to 0 always mitigates the hazard.
-          return (MI->getOperand(1).getImm() == 0) &&
-                 (MI->getOperand(0).getReg() == AMDGPU::SGPR_NULL);
-        case AMDGPU::S_WAITCNT: {
-          const int64_t Imm = MI->getOperand(0).getImm();
-          AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm);
-          return (Decoded.LgkmCnt == 0);
-        }
-        default:
-          // SOPP instructions cannot mitigate the hazard.
-          if (TII->isSOPP(*MI))
-            return false;
-          // At this point the SALU can be assumed to mitigate the hazard
-          // because either:
-          // (a) it is independent of the at risk SMEM (breaking chain),
-          // or
-          // (b) it is dependent on the SMEM, in which case an appropriate
-          // s_waitcnt lgkmcnt _must_ exist between it and the at risk
-          // SMEM instruction.
-          return true;
-        }
+        // At this point the SALU can be assumed to mitigate the hazard
+        // because either:
+        // (a) it is independent of the at risk SMEM (breaking chain),
+        // or
+        // (b) it is dependent on the SMEM, in which case an appropriate
+        // s_waitcnt lgkmcnt _must_ exist between it and the at risk
+        // SMEM instruction.
+        return true;
      }
    }
    return false;
   };
@@ -1056,25 +1051,23 @@
   if (!MI->modifiesRegister(AMDGPU::EXEC, TRI))
     return false;
 
-  auto IsHazardFn = [TRI] (MachineInstr *I) {
-    if (SIInstrInfo::isVALU(*I))
+  auto IsHazardFn = [TRI](const MachineInstr &I) {
+    if (SIInstrInfo::isVALU(I))
       return false;
-    return I->readsRegister(AMDGPU::EXEC, TRI);
+    return I.readsRegister(AMDGPU::EXEC, TRI);
   };
 
   const SIInstrInfo *TII = ST.getInstrInfo();
-  auto IsExpiredFn = [TII, TRI] (MachineInstr *MI, int) {
-    if (!MI)
-      return false;
-    if (SIInstrInfo::isVALU(*MI)) {
-      if (TII->getNamedOperand(*MI, AMDGPU::OpName::sdst))
+  auto IsExpiredFn = [TII, TRI](const MachineInstr &MI, int) {
+    if (SIInstrInfo::isVALU(MI)) {
+      if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst))
         return true;
-      for (auto MO : MI->implicit_operands())
+      for (auto MO : MI.implicit_operands())
        if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
          return true;
    }
-    if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
-        (MI->getOperand(0).getImm() & 0xfffe) == 0xfffe)
+    if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
+        (MI.getOperand(0).getImm() & 0xfffe) == 0xfffe)
      return true;
    return false;
   };
@@ -1093,48 +1086,44 @@
   if (!ST.hasLdsBranchVmemWARHazard())
     return false;
 
-  auto IsHazardInst = [] (const MachineInstr *MI) {
-    if (SIInstrInfo::isDS(*MI))
+  auto IsHazardInst = [](const MachineInstr &MI) {
+    if (SIInstrInfo::isDS(MI))
       return 1;
-    if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isSegmentSpecificFLAT(*MI))
+    if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI))
       return 2;
     return 0;
   };
 
-  auto InstType = IsHazardInst(MI);
+  auto InstType = IsHazardInst(*MI);
   if (!InstType)
     return false;
 
-  auto IsExpiredFn = [&IsHazardInst] (MachineInstr *I, int) {
-    return I && (IsHazardInst(I) ||
-                 (I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
-                  I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
-                  !I->getOperand(1).getImm()));
+  auto IsExpiredFn = [&IsHazardInst](const MachineInstr &I, int) {
+    return IsHazardInst(I) || (I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
+                               I.getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
+                               !I.getOperand(1).getImm());
   };
 
-  auto IsHazardFn = [InstType, &IsHazardInst] (MachineInstr *I) {
-    if (!I->isBranch())
+  auto IsHazardFn = [InstType, &IsHazardInst](const MachineInstr &I) {
+    if (!I.isBranch())
      return false;
 
-    auto IsHazardFn = [InstType, IsHazardInst] (MachineInstr *I) {
+    auto IsHazardFn = [InstType, IsHazardInst](const MachineInstr &I) {
      auto InstType2 = IsHazardInst(I);
      return InstType2 && InstType != InstType2;
    };
 
-    auto IsExpiredFn = [InstType, &IsHazardInst] (MachineInstr *I, int) {
-      if (!I)
-        return false;
-
+    auto IsExpiredFn = [InstType, &IsHazardInst](const MachineInstr &I, int) {
      auto InstType2 = IsHazardInst(I);
      if (InstType == InstType2)
        return true;
 
-      return I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
-             I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
-             !I->getOperand(1).getImm();
+      return I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
+             I.getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
+             !I.getOperand(1).getImm();
    };
 
-    return ::getWaitStatesSince(IsHazardFn, I, IsExpiredFn) !=
+    return ::getWaitStatesSince(IsHazardFn, &I, IsExpiredFn) !=
           std::numeric_limits<int>::max();
   };
@@ -1165,12 +1154,12 @@
   if (!Offset || (Offset->getImm() & 6) == 0)
     return 0;
 
-  auto IsHazardFn = [TII] (MachineInstr *I) {
-    if (!SIInstrInfo::isMIMG(*I))
+  auto IsHazardFn = [TII](const MachineInstr &I) {
+    if (!SIInstrInfo::isMIMG(I))
      return false;
-    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I->getOpcode());
+    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I.getOpcode());
    return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA &&
-           TII->getInstSizeInBytes(*I) >= 16;
+           TII->getInstSizeInBytes(I) >= 16;
   };
 
   return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);
@@ -1182,17 +1171,17 @@
   if (MI->getOpcode() != AMDGPU::S_DENORM_MODE)
     return 0;
 
-  auto IsHazardFn = [] (MachineInstr *I) {
-    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isFLAT(*I))
+  auto IsHazardFn = [](const MachineInstr &I) {
+    if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isFLAT(I))
      return false;
-    return SIInstrInfo::isFPAtomic(*I);
+    return SIInstrInfo::isFPAtomic(I);
   };
 
-  auto IsExpiredFn = [] (MachineInstr *MI, int WaitStates) {
-    if (WaitStates >= 3 || SIInstrInfo::isVALU(*MI))
+  auto IsExpiredFn = [](const MachineInstr &MI, int WaitStates) {
+    if (WaitStates >= 3 || SIInstrInfo::isVALU(MI))
      return true;
 
-    switch (MI->getOpcode()) {
+    switch (MI.getOpcode()) {
    case AMDGPU::S_WAITCNT:
    case AMDGPU::S_WAITCNT_VSCNT:
    case AMDGPU::S_WAITCNT_VMCNT:
@@ -1207,7 +1196,6 @@
    return false;
   };
 
-
   return FPAtomicToDenormModeWaitStates -
         ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
 }
@@ -1222,8 +1210,8 @@
   int WaitStatesNeeded = 0;
   unsigned Opc = MI->getOpcode();
 
-  auto IsVALUFn = [] (MachineInstr *MI) {
-    return SIInstrInfo::isVALU(*MI);
+  auto IsVALUFn = [](const MachineInstr &MI) {
+    return SIInstrInfo::isVALU(MI);
   };
 
   if (Opc != AMDGPU::V_ACCVGPR_READ_B32_e64) { // MFMA or v_accvgpr_write
@@ -1252,10 +1240,10 @@
    }
   }
 
-  auto IsMFMAFn = [] (MachineInstr *MI) {
-    return SIInstrInfo::isMAI(*MI) &&
-           MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
-           MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
+  auto IsMFMAFn = [](const MachineInstr &MI) {
+    return SIInstrInfo::isMAI(MI) &&
+           MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
+           MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
   };
 
   for (const MachineOperand &Op : MI->explicit_operands()) {
@@ -1277,15 +1265,15 @@
    Register Reg = Op.getReg();
    unsigned HazardDefLatency = 0;
-    auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency, this]
-                              (MachineInstr *MI) {
+    auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency,
+                               this](const MachineInstr &MI) {
      if (!IsMFMAFn(MI))
        return false;
-      Register DstReg = MI->getOperand(0).getReg();
+      Register DstReg = MI.getOperand(0).getReg();
      if (DstReg == Reg)
        return false;
-      HazardDefLatency = std::max(HazardDefLatency,
-                                  TSchedModel.computeInstrLatency(MI));
+      HazardDefLatency =
+          std::max(HazardDefLatency, TSchedModel.computeInstrLatency(&MI));
      return TRI.regsOverlap(DstReg, Reg);
    };
@@ -1324,10 +1312,10 @@
    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.
 
-    auto IsAccVgprWriteFn = [Reg, this] (MachineInstr *MI) {
-      if (MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
+    auto IsAccVgprWriteFn = [Reg, this](const MachineInstr &MI) {
+      if (MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
        return false;
-      Register DstReg = MI->getOperand(0).getReg();
+      Register DstReg = MI.getOperand(0).getReg();
      return TRI.regsOverlap(Reg, DstReg);
    };
@@ -1356,13 +1344,13 @@
    Register DstReg = MI->getOperand(0).getReg();
    unsigned HazardDefLatency = 0;
 
-    auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency, this]
-                        (MachineInstr *MI) {
+    auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency,
+                         this](const MachineInstr &MI) {
      if (!IsMFMAFn(MI))
        return false;
-      Register Reg = TII.getNamedOperand(*MI, AMDGPU::OpName::src2)->getReg();
-      HazardDefLatency = std::max(HazardDefLatency,
-                                  TSchedModel.computeInstrLatency(MI));
+      Register Reg = TII.getNamedOperand(MI, AMDGPU::OpName::src2)->getReg();
+      HazardDefLatency =
+          std::max(HazardDefLatency, TSchedModel.computeInstrLatency(&MI));
      return TRI.regsOverlap(Reg, DstReg);
    };
@@ -1389,22 +1377,21 @@
   int WaitStatesNeeded = 0;
   unsigned Opc = MI->getOpcode();
 
-  auto IsMFMAFn = [] (MachineInstr *MI) {
-    return SIInstrInfo::isMAI(*MI) &&
-           MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
-           MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
+  auto IsMFMAFn = [](const MachineInstr &MI) {
+    return SIInstrInfo::isMAI(MI) &&
+           MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
+           MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
   };
 
-  auto IsLegacyVALUFn = [&IsMFMAFn] (MachineInstr *MI) {
-    return SIInstrInfo::isVALU(*MI) && !IsMFMAFn(MI);
+  auto IsLegacyVALUFn = [&IsMFMAFn](const MachineInstr &MI) {
+    return SIInstrInfo::isVALU(MI) && !IsMFMAFn(MI);
   };
 
-  auto IsLegacyVALUNotDotFn = [&IsMFMAFn] (MachineInstr *MI) {
-    return SIInstrInfo::isVALU(*MI) &&
-           !IsMFMAFn(MI) && !SIInstrInfo::isDOT(*MI);
+  auto IsLegacyVALUNotDotFn = [&IsMFMAFn](const MachineInstr &MI) {
+    return SIInstrInfo::isVALU(MI) && !IsMFMAFn(MI) && !SIInstrInfo::isDOT(MI);
   };
 
-  if (!IsMFMAFn(MI))
+  if (!IsMFMAFn(*MI))
    return WaitStatesNeeded;
 
   const int VALUWritesExecWaitStates = 4;
@@ -1438,17 +1425,17 @@
      continue;
    unsigned Reg = Use.getReg();
    bool FullReg;
-    MachineInstr *MI1;
+    const MachineInstr *MI1;
 
-    auto IsOverlappedDGEMMorXDLFn = [Reg, &IsMFMAFn, &FullReg, &MI1, this]
-                                    (MachineInstr *MI) {
+    auto IsOverlappedDGEMMorXDLFn = [Reg, &IsMFMAFn, &FullReg, &MI1,
+                                     this](const MachineInstr &MI) {
      if (!IsMFMAFn(MI))
        return false;
-      if (!isDGEMM(MI->getOpcode()) && !isXDL(ST, *MI))
+      if (!isDGEMM(MI.getOpcode()) && !isXDL(ST, MI))
        return false;
-      Register DstReg = MI->getOperand(0).getReg();
+      Register DstReg = MI.getOperand(0).getReg();
      FullReg = (DstReg == Reg);
-      MI1 = MI;
+      MI1 = &MI;
      return TRI.regsOverlap(DstReg, Reg);
    };
@@ -1549,8 +1536,8 @@
 
   int WaitStatesNeeded = 0;
 
-  auto IsAccVgprReadFn = [] (MachineInstr *MI) {
-    return MI->getOpcode() == AMDGPU::V_ACCVGPR_READ_B32_e64;
+  auto IsAccVgprReadFn = [](const MachineInstr &MI) {
+    return MI.getOpcode() == AMDGPU::V_ACCVGPR_READ_B32_e64;
   };
 
   for (const MachineOperand &Op : MI->explicit_uses()) {
@@ -1570,12 +1557,12 @@
    if (WaitStatesNeeded == MaxWaitStates)
      return WaitStatesNeeded; // Early exit.
 
-    auto IsVALUAccVgprRdWrCheckFn = [Reg, this](MachineInstr *MI) {
-      if (MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64 &&
-          MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
+    auto IsVALUAccVgprRdWrCheckFn = [Reg, this](const MachineInstr &MI) {
+      if (MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64 &&
+          MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
        return false;
-      auto IsVALUFn = [] (MachineInstr *MI) {
-        return SIInstrInfo::isVALU(*MI) && !SIInstrInfo::isMAI(*MI);
+      auto IsVALUFn = [](const MachineInstr &MI) {
+        return SIInstrInfo::isVALU(MI) && !SIInstrInfo::isMAI(MI);
      };
      return getWaitStatesSinceDef(Reg, IsVALUFn, 2 /*MaxWaitStates*/) <
            std::numeric_limits<int>::max();
@@ -1593,18 +1580,18 @@
   if (!ST.hasGFX90AInsts())
     return 0;
 
-  auto IsMFMAFn = [] (MachineInstr *MI) -> bool {
-    return SIInstrInfo::isMAI(*MI) &&
-           MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
-           MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
+  auto IsMFMAFn = [](const MachineInstr &MI) -> bool {
+    return SIInstrInfo::isMAI(MI) &&
+           MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
+           MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
   };
 
-  auto IsDGEMMFn = [] (MachineInstr *MI) -> bool {
-    return isDGEMM(MI->getOpcode());
+  auto IsDGEMMFn = [](const MachineInstr &MI) -> bool {
+    return isDGEMM(MI.getOpcode());
   };
 
   // This is checked in checkMAIHazards90A()
-  if (IsMFMAFn(MI))
+  if (IsMFMAFn(*MI))
    return 0;
 
   int WaitStatesNeeded = 0;
@@ -1615,23 +1602,24 @@
                       SIInstrInfo::isEXP(*MI);
   bool IsVALU = SIInstrInfo::isVALU(*MI);
 
-  MachineInstr *MFMA = nullptr;
+  const MachineInstr *MFMA = nullptr;
   unsigned Reg;
-  auto IsDGEMMorXDLWriteFn = [&Reg, &IsMFMAFn, &MFMA, this] (MachineInstr *MI) {
-    if (!IsMFMAFn(MI) || !TRI.regsOverlap(MI->getOperand(0).getReg(), Reg))
+  auto IsDGEMMorXDLWriteFn = [&Reg, &IsMFMAFn, &MFMA,
+                              this](const MachineInstr &MI) {
+    if (!IsMFMAFn(MI) || !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg))
      return false;
-    if (!isDGEMM(MI->getOpcode()) && !isXDL(ST, *MI))
+    if (!isDGEMM(MI.getOpcode()) && !isXDL(ST, MI))
      return false;
-    MFMA = MI;
+    MFMA = &MI;
    return true;
   };
 
-  MachineInstr *DOT = nullptr;
-  auto IsDotWriteFn = [&Reg, &DOT, this] (MachineInstr *MI) {
-    if (!SIInstrInfo::isDOT(*MI) ||
-        !TRI.regsOverlap(MI->getOperand(0).getReg(), Reg))
+  const MachineInstr *DOT = nullptr;
+  auto IsDotWriteFn = [&Reg, &DOT, this](const MachineInstr &MI) {
+    if (!SIInstrInfo::isDOT(MI) ||
+        !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg))
      return false;
-    DOT = MI;
+    DOT = &MI;
    return true;
   };
@@ -1777,18 +1765,19 @@
      break;
    }
 
-    auto IsSMFMAReadAsCFn = [&Reg, &IsMFMAFn, &MFMA, this]
-                            (MachineInstr *MI) {
-      if (!IsMFMAFn(MI) || isDGEMM(MI->getOpcode()) ||
-          !MI->readsRegister(Reg, &TRI))
+    auto IsSMFMAReadAsCFn = [&Reg, &IsMFMAFn, &MFMA,
+                             this](const MachineInstr &MI) {
+      if (!IsMFMAFn(MI) || isDGEMM(MI.getOpcode()) ||
+          !MI.readsRegister(Reg, &TRI))
        return false;
 
-      MachineOperand *SrcC = TII.getNamedOperand(*MI, AMDGPU::OpName::src2);
+      const MachineOperand *SrcC =
+          TII.getNamedOperand(MI, AMDGPU::OpName::src2);
      assert(SrcC);
      if (!SrcC->isReg() || !TRI.regsOverlap(SrcC->getReg(), Reg))
        return false;
 
-      MFMA = MI;
+      MFMA = &MI;
      return true;
    };
@@ -1821,18 +1810,18 @@
   if (!SU->isInstr())
     return false;
 
-  MachineInstr *MAI = nullptr;
-  auto IsMFMAFn = [&MAI] (MachineInstr *MI) {
+  const MachineInstr *MAI = nullptr;
+  auto IsMFMAFn = [&MAI](const MachineInstr &MI) {
    MAI = nullptr;
-    if (SIInstrInfo::isMAI(*MI) &&
-        MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
-        MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64)
-      MAI = MI;
+    if (SIInstrInfo::isMAI(MI) &&
+        MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
+        MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64)
+      MAI = &MI;
    return MAI != nullptr;
   };
 
   MachineInstr *MI = SU->getInstr();
-  if (IsMFMAFn(MI)) {
+  if (IsMFMAFn(*MI)) {
    int W = getWaitStatesSince(IsMFMAFn, 16);
    if (MAI)
      return W < (int)TSchedModel.computeInstrLatency(MAI);
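
Note (not part of the patch): a minimal sketch of how a caller inside GCNHazardRecognizer.cpp supplies the updated callbacks after this change. The isSALU check and the 8-wait-state limit below are made-up placeholders for illustration; the point is that IsHazardFn and IsExpiredFn now receive a const MachineInstr & that is never null, so the lambdas no longer need the "if (!MI) return false;" guard several of the removed ones carried.

  // Hypothetical usage sketch, assuming a MachineInstr *MI is in scope.
  auto IsHazardFn = [](const MachineInstr &MI) {
    return SIInstrInfo::isSALU(MI); // reference parameter, no null check
  };
  auto IsExpiredFn = [](const MachineInstr &, int WaitStates) {
    return WaitStates >= 8; // assumed search limit, not from the patch
  };
  int WaitStates = ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);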