Index: llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp =================================================================== --- llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -1442,12 +1442,10 @@ bool FullReg; const MachineInstr *MI1; - auto IsOverlappedDGEMMorXDLFn = [Reg, &IsMFMAFn, &FullReg, &MI1, - this](const MachineInstr &MI) { + auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &FullReg, &MI1, + this](const MachineInstr &MI) { if (!IsMFMAFn(MI)) return false; - if (!isDGEMM(MI.getOpcode()) && !isXDL(ST, MI)) - return false; Register DstReg = MI.getOperand(0).getReg(); FullReg = (DstReg == Reg); MI1 = &MI; @@ -1458,8 +1456,8 @@ getWaitStatesSinceDef(Reg, IsLegacyVALUNotDotFn, MaxWaitStates); WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); - int NumWaitStates = getWaitStatesSinceDef(Reg, IsOverlappedDGEMMorXDLFn, - MaxWaitStates); + int NumWaitStates = + getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn, MaxWaitStates); if (NumWaitStates == std::numeric_limits<int>::max()) continue; @@ -1619,12 +1617,9 @@ const MachineInstr *MFMA = nullptr; unsigned Reg; - auto IsDGEMMorXDLWriteFn = [&Reg, &IsMFMAFn, &MFMA, - this](const MachineInstr &MI) { + auto IsMFMAWriteFn = [&Reg, &IsMFMAFn, &MFMA, this](const MachineInstr &MI) { if (!IsMFMAFn(MI) || !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg)) return false; - if (!isDGEMM(MI.getOpcode()) && !isXDL(ST, MI)) - return false; MFMA = &MI; return true; }; @@ -1675,8 +1670,8 @@ } MFMA = nullptr; - WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDGEMMorXDLWriteFn, - MaxWaitStates); + WaitStatesSinceDef = + getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates); if (!MFMA) continue; @@ -1750,8 +1745,8 @@ WaitStatesSinceDef); MFMA = nullptr; - WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDGEMMorXDLWriteFn, - MaxWaitStates); + WaitStatesSinceDef = + getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates); if (MFMA) { int NeedWaitStates = MaxWaitStates; 
switch (TSchedModel.computeInstrLatency(MFMA)) {