Please use GitHub pull requests for new patches. Phabricator shutdown timeline
Changeset View
Changeset View
Standalone View
Standalone View
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
Show First 20 Lines • Show All 2,020 Lines • ▼ Show 20 Lines | auto IsOverlappedMFMAFn = [Reg, &HazardDefLatency, | ||||
std::max(HazardDefLatency, TSchedModel.computeInstrLatency(&MI)); | std::max(HazardDefLatency, TSchedModel.computeInstrLatency(&MI)); | ||||
return TRI.regsOverlap(DstReg, Reg); | return TRI.regsOverlap(DstReg, Reg); | ||||
}; | }; | ||||
int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn, | int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn, | ||||
MaxWaitStates); | MaxWaitStates); | ||||
int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates; | int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates; | ||||
int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); | int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); | ||||
int OpNo = MI->getOperandNo(&Op); | int OpNo = Op.getOperandNo(); | ||||
if (OpNo == SrcCIdx) { | if (OpNo == SrcCIdx) { | ||||
NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates; | NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates; | ||||
} else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64) { | } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64) { | ||||
switch (HazardDefLatency) { | switch (HazardDefLatency) { | ||||
case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates; | case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates; | ||||
break; | break; | ||||
case 8: NeedWaitStates = MFMA16x16WritesAGPRAccVgprReadWaitStates; | case 8: NeedWaitStates = MFMA16x16WritesAGPRAccVgprReadWaitStates; | ||||
break; | break; | ||||
▲ Show 20 Lines • Show All 162 Lines • ▼ Show 20 Lines | WaitStatesNeededForUse = LegacyVALUNotDotWritesVGPRWaitStates - | ||||
getWaitStatesSinceDef(Reg, IsLegacyVALUNotDotFn, MaxWaitStates); | getWaitStatesSinceDef(Reg, IsLegacyVALUNotDotFn, MaxWaitStates); | ||||
WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); | WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); | ||||
int NumWaitStates = | int NumWaitStates = | ||||
getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn, MaxWaitStates); | getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn, MaxWaitStates); | ||||
if (NumWaitStates == std::numeric_limits<int>::max()) | if (NumWaitStates == std::numeric_limits<int>::max()) | ||||
continue; | continue; | ||||
int OpNo = MI->getOperandNo(&Use); | int OpNo = Use.getOperandNo(); | ||||
unsigned Opc1 = MI1->getOpcode(); | unsigned Opc1 = MI1->getOpcode(); | ||||
int NeedWaitStates = 0; | int NeedWaitStates = 0; | ||||
if (OpNo == SrcCIdx) { | if (OpNo == SrcCIdx) { | ||||
if (!isDGEMM(Opc) && (!ST.hasGFX940Insts() && isDGEMM(Opc1))) { | if (!isDGEMM(Opc) && (!ST.hasGFX940Insts() && isDGEMM(Opc1))) { | ||||
NeedWaitStates = 0; | NeedWaitStates = 0; | ||||
} else if (FullReg) { | } else if (FullReg) { | ||||
if ((Opc == AMDGPU::V_MFMA_F64_4X4X4F64_e64 || | if ((Opc == AMDGPU::V_MFMA_F64_4X4X4F64_e64 || | ||||
Opc == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64) && | Opc == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64) && | ||||
▲ Show 20 Lines • Show All 634 Lines • Show Last 20 Lines |