Diff 449090

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Show First 20 Lines • Show All 2,262 Lines • ▼ Show 20 Lines	int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
auto IsDGEMMFn = [](const MachineInstr &MI) -> bool {		auto IsDGEMMFn = [](const MachineInstr &MI) -> bool {
return isDGEMM(MI.getOpcode());		return isDGEMM(MI.getOpcode());
};		};

// This is checked in checkMAIHazards90A()		// This is checked in checkMAIHazards90A()
if (SIInstrInfo::isMFMA(*MI))		if (SIInstrInfo::isMFMA(*MI))
return 0;		return 0;

		const MachineRegisterInfo &MRI = MF.getRegInfo();

int WaitStatesNeeded = 0;		int WaitStatesNeeded = 0;

bool IsMemOrExport = SIInstrInfo::isVMEM(*MI) \|\|		bool IsMem = SIInstrInfo::isVMEM(*MI) \|\|
SIInstrInfo::isFLAT(*MI) \|\|		SIInstrInfo::isFLAT(*MI) \|\|
SIInstrInfo::isDS(*MI) \|\|		SIInstrInfo::isDS(*MI);
SIInstrInfo::isEXP(*MI);		bool IsMemOrExport = IsMem \|\| SIInstrInfo::isEXP(*MI);
bool IsVALU = SIInstrInfo::isVALU(*MI);		bool IsVALU = SIInstrInfo::isVALU(*MI);

const MachineInstr *MFMA = nullptr;		const MachineInstr *MFMA = nullptr;
unsigned Reg;		unsigned Reg;
auto IsMFMAWriteFn = [&Reg, &MFMA, this](const MachineInstr &MI) {		auto IsMFMAWriteFn = [&Reg, &MFMA, this](const MachineInstr &MI) {
if (!SIInstrInfo::isMFMA(MI) \|\|		if (!SIInstrInfo::isMFMA(MI) \|\|
!TRI.regsOverlap(MI.getOperand(0).getReg(), Reg))		!TRI.regsOverlap(MI.getOperand(0).getReg(), Reg))
return false;		return false;
MFMA = &MI;		MFMA = &MI;
return true;		return true;
};		};

const MachineInstr *DOT = nullptr;		const MachineInstr *DOT = nullptr;
auto IsDotWriteFn = [&Reg, &DOT, this](const MachineInstr &MI) {		auto IsDotWriteFn = [&Reg, &DOT, this](const MachineInstr &MI) {
if (!SIInstrInfo::isDOT(MI) \|\|		if (!SIInstrInfo::isDOT(MI) \|\|
!TRI.regsOverlap(MI.getOperand(0).getReg(), Reg))		!TRI.regsOverlap(MI.getOperand(0).getReg(), Reg))
return false;		return false;
DOT = &MI;		DOT = &MI;
return true;		return true;
};		};

		bool DGEMMAfterVALUWrite = false;
		auto IsDGEMMHazard = [&DGEMMAfterVALUWrite, this](const MachineInstr &MI) {
		// Found DGEMM on reverse traversal to def.
		if (isDGEMM(MI.getOpcode()))
		DGEMMAfterVALUWrite = true;

		// Only hazard if register is defined by a VALU and a DGEMM is found after
		// the def.
		if (!TII.isVALU(MI) \|\| !DGEMMAfterVALUWrite)
		return false;

		return true;
		};

int SrcCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),		int SrcCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::src2);		AMDGPU::OpName::src2);

if (IsMemOrExport \|\| IsVALU) {		if (IsMemOrExport \|\| IsVALU) {
const int SMFMA4x4WriteVgprVALUMemExpReadWaitStates = 5;		const int SMFMA4x4WriteVgprVALUMemExpReadWaitStates = 5;
const int SMFMA16x16WriteVgprVALUMemExpReadWaitStates = 11;		const int SMFMA16x16WriteVgprVALUMemExpReadWaitStates = 11;
const int SMFMA32x32WriteVgprVALUMemExpReadWaitStates = 19;		const int SMFMA32x32WriteVgprVALUMemExpReadWaitStates = 19;
const int GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates = 4;		const int GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates = 4;
const int GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates = 6;		const int GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates = 6;
const int GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates = 10;		const int GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates = 10;
const int GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates = 18;		const int GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates = 18;
const int GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates = 5;		const int GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates = 5;
const int GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates = 7;		const int GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates = 7;
const int GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates = 11;		const int GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates = 11;
const int GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates = 19;		const int GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates = 19;
const int DMFMA4x4WriteVgprMemExpReadWaitStates = 9;		const int DMFMA4x4WriteVgprMemExpReadWaitStates = 9;
const int DMFMA16x16WriteVgprMemExpReadWaitStates = 18;		const int DMFMA16x16WriteVgprMemExpReadWaitStates = 18;
const int DMFMA4x4WriteVgprVALUReadWaitStates = 6;		const int DMFMA4x4WriteVgprVALUReadWaitStates = 6;
const int DMFMA16x16WriteVgprVALUReadWaitStates = 11;		const int DMFMA16x16WriteVgprVALUReadWaitStates = 11;
const int DotWriteSameDotReadSrcAB = 3;		const int DotWriteSameDotReadSrcAB = 3;
const int DotWriteDifferentVALURead = 3;		const int DotWriteDifferentVALURead = 3;
		const int DMFMABetweenVALUWriteVMEMRead = 2;
const int MaxWaitStates = 19;		const int MaxWaitStates = 19;

		rampitecUnsubmitted Done Reply Inline Actions Move it one line above, MaxWaitStates is always last. rampitec: Move it one line above, MaxWaitStates is always last.
for (const MachineOperand &Use : MI->explicit_uses()) {		for (const MachineOperand &Use : MI->explicit_uses()) {
if (!Use.isReg())		if (!Use.isReg())
continue;		continue;
Reg = Use.getReg();		Reg = Use.getReg();

DOT = nullptr;		DOT = nullptr;
int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDotWriteFn,		int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDotWriteFn,
MaxWaitStates);		MaxWaitStates);
if (DOT) {		if (DOT) {
int NeedWaitStates = 0;		int NeedWaitStates = 0;
if (DOT->getOpcode() == MI->getOpcode()) {		if (DOT->getOpcode() == MI->getOpcode()) {
if (&Use - &MI->getOperand(0) != SrcCIdx)		if (&Use - &MI->getOperand(0) != SrcCIdx)
NeedWaitStates = DotWriteSameDotReadSrcAB;		NeedWaitStates = DotWriteSameDotReadSrcAB;
} else {		} else {
NeedWaitStates = DotWriteDifferentVALURead;		NeedWaitStates = DotWriteDifferentVALURead;
}		}

int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;		int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);		WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
}		}

		// Workaround for HW data hazard bug observed only in GFX90A. When there
		// is a DGEMM instruction in-between a VALU and a VMEM instruction it
		// causes the SQ to incorrectly not insert two wait states between the two
		// instructions needed to avoid data hazard.
		if (IsMem && ST.hasGFX90AInsts() && !ST.hasGFX940Insts()) {
		DGEMMAfterVALUWrite = false;
		rampitecUnsubmitted Done Reply Inline Actions There is already `Reg` variable available here. rampitec: There is already `Reg` variable available here.
		rampitecUnsubmitted Done Reply Inline Actions Hoist MRI initialization out of the loop. rampitec: Hoist MRI initialization out of the loop.
		if (TRI.isVectorRegister(MRI, Reg)) {
		int WaitStatesNeededForUse =
		rampitecUnsubmitted Not Done Reply Inline Actions In fact you are using getWaitStatesSinceDef, so all you need to check is that there is a DGEMM on the path. Instead of this whole function you could just `return isDGEMM(MI.getOpcode())`. There is not even a need for a custom IsExpired or for capturing anything. rampitec: In fact you are using getWaitStatesSinceDef, so all you need to check is that there is a DGEMM on…
		vangthaoAuthorUnsubmitted Done Reply Inline Actions I believe `getWaitStatesSinceDef` also includes a check if the MI modifies the reg. Since the DGEMM instruction itself does not modify the reg, it will never return as a hazard. I have simplified the hazard check to include if we saw a DGEMM and if reg is defined by VALU. vangthao: I believe `getWaitStatesSinceDef` also includes a check if the MI modifies the reg. Since the…
		DMFMABetweenVALUWriteVMEMRead -
		getWaitStatesSinceDef(Reg, IsDGEMMHazard,
		DMFMABetweenVALUWriteVMEMRead);

		WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
		}
		}
		rampitecUnsubmitted Not Done Reply Inline Actions This is overkill and at the same time far from sufficient (what if an instruction is a meta/debug instruction?). `getWaitStatesSince` will traverse instructions backwards one by one and take care of all of that. It will either see a DGEMM before the offending VALU or not. In fact, since it is only 2 wait states, the DGEMM must be the first instruction it sees; otherwise you can bail immediately. To bail immediately you could use a custom IsExpired function, or just capture a variable to detect a DGEMM. Whatever you prefer. rampitec: This is overkill and at the same time far from sufficient (what if an instruction is a…

MFMA = nullptr;		MFMA = nullptr;
WaitStatesSinceDef =		WaitStatesSinceDef =
getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates);		getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates);
if (!MFMA)		if (!MFMA)
continue;		continue;

unsigned HazardDefLatency = TSchedModel.computeInstrLatency(MFMA);		unsigned HazardDefLatency = TSchedModel.computeInstrLatency(MFMA);
int NeedWaitStates = MaxWaitStates;		int NeedWaitStates = MaxWaitStates;
▲ Show 20 Lines • Show All 191 Lines • ▼ Show 20 Lines	bool GCNHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
const MachineInstr *MAI = nullptr;		const MachineInstr *MAI = nullptr;

auto IsMFMAFn = [&MAI](const MachineInstr &MI) {		auto IsMFMAFn = [&MAI](const MachineInstr &MI) {
MAI = nullptr;		MAI = nullptr;
if (SIInstrInfo::isMFMA(MI))		if (SIInstrInfo::isMFMA(MI))
MAI = &MI;		MAI = &MI;
return MAI != nullptr;		return MAI != nullptr;
};		};

		rampitecUnsubmitted Not Done Reply Inline Actions There can be a block with fallthrough, and the next block may start with DGEMM. This check for MBB->end() does not work in this case. rampitec: There can be a block with fallthrough, and the next block may start with DGEMM. This check for…
MachineInstr *MI = SU->getInstr();		MachineInstr *MI = SU->getInstr();
if (IsMFMAFn(*MI)) {		if (IsMFMAFn(*MI)) {
int W = getWaitStatesSince(IsMFMAFn, 16);		int W = getWaitStatesSince(IsMFMAFn, 16);
if (MAI)		if (MAI)
return W < (int)TSchedModel.computeInstrLatency(MAI);		return W < (int)TSchedModel.computeInstrLatency(MAI);
}		}
		rampitecUnsubmitted Done Reply Inline Actions There is already a loop across uses above at line 2317, you could just add the check there. rampitec: There is already a loop across uses above at line 2317, you could just add the check there.

return false;		return false;
}		}

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx90a.mir

	Show First 20 Lines • Show All 1,302 Lines • ▼ Show 20 Lines
	# GCN-NEXT: S_NOP 1			# GCN-NEXT: S_NOP 1
	# GCN-NEXT: V_FMAC_F64			# GCN-NEXT: V_FMAC_F64
	name: dgemm_accvgr_to_fmac64			name: dgemm_accvgr_to_fmac64
	body: \|			body: \|
	bb.0:			bb.0:
	$agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec			$agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
	$vgpr4_vgpr5 = V_FMAC_F64_e32 $vgpr4_vgpr5, $vgpr4_vgpr5, $vgpr4_vgpr5, implicit $mode, implicit $exec			$vgpr4_vgpr5 = V_FMAC_F64_e32 $vgpr4_vgpr5, $vgpr4_vgpr5, $vgpr4_vgpr5, implicit $mode, implicit $exec
	...			...
				# GCN-LABEL: name: dgemm_between_valu_write_buffer_store
				# GCN: V_MOV_B32_e32
				# GCN-NEXT: V_MFMA
				# GCN-NEXT: S_NOP 0
				# GCN-NEXT: BUFFER_STORE_DWORD
				name: dgemm_between_valu_write_buffer_store
				body: \|
				bb.0:
				$vgpr0 = V_MOV_B32_e32 0, implicit $exec
				$agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
				BUFFER_STORE_DWORDX2_OFFEN_exact $vgpr2_vgpr3, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
				...
				# GCN-LABEL: name: dgemm_between_valu_write_buffer_load
				# GCN: V_MOV_B32_e32
				# GCN-NEXT: V_MFMA
				# GCN-NEXT: S_NOP 0
				# GCN-NEXT: BUFFER_LOAD_DWORD
				name: dgemm_between_valu_write_buffer_load
				body: \|
				bb.0:
				$vgpr0 = V_MOV_B32_e32 0, implicit $exec
				$agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
				$vgpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec
				...
				# GCN-LABEL: name: dgemm_between_valu_write_global_store
				# GCN: V_MOV_B32_e32
				# GCN-NEXT: V_MFMA
				# GCN-NEXT: S_NOP 0
				# GCN-NEXT: GLOBAL_STORE_DWORD

				name: dgemm_between_valu_write_global_store
				body: \|
				bb.0:
				$vgpr0 = V_MOV_B32_e32 0, implicit $exec
				$agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
				GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec
				...
				# GCN-LABEL: name: dgemm_between_valu_write_global_load
				# GCN: V_MOV_B32_e32
				# GCN-NEXT: V_MOV_B32_e32
				# GCN-NEXT: V_MFMA
				# GCN-NEXT: S_NOP 0
				# GCN-NEXT: GLOBAL_LOAD_DWORD
				name: dgemm_between_valu_write_global_load
				body: \|
				bb.0:
				$vgpr0 = V_MOV_B32_e32 0, implicit $exec
				$vgpr1 = V_MOV_B32_e32 0, implicit $exec
				$agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
				$vgpr2 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec
				...
				# GCN-LABEL: name: dgemm_between_valu_write_ds_write
				# GCN: V_MOV_B32_e32
				# GCN-NEXT: V_MFMA
				# GCN-NEXT: S_NOP 0
				# GCN-NEXT: DS_WRITE_B32
				name: dgemm_between_valu_write_ds_write
				body: \|
				bb.0:
				$vgpr0 = V_MOV_B32_e32 0, implicit $exec
				$agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
				DS_WRITE_B32 $vgpr1, $vgpr0, 0, 0, implicit $m0, implicit $mode, implicit $exec
				...
				# GCN-LABEL: name: dgemm_between_valu_write_ds_read
				# GCN: V_MOV_B32_e32
				# GCN-NEXT: V_MFMA
				# GCN-NEXT: S_NOP 0
				# GCN-NEXT: DS_READ_B32_gfx9
				name: dgemm_between_valu_write_ds_read
				body: \|
				bb.0:
				$vgpr0 = V_MOV_B32_e32 0, implicit $exec
				$agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
				$vgpr1 = DS_READ_B32_gfx9 $vgpr0, 0, 0, implicit $exec
				...
				# GCN-LABEL: name: dgemm_between_valu_write_flat_store
				# GCN: V_MOV_B32_e32
				# GCN-NEXT: V_MOV_B32_e32
				# GCN-NEXT: V_MFMA
				# GCN-NEXT: S_NOP 0
				# GCN-NEXT: FLAT_STORE_DWORD
				name: dgemm_between_valu_write_flat_store
				body: \|
				bb.0:
				$vgpr0 = V_MOV_B32_e32 0, implicit $exec
				$vgpr1 = V_MOV_B32_e32 0, implicit $exec
				$agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
				FLAT_STORE_DWORD $vgpr0_vgpr1, $agpr2, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
				...
				# GCN-LABEL: name: dgemm_between_valu_write_flat_load
				# GCN: V_MOV_B32_e32
				# GCN-NEXT: V_MOV_B32_e32
				# GCN-NEXT: V_MFMA
				# GCN-NEXT: S_NOP 0
				# GCN-NEXT: FLAT_LOAD_DWORD
				name: dgemm_between_valu_write_flat_load
				body: \|
				bb.0:
				$vgpr0 = V_MOV_B32_e32 0, implicit $exec
				$vgpr1 = V_MOV_B32_e32 0, implicit $exec
				$agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
				$vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
				...
				# GCN-LABEL: name: dgemm_between_valu_write_scratch_store
				# GCN: V_MOV_B32_e32
				# GCN-NEXT: V_MFMA
				# GCN-NEXT: S_NOP 0
				# GCN-NEXT: SCRATCH_STORE_DWORD
				name: dgemm_between_valu_write_scratch_store
				body: \|
				bb.0:
				$vgpr0 = V_MOV_B32_e32 0, implicit $exec
				$agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
				SCRATCH_STORE_DWORD $vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
				...
				# GCN-LABEL: name: dgemm_between_valu_write_scratch_load
				# GCN: V_MOV_B32_e32
				# GCN-NEXT: V_MFMA
				# GCN-NEXT: S_NOP 0
				# GCN-NEXT: SCRATCH_LOAD_DWORD
				name: dgemm_between_valu_write_scratch_load
				body: \|
				bb.0:
				$vgpr0 = V_MOV_B32_e32 0, implicit $exec
				$agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
				$vgpr1 = SCRATCH_LOAD_DWORD undef $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
				...
				# GCN-LABEL: name: dgemm_between_valu_write_buffer_store_fallthrough1
				# GCN: V_MOV_B32_e32
				# GCN-NEXT: V_MFMA
				# GCN: bb.1:
				# GCN-NEXT: S_NOP
				# GCN-NEXT: BUFFER_STORE_DWORD
				name: dgemm_between_valu_write_buffer_store_fallthrough1
				body: \|
				bb.0:
				$vgpr0 = V_MOV_B32_e32 0, implicit $exec
				$agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec

				bb.1:
				BUFFER_STORE_DWORDX2_OFFEN_exact $vgpr2_vgpr3, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
				...
				# GCN-LABEL: name: dgemm_between_valu_write_buffer_store_fallthrough2
				# GCN: V_MOV_B32_e32
				# GCN: bb.1:
				# GCN-NEXT: V_MFMA
				# GCN-NEXT: S_NOP
				# GCN-NEXT: BUFFER_STORE_DWORD
				name: dgemm_between_valu_write_buffer_store_fallthrough2
				body: \|
				bb.0:
				$vgpr0 = V_MOV_B32_e32 0, implicit $exec

				bb.1:
				$agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
				BUFFER_STORE_DWORDX2_OFFEN_exact $vgpr2_vgpr3, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
				...
				# GCN-LABEL: name: dgemm_between_valu_write_buffer_store_fallthrough3
				# GCN: V_MOV_B32_e32
				# GCN: bb.1:
				# GCN: bb.2:
				# GCN-NEXT: V_MFMA
				# GCN-NEXT: S_NOP
				# GCN-NEXT: BUFFER_STORE_DWORD
				name: dgemm_between_valu_write_buffer_store_fallthrough3
				body: \|
				bb.0:
				$vgpr0 = V_MOV_B32_e32 0, implicit $exec

				bb.1:

				bb.2:
				$agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
				BUFFER_STORE_DWORDX2_OFFEN_exact $vgpr2_vgpr3, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
				...

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir

	Show First 20 Lines • Show All 2,010 Lines • ▼ Show 20 Lines
	# GCN: V_MFMA			# GCN: V_MFMA
	# GCN-NEXT: V_MOV_B32			# GCN-NEXT: V_MOV_B32
	name: nonxdl_sgemm32x32_mfma_read_vgpr_srcc_valu_write			name: nonxdl_sgemm32x32_mfma_read_vgpr_srcc_valu_write
	body: \|			body: \|
	bb.0:			bb.0:
	$vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_MFMA_F32_32X32X1F32_vgprcd_e64 $agpr26, $agpr28, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec			$vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_MFMA_F32_32X32X1F32_vgprcd_e64 $agpr26, $agpr28, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
	$vgpr1 = V_MOV_B32_e32 0, implicit $exec			$vgpr1 = V_MOV_B32_e32 0, implicit $exec
	...			...
				# GCN-LABEL: name: dgemm_between_valu_write_buffer_store_no_snop
				# GCN: V_MOV_B32_e32
				# GCN-NEXT: V_MFMA_F64
				# GCN-NOT: S_NOP
				# GCN-NEXT: BUFFER_STORE_DWORD
				name: dgemm_between_valu_write_buffer_store_no_snop
				body: \|
				bb.0:
				$vgpr0 = V_MOV_B32_e32 0, implicit $exec
				$agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
				BUFFER_STORE_DWORDX2_OFFEN_exact $vgpr2_vgpr3, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
				...

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Fix DGEMM hazard for GFX90a
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 449090

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx90a.mir

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Fix DGEMM hazard for GFX90aClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 449090

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx90a.mir

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir

[AMDGPU] Fix DGEMM hazard for GFX90a
ClosedPublic