Index: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -16,6 +16,7 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/TargetSchedule.h"
 #include <list>
 
 namespace llvm {
@@ -46,6 +47,7 @@
   const GCNSubtarget &ST;
   const SIInstrInfo &TII;
   const SIRegisterInfo &TRI;
+  TargetSchedModel TSchedModel;
 
   /// RegUnits of uses in the current soft memory clause.
   BitVector ClauseUses;
@@ -92,6 +94,11 @@
   bool fixVcmpxExecWARHazard(MachineInstr *MI);
   bool fixLdsBranchVmemWARHazard(MachineInstr *MI);
 
+  /// Hazard checks for MAI (MFMA / v_accvgpr_*) instructions and for
+  /// loads/stores whose VGPR operands may come from v_accvgpr_read.
+  int checkMAIHazards(MachineInstr *MI);
+  int checkMAILdStHazards(MachineInstr *MI);
+
 public:
   GCNHazardRecognizer(const MachineFunction &MF);
   // We can only issue one instruction per cycle.
Index: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -46,7 +46,8 @@
   TRI(TII.getRegisterInfo()),
   ClauseUses(TRI.getNumRegUnits()),
   ClauseDefs(TRI.getNumRegUnits()) {
-  MaxLookAhead = 5;
+  MaxLookAhead = MF.getRegInfo().isPhysRegUsed(AMDGPU::AGPR0) ? 18 : 5;
+  TSchedModel.init(&ST);
 }
 
 void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
@@ -181,6 +182,12 @@
       checkReadM0Hazards(MI) > 0)
     return NoopHazard;
 
+  if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
+    return NoopHazard;
+
+  if ((MI->mayLoad() || MI->mayStore()) && checkMAILdStHazards(MI) > 0)
+    return NoopHazard;
+
   if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
     return NoopHazard;
 
@@ -286,6 +293,12 @@
   if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
     return std::max(WaitStates, checkReadM0Hazards(MI));
 
+  if (SIInstrInfo::isMAI(*MI))
+    return std::max(WaitStates, checkMAIHazards(MI));
+
+  if (MI->mayLoad() || MI->mayStore())
+    return std::max(WaitStates, checkMAILdStHazards(MI));
+
   return WaitStates;
 }
 
@@ -1179,3 +1192,226 @@
   return FPAtomicToDenormModeWaitStates -
          ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
 }
+
+// Returns the wait states still needed before the MAI instruction \p MI
+// (MFMA, v_accvgpr_read or v_accvgpr_write) can issue: the maximum over the
+// VALU-def, MFMA-def and v_accvgpr_write-def checks below. The result starts
+// at 0 and is only ever raised, so it is never negative.
+int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
+  assert(SIInstrInfo::isMAI(*MI));
+
+  int WaitStatesNeeded = 0;
+  unsigned Opc = MI->getOpcode();
+
+  auto IsVALUFn = [] (MachineInstr *MI) {
+    return SIInstrInfo::isVALU(*MI);
+  };
+
+  if (Opc != AMDGPU::V_ACCVGPR_READ_B32) { // MFMA or v_accvgpr_write
+    const int LegacyVALUWritesVGPRWaitStates = 2;
+    const int VALUWritesExecWaitStates = 4;
+    const int MaxWaitStates = 4;
+
+    int WaitStatesNeededForUse = VALUWritesExecWaitStates -
+      getWaitStatesSinceDef(AMDGPU::EXEC, IsVALUFn, MaxWaitStates);
+    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+    if (WaitStatesNeeded < MaxWaitStates) {
+      for (const MachineOperand &Use : MI->explicit_uses()) {
+        // Shadows the outer MaxWaitStates (4): VGPR defs use a window of 2.
+        const int MaxWaitStates = 2;
+
+        if (!Use.isReg() || !TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
+          continue;
+
+        int WaitStatesNeededForUse = LegacyVALUWritesVGPRWaitStates -
+          getWaitStatesSinceDef(Use.getReg(), IsVALUFn, MaxWaitStates);
+        WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+        if (WaitStatesNeeded == MaxWaitStates)
+          break;
+      }
+    }
+  }
+
+  auto IsMFMAFn = [] (MachineInstr *MI) {
+    return SIInstrInfo::isMAI(*MI) &&
+           MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32 &&
+           MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32;
+  };
+
+  for (const MachineOperand &Op : MI->explicit_operands()) {
+    if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg()))
+      continue;
+
+    if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32)
+      continue;
+
+    const int MFMAWritesAGPROverlappedSrcABWaitStates = 4;
+    const int MFMAWritesAGPROverlappedSrcCWaitStates = 2;
+    const int MFMA4x4WritesAGPRAccVgprReadWaitStates = 4;
+    const int MFMA16x16WritesAGPRAccVgprReadWaitStates = 10;
+    const int MFMA32x32WritesAGPRAccVgprReadWaitStates = 18;
+    const int MFMA4x4WritesAGPRAccVgprWriteWaitStates = 1;
+    const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7;
+    const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15;
+    const int MaxWaitStates = 18;
+    unsigned Reg = Op.getReg();
+    unsigned HazardDefLatency = 0;
+
+    auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency, this]
+                              (MachineInstr *MI) {
+      if (!IsMFMAFn(MI))
+        return false;
+      unsigned DstReg = MI->getOperand(0).getReg();
+      // A def of exactly the same register is not counted as an overlap.
+      if (DstReg == Reg)
+        return false;
+      HazardDefLatency = std::max(HazardDefLatency,
+                                  TSchedModel.computeInstrLatency(MI));
+      return TRI.regsOverlap(DstReg, Reg);
+    };
+
+    int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn,
+                                                   MaxWaitStates);
+    int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates;
+    int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
+    int OpNo = MI->getOperandNo(&Op);
+    if (OpNo == SrcCIdx) {
+      NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates;
+    } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32) {
+      switch (HazardDefLatency) {
+      case 2:  NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates;
+               break;
+      case 8:  NeedWaitStates = MFMA16x16WritesAGPRAccVgprReadWaitStates;
+               break;
+      case 16: LLVM_FALLTHROUGH;
+      default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprReadWaitStates;
+               break;
+      }
+    } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
+      switch (HazardDefLatency) {
+      case 2:  NeedWaitStates = MFMA4x4WritesAGPRAccVgprWriteWaitStates;
+               break;
+      case 8:  NeedWaitStates = MFMA16x16WritesAGPRAccVgprWriteWaitStates;
+               break;
+      case 16: LLVM_FALLTHROUGH;
+      default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprWriteWaitStates;
+               break;
+      }
+    }
+
+    int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
+    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+    if (WaitStatesNeeded == MaxWaitStates)
+      return WaitStatesNeeded; // Early exit.
+
+    auto IsAccVgprWriteFn = [Reg, this] (MachineInstr *MI) {
+      if (MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32)
+        return false;
+      unsigned DstReg = MI->getOperand(0).getReg();
+      return TRI.regsOverlap(Reg, DstReg);
+    };
+
+    const int AccVGPRWriteMFMAReadSrcCWaitStates = 1;
+    const int AccVGPRWriteMFMAReadSrcABWaitStates = 3;
+    const int AccVGPRWriteAccVgprReadWaitStates = 3;
+    NeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates;
+    if (OpNo == SrcCIdx)
+      NeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates;
+    else if (Opc == AMDGPU::V_ACCVGPR_READ_B32)
+      NeedWaitStates = AccVGPRWriteAccVgprReadWaitStates;
+
+    WaitStatesNeededForUse = NeedWaitStates -
+      getWaitStatesSinceDef(Reg, IsAccVgprWriteFn, MaxWaitStates);
+    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+    if (WaitStatesNeeded == MaxWaitStates)
+      return WaitStatesNeeded; // Early exit.
+  }
+
+  if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
+    const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0;
+    const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5;
+    const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13;
+    const int MaxWaitStates = 13;
+    unsigned DstReg = MI->getOperand(0).getReg();
+    unsigned HazardDefLatency = 0;
+
+    auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency, this]
+                         (MachineInstr *MI) {
+      if (!IsMFMAFn(MI))
+        return false;
+      unsigned Reg = TII.getNamedOperand(*MI, AMDGPU::OpName::src2)->getReg();
+      HazardDefLatency = std::max(HazardDefLatency,
+                                  TSchedModel.computeInstrLatency(MI));
+      return TRI.regsOverlap(Reg, DstReg);
+    };
+
+    int WaitStatesSince = getWaitStatesSince(IsSrcCMFMAFn, MaxWaitStates);
+    int NeedWaitStates;
+    switch (HazardDefLatency) {
+    case 2:  NeedWaitStates = MFMA4x4ReadSrcCAccVgprWriteWaitStates;
+             break;
+    case 8:  NeedWaitStates = MFMA16x16ReadSrcCAccVgprWriteWaitStates;
+             break;
+    case 16: LLVM_FALLTHROUGH;
+    default: NeedWaitStates = MFMA32x32ReadSrcCAccVgprWriteWaitStates;
+             break;
+    }
+
+    int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSince;
+    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+  }
+
+  return WaitStatesNeeded;
+}
+
+// Returns the wait states still needed before the load/store \p MI can issue,
+// checking each explicit VGPR use against preceding v_accvgpr_read
+// instructions. Returns 0 on subtargets without MAI instructions.
+int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
+  if (!ST.hasMAIInsts())
+    return 0;
+
+  int WaitStatesNeeded = 0;
+
+  auto IsAccVgprReadFn = [] (MachineInstr *MI) {
+    return MI->getOpcode() == AMDGPU::V_ACCVGPR_READ_B32;
+  };
+
+  for (const MachineOperand &Op : MI->explicit_uses()) {
+    if (!Op.isReg() || !TRI.isVGPR(MF.getRegInfo(), Op.getReg()))
+      continue;
+
+    unsigned Reg = Op.getReg();
+
+    const int AccVgprReadLdStWaitStates = 2;
+    const int VALUWriteAccVgprReadLdStDepVALUWaitStates = 1;
+    const int MaxWaitStates = 2;
+
+    int WaitStatesNeededForUse = AccVgprReadLdStWaitStates -
+      getWaitStatesSinceDef(Reg, IsAccVgprReadFn, MaxWaitStates);
+    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+    if (WaitStatesNeeded == MaxWaitStates)
+      return WaitStatesNeeded; // Early exit.
+
+    auto IsVALUAccVgprReadCheckFn = [Reg, this] (MachineInstr *MI) {
+      if (MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32)
+        return false;
+      auto IsVALUFn = [] (MachineInstr *MI) {
+        return SIInstrInfo::isVALU(*MI) && !SIInstrInfo::isMAI(*MI);
+      };
+      return getWaitStatesSinceDef(Reg, IsVALUFn, 2 /*MaxWaitStates*/) <
+             std::numeric_limits<int>::max();
+    };
+
+    WaitStatesNeededForUse = VALUWriteAccVgprReadLdStDepVALUWaitStates -
+      getWaitStatesSince(IsVALUAccVgprReadCheckFn, MaxWaitStates);
+    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+  }
+
+  return WaitStatesNeeded;
+}
Index: llvm/trunk/test/CodeGen/AMDGPU/mai-hazards.mir
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mai-hazards.mir
+++ llvm/trunk/test/CodeGen/AMDGPU/mai-hazards.mir
@@ -0,0 +1,457 @@
+# RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: valu_write_vgpr_mfma_read
+# GCN: V_MOV_B32
+# GCN: V_MOV_B32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name: valu_write_vgpr_mfma_read
+body: |
+  bb.0:
+    $vgpr0 = V_MOV_B32_e32 1, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec
+...
+---
+
+# GCN-LABEL: name: valu_write_vgpr_accvgpr_write_read
+# GCN: V_MOV_B32
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_ACCVGPR_WRITE_B32
+name: valu_write_vgpr_accvgpr_write_read
+body: |
+  bb.0:
+    $vgpr0 = V_MOV_B32_e32 1, implicit $exec
+    $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
+...
+--- + +# GCN-LABEL: name: mfma_write_agpr_mfma_read_same_agpr +# GCN: V_MFMA +# GCN-NEXT: V_MFMA +name: mfma_write_agpr_mfma_read_same_agpr +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec +... +--- + +# GCN-LABEL: name: mfma_write_agpr_mfma_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_MFMA +name: mfma_write_agpr_mfma_read_overlap +body: | + bb.0: + $agpr1_agpr2_agpr3_agpr4 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec +... +--- + +# GCN-LABEL: name: mfma_write_agpr_mfma_read_partial +# GCN: V_MFMA +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_MFMA +name: mfma_write_agpr_mfma_read_partial +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec +... 
+--- + +# GCN-LABEL: name: mfma_write_agpr_mfma_srca_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_MFMA +name: mfma_write_agpr_mfma_srca_read_overlap +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $agpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec +... +--- + +# GCN-LABEL: name: mfma_write_agpr_mfma_srcb_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_MFMA +name: mfma_write_agpr_mfma_srcb_read_overlap +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $agpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec +... +--- + +# GCN-LABEL: name: mfma_4x4_write_agpr_accvgpr_read +# GCN: V_MFMA_F32_4X4X1F32 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_ACCVGPR_READ_B32 +name: mfma_4x4_write_agpr_accvgpr_read +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec + $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec +... 
+--- + +# GCN-LABEL: name: mfma_16x16_write_agpr_accvgpr_read +# GCN: V_MFMA_F32_16X16X1F32 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_ACCVGPR_READ_B32 +name: mfma_16x16_write_agpr_accvgpr_read +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec + $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec +... +--- + +# GCN-LABEL: name: mfma_32x32_write_agpr_accvgpr_read +# GCN: V_MFMA_F32_32X32X2F32 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_ACCVGPR_READ_B32 +name: mfma_32x32_write_agpr_accvgpr_read +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec + $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec +... 
+--- + +# GCN-LABEL: name: mfma_4x4_write_agpr_accvgpr_write +# GCN: V_MFMA_F32_4X4X1F32 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_ACCVGPR_WRITE_B32 +name: mfma_4x4_write_agpr_accvgpr_write +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec + $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec +... +--- + +# GCN-LABEL: name: mfma_16x16_write_agpr_accvgpr_write +# GCN: V_MFMA_F32_16X16X1F32 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_ACCVGPR_WRITE_B32 +name: mfma_16x16_write_agpr_accvgpr_write +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec + $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec +... +--- + +# GCN-LABEL: name: mfma_32x32_write_agpr_accvgpr_write +# GCN: V_MFMA_F32_32X32X2F32 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_ACCVGPR_WRITE_B32 +name: mfma_32x32_write_agpr_accvgpr_write +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec + $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec +... 
+--- + +# GCN-LABEL: name: mfma_4x4_read_srcc_accvgpr_write +# GCN: V_MFMA_F32_4X4X1F32 +# GCN-NEXT: V_ACCVGPR_WRITE_B32 +name: mfma_4x4_read_srcc_accvgpr_write +body: | + bb.0: + $agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec + $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec +... +--- + +# GCN-LABEL: name: mfma_16x16_read_srcc_accvgpr_write +# GCN: V_MFMA_F32_16X16X1F32 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_ACCVGPR_WRITE_B32 +name: mfma_16x16_read_srcc_accvgpr_write +body: | + bb.0: + $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec + $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec +... +--- + +# GCN-LABEL: name: mfma_32x32_read_srcc_accvgpr_write +# GCN: V_MFMA_F32_32X32X2F32 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_ACCVGPR_WRITE_B32 +name: mfma_32x32_read_srcc_accvgpr_write +body: | + bb.0: + $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X2F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec + $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec +... 
+--- + +# GCN-LABEL: name: accvgpr_read_write_vgpr_valu_read +# GCN: V_ACCVGPR_READ_B32 +# GCN-NEXT: V_ADD_F32 +name: accvgpr_read_write_vgpr_valu_read +body: | + bb.0: + $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr4, implicit $exec + $vgpr1 = V_ADD_F32_e32 0, killed $vgpr0, implicit $exec +... +--- + +# GCN-LABEL: name: accvgpr_read_write_vgpr_mfma_read +# GCN: V_ACCVGPR_READ_B32 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_MFMA +name: accvgpr_read_write_vgpr_mfma_read +body: | + bb.0: + $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr4, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr0, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec +... +--- + +# GCN-LABEL: name: accvgpr_read_write_vgpr_accvgpr_write_read +# GCN: V_ACCVGPR_READ_B32 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_ACCVGPR_WRITE_B32 +name: accvgpr_read_write_vgpr_accvgpr_write_read +body: | + bb.0: + $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec + $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec +... +--- + +# GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srcc +# GCN: V_ACCVGPR_WRITE_B32 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_MFMA +name: accvgpr_write_agpr_mfma_read_srcc +body: | + bb.0: + $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + $agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr2, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec +... +--- + +# GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srca +# GCN: V_ACCVGPR_WRITE_B32 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_MFMA +name: accvgpr_write_agpr_mfma_read_srca +body: | + bb.0: + $agpr8 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + $agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $agpr8, killed $vgpr1, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec +... 
+--- + +# GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srcb +# GCN: V_ACCVGPR_WRITE_B32 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_MFMA +name: accvgpr_write_agpr_mfma_read_srcb +body: | + bb.0: + $agpr8 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + $agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $agpr8, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec +... +--- + +# GCN-LABEL: name: accvgpr_write_agpr_accvgpr_read +# GCN: V_ACCVGPR_WRITE_B32 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_ACCVGPR_READ_B32 +name: accvgpr_write_agpr_accvgpr_read +body: | + bb.0: + $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec + $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec +... +--- + +# GCN-LABEL: name: vcmpx_write_exec_mfma +# GCN: V_CMPX_EQ_I32_e32 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_MFMA +name: vcmpx_write_exec_mfma +body: | + bb.0: + implicit $exec, implicit $vcc = V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec + $agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $agpr8, killed $vgpr1, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec +... +--- + +# GCN-LABEL: name: vcmpx_write_exec_accvgpr_write +# GCN: V_CMPX_EQ_I32_e32 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_ACCVGPR_WRITE_B32 +name: vcmpx_write_exec_accvgpr_write +body: | + bb.0: + implicit $exec, implicit $vcc = V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec + $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec +... 
+--- + +# GCN-LABEL: name: accvgpr_read_write_vgpr_load +# GCN: V_ACCVGPR_READ_B32 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: FLAT_LOAD_DWORD +name: accvgpr_read_write_vgpr_load +body: | + bb.0: + $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec + $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr +... +--- + +# GCN-LABEL: name: accvgpr_read_write_vgpr_store +# GCN: V_ACCVGPR_READ_B32 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: DS_WRITE_B32 +name: accvgpr_read_write_vgpr_store +body: | + bb.0: + $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec + DS_WRITE_B32 $vgpr0, $vgpr1, 0, 0, implicit $m0, implicit $exec +... +--- + +# GCN-LABEL: name: valu_write_vgpr_accvgpr_read_load_no_dependency +# GCN: V_MOV_B32 +# GCN-NEXT: V_ACCVGPR_READ_B32 +# GCN-NEXT: FLAT_LOAD_DWORD +name: valu_write_vgpr_accvgpr_read_load_no_dependency +body: | + bb.0: + $vgpr0 = V_MOV_B32_e32 1, implicit $exec + $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec + $vgpr4 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr +... +--- + +# GCN-LABEL: name: valu_write_vgpr_accvgpr_read_load_1_and_3_depend +# GCN: V_MOV_B32 +# GCN-NEXT: V_ACCVGPR_READ_B32 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: FLAT_LOAD_DWORD +name: valu_write_vgpr_accvgpr_read_load_1_and_3_depend +body: | + bb.0: + $vgpr0 = V_MOV_B32_e32 1, implicit $exec + $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec + $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr +... 
+--- + +# GCN-LABEL: name: valu_write_vgpr_accvgpr_read_load_2_and_3_depend +# GCN: V_MOV_B32 +# GCN-NEXT: V_ACCVGPR_READ_B32 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: FLAT_LOAD_DWORD +name: valu_write_vgpr_accvgpr_read_load_2_and_3_depend +body: | + bb.0: + $vgpr0 = V_MOV_B32_e32 1, implicit $exec + $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec + $vgpr4 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr +... +---