diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -133,7 +133,10 @@ Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64) return false; - return true; + if (!ST.hasGFX940Insts()) + return true; + + return AMDGPU::getMAIIsGFX940XDL(Opcode); } static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII, @@ -1494,6 +1497,13 @@ for (const MachineOperand &Use : MI->explicit_uses()) { const int LegacyVALUNotDotWritesVGPRWaitStates = 2; const int SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates = 2; + const int GFX940_XDL2PassWritesVGPROverlappedSMFMASrcCWaitStates = 3; + const int GFX940_XDL4PassWritesVGPROverlappedSMFMASrcCWaitStates = 5; + const int GFX940_SMFMA4PassWritesVGPROverlappedSMFMASrcCWaitStates = 4; + const int GFX940_XDL8PassWritesVGPROverlappedSMFMASrcCWaitStates = 9; + const int GFX940_SMFMA8PassWritesVGPROverlappedSMFMASrcCWaitStates = 8; + const int GFX940_XDL16PassWritesVGPROverlappedSMFMASrcCWaitStates = 17; + const int GFX940_SMFMA16PassWritesVGPROverlappedSMFMASrcCWaitStates = 16; const int SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates = 8; const int SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates = 16; const int SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates = 3; @@ -1504,9 +1514,18 @@ const int SMFMA4x4WritesVGPROverlappedSrcABWaitStates = 5; const int SMFMA16x16WritesVGPROverlappedSrcABWaitStates = 11; const int SMFMA32x32WritesVGPROverlappedSrcABWaitStates = 19; + const int GFX940_SMFMA2PassWritesVGPROverlappedSrcABWaitStates = 4; + const int GFX940_SMFMA4PassWritesVGPROverlappedSrcABWaitStates = 6; + const int GFX940_SMFMA8PassWritesVGPROverlappedSrcABWaitStates = 10; + const int GFX940_SMFMA16PassWritesVGPROverlappedSrcABWaitStates = 18; + const int GFX940_XDL2PassWritesVGPROverlappedSrcABWaitStates = 5; + const int GFX940_XDL4PassWritesVGPROverlappedSrcABWaitStates = 7; + const int 
GFX940_XDL8PassWritesVGPROverlappedSrcABWaitStates = 11; + const int GFX940_XDL16PassWritesVGPROverlappedSrcABWaitStates = 19; const int DMFMA4x4WritesVGPROverlappedMFMASrcABWaitStates = 6; const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11; const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4; + const int GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2; const int MaxWaitStates = 19; if (!Use.isReg()) @@ -1538,7 +1557,7 @@ unsigned Opc1 = MI1->getOpcode(); int NeedWaitStates = 0; if (OpNo == SrcCIdx) { - if (!isDGEMM(Opc) && isDGEMM(Opc1)) { + if (!isDGEMM(Opc) && (!ST.hasGFX940Insts() && isDGEMM(Opc1))) { NeedWaitStates = 0; } else if (FullReg) { if ((Opc == AMDGPU::V_MFMA_F64_4X4X4F64_e64 || @@ -1546,6 +1565,9 @@ (Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_e64 || Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64)) NeedWaitStates = DMFMA4x4WritesVGPRFullSrcCWaitStates; + else if (ST.hasGFX940Insts() && + TSchedModel.computeInstrLatency(MI1) == 2) + NeedWaitStates = GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates; } else { switch (Opc1) { case AMDGPU::V_MFMA_F64_16X16X4F64_e64: @@ -1561,22 +1583,42 @@ NeedWaitStates = DMFMA4x4WritesVGPROverlappedSrcCWaitStates; break; default: + if (ST.hasGFX940Insts() && isXDL(ST, *MI) && !isXDL(ST, *MI1)) + break; switch (TSchedModel.computeInstrLatency(MI1)) { case 2: - NeedWaitStates = isDGEMM(Opc) - ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates - : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates; + NeedWaitStates = ST.hasGFX940Insts() + ? isXDL(ST, *MI1) + ? GFX940_XDL2PassWritesVGPROverlappedSMFMASrcCWaitStates + : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates + : isDGEMM(Opc) + ? SMFMA4x4WritesVGPROverlappedDMFMASrcCWaitStates + : SMFMA4x4WritesVGPROverlappedSMFMASrcCWaitStates; + break; + case 4: + assert(ST.hasGFX940Insts()); + NeedWaitStates = isXDL(ST, *MI1) + ? 
GFX940_XDL4PassWritesVGPROverlappedSMFMASrcCWaitStates + : GFX940_SMFMA4PassWritesVGPROverlappedSMFMASrcCWaitStates; break; case 8: - NeedWaitStates = isDGEMM(Opc) - ? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates - : SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates; + NeedWaitStates = ST.hasGFX940Insts() + ? isXDL(ST, *MI1) + ? GFX940_XDL8PassWritesVGPROverlappedSMFMASrcCWaitStates + : GFX940_SMFMA8PassWritesVGPROverlappedSMFMASrcCWaitStates + : isDGEMM(Opc) + ? SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates + : SMFMA16x16WritesVGPROverlappedSMFMASrcCWaitStates; break; case 16: LLVM_FALLTHROUGH; default: - NeedWaitStates = isDGEMM(Opc) - ? SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates - : SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates; + NeedWaitStates = ST.hasGFX940Insts() + ? isXDL(ST, *MI1) + ? GFX940_XDL16PassWritesVGPROverlappedSMFMASrcCWaitStates + : GFX940_SMFMA16PassWritesVGPROverlappedSMFMASrcCWaitStates + : isDGEMM(Opc) + ? SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates + : SMFMA32x32WritesVGPROverlappedSMFMASrcCWaitStates; } } } @@ -1595,14 +1637,32 @@ default: switch (TSchedModel.computeInstrLatency(MI1)) { case 2: - NeedWaitStates = SMFMA4x4WritesVGPROverlappedSrcABWaitStates; + NeedWaitStates = ST.hasGFX940Insts() + ? isXDL(ST, *MI1) + ? GFX940_XDL2PassWritesVGPROverlappedSrcABWaitStates + : GFX940_SMFMA2PassWritesVGPROverlappedSrcABWaitStates + : SMFMA4x4WritesVGPROverlappedSrcABWaitStates; + break; + case 4: + assert(ST.hasGFX940Insts()); + NeedWaitStates = isXDL(ST, *MI1) + ? GFX940_XDL4PassWritesVGPROverlappedSrcABWaitStates + : GFX940_SMFMA4PassWritesVGPROverlappedSrcABWaitStates; break; case 8: - NeedWaitStates = SMFMA16x16WritesVGPROverlappedSrcABWaitStates; + NeedWaitStates = ST.hasGFX940Insts() + ? isXDL(ST, *MI1) + ? 
GFX940_XDL8PassWritesVGPROverlappedSrcABWaitStates + : GFX940_SMFMA8PassWritesVGPROverlappedSrcABWaitStates + : SMFMA16x16WritesVGPROverlappedSrcABWaitStates; break; case 16: LLVM_FALLTHROUGH; default: - NeedWaitStates = SMFMA32x32WritesVGPROverlappedSrcABWaitStates; + NeedWaitStates = ST.hasGFX940Insts() + ? isXDL(ST, *MI1) + ? GFX940_XDL16PassWritesVGPROverlappedSrcABWaitStates + : GFX940_SMFMA16PassWritesVGPROverlappedSrcABWaitStates + : SMFMA32x32WritesVGPROverlappedSrcABWaitStates; } } } @@ -1717,6 +1777,14 @@ const int SMFMA4x4WriteVgprVALUMemExpReadWaitStates = 5; const int SMFMA16x16WriteVgprVALUMemExpReadWaitStates = 11; const int SMFMA32x32WriteVgprVALUMemExpReadWaitStates = 19; + const int GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates = 4; + const int GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates = 6; + const int GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates = 10; + const int GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates = 18; + const int GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates = 5; + const int GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates = 7; + const int GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates = 11; + const int GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates = 19; const int DMFMA4x4WriteVgprMemExpReadWaitStates = 9; const int DMFMA16x16WriteVgprMemExpReadWaitStates = 18; const int DMFMA4x4WriteVgprVALUReadWaitStates = 6; @@ -1756,16 +1824,30 @@ int NeedWaitStates = MaxWaitStates; switch (HazardDefLatency) { case 2: - NeedWaitStates = SMFMA4x4WriteVgprVALUMemExpReadWaitStates; + NeedWaitStates = + ST.hasGFX940Insts() + ? isXDL(ST, *MFMA) + ? GFX940_XDL2PassWriteVgprVALUMemExpReadWaitStates + : GFX940_SMFMA2PassWriteVgprVALUMemExpReadWaitStates + : SMFMA4x4WriteVgprVALUMemExpReadWaitStates; break; case 4: assert(isDGEMM(MFMA->getOpcode()) || ST.hasGFX940Insts()); NeedWaitStates = - IsMemOrExport ? DMFMA4x4WriteVgprMemExpReadWaitStates - : DMFMA4x4WriteVgprVALUReadWaitStates; + isDGEMM(MFMA->getOpcode()) + ? 
IsMemOrExport ? DMFMA4x4WriteVgprMemExpReadWaitStates + : DMFMA4x4WriteVgprVALUReadWaitStates + : isXDL(ST, *MFMA) + ? GFX940_XDL4PassWriteVgprVALUMemExpReadWaitStates + : GFX940_SMFMA4PassWriteVgprVALUMemExpReadWaitStates; break; case 8: - NeedWaitStates = SMFMA16x16WriteVgprVALUMemExpReadWaitStates; + NeedWaitStates = + ST.hasGFX940Insts() + ? isXDL(ST, *MFMA) + ? GFX940_XDL8PassWriteVgprVALUMemExpReadWaitStates + : GFX940_SMFMA8PassWriteVgprVALUMemExpReadWaitStates + : SMFMA16x16WriteVgprVALUMemExpReadWaitStates; break; case 16: LLVM_FALLTHROUGH; default: @@ -1773,7 +1855,11 @@ isDGEMM(MFMA->getOpcode()) ? IsMemOrExport ? DMFMA16x16WriteVgprMemExpReadWaitStates : DMFMA16x16WriteVgprVALUReadWaitStates - : SMFMA32x32WriteVgprVALUMemExpReadWaitStates; + : ST.hasGFX940Insts() + ? isXDL(ST, *MFMA) + ? GFX940_XDL16PassWriteVgprVALUMemExpReadWaitStates + : GFX940_SMFMA16PassWriteVgprVALUMemExpReadWaitStates + : SMFMA32x32WriteVgprVALUMemExpReadWaitStates; break; } @@ -1803,7 +1889,16 @@ const int SMFMA4x4WriteVgprVALUWawWaitStates = 5; const int SMFMA16x16WriteVgprVALUWawWaitStates = 11; const int SMFMA32x32WriteVgprVALUWawWaitStates = 19; + const int GFX940_SMFMA2PassWriteVgprVALUWawWaitStates = 4; + const int GFX940_SMFMA4PassWriteVgprVALUWawWaitStates = 6; + const int GFX940_SMFMA8PassWriteVgprVALUWawWaitStates = 10; + const int GFX940_SMFMA16PassWriteVgprVALUWawWaitStates = 18; + const int GFX940_XDL2PassWriteVgprVALUWawWaitStates = 5; + const int GFX940_XDL4PassWriteVgprVALUWawWaitStates = 7; + const int GFX940_XDL8PassWriteVgprVALUWawWaitStates = 11; + const int GFX940_XDL16PassWriteVgprVALUWawWaitStates = 19; const int SMFMA4x4ReadVgprVALUWarWaitStates = 1; + const int GFX940_XDL4PassReadVgprVALUWarWaitStates = 3; const int SMFMA16x16ReadVgprVALUWarWaitStates = 7; const int SMFMA32x32ReadVgprVALUWarWaitStates = 15; const int DMFMA4x4WriteVgprVALUWriteWaitStates = 6; @@ -1828,19 +1923,35 @@ int NeedWaitStates = MaxWaitStates; switch 
(TSchedModel.computeInstrLatency(MFMA)) { case 2: - NeedWaitStates = SMFMA4x4WriteVgprVALUWawWaitStates; + NeedWaitStates = ST.hasGFX940Insts() + ? isXDL(ST, *MFMA) + ? GFX940_XDL2PassWriteVgprVALUWawWaitStates + : GFX940_SMFMA2PassWriteVgprVALUWawWaitStates + : SMFMA4x4WriteVgprVALUWawWaitStates; break; case 4: - assert(isDGEMM(MFMA->getOpcode())); - NeedWaitStates = DMFMA4x4WriteVgprVALUWriteWaitStates; + assert(isDGEMM(MFMA->getOpcode()) || ST.hasGFX940Insts()); + NeedWaitStates = isDGEMM(MFMA->getOpcode()) + ? DMFMA4x4WriteVgprVALUWriteWaitStates + : isXDL(ST, *MFMA) + ? GFX940_XDL4PassWriteVgprVALUWawWaitStates + : GFX940_SMFMA4PassWriteVgprVALUWawWaitStates; break; case 8: - NeedWaitStates = SMFMA16x16WriteVgprVALUWawWaitStates; + NeedWaitStates = ST.hasGFX940Insts() + ? isXDL(ST, *MFMA) + ? GFX940_XDL8PassWriteVgprVALUWawWaitStates + : GFX940_SMFMA8PassWriteVgprVALUWawWaitStates + : SMFMA16x16WriteVgprVALUWawWaitStates; break; case 16: LLVM_FALLTHROUGH; default: NeedWaitStates = isDGEMM(MFMA->getOpcode()) ? DMFMA16x16WriteVgprVALUWriteWaitStates + : ST.hasGFX940Insts() + ? isXDL(ST, *MFMA) + ? 
GFX940_XDL16PassWriteVgprVALUWawWaitStates + : GFX940_SMFMA16PassWriteVgprVALUWawWaitStates : SMFMA32x32WriteVgprVALUWawWaitStates; break; } @@ -1858,6 +1969,9 @@ !MI.readsRegister(Reg, &TRI)) return false; + if (ST.hasGFX940Insts() && !isXDL(ST, MI)) + return false; + const MachineOperand *SrcC = TII.getNamedOperand(MI, AMDGPU::OpName::src2); assert(SrcC); @@ -1879,6 +1993,9 @@ switch (HazardDefLatency) { case 2: NeedWaitStates = SMFMA4x4ReadVgprVALUWarWaitStates; break; + case 4: assert(ST.hasGFX940Insts()); + NeedWaitStates = GFX940_XDL4PassReadVgprVALUWarWaitStates; + break; case 8: NeedWaitStates = SMFMA16x16ReadVgprVALUWarWaitStates; break; case 16: LLVM_FALLTHROUGH; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -68,12 +68,18 @@ unsigned DataFormat; }; +struct MAIInstInfo { + uint16_t Opcode; + bool is_gfx940_xdl; +}; + #define GET_MIMGBaseOpcode_DECL #define GET_MIMGDim_DECL #define GET_MIMGEncoding_DECL #define GET_MIMGLZMapping_DECL #define GET_MIMGMIPMapping_DECL #define GET_MIMGBiASMapping_DECL +#define GET_MAIInstInfoTable_DECL #include "AMDGPUGenSearchableTables.inc" namespace IsaInfo { @@ -444,6 +450,9 @@ LLVM_READONLY bool getVOP3IsSingle(unsigned Opc); +LLVM_READONLY +bool getMAIIsGFX940XDL(unsigned Opc); + LLVM_READONLY const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -164,6 +164,7 @@ #define GET_MIMGBiasMappingTable_IMPL #define GET_MIMGOffsetMappingTable_IMPL #define GET_MIMGG16MappingTable_IMPL +#define GET_MAIInstInfoTable_IMPL #include "AMDGPUGenSearchableTables.inc" int getMIMGOpcode(unsigned BaseOpcode, unsigned 
MIMGEncoding, @@ -342,6 +343,11 @@ return Info ? Info->IsSingle : false; } +bool getMAIIsGFX940XDL(unsigned Opc) { + const MAIInstInfo *Info = getMAIInstInfoHelper(Opc); + return Info ? Info->is_gfx940_xdl : false; +} + // Wrapper for Tablegen'd function. enum Subtarget is not defined in any // header files, so we need to wrap it in a function that takes unsigned // instead. diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -484,50 +484,59 @@ } // End isMoveImm = 1 } // End isAsCheapAsAMove = 1, isReMaterializable = 1 +class MAIInst + : VOP3InstBase { + Instruction Opcode = !cast(NAME); + bit is_gfx940_xdl = 0; +} + multiclass MAIInst("VOPProfileMAI_" # P).NoDstOverlap> { let isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 in { // FP32 denorm mode is respected, rounding mode is not. Exceptions are not supported. let Constraints = !if(NoDstOverlap, "@earlyclobber $vdst", "") in { - defm "" : VOP3Inst("VOPProfileMAI_" # P), + def _e64 : MAIInst("VOPProfileMAI_" # P), !if(NoDstOverlap, null_frag, AgprMAIFrag)>, - MFMATable<0, NAME # "_e64">; + MFMATable<0, NAME # "_e64">; let SubtargetPredicate = isGFX90APlus, Mnemonic = OpName in - defm _vgprcd : VOP3Inst("VOPProfileMAI_" # P # "_VCD"), - !if(NoDstOverlap, null_frag, VgprMAIFrag)>, - MFMATable<0, NAME # "_vgprcd_e64">; + def _vgprcd_e64 : MAIInst("VOPProfileMAI_" # P # "_VCD"), + !if(NoDstOverlap, null_frag, VgprMAIFrag)>, + MFMATable<0, NAME # "_vgprcd_e64">; } foreach _ = BoolToList.ret in { let Constraints = !if(NoDstOverlap, "$vdst = $src2", ""), isConvertibleToThreeAddress = NoDstOverlap, Mnemonic = OpName in { - defm "_mac" : VOP3Inst("VOPProfileMAI_" # P), AgprMAIFrag>, - MFMATable<1, NAME # "_e64">; + def "_mac_e64" : MAIInst("VOPProfileMAI_" # P), AgprMAIFrag>, + MFMATable<1, NAME # "_e64">; let SubtargetPredicate = isGFX90APlus in - defm 
_mac_vgprcd : VOP3Inst("VOPProfileMAI_" # P # "_VCD"), - VgprMAIFrag>, - MFMATable<1, NAME # "_vgprcd_e64">; + def _mac_vgprcd_e64 : MAIInst("VOPProfileMAI_" # P # "_VCD"), + VgprMAIFrag>, + MFMATable<1, NAME # "_vgprcd_e64">; } } } // End isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 } defm V_MFMA_F32_4X4X1F32 : MAIInst<"v_mfma_f32_4x4x1f32", "F32_F32_X4", int_amdgcn_mfma_f32_4x4x1f32>; -defm V_MFMA_F32_4X4X4F16 : MAIInst<"v_mfma_f32_4x4x4f16", "F32_V4F16_X4", int_amdgcn_mfma_f32_4x4x4f16>; -defm V_MFMA_I32_4X4X4I8 : MAIInst<"v_mfma_i32_4x4x4i8", "I32_I32_X4", int_amdgcn_mfma_i32_4x4x4i8>; defm V_MFMA_F32_16X16X1F32 : MAIInst<"v_mfma_f32_16x16x1f32", "F32_F32_X16", int_amdgcn_mfma_f32_16x16x1f32>; defm V_MFMA_F32_16X16X4F32 : MAIInst<"v_mfma_f32_16x16x4f32", "F32_F32_X4", int_amdgcn_mfma_f32_16x16x4f32>; +defm V_MFMA_F32_32X32X1F32 : MAIInst<"v_mfma_f32_32x32x1f32", "F32_F32_X32", int_amdgcn_mfma_f32_32x32x1f32>; +defm V_MFMA_F32_32X32X2F32 : MAIInst<"v_mfma_f32_32x32x2f32", "F32_F32_X16", int_amdgcn_mfma_f32_32x32x2f32>; + +let is_gfx940_xdl = 1 in { +defm V_MFMA_F32_4X4X4F16 : MAIInst<"v_mfma_f32_4x4x4f16", "F32_V4F16_X4", int_amdgcn_mfma_f32_4x4x4f16>; +defm V_MFMA_I32_4X4X4I8 : MAIInst<"v_mfma_i32_4x4x4i8", "I32_I32_X4", int_amdgcn_mfma_i32_4x4x4i8>; defm V_MFMA_F32_16X16X4F16 : MAIInst<"v_mfma_f32_16x16x4f16", "F32_V4F16_X16", int_amdgcn_mfma_f32_16x16x4f16>; defm V_MFMA_F32_16X16X16F16 : MAIInst<"v_mfma_f32_16x16x16f16", "F32_V4F16_X4", int_amdgcn_mfma_f32_16x16x16f16>; defm V_MFMA_I32_16X16X4I8 : MAIInst<"v_mfma_i32_16x16x4i8", "I32_I32_X16", int_amdgcn_mfma_i32_16x16x4i8>; -defm V_MFMA_F32_32X32X1F32 : MAIInst<"v_mfma_f32_32x32x1f32", "F32_F32_X32", int_amdgcn_mfma_f32_32x32x1f32>; -defm V_MFMA_F32_32X32X2F32 : MAIInst<"v_mfma_f32_32x32x2f32", "F32_F32_X16", int_amdgcn_mfma_f32_32x32x2f32>; defm V_MFMA_F32_32X32X4F16 : MAIInst<"v_mfma_f32_32x32x4f16", "F32_V4F16_X32", int_amdgcn_mfma_f32_32x32x4f16>; defm V_MFMA_F32_32X32X8F16 : 
MAIInst<"v_mfma_f32_32x32x8f16", "F32_V4F16_X16", int_amdgcn_mfma_f32_32x32x8f16>; defm V_MFMA_I32_32X32X4I8 : MAIInst<"v_mfma_i32_32x32x4i8", "I32_I32_X32", int_amdgcn_mfma_i32_32x32x4i8>; +} let Predicates = [isGFX908orGFX90A] in { defm V_MFMA_I32_16X16X16I8 : MAIInst<"v_mfma_i32_16x16x16i8", "I32_I32_X4", int_amdgcn_mfma_i32_16x16x16i8>; @@ -542,27 +551,29 @@ } // End SubtargetPredicate = HasMAIInsts let Predicates = [isGFX90APlus] in { + let is_gfx940_xdl = 1 in { defm V_MFMA_F32_32X32X4BF16_1K : MAIInst<"v_mfma_f32_32x32x4bf16_1k", "F32_V4I16_X32", int_amdgcn_mfma_f32_32x32x4bf16_1k>; defm V_MFMA_F32_16X16X4BF16_1K : MAIInst<"v_mfma_f32_16x16x4bf16_1k", "F32_V4I16_X16", int_amdgcn_mfma_f32_16x16x4bf16_1k>; defm V_MFMA_F32_4X4X4BF16_1K : MAIInst<"v_mfma_f32_4x4x4bf16_1k", "F32_V4I16_X4", int_amdgcn_mfma_f32_4x4x4bf16_1k>; defm V_MFMA_F32_32X32X8BF16_1K : MAIInst<"v_mfma_f32_32x32x8bf16_1k", "F32_V4I16_X16", int_amdgcn_mfma_f32_32x32x8bf16_1k>; defm V_MFMA_F32_16X16X16BF16_1K : MAIInst<"v_mfma_f32_16x16x16bf16_1k", "F32_V4I16_X4", int_amdgcn_mfma_f32_16x16x16bf16_1k>; + } defm V_MFMA_F64_16X16X4F64 : MAIInst<"v_mfma_f64_16x16x4f64", "F64_16X16X4F64", int_amdgcn_mfma_f64_16x16x4f64>; defm V_MFMA_F64_4X4X4F64 : MAIInst<"v_mfma_f64_4x4x4f64", "F64_4X4X4F64", int_amdgcn_mfma_f64_4x4x4f64>; } // End Predicates = [isGFX90APlus] -let Predicates = [isGFX940Plus] in { +let Predicates = [isGFX940Plus], is_gfx940_xdl = 1 in { defm V_MFMA_I32_32X32X16I8 : MAIInst<"v_mfma_i32_32x32x16i8", "I32_I64_X32", int_amdgcn_mfma_i32_32x32x16_i8>; defm V_MFMA_I32_16X16X32I8 : MAIInst<"v_mfma_i32_16x16x32i8", "I32_I64_X16", int_amdgcn_mfma_i32_16x16x32_i8>; defm V_MFMA_F32_16X16X8XF32 : MAIInst<"v_mfma_f32_16x16x8xf32", "F32_V2F32_X16", int_amdgcn_mfma_f32_16x16x8_xf32>; defm V_MFMA_F32_32X32X4XF32 : MAIInst<"v_mfma_f32_32x32x4xf32", "F32_V2F32_X32", int_amdgcn_mfma_f32_32x32x4_xf32>; -} // End Predicates = [isGFX940Plus] +} // End Predicates = [isGFX940Plus], is_gfx940_xdl = 1 
multiclass SMFMACInst { let Constraints = "$vdst = $src2", DisableEncoding = "$src2", - isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 in { - defm NAME : VOP3Inst("VOPProfileSMFMAC_" # P), node>; + isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1, is_gfx940_xdl = 1 in { + def _e64 : MAIInst("VOPProfileSMFMAC_" # P), node>; } } @@ -575,6 +586,17 @@ defm V_SMFMAC_I32_32X32X32_I8 : SMFMACInst<"v_smfmac_i32_32x32x32_i8", "I32_32X32X32_I8", int_amdgcn_smfmac_i32_32x32x32_i8>; } +def MAIInstInfoTable : GenericTable { + let FilterClass = "MAIInst"; + let CppTypeName = "MAIInstInfo"; + let Fields = [ + "Opcode", "is_gfx940_xdl" + ]; + + let PrimaryKey = ["Opcode"]; + let PrimaryKeyName = "getMAIInstInfoHelper"; +} + let SubtargetPredicate = HasPackedFP32Ops, isCommutable = 1 in { defm V_PK_FMA_F32 : VOP3PInst<"v_pk_fma_f32", VOP3_Profile, any_fma>; defm V_PK_MUL_F32 : VOP3PInst<"v_pk_mul_f32", VOP3_Profile, any_fmul>; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll @@ -390,6 +390,7 @@ ; GFX908_A: v_mfma_f32_4x4x1f32 [[MAI1:a\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{[0-9]+}}, a[{{[0-9]+:[0-9]+}}] ; GFX908_A-NEXT: v_mfma_f32_4x4x1f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]] ; GFX940: v_mfma_f32_4x4x1_16b_f32 [[MAI1:a\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{[0-9]+}}, a[{{[0-9]+:[0-9]+}}] +; GFX940-NEXT: s_nop 1 ; GFX940-NEXT: v_mfma_f32_4x4x1_16b_f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]] define amdgpu_kernel void @test_mfma_f32_4x4x1f32_forward_acc(<4 x float> addrspace(1)* %arg) #0 { bb: diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir @@ -0,0 +1,2018 @@ +# RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs 
-run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s + +# GCN-LABEL: name: valu_write_vgpr_sgemm_mfma_read +# GCN: V_MOV_B32 +# GCN: V_MOV_B32 +# GCN-NEXT: S_NOP 1 +# GCN-NEXT: V_MFMA +name: valu_write_vgpr_sgemm_mfma_read +body: | + bb.0: + $vgpr0 = V_MOV_B32_e32 1, implicit $exec + $vgpr1 = V_MOV_B32_e32 1, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: valu_write_agpr_sgemm_mfma_read +# GCN: V_ACCVGPR_WRITE_B32_e64 +# GCN-NEXT: S_NOP 1 +# GCN-NEXT: V_MFMA +name: valu_write_agpr_sgemm_mfma_read +body: | + bb.0: + $vgpr0 = IMPLICIT_DEF + $agpr4 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $agpr4, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: valu_write_vgpr_dgemm_mfma_read +# GCN: V_MOV_B32 +# GCN: V_MOV_B32 +# GCN-NEXT: S_NOP 1 +# GCN-NEXT: V_MFMA +name: valu_write_vgpr_dgemm_mfma_read +body: | + bb.0: + $vgpr0 = V_MOV_B32_e32 1, implicit $exec + $vgpr1 = V_MOV_B32_e32 1, implicit $exec + $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: valu_write_vgpr_smfmac_read +# GCN: V_MOV_B32 +# GCN-NEXT: S_NOP 1 +# GCN-NEXT: V_SMFMAC +name: valu_write_vgpr_smfmac_read +body: | + bb.0: + $vgpr32 = V_MOV_B32_e32 1, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec +... 
+# GCN-LABEL: name: accmov_write_agpr_sgemm_mfma_read +# GCN: V_ACCVGPR_MOV_B32 +# GCN-NEXT: S_NOP 1 +# GCN-NEXT: V_MFMA +name: accmov_write_agpr_sgemm_mfma_read +body: | + bb.0: + $vgpr0 = IMPLICIT_DEF + $agpr4 = V_ACCVGPR_MOV_B32 $agpr5, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $agpr4, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: nonxdl_sgemm4x4_mfma_write_agpr_mfma_read_same_agpr_as_srcc +# GCN: V_MFMA +# GCN-NEXT: S_NOP 1 +# GCN-NEXT: V_MFMA +name: nonxdl_sgemm4x4_mfma_write_agpr_mfma_read_same_agpr_as_srcc +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: nonxdl_sgemm4x4_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc +# GCN: V_MFMA +# GCN-NEXT: S_NOP 1 +# GCN-NEXT: V_MFMA +name: nonxdl_sgemm4x4_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr4, $vgpr5, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr4, $vgpr5, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec +... 
+# GCN-LABEL: name: nonxdl_sgemm16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc +# GCN: V_MFMA +# GCN-NEXT: V_MFMA +name: nonxdl_sgemm16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: nonxdl_sgemm4x4_mfma_write_agpr_smfmac_read_same_agpr_as_srcc +# GCN: V_MFMA +# GCN-NEXT: S_NOP 1 +# GCN-NEXT: V_SMFMAC +name: nonxdl_sgemm4x4_mfma_write_agpr_smfmac_read_same_agpr_as_srcc +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr4, $vgpr5, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec +... +# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc +# GCN: V_MFMA +# GCN-NEXT: V_MFMA +name: dgemm16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec +... 
+# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc +# GCN: V_MFMA +# GCN-NEXT: S_NOP 3 +# GCN-NEXT: V_MFMA +name: dgemm4x4_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc +body: | + bb.0: + $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec + $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc +# GCN: V_MFMA +# GCN-NEXT: S_NOP 1 +# GCN-NEXT: V_MFMA +name: xdl_sgemm4x4_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr4, $vgpr5, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr4, $vgpr5, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc +# GCN: V_MFMA +# GCN-NEXT: V_MFMA +name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec +... 
+# GCN-LABEL: name: smfmac16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc +# GCN: V_SMFMAC +# GCN-NEXT: V_SMFMAC +name: smfmac16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec +... +# GCN-LABEL: name: sgemm4x4_mfma_write_agpr_mfma_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MFMA +name: sgemm4x4_mfma_write_agpr_mfma_read_overlap +body: | + bb.0: + $agpr2_agpr3_agpr4_agpr5 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: sgemm4x4_mfma_write_vgpr_mfma_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MFMA +name: sgemm4x4_mfma_write_vgpr_mfma_read_overlap +body: | + bb.0: + $vgpr2_vgpr3_vgpr4_vgpr5 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr6, $vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr6, $vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: sgemm4x4_mfma_write_agpr_smfmac_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_SMFMAC +name: sgemm4x4_mfma_write_agpr_smfmac_read_overlap +body: | + bb.0: + $agpr2_agpr3_agpr4_agpr5 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec +... 
+# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_MFMA +name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_overlap +body: | + bb.0: + $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17 = V_MFMA_I32_16X16X4I8_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_MFMA +name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_overlap +body: | + bb.0: + $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec +... 
+# GCN-LABEL: name: nonxdl_sgemm16x16_mfma_write_agpr_xdl_mfma_read_overlap +# GCN: V_MFMA +# GCN-NEXT: V_MFMA +name: nonxdl_sgemm16x16_mfma_write_agpr_xdl_mfma_read_overlap +body: | + bb.0: + $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17 = V_MFMA_F32_16X16X1F32_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_I32_16X16X4I8_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: nonxdl_sgemm16x16_mfma_write_agpr_nonxdl_mfma_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: V_MFMA +name: nonxdl_sgemm16x16_mfma_write_agpr_nonxdl_mfma_read_overlap +body: | + bb.0: + $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17 = V_MFMA_F32_16X16X1F32_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec +... 
+# I8 16x16 MFMA writes agpr2-agpr17; the following SMFMAC uses agpr0-agpr3 as
+# its destination and trailing register operand, overlapping agpr2-agpr3.
+# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_smfmac_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_SMFMAC
+name: xdl_sgemm16x16_mfma_write_agpr_smfmac_read_overlap
+body: |
+  bb.0:
+    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17 = V_MFMA_I32_16X16X4I8_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
+...
+# F16 32x32 MFMA writes agpr2-agpr33; the next MFMA accumulates from
+# agpr0-agpr15, which overlaps the producer's destination at agpr2-agpr15.
+# GCN-LABEL: name: xdl_sgemm32x32_mfma_write_agpr_mfma_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name: xdl_sgemm32x32_mfma_write_agpr_mfma_read_overlap
+body: |
+  bb.0:
+    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31_agpr32_agpr33 = V_MFMA_F32_32X32X4F16_e64 $vgpr26_vgpr27, $vgpr28_vgpr29, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
+...
+# VGPR-destination variant: F16 32x32 MFMA writes vgpr2-vgpr33 and the next
+# MFMA accumulates from vgpr0-vgpr15 (partial overlap). The A/B sources use
+# vgpr126+ so that only the accumulator overlaps.
+# GCN-LABEL: name: xdl_sgemm32x32_mfma_write_vgpr_mfma_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name: xdl_sgemm32x32_mfma_write_vgpr_mfma_read_overlap
+body: |
+  bb.0:
+    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X2F32_vgprcd_e64 $vgpr126, $vgpr127, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
+...
+# F32 32x32 producer writes agpr2-agpr33; an I8 4x4 consumer accumulates from
+# agpr0-agpr3. The checks require the two MFMAs to be adjacent (no S_NOP).
+# GCN-LABEL: name: nonxdl_sgemm32x32_mfma_write_agpr_xdl_mfma_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: V_MFMA
+name: nonxdl_sgemm32x32_mfma_write_agpr_xdl_mfma_read_overlap
+body: |
+  bb.0:
+    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31_agpr32_agpr33 = V_MFMA_F32_32X32X1F32_e64 $vgpr26, $vgpr28, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# F32 32x32 producer writes agpr2-agpr33 and an F32 32x32x2 consumer
+# accumulates from agpr0-agpr15 (partial overlap); two S_NOP 7 are expected.
+# GCN-LABEL: name: nonxdl_sgemm32x32_mfma_write_agpr_nonxdl_mfma_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: V_MFMA
+name: nonxdl_sgemm32x32_mfma_write_agpr_nonxdl_mfma_read_overlap
+body: |
+  bb.0:
+    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31_agpr32_agpr33 = V_MFMA_F32_32X32X1F32_e64 $vgpr26, $vgpr28, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
+...
+# F16 32x32 MFMA writes agpr2-agpr33; the following SMFMAC uses agpr0-agpr3 as
+# its destination and trailing register operand, overlapping agpr2-agpr3.
+# GCN-LABEL: name: xdl_sgemm32x32_mfma_write_agpr_smfmac_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_SMFMAC
+name: xdl_sgemm32x32_mfma_write_agpr_smfmac_read_overlap
+body: |
+  bb.0:
+    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31_agpr32_agpr33 = V_MFMA_F32_32X32X4F16_e64 $vgpr26_vgpr27, $vgpr28_vgpr29, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
+...
+# F64 16x16 MFMA writes vgpr2-vgpr9; a second F64 16x16 MFMA accumulates from
+# vgpr0-vgpr7, overlapping the producer's destination at vgpr2-vgpr7.
+# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name: dgemm16x16_mfma_write_vgpr_mfma_read_overlap
+body: |
+  bb.0:
+    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
+...
+# F64 4x4 MFMA writes vgpr2-vgpr3; an F64 16x16 MFMA then accumulates from
+# vgpr0-vgpr7, which contains the producer's destination.
+# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_mfma_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_MFMA
+name: dgemm4x4_mfma_write_vgpr_mfma_read_overlap
+body: |
+  bb.0:
+    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
+...
+# F64 16x16 MFMA writes vgpr2-vgpr9; an F32 4x4 MFMA then accumulates from
+# vgpr0-vgpr3, overlapping the producer's destination at vgpr2-vgpr3.
+# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap
+body: |
+  bb.0:
+    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr10, $vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# F64 4x4 MFMA writes vgpr2-vgpr3; an F32 4x4 MFMA then accumulates from
+# vgpr0-vgpr3, which contains the producer's destination.
+# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_sgemm_mfma_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_MFMA
+name: dgemm4x4_mfma_write_vgpr_sgemm_mfma_read_overlap
+body: |
+  bb.0:
+    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr10, $vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# I8 4x4 MFMA writes vgpr0-vgpr3; an F64 16x16 MFMA then accumulates from
+# vgpr0-vgpr7, which contains the producer's destination.
+# GCN-LABEL: name: sgemm4x4_mfma_write_vgpr_dgemm_mfma_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_MFMA
+name: sgemm4x4_mfma_write_vgpr_dgemm_mfma_read_overlap
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr10, $vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
+...
+# I8 16x16 MFMA writes vgpr0-vgpr15; an F64 16x16 MFMA then accumulates from
+# vgpr0-vgpr7, a sub-range of the producer's destination. The consumer's A/B
+# sources (vgpr20-vgpr21) lie outside the producer's destination.
+# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_dgemm_mfma_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name: xdl_sgemm16x16_mfma_write_vgpr_dgemm_mfma_read_overlap
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr20_vgpr21, $vgpr20_vgpr21, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
+...
+# F16 32x32 MFMA writes vgpr0-vgpr31; an F64 16x16 MFMA then accumulates from
+# vgpr0-vgpr7, a sub-range of the producer's destination. The consumer's A/B
+# sources (vgpr120-vgpr121) lie outside the producer's destination.
+# GCN-LABEL: name: xdl_sgemm32x32_mfma_write_vgpr_dgemm_mfma_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name: xdl_sgemm32x32_mfma_write_vgpr_dgemm_mfma_read_overlap
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr120_vgpr121, $vgpr120_vgpr121, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
+...
+# "partial": I8 16x16 MFMA writes agpr0-agpr15 and the F32 4x4 consumer
+# accumulates from agpr0-agpr3, i.e. only part of what the producer wrote.
+# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_partial
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_partial
+body: |
+  bb.0:
+    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_I32_16X16X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# VGPR variant of the "partial" case: I8 16x16 MFMA writes vgpr0-vgpr15 and the
+# F32 4x4 consumer accumulates from vgpr0-vgpr3.
+# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_partial
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MFMA
+name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_partial
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# I8 4x4 MFMA writes agpr0-agpr3; the consumer reads agpr1 as its first source
+# (SrcA) and also accumulates from the same agpr0-agpr3 tuple.
+# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_mfma_srca_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_MFMA
+name: xdl_sgemm4x4_mfma_write_agpr_mfma_srca_read_overlap
+body: |
+  bb.0:
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# I8 16x16 MFMA writes agpr0-agpr15; the consumer reads agpr1 as its first
+# source (SrcA). Its accumulator (agpr20-agpr23) is outside the written range.
+# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_MFMA
+name: xdl_sgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
+body: |
+  bb.0:
+    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_I32_16X16X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr20_agpr21_agpr22_agpr23 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr20_agpr21_agpr22_agpr23, 0, 0, 0, implicit $mode, implicit $exec
+...
+# F32 16x16 MFMA writes agpr0-agpr15; the consumer reads agpr1 as its first
+# source (SrcA). Its accumulator (agpr20-agpr23) is outside the written range.
+# GCN-LABEL: name: nonxdl_sgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 1
+# GCN-NEXT: V_MFMA
+name: nonxdl_sgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
+body: |
+  bb.0:
+    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr20_agpr21_agpr22_agpr23 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr20_agpr21_agpr22_agpr23, 0, 0, 0, implicit $mode, implicit $exec
+...
+# I8 32x32 SMFMAC writes agpr0-agpr15; the following MFMA reads agpr1 as its
+# first source (SrcA) and accumulates from agpr20-agpr23.
+# GCN-LABEL: name: smfmac32x32_write_agpr_mfma_srca_read_overlap
+# GCN: V_SMFMAC
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_MFMA
+name: smfmac32x32_write_agpr_mfma_srca_read_overlap
+body: |
+  bb.0:
+    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_SMFMAC_I32_32X32X32_I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $mode, implicit $exec
+    $agpr20_agpr21_agpr22_agpr23 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr20_agpr21_agpr22_agpr23, 0, 0, 0, implicit $mode, implicit $exec
+...
+# Two identical I8 32x32 SMFMACs back to back: each writes vgpr0-vgpr15 and
+# reads vgpr2 as its third source operand.
+# NOTE(review): the test name says "write_agpr" but the destination here is a
+# VGPR tuple - confirm whether the name or the operands are intended.
+# GCN-LABEL: name: smfmac32x32_write_agpr_smfmac_srcc_read_overlap
+# GCN: V_SMFMAC
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_SMFMAC
+name: smfmac32x32_write_agpr_smfmac_srcc_read_overlap
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_SMFMAC_I32_32X32X32_I8_e64 $agpr0_agpr1, $agpr2_agpr3_agpr4_agpr5, $vgpr2, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_SMFMAC_I32_32X32X32_I8_e64 $agpr0_agpr1, $agpr2_agpr3_agpr4_agpr5, $vgpr2, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $mode, implicit $exec
+...
+# F16 32x32 MFMA writes agpr0-agpr31; the consumer reads agpr1 as its first
+# source (SrcA).
+# NOTE(review): the consumer's destination/accumulator agpr20-agpr23 also lies
+# inside agpr0-agpr31, so more than SrcA overlaps here; the nonxdl_sgemm32x32
+# counterpart of this test uses agpr120-agpr123 instead.
+# GCN-LABEL: name: xdl_sgemm32x32_mfma_write_agpr_mfma_srca_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_MFMA
+name: xdl_sgemm32x32_mfma_write_agpr_mfma_srca_read_overlap
+body: |
+  bb.0:
+    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X4F16_e64 $vgpr26_vgpr27, $vgpr28_vgpr29, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr20_agpr21_agpr22_agpr23 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr20_agpr21_agpr22_agpr23, 0, 0, 0, implicit $mode, implicit $exec
+...
+# F32 32x32 MFMA writes agpr0-agpr31; the consumer reads agpr1 as its first
+# source (SrcA). Its accumulator (agpr120-agpr123) is outside the written range.
+# GCN-LABEL: name: nonxdl_sgemm32x32_mfma_write_agpr_mfma_srca_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 1
+# GCN-NEXT: V_MFMA
+name: nonxdl_sgemm32x32_mfma_write_agpr_mfma_srca_read_overlap
+body: |
+  bb.0:
+    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X1F32_e64 $vgpr26, $vgpr28, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr120_agpr121_agpr122_agpr123 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr120_agpr121_agpr122_agpr123, 0, 0, 0, implicit $mode, implicit $exec
+...
+# I8 4x4 MFMA writes vgpr0-vgpr3; an AGPR-destination I8 4x4 MFMA then reads
+# vgpr0 as its first source (SrcA).
+# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_mfma_srca_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_MFMA
+name: xdl_sgemm4x4_mfma_write_vgpr_mfma_srca_read_overlap
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr0, $agpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# I8 4x4 MFMA writes vgpr0-vgpr3; an F64 4x4 MFMA then reads vgpr0-vgpr1 as its
+# first source (SrcA). Its other sources (vgpr4-vgpr5) are not overlapped.
+# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_dmfma4x4_srca_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_MFMA
+name: xdl_sgemm4x4_mfma_write_vgpr_dmfma4x4_srca_read_overlap
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr4_vgpr5, $vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec
+...
+# I8 4x4 MFMA writes vgpr0-vgpr3; an F64 16x16 MFMA then reads vgpr0-vgpr1 as
+# its first source (SrcA). The consumer is the 16x16 DGEMM opcode so that this
+# test differs from xdl_sgemm4x4_mfma_write_vgpr_dmfma4x4_srca_read_overlap
+# (previously both used V_MFMA_F64_4X4X4F64). SrcB and the accumulator
+# (vgpr10-vgpr17) are outside the producer's destination.
+# NOTE(review): S_NOP 4 is kept because the other SrcA consumers of this
+# producer in this file all expect it - re-run llc to confirm.
+# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_dmfma16x16_srca_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_MFMA
+name: xdl_sgemm4x4_mfma_write_vgpr_dmfma16x16_srca_read_overlap
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
+...
+# I8 4x4 MFMA writes vgpr0-vgpr3; the following SMFMAC reads vgpr0-vgpr1 as its
+# first source (SrcA). Its other register operands are not overlapped.
+# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_smfmac_srca_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_SMFMAC
+name: xdl_sgemm4x4_mfma_write_vgpr_smfmac_srca_read_overlap
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr12_vgpr13_vgpr14_vgpr15, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
+...
+# F64 4x4 MFMA writes vgpr2-vgpr3; an F64 16x16 MFMA then reads vgpr2-vgpr3 as
+# its first source (SrcA) and accumulates from vgpr10-vgpr17.
+# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_mfma_srca_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 5
+# GCN-NEXT: V_MFMA
+name: dgemm4x4_mfma_write_vgpr_mfma_srca_read_overlap
+body: |
+  bb.0:
+    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr2_vgpr3, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
+...
+# F64 16x16 MFMA writes vgpr0-vgpr7; a second F64 16x16 MFMA reads vgpr0-vgpr1
+# as its first source (SrcA). Both accumulate from vgpr10-vgpr17.
+# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_srca_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_MFMA
+name: dgemm16x16_mfma_write_vgpr_mfma_srca_read_overlap
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
+...
+# F64 4x4 MFMA writes vgpr4-vgpr5; an F32 4x4 MFMA then reads vgpr4 as its
+# first source (SrcA) and accumulates from vgpr0-vgpr3.
+# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 5
+# GCN-NEXT: V_MFMA
+name: dgemm4x4_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
+body: |
+  bb.0:
+    $vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr4, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# F64 16x16 MFMA writes vgpr0-vgpr7; an F32 4x4 MFMA then reads vgpr4 as its
+# first source (SrcA). Its second source (vgpr0) and accumulator
+# (vgpr0-vgpr3) also fall inside the producer's destination.
+# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_MFMA
+name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr4, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# I8 4x4 MFMA writes vgpr0-vgpr3; an F64 4x4 MFMA then reads vgpr0-vgpr1 as its
+# first source (SrcA). Its other sources (vgpr4-vgpr5) are not overlapped.
+# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_dgemm_mfma_srca_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_MFMA
+name: xdl_sgemm4x4_mfma_write_vgpr_dgemm_mfma_srca_read_overlap
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr4, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr4_vgpr5, $vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec
+...
+# I8 4x4 MFMA writes agpr0-agpr3; the consumer reads agpr0 as its second source
+# (SrcB) and also accumulates from the same agpr0-agpr3 tuple.
+# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_mfma_srcb_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_MFMA
+name: xdl_sgemm4x4_mfma_write_agpr_mfma_srcb_read_overlap
+body: |
+  bb.0:
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $agpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# I8 4x4 MFMA writes vgpr0-vgpr3; the consumer reads vgpr0 as its second source
+# (SrcB). Its first source and accumulator are AGPRs.
+# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_mfma_srcb_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_MFMA
+name: xdl_sgemm4x4_mfma_write_vgpr_mfma_srcb_read_overlap
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# F64 4x4 MFMA writes vgpr2-vgpr3; a second F64 4x4 MFMA reads vgpr2-vgpr3 as
+# its second source (SrcB).
+# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_mfma_srcb_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 5
+# GCN-NEXT: V_MFMA
+name: dgemm4x4_mfma_write_vgpr_mfma_srcb_read_overlap
+body: |
+  bb.0:
+    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
+...
+# F64 4x4 MFMA writes vgpr2-vgpr3; the following SMFMAC reads vgpr2-vgpr5 as
+# its second source (SrcB), overlapping at vgpr2-vgpr3.
+# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_smfmac_srcb_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 5
+# GCN-NEXT: V_SMFMAC
+name: dgemm4x4_mfma_write_vgpr_smfmac_srcb_read_overlap
+body: |
+  bb.0:
+    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr10_vgpr11, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
+...
+# F64 4x4 MFMA writes vgpr2-vgpr3; the following SMFMAC reads vgpr2 as its
+# third source operand (called "srcc" in the test name).
+# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_smfmac_srcc_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 5
+# GCN-NEXT: V_SMFMAC
+name: dgemm4x4_mfma_write_vgpr_smfmac_srcc_read_overlap
+body: |
+  bb.0:
+    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15, $vgpr2, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
+...
+# F64 16x16 MFMA writes vgpr0-vgpr7; a second F64 16x16 MFMA reads vgpr0-vgpr1
+# as its second source (SrcB). Both accumulate from vgpr10-vgpr17.
+# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_srcb_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_MFMA
+name: dgemm16x16_mfma_write_vgpr_mfma_srcb_read_overlap
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr0_vgpr1, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
+...
+# F64 16x16 MFMA writes vgpr0-vgpr7; the following SMFMAC reads vgpr2-vgpr5 as
+# its second source (SrcB), entirely inside the producer's destination.
+# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_smfmac_srcb_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_SMFMAC
+name: dgemm16x16_mfma_write_vgpr_smfmac_srcb_read_overlap
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr10_vgpr11, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
+...
+# F64 16x16 MFMA writes vgpr0-vgpr7; the following SMFMAC reads vgpr2 as its
+# third source operand (called "srcc" in the test name).
+# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_smfmac_srcc_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_SMFMAC
+name: dgemm16x16_mfma_write_vgpr_smfmac_srcc_read_overlap
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15, $vgpr2, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
+...
+# I8 4x4 MFMA writes vgpr0-vgpr3; the following SMFMAC reads vgpr1 as its third
+# source operand (called "srcc" in the test name).
+# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_smfmac_srcc_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_SMFMAC
+name: xdl_sgemm4x4_mfma_write_vgpr_smfmac_srcc_read_overlap
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15, $vgpr1, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
+...
+# I8 4x4 MFMA writes vgpr0-vgpr3; a buffer store then reads vgpr0 as its first
+# operand.
+# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_vm_read
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: BUFFER_STORE_DWORD
+name: xdl_smfma4x4_write_vgpr_vm_read
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+...
+# I8 4x4 MFMA writes vgpr0-vgpr3; a flat store then reads vgpr0-vgpr1 as its
+# first operand (its second operand, vgpr4, is not overlapped).
+# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_flat_read
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: FLAT_STORE_DWORD
+name: xdl_smfma4x4_write_vgpr_flat_read
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
+...
+# I8 4x4 MFMA writes vgpr0-vgpr3; an LDS write then reads vgpr0 as its first
+# operand (its second operand, vgpr4, is not overlapped).
+# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_lds_read
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: DS_WRITE_B32
+name: xdl_smfma4x4_write_vgpr_lds_read
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    DS_WRITE_B32 $vgpr0, $vgpr4, 0, 0, implicit $m0, implicit $mode, implicit $exec
+...
+# I8 4x4 MFMA writes vgpr0-vgpr3; an export then reads vgpr0 in three of its
+# four register operands.
+# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_exp_read
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: EXP_DONE
+name: xdl_smfma4x4_write_vgpr_exp_read
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    EXP_DONE 12, $vgpr4, $vgpr0, $vgpr0, $vgpr0, 0, 0, 15, implicit $exec
+...
+# F16 16x16 SMFMAC writes vgpr0-vgpr3; a flat store then reads vgpr1 as its
+# second operand. Its first operand (vgpr16-vgpr17) is not overlapped.
+# GCN-LABEL: name: smfmac16x16_write_vgpr_flat_read
+# GCN: V_SMFMAC
+# GCN-NEXT: S_NOP 6
+# GCN-NEXT: FLAT_STORE_DWORD
+name: smfmac16x16_write_vgpr_flat_read
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3, implicit $mode, implicit $exec
+    FLAT_STORE_DWORD $vgpr16_vgpr17, $vgpr1, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
+...
+# I8 16x16 MFMA writes vgpr0-vgpr15; a flat store then reads vgpr1 as its
+# second operand. Its first operand (vgpr16-vgpr17) is not overlapped.
+# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_flat_read
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: FLAT_STORE_DWORD
+name: xdl_smfma16x16_write_vgpr_flat_read
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
+    FLAT_STORE_DWORD $vgpr16_vgpr17, $vgpr1, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
+...
+# I8 32x32 SMFMAC writes vgpr0-vgpr15; a flat store then reads vgpr1 as its
+# second operand. Its first operand (vgpr16-vgpr17) is not overlapped.
+# GCN-LABEL: name: smfmac32x32_write_vgpr_flat_read
+# GCN: V_SMFMAC
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: FLAT_STORE_DWORD
+name: smfmac32x32_write_vgpr_flat_read
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_SMFMAC_I32_32X32X32_I8_e64 $agpr0_agpr1, $agpr2_agpr3_agpr4_agpr5, $vgpr2, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $mode, implicit $exec
+    FLAT_STORE_DWORD $vgpr16_vgpr17, $vgpr1, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
+...
+# F16 32x32 MFMA writes agpr0-agpr31; a flat store then reads agpr1 as its
+# second operand.
+# NOTE(review): the test name says "write_vgpr" but the producer's destination
+# and the stored register are AGPRs - confirm this is intended.
+# GCN-LABEL: name: xdl_smfma32x32_write_vgpr_flat_read
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: FLAT_STORE_DWORD
+name: xdl_smfma32x32_write_vgpr_flat_read
+body: |
+  bb.0:
+    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X4F16_e64 $vgpr26_vgpr27, $vgpr28_vgpr29, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
+    FLAT_STORE_DWORD $vgpr16_vgpr17, $agpr1, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
+...
+# "overlap": F64 4x4 MFMA writes vgpr4-vgpr5 and the flat store reads only
+# vgpr5, one of the two written registers.
+# GCN-LABEL: name: dmfma4x4_write_vgpr_flat_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: FLAT_STORE_DWORD
+name: dmfma4x4_write_vgpr_flat_read_overlap
+body: |
+  bb.0:
+    $vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
+    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
+...
+# "full": same producer, but the store is FLAT_STORE_DWORDX2 reading the whole
+# vgpr4-vgpr5 pair; the same S_NOP sequence is expected.
+# GCN-LABEL: name: dmfma4x4_write_vgpr_flat_read_full
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: FLAT_STORE_DWORD
+name: dmfma4x4_write_vgpr_flat_read_full
+body: |
+  bb.0:
+    $vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
+    FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
+...
+# F64 16x16 MFMA writes vgpr2-vgpr9; a flat store then reads vgpr4 as its
+# second operand. Its first operand (vgpr0-vgpr1) is not overlapped.
+# GCN-LABEL: name: dmfma16x16_write_vgpr_flat_read
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 1
+# GCN-NEXT: FLAT_STORE_DWORD
+name: dmfma16x16_write_vgpr_flat_read
+body: |
+  bb.0:
+    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
+    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
+...
+# I8 4x4 MFMA writes vgpr0-vgpr3; a V_MOV_B32 then reads vgpr0.
+# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_valu_read
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_MOV_B32
+name: xdl_smfma4x4_write_vgpr_valu_read
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $mode, implicit $exec
+...
+# I8 16x16 MFMA writes vgpr0-vgpr15; a V_MOV_B32 then reads vgpr0 and writes
+# vgpr16, which is outside the producer's destination.
+# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_read
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_MOV_B32
+name: xdl_smfma16x16_write_vgpr_valu_read
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $mode, implicit $exec
+...
+# GCN-LABEL: name: xdl_smfma32x32_write_vgpr_valu_read +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MOV_B32 +name: xdl_smfma32x32_write_vgpr_valu_read +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec + $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: dmfma4x4_write_vgpr_valu_read +# GCN: V_MFMA +# GCN-NEXT: S_NOP 5 +# GCN-NEXT: V_MOV_B32 +name: dmfma4x4_write_vgpr_valu_read +body: | + bb.0: + $vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec + $vgpr6 = V_MOV_B32_e32 $vgpr5, implicit $exec +... +# GCN-LABEL: name: dmfma16x16_write_vgpr_valu_read +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MOV_B32 +name: dmfma16x16_write_vgpr_valu_read +body: | + bb.0: + $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec + $vgpr12 = V_MOV_B32_e32 $vgpr4, implicit $exec +... 
+# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_accv_read +# GCN: V_MFMA +# GCN-NEXT: S_NOP 4 +# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64 +name: xdl_smfma4x4_write_vgpr_accv_read +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec + $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_accv_read +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64 +name: xdl_smfma16x16_write_vgpr_accv_read +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec + $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: xdl_smfma32x32_write_vgpr_accv_read +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64 +name: xdl_smfma32x32_write_vgpr_accv_read +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec + $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $mode, implicit $exec +... 
+# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_dot_read +# GCN: V_MFMA +# GCN-NEXT: S_NOP 4 +# GCN-NEXT: V_DOT +name: xdl_smfma4x4_write_vgpr_dot_read +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec + $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec +... +# GCN-LABEL: name: dmfma4x4_write_vgpr_dot_read +# GCN: V_MFMA +# GCN-NEXT: S_NOP 5 +# GCN-NEXT: V_DOT +name: dmfma4x4_write_vgpr_dot_read +body: | + bb.0: + $vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec + $vgpr1 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr5, $vgpr1, implicit $exec +... +# GCN-LABEL: name: dmfma16x16_write_vgpr_dot_read +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_DOT +name: dmfma16x16_write_vgpr_dot_read +body: | + bb.0: + $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec + $vgpr1 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr4, $vgpr1, implicit $exec +... +# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_valu_write +# GCN: V_MFMA +# GCN-NEXT: S_NOP 4 +# GCN-NEXT: V_MOV_B32 +name: xdl_smfma4x4_write_vgpr_valu_write +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr4, $vgpr0, $vgpr6_vgpr7_vgpr8_vgpr9, 0, 0, 0, implicit $mode, implicit $exec + $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec +... 
+# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_write +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MOV_B32 +name: xdl_smfma16x16_write_vgpr_valu_write +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec + $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: xdl_smfma32x32_write_vgpr_valu_write +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MOV_B32 +name: xdl_smfma32x32_write_vgpr_valu_write +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec + $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_valu_f16_write +# GCN: V_MFMA +# GCN-NEXT: S_NOP 4 +# GCN-NEXT: V_FMA_F16_e64 +name: xdl_smfma4x4_write_vgpr_valu_f16_write +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec + $vgpr1 = V_FMA_F16_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec +... 
+# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_f16_write +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_FMA_F16_e64 +name: xdl_smfma16x16_write_vgpr_valu_f16_write +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec + $vgpr1 = V_FMA_F16_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: xdl_smfma32x32_write_vgpr_valu_f16_write +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_FMA_F16_e64 +name: xdl_smfma32x32_write_vgpr_valu_f16_write +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec + $vgpr1 = V_FMA_F16_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_valu_sdwa_write +# GCN: V_MFMA +# GCN-NEXT: S_NOP 4 +# GCN-NEXT: V_MOV_B32_sdwa +name: xdl_smfma4x4_write_vgpr_valu_sdwa_write +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec + $vgpr1 = V_MOV_B32_sdwa 0, $vgpr16, 0, 5, 2, 4, implicit $exec, implicit $vgpr1(tied-def 0) +... 
+# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_sdwa_write +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MOV_B32_sdwa +name: xdl_smfma16x16_write_vgpr_valu_sdwa_write +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec + $vgpr1 = V_MOV_B32_sdwa 0, $vgpr16, 0, 5, 2, 4, implicit $exec, implicit $vgpr1(tied-def 0) +... +# GCN-LABEL: name: xdl_smfma32x32_write_vgpr_valu_sdwa_write +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MOV_B32_sdwa +name: xdl_smfma32x32_write_vgpr_valu_sdwa_write +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec + $vgpr1 = V_MOV_B32_sdwa 0, $vgpr16, 0, 5, 2, 4, implicit $exec, implicit $vgpr1(tied-def 0) +... +# GCN-LABEL: name: dmfma4x4_write_vgpr_valu_write +# GCN: V_MFMA +# GCN-NEXT: S_NOP 5 +# GCN-NEXT: V_MOV_B32 +name: dmfma4x4_write_vgpr_valu_write +body: | + bb.0: + $vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec + $vgpr4 = V_MOV_B32_e32 0, implicit $mode, implicit $exec +... 
+# GCN-LABEL: name: dmfma16x16_write_vgpr_valu_write +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MOV_B32 +name: dmfma16x16_write_vgpr_valu_write +body: | + bb.0: + $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec + $vgpr3 = V_MOV_B32_e32 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_accv_write +# GCN: V_MFMA +# GCN-NEXT: S_NOP 4 +# GCN-NEXT: V_ACCVGPR_READ_B32_e64 +name: xdl_smfma4x4_write_vgpr_accv_write +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec + $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_dot_write +# GCN: V_MFMA +# GCN-NEXT: S_NOP 4 +# GCN-NEXT: V_DOT +name: xdl_smfma4x4_write_vgpr_dot_write +body: | + bb.0: + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec + $vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: nonxdl_smfma4x4_read_srcc_vgpr_valu_write +# GCN: V_MFMA +# GCN-NEXT: V_MOV_B32 +name: nonxdl_smfma4x4_read_srcc_vgpr_valu_write +body: | + bb.0: + $vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr8, $vgpr9, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec + $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec +... 
+# GCN-LABEL: name: smfma16x16_read_srcc_vgpr_valu_write +# GCN: V_MFMA +# GCN-NEXT: V_MOV_B32 +name: smfma16x16_read_srcc_vgpr_valu_write +body: | + bb.0: + $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec + $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: smfma32x32_read_srcc_vgpr_valu_write +# GCN: V_MFMA +# GCN-NEXT: V_MOV_B32 +name: smfma32x32_read_srcc_vgpr_valu_write +body: | + bb.0: + $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X2F32_vgprcd_e64 $vgpr0, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec + $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: smfma4x4_read_srca_vgpr_valu_write +# GCN: V_MFMA +# GCN-NEXT: V_MOV_B32 +name: smfma4x4_read_srca_vgpr_valu_write +body: | + bb.0: + $vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr8, $vgpr9, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec + $vgpr8 = V_MOV_B32_e32 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: smfma16x16_read_srca_vgpr_valu_write +# GCN: V_MFMA +# GCN-NEXT: V_MOV_B32 +name: smfma16x16_read_srca_vgpr_valu_write +body: | + bb.0: + $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec + $vgpr18 = V_MOV_B32_e32 0, implicit $mode, implicit $exec +... 
+# GCN-LABEL: name: smfma32x32_read_srca_vgpr_valu_write +# GCN: V_MFMA +# GCN-NEXT: V_MOV_B32 +name: smfma32x32_read_srca_vgpr_valu_write +body: | + bb.0: + $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X2F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec + $vgpr18 = V_MOV_B32_e32 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: smfma4x4_read_srcb_vgpr_valu_write +# GCN: V_MFMA +# GCN-NEXT: V_MOV_B32 +name: smfma4x4_read_srcb_vgpr_valu_write +body: | + bb.0: + $vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr8, $vgpr9, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec + $vgpr9 = V_MOV_B32_e32 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: smfma16x16_read_srcb_vgpr_valu_write +# GCN: V_MFMA +# GCN-NEXT: V_MOV_B32 +name: smfma16x16_read_srcb_vgpr_valu_write +body: | + bb.0: + $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec + $vgpr19 = V_MOV_B32_e32 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: smfma32x32_read_srcb_vgpr_valu_write +# GCN: V_MFMA +# GCN-NEXT: V_MOV_B32 +name: smfma32x32_read_srcb_vgpr_valu_write +body: | + bb.0: + $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X2F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec + $vgpr19 = V_MOV_B32_e32 0, implicit $mode, implicit $exec +... 
+# GCN-LABEL: name: dmfma4x4_read_srcc_vgpr_valu_write +# GCN: V_MFMA +# GCN-NEXT: V_MOV_B32 +name: dmfma4x4_read_srcc_vgpr_valu_write +body: | + bb.0: + $vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec + $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: dmfma16x16_read_srcc_vgpr_valu_write +# GCN: V_MFMA +# GCN-NEXT: V_MOV_B32 +name: dmfma16x16_read_srcc_vgpr_valu_write +body: | + bb.0: + $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec + $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: smfma16x16_read_srcc_vgpr_accv_write +# GCN: V_MFMA +# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64 +name: smfma16x16_read_srcc_vgpr_accv_write +body: | + bb.0: + $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17 = V_MFMA_F32_16X16X1F32_e64 $agpr18, $agpr19, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + $agpr1 = V_ACCVGPR_WRITE_B32_e64 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: sgemm_to_fma64 +# GCN: V_MFMA +# GCN-NEXT: V_FMA_F64_e64 +name: sgemm_to_fma64 +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + $vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr4_vgpr5, 0, $vgpr4_vgpr5, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec +... 
+# GCN-LABEL: name: dgemm_to_fma64 +# GCN: V_MFMA +# GCN-NEXT: S_NOP 1 +# GCN-NEXT: V_FMA_F64_e64 +name: dgemm_to_fma64 +body: | + bb.0: + $vgpr0_vgpr1 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec + $vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr4_vgpr5, 0, $vgpr4_vgpr5, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: dgemm_to_fmac64 +# GCN: V_MFMA +# GCN-NEXT: S_NOP 1 +# GCN-NEXT: V_FMAC_F64 +name: dgemm_to_fmac64 +body: | + bb.0: + $vgpr0_vgpr1 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec + $vgpr4_vgpr5 = V_FMAC_F64_e32 $vgpr4_vgpr5, $vgpr4_vgpr5, $vgpr4_vgpr5, implicit $mode, implicit $exec +... +# GCN-LABEL: name: flat_store_data_agpr_overwritten +# GCN: FLAT_STORE_DWORDX4 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64 +name: flat_store_data_agpr_overwritten +body: | + bb.0: + FLAT_STORE_DWORDX4 $vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr + $agpr0 = V_ACCVGPR_WRITE_B32_e64 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: dot_write_vgpr_accv_read +# GCN: V_DOT +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64 +name: dot_write_vgpr_accv_read +body: | + bb.0: + $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec + $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec +... +# GCN-LABEL: name: valu_write_vgpr_dot_read +# GCN: V_MOV_B32 +# GCN-NEXT: V_DOT +name: valu_write_vgpr_dot_read +body: | + bb.0: + $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec + $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec +... +# GCN-LABEL: name: accv_write_vgpr_dot_read +# GCN: V_ACCVGPR_READ +# GCN-NEXT: V_DOT +name: accv_write_vgpr_dot_read +body: | + bb.0: + $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec + $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec +... 
+# GCN-LABEL: name: dot_write_vgpr_same_dot_read_srcc +# GCN: V_DOT +# GCN-NEXT: V_DOT +name: dot_write_vgpr_same_dot_read_srcc +body: | + bb.0: + $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec + $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec +... +# GCN-LABEL: name: dot_write_vgpr_different_dot_read_srcc +# GCN: V_DOT +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_DOT +name: dot_write_vgpr_different_dot_read_srcc +body: | + bb.0: + $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec + $vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: dot_write_vgpr_different_dot_write +# GCN: V_DOT +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_DOT +name: dot_write_vgpr_different_dot_write +body: | + bb.0: + $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec + $vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: dot_write_vgpr_different_valu_read +# GCN: V_DOT +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MOV_B32_e32 +name: dot_write_vgpr_different_valu_read +body: | + bb.0: + $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec + $vgpr0 = V_MOV_B32_e32 $vgpr4, implicit $exec +... +# GCN-LABEL: name: dot_write_vgpr_different_valu_write +# GCN: V_DOT +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MOV_B32_e32 +name: dot_write_vgpr_different_valu_write +body: | + bb.0: + $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec + $vgpr4 = V_MOV_B32_e32 1, implicit $exec +... +# GCN-LABEL: name: dot_write_vgpr_same_dot_read_srca +# GCN: V_DOT +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_DOT +name: dot_write_vgpr_same_dot_read_srca +body: | + bb.0: + $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec + $vgpr0 = V_DOT4C_I32_I8_e32 $vgpr4, $vgpr1, $vgpr0, implicit $mode, implicit $exec +... 
+# GCN-LABEL: name: dot_write_vgpr_same_dot_read_srcb +# GCN: V_DOT +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_DOT +name: dot_write_vgpr_same_dot_read_srcb +body: | + bb.0: + $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec + $vgpr0 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr4, $vgpr0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: vcmpx_write_exec_mfma +# GCN: V_CMPX_EQ_I32_e32 +# GCN-NEXT: S_NOP 3 +# GCN-NEXT: V_MFMA +name: vcmpx_write_exec_mfma +body: | + bb.0: + implicit $exec, implicit $vcc = V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec + $agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32_e64 killed $agpr8, killed $vgpr1, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: valu_write_agpr_dgemm_mfma_read +# GCN: V_ACCVGPR_WRITE_B32_e64 +# GCN: V_ACCVGPR_WRITE_B32_e64 +# GCN-NEXT: S_NOP 1 +# GCN-NEXT: V_MFMA +name: valu_write_agpr_dgemm_mfma_read +body: | + bb.0: + $agpr0 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec + $agpr1 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec + $agpr2_agpr3 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_read_same_agpr_as_srcc +# GCN: V_MFMA +# GCN-NEXT: V_MFMA +name: dgemm16x16_mfma_write_agpr_mfma_read_same_agpr_as_srcc +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $agpr10_agpr11, $agpr10_agpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $agpr10_agpr11, $agpr10_agpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec +... 
+# GCN-LABEL: name: dgemm4x4_mfma_write_agpr_mfma_read_same_agpr_as_srcc +# GCN: V_MFMA +# GCN-NEXT: S_NOP 3 +# GCN-NEXT: V_MFMA +name: dgemm4x4_mfma_write_agpr_mfma_read_same_agpr_as_srcc +body: | + bb.0: + $agpr2_agpr3 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $vgpr0_vgpr1, $agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + $agpr2_agpr3 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $vgpr0_vgpr1, $agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_MFMA +name: dgemm16x16_mfma_write_agpr_mfma_read_overlap +body: | + bb.0: + $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: dgemm4x4_mfma_write_agpr_mfma_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 3 +# GCN-NEXT: V_MFMA +name: dgemm4x4_mfma_write_agpr_mfma_read_overlap +body: | + bb.0: + $agpr2_agpr3 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec +... 
+# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_MFMA +name: dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap +body: | + bb.0: + $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr10, $vgpr11, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: sgemm4x4_mfma_write_agpr_dgemm_mfma_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MFMA +name: sgemm4x4_mfma_write_agpr_dgemm_mfma_read_overlap +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr10, $vgpr11, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_sgpr_dgemm_mfma_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_MFMA +name: xdl_sgemm16x16_mfma_write_sgpr_dgemm_mfma_read_overlap +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_I32_16X16X4I8_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $vgpr20_vgpr21, $vgpr20_vgpr21, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec +... 
+# GCN-LABEL: name: xdl_sgemm32x32_mfma_write_agpr_dgemm_mfma_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: V_MFMA +name: xdl_sgemm32x32_mfma_write_agpr_dgemm_mfma_read_overlap +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X4F16_e64 $vgpr26_vgpr27, $vgpr28_vgpr29, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $vgpr120_vgpr121, $vgpr120_vgpr121, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_dmfma4x4_srca_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 4 +# GCN-NEXT: V_MFMA +name: xdl_sgemm4x4_mfma_write_agpr_dmfma4x4_srca_read_overlap +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $agpr0_agpr1, $vgpr4_vgpr5, $vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_dmfma16x16_srca_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 4 +# GCN-NEXT: V_MFMA +name: xdl_sgemm4x4_mfma_write_agpr_dmfma16x16_srca_read_overlap +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $agpr0_agpr1, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec +...
+# GCN-LABEL: name: dgemm4x4_mfma_write_agpr_mfma_srca_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 5 +# GCN-NEXT: V_MFMA +name: dgemm4x4_mfma_write_agpr_mfma_srca_read_overlap +body: | + bb.0: + $agpr2_agpr3 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec + $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $agpr2_agpr3, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_srca_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MFMA +name: dgemm16x16_mfma_write_agpr_mfma_srca_read_overlap +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $agpr0_agpr1, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: dgemm4x4_mfma_write_agpr_sgemm_mfma_srca_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 5 +# GCN-NEXT: V_MFMA +name: dgemm4x4_mfma_write_agpr_sgemm_mfma_srca_read_overlap +body: | + bb.0: + $agpr4_agpr5 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $agpr4, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec +... 
+# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_sgemm_mfma_srca_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 7 +# GCN-NEXT: S_NOP 2 +# GCN-NEXT: V_MFMA +name: dgemm16x16_mfma_write_agpr_sgemm_mfma_srca_read_overlap +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17, 0, 0, 0, implicit $mode, implicit $exec + $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $agpr4, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_dgemm_mfma_srca_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 4 +# GCN-NEXT: V_MFMA +name: xdl_sgemm4x4_mfma_write_agpr_dgemm_mfma_srca_read_overlap +body: | + bb.0: + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr4, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + $vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $agpr0_agpr1, $vgpr4_vgpr5, $vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec +... +# GCN-LABEL: name: dgemm4x4_mfma_write_agpr_mfma_srcb_read_overlap +# GCN: V_MFMA +# GCN-NEXT: S_NOP 5 +# GCN-NEXT: V_MFMA +name: dgemm4x4_mfma_write_agpr_mfma_srcb_read_overlap +body: | + bb.0: + $agpr2_agpr3 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec + $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $agpr2_agpr3, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec +... 
+# V_MFMA_F64_16X16X4F64 writes agpr0..agpr7; the next V_MFMA_F64_16X16X4F64 reads
+# agpr0_agpr1 as its second source operand. The checks expect S_NOP 7 followed by S_NOP 2.
+# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_srcb_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_MFMA
+name: dgemm16x16_mfma_write_agpr_mfma_srcb_read_overlap
+body: |
+  bb.0:
+    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $agpr0_agpr1, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
+...
+# V_MFMA_F64_4X4X4F64 writes agpr4_agpr5; FLAT_STORE_DWORD then uses agpr5, i.e. one
+# half of the result. The checks expect S_NOP 7 followed by S_NOP 0 before the store.
+# GCN-LABEL: name: dmfma4x4_write_agpr_flat_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: FLAT_STORE_DWORD
+name: dmfma4x4_write_agpr_flat_read_overlap
+body: |
+  bb.0:
+    $agpr4_agpr5 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
+    FLAT_STORE_DWORD $vgpr0_vgpr1, $agpr5, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
+...
+# Same producer as above, but FLAT_STORE_DWORDX2 uses the whole agpr4_agpr5 result.
+# The checks expect the same S_NOP 7 followed by S_NOP 0 before the store.
+# GCN-LABEL: name: dmfma4x4_write_agpr_flat_read_full
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: FLAT_STORE_DWORD
+name: dmfma4x4_write_agpr_flat_read_full
+body: |
+  bb.0:
+    $agpr4_agpr5 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
+    FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $agpr4_agpr5, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
+...
+# V_MFMA_F64_16X16X4F64 writes agpr2..agpr9; FLAT_STORE_DWORD then uses agpr4.
+# The checks expect S_NOP 7, S_NOP 7 and S_NOP 1 before the store.
+# GCN-LABEL: name: dmfma16x16_write_agpr_flat_read
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 1
+# GCN-NEXT: FLAT_STORE_DWORD
+name: dmfma16x16_write_agpr_flat_read
+body: |
+  bb.0:
+    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
+    FLAT_STORE_DWORD $vgpr0_vgpr1, $agpr4, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
+...
+# V_MFMA_F64_4X4X4F64 writes agpr4_agpr5; V_ACCVGPR_READ_B32 then reads agpr5.
+# The checks expect S_NOP 5 between the two.
+# GCN-LABEL: name: dmfma4x4_write_agpr_valu_read
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 5
+# GCN-NEXT: V_ACCVGPR_READ_B32_e64
+name: dmfma4x4_write_agpr_valu_read
+body: |
+  bb.0:
+    $agpr4_agpr5 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec
+...
+# V_MFMA_F64_16X16X4F64 writes agpr2..agpr9; V_ACCVGPR_READ_B32 then reads agpr4.
+# The checks expect S_NOP 7 followed by S_NOP 2.
+# GCN-LABEL: name: dmfma16x16_write_agpr_valu_read
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_ACCVGPR_READ_B32_e64
+name: dmfma16x16_write_agpr_valu_read
+body: |
+  bb.0:
+    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
+...
+# V_MFMA_F64_4X4X4F64 writes agpr4_agpr5; V_ACCVGPR_WRITE_B32 then overwrites agpr4.
+# The checks expect S_NOP 5 between the two.
+# GCN-LABEL: name: dmfma4x4_write_agpr_valu_write
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 5
+# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
+name: dmfma4x4_write_agpr_valu_write
+body: |
+  bb.0:
+    $agpr4_agpr5 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr4 = V_ACCVGPR_WRITE_B32_e64 0, implicit $mode, implicit $exec
+...
+# V_MFMA_F64_16X16X4F64 writes agpr2..agpr9; V_ACCVGPR_WRITE_B32 then overwrites agpr3.
+# The checks expect S_NOP 7 followed by S_NOP 2.
+# GCN-LABEL: name: dmfma16x16_write_agpr_valu_write
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
+name: dmfma16x16_write_agpr_valu_write
+body: |
+  bb.0:
+    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr3 = V_ACCVGPR_WRITE_B32_e64 0, implicit $mode, implicit $exec
+...
+# V_MFMA_F64_4X4X4F64 reads agpr0_agpr1 as its third source operand; V_ACCVGPR_WRITE_B32
+# then overwrites agpr1. The checks expect the write to follow directly, with no S_NOP.
+# GCN-LABEL: name: dmfma4x4_read_srcc_agpr_valu_write
+# GCN: V_MFMA
+# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
+name: dmfma4x4_read_srcc_agpr_valu_write
+body: |
+  bb.0:
+    $agpr4_agpr5 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr1 = V_ACCVGPR_WRITE_B32_e64 0, implicit $mode, implicit $exec
+...
+# V_MFMA_F64_16X16X4F64 reads agpr0..agpr7 as its third source operand and writes
+# agpr2..agpr9; V_ACCVGPR_WRITE_B32 then overwrites agpr1, which is only in the source
+# tuple. The checks expect the write to follow directly, with no S_NOP.
+# GCN-LABEL: name: dmfma16x16_read_srcc_agpr_valu_write
+# GCN: V_MFMA
+# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
+name: dmfma16x16_read_srcc_agpr_valu_write
+body: |
+  bb.0:
+    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr1 = V_ACCVGPR_WRITE_B32_e64 0, implicit $mode, implicit $exec
+...
+# V_MFMA_F64_4X4X4F64 is followed by V_FMA_F64_e64, which only touches vgpr4_vgpr5 and
+# so shares no register with the MFMA. The checks still expect S_NOP 1 between the two.
+# GCN-LABEL: name: dgemm_accvgr_to_fma64
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 1
+# GCN-NEXT: V_FMA_F64_e64
+name: dgemm_accvgr_to_fma64
+body: |
+  bb.0:
+    $agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr4_vgpr5, 0, $vgpr4_vgpr5, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec
+...
+# V_MFMA_F64_4X4X4F64 (all operands in agpr0_agpr1) is followed by V_FMAC_F64_e32, which
+# only touches vgpr4_vgpr5. The checks expect S_NOP 1 between the two.
+# GCN-LABEL: name: dgemm_accvgr_to_fmac64
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 1
+# GCN-NEXT: V_FMAC_F64
+name: dgemm_accvgr_to_fmac64
+body: |
+  bb.0:
+    $agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr4_vgpr5 = V_FMAC_F64_e32 $vgpr4_vgpr5, $vgpr4_vgpr5, $vgpr4_vgpr5, implicit $mode, implicit $exec
+...
+# V_MFMA_F32_16X16X16F16 writes agpr2..agpr5; the next one reads agpr0..agpr3 as its
+# third source operand, overlapping on agpr2 and agpr3. The checks expect S_NOP 4.
+# GCN-LABEL: name: sgemm16X16X16_mfma_write_agpr_mfma_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_MFMA
+name: sgemm16X16X16_mfma_write_agpr_mfma_read_overlap
+body: |
+  bb.0:
+    $agpr2_agpr3_agpr4_agpr5 = V_MFMA_F32_16X16X16F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_16X16X16F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# Same overlap pattern as the previous test, using V_MFMA_I32_16X16X32I8 for both
+# instructions. The checks expect S_NOP 4.
+# GCN-LABEL: name: sgemm16X16X32_mfma_write_agpr_mfma_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_MFMA
+name: sgemm16X16X32_mfma_write_agpr_mfma_read_overlap
+body: |
+  bb.0:
+    $agpr2_agpr3_agpr4_agpr5 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# V_MFMA_F32_16X16X16F16 writes vgpr2..vgpr5; the following V_MFMA_F64_4X4X4F64 reads
+# vgpr2_vgpr3 as its third source operand. The checks expect S_NOP 4.
+# NOTE(review): the test name says "agpr" but every register used here is a VGPR.
+# GCN-LABEL: name: sgemm16X16X16_mfma_write_agpr_dgemm_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_MFMA
+name: sgemm16X16X16_mfma_write_agpr_dgemm_read_overlap
+body: |
+  bb.0:
+    $vgpr2_vgpr3_vgpr4_vgpr5 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr8_vgpr9, $vgpr8_vgpr9, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr6_vgpr7 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# V_MFMA_F32_16X16X16F16 writes agpr2..agpr5; the following V_SMFMAC uses agpr0..agpr3
+# as both its destination and its last register operand. The checks expect S_NOP 4.
+# GCN-LABEL: name: sgemm16X16X16_mfma_write_agpr_smfmac_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_SMFMAC
+name: sgemm16X16X16_mfma_write_agpr_smfmac_read_overlap
+body: |
+  bb.0:
+    $agpr2_agpr3_agpr4_agpr5 = V_MFMA_F32_16X16X16F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
+...
+# Two V_SMFMAC_F32_16X16X32_F16 in a row: the first writes agpr2..agpr5, the second
+# uses agpr0..agpr3, overlapping on agpr2 and agpr3. The checks expect S_NOP 4.
+# GCN-LABEL: name: smfmac16x16_write_agpr_smfmac_read_overlap
+# GCN: V_SMFMAC
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_SMFMAC
+name: smfmac16x16_write_agpr_smfmac_read_overlap
+body: |
+  bb.0:
+    $agpr2_agpr3_agpr4_agpr5 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
+...
+# V_MFMA_I32_16X16X32I8 writes agpr0..agpr3; the following V_MFMA_F32_4X4X1F32 reads
+# agpr1 as its first source operand. The checks expect S_NOP 6.
+# NOTE(review): the test name says 16X16X16 but the producer opcode is 16X16X32I8.
+# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_agpr_mfma_srca_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 6
+# GCN-NEXT: V_MFMA
+name: xdl_sgemm16X16X16_mfma_write_agpr_mfma_srca_read_overlap
+body: |
+  bb.0:
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# V_MFMA_I32_16X16X32I8 writes agpr0..agpr3; the following V_MFMA_F32_4X4X1F32 reads
+# agpr1 as its second source operand. The checks expect S_NOP 6.
+# GCN-LABEL: name: xdl_sgemm16X16X32_mfma_write_agpr_mfma_srcb_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 6
+# GCN-NEXT: V_MFMA
+name: xdl_sgemm16X16X32_mfma_write_agpr_mfma_srcb_read_overlap
+body: |
+  bb.0:
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr0, $agpr1, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# V_MFMA_I32_16X16X32I8 writes vgpr0..vgpr3; the following V_MFMA_F64_4X4X4F64 reads
+# vgpr0_vgpr1 as its first source operand. The checks expect S_NOP 6.
+# NOTE(review): the name says 16X16X16 and dmfma16x16, but the opcodes used are
+# 16X16X32I8 and F64_4X4X4F64.
+# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_dmfma16x16_srca_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 6
+# GCN-NEXT: V_MFMA
+name: xdl_sgemm16X16X16_mfma_write_vgpr_dmfma16x16_srca_read_overlap
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_16X16X32I8_vgprcd_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr4_vgpr5, $vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec
+...
+# V_MFMA_I32_16X16X32I8 writes vgpr0..vgpr3; V_MOV_B32 then overwrites vgpr1.
+# The checks expect S_NOP 6 between the two.
+# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_write
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 6
+# GCN-NEXT: V_MOV_B32
+name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_write
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_16X16X32I8_vgprcd_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
+...
+# V_MFMA_I32_16X16X32I8 writes vgpr0..vgpr3; BUFFER_STORE_DWORD_OFFSET then uses vgpr0.
+# The checks expect S_NOP 6 before the store.
+# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_vm_read
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 6
+# GCN-NEXT: BUFFER_STORE_DWORD
+name: xdl_sgemm16X16X16_mfma_write_vgpr_vm_read
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_16X16X32I8_vgprcd_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+...
+# V_MFMA_I32_16X16X32I8 writes vgpr0..vgpr3; V_MOV_B32 then reads vgpr0.
+# The checks expect S_NOP 6 between the two.
+# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_read
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 6
+# GCN-NEXT: V_MOV_B32
+name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_read
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_16X16X32I8_vgprcd_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $mode, implicit $exec
+...
+# V_MFMA_I32_16X16X32I8 writes vgpr0..vgpr3; V_DOT4C_I32_I8 then reads vgpr0 and vgpr1.
+# The checks expect S_NOP 6 between the two.
+# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_dot_read
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 6
+# GCN-NEXT: V_DOT
+name: xdl_sgemm16X16X16_mfma_write_vgpr_dot_read
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_16X16X32I8_vgprcd_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
+...
+# Two identical V_SMFMAC_F32_16X16X32_F16, both using exactly agpr0..agpr3 as the
+# destination and last register operand. The checks expect them back to back, no S_NOP.
+# GCN-LABEL: name: smfmac16x16x32_write_agpr_mfma_read_same_agpr_as_srcc
+# GCN: V_SMFMAC
+# GCN-NEXT: V_SMFMAC
+name: smfmac16x16x32_write_agpr_mfma_read_same_agpr_as_srcc
+body: |
+  bb.0:
+    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
+...
+# Two identical V_SMFMAC_I32_32X32X32_I8, both using exactly agpr0..agpr15 as the
+# destination and last register operand. The checks expect them back to back, no S_NOP.
+# GCN-LABEL: name: smfmac32x32x32_write_agpr_mfma_read_same_agpr_as_srcc
+# GCN: V_SMFMAC
+# GCN-NEXT: V_SMFMAC
+name: smfmac32x32x32_write_agpr_mfma_read_same_agpr_as_srcc
+body: |
+  bb.0:
+    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_SMFMAC_I32_32X32X32_I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_SMFMAC_I32_32X32X32_I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $mode, implicit $exec
+...
+# V_SMFMAC_F32_16X16X32_F16 writes agpr0..agpr3; the next one uses agpr2..agpr5, a
+# shifted tuple that overlaps on agpr2 and agpr3. The checks expect S_NOP 4.
+# GCN-LABEL: name: smfmac16x16x32_mfma_write_agpr_mfma_read_overlap
+# GCN: V_SMFMAC
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_SMFMAC
+name: smfmac16x16x32_mfma_write_agpr_mfma_read_overlap
+body: |
+  bb.0:
+    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
+    $agpr2_agpr3_agpr4_agpr5 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5, implicit $mode, implicit $exec
+...
+# V_SMFMAC_F32_32X32X16_BF16 writes agpr0..agpr15; the next one uses agpr2..agpr17, a
+# shifted tuple that partially overlaps. The checks expect S_NOP 7 followed by S_NOP 0.
+# NOTE(review): the test name says 32x32x32 but the opcode is 32X32X16_BF16.
+# GCN-LABEL: name: smfmac32x32x32_mfma_write_agpr_mfma_read_overlap
+# GCN: V_SMFMAC
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_SMFMAC
+name: smfmac32x32x32_mfma_write_agpr_mfma_read_overlap
+body: |
+  bb.0:
+    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_SMFMAC_F32_32X32X16_BF16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $mode, implicit $exec
+    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17 = V_SMFMAC_F32_32X32X16_BF16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17, implicit $mode, implicit $exec
+...
+# V_SMFMAC_F32_16X16X32_F16 writes vgpr6..vgpr9; the next one passes vgpr6 as its third
+# source operand (the slot the other SMFMAC tests fill with vgpr32; "idx" per the test
+# name). The checks expect S_NOP 6.
+# GCN-LABEL: name: smfmac16x16x32_mfma_write_vgpr_smfmac_read_idx
+# GCN: V_SMFMAC
+# GCN-NEXT: S_NOP 6
+# GCN-NEXT: V_SMFMAC
+name: smfmac16x16x32_mfma_write_vgpr_smfmac_read_idx
+body: |
+  bb.0:
+    $vgpr6_vgpr7_vgpr8_vgpr9 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $vgpr6_vgpr7_vgpr8_vgpr9, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr6, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3, implicit $mode, implicit $exec
+...
+# V_MFMA_F64_4X4X4F64 writes vgpr2_vgpr3; the following V_SMFMAC_F32_16X16X32_BF16 uses
+# vgpr0..vgpr3 as its destination and last register operand. The checks expect the
+# SMFMAC to follow directly, with no S_NOP.
+# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_smfmac16x16x32_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: V_SMFMAC
+name: dgemm4x4_mfma_write_vgpr_smfmac16x16x32_read_overlap
+body: |
+  bb.0:
+    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_SMFMAC_F32_16X16X32_BF16_e64 $vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15, $vgpr32, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3, implicit $mode, implicit $exec
+...
+# V_MFMA_F64_16X16X4F64 writes vgpr2..vgpr9; the following V_MFMA_I32_16X16X32I8 reads
+# vgpr0..vgpr3 as its third source operand. The checks expect no S_NOP between them.
+# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfmai8_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: V_MFMA
+name: dgemm16x16_mfma_write_vgpr_mfmai8_read_overlap
+body: |
+  bb.0:
+    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_16X16X32I8_vgprcd_e64 $vgpr10_vgpr11, $vgpr12_vgpr13, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# Same producer as above, followed by V_MFMA_F32_16X16X8XF32 reading vgpr0..vgpr3 as
+# its third source operand. The checks expect no S_NOP between them.
+# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfmaxf32_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: V_MFMA
+name: dgemm16x16_mfma_write_vgpr_mfmaxf32_read_overlap
+body: |
+  bb.0:
+    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X8XF32_vgprcd_e64 $vgpr10_vgpr11, $vgpr12_vgpr13, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# V_MFMA_F32_4X4X1F32 writes agpr2..agpr5; the next V_MFMA_F32_4X4X1F32 reads
+# agpr0..agpr3 as its third source operand. The checks expect S_NOP 1.
+# GCN-LABEL: name: nonxdl_sgemm4x4_mfma_write_agpr_nonxdl_mfma_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 1
+# GCN-NEXT: V_MFMA
+name: nonxdl_sgemm4x4_mfma_write_agpr_nonxdl_mfma_read_overlap
+body: |
+  bb.0:
+    $agpr2_agpr3_agpr4_agpr5 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# V_MFMA_F32_4X4X1F32 writes agpr2..agpr5; the following V_MFMA_I32_4X4X4I8 reads
+# agpr0..agpr3 as its third source operand. The checks expect no S_NOP between them.
+# GCN-LABEL: name: nonxdl_sgemm4x4_mfma_write_agpr_xdl_mfma_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: V_MFMA
+name: nonxdl_sgemm4x4_mfma_write_agpr_xdl_mfma_read_overlap
+body: |
+  bb.0:
+    $agpr2_agpr3_agpr4_agpr5 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# The reverse pairing: V_MFMA_I32_4X4X4I8 writes agpr2..agpr5 and V_MFMA_F32_4X4X1F32
+# reads agpr0..agpr3 as its third source operand. The checks expect S_NOP 2.
+# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_mfma_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_MFMA
+name: xdl_sgemm4x4_mfma_write_agpr_mfma_read_overlap
+body: |
+  bb.0:
+    $agpr2_agpr3_agpr4_agpr5 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# V_MFMA_F32_4X4X1F32 writes agpr0..agpr3; the next one reads agpr1 as its first source
+# operand. The checks expect S_NOP 3.
+# GCN-LABEL: name: nonxdl_sgemm4x4_mfma_write_agpr_mfma_srca_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_MFMA
+name: nonxdl_sgemm4x4_mfma_write_agpr_mfma_srca_read_overlap
+body: |
+  bb.0:
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# V_MFMA_F32_4X4X1F32 writes vgpr0..vgpr3; BUFFER_STORE_DWORD_OFFSET then uses vgpr0.
+# The checks expect S_NOP 3 before the store.
+# GCN-LABEL: name: nonxdl_smfma4x4_write_vgpr_vm_read
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: BUFFER_STORE_DWORD
+name: nonxdl_smfma4x4_write_vgpr_vm_read
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+...
+# V_MFMA_F32_4X4X1F32 writes vgpr0..vgpr3; V_MOV_B32 then reads vgpr0.
+# The checks expect S_NOP 3 between the two.
+# GCN-LABEL: name: nonxdl_smfma4x4_write_vgpr_valu_read
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_MOV_B32
+name: nonxdl_smfma4x4_write_vgpr_valu_read
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $mode, implicit $exec
+...
+# V_MFMA_F32_4X4X1F32 writes vgpr0..vgpr3 (its third source operand is the separate
+# tuple vgpr6..vgpr9); V_MOV_B32 then overwrites vgpr1. The checks expect S_NOP 3.
+# GCN-LABEL: name: nonxdl_smfma4x4_write_vgpr_valu_write
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 3
+# GCN-NEXT: V_MOV_B32
+name: nonxdl_smfma4x4_write_vgpr_valu_write
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr4, $vgpr0, $vgpr6_vgpr7_vgpr8_vgpr9, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
+...
+# V_MFMA_F32_16X16X1F32 writes vgpr0..vgpr15; BUFFER_STORE_DWORD_OFFSET then uses vgpr0.
+# The checks expect S_NOP 7 followed by S_NOP 1 before the store.
+# GCN-LABEL: name: nonxdl_8pass_smfma16x16_write_vgpr_vm_read
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 1
+# GCN-NEXT: BUFFER_STORE_DWORD
+name: nonxdl_8pass_smfma16x16_write_vgpr_vm_read
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+...
+# V_MFMA_F32_16X16X1F32 writes vgpr0..vgpr15; V_MOV_B32 then reads vgpr0.
+# The checks expect S_NOP 7 followed by S_NOP 1.
+# GCN-LABEL: name: nonxdl_8pass_smfma16x16_write_vgpr_valu_read
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 1
+# GCN-NEXT: V_MOV_B32
+name: nonxdl_8pass_smfma16x16_write_vgpr_valu_read
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $mode, implicit $exec
+...
+# V_MFMA_F32_16X16X1F32 writes vgpr0..vgpr15; V_MOV_B32 then overwrites vgpr1.
+# The checks expect S_NOP 7 followed by S_NOP 1.
+# GCN-LABEL: name: nonxdl_8pass_smfma16x16_write_vgpr_valu_write
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 1
+# GCN-NEXT: V_MOV_B32
+name: nonxdl_8pass_smfma16x16_write_vgpr_valu_write
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
+...
+# V_MFMA_F32_32X32X1F32 writes vgpr0..vgpr31; BUFFER_STORE_DWORD_OFFSET then uses vgpr0.
+# The checks expect S_NOP 7, S_NOP 7 and S_NOP 1 before the store.
+# GCN-LABEL: name: nonxdl_smfma32x32_write_vgpr_vm_read
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 1
+# GCN-NEXT: BUFFER_STORE_DWORD
+name: nonxdl_smfma32x32_write_vgpr_vm_read
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X1F32_vgprcd_e64 $agpr26, $agpr28, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+...
+# V_MFMA_F32_32X32X1F32 writes vgpr0..vgpr31; V_MOV_B32 then reads vgpr0.
+# The checks expect S_NOP 7, S_NOP 7 and S_NOP 1.
+# GCN-LABEL: name: nonxdl_smfma32x32_write_vgpr_valu_read
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 1
+# GCN-NEXT: V_MOV_B32
+name: nonxdl_smfma32x32_write_vgpr_valu_read
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X1F32_vgprcd_e64 $agpr26, $agpr28, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $mode, implicit $exec
+...
+# V_MFMA_F32_32X32X1F32 writes vgpr0..vgpr31; V_MOV_B32 then overwrites vgpr1.
+# The checks expect S_NOP 7, S_NOP 7 and S_NOP 1.
+# GCN-LABEL: name: nonxdl_smfma32x32_write_vgpr_valu_write
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 1
+# GCN-NEXT: V_MOV_B32
+name: nonxdl_smfma32x32_write_vgpr_valu_write
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X1F32_vgprcd_e64 $agpr26, $agpr28, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
+...
+# V_MFMA_I32_16X16X32I8 writes agpr2..agpr5; the following V_MFMA_F32_16X16X8XF32 reads
+# agpr0..agpr3 as its third source operand. The checks expect S_NOP 4.
+# GCN-LABEL: name: xdl_sgemm16x16_4pass_mfma_write_agpr_mfma_read_overlap
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_MFMA
+name: xdl_sgemm16x16_4pass_mfma_write_agpr_mfma_read_overlap
+body: |
+  bb.0:
+    $agpr2_agpr3_agpr4_agpr5 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_16X16X8XF32_e64 $vgpr10_vgpr11, $vgpr12_vgpr13, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# V_SMFMAC_F32_16X16X32_F16 writes agpr2..agpr5; the following V_MFMA_F32_16X16X8XF32
+# reads agpr0..agpr3 as its third source operand. The checks expect S_NOP 4.
+# GCN-LABEL: name: smfmac16x16_mfma_write_agpr_mfma_read_overlap
+# GCN: V_SMFMAC
+# GCN-NEXT: S_NOP 4
+# GCN-NEXT: V_MFMA
+name: smfmac16x16_mfma_write_agpr_mfma_read_overlap
+body: |
+  bb.0:
+    $agpr2_agpr3_agpr4_agpr5 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5, implicit $mode, implicit $exec
+    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_16X16X8XF32_e64 $vgpr10_vgpr11, $vgpr12_vgpr13, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
+...
+# V_MFMA_I32_4X4X4I8 reads vgpr0..vgpr3 as its third source operand (it writes
+# vgpr10..vgpr13); V_MOV_B32 then overwrites vgpr1. The checks expect S_NOP 0.
+# GCN-LABEL: name: xdl_sgemm4x4_mfma_read_vgpr_srcc_valu_write
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: V_MOV_B32
+name: xdl_sgemm4x4_mfma_read_vgpr_srcc_valu_write
+body: |
+  bb.0:
+    $vgpr10_vgpr11_vgpr12_vgpr13 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr4, $vgpr5, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+...
+# Same register layout as the previous test with V_MFMA_F32_4X4X1F32 as the reader.
+# The checks expect V_MOV_B32 to follow directly, with no S_NOP.
+# GCN-LABEL: name: nonxdl_sgemm4x4_mfma_read_vgpr_srcc_valu_write
+# GCN: V_MFMA
+# GCN-NEXT: V_MOV_B32
+name: nonxdl_sgemm4x4_mfma_read_vgpr_srcc_valu_write
+body: |
+  bb.0:
+    $vgpr10_vgpr11_vgpr12_vgpr13 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr4, $vgpr5, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+...
+# V_MFMA_F32_16X16X8XF32 reads vgpr0..vgpr3 as its third source operand; V_MOV_B32 then
+# overwrites vgpr1. The checks expect S_NOP 2 between the two.
+# GCN-LABEL: name: xdl_4pass_sgemm16x16_mfma_read_vgpr_srcc_valu_write
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_MOV_B32
+name: xdl_4pass_sgemm16x16_mfma_read_vgpr_srcc_valu_write
+body: |
+  bb.0:
+    $vgpr10_vgpr11_vgpr12_vgpr13 = V_MFMA_F32_16X16X8XF32_vgprcd_e64 $vgpr10_vgpr11, $vgpr12_vgpr13, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+...
+# V_SMFMAC_F32_16X16X32_F16 uses vgpr0..vgpr3 as both destination and last register
+# operand; V_MOV_B32 then overwrites vgpr1. The checks expect S_NOP 6.
+# GCN-LABEL: name: smfmac16x16_read_vgpr_srcc_valu_write
+# GCN: V_SMFMAC
+# GCN-NEXT: S_NOP 6
+# GCN-NEXT: V_MOV_B32
+name: smfmac16x16_read_vgpr_srcc_valu_write
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3, implicit $mode, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+...
+# V_MFMA_I32_16X16X4I8 reads vgpr0..vgpr15 as its third source operand (it writes
+# vgpr100..vgpr115); V_MOV_B32 then overwrites vgpr1. The checks expect S_NOP 6.
+# GCN-LABEL: name: xdl_8pass_sgemm16x16_mfma_read_vgpr_srcc_valu_write
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 6
+# GCN-NEXT: V_MOV_B32
+name: xdl_8pass_sgemm16x16_mfma_read_vgpr_srcc_valu_write
+body: |
+  bb.0:
+    $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $agpr26, $agpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+...
+# Same register layout as the previous test with V_MFMA_F32_16X16X1F32 as the reader.
+# The checks expect V_MOV_B32 to follow directly, with no S_NOP.
+# GCN-LABEL: name: nonxdl_8pass_sgemm16x16_mfma_read_vgpr_srcc_valu_write
+# GCN: V_MFMA
+# GCN-NEXT: V_MOV_B32
+name: nonxdl_8pass_sgemm16x16_mfma_read_vgpr_srcc_valu_write
+body: |
+  bb.0:
+    $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $agpr26, $agpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+...
+# V_SMFMAC_I32_32X32X32_I8 uses vgpr0..vgpr15 as both destination and last register
+# operand; V_MOV_B32 then overwrites vgpr1. The checks expect S_NOP 7 followed by S_NOP 2.
+# GCN-LABEL: name: smfmac32x32_read_vgpr_srcc_valu_write
+# GCN: V_SMFMAC
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 2
+# GCN-NEXT: V_MOV_B32
+name: smfmac32x32_read_vgpr_srcc_valu_write
+body: |
+  bb.0:
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_SMFMAC_I32_32X32X32_I8_e64 $agpr0_agpr1, $agpr2_agpr3_agpr4_agpr5, $vgpr2, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $mode, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+...
+# V_MFMA_F32_32X32X4F16 reads vgpr0..vgpr31 as its third source operand (it writes
+# vgpr40..vgpr71); V_MOV_B32 then overwrites vgpr1. The checks expect S_NOP 7 followed
+# by S_NOP 6.
+# GCN-LABEL: name: xdl_sgemm32x32_mfma_read_vgpr_srcc_valu_write
+# GCN: V_MFMA
+# GCN-NEXT: S_NOP 7
+# GCN-NEXT: S_NOP 6
+# GCN-NEXT: V_MOV_B32
+name: xdl_sgemm32x32_mfma_read_vgpr_srcc_valu_write
+body: |
+  bb.0:
+    $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $agpr126_agpr127, $agpr128_agpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+...
+# V_MFMA_F32_32X32X1F32 reads vgpr0..vgpr31 as its third source operand (it writes
+# vgpr40..vgpr71); V_MOV_B32 then overwrites vgpr1. The checks expect V_MOV_B32 to
+# follow directly, with no S_NOP.
+# GCN-LABEL: name: nonxdl_sgemm32x32_mfma_read_vgpr_srcc_valu_write
+# GCN: V_MFMA
+# GCN-NEXT: V_MOV_B32
+name: nonxdl_sgemm32x32_mfma_read_vgpr_srcc_valu_write
+body: |
+  bb.0:
+    $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_MFMA_F32_32X32X1F32_vgprcd_e64 $agpr26, $agpr28, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+...