Index: include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- include/llvm/IR/IntrinsicsAMDGPU.td +++ include/llvm/IR/IntrinsicsAMDGPU.td @@ -1639,6 +1639,90 @@ [IntrNoMem, IntrSpeculatable, ImmArg<3>] >; +//===----------------------------------------------------------------------===// +// MI-100 intrinsics +// ===----------------------------------------------------------------------===// +// llvm.amdgcn.mfma.f32.* vdst, srcA, srcB, srcC, cbsz, abid, blgp +def int_amdgcn_mfma_f32_32x32x1f32 : Intrinsic<[llvm_v32i32_ty], + [llvm_float_ty, llvm_float_ty, llvm_v32i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + +def int_amdgcn_mfma_f32_16x16x1f32 : Intrinsic<[llvm_v16f32_ty], + [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + +def int_amdgcn_mfma_f32_4x4x1f32 : Intrinsic<[llvm_v4f32_ty], + [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + +def int_amdgcn_mfma_f32_32x32x2f32 : Intrinsic<[llvm_v16f32_ty], + [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + +def int_amdgcn_mfma_f32_16x16x4f32 : Intrinsic<[llvm_v4f32_ty], + [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + +def int_amdgcn_mfma_f32_32x32x4f16 : Intrinsic<[llvm_v32i32_ty], + [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v32i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + +def int_amdgcn_mfma_f32_16x16x4f16 : Intrinsic<[llvm_v16f32_ty], + [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + +def int_amdgcn_mfma_f32_4x4x4f16 : Intrinsic<[llvm_v4f32_ty], + [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, 
IntrNoMem]>; + +def int_amdgcn_mfma_f32_32x32x8f16 : Intrinsic<[llvm_v16f32_ty], + [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + +def int_amdgcn_mfma_f32_16x16x16f16 : Intrinsic<[llvm_v4f32_ty], + [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + +def int_amdgcn_mfma_i32_32x32x4i8 : Intrinsic<[llvm_v32i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_v32i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + +def int_amdgcn_mfma_i32_16x16x4i8 : Intrinsic<[llvm_v16i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + +def int_amdgcn_mfma_i32_4x4x4i8 : Intrinsic<[llvm_v4i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + +def int_amdgcn_mfma_i32_32x32x8i8 : Intrinsic<[llvm_v16i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + +def int_amdgcn_mfma_i32_16x16x16i8 : Intrinsic<[llvm_v4i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + +def int_amdgcn_mfma_f32_32x32x2bf16 : Intrinsic<[llvm_v32i32_ty], + [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v32i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + +def int_amdgcn_mfma_f32_16x16x2bf16 : Intrinsic<[llvm_v16f32_ty], + [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + +def int_amdgcn_mfma_f32_4x4x2bf16 : Intrinsic<[llvm_v4f32_ty], + [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + +def int_amdgcn_mfma_f32_32x32x4bf16 : Intrinsic<[llvm_v16f32_ty], + [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty, + 
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + +def int_amdgcn_mfma_f32_16x16x8bf16 : Intrinsic<[llvm_v4f32_ty], + [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + //===----------------------------------------------------------------------===// // Special Intrinsics for backend internal use only. No frontend // should emit calls to these. Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -634,6 +634,11 @@ HighestVGPRReg = Reg; break; } + MCPhysReg AReg = AMDGPU::AGPR0 + TRI.getHWRegIndex(Reg); + if (MRI.isPhysRegUsed(AReg)) { + HighestVGPRReg = AReg; + break; + } } MCPhysReg HighestSGPRReg = AMDGPU::NoRegister; @@ -737,6 +742,9 @@ } else if (AMDGPU::VGPR_32RegClass.contains(Reg)) { IsSGPR = false; Width = 1; + } else if (AMDGPU::AGPR_32RegClass.contains(Reg)) { + IsSGPR = false; + Width = 1; } else if (AMDGPU::SReg_64RegClass.contains(Reg)) { assert(!AMDGPU::TTMP_64RegClass.contains(Reg) && "trap handler registers should not be used"); @@ -745,9 +753,15 @@ } else if (AMDGPU::VReg_64RegClass.contains(Reg)) { IsSGPR = false; Width = 2; + } else if (AMDGPU::AReg_64RegClass.contains(Reg)) { + IsSGPR = false; + Width = 2; } else if (AMDGPU::VReg_96RegClass.contains(Reg)) { IsSGPR = false; Width = 3; + } else if (AMDGPU::SReg_96RegClass.contains(Reg)) { + IsSGPR = true; + Width = 3; } else if (AMDGPU::SReg_128RegClass.contains(Reg)) { assert(!AMDGPU::TTMP_128RegClass.contains(Reg) && "trap handler registers should not be used"); @@ -756,6 +770,9 @@ } else if (AMDGPU::VReg_128RegClass.contains(Reg)) { IsSGPR = false; Width = 4; + } else if (AMDGPU::AReg_128RegClass.contains(Reg)) { + IsSGPR = false; + Width = 4; } else if (AMDGPU::SReg_256RegClass.contains(Reg)) { assert(!AMDGPU::TTMP_256RegClass.contains(Reg) && "trap handler registers should 
not be used"); @@ -772,9 +789,18 @@ } else if (AMDGPU::VReg_512RegClass.contains(Reg)) { IsSGPR = false; Width = 16; - } else if (AMDGPU::SReg_96RegClass.contains(Reg)) { + } else if (AMDGPU::AReg_512RegClass.contains(Reg)) { + IsSGPR = false; + Width = 16; + } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) { IsSGPR = true; - Width = 3; + Width = 32; + } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) { + IsSGPR = false; + Width = 32; + } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) { + IsSGPR = false; + Width = 32; } else { llvm_unreachable("Unknown register class"); } Index: lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp +++ lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp @@ -350,4 +350,244 @@ "s[72:87]", "s[76:91]", "s[80:95]", "s[84:99]", "s[88:103]" }; +static const char *const AGPR32RegNames[] = { + "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", + "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17", + "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", + "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35", + "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44", + "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53", + "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62", + "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71", + "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", + "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", + "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", + "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107", + "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116", + "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125", + "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134", + "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", 
"a143", + "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152", + "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161", + "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170", + "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179", + "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188", + "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", + "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206", + "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215", + "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224", + "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233", + "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242", + "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251", + "a252", "a253", "a254", "a255" +}; + +static const char *const AGPR64RegNames[] = { + "a[0:1]", "a[1:2]", "a[2:3]", "a[3:4]", "a[4:5]", + "a[5:6]", "a[6:7]", "a[7:8]", "a[8:9]", "a[9:10]", + "a[10:11]", "a[11:12]", "a[12:13]", "a[13:14]", "a[14:15]", + "a[15:16]", "a[16:17]", "a[17:18]", "a[18:19]", "a[19:20]", + "a[20:21]", "a[21:22]", "a[22:23]", "a[23:24]", "a[24:25]", + "a[25:26]", "a[26:27]", "a[27:28]", "a[28:29]", "a[29:30]", + "a[30:31]", "a[31:32]", "a[32:33]", "a[33:34]", "a[34:35]", + "a[35:36]", "a[36:37]", "a[37:38]", "a[38:39]", "a[39:40]", + "a[40:41]", "a[41:42]", "a[42:43]", "a[43:44]", "a[44:45]", + "a[45:46]", "a[46:47]", "a[47:48]", "a[48:49]", "a[49:50]", + "a[50:51]", "a[51:52]", "a[52:53]", "a[53:54]", "a[54:55]", + "a[55:56]", "a[56:57]", "a[57:58]", "a[58:59]", "a[59:60]", + "a[60:61]", "a[61:62]", "a[62:63]", "a[63:64]", "a[64:65]", + "a[65:66]", "a[66:67]", "a[67:68]", "a[68:69]", "a[69:70]", + "a[70:71]", "a[71:72]", "a[72:73]", "a[73:74]", "a[74:75]", + "a[75:76]", "a[76:77]", "a[77:78]", "a[78:79]", "a[79:80]", + "a[80:81]", "a[81:82]", "a[82:83]", 
"a[83:84]", "a[84:85]", + "a[85:86]", "a[86:87]", "a[87:88]", "a[88:89]", "a[89:90]", + "a[90:91]", "a[91:92]", "a[92:93]", "a[93:94]", "a[94:95]", + "a[95:96]", "a[96:97]", "a[97:98]", "a[98:99]", "a[99:100]", + "a[100:101]", "a[101:102]", "a[102:103]", "a[103:104]", "a[104:105]", + "a[105:106]", "a[106:107]", "a[107:108]", "a[108:109]", "a[109:110]", + "a[110:111]", "a[111:112]", "a[112:113]", "a[113:114]", "a[114:115]", + "a[115:116]", "a[116:117]", "a[117:118]", "a[118:119]", "a[119:120]", + "a[120:121]", "a[121:122]", "a[122:123]", "a[123:124]", "a[124:125]", + "a[125:126]", "a[126:127]", "a[127:128]", "a[128:129]", "a[129:130]", + "a[130:131]", "a[131:132]", "a[132:133]", "a[133:134]", "a[134:135]", + "a[135:136]", "a[136:137]", "a[137:138]", "a[138:139]", "a[139:140]", + "a[140:141]", "a[141:142]", "a[142:143]", "a[143:144]", "a[144:145]", + "a[145:146]", "a[146:147]", "a[147:148]", "a[148:149]", "a[149:150]", + "a[150:151]", "a[151:152]", "a[152:153]", "a[153:154]", "a[154:155]", + "a[155:156]", "a[156:157]", "a[157:158]", "a[158:159]", "a[159:160]", + "a[160:161]", "a[161:162]", "a[162:163]", "a[163:164]", "a[164:165]", + "a[165:166]", "a[166:167]", "a[167:168]", "a[168:169]", "a[169:170]", + "a[170:171]", "a[171:172]", "a[172:173]", "a[173:174]", "a[174:175]", + "a[175:176]", "a[176:177]", "a[177:178]", "a[178:179]", "a[179:180]", + "a[180:181]", "a[181:182]", "a[182:183]", "a[183:184]", "a[184:185]", + "a[185:186]", "a[186:187]", "a[187:188]", "a[188:189]", "a[189:190]", + "a[190:191]", "a[191:192]", "a[192:193]", "a[193:194]", "a[194:195]", + "a[195:196]", "a[196:197]", "a[197:198]", "a[198:199]", "a[199:200]", + "a[200:201]", "a[201:202]", "a[202:203]", "a[203:204]", "a[204:205]", + "a[205:206]", "a[206:207]", "a[207:208]", "a[208:209]", "a[209:210]", + "a[210:211]", "a[211:212]", "a[212:213]", "a[213:214]", "a[214:215]", + "a[215:216]", "a[216:217]", "a[217:218]", "a[218:219]", "a[219:220]", + "a[220:221]", "a[221:222]", "a[222:223]", "a[223:224]", 
"a[224:225]", + "a[225:226]", "a[226:227]", "a[227:228]", "a[228:229]", "a[229:230]", + "a[230:231]", "a[231:232]", "a[232:233]", "a[233:234]", "a[234:235]", + "a[235:236]", "a[236:237]", "a[237:238]", "a[238:239]", "a[239:240]", + "a[240:241]", "a[241:242]", "a[242:243]", "a[243:244]", "a[244:245]", + "a[245:246]", "a[246:247]", "a[247:248]", "a[248:249]", "a[249:250]", + "a[250:251]", "a[251:252]", "a[252:253]", "a[253:254]", "a[254:255]" +}; + +static const char *const AGPR128RegNames[] = { + "a[0:3]", "a[1:4]", "a[2:5]", "a[3:6]", "a[4:7]", + "a[5:8]", "a[6:9]", "a[7:10]", "a[8:11]", "a[9:12]", + "a[10:13]", "a[11:14]", "a[12:15]", "a[13:16]", "a[14:17]", + "a[15:18]", "a[16:19]", "a[17:20]", "a[18:21]", "a[19:22]", + "a[20:23]", "a[21:24]", "a[22:25]", "a[23:26]", "a[24:27]", + "a[25:28]", "a[26:29]", "a[27:30]", "a[28:31]", "a[29:32]", + "a[30:33]", "a[31:34]", "a[32:35]", "a[33:36]", "a[34:37]", + "a[35:38]", "a[36:39]", "a[37:40]", "a[38:41]", "a[39:42]", + "a[40:43]", "a[41:44]", "a[42:45]", "a[43:46]", "a[44:47]", + "a[45:48]", "a[46:49]", "a[47:50]", "a[48:51]", "a[49:52]", + "a[50:53]", "a[51:54]", "a[52:55]", "a[53:56]", "a[54:57]", + "a[55:58]", "a[56:59]", "a[57:60]", "a[58:61]", "a[59:62]", + "a[60:63]", "a[61:64]", "a[62:65]", "a[63:66]", "a[64:67]", + "a[65:68]", "a[66:69]", "a[67:70]", "a[68:71]", "a[69:72]", + "a[70:73]", "a[71:74]", "a[72:75]", "a[73:76]", "a[74:77]", + "a[75:78]", "a[76:79]", "a[77:80]", "a[78:81]", "a[79:82]", + "a[80:83]", "a[81:84]", "a[82:85]", "a[83:86]", "a[84:87]", + "a[85:88]", "a[86:89]", "a[87:90]", "a[88:91]", "a[89:92]", + "a[90:93]", "a[91:94]", "a[92:95]", "a[93:96]", "a[94:97]", + "a[95:98]", "a[96:99]", "a[97:100]", "a[98:101]", "a[99:102]", + "a[100:103]", "a[101:104]", "a[102:105]", "a[103:106]", "a[104:107]", + "a[105:108]", "a[106:109]", "a[107:110]", "a[108:111]", "a[109:112]", + "a[110:113]", "a[111:114]", "a[112:115]", "a[113:116]", "a[114:117]", + "a[115:118]", "a[116:119]", "a[117:120]", "a[118:121]", 
"a[119:122]", + "a[120:123]", "a[121:124]", "a[122:125]", "a[123:126]", "a[124:127]", + "a[125:128]", "a[126:129]", "a[127:130]", "a[128:131]", "a[129:132]", + "a[130:133]", "a[131:134]", "a[132:135]", "a[133:136]", "a[134:137]", + "a[135:138]", "a[136:139]", "a[137:140]", "a[138:141]", "a[139:142]", + "a[140:143]", "a[141:144]", "a[142:145]", "a[143:146]", "a[144:147]", + "a[145:148]", "a[146:149]", "a[147:150]", "a[148:151]", "a[149:152]", + "a[150:153]", "a[151:154]", "a[152:155]", "a[153:156]", "a[154:157]", + "a[155:158]", "a[156:159]", "a[157:160]", "a[158:161]", "a[159:162]", + "a[160:163]", "a[161:164]", "a[162:165]", "a[163:166]", "a[164:167]", + "a[165:168]", "a[166:169]", "a[167:170]", "a[168:171]", "a[169:172]", + "a[170:173]", "a[171:174]", "a[172:175]", "a[173:176]", "a[174:177]", + "a[175:178]", "a[176:179]", "a[177:180]", "a[178:181]", "a[179:182]", + "a[180:183]", "a[181:184]", "a[182:185]", "a[183:186]", "a[184:187]", + "a[185:188]", "a[186:189]", "a[187:190]", "a[188:191]", "a[189:192]", + "a[190:193]", "a[191:194]", "a[192:195]", "a[193:196]", "a[194:197]", + "a[195:198]", "a[196:199]", "a[197:200]", "a[198:201]", "a[199:202]", + "a[200:203]", "a[201:204]", "a[202:205]", "a[203:206]", "a[204:207]", + "a[205:208]", "a[206:209]", "a[207:210]", "a[208:211]", "a[209:212]", + "a[210:213]", "a[211:214]", "a[212:215]", "a[213:216]", "a[214:217]", + "a[215:218]", "a[216:219]", "a[217:220]", "a[218:221]", "a[219:222]", + "a[220:223]", "a[221:224]", "a[222:225]", "a[223:226]", "a[224:227]", + "a[225:228]", "a[226:229]", "a[227:230]", "a[228:231]", "a[229:232]", + "a[230:233]", "a[231:234]", "a[232:235]", "a[233:236]", "a[234:237]", + "a[235:238]", "a[236:239]", "a[237:240]", "a[238:241]", "a[239:242]", + "a[240:243]", "a[241:244]", "a[242:245]", "a[243:246]", "a[244:247]", + "a[245:248]", "a[246:249]", "a[247:250]", "a[248:251]", "a[249:252]", + "a[250:253]", "a[251:254]", "a[252:255]" +}; + +static const char *const AGPR512RegNames[] = { + "a[0:15]", 
"a[1:16]", "a[2:17]", "a[3:18]", "a[4:19]", + "a[5:20]", "a[6:21]", "a[7:22]", "a[8:23]", "a[9:24]", + "a[10:25]", "a[11:26]", "a[12:27]", "a[13:28]", "a[14:29]", + "a[15:30]", "a[16:31]", "a[17:32]", "a[18:33]", "a[19:34]", + "a[20:35]", "a[21:36]", "a[22:37]", "a[23:38]", "a[24:39]", + "a[25:40]", "a[26:41]", "a[27:42]", "a[28:43]", "a[29:44]", + "a[30:45]", "a[31:46]", "a[32:47]", "a[33:48]", "a[34:49]", + "a[35:50]", "a[36:51]", "a[37:52]", "a[38:53]", "a[39:54]", + "a[40:55]", "a[41:56]", "a[42:57]", "a[43:58]", "a[44:59]", + "a[45:60]", "a[46:61]", "a[47:62]", "a[48:63]", "a[49:64]", + "a[50:65]", "a[51:66]", "a[52:67]", "a[53:68]", "a[54:69]", + "a[55:70]", "a[56:71]", "a[57:72]", "a[58:73]", "a[59:74]", + "a[60:75]", "a[61:76]", "a[62:77]", "a[63:78]", "a[64:79]", + "a[65:80]", "a[66:81]", "a[67:82]", "a[68:83]", "a[69:84]", + "a[70:85]", "a[71:86]", "a[72:87]", "a[73:88]", "a[74:89]", + "a[75:90]", "a[76:91]", "a[77:92]", "a[78:93]", "a[79:94]", + "a[80:95]", "a[81:96]", "a[82:97]", "a[83:98]", "a[84:99]", + "a[85:100]", "a[86:101]", "a[87:102]", "a[88:103]", "a[89:104]", + "a[90:105]", "a[91:106]", "a[92:107]", "a[93:108]", "a[94:109]", + "a[95:110]", "a[96:111]", "a[97:112]", "a[98:113]", "a[99:114]", + "a[100:115]", "a[101:116]", "a[102:117]", "a[103:118]", "a[104:119]", + "a[105:120]", "a[106:121]", "a[107:122]", "a[108:123]", "a[109:124]", + "a[110:125]", "a[111:126]", "a[112:127]", "a[113:128]", "a[114:129]", + "a[115:130]", "a[116:131]", "a[117:132]", "a[118:133]", "a[119:134]", + "a[120:135]", "a[121:136]", "a[122:137]", "a[123:138]", "a[124:139]", + "a[125:140]", "a[126:141]", "a[127:142]", "a[128:143]", "a[129:144]", + "a[130:145]", "a[131:146]", "a[132:147]", "a[133:148]", "a[134:149]", + "a[135:150]", "a[136:151]", "a[137:152]", "a[138:153]", "a[139:154]", + "a[140:155]", "a[141:156]", "a[142:157]", "a[143:158]", "a[144:159]", + "a[145:160]", "a[146:161]", "a[147:162]", "a[148:163]", "a[149:164]", + "a[150:165]", "a[151:166]", "a[152:167]", 
"a[153:168]", "a[154:169]", + "a[155:170]", "a[156:171]", "a[157:172]", "a[158:173]", "a[159:174]", + "a[160:175]", "a[161:176]", "a[162:177]", "a[163:178]", "a[164:179]", + "a[165:180]", "a[166:181]", "a[167:182]", "a[168:183]", "a[169:184]", + "a[170:185]", "a[171:186]", "a[172:187]", "a[173:188]", "a[174:189]", + "a[175:190]", "a[176:191]", "a[177:192]", "a[178:193]", "a[179:194]", + "a[180:195]", "a[181:196]", "a[182:197]", "a[183:198]", "a[184:199]", + "a[185:200]", "a[186:201]", "a[187:202]", "a[188:203]", "a[189:204]", + "a[190:205]", "a[191:206]", "a[192:207]", "a[193:208]", "a[194:209]", + "a[195:210]", "a[196:211]", "a[197:212]", "a[198:213]", "a[199:214]", + "a[200:215]", "a[201:216]", "a[202:217]", "a[203:218]", "a[204:219]", + "a[205:220]", "a[206:221]", "a[207:222]", "a[208:223]", "a[209:224]", + "a[210:225]", "a[211:226]", "a[212:227]", "a[213:228]", "a[214:229]", + "a[215:230]", "a[216:231]", "a[217:232]", "a[218:233]", "a[219:234]", + "a[220:235]", "a[221:236]", "a[222:237]", "a[223:238]", "a[224:239]", + "a[225:240]", "a[226:241]", "a[227:242]", "a[228:243]", "a[229:244]", + "a[230:245]", "a[231:246]", "a[232:247]", "a[233:248]", "a[234:249]", + "a[235:250]", "a[236:251]", "a[237:252]", "a[238:253]", "a[239:254]", + "a[240:255]" +}; + +static const char *const AGPR1024RegNames[] = { + "a[0:31]", "a[1:32]", "a[2:33]", "a[3:34]", "a[4:35]", + "a[5:36]", "a[6:37]", "a[7:38]", "a[8:39]", "a[9:40]", + "a[10:41]", "a[11:42]", "a[12:43]", "a[13:44]", "a[14:45]", + "a[15:46]", "a[16:47]", "a[17:48]", "a[18:49]", "a[19:50]", + "a[20:51]", "a[21:52]", "a[22:53]", "a[23:54]", "a[24:55]", + "a[25:56]", "a[26:57]", "a[27:58]", "a[28:59]", "a[29:60]", + "a[30:61]", "a[31:62]", "a[32:63]", "a[33:64]", "a[34:65]", + "a[35:66]", "a[36:67]", "a[37:68]", "a[38:69]", "a[39:70]", + "a[40:71]", "a[41:72]", "a[42:73]", "a[43:74]", "a[44:75]", + "a[45:76]", "a[46:77]", "a[47:78]", "a[48:79]", "a[49:80]", + "a[50:81]", "a[51:82]", "a[52:83]", "a[53:84]", "a[54:85]", + 
"a[55:86]", "a[56:87]", "a[57:88]", "a[58:89]", "a[59:90]", + "a[60:91]", "a[61:92]", "a[62:93]", "a[63:94]", "a[64:95]", + "a[65:96]", "a[66:97]", "a[67:98]", "a[68:99]", "a[69:100]", + "a[70:101]", "a[71:102]", "a[72:103]", "a[73:104]", "a[74:105]", + "a[75:106]", "a[76:107]", "a[77:108]", "a[78:109]", "a[79:110]", + "a[80:111]", "a[81:112]", "a[82:113]", "a[83:114]", "a[84:115]", + "a[85:116]", "a[86:117]", "a[87:118]", "a[88:119]", "a[89:120]", + "a[90:121]", "a[91:122]", "a[92:123]", "a[93:124]", "a[94:125]", + "a[95:126]", "a[96:127]", "a[97:128]", "a[98:129]", "a[99:130]", + "a[100:131]", "a[101:132]", "a[102:133]", "a[103:134]", "a[104:135]", + "a[105:136]", "a[106:137]", "a[107:138]", "a[108:139]", "a[109:140]", + "a[110:141]", "a[111:142]", "a[112:143]", "a[113:144]", "a[114:145]", + "a[115:146]", "a[116:147]", "a[117:148]", "a[118:149]", "a[119:150]", + "a[120:151]", "a[121:152]", "a[122:153]", "a[123:154]", "a[124:155]", + "a[125:156]", "a[126:157]", "a[127:158]", "a[128:159]", "a[129:160]", + "a[130:161]", "a[131:162]", "a[132:163]", "a[133:164]", "a[134:165]", + "a[135:166]", "a[136:167]", "a[137:168]", "a[138:169]", "a[139:170]", + "a[140:171]", "a[141:172]", "a[142:173]", "a[143:174]", "a[144:175]", + "a[145:176]", "a[146:177]", "a[147:178]", "a[148:179]", "a[149:180]", + "a[150:181]", "a[151:182]", "a[152:183]", "a[153:184]", "a[154:185]", + "a[155:186]", "a[156:187]", "a[157:188]", "a[158:189]", "a[159:190]", + "a[160:191]", "a[161:192]", "a[162:193]", "a[163:194]", "a[164:195]", + "a[165:196]", "a[166:197]", "a[167:198]", "a[168:199]", "a[169:200]", + "a[170:201]", "a[171:202]", "a[172:203]", "a[173:204]", "a[174:205]", + "a[175:206]", "a[176:207]", "a[177:208]", "a[178:209]", "a[179:210]", + "a[180:211]", "a[181:212]", "a[182:213]", "a[183:214]", "a[184:215]", + "a[185:216]", "a[186:217]", "a[187:218]", "a[188:219]", "a[189:220]", + "a[190:221]", "a[191:222]", "a[192:223]", "a[193:224]", "a[194:225]", + "a[195:226]", "a[196:227]", "a[197:228]", 
"a[198:229]", "a[199:230]", + "a[200:231]", "a[201:232]", "a[202:233]", "a[203:234]", "a[204:235]", + "a[205:236]", "a[206:237]", "a[207:238]", "a[208:239]", "a[209:240]", + "a[210:241]", "a[211:242]", "a[212:243]", "a[213:244]", "a[214:245]", + "a[215:246]", "a[216:247]", "a[217:248]", "a[218:249]", "a[219:250]", + "a[220:251]", "a[221:252]", "a[222:253]", "a[223:254]", "a[224:255]" +}; + #endif Index: lib/Target/AMDGPU/AMDGPURegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPURegisterInfo.cpp +++ lib/Target/AMDGPU/AMDGPURegisterInfo.cpp @@ -31,7 +31,10 @@ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, - AMDGPU::sub15 + AMDGPU::sub15, AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19, + AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23, AMDGPU::sub24, + AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27, AMDGPU::sub28, AMDGPU::sub29, + AMDGPU::sub30, AMDGPU::sub31 }; assert(Channel < array_lengthof(SubRegs)); Index: lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp =================================================================== --- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -139,6 +139,13 @@ DECODE_OPERAND_REG(SReg_256) DECODE_OPERAND_REG(SReg_512) +DECODE_OPERAND_REG(AGPR_32) +DECODE_OPERAND_REG(AReg_128) +DECODE_OPERAND_REG(AReg_512) +DECODE_OPERAND_REG(AReg_1024) +DECODE_OPERAND_REG(AV_32) +DECODE_OPERAND_REG(AV_64) + static DecodeStatus decodeOperand_VSrc16(MCInst &Inst, unsigned Imm, uint64_t Addr, @@ -171,6 +178,30 @@ return addOperand(Inst, DAsm->decodeOperand_VS_32(Imm)); } +static DecodeStatus decodeOperand_AReg_128(MCInst &Inst, + unsigned Imm, + uint64_t Addr, + const void *Decoder) { + auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); + return 
addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm | 512)); +} + +static DecodeStatus decodeOperand_AReg_512(MCInst &Inst, + unsigned Imm, + uint64_t Addr, + const void *Decoder) { + auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); + return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm | 512)); +} + +static DecodeStatus decodeOperand_AReg_1024(MCInst &Inst, + unsigned Imm, + uint64_t Addr, + const void *Decoder) { + auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); + return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm | 512)); +} + static DecodeStatus decodeOperand_SReg_32(MCInst &Inst, unsigned Imm, uint64_t Addr, @@ -179,6 +210,14 @@ return addOperand(Inst, DAsm->decodeOperand_SReg_32(Imm)); } +static DecodeStatus decodeOperand_VGPR_32(MCInst &Inst, + unsigned Imm, + uint64_t Addr, + const void *Decoder) { + auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); + return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW32, Imm)); +} + #define DECODE_SDWA(DecName) \ DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName) Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -156,6 +156,12 @@ const MCSubtargetInfo &STI, raw_ostream &O); void printMemOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printBLGP(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); + void printCBSZ(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); + void printABID(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); void printDefaultVccOperand(unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp =================================================================== --- 
lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -415,6 +415,21 @@ } else if (MRI.getRegClass(AMDGPU::SGPR_512RegClassID).contains(RegNo)) { O << 's'; NumRegs = 16; + } else if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(RegNo)) { + O << 'a'; + NumRegs = 1; + } else if (MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(RegNo)) { + O << 'a'; + NumRegs = 2; + } else if (MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(RegNo)) { + O << 'a'; + NumRegs = 4; + } else if (MRI.getRegClass(AMDGPU::AReg_512RegClassID).contains(RegNo)) { + O << 'a'; + NumRegs = 16; + } else if (MRI.getRegClass(AMDGPU::AReg_1024RegClassID).contains(RegNo)) { + O << 'a'; + NumRegs = 32; } else { O << getRegisterName(RegNo); return; @@ -586,6 +601,36 @@ } } +void AMDGPUInstPrinter::printBLGP(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + unsigned Imm = MI->getOperand(OpNo).getImm(); + if (!Imm) + return; + + O << " blgp:" << Imm; +} + +void AMDGPUInstPrinter::printCBSZ(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + unsigned Imm = MI->getOperand(OpNo).getImm(); + if (!Imm) + return; + + O << " cbsz:" << Imm; +} + +void AMDGPUInstPrinter::printABID(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + unsigned Imm = MI->getOperand(OpNo).getImm(); + if (!Imm) + return; + + O << " abid:" << Imm; +} + void AMDGPUInstPrinter::printDefaultVccOperand(unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { @@ -621,6 +666,8 @@ case AMDGPU::OPERAND_REG_IMM_FP32: case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: + case AMDGPU::OPERAND_REG_INLINE_AC_INT32: + case AMDGPU::OPERAND_REG_INLINE_AC_FP32: case MCOI::OPERAND_IMMEDIATE: printImmediate32(Op.getImm(), STI, O); break; @@ -632,6 +679,8 @@ break; case AMDGPU::OPERAND_REG_INLINE_C_INT16: case AMDGPU::OPERAND_REG_INLINE_C_FP16: + case 
AMDGPU::OPERAND_REG_INLINE_AC_INT16: + case AMDGPU::OPERAND_REG_INLINE_AC_FP16: case AMDGPU::OPERAND_REG_IMM_INT16: case AMDGPU::OPERAND_REG_IMM_FP16: printImmediate16(Op.getImm(), STI, O); @@ -646,6 +695,8 @@ LLVM_FALLTHROUGH; case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: printImmediateV216(Op.getImm(), STI, O); break; case MCOI::OPERAND_UNKNOWN: Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h @@ -63,6 +63,12 @@ return 0; } + virtual unsigned getAVOperandEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + return 0; + } + protected: FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const; void Index: lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPURegisterInfo.h" #include "MCTargetDesc/AMDGPUFixupKinds.h" #include "MCTargetDesc/AMDGPUMCCodeEmitter.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" @@ -77,6 +78,10 @@ unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const override; + + unsigned getAVOperandEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override; }; } // end anonymous namespace @@ -233,6 +238,8 @@ case AMDGPU::OPERAND_REG_IMM_FP32: case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: + case 
AMDGPU::OPERAND_REG_INLINE_AC_INT32: + case AMDGPU::OPERAND_REG_INLINE_AC_FP32: return getLit32Encoding(static_cast<uint32_t>(Imm), STI); case AMDGPU::OPERAND_REG_IMM_INT64: @@ -245,6 +252,8 @@ case AMDGPU::OPERAND_REG_IMM_FP16: case AMDGPU::OPERAND_REG_INLINE_C_INT16: case AMDGPU::OPERAND_REG_INLINE_C_FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_INT16: + case AMDGPU::OPERAND_REG_INLINE_AC_FP16: // FIXME Is this correct? What do inline immediates do on SI for f16 src // which does not have f16 support? return getLit16Encoding(static_cast<uint16_t>(Imm), STI); @@ -255,7 +264,9 @@ return getLit32Encoding(static_cast<uint32_t>(Imm), STI); LLVM_FALLTHROUGH; case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: - case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { uint16_t Lo16 = static_cast<uint16_t>(Imm); uint32_t Encoding = getLit16Encoding(Lo16, STI); return Encoding; @@ -397,6 +408,23 @@ return RegEnc; } +unsigned +SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + unsigned Reg = MI.getOperand(OpNo).getReg(); + uint64_t Enc = MRI.getEncodingValue(Reg); + + // VGPR and AGPR have the same encoding, but SrcA and SrcB operands of mfma + // instructions use acc[0:1] modifier bits to distinguish. These bits are + // encoded as a virtual 9th bit of the register for these operands. + if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Reg) || + MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg)) + Enc |= 512; + + return Enc; +} + static bool needsPCRel(const MCExpr *Expr) { switch (Expr->getKind()) { case MCExpr::SymbolRef: { Index: lib/Target/AMDGPU/SIDefines.h =================================================================== --- lib/Target/AMDGPU/SIDefines.h +++ lib/Target/AMDGPU/SIDefines.h @@ -96,7 +96,10 @@ FPDPRounding = UINT64_C(1) << 52, // Instruction is FP atomic. 
- FPAtomic = UINT64_C(1) << 53 + FPAtomic = UINT64_C(1) << 53, + + // Is a MFMA instruction. + IsMAI = UINT64_C(1) << 54 }; // v_cmp_class_* etc. use a 10-bit mask for what operation is checked. Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -10304,6 +10304,29 @@ break; } break; + case 'a': + switch (VT.getSizeInBits()) { + default: + return std::make_pair(0U, nullptr); + case 32: + case 16: + RC = &AMDGPU::AGPR_32RegClass; + break; + case 64: + RC = &AMDGPU::AReg_64RegClass; + break; + case 128: + RC = &AMDGPU::AReg_128RegClass; + break; + case 512: + RC = &AMDGPU::AReg_512RegClass; + break; + case 1024: + RC = &AMDGPU::AReg_1024RegClass; + // v32 types are not legal but we support them here. + return std::make_pair(0U, RC); + } + break; } // We actually support i128, i16 and f16 as inline parameters // even if they are not reported as legal @@ -10317,6 +10340,8 @@ RC = &AMDGPU::VGPR_32RegClass; } else if (Constraint[1] == 's') { RC = &AMDGPU::SGPR_32RegClass; + } else if (Constraint[1] == 'a') { + RC = &AMDGPU::AGPR_32RegClass; } if (RC) { @@ -10336,6 +10361,7 @@ default: break; case 's': case 'v': + case 'a': return C_RegisterClass; } } Index: lib/Target/AMDGPU/SIInstrFormats.td =================================================================== --- lib/Target/AMDGPU/SIInstrFormats.td +++ lib/Target/AMDGPU/SIInstrFormats.td @@ -121,6 +121,9 @@ // Instruction is FP atomic. field bit FPAtomic = 0; + // This bit indicates that this is one of MFMA instructions. + field bit IsMAI = 0; + // These need to be kept in sync with the enum in SIInstrFlags. 
let TSFlags{0} = SALU; let TSFlags{1} = VALU; @@ -184,6 +187,8 @@ let TSFlags{53} = FPAtomic; + let TSFlags{54} = IsMAI; + let SchedRW = [Write32Bit]; field bits<1> DisableSIDecoder = 0; Index: lib/Target/AMDGPU/SIInstrInfo.h =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.h +++ lib/Target/AMDGPU/SIInstrInfo.h @@ -570,6 +570,14 @@ return get(Opcode).TSFlags & SIInstrFlags::VINTRP; } + static bool isMAI(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::IsMAI; + } + + bool isMAI(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::IsMAI; + } + static bool isScalarUnit(const MachineInstr &MI) { return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD); } Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -251,7 +251,8 @@ !if(!eq(SrcVT.Value, f32.Value), 1, !if(!eq(SrcVT.Value, f64.Value), 1, !if(!eq(SrcVT.Value, v2f16.Value), 1, - 0)))); + !if(!eq(SrcVT.Value, v4f16.Value), 1, + 0))))); } class isIntType { @@ -265,8 +266,9 @@ class isPackedType { bit ret = !if(!eq(SrcVT.Value, v2i16.Value), 1, - !if(!eq(SrcVT.Value, v2f16.Value), 1, 0) - ); + !if(!eq(SrcVT.Value, v2f16.Value), 1, + !if(!eq(SrcVT.Value, v4f16.Value), 1, 0) + )); } //===----------------------------------------------------------------------===// @@ -929,6 +931,10 @@ def neg_lo : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>; def neg_hi : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>; +def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>; +def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>; +def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>; + def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>; def exp_tgt : NamedOperandU8<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> { @@ -1284,11 +1290,12 @@ class getIsFP { 
bit ret = !if(!eq(VT.Value, f16.Value), 1, !if(!eq(VT.Value, v2f16.Value), 1, + !if(!eq(VT.Value, v4f16.Value), 1, !if(!eq(VT.Value, f32.Value), 1, !if(!eq(VT.Value, v2f32.Value), 1, !if(!eq(VT.Value, f64.Value), 1, !if(!eq(VT.Value, v2f64.Value), 1, - 0)))))); + 0))))))); } // Returns the register class to use for the destination of VOP[12C] @@ -1312,7 +1319,10 @@ VSrc_f16, !if(!eq(VT.Value, v2f16.Value), VSrc_v2f16, - VSrc_f32 + !if(!eq(VT.Value, v4f16.Value), + AVSrc_64, + VSrc_f32 + ) ) ) ), @@ -1360,7 +1370,10 @@ VSrc_f16, !if(!eq(VT.Value, v2f16.Value), VSrc_v2f16, - VSrc_f32 + !if(!eq(VT.Value, v4f16.Value), + AVSrc_64, + VSrc_f32 + ) ) ), !if(!eq(VT.Value, i16.Value), @@ -1993,6 +2006,8 @@ field bit HasExtSDWA9 = HasExt; field int NeedPatGen = PatGenMode.NoPattern; + field bit IsMAI = 0; + field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods); field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods); field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods); @@ -2130,6 +2145,20 @@ def VOP_F32_V2F16_V2F16_F32 : VOPProfile <[f32, v2f16, v2f16, f32]>; def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>; +def VOP_V4F32_F32_F32_V4F32 : VOPProfile <[v4f32, f32, f32, v4f32]>; +def VOP_V16F32_F32_F32_V16F32 : VOPProfile <[v16f32, f32, f32, v16f32]>; +// TODO: define v32f32 +def VOP_V32F32_F32_F32_V32F32 : VOPProfile <[v32i32, f32, f32, v32i32]>; +def VOP_V4F32_V4F16_V4F16_V4F32 : VOPProfile <[v4f32, v4f16, v4f16, v4f32]>; +def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>; +def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32i32, v4f16, v4f16, v32i32]>; +def VOP_V4F32_V2I16_V2I16_V4F32 : VOPProfile <[v4f32, v2i16, v2i16, v4f32]>; +def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>; +def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32i32, v2i16, v2i16, v32i32]>; +def 
VOP_V4I32_I32_I32_V4I32 : VOPProfile <[v4i32, i32, i32, v4i32]>; +def VOP_V16I32_I32_I32_V16I32 : VOPProfile <[v16i32, i32, i32, v16i32]>; +def VOP_V32I32_I32_I32_V32I32 : VOPProfile <[v32i32, i32, i32, v32i32]>; + class Commutable_REV { string RevOp = revOp; bit IsOrig = isOrig; Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -206,6 +206,8 @@ for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) { unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i); reserveRegisterTuples(Reserved, Reg); + Reg = AMDGPU::AGPR_32RegClass.getRegister(i); + reserveRegisterTuples(Reserved, Reg); } const SIMachineFunctionInfo *MFI = MF.getInfo(); @@ -1256,8 +1258,10 @@ REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames); REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR105, SGPR32RegNames); + REG_RANGE(AMDGPU::AGPR0, AMDGPU::AGPR255, AGPR32RegNames); REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames); REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR104_SGPR105, SGPR64RegNames); + REG_RANGE(AMDGPU::AGPR0_AGPR1, AMDGPU::AGPR254_AGPR255, AGPR64RegNames); REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255, VGPR96RegNames); @@ -1267,6 +1271,9 @@ REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103, SGPR128RegNames); + REG_RANGE(AMDGPU::AGPR0_AGPR1_AGPR2_AGPR3, + AMDGPU::AGPR252_AGPR253_AGPR254_AGPR255, + AGPR128RegNames); REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7, AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255, @@ -1276,6 +1283,10 @@ AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15, AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255, VGPR512RegNames); + REG_RANGE( + 
AMDGPU::AGPR0_AGPR1_AGPR2_AGPR3_AGPR4_AGPR5_AGPR6_AGPR7_AGPR8_AGPR9_AGPR10_AGPR11_AGPR12_AGPR13_AGPR14_AGPR15, + AMDGPU::AGPR240_AGPR241_AGPR242_AGPR243_AGPR244_AGPR245_AGPR246_AGPR247_AGPR248_AGPR249_AGPR250_AGPR251_AGPR252_AGPR253_AGPR254_AGPR255, + AGPR512RegNames); REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7, AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103, @@ -1287,6 +1298,11 @@ SGPR512RegNames ); + REG_RANGE( + AMDGPU::AGPR0_AGPR1_AGPR2_AGPR3_AGPR4_AGPR5_AGPR6_AGPR7_AGPR8_AGPR9_AGPR10_AGPR11_AGPR12_AGPR13_AGPR14_AGPR15_AGPR16_AGPR17_AGPR18_AGPR19_AGPR20_AGPR21_AGPR22_AGPR23_AGPR24_AGPR25_AGPR26_AGPR27_AGPR28_AGPR29_AGPR30_AGPR31, + AMDGPU::AGPR224_AGPR225_AGPR226_AGPR227_AGPR228_AGPR229_AGPR230_AGPR231_AGPR232_AGPR233_AGPR234_AGPR235_AGPR236_AGPR237_AGPR238_AGPR239_AGPR240_AGPR241_AGPR242_AGPR243_AGPR244_AGPR245_AGPR246_AGPR247_AGPR248_AGPR249_AGPR250_AGPR251_AGPR252_AGPR253_AGPR254_AGPR255, + AGPR1024RegNames); + #undef REG_RANGE // FIXME: Rename flat_scr so we don't need to special case this. 
Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -564,6 +564,8 @@ case AMDGPU::OPERAND_REG_IMM_FP32: case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: + case AMDGPU::OPERAND_REG_INLINE_AC_INT32: + case AMDGPU::OPERAND_REG_INLINE_AC_FP32: return 4; case AMDGPU::OPERAND_REG_IMM_INT64: @@ -578,6 +580,10 @@ case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_INT16: + case AMDGPU::OPERAND_REG_INLINE_AC_FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: case AMDGPU::OPERAND_REG_IMM_V2INT16: case AMDGPU::OPERAND_REG_IMM_V2FP16: return 2; Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1007,6 +1007,10 @@ case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_AC_FP32: + case AMDGPU::OPERAND_REG_INLINE_AC_FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: return true; default: return false; @@ -1027,15 +1031,19 @@ case AMDGPU::SGPR_32RegClassID: case AMDGPU::VGPR_32RegClassID: case AMDGPU::VRegOrLds_32RegClassID: + case AMDGPU::AGPR_32RegClassID: case AMDGPU::VS_32RegClassID: + case AMDGPU::AV_32RegClassID: case AMDGPU::SReg_32RegClassID: case AMDGPU::SReg_32_XM0RegClassID: case AMDGPU::SRegOrLds_32RegClassID: return 32; case AMDGPU::SGPR_64RegClassID: case AMDGPU::VS_64RegClassID: + case AMDGPU::AV_64RegClassID: case AMDGPU::SReg_64RegClassID: case AMDGPU::VReg_64RegClassID: + case AMDGPU::AReg_64RegClassID: case 
AMDGPU::SReg_64_XEXECRegClassID: return 64; case AMDGPU::SGPR_96RegClassID: @@ -1045,6 +1053,7 @@ case AMDGPU::SGPR_128RegClassID: case AMDGPU::SReg_128RegClassID: case AMDGPU::VReg_128RegClassID: + case AMDGPU::AReg_128RegClassID: return 128; case AMDGPU::SGPR_160RegClassID: case AMDGPU::SReg_160RegClassID: @@ -1055,7 +1064,12 @@ return 256; case AMDGPU::SReg_512RegClassID: case AMDGPU::VReg_512RegClassID: + case AMDGPU::AReg_512RegClassID: return 512; + case AMDGPU::SReg_1024RegClassID: + case AMDGPU::VReg_1024RegClassID: + case AMDGPU::AReg_1024RegClassID: + return 1024; default: llvm_unreachable("Unexpected register class"); } Index: lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- lib/Target/AMDGPU/VOP3Instructions.td +++ lib/Target/AMDGPU/VOP3Instructions.td @@ -110,6 +110,11 @@ ret1)); } +class getVOP3MAIPat { + list ret = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, + imm:$cbsz, imm:$abid, imm:$blgp))]; +} + class VOP3Inst : VOP3_Pseudo.ret, !if(P.HasIntClamp, getVOP3ClampPat.ret, - getVOP3Pat.ret))), + !if (P.IsMAI, + getVOP3MAIPat.ret, + getVOP3Pat.ret)))), VOP3Only, 0, P.HasOpSel> { let IntClamp = P.HasIntClamp; @@ -143,24 +150,27 @@ } } -class VOP3Features { +class VOP3Features { bit HasClamp = Clamp; bit HasOpSel = OpSel; bit IsPacked = Packed; + bit IsMAI = MAI; } -def VOP3_REGULAR : VOP3Features<0, 0, 0>; -def VOP3_CLAMP : VOP3Features<1, 0, 0>; -def VOP3_OPSEL : VOP3Features<1, 1, 0>; -def VOP3_PACKED : VOP3Features<1, 1, 1>; +def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>; +def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>; +def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>; +def VOP3_PACKED : VOP3Features<1, 1, 1, 0>; +def VOP3_MAI : VOP3Features<0, 0, 0, 1>; class VOP3_Profile : VOPProfile { let HasClamp = !if(Features.HasClamp, 1, P.HasClamp); let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel); + let IsMAI = !if(Features.IsMAI, 1, P.IsMAI); let IsPacked = 
!if(Features.IsPacked, 1, P.IsPacked); - let HasModifiers = !if(Features.IsPacked, 1, P.HasModifiers); + let HasModifiers = !if(Features.IsPacked, !if(Features.IsMAI, 0, 1), P.HasModifiers); // FIXME: Hack to stop printing _e64 let Outs64 = (outs DstRC.RegClass:$vdst); Index: lib/Target/AMDGPU/VOP3PInstructions.td =================================================================== --- lib/Target/AMDGPU/VOP3PInstructions.td +++ lib/Target/AMDGPU/VOP3PInstructions.td @@ -324,6 +324,81 @@ (NonACAdd_oneuse lhs, (!cast("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))), (!cast("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>; +def ADst_32 : VOPDstOperand; +def ADst_128 : VOPDstOperand; +def ADst_512 : VOPDstOperand; +def ADst_1024 : VOPDstOperand; + +def VOPProfileAccRead : VOP3_Profile { + let Src0RC64 = ARegSrc_32; +} + +def VOPProfileAccWrite : VOP3_Profile { + let DstRC = ADst_32; + let Src0RC64 = VISrc_b32; +} + +class VOPProfileMAI + : VOP3_Profile { + let DstRC = _DstRC; + let Src0RC64 = SrcABRC; + let Src1RC64 = SrcABRC; + let Src2RC64 = _SrcRC; + let HasOpSel = 0; + let HasClamp = 0; + let HasModifiers = 0; + let Asm64 = " $vdst, $src0, $src1, $src2$cbsz$abid$blgp"; + let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, Src2RC64:$src2, cbsz:$cbsz, abid:$abid, blgp:$blgp); +} + +def VOPProfileMAI_F32_F32_X4 : VOPProfileMAI; +def VOPProfileMAI_F32_F32_X16 : VOPProfileMAI; +def VOPProfileMAI_F32_F32_X32 : VOPProfileMAI; +def VOPProfileMAI_I32_I32_X4 : VOPProfileMAI; +def VOPProfileMAI_I32_I32_X16 : VOPProfileMAI; +def VOPProfileMAI_I32_I32_X32 : VOPProfileMAI; +def VOPProfileMAI_F32_V2I16_X4 : VOPProfileMAI; +def VOPProfileMAI_F32_V2I16_X16 : VOPProfileMAI; +def VOPProfileMAI_F32_V2I16_X32 : VOPProfileMAI; +def VOPProfileMAI_F32_V4F16_X4 : VOPProfileMAI; +def VOPProfileMAI_F32_V4F16_X16 : VOPProfileMAI; +def VOPProfileMAI_F32_V4F16_X32 : VOPProfileMAI; + +let Predicates = [HasMAIInsts] in { +def V_ACCVGPR_READ_B32 : 
VOP3Inst<"v_accvgpr_read_b32", VOPProfileAccRead>; +def V_ACCVGPR_WRITE_B32 : VOP3Inst<"v_accvgpr_write_b32", VOPProfileAccWrite> { + let isMoveImm = 1; +} + +let isConvergent = 1 in { +def V_MFMA_F32_4X4X1F32 : VOP3Inst<"v_mfma_f32_4x4x1f32", VOPProfileMAI_F32_F32_X4, int_amdgcn_mfma_f32_4x4x1f32>; +def V_MFMA_F32_4X4X4F16 : VOP3Inst<"v_mfma_f32_4x4x4f16", VOPProfileMAI_F32_V4F16_X4, int_amdgcn_mfma_f32_4x4x4f16>; +def V_MFMA_I32_4X4X4I8 : VOP3Inst<"v_mfma_i32_4x4x4i8", VOPProfileMAI_I32_I32_X4, int_amdgcn_mfma_i32_4x4x4i8>; +def V_MFMA_F32_4X4X2BF16 : VOP3Inst<"v_mfma_f32_4x4x2bf16", VOPProfileMAI_F32_V2I16_X4, int_amdgcn_mfma_f32_4x4x2bf16>; +def V_MFMA_F32_16X16X1F32 : VOP3Inst<"v_mfma_f32_16x16x1f32", VOPProfileMAI_F32_F32_X16, int_amdgcn_mfma_f32_16x16x1f32>; +def V_MFMA_F32_16X16X4F32 : VOP3Inst<"v_mfma_f32_16x16x4f32", VOPProfileMAI_F32_F32_X4, int_amdgcn_mfma_f32_16x16x4f32>; +def V_MFMA_F32_16X16X4F16 : VOP3Inst<"v_mfma_f32_16x16x4f16", VOPProfileMAI_F32_V4F16_X16, int_amdgcn_mfma_f32_16x16x4f16>; +def V_MFMA_F32_16X16X16F16 : VOP3Inst<"v_mfma_f32_16x16x16f16", VOPProfileMAI_F32_V4F16_X4, int_amdgcn_mfma_f32_16x16x16f16>; +def V_MFMA_I32_16X16X4I8 : VOP3Inst<"v_mfma_i32_16x16x4i8", VOPProfileMAI_I32_I32_X16, int_amdgcn_mfma_i32_16x16x4i8>; +def V_MFMA_I32_16X16X16I8 : VOP3Inst<"v_mfma_i32_16x16x16i8", VOPProfileMAI_I32_I32_X4, int_amdgcn_mfma_i32_16x16x16i8>; +def V_MFMA_F32_16X16X2BF16 : VOP3Inst<"v_mfma_f32_16x16x2bf16", VOPProfileMAI_F32_V2I16_X16, int_amdgcn_mfma_f32_16x16x2bf16>; +def V_MFMA_F32_16X16X8BF16 : VOP3Inst<"v_mfma_f32_16x16x8bf16", VOPProfileMAI_F32_V2I16_X4, int_amdgcn_mfma_f32_16x16x8bf16>; +def V_MFMA_F32_32X32X1F32 : VOP3Inst<"v_mfma_f32_32x32x1f32", VOPProfileMAI_F32_F32_X32, int_amdgcn_mfma_f32_32x32x1f32>; +def V_MFMA_F32_32X32X2F32 : VOP3Inst<"v_mfma_f32_32x32x2f32", VOPProfileMAI_F32_F32_X16, int_amdgcn_mfma_f32_32x32x2f32>; +def V_MFMA_F32_32X32X4F16 : VOP3Inst<"v_mfma_f32_32x32x4f16", VOPProfileMAI_F32_V4F16_X32, 
int_amdgcn_mfma_f32_32x32x4f16>; +def V_MFMA_F32_32X32X8F16 : VOP3Inst<"v_mfma_f32_32x32x8f16", VOPProfileMAI_F32_V4F16_X16, int_amdgcn_mfma_f32_32x32x8f16>; +def V_MFMA_I32_32X32X4I8 : VOP3Inst<"v_mfma_i32_32x32x4i8", VOPProfileMAI_I32_I32_X32, int_amdgcn_mfma_i32_32x32x4i8>; +def V_MFMA_I32_32X32X8I8 : VOP3Inst<"v_mfma_i32_32x32x8i8", VOPProfileMAI_I32_I32_X16, int_amdgcn_mfma_i32_32x32x8i8>; +def V_MFMA_F32_32X32X2BF16 : VOP3Inst<"v_mfma_f32_32x32x2bf16", VOPProfileMAI_F32_V2I16_X32, int_amdgcn_mfma_f32_32x32x2bf16>; +def V_MFMA_F32_32X32X4BF16 : VOP3Inst<"v_mfma_f32_32x32x4bf16", VOPProfileMAI_F32_V2I16_X16, int_amdgcn_mfma_f32_32x32x4bf16>; +} // End isConvergent = 1 + +} // End Predicates = [HasMAIInsts] + +def : MnemonicAlias<"v_accvgpr_read", "v_accvgpr_read_b32">; +def : MnemonicAlias<"v_accvgpr_write", "v_accvgpr_write_b32">; + multiclass VOP3P_Real_vi op> { def _vi : VOP3P_Real(NAME), SIEncodingFamily.VI>, VOP3Pe (NAME).Pfl> { @@ -332,6 +407,14 @@ } } +multiclass VOP3P_Real_MAI op> { + def _vi : VOP3P_Real(NAME), SIEncodingFamily.VI>, + VOP3Pe_MAI (NAME).Pfl> { + let AssemblerPredicates = [HasMAIInsts]; + let DecoderNamespace = "GFX8"; + } +} + defm V_PK_MAD_I16 : VOP3P_Real_vi <0x380>; defm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x381>; defm V_PK_ADD_I16 : VOP3P_Real_vi <0x382>; @@ -389,6 +472,33 @@ } // End SubtargetPredicate = HasDot1Insts +let SubtargetPredicate = HasMAIInsts in { + +defm V_ACCVGPR_READ_B32 : VOP3P_Real_MAI <0x3d8>; +defm V_ACCVGPR_WRITE_B32 : VOP3P_Real_MAI <0x3d9>; +defm V_MFMA_F32_32X32X1F32 : VOP3P_Real_MAI <0x3c0>; +defm V_MFMA_F32_16X16X1F32 : VOP3P_Real_MAI <0x3c1>; +defm V_MFMA_F32_4X4X1F32 : VOP3P_Real_MAI <0x3c2>; +defm V_MFMA_F32_32X32X2F32 : VOP3P_Real_MAI <0x3c4>; +defm V_MFMA_F32_16X16X4F32 : VOP3P_Real_MAI <0x3c5>; +defm V_MFMA_F32_32X32X4F16 : VOP3P_Real_MAI <0x3c8>; +defm V_MFMA_F32_16X16X4F16 : VOP3P_Real_MAI <0x3c9>; +defm V_MFMA_F32_4X4X4F16 : VOP3P_Real_MAI <0x3ca>; +defm V_MFMA_F32_32X32X8F16 : VOP3P_Real_MAI 
<0x3cc>; +defm V_MFMA_F32_16X16X16F16 : VOP3P_Real_MAI <0x3cd>; +defm V_MFMA_I32_32X32X4I8 : VOP3P_Real_MAI <0x3d0>; +defm V_MFMA_I32_16X16X4I8 : VOP3P_Real_MAI <0x3d1>; +defm V_MFMA_I32_4X4X4I8 : VOP3P_Real_MAI <0x3d2>; +defm V_MFMA_I32_32X32X8I8 : VOP3P_Real_MAI <0x3d4>; +defm V_MFMA_I32_16X16X16I8 : VOP3P_Real_MAI <0x3d5>; +defm V_MFMA_F32_32X32X2BF16 : VOP3P_Real_MAI <0x3e8>; +defm V_MFMA_F32_16X16X2BF16 : VOP3P_Real_MAI <0x3e9>; +defm V_MFMA_F32_4X4X2BF16 : VOP3P_Real_MAI <0x3eb>; +defm V_MFMA_F32_32X32X4BF16 : VOP3P_Real_MAI <0x3ec>; +defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MAI <0x3ed>; + +} // End SubtargetPredicate = HasMAIInsts + //===----------------------------------------------------------------------===// // GFX10. //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/VOPInstructions.td =================================================================== --- lib/Target/AMDGPU/VOPInstructions.td +++ lib/Target/AMDGPU/VOPInstructions.td @@ -90,6 +90,7 @@ let VOP3_OPSEL = isVop3OpSel; let IsPacked = P.IsPacked; + let IsMAI = P.IsMAI; let AsmOperands = !if(isVop3OpSel, P.AsmVOP3OpSel, @@ -326,6 +327,36 @@ let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo) } +class VOP3Pe_MAI op, VOPProfile P> : Enc64 { + bits<8> vdst; + bits<10> src0; + bits<10> src1; + bits<9> src2; + bits<3> blgp; + bits<3> cbsz; + bits<4> abid; + bits<1> clamp; + + let Inst{7-0} = vdst; + + let Inst{10-8} = !if(P.HasSrc1, cbsz, 0); + let Inst{14-11} = !if(P.HasSrc1, abid, 0); + + let Inst{15} = !if(P.HasClamp, clamp{0}, 0); + + let Inst{25-16} = op; + let Inst{31-26} = 0x34; //encoding + let Inst{40-32} = !if(P.HasSrc0, src0{8-0}, 0); + let Inst{49-41} = !if(P.HasSrc1, src1{8-0}, 0); + let Inst{58-50} = !if(P.HasSrc2, src2, 0); + + let Inst{59} = !if(P.HasSrc0, src0{9}, 0); // acc(0) + let Inst{60} = !if(P.HasSrc1, src1{9}, 0); // acc(1) + + let Inst{63-61} = !if(P.HasSrc1, blgp, 0); +} + + class VOP3Pe_gfx10 op, 
VOPProfile P> : VOP3Pe { let Inst{31-26} = 0x33; //encoding } Index: test/MC/AMDGPU/accvgpr-altnames.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/accvgpr-altnames.s @@ -0,0 +1,10 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding %s | FileCheck -check-prefix=GFX908 %s + +v_accvgpr_read_b32 v2, acc0 +// GFX908: v_accvgpr_read_b32 v2, a0 ; encoding: [0x02,0x00,0xd8,0xd3,0x00,0x01,0x00,0x08] + +v_accvgpr_write_b32 acc2, -2.0 +// GFX908: v_accvgpr_write_b32 a2, -2.0 ; encoding: [0x02,0x00,0xd9,0xd3,0xf5,0x00,0x00,0x00] + +v_mfma_f32_32x32x1f32 acc[0:31], acc0, acc1, acc[1:32] +// GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[1:32] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x06,0x1c] Index: test/MC/AMDGPU/mai-err.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/mai-err.s @@ -0,0 +1,50 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 %s 2>&1 | FileCheck -check-prefix=GFX908 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefix=GFX900 %s + +v_accvgpr_read_b32 v0, v0 +// GFX908: error: invalid operand for instruction + +v_accvgpr_read_b32 a0, a0 +// GFX908: error: invalid operand for instruction + +v_accvgpr_read_b32 v0, 1 +// GFX908: error: invalid operand for instruction + +v_accvgpr_read_b32 v0, s0 +// GFX908: error: invalid operand for instruction + +v_accvgpr_read_b32 v0, a0 +// GFX900: error: instruction not supported on this GPU + +v_accvgpr_write_b32 v0, v0 +// GFX908: error: invalid operand for instruction + +v_accvgpr_write_b32 a0, a0 +// GFX908: error: invalid operand for instruction + +v_accvgpr_write_b32 a0, s0 +// GFX908: error: invalid operand for instruction + +v_accvgpr_write_b32 a0, 65 +// GFX908: error: invalid operand for instruction + +v_accvgpr_write_b32 a0, v0 +// GFX900: error: instruction not supported on this GPU + +v_mfma_f32_32x32x1f32 v[0:31], v0, v1, a[1:32] +// GFX908: error: not a valid 
operand + +v_mfma_f32_32x32x1f32 a[0:31], v0, v1, v[1:32] +// GFX908: error: not a valid operand + +v_mfma_f32_32x32x1f32 a[0:31], s0, v1, a[1:32] +// GFX908: error: invalid operand for instruction + +v_mfma_f32_32x32x1f32 a[0:31], 1, v1, a[1:32] +// GFX908: error: invalid operand for instruction + +v_mfma_f32_32x32x1f32 a[0:31], v0, v1, 65 +// GFX908: error: invalid operand for instruction + +v_mfma_f32_32x32x1f32 a[0:31], v0, v1, 0 +// GFX900: error: instruction not supported on this GPU Index: test/MC/AMDGPU/mai.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/mai.s @@ -0,0 +1,985 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding %s | FileCheck -check-prefix=GFX908 %s + +v_accvgpr_read_b32 v2, a0 +// GFX908: v_accvgpr_read_b32 v2, a0 ; encoding: [0x02,0x00,0xd8,0xd3,0x00,0x01,0x00,0x08] + +v_accvgpr_read_b32 v2, a1 +// GFX908: v_accvgpr_read_b32 v2, a1 ; encoding: [0x02,0x00,0xd8,0xd3,0x01,0x01,0x00,0x08] + +v_accvgpr_read_b32 v2, a255 +// GFX908: v_accvgpr_read_b32 v2, a255 ; encoding: [0x02,0x00,0xd8,0xd3,0xff,0x01,0x00,0x08] + +v_accvgpr_read v2, a10 +// GFX908: v_accvgpr_read_b32 v2, a10 ; encoding: [0x02,0x00,0xd8,0xd3,0x0a,0x01,0x00,0x08] + +v_accvgpr_write_b32 a2, -2.0 +// GFX908: v_accvgpr_write_b32 a2, -2.0 ; encoding: [0x02,0x00,0xd9,0xd3,0xf5,0x00,0x00,0x00] + +v_accvgpr_write_b32 a2, -2 +// GFX908: v_accvgpr_write_b32 a2, -2 ; encoding: [0x02,0x00,0xd9,0xd3,0xc2,0x00,0x00,0x00] + +v_accvgpr_write_b32 a2, v1 +// GFX908: v_accvgpr_write_b32 a2, v1 ; encoding: [0x02,0x00,0xd9,0xd3,0x01,0x01,0x00,0x00] + +v_accvgpr_write a2, v255 +// GFX908: v_accvgpr_write_b32 a2, v255 ; encoding: [0x02,0x00,0xd9,0xd3,0xff,0x01,0x00,0x00] + +v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[1:32] +// GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[1:32] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x06,0x04] + +v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[1:32] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x1f32 
a[0:31], v0, v1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x06,0xe4] + +v_mfma_f32_32x32x1f32 a[0:31], v0, a1, a[1:32] +// GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, a1, a[1:32] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x06,0x14] + +v_mfma_f32_32x32x1f32 a[0:31], v0, a1, a[1:32] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, a1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x06,0xf4] + +v_mfma_f32_32x32x1f32 a[0:31], a0, v1, a[1:32] +// GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, v1, a[1:32] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x06,0x0c] + +v_mfma_f32_32x32x1f32 a[0:31], a0, v1, a[1:32] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, v1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x06,0xec] + +v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[1:32] +// GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[1:32] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x06,0x1c] + +v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[1:32] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x06,0xfc] + +v_mfma_f32_32x32x1f32 a[0:31], v0, v1, -2.0 +// GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, -2.0 ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0xd6,0x03] + +v_mfma_f32_32x32x1f32 a[0:31], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0xd6,0xe3] + +v_mfma_f32_32x32x1f32 a[0:31], v0, a1, -2.0 +// GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, a1, -2.0 ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0xd6,0x13] + +v_mfma_f32_32x32x1f32 a[0:31], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0xd6,0xf3] + +v_mfma_f32_32x32x1f32 a[0:31], a0, v1, -2.0 +// GFX908: v_mfma_f32_32x32x1f32 
a[0:31], a0, v1, -2.0 ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0xd6,0x0b] + +v_mfma_f32_32x32x1f32 a[0:31], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0xd6,0xeb] + +v_mfma_f32_32x32x1f32 a[0:31], a0, a1, -2.0 +// GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, a1, -2.0 ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0xd6,0x1b] + +v_mfma_f32_32x32x1f32 a[0:31], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0xd6,0xfb] + +v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[1:16] +// GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[1:16] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x06,0x04] + +v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x06,0xe4] + +v_mfma_f32_16x16x1f32 a[0:15], v0, a1, a[1:16] +// GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, a1, a[1:16] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x06,0x14] + +v_mfma_f32_16x16x1f32 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x06,0xf4] + +v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[1:16] +// GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[1:16] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x06,0x0c] + +v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x06,0xec] + +v_mfma_f32_16x16x1f32 a[0:15], a0, a1, a[1:16] +// GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, a1, a[1:16] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x06,0x1c] + +v_mfma_f32_16x16x1f32 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: 
v_mfma_f32_16x16x1f32 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x06,0xfc] + +v_mfma_f32_16x16x1f32 a[0:15], v0, v1, -2.0 +// GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, -2.0 ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0xd6,0x03] + +v_mfma_f32_16x16x1f32 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0xd6,0xe3] + +v_mfma_f32_16x16x1f32 a[0:15], v0, a1, -2.0 +// GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, a1, -2.0 ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0xd6,0x13] + +v_mfma_f32_16x16x1f32 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0xd6,0xf3] + +v_mfma_f32_16x16x1f32 a[0:15], a0, v1, -2.0 +// GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, v1, -2.0 ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0xd6,0x0b] + +v_mfma_f32_16x16x1f32 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0xd6,0xeb] + +v_mfma_f32_16x16x1f32 a[0:15], a0, a1, -2.0 +// GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, a1, -2.0 ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0xd6,0x1b] + +v_mfma_f32_16x16x1f32 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0xd6,0xfb] + +v_mfma_f32_4x4x1f32 a[0:3], v0, v1, a[1:4] +// GFX908: v_mfma_f32_4x4x1f32 a[0:3], v0, v1, a[1:4] ; encoding: [0x00,0x00,0xc2,0xd3,0x00,0x03,0x06,0x04] + +v_mfma_f32_4x4x1f32 a[0:3], v0, v1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x1f32 a[0:3], v0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc2,0xd3,0x00,0x03,0x06,0xe4] + +v_mfma_f32_4x4x1f32 a[0:3], v0, a1, a[1:4] +// GFX908: v_mfma_f32_4x4x1f32 a[0:3], v0, a1, a[1:4] 
; encoding: [0x00,0x00,0xc2,0xd3,0x00,0x03,0x06,0x14] + +v_mfma_f32_4x4x1f32 a[0:3], v0, a1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x1f32 a[0:3], v0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc2,0xd3,0x00,0x03,0x06,0xf4] + +v_mfma_f32_4x4x1f32 a[0:3], a0, v1, a[1:4] +// GFX908: v_mfma_f32_4x4x1f32 a[0:3], a0, v1, a[1:4] ; encoding: [0x00,0x00,0xc2,0xd3,0x00,0x03,0x06,0x0c] + +v_mfma_f32_4x4x1f32 a[0:3], a0, v1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x1f32 a[0:3], a0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc2,0xd3,0x00,0x03,0x06,0xec] + +v_mfma_f32_4x4x1f32 a[0:3], a0, a1, a[1:4] +// GFX908: v_mfma_f32_4x4x1f32 a[0:3], a0, a1, a[1:4] ; encoding: [0x00,0x00,0xc2,0xd3,0x00,0x03,0x06,0x1c] + +v_mfma_f32_4x4x1f32 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x1f32 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc2,0xd3,0x00,0x03,0x06,0xfc] + +v_mfma_f32_4x4x1f32 a[0:3], v0, v1, -2.0 +// GFX908: v_mfma_f32_4x4x1f32 a[0:3], v0, v1, -2.0 ; encoding: [0x00,0x00,0xc2,0xd3,0x00,0x03,0xd6,0x03] + +v_mfma_f32_4x4x1f32 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x1f32 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc2,0xd3,0x00,0x03,0xd6,0xe3] + +v_mfma_f32_4x4x1f32 a[0:3], v0, a1, -2.0 +// GFX908: v_mfma_f32_4x4x1f32 a[0:3], v0, a1, -2.0 ; encoding: [0x00,0x00,0xc2,0xd3,0x00,0x03,0xd6,0x13] + +v_mfma_f32_4x4x1f32 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x1f32 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc2,0xd3,0x00,0x03,0xd6,0xf3] + +v_mfma_f32_4x4x1f32 a[0:3], a0, v1, -2.0 +// GFX908: v_mfma_f32_4x4x1f32 a[0:3], a0, v1, -2.0 ; encoding: [0x00,0x00,0xc2,0xd3,0x00,0x03,0xd6,0x0b] + +v_mfma_f32_4x4x1f32 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x1f32 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc2,0xd3,0x00,0x03,0xd6,0xeb] + 
+v_mfma_f32_4x4x1f32 a[0:3], a0, a1, -2.0 +// GFX908: v_mfma_f32_4x4x1f32 a[0:3], a0, a1, -2.0 ; encoding: [0x00,0x00,0xc2,0xd3,0x00,0x03,0xd6,0x1b] + +v_mfma_f32_4x4x1f32 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x1f32 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc2,0xd3,0x00,0x03,0xd6,0xfb] + +v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[1:16] +// GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[1:16] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x06,0x04] + +v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x06,0xe4] + +v_mfma_f32_32x32x2f32 a[0:15], v0, a1, a[1:16] +// GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, a1, a[1:16] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x06,0x14] + +v_mfma_f32_32x32x2f32 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x06,0xf4] + +v_mfma_f32_32x32x2f32 a[0:15], a0, v1, a[1:16] +// GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, v1, a[1:16] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x06,0x0c] + +v_mfma_f32_32x32x2f32 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x06,0xec] + +v_mfma_f32_32x32x2f32 a[0:15], a0, a1, a[1:16] +// GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, a1, a[1:16] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x06,0x1c] + +v_mfma_f32_32x32x2f32 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x06,0xfc] + +v_mfma_f32_32x32x2f32 a[0:15], v0, v1, -2.0 +// GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, v1, -2.0 ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0xd6,0x03] + +v_mfma_f32_32x32x2f32 
a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0xd6,0xe3] + +v_mfma_f32_32x32x2f32 a[0:15], v0, a1, -2.0 +// GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, a1, -2.0 ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0xd6,0x13] + +v_mfma_f32_32x32x2f32 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0xd6,0xf3] + +v_mfma_f32_32x32x2f32 a[0:15], a0, v1, -2.0 +// GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, v1, -2.0 ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0xd6,0x0b] + +v_mfma_f32_32x32x2f32 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0xd6,0xeb] + +v_mfma_f32_32x32x2f32 a[0:15], a0, a1, -2.0 +// GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, a1, -2.0 ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0xd6,0x1b] + +v_mfma_f32_32x32x2f32 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0xd6,0xfb] + +v_mfma_f32_16x16x4f32 a[0:3], v0, v1, a[1:4] +// GFX908: v_mfma_f32_16x16x4f32 a[0:3], v0, v1, a[1:4] ; encoding: [0x00,0x00,0xc5,0xd3,0x00,0x03,0x06,0x04] + +v_mfma_f32_16x16x4f32 a[0:3], v0, v1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x4f32 a[0:3], v0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc5,0xd3,0x00,0x03,0x06,0xe4] + +v_mfma_f32_16x16x4f32 a[0:3], v0, a1, a[1:4] +// GFX908: v_mfma_f32_16x16x4f32 a[0:3], v0, a1, a[1:4] ; encoding: [0x00,0x00,0xc5,0xd3,0x00,0x03,0x06,0x14] + +v_mfma_f32_16x16x4f32 a[0:3], v0, a1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x4f32 a[0:3], v0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc5,0xd3,0x00,0x03,0x06,0xf4] + +v_mfma_f32_16x16x4f32 a[0:3], a0, 
v1, a[1:4] +// GFX908: v_mfma_f32_16x16x4f32 a[0:3], a0, v1, a[1:4] ; encoding: [0x00,0x00,0xc5,0xd3,0x00,0x03,0x06,0x0c] + +v_mfma_f32_16x16x4f32 a[0:3], a0, v1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x4f32 a[0:3], a0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc5,0xd3,0x00,0x03,0x06,0xec] + +v_mfma_f32_16x16x4f32 a[0:3], a0, a1, a[1:4] +// GFX908: v_mfma_f32_16x16x4f32 a[0:3], a0, a1, a[1:4] ; encoding: [0x00,0x00,0xc5,0xd3,0x00,0x03,0x06,0x1c] + +v_mfma_f32_16x16x4f32 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x4f32 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc5,0xd3,0x00,0x03,0x06,0xfc] + +v_mfma_f32_16x16x4f32 a[0:3], v0, v1, -2.0 +// GFX908: v_mfma_f32_16x16x4f32 a[0:3], v0, v1, -2.0 ; encoding: [0x00,0x00,0xc5,0xd3,0x00,0x03,0xd6,0x03] + +v_mfma_f32_16x16x4f32 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x4f32 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc5,0xd3,0x00,0x03,0xd6,0xe3] + +v_mfma_f32_16x16x4f32 a[0:3], v0, a1, -2.0 +// GFX908: v_mfma_f32_16x16x4f32 a[0:3], v0, a1, -2.0 ; encoding: [0x00,0x00,0xc5,0xd3,0x00,0x03,0xd6,0x13] + +v_mfma_f32_16x16x4f32 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x4f32 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc5,0xd3,0x00,0x03,0xd6,0xf3] + +v_mfma_f32_16x16x4f32 a[0:3], a0, v1, -2.0 +// GFX908: v_mfma_f32_16x16x4f32 a[0:3], a0, v1, -2.0 ; encoding: [0x00,0x00,0xc5,0xd3,0x00,0x03,0xd6,0x0b] + +v_mfma_f32_16x16x4f32 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x4f32 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc5,0xd3,0x00,0x03,0xd6,0xeb] + +v_mfma_f32_16x16x4f32 a[0:3], a0, a1, -2.0 +// GFX908: v_mfma_f32_16x16x4f32 a[0:3], a0, a1, -2.0 ; encoding: [0x00,0x00,0xc5,0xd3,0x00,0x03,0xd6,0x1b] + +v_mfma_f32_16x16x4f32 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x4f32 
a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc5,0xd3,0x00,0x03,0xd6,0xfb] + +v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], a[1:32] +// GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], a[1:32] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0x06,0x04] + +v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], a[1:32] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0x06,0xe4] + +v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], a[1:32] +// GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], a[1:32] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0x06,0x14] + +v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], a[1:32] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0x06,0xf4] + +v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], a[1:32] +// GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], a[1:32] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0x06,0x0c] + +v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], a[1:32] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0x06,0xec] + +v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], a[1:32] +// GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], a[1:32] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0x06,0x1c] + +v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], a[1:32] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0x06,0xfc] + +v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], -2.0 +// GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], -2.0 ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0xd6,0x03] + +v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], 
v[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0xd6,0xe3] + +v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], -2.0 +// GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], -2.0 ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0xd6,0x13] + +v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0xd6,0xf3] + +v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], -2.0 +// GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], -2.0 ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0xd6,0x0b] + +v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0xd6,0xeb] + +v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], -2.0 +// GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], -2.0 ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0xd6,0x1b] + +v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0xd6,0xfb] + +v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], a[1:16] +// GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], a[1:16] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0x06,0x04] + +v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x03,0x06,0xe4] + +v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], a[1:16] +// GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], a[1:16] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0x06,0x14] + +v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; 
encoding: [0x00,0x13,0xc9,0xd3,0x00,0x03,0x06,0xf4] + +v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], a[1:16] +// GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], a[1:16] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0x06,0x0c] + +v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x03,0x06,0xec] + +v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], a[1:16] +// GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], a[1:16] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0x06,0x1c] + +v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x03,0x06,0xfc] + +v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], -2.0 +// GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], -2.0 ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0xd6,0x03] + +v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x03,0xd6,0xe3] + +v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], -2.0 +// GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], -2.0 ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0xd6,0x13] + +v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x03,0xd6,0xf3] + +v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], -2.0 +// GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], -2.0 ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0xd6,0x0b] + +v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: 
[0x00,0x13,0xc9,0xd3,0x00,0x03,0xd6,0xeb] + +v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], -2.0 +// GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], -2.0 ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0xd6,0x1b] + +v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x03,0xd6,0xfb] + +v_mfma_f32_4x4x4f16 a[0:3], v[0:1], v[1:2], a[1:4] +// GFX908: v_mfma_f32_4x4x4f16 a[0:3], v[0:1], v[1:2], a[1:4] ; encoding: [0x00,0x00,0xca,0xd3,0x00,0x03,0x06,0x04] + +v_mfma_f32_4x4x4f16 a[0:3], v[0:1], v[1:2], a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x4f16 a[0:3], v[0:1], v[1:2], a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xca,0xd3,0x00,0x03,0x06,0xe4] + +v_mfma_f32_4x4x4f16 a[0:3], v[0:1], a[1:2], a[1:4] +// GFX908: v_mfma_f32_4x4x4f16 a[0:3], v[0:1], a[1:2], a[1:4] ; encoding: [0x00,0x00,0xca,0xd3,0x00,0x03,0x06,0x14] + +v_mfma_f32_4x4x4f16 a[0:3], v[0:1], a[1:2], a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x4f16 a[0:3], v[0:1], a[1:2], a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xca,0xd3,0x00,0x03,0x06,0xf4] + +v_mfma_f32_4x4x4f16 a[0:3], a[0:1], v[1:2], a[1:4] +// GFX908: v_mfma_f32_4x4x4f16 a[0:3], a[0:1], v[1:2], a[1:4] ; encoding: [0x00,0x00,0xca,0xd3,0x00,0x03,0x06,0x0c] + +v_mfma_f32_4x4x4f16 a[0:3], a[0:1], v[1:2], a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x4f16 a[0:3], a[0:1], v[1:2], a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xca,0xd3,0x00,0x03,0x06,0xec] + +v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], a[1:4] +// GFX908: v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], a[1:4] ; encoding: [0x00,0x00,0xca,0xd3,0x00,0x03,0x06,0x1c] + +v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xca,0xd3,0x00,0x03,0x06,0xfc] + +v_mfma_f32_4x4x4f16 a[0:3], 
v[0:1], v[1:2], -2.0 +// GFX908: v_mfma_f32_4x4x4f16 a[0:3], v[0:1], v[1:2], -2.0 ; encoding: [0x00,0x00,0xca,0xd3,0x00,0x03,0xd6,0x03] + +v_mfma_f32_4x4x4f16 a[0:3], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x4f16 a[0:3], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xca,0xd3,0x00,0x03,0xd6,0xe3] + +v_mfma_f32_4x4x4f16 a[0:3], v[0:1], a[1:2], -2.0 +// GFX908: v_mfma_f32_4x4x4f16 a[0:3], v[0:1], a[1:2], -2.0 ; encoding: [0x00,0x00,0xca,0xd3,0x00,0x03,0xd6,0x13] + +v_mfma_f32_4x4x4f16 a[0:3], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x4f16 a[0:3], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xca,0xd3,0x00,0x03,0xd6,0xf3] + +v_mfma_f32_4x4x4f16 a[0:3], a[0:1], v[1:2], -2.0 +// GFX908: v_mfma_f32_4x4x4f16 a[0:3], a[0:1], v[1:2], -2.0 ; encoding: [0x00,0x00,0xca,0xd3,0x00,0x03,0xd6,0x0b] + +v_mfma_f32_4x4x4f16 a[0:3], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x4f16 a[0:3], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xca,0xd3,0x00,0x03,0xd6,0xeb] + +v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], -2.0 +// GFX908: v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], -2.0 ; encoding: [0x00,0x00,0xca,0xd3,0x00,0x03,0xd6,0x1b] + +v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xca,0xd3,0x00,0x03,0xd6,0xfb] + +v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], a[1:16] +// GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], a[1:16] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x03,0x06,0x04] + +v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0x06,0xe4] + +v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], a[1:16] +// GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], a[1:16] ; 
encoding: [0x00,0x00,0xcc,0xd3,0x00,0x03,0x06,0x14] + +v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0x06,0xf4] + +v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], a[1:16] +// GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], a[1:16] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x03,0x06,0x0c] + +v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0x06,0xec] + +v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], a[1:16] +// GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], a[1:16] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x03,0x06,0x1c] + +v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0x06,0xfc] + +v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], -2.0 +// GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], -2.0 ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x03,0xd6,0x03] + +v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0xd6,0xe3] + +v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], -2.0 +// GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], -2.0 ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x03,0xd6,0x13] + +v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0xd6,0xf3] + +v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], -2.0 +// GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], -2.0 ; encoding: 
[0x00,0x00,0xcc,0xd3,0x00,0x03,0xd6,0x0b] + +v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0xd6,0xeb] + +v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], -2.0 +// GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], -2.0 ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x03,0xd6,0x1b] + +v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0xd6,0xfb] + +v_mfma_f32_16x16x16f16 a[0:3], v[0:1], v[1:2], a[1:4] +// GFX908: v_mfma_f32_16x16x16f16 a[0:3], v[0:1], v[1:2], a[1:4] ; encoding: [0x00,0x00,0xcd,0xd3,0x00,0x03,0x06,0x04] + +v_mfma_f32_16x16x16f16 a[0:3], v[0:1], v[1:2], a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x16f16 a[0:3], v[0:1], v[1:2], a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcd,0xd3,0x00,0x03,0x06,0xe4] + +v_mfma_f32_16x16x16f16 a[0:3], v[0:1], a[1:2], a[1:4] +// GFX908: v_mfma_f32_16x16x16f16 a[0:3], v[0:1], a[1:2], a[1:4] ; encoding: [0x00,0x00,0xcd,0xd3,0x00,0x03,0x06,0x14] + +v_mfma_f32_16x16x16f16 a[0:3], v[0:1], a[1:2], a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x16f16 a[0:3], v[0:1], a[1:2], a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcd,0xd3,0x00,0x03,0x06,0xf4] + +v_mfma_f32_16x16x16f16 a[0:3], a[0:1], v[1:2], a[1:4] +// GFX908: v_mfma_f32_16x16x16f16 a[0:3], a[0:1], v[1:2], a[1:4] ; encoding: [0x00,0x00,0xcd,0xd3,0x00,0x03,0x06,0x0c] + +v_mfma_f32_16x16x16f16 a[0:3], a[0:1], v[1:2], a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x16f16 a[0:3], a[0:1], v[1:2], a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcd,0xd3,0x00,0x03,0x06,0xec] + +v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], a[1:4] +// GFX908: v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], a[1:4] ; encoding: 
[0x00,0x00,0xcd,0xd3,0x00,0x03,0x06,0x1c] + +v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcd,0xd3,0x00,0x03,0x06,0xfc] + +v_mfma_f32_16x16x16f16 a[0:3], v[0:1], v[1:2], -2.0 +// GFX908: v_mfma_f32_16x16x16f16 a[0:3], v[0:1], v[1:2], -2.0 ; encoding: [0x00,0x00,0xcd,0xd3,0x00,0x03,0xd6,0x03] + +v_mfma_f32_16x16x16f16 a[0:3], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x16f16 a[0:3], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcd,0xd3,0x00,0x03,0xd6,0xe3] + +v_mfma_f32_16x16x16f16 a[0:3], v[0:1], a[1:2], -2.0 +// GFX908: v_mfma_f32_16x16x16f16 a[0:3], v[0:1], a[1:2], -2.0 ; encoding: [0x00,0x00,0xcd,0xd3,0x00,0x03,0xd6,0x13] + +v_mfma_f32_16x16x16f16 a[0:3], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x16f16 a[0:3], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcd,0xd3,0x00,0x03,0xd6,0xf3] + +v_mfma_f32_16x16x16f16 a[0:3], a[0:1], v[1:2], -2.0 +// GFX908: v_mfma_f32_16x16x16f16 a[0:3], a[0:1], v[1:2], -2.0 ; encoding: [0x00,0x00,0xcd,0xd3,0x00,0x03,0xd6,0x0b] + +v_mfma_f32_16x16x16f16 a[0:3], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x16f16 a[0:3], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcd,0xd3,0x00,0x03,0xd6,0xeb] + +v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], -2.0 +// GFX908: v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], -2.0 ; encoding: [0x00,0x00,0xcd,0xd3,0x00,0x03,0xd6,0x1b] + +v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcd,0xd3,0x00,0x03,0xd6,0xfb] + +v_mfma_i32_32x32x4i8 a[0:31], v0, v1, a[1:32] +// GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, v1, a[1:32] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x06,0x04] + +v_mfma_i32_32x32x4i8 
a[0:31], v0, v1, a[1:32] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, v1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x06,0xe4] + +v_mfma_i32_32x32x4i8 a[0:31], v0, a1, a[1:32] +// GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, a1, a[1:32] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x06,0x14] + +v_mfma_i32_32x32x4i8 a[0:31], v0, a1, a[1:32] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, a1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x06,0xf4] + +v_mfma_i32_32x32x4i8 a[0:31], a0, v1, a[1:32] +// GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, v1, a[1:32] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x06,0x0c] + +v_mfma_i32_32x32x4i8 a[0:31], a0, v1, a[1:32] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, v1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x06,0xec] + +v_mfma_i32_32x32x4i8 a[0:31], a0, a1, a[1:32] +// GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, a1, a[1:32] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x06,0x1c] + +v_mfma_i32_32x32x4i8 a[0:31], a0, a1, a[1:32] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, a1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x06,0xfc] + +v_mfma_i32_32x32x4i8 a[0:31], v0, v1, 2 +// GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, v1, 2 ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x0a,0x02] + +v_mfma_i32_32x32x4i8 a[0:31], v0, v1, 2 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, v1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x0a,0xe2] + +v_mfma_i32_32x32x4i8 a[0:31], v0, a1, 2 +// GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, a1, 2 ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x0a,0x12] + +v_mfma_i32_32x32x4i8 a[0:31], v0, a1, 2 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x0a,0xf2] + +v_mfma_i32_32x32x4i8 a[0:31], a0, v1, 2 +// 
GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, v1, 2 ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x0a,0x0a] + +v_mfma_i32_32x32x4i8 a[0:31], a0, v1, 2 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, v1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x0a,0xea] + +v_mfma_i32_32x32x4i8 a[0:31], a0, a1, 2 +// GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, a1, 2 ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x0a,0x1a] + +v_mfma_i32_32x32x4i8 a[0:31], a0, a1, 2 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x0a,0xfa] + +v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[1:16] +// GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[1:16] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x06,0x04] + +v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x06,0xe4] + +v_mfma_i32_16x16x4i8 a[0:15], v0, a1, a[1:16] +// GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, a1, a[1:16] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x06,0x14] + +v_mfma_i32_16x16x4i8 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x06,0xf4] + +v_mfma_i32_16x16x4i8 a[0:15], a0, v1, a[1:16] +// GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, v1, a[1:16] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x06,0x0c] + +v_mfma_i32_16x16x4i8 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x06,0xec] + +v_mfma_i32_16x16x4i8 a[0:15], a0, a1, a[1:16] +// GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, a1, a[1:16] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x06,0x1c] + +v_mfma_i32_16x16x4i8 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_16x16x4i8 a[0:15], 
a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x06,0xfc] + +v_mfma_i32_16x16x4i8 a[0:15], v0, v1, 2 +// GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, v1, 2 ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x0a,0x02] + +v_mfma_i32_16x16x4i8 a[0:15], v0, v1, 2 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, v1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x0a,0xe2] + +v_mfma_i32_16x16x4i8 a[0:15], v0, a1, 2 +// GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, a1, 2 ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x0a,0x12] + +v_mfma_i32_16x16x4i8 a[0:15], v0, a1, 2 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x0a,0xf2] + +v_mfma_i32_16x16x4i8 a[0:15], a0, v1, 2 +// GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, v1, 2 ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x0a,0x0a] + +v_mfma_i32_16x16x4i8 a[0:15], a0, v1, 2 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, v1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x0a,0xea] + +v_mfma_i32_16x16x4i8 a[0:15], a0, a1, 2 +// GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, a1, 2 ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x0a,0x1a] + +v_mfma_i32_16x16x4i8 a[0:15], a0, a1, 2 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x0a,0xfa] + +v_mfma_i32_4x4x4i8 a[0:3], v0, v1, a[1:4] +// GFX908: v_mfma_i32_4x4x4i8 a[0:3], v0, v1, a[1:4] ; encoding: [0x00,0x00,0xd2,0xd3,0x00,0x03,0x06,0x04] + +v_mfma_i32_4x4x4i8 a[0:3], v0, v1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_4x4x4i8 a[0:3], v0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd2,0xd3,0x00,0x03,0x06,0xe4] + +v_mfma_i32_4x4x4i8 a[0:3], v0, a1, a[1:4] +// GFX908: v_mfma_i32_4x4x4i8 a[0:3], v0, a1, a[1:4] ; encoding: [0x00,0x00,0xd2,0xd3,0x00,0x03,0x06,0x14] + +v_mfma_i32_4x4x4i8 a[0:3], v0, a1, a[1:4] 
cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_4x4x4i8 a[0:3], v0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd2,0xd3,0x00,0x03,0x06,0xf4] + +v_mfma_i32_4x4x4i8 a[0:3], a0, v1, a[1:4] +// GFX908: v_mfma_i32_4x4x4i8 a[0:3], a0, v1, a[1:4] ; encoding: [0x00,0x00,0xd2,0xd3,0x00,0x03,0x06,0x0c] + +v_mfma_i32_4x4x4i8 a[0:3], a0, v1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_4x4x4i8 a[0:3], a0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd2,0xd3,0x00,0x03,0x06,0xec] + +v_mfma_i32_4x4x4i8 a[0:3], a0, a1, a[1:4] +// GFX908: v_mfma_i32_4x4x4i8 a[0:3], a0, a1, a[1:4] ; encoding: [0x00,0x00,0xd2,0xd3,0x00,0x03,0x06,0x1c] + +v_mfma_i32_4x4x4i8 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_4x4x4i8 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd2,0xd3,0x00,0x03,0x06,0xfc] + +v_mfma_i32_4x4x4i8 a[0:3], v0, v1, 2 +// GFX908: v_mfma_i32_4x4x4i8 a[0:3], v0, v1, 2 ; encoding: [0x00,0x00,0xd2,0xd3,0x00,0x03,0x0a,0x02] + +v_mfma_i32_4x4x4i8 a[0:3], v0, v1, 2 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_4x4x4i8 a[0:3], v0, v1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd2,0xd3,0x00,0x03,0x0a,0xe2] + +v_mfma_i32_4x4x4i8 a[0:3], v0, a1, 2 +// GFX908: v_mfma_i32_4x4x4i8 a[0:3], v0, a1, 2 ; encoding: [0x00,0x00,0xd2,0xd3,0x00,0x03,0x0a,0x12] + +v_mfma_i32_4x4x4i8 a[0:3], v0, a1, 2 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_4x4x4i8 a[0:3], v0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd2,0xd3,0x00,0x03,0x0a,0xf2] + +v_mfma_i32_4x4x4i8 a[0:3], a0, v1, 2 +// GFX908: v_mfma_i32_4x4x4i8 a[0:3], a0, v1, 2 ; encoding: [0x00,0x00,0xd2,0xd3,0x00,0x03,0x0a,0x0a] + +v_mfma_i32_4x4x4i8 a[0:3], a0, v1, 2 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_4x4x4i8 a[0:3], a0, v1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd2,0xd3,0x00,0x03,0x0a,0xea] + +v_mfma_i32_4x4x4i8 a[0:3], a0, a1, 2 +// GFX908: v_mfma_i32_4x4x4i8 a[0:3], a0, a1, 2 ; encoding: [0x00,0x00,0xd2,0xd3,0x00,0x03,0x0a,0x1a] + +v_mfma_i32_4x4x4i8 
a[0:3], a0, a1, 2 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_4x4x4i8 a[0:3], a0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd2,0xd3,0x00,0x03,0x0a,0xfa] + +v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[1:16] +// GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[1:16] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x06,0x04] + +v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x06,0xe4] + +v_mfma_i32_32x32x8i8 a[0:15], v0, a1, a[1:16] +// GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, a1, a[1:16] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x06,0x14] + +v_mfma_i32_32x32x8i8 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x06,0xf4] + +v_mfma_i32_32x32x8i8 a[0:15], a0, v1, a[1:16] +// GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, v1, a[1:16] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x06,0x0c] + +v_mfma_i32_32x32x8i8 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x06,0xec] + +v_mfma_i32_32x32x8i8 a[0:15], a0, a1, a[1:16] +// GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, a1, a[1:16] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x06,0x1c] + +v_mfma_i32_32x32x8i8 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x06,0xfc] + +v_mfma_i32_32x32x8i8 a[0:15], v0, v1, 2 +// GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, v1, 2 ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x0a,0x02] + +v_mfma_i32_32x32x8i8 a[0:15], v0, v1, 2 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, v1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x0a,0xe2] + +v_mfma_i32_32x32x8i8 a[0:15], v0, a1, 
2 +// GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, a1, 2 ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x0a,0x12] + +v_mfma_i32_32x32x8i8 a[0:15], v0, a1, 2 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x0a,0xf2] + +v_mfma_i32_32x32x8i8 a[0:15], a0, v1, 2 +// GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, v1, 2 ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x0a,0x0a] + +v_mfma_i32_32x32x8i8 a[0:15], a0, v1, 2 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, v1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x0a,0xea] + +v_mfma_i32_32x32x8i8 a[0:15], a0, a1, 2 +// GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, a1, 2 ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x0a,0x1a] + +v_mfma_i32_32x32x8i8 a[0:15], a0, a1, 2 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x0a,0xfa] + +v_mfma_i32_16x16x16i8 a[0:3], v0, v1, a[1:4] +// GFX908: v_mfma_i32_16x16x16i8 a[0:3], v0, v1, a[1:4] ; encoding: [0x00,0x00,0xd5,0xd3,0x00,0x03,0x06,0x04] + +v_mfma_i32_16x16x16i8 a[0:3], v0, v1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_16x16x16i8 a[0:3], v0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd5,0xd3,0x00,0x03,0x06,0xe4] + +v_mfma_i32_16x16x16i8 a[0:3], v0, a1, a[1:4] +// GFX908: v_mfma_i32_16x16x16i8 a[0:3], v0, a1, a[1:4] ; encoding: [0x00,0x00,0xd5,0xd3,0x00,0x03,0x06,0x14] + +v_mfma_i32_16x16x16i8 a[0:3], v0, a1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_16x16x16i8 a[0:3], v0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd5,0xd3,0x00,0x03,0x06,0xf4] + +v_mfma_i32_16x16x16i8 a[0:3], a0, v1, a[1:4] +// GFX908: v_mfma_i32_16x16x16i8 a[0:3], a0, v1, a[1:4] ; encoding: [0x00,0x00,0xd5,0xd3,0x00,0x03,0x06,0x0c] + +v_mfma_i32_16x16x16i8 a[0:3], a0, v1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_16x16x16i8 a[0:3], a0, v1, a[1:4] cbsz:3 abid:2 
blgp:7 ; encoding: [0x00,0x13,0xd5,0xd3,0x00,0x03,0x06,0xec] + +v_mfma_i32_16x16x16i8 a[0:3], a0, a1, a[1:4] +// GFX908: v_mfma_i32_16x16x16i8 a[0:3], a0, a1, a[1:4] ; encoding: [0x00,0x00,0xd5,0xd3,0x00,0x03,0x06,0x1c] + +v_mfma_i32_16x16x16i8 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_16x16x16i8 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd5,0xd3,0x00,0x03,0x06,0xfc] + +v_mfma_i32_16x16x16i8 a[0:3], v0, v1, 2 +// GFX908: v_mfma_i32_16x16x16i8 a[0:3], v0, v1, 2 ; encoding: [0x00,0x00,0xd5,0xd3,0x00,0x03,0x0a,0x02] + +v_mfma_i32_16x16x16i8 a[0:3], v0, v1, 2 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_16x16x16i8 a[0:3], v0, v1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd5,0xd3,0x00,0x03,0x0a,0xe2] + +v_mfma_i32_16x16x16i8 a[0:3], v0, a1, 2 +// GFX908: v_mfma_i32_16x16x16i8 a[0:3], v0, a1, 2 ; encoding: [0x00,0x00,0xd5,0xd3,0x00,0x03,0x0a,0x12] + +v_mfma_i32_16x16x16i8 a[0:3], v0, a1, 2 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_16x16x16i8 a[0:3], v0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd5,0xd3,0x00,0x03,0x0a,0xf2] + +v_mfma_i32_16x16x16i8 a[0:3], a0, v1, 2 +// GFX908: v_mfma_i32_16x16x16i8 a[0:3], a0, v1, 2 ; encoding: [0x00,0x00,0xd5,0xd3,0x00,0x03,0x0a,0x0a] + +v_mfma_i32_16x16x16i8 a[0:3], a0, v1, 2 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_16x16x16i8 a[0:3], a0, v1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd5,0xd3,0x00,0x03,0x0a,0xea] + +v_mfma_i32_16x16x16i8 a[0:3], a0, a1, 2 +// GFX908: v_mfma_i32_16x16x16i8 a[0:3], a0, a1, 2 ; encoding: [0x00,0x00,0xd5,0xd3,0x00,0x03,0x0a,0x1a] + +v_mfma_i32_16x16x16i8 a[0:3], a0, a1, 2 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_i32_16x16x16i8 a[0:3], a0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd5,0xd3,0x00,0x03,0x0a,0xfa] + +v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[1:32] +// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[1:32] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x06,0x04] + +v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[1:32] 
cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x06,0xe4] + +v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, a[1:32] +// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, a[1:32] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x06,0x14] + +v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, a[1:32] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x06,0xf4] + +v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, a[1:32] +// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, a[1:32] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x06,0x0c] + +v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, a[1:32] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x06,0xec] + +v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, a[1:32] +// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, a[1:32] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x06,0x1c] + +v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, a[1:32] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x06,0xfc] + +v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, -2.0 +// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, -2.0 ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0xd6,0x03] + +v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0xd6,0xe3] + +v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, -2.0 +// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, -2.0 ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0xd6,0x13] + +v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0xd6,0xf3] + 
+v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, -2.0 +// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, -2.0 ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0xd6,0x0b] + +v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0xd6,0xeb] + +v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, -2.0 +// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, -2.0 ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0xd6,0x1b] + +v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0xd6,0xfb] + +v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, a[1:16] +// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, a[1:16] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x06,0x04] + +v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x06,0xe4] + +v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, a[1:16] +// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, a[1:16] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x06,0x14] + +v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x06,0xf4] + +v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, a[1:16] +// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, a[1:16] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x06,0x0c] + +v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x06,0xec] + +v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, a[1:16] +// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, a[1:16] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x06,0x1c] 
+ +v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x06,0xfc] + +v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, -2.0 +// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, -2.0 ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0xd6,0x03] + +v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0xd6,0xe3] + +v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, -2.0 +// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, -2.0 ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0xd6,0x13] + +v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0xd6,0xf3] + +v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, -2.0 +// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, -2.0 ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0xd6,0x0b] + +v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0xd6,0xeb] + +v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, -2.0 +// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, -2.0 ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0xd6,0x1b] + +v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0xd6,0xfb] + +v_mfma_f32_4x4x2bf16 a[0:3], v0, v1, a[1:4] +// GFX908: v_mfma_f32_4x4x2bf16 a[0:3], v0, v1, a[1:4] ; encoding: [0x00,0x00,0xeb,0xd3,0x00,0x03,0x06,0x04] + +v_mfma_f32_4x4x2bf16 a[0:3], v0, v1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x2bf16 a[0:3], v0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: 
[0x00,0x13,0xeb,0xd3,0x00,0x03,0x06,0xe4] + +v_mfma_f32_4x4x2bf16 a[0:3], v0, a1, a[1:4] +// GFX908: v_mfma_f32_4x4x2bf16 a[0:3], v0, a1, a[1:4] ; encoding: [0x00,0x00,0xeb,0xd3,0x00,0x03,0x06,0x14] + +v_mfma_f32_4x4x2bf16 a[0:3], v0, a1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x2bf16 a[0:3], v0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xeb,0xd3,0x00,0x03,0x06,0xf4] + +v_mfma_f32_4x4x2bf16 a[0:3], a0, v1, a[1:4] +// GFX908: v_mfma_f32_4x4x2bf16 a[0:3], a0, v1, a[1:4] ; encoding: [0x00,0x00,0xeb,0xd3,0x00,0x03,0x06,0x0c] + +v_mfma_f32_4x4x2bf16 a[0:3], a0, v1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x2bf16 a[0:3], a0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xeb,0xd3,0x00,0x03,0x06,0xec] + +v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, a[1:4] +// GFX908: v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, a[1:4] ; encoding: [0x00,0x00,0xeb,0xd3,0x00,0x03,0x06,0x1c] + +v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xeb,0xd3,0x00,0x03,0x06,0xfc] + +v_mfma_f32_4x4x2bf16 a[0:3], v0, v1, -2.0 +// GFX908: v_mfma_f32_4x4x2bf16 a[0:3], v0, v1, -2.0 ; encoding: [0x00,0x00,0xeb,0xd3,0x00,0x03,0xd6,0x03] + +v_mfma_f32_4x4x2bf16 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x2bf16 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xeb,0xd3,0x00,0x03,0xd6,0xe3] + +v_mfma_f32_4x4x2bf16 a[0:3], v0, a1, -2.0 +// GFX908: v_mfma_f32_4x4x2bf16 a[0:3], v0, a1, -2.0 ; encoding: [0x00,0x00,0xeb,0xd3,0x00,0x03,0xd6,0x13] + +v_mfma_f32_4x4x2bf16 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x2bf16 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xeb,0xd3,0x00,0x03,0xd6,0xf3] + +v_mfma_f32_4x4x2bf16 a[0:3], a0, v1, -2.0 +// GFX908: v_mfma_f32_4x4x2bf16 a[0:3], a0, v1, -2.0 ; encoding: [0x00,0x00,0xeb,0xd3,0x00,0x03,0xd6,0x0b] + +v_mfma_f32_4x4x2bf16 a[0:3], a0, v1, 
-2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x2bf16 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xeb,0xd3,0x00,0x03,0xd6,0xeb] + +v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, -2.0 +// GFX908: v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, -2.0 ; encoding: [0x00,0x00,0xeb,0xd3,0x00,0x03,0xd6,0x1b] + +v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xeb,0xd3,0x00,0x03,0xd6,0xfb] + +v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, a[1:16] +// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, a[1:16] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x06,0x04] + +v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x06,0xe4] + +v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, a[1:16] +// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, a[1:16] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x06,0x14] + +v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x06,0xf4] + +v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, a[1:16] +// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, a[1:16] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x06,0x0c] + +v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x06,0xec] + +v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, a[1:16] +// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, a[1:16] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x06,0x1c] + +v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x06,0xfc] + 
+v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, -2.0 +// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, -2.0 ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0xd6,0x03] + +v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0xd6,0xe3] + +v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, -2.0 +// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, -2.0 ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0xd6,0x13] + +v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0xd6,0xf3] + +v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, -2.0 +// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, -2.0 ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0xd6,0x0b] + +v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0xd6,0xeb] + +v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, -2.0 +// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, -2.0 ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0xd6,0x1b] + +v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0xd6,0xfb] + +v_mfma_f32_16x16x8bf16 a[0:3], v0, v1, a[1:4] +// GFX908: v_mfma_f32_16x16x8bf16 a[0:3], v0, v1, a[1:4] ; encoding: [0x00,0x00,0xed,0xd3,0x00,0x03,0x06,0x04] + +v_mfma_f32_16x16x8bf16 a[0:3], v0, v1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x8bf16 a[0:3], v0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xed,0xd3,0x00,0x03,0x06,0xe4] + +v_mfma_f32_16x16x8bf16 a[0:3], v0, a1, a[1:4] +// GFX908: v_mfma_f32_16x16x8bf16 a[0:3], v0, a1, a[1:4] ; encoding: [0x00,0x00,0xed,0xd3,0x00,0x03,0x06,0x14] + +v_mfma_f32_16x16x8bf16 a[0:3], 
v0, a1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x8bf16 a[0:3], v0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xed,0xd3,0x00,0x03,0x06,0xf4] + +v_mfma_f32_16x16x8bf16 a[0:3], a0, v1, a[1:4] +// GFX908: v_mfma_f32_16x16x8bf16 a[0:3], a0, v1, a[1:4] ; encoding: [0x00,0x00,0xed,0xd3,0x00,0x03,0x06,0x0c] + +v_mfma_f32_16x16x8bf16 a[0:3], a0, v1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x8bf16 a[0:3], a0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xed,0xd3,0x00,0x03,0x06,0xec] + +v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, a[1:4] +// GFX908: v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, a[1:4] ; encoding: [0x00,0x00,0xed,0xd3,0x00,0x03,0x06,0x1c] + +v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xed,0xd3,0x00,0x03,0x06,0xfc] + +v_mfma_f32_16x16x8bf16 a[0:3], v0, v1, -2.0 +// GFX908: v_mfma_f32_16x16x8bf16 a[0:3], v0, v1, -2.0 ; encoding: [0x00,0x00,0xed,0xd3,0x00,0x03,0xd6,0x03] + +v_mfma_f32_16x16x8bf16 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x8bf16 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xed,0xd3,0x00,0x03,0xd6,0xe3] + +v_mfma_f32_16x16x8bf16 a[0:3], v0, a1, -2.0 +// GFX908: v_mfma_f32_16x16x8bf16 a[0:3], v0, a1, -2.0 ; encoding: [0x00,0x00,0xed,0xd3,0x00,0x03,0xd6,0x13] + +v_mfma_f32_16x16x8bf16 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x8bf16 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xed,0xd3,0x00,0x03,0xd6,0xf3] + +v_mfma_f32_16x16x8bf16 a[0:3], a0, v1, -2.0 +// GFX908: v_mfma_f32_16x16x8bf16 a[0:3], a0, v1, -2.0 ; encoding: [0x00,0x00,0xed,0xd3,0x00,0x03,0xd6,0x0b] + +v_mfma_f32_16x16x8bf16 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x8bf16 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xed,0xd3,0x00,0x03,0xd6,0xeb] + +v_mfma_f32_16x16x8bf16 a[0:3], 
a0, a1, -2.0 +// GFX908: v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, -2.0 ; encoding: [0x00,0x00,0xed,0xd3,0x00,0x03,0xd6,0x1b] + +v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 +// GFX908: v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xed,0xd3,0x00,0x03,0xd6,0xfb] Index: test/MC/Disassembler/AMDGPU/mai.txt =================================================================== --- /dev/null +++ test/MC/Disassembler/AMDGPU/mai.txt @@ -0,0 +1,979 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding -disassemble %s | FileCheck -check-prefix=GFX908 %s + +# GFX908: v_accvgpr_read_b32 v2, a0 ; encoding: [0x02,0x00,0xd8,0xd3,0x00,0x01,0x00,0x08] +0x02,0x00,0xd8,0xd3,0x00,0x01,0x00,0x08 + +# GFX908: v_accvgpr_read_b32 v2, a1 ; encoding: [0x02,0x00,0xd8,0xd3,0x01,0x01,0x00,0x08] +0x02,0x00,0xd8,0xd3,0x01,0x01,0x00,0x08 + +# GFX908: v_accvgpr_read_b32 v2, a255 ; encoding: [0x02,0x00,0xd8,0xd3,0xff,0x01,0x00,0x08] +0x02,0x00,0xd8,0xd3,0xff,0x01,0x00,0x08 + +# GFX908: v_accvgpr_write_b32 a2, -2.0 ; encoding: [0x02,0x00,0xd9,0xd3,0xf5,0x00,0x00,0x00] +0x02,0x00,0xd9,0xd3,0xf5,0x00,0x00,0x00 + +# GFX908: v_accvgpr_write_b32 a2, -2 ; encoding: [0x02,0x00,0xd9,0xd3,0xc2,0x00,0x00,0x00] +0x02,0x00,0xd9,0xd3,0xc2,0x00,0x00,0x00 + +# GFX908: v_accvgpr_write_b32 a2, v1 ; encoding: [0x02,0x00,0xd9,0xd3,0x01,0x01,0x00,0x00] +0x02,0x00,0xd9,0xd3,0x01,0x01,0x00,0x00 + +# GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[1:32] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x06,0x04] +0x00,0x00,0xc0,0xd3,0x00,0x03,0x06,0x04 + +# GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x06,0xe4] +0x00,0x13,0xc0,0xd3,0x00,0x03,0x06,0xe4 + +# GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, a1, a[1:32] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x06,0x14] +0x00,0x00,0xc0,0xd3,0x00,0x03,0x06,0x14 + +# GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, a1, a[1:32] cbsz:3 abid:2 blgp:7 ; 
encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x06,0xf4] +0x00,0x13,0xc0,0xd3,0x00,0x03,0x06,0xf4 + +# GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, v1, a[1:32] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x06,0x0c] +0x00,0x00,0xc0,0xd3,0x00,0x03,0x06,0x0c + +# GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, v1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x06,0xec] +0x00,0x13,0xc0,0xd3,0x00,0x03,0x06,0xec + +# GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[1:32] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x06,0x1c] +0x00,0x00,0xc0,0xd3,0x00,0x03,0x06,0x1c + +# GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x06,0xfc] +0x00,0x13,0xc0,0xd3,0x00,0x03,0x06,0xfc + +# GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, -2.0 ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0xd6,0x03] +0x00,0x00,0xc0,0xd3,0x00,0x03,0xd6,0x03 + +# GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0xd6,0xe3] +0x00,0x13,0xc0,0xd3,0x00,0x03,0xd6,0xe3 + +# GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, a1, -2.0 ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0xd6,0x13] +0x00,0x00,0xc0,0xd3,0x00,0x03,0xd6,0x13 + +# GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0xd6,0xf3] +0x00,0x13,0xc0,0xd3,0x00,0x03,0xd6,0xf3 + +# GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, v1, -2.0 ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0xd6,0x0b] +0x00,0x00,0xc0,0xd3,0x00,0x03,0xd6,0x0b + +# GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0xd6,0xeb] +0x00,0x13,0xc0,0xd3,0x00,0x03,0xd6,0xeb + +# GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, a1, -2.0 ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0xd6,0x1b] +0x00,0x00,0xc0,0xd3,0x00,0x03,0xd6,0x1b + +# GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: 
[0x00,0x13,0xc0,0xd3,0x00,0x03,0xd6,0xfb] +0x00,0x13,0xc0,0xd3,0x00,0x03,0xd6,0xfb + +# GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[1:16] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x06,0x04] +0x00,0x00,0xc1,0xd3,0x00,0x03,0x06,0x04 + +# GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x06,0xe4] +0x00,0x13,0xc1,0xd3,0x00,0x03,0x06,0xe4 + +# GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, a1, a[1:16] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x06,0x14] +0x00,0x00,0xc1,0xd3,0x00,0x03,0x06,0x14 + +# GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x06,0xf4] +0x00,0x13,0xc1,0xd3,0x00,0x03,0x06,0xf4 + +# GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[1:16] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x06,0x0c] +0x00,0x00,0xc1,0xd3,0x00,0x03,0x06,0x0c + +# GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x06,0xec] +0x00,0x13,0xc1,0xd3,0x00,0x03,0x06,0xec + +# GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, a1, a[1:16] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x06,0x1c] +0x00,0x00,0xc1,0xd3,0x00,0x03,0x06,0x1c + +# GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x06,0xfc] +0x00,0x13,0xc1,0xd3,0x00,0x03,0x06,0xfc + +# GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, -2.0 ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0xd6,0x03] +0x00,0x00,0xc1,0xd3,0x00,0x03,0xd6,0x03 + +# GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0xd6,0xe3] +0x00,0x13,0xc1,0xd3,0x00,0x03,0xd6,0xe3 + +# GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, a1, -2.0 ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0xd6,0x13] +0x00,0x00,0xc1,0xd3,0x00,0x03,0xd6,0x13 + +# GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: 
[0x00,0x13,0xc1,0xd3,0x00,0x03,0xd6,0xf3] +0x00,0x13,0xc1,0xd3,0x00,0x03,0xd6,0xf3 + +# GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, v1, -2.0 ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0xd6,0x0b] +0x00,0x00,0xc1,0xd3,0x00,0x03,0xd6,0x0b + +# GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0xd6,0xeb] +0x00,0x13,0xc1,0xd3,0x00,0x03,0xd6,0xeb + +# GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, a1, -2.0 ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0xd6,0x1b] +0x00,0x00,0xc1,0xd3,0x00,0x03,0xd6,0x1b + +# GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0xd6,0xfb] +0x00,0x13,0xc1,0xd3,0x00,0x03,0xd6,0xfb + +# GFX908: v_mfma_f32_4x4x1f32 a[0:3], v0, v1, a[1:4] ; encoding: [0x00,0x00,0xc2,0xd3,0x00,0x03,0x06,0x04] +0x00,0x00,0xc2,0xd3,0x00,0x03,0x06,0x04 + +# GFX908: v_mfma_f32_4x4x1f32 a[0:3], v0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc2,0xd3,0x00,0x03,0x06,0xe4] +0x00,0x13,0xc2,0xd3,0x00,0x03,0x06,0xe4 + +# GFX908: v_mfma_f32_4x4x1f32 a[0:3], v0, a1, a[1:4] ; encoding: [0x00,0x00,0xc2,0xd3,0x00,0x03,0x06,0x14] +0x00,0x00,0xc2,0xd3,0x00,0x03,0x06,0x14 + +# GFX908: v_mfma_f32_4x4x1f32 a[0:3], v0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc2,0xd3,0x00,0x03,0x06,0xf4] +0x00,0x13,0xc2,0xd3,0x00,0x03,0x06,0xf4 + +# GFX908: v_mfma_f32_4x4x1f32 a[0:3], a0, v1, a[1:4] ; encoding: [0x00,0x00,0xc2,0xd3,0x00,0x03,0x06,0x0c] +0x00,0x00,0xc2,0xd3,0x00,0x03,0x06,0x0c + +# GFX908: v_mfma_f32_4x4x1f32 a[0:3], a0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc2,0xd3,0x00,0x03,0x06,0xec] +0x00,0x13,0xc2,0xd3,0x00,0x03,0x06,0xec + +# GFX908: v_mfma_f32_4x4x1f32 a[0:3], a0, a1, a[1:4] ; encoding: [0x00,0x00,0xc2,0xd3,0x00,0x03,0x06,0x1c] +0x00,0x00,0xc2,0xd3,0x00,0x03,0x06,0x1c + +# GFX908: v_mfma_f32_4x4x1f32 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc2,0xd3,0x00,0x03,0x06,0xfc] 
+0x00,0x13,0xc2,0xd3,0x00,0x03,0x06,0xfc + +# GFX908: v_mfma_f32_4x4x1f32 a[0:3], v0, v1, -2.0 ; encoding: [0x00,0x00,0xc2,0xd3,0x00,0x03,0xd6,0x03] +0x00,0x00,0xc2,0xd3,0x00,0x03,0xd6,0x03 + +# GFX908: v_mfma_f32_4x4x1f32 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc2,0xd3,0x00,0x03,0xd6,0xe3] +0x00,0x13,0xc2,0xd3,0x00,0x03,0xd6,0xe3 + +# GFX908: v_mfma_f32_4x4x1f32 a[0:3], v0, a1, -2.0 ; encoding: [0x00,0x00,0xc2,0xd3,0x00,0x03,0xd6,0x13] +0x00,0x00,0xc2,0xd3,0x00,0x03,0xd6,0x13 + +# GFX908: v_mfma_f32_4x4x1f32 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc2,0xd3,0x00,0x03,0xd6,0xf3] +0x00,0x13,0xc2,0xd3,0x00,0x03,0xd6,0xf3 + +# GFX908: v_mfma_f32_4x4x1f32 a[0:3], a0, v1, -2.0 ; encoding: [0x00,0x00,0xc2,0xd3,0x00,0x03,0xd6,0x0b] +0x00,0x00,0xc2,0xd3,0x00,0x03,0xd6,0x0b + +# GFX908: v_mfma_f32_4x4x1f32 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc2,0xd3,0x00,0x03,0xd6,0xeb] +0x00,0x13,0xc2,0xd3,0x00,0x03,0xd6,0xeb + +# GFX908: v_mfma_f32_4x4x1f32 a[0:3], a0, a1, -2.0 ; encoding: [0x00,0x00,0xc2,0xd3,0x00,0x03,0xd6,0x1b] +0x00,0x00,0xc2,0xd3,0x00,0x03,0xd6,0x1b + +# GFX908: v_mfma_f32_4x4x1f32 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc2,0xd3,0x00,0x03,0xd6,0xfb] +0x00,0x13,0xc2,0xd3,0x00,0x03,0xd6,0xfb + +# GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[1:16] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x06,0x04] +0x00,0x00,0xc4,0xd3,0x00,0x03,0x06,0x04 + +# GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x06,0xe4] +0x00,0x13,0xc4,0xd3,0x00,0x03,0x06,0xe4 + +# GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, a1, a[1:16] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x06,0x14] +0x00,0x00,0xc4,0xd3,0x00,0x03,0x06,0x14 + +# GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x06,0xf4] +0x00,0x13,0xc4,0xd3,0x00,0x03,0x06,0xf4 + +# GFX908: 
v_mfma_f32_32x32x2f32 a[0:15], a0, v1, a[1:16] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x06,0x0c] +0x00,0x00,0xc4,0xd3,0x00,0x03,0x06,0x0c + +# GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x06,0xec] +0x00,0x13,0xc4,0xd3,0x00,0x03,0x06,0xec + +# GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, a1, a[1:16] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x06,0x1c] +0x00,0x00,0xc4,0xd3,0x00,0x03,0x06,0x1c + +# GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x06,0xfc] +0x00,0x13,0xc4,0xd3,0x00,0x03,0x06,0xfc + +# GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, v1, -2.0 ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0xd6,0x03] +0x00,0x00,0xc4,0xd3,0x00,0x03,0xd6,0x03 + +# GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0xd6,0xe3] +0x00,0x13,0xc4,0xd3,0x00,0x03,0xd6,0xe3 + +# GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, a1, -2.0 ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0xd6,0x13] +0x00,0x00,0xc4,0xd3,0x00,0x03,0xd6,0x13 + +# GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0xd6,0xf3] +0x00,0x13,0xc4,0xd3,0x00,0x03,0xd6,0xf3 + +# GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, v1, -2.0 ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0xd6,0x0b] +0x00,0x00,0xc4,0xd3,0x00,0x03,0xd6,0x0b + +# GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0xd6,0xeb] +0x00,0x13,0xc4,0xd3,0x00,0x03,0xd6,0xeb + +# GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, a1, -2.0 ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0xd6,0x1b] +0x00,0x00,0xc4,0xd3,0x00,0x03,0xd6,0x1b + +# GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0xd6,0xfb] +0x00,0x13,0xc4,0xd3,0x00,0x03,0xd6,0xfb + +# GFX908: v_mfma_f32_16x16x4f32 a[0:3], v0, v1, 
a[1:4] ; encoding: [0x00,0x00,0xc5,0xd3,0x00,0x03,0x06,0x04] +0x00,0x00,0xc5,0xd3,0x00,0x03,0x06,0x04 + +# GFX908: v_mfma_f32_16x16x4f32 a[0:3], v0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc5,0xd3,0x00,0x03,0x06,0xe4] +0x00,0x13,0xc5,0xd3,0x00,0x03,0x06,0xe4 + +# GFX908: v_mfma_f32_16x16x4f32 a[0:3], v0, a1, a[1:4] ; encoding: [0x00,0x00,0xc5,0xd3,0x00,0x03,0x06,0x14] +0x00,0x00,0xc5,0xd3,0x00,0x03,0x06,0x14 + +# GFX908: v_mfma_f32_16x16x4f32 a[0:3], v0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc5,0xd3,0x00,0x03,0x06,0xf4] +0x00,0x13,0xc5,0xd3,0x00,0x03,0x06,0xf4 + +# GFX908: v_mfma_f32_16x16x4f32 a[0:3], a0, v1, a[1:4] ; encoding: [0x00,0x00,0xc5,0xd3,0x00,0x03,0x06,0x0c] +0x00,0x00,0xc5,0xd3,0x00,0x03,0x06,0x0c + +# GFX908: v_mfma_f32_16x16x4f32 a[0:3], a0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc5,0xd3,0x00,0x03,0x06,0xec] +0x00,0x13,0xc5,0xd3,0x00,0x03,0x06,0xec + +# GFX908: v_mfma_f32_16x16x4f32 a[0:3], a0, a1, a[1:4] ; encoding: [0x00,0x00,0xc5,0xd3,0x00,0x03,0x06,0x1c] +0x00,0x00,0xc5,0xd3,0x00,0x03,0x06,0x1c + +# GFX908: v_mfma_f32_16x16x4f32 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc5,0xd3,0x00,0x03,0x06,0xfc] +0x00,0x13,0xc5,0xd3,0x00,0x03,0x06,0xfc + +# GFX908: v_mfma_f32_16x16x4f32 a[0:3], v0, v1, -2.0 ; encoding: [0x00,0x00,0xc5,0xd3,0x00,0x03,0xd6,0x03] +0x00,0x00,0xc5,0xd3,0x00,0x03,0xd6,0x03 + +# GFX908: v_mfma_f32_16x16x4f32 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc5,0xd3,0x00,0x03,0xd6,0xe3] +0x00,0x13,0xc5,0xd3,0x00,0x03,0xd6,0xe3 + +# GFX908: v_mfma_f32_16x16x4f32 a[0:3], v0, a1, -2.0 ; encoding: [0x00,0x00,0xc5,0xd3,0x00,0x03,0xd6,0x13] +0x00,0x00,0xc5,0xd3,0x00,0x03,0xd6,0x13 + +# GFX908: v_mfma_f32_16x16x4f32 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc5,0xd3,0x00,0x03,0xd6,0xf3] +0x00,0x13,0xc5,0xd3,0x00,0x03,0xd6,0xf3 + +# GFX908: v_mfma_f32_16x16x4f32 a[0:3], a0, v1, -2.0 ; encoding: 
[0x00,0x00,0xc5,0xd3,0x00,0x03,0xd6,0x0b] +0x00,0x00,0xc5,0xd3,0x00,0x03,0xd6,0x0b + +# GFX908: v_mfma_f32_16x16x4f32 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc5,0xd3,0x00,0x03,0xd6,0xeb] +0x00,0x13,0xc5,0xd3,0x00,0x03,0xd6,0xeb + +# GFX908: v_mfma_f32_16x16x4f32 a[0:3], a0, a1, -2.0 ; encoding: [0x00,0x00,0xc5,0xd3,0x00,0x03,0xd6,0x1b] +0x00,0x00,0xc5,0xd3,0x00,0x03,0xd6,0x1b + +# GFX908: v_mfma_f32_16x16x4f32 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc5,0xd3,0x00,0x03,0xd6,0xfb] +0x00,0x13,0xc5,0xd3,0x00,0x03,0xd6,0xfb + +# GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], a[1:32] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0x06,0x04] +0x00,0x00,0xc8,0xd3,0x00,0x03,0x06,0x04 + +# GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0x06,0xe4] +0x00,0x13,0xc8,0xd3,0x00,0x03,0x06,0xe4 + +# GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], a[1:32] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0x06,0x14] +0x00,0x00,0xc8,0xd3,0x00,0x03,0x06,0x14 + +# GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0x06,0xf4] +0x00,0x13,0xc8,0xd3,0x00,0x03,0x06,0xf4 + +# GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], a[1:32] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0x06,0x0c] +0x00,0x00,0xc8,0xd3,0x00,0x03,0x06,0x0c + +# GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0x06,0xec] +0x00,0x13,0xc8,0xd3,0x00,0x03,0x06,0xec + +# GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], a[1:32] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0x06,0x1c] +0x00,0x00,0xc8,0xd3,0x00,0x03,0x06,0x1c + +# GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0x06,0xfc] +0x00,0x13,0xc8,0xd3,0x00,0x03,0x06,0xfc + +# GFX908: 
v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], -2.0 ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0xd6,0x03] +0x00,0x00,0xc8,0xd3,0x00,0x03,0xd6,0x03 + +# GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0xd6,0xe3] +0x00,0x13,0xc8,0xd3,0x00,0x03,0xd6,0xe3 + +# GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], -2.0 ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0xd6,0x13] +0x00,0x00,0xc8,0xd3,0x00,0x03,0xd6,0x13 + +# GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0xd6,0xf3] +0x00,0x13,0xc8,0xd3,0x00,0x03,0xd6,0xf3 + +# GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], -2.0 ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0xd6,0x0b] +0x00,0x00,0xc8,0xd3,0x00,0x03,0xd6,0x0b + +# GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0xd6,0xeb] +0x00,0x13,0xc8,0xd3,0x00,0x03,0xd6,0xeb + +# GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], -2.0 ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0xd6,0x1b] +0x00,0x00,0xc8,0xd3,0x00,0x03,0xd6,0x1b + +# GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0xd6,0xfb] +0x00,0x13,0xc8,0xd3,0x00,0x03,0xd6,0xfb + +# GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], a[1:16] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0x06,0x04] +0x00,0x00,0xc9,0xd3,0x00,0x03,0x06,0x04 + +# GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x03,0x06,0xe4] +0x00,0x13,0xc9,0xd3,0x00,0x03,0x06,0xe4 + +# GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], a[1:16] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0x06,0x14] +0x00,0x00,0xc9,0xd3,0x00,0x03,0x06,0x14 + +# GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: 
[0x00,0x13,0xc9,0xd3,0x00,0x03,0x06,0xf4] +0x00,0x13,0xc9,0xd3,0x00,0x03,0x06,0xf4 + +# GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], a[1:16] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0x06,0x0c] +0x00,0x00,0xc9,0xd3,0x00,0x03,0x06,0x0c + +# GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x03,0x06,0xec] +0x00,0x13,0xc9,0xd3,0x00,0x03,0x06,0xec + +# GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], a[1:16] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0x06,0x1c] +0x00,0x00,0xc9,0xd3,0x00,0x03,0x06,0x1c + +# GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x03,0x06,0xfc] +0x00,0x13,0xc9,0xd3,0x00,0x03,0x06,0xfc + +# GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], -2.0 ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0xd6,0x03] +0x00,0x00,0xc9,0xd3,0x00,0x03,0xd6,0x03 + +# GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x03,0xd6,0xe3] +0x00,0x13,0xc9,0xd3,0x00,0x03,0xd6,0xe3 + +# GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], -2.0 ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0xd6,0x13] +0x00,0x00,0xc9,0xd3,0x00,0x03,0xd6,0x13 + +# GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x03,0xd6,0xf3] +0x00,0x13,0xc9,0xd3,0x00,0x03,0xd6,0xf3 + +# GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], -2.0 ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0xd6,0x0b] +0x00,0x00,0xc9,0xd3,0x00,0x03,0xd6,0x0b + +# GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x03,0xd6,0xeb] +0x00,0x13,0xc9,0xd3,0x00,0x03,0xd6,0xeb + +# GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], -2.0 ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0xd6,0x1b] +0x00,0x00,0xc9,0xd3,0x00,0x03,0xd6,0x1b + +# GFX908: v_mfma_f32_16x16x4f16 
a[0:15], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x03,0xd6,0xfb] +0x00,0x13,0xc9,0xd3,0x00,0x03,0xd6,0xfb + +# GFX908: v_mfma_f32_4x4x4f16 a[0:3], v[0:1], v[1:2], a[1:4] ; encoding: [0x00,0x00,0xca,0xd3,0x00,0x03,0x06,0x04] +0x00,0x00,0xca,0xd3,0x00,0x03,0x06,0x04 + +# GFX908: v_mfma_f32_4x4x4f16 a[0:3], v[0:1], v[1:2], a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xca,0xd3,0x00,0x03,0x06,0xe4] +0x00,0x13,0xca,0xd3,0x00,0x03,0x06,0xe4 + +# GFX908: v_mfma_f32_4x4x4f16 a[0:3], v[0:1], a[1:2], a[1:4] ; encoding: [0x00,0x00,0xca,0xd3,0x00,0x03,0x06,0x14] +0x00,0x00,0xca,0xd3,0x00,0x03,0x06,0x14 + +# GFX908: v_mfma_f32_4x4x4f16 a[0:3], v[0:1], a[1:2], a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xca,0xd3,0x00,0x03,0x06,0xf4] +0x00,0x13,0xca,0xd3,0x00,0x03,0x06,0xf4 + +# GFX908: v_mfma_f32_4x4x4f16 a[0:3], a[0:1], v[1:2], a[1:4] ; encoding: [0x00,0x00,0xca,0xd3,0x00,0x03,0x06,0x0c] +0x00,0x00,0xca,0xd3,0x00,0x03,0x06,0x0c + +# GFX908: v_mfma_f32_4x4x4f16 a[0:3], a[0:1], v[1:2], a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xca,0xd3,0x00,0x03,0x06,0xec] +0x00,0x13,0xca,0xd3,0x00,0x03,0x06,0xec + +# GFX908: v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], a[1:4] ; encoding: [0x00,0x00,0xca,0xd3,0x00,0x03,0x06,0x1c] +0x00,0x00,0xca,0xd3,0x00,0x03,0x06,0x1c + +# GFX908: v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xca,0xd3,0x00,0x03,0x06,0xfc] +0x00,0x13,0xca,0xd3,0x00,0x03,0x06,0xfc + +# GFX908: v_mfma_f32_4x4x4f16 a[0:3], v[0:1], v[1:2], -2.0 ; encoding: [0x00,0x00,0xca,0xd3,0x00,0x03,0xd6,0x03] +0x00,0x00,0xca,0xd3,0x00,0x03,0xd6,0x03 + +# GFX908: v_mfma_f32_4x4x4f16 a[0:3], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xca,0xd3,0x00,0x03,0xd6,0xe3] +0x00,0x13,0xca,0xd3,0x00,0x03,0xd6,0xe3 + +# GFX908: v_mfma_f32_4x4x4f16 a[0:3], v[0:1], a[1:2], -2.0 ; encoding: [0x00,0x00,0xca,0xd3,0x00,0x03,0xd6,0x13] +0x00,0x00,0xca,0xd3,0x00,0x03,0xd6,0x13 + +# 
GFX908: v_mfma_f32_4x4x4f16 a[0:3], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xca,0xd3,0x00,0x03,0xd6,0xf3] +0x00,0x13,0xca,0xd3,0x00,0x03,0xd6,0xf3 + +# GFX908: v_mfma_f32_4x4x4f16 a[0:3], a[0:1], v[1:2], -2.0 ; encoding: [0x00,0x00,0xca,0xd3,0x00,0x03,0xd6,0x0b] +0x00,0x00,0xca,0xd3,0x00,0x03,0xd6,0x0b + +# GFX908: v_mfma_f32_4x4x4f16 a[0:3], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xca,0xd3,0x00,0x03,0xd6,0xeb] +0x00,0x13,0xca,0xd3,0x00,0x03,0xd6,0xeb + +# GFX908: v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], -2.0 ; encoding: [0x00,0x00,0xca,0xd3,0x00,0x03,0xd6,0x1b] +0x00,0x00,0xca,0xd3,0x00,0x03,0xd6,0x1b + +# GFX908: v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xca,0xd3,0x00,0x03,0xd6,0xfb] +0x00,0x13,0xca,0xd3,0x00,0x03,0xd6,0xfb + +# GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], a[1:16] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x03,0x06,0x04] +0x00,0x00,0xcc,0xd3,0x00,0x03,0x06,0x04 + +# GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0x06,0xe4] +0x00,0x13,0xcc,0xd3,0x00,0x03,0x06,0xe4 + +# GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], a[1:16] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x03,0x06,0x14] +0x00,0x00,0xcc,0xd3,0x00,0x03,0x06,0x14 + +# GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0x06,0xf4] +0x00,0x13,0xcc,0xd3,0x00,0x03,0x06,0xf4 + +# GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], a[1:16] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x03,0x06,0x0c] +0x00,0x00,0xcc,0xd3,0x00,0x03,0x06,0x0c + +# GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0x06,0xec] +0x00,0x13,0xcc,0xd3,0x00,0x03,0x06,0xec + +# GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], a[1:16] ; encoding: 
[0x00,0x00,0xcc,0xd3,0x00,0x03,0x06,0x1c] +0x00,0x00,0xcc,0xd3,0x00,0x03,0x06,0x1c + +# GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0x06,0xfc] +0x00,0x13,0xcc,0xd3,0x00,0x03,0x06,0xfc + +# GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], -2.0 ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x03,0xd6,0x03] +0x00,0x00,0xcc,0xd3,0x00,0x03,0xd6,0x03 + +# GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0xd6,0xe3] +0x00,0x13,0xcc,0xd3,0x00,0x03,0xd6,0xe3 + +# GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], -2.0 ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x03,0xd6,0x13] +0x00,0x00,0xcc,0xd3,0x00,0x03,0xd6,0x13 + +# GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0xd6,0xf3] +0x00,0x13,0xcc,0xd3,0x00,0x03,0xd6,0xf3 + +# GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], -2.0 ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x03,0xd6,0x0b] +0x00,0x00,0xcc,0xd3,0x00,0x03,0xd6,0x0b + +# GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0xd6,0xeb] +0x00,0x13,0xcc,0xd3,0x00,0x03,0xd6,0xeb + +# GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], -2.0 ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x03,0xd6,0x1b] +0x00,0x00,0xcc,0xd3,0x00,0x03,0xd6,0x1b + +# GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0xd6,0xfb] +0x00,0x13,0xcc,0xd3,0x00,0x03,0xd6,0xfb + +# GFX908: v_mfma_f32_16x16x16f16 a[0:3], v[0:1], v[1:2], a[1:4] ; encoding: [0x00,0x00,0xcd,0xd3,0x00,0x03,0x06,0x04] +0x00,0x00,0xcd,0xd3,0x00,0x03,0x06,0x04 + +# GFX908: v_mfma_f32_16x16x16f16 a[0:3], v[0:1], v[1:2], a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcd,0xd3,0x00,0x03,0x06,0xe4] +0x00,0x13,0xcd,0xd3,0x00,0x03,0x06,0xe4 + +# GFX908: 
v_mfma_f32_16x16x16f16 a[0:3], v[0:1], a[1:2], a[1:4] ; encoding: [0x00,0x00,0xcd,0xd3,0x00,0x03,0x06,0x14] +0x00,0x00,0xcd,0xd3,0x00,0x03,0x06,0x14 + +# GFX908: v_mfma_f32_16x16x16f16 a[0:3], v[0:1], a[1:2], a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcd,0xd3,0x00,0x03,0x06,0xf4] +0x00,0x13,0xcd,0xd3,0x00,0x03,0x06,0xf4 + +# GFX908: v_mfma_f32_16x16x16f16 a[0:3], a[0:1], v[1:2], a[1:4] ; encoding: [0x00,0x00,0xcd,0xd3,0x00,0x03,0x06,0x0c] +0x00,0x00,0xcd,0xd3,0x00,0x03,0x06,0x0c + +# GFX908: v_mfma_f32_16x16x16f16 a[0:3], a[0:1], v[1:2], a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcd,0xd3,0x00,0x03,0x06,0xec] +0x00,0x13,0xcd,0xd3,0x00,0x03,0x06,0xec + +# GFX908: v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], a[1:4] ; encoding: [0x00,0x00,0xcd,0xd3,0x00,0x03,0x06,0x1c] +0x00,0x00,0xcd,0xd3,0x00,0x03,0x06,0x1c + +# GFX908: v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcd,0xd3,0x00,0x03,0x06,0xfc] +0x00,0x13,0xcd,0xd3,0x00,0x03,0x06,0xfc + +# GFX908: v_mfma_f32_16x16x16f16 a[0:3], v[0:1], v[1:2], -2.0 ; encoding: [0x00,0x00,0xcd,0xd3,0x00,0x03,0xd6,0x03] +0x00,0x00,0xcd,0xd3,0x00,0x03,0xd6,0x03 + +# GFX908: v_mfma_f32_16x16x16f16 a[0:3], v[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcd,0xd3,0x00,0x03,0xd6,0xe3] +0x00,0x13,0xcd,0xd3,0x00,0x03,0xd6,0xe3 + +# GFX908: v_mfma_f32_16x16x16f16 a[0:3], v[0:1], a[1:2], -2.0 ; encoding: [0x00,0x00,0xcd,0xd3,0x00,0x03,0xd6,0x13] +0x00,0x00,0xcd,0xd3,0x00,0x03,0xd6,0x13 + +# GFX908: v_mfma_f32_16x16x16f16 a[0:3], v[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcd,0xd3,0x00,0x03,0xd6,0xf3] +0x00,0x13,0xcd,0xd3,0x00,0x03,0xd6,0xf3 + +# GFX908: v_mfma_f32_16x16x16f16 a[0:3], a[0:1], v[1:2], -2.0 ; encoding: [0x00,0x00,0xcd,0xd3,0x00,0x03,0xd6,0x0b] +0x00,0x00,0xcd,0xd3,0x00,0x03,0xd6,0x0b + +# GFX908: v_mfma_f32_16x16x16f16 a[0:3], a[0:1], v[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: 
[0x00,0x13,0xcd,0xd3,0x00,0x03,0xd6,0xeb] +0x00,0x13,0xcd,0xd3,0x00,0x03,0xd6,0xeb + +# GFX908: v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], -2.0 ; encoding: [0x00,0x00,0xcd,0xd3,0x00,0x03,0xd6,0x1b] +0x00,0x00,0xcd,0xd3,0x00,0x03,0xd6,0x1b + +# GFX908: v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcd,0xd3,0x00,0x03,0xd6,0xfb] +0x00,0x13,0xcd,0xd3,0x00,0x03,0xd6,0xfb + +# GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, v1, a[1:32] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x06,0x04] +0x00,0x00,0xd0,0xd3,0x00,0x03,0x06,0x04 + +# GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, v1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x06,0xe4] +0x00,0x13,0xd0,0xd3,0x00,0x03,0x06,0xe4 + +# GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, a1, a[1:32] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x06,0x14] +0x00,0x00,0xd0,0xd3,0x00,0x03,0x06,0x14 + +# GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, a1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x06,0xf4] +0x00,0x13,0xd0,0xd3,0x00,0x03,0x06,0xf4 + +# GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, v1, a[1:32] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x06,0x0c] +0x00,0x00,0xd0,0xd3,0x00,0x03,0x06,0x0c + +# GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, v1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x06,0xec] +0x00,0x13,0xd0,0xd3,0x00,0x03,0x06,0xec + +# GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, a1, a[1:32] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x06,0x1c] +0x00,0x00,0xd0,0xd3,0x00,0x03,0x06,0x1c + +# GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, a1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x06,0xfc] +0x00,0x13,0xd0,0xd3,0x00,0x03,0x06,0xfc + +# GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, v1, 2 ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x0a,0x02] +0x00,0x00,0xd0,0xd3,0x00,0x03,0x0a,0x02 + +# GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, v1, 2 cbsz:3 abid:2 blgp:7 ; encoding: 
[0x00,0x13,0xd0,0xd3,0x00,0x03,0x0a,0xe2] +0x00,0x13,0xd0,0xd3,0x00,0x03,0x0a,0xe2 + +# GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, a1, 2 ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x0a,0x12] +0x00,0x00,0xd0,0xd3,0x00,0x03,0x0a,0x12 + +# GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x0a,0xf2] +0x00,0x13,0xd0,0xd3,0x00,0x03,0x0a,0xf2 + +# GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, v1, 2 ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x0a,0x0a] +0x00,0x00,0xd0,0xd3,0x00,0x03,0x0a,0x0a + +# GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, v1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x0a,0xea] +0x00,0x13,0xd0,0xd3,0x00,0x03,0x0a,0xea + +# GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, a1, 2 ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x0a,0x1a] +0x00,0x00,0xd0,0xd3,0x00,0x03,0x0a,0x1a + +# GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x0a,0xfa] +0x00,0x13,0xd0,0xd3,0x00,0x03,0x0a,0xfa + +# GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[1:16] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x06,0x04] +0x00,0x00,0xd1,0xd3,0x00,0x03,0x06,0x04 + +# GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x06,0xe4] +0x00,0x13,0xd1,0xd3,0x00,0x03,0x06,0xe4 + +# GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, a1, a[1:16] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x06,0x14] +0x00,0x00,0xd1,0xd3,0x00,0x03,0x06,0x14 + +# GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x06,0xf4] +0x00,0x13,0xd1,0xd3,0x00,0x03,0x06,0xf4 + +# GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, v1, a[1:16] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x06,0x0c] +0x00,0x00,0xd1,0xd3,0x00,0x03,0x06,0x0c + +# GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x06,0xec] 
+0x00,0x13,0xd1,0xd3,0x00,0x03,0x06,0xec + +# GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, a1, a[1:16] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x06,0x1c] +0x00,0x00,0xd1,0xd3,0x00,0x03,0x06,0x1c + +# GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x06,0xfc] +0x00,0x13,0xd1,0xd3,0x00,0x03,0x06,0xfc + +# GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, v1, 2 ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x0a,0x02] +0x00,0x00,0xd1,0xd3,0x00,0x03,0x0a,0x02 + +# GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, v1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x0a,0xe2] +0x00,0x13,0xd1,0xd3,0x00,0x03,0x0a,0xe2 + +# GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, a1, 2 ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x0a,0x12] +0x00,0x00,0xd1,0xd3,0x00,0x03,0x0a,0x12 + +# GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x0a,0xf2] +0x00,0x13,0xd1,0xd3,0x00,0x03,0x0a,0xf2 + +# GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, v1, 2 ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x0a,0x0a] +0x00,0x00,0xd1,0xd3,0x00,0x03,0x0a,0x0a + +# GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, v1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x0a,0xea] +0x00,0x13,0xd1,0xd3,0x00,0x03,0x0a,0xea + +# GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, a1, 2 ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x0a,0x1a] +0x00,0x00,0xd1,0xd3,0x00,0x03,0x0a,0x1a + +# GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x0a,0xfa] +0x00,0x13,0xd1,0xd3,0x00,0x03,0x0a,0xfa + +# GFX908: v_mfma_i32_4x4x4i8 a[0:3], v0, v1, a[1:4] ; encoding: [0x00,0x00,0xd2,0xd3,0x00,0x03,0x06,0x04] +0x00,0x00,0xd2,0xd3,0x00,0x03,0x06,0x04 + +# GFX908: v_mfma_i32_4x4x4i8 a[0:3], v0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd2,0xd3,0x00,0x03,0x06,0xe4] +0x00,0x13,0xd2,0xd3,0x00,0x03,0x06,0xe4 + +# GFX908: v_mfma_i32_4x4x4i8 a[0:3], 
v0, a1, a[1:4] ; encoding: [0x00,0x00,0xd2,0xd3,0x00,0x03,0x06,0x14] +0x00,0x00,0xd2,0xd3,0x00,0x03,0x06,0x14 + +# GFX908: v_mfma_i32_4x4x4i8 a[0:3], v0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd2,0xd3,0x00,0x03,0x06,0xf4] +0x00,0x13,0xd2,0xd3,0x00,0x03,0x06,0xf4 + +# GFX908: v_mfma_i32_4x4x4i8 a[0:3], a0, v1, a[1:4] ; encoding: [0x00,0x00,0xd2,0xd3,0x00,0x03,0x06,0x0c] +0x00,0x00,0xd2,0xd3,0x00,0x03,0x06,0x0c + +# GFX908: v_mfma_i32_4x4x4i8 a[0:3], a0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd2,0xd3,0x00,0x03,0x06,0xec] +0x00,0x13,0xd2,0xd3,0x00,0x03,0x06,0xec + +# GFX908: v_mfma_i32_4x4x4i8 a[0:3], a0, a1, a[1:4] ; encoding: [0x00,0x00,0xd2,0xd3,0x00,0x03,0x06,0x1c] +0x00,0x00,0xd2,0xd3,0x00,0x03,0x06,0x1c + +# GFX908: v_mfma_i32_4x4x4i8 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd2,0xd3,0x00,0x03,0x06,0xfc] +0x00,0x13,0xd2,0xd3,0x00,0x03,0x06,0xfc + +# GFX908: v_mfma_i32_4x4x4i8 a[0:3], v0, v1, 2 ; encoding: [0x00,0x00,0xd2,0xd3,0x00,0x03,0x0a,0x02] +0x00,0x00,0xd2,0xd3,0x00,0x03,0x0a,0x02 + +# GFX908: v_mfma_i32_4x4x4i8 a[0:3], v0, v1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd2,0xd3,0x00,0x03,0x0a,0xe2] +0x00,0x13,0xd2,0xd3,0x00,0x03,0x0a,0xe2 + +# GFX908: v_mfma_i32_4x4x4i8 a[0:3], v0, a1, 2 ; encoding: [0x00,0x00,0xd2,0xd3,0x00,0x03,0x0a,0x12] +0x00,0x00,0xd2,0xd3,0x00,0x03,0x0a,0x12 + +# GFX908: v_mfma_i32_4x4x4i8 a[0:3], v0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd2,0xd3,0x00,0x03,0x0a,0xf2] +0x00,0x13,0xd2,0xd3,0x00,0x03,0x0a,0xf2 + +# GFX908: v_mfma_i32_4x4x4i8 a[0:3], a0, v1, 2 ; encoding: [0x00,0x00,0xd2,0xd3,0x00,0x03,0x0a,0x0a] +0x00,0x00,0xd2,0xd3,0x00,0x03,0x0a,0x0a + +# GFX908: v_mfma_i32_4x4x4i8 a[0:3], a0, v1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd2,0xd3,0x00,0x03,0x0a,0xea] +0x00,0x13,0xd2,0xd3,0x00,0x03,0x0a,0xea + +# GFX908: v_mfma_i32_4x4x4i8 a[0:3], a0, a1, 2 ; encoding: [0x00,0x00,0xd2,0xd3,0x00,0x03,0x0a,0x1a] 
+0x00,0x00,0xd2,0xd3,0x00,0x03,0x0a,0x1a + +# GFX908: v_mfma_i32_4x4x4i8 a[0:3], a0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd2,0xd3,0x00,0x03,0x0a,0xfa] +0x00,0x13,0xd2,0xd3,0x00,0x03,0x0a,0xfa + +# GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[1:16] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x06,0x04] +0x00,0x00,0xd4,0xd3,0x00,0x03,0x06,0x04 + +# GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x06,0xe4] +0x00,0x13,0xd4,0xd3,0x00,0x03,0x06,0xe4 + +# GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, a1, a[1:16] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x06,0x14] +0x00,0x00,0xd4,0xd3,0x00,0x03,0x06,0x14 + +# GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x06,0xf4] +0x00,0x13,0xd4,0xd3,0x00,0x03,0x06,0xf4 + +# GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, v1, a[1:16] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x06,0x0c] +0x00,0x00,0xd4,0xd3,0x00,0x03,0x06,0x0c + +# GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x06,0xec] +0x00,0x13,0xd4,0xd3,0x00,0x03,0x06,0xec + +# GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, a1, a[1:16] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x06,0x1c] +0x00,0x00,0xd4,0xd3,0x00,0x03,0x06,0x1c + +# GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x06,0xfc] +0x00,0x13,0xd4,0xd3,0x00,0x03,0x06,0xfc + +# GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, v1, 2 ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x0a,0x02] +0x00,0x00,0xd4,0xd3,0x00,0x03,0x0a,0x02 + +# GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, v1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x0a,0xe2] +0x00,0x13,0xd4,0xd3,0x00,0x03,0x0a,0xe2 + +# GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, a1, 2 ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x0a,0x12] +0x00,0x00,0xd4,0xd3,0x00,0x03,0x0a,0x12 + +# 
GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x0a,0xf2] +0x00,0x13,0xd4,0xd3,0x00,0x03,0x0a,0xf2 + +# GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, v1, 2 ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x0a,0x0a] +0x00,0x00,0xd4,0xd3,0x00,0x03,0x0a,0x0a + +# GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, v1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x0a,0xea] +0x00,0x13,0xd4,0xd3,0x00,0x03,0x0a,0xea + +# GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, a1, 2 ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x0a,0x1a] +0x00,0x00,0xd4,0xd3,0x00,0x03,0x0a,0x1a + +# GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x0a,0xfa] +0x00,0x13,0xd4,0xd3,0x00,0x03,0x0a,0xfa + +# GFX908: v_mfma_i32_16x16x16i8 a[0:3], v0, v1, a[1:4] ; encoding: [0x00,0x00,0xd5,0xd3,0x00,0x03,0x06,0x04] +0x00,0x00,0xd5,0xd3,0x00,0x03,0x06,0x04 + +# GFX908: v_mfma_i32_16x16x16i8 a[0:3], v0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd5,0xd3,0x00,0x03,0x06,0xe4] +0x00,0x13,0xd5,0xd3,0x00,0x03,0x06,0xe4 + +# GFX908: v_mfma_i32_16x16x16i8 a[0:3], v0, a1, a[1:4] ; encoding: [0x00,0x00,0xd5,0xd3,0x00,0x03,0x06,0x14] +0x00,0x00,0xd5,0xd3,0x00,0x03,0x06,0x14 + +# GFX908: v_mfma_i32_16x16x16i8 a[0:3], v0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd5,0xd3,0x00,0x03,0x06,0xf4] +0x00,0x13,0xd5,0xd3,0x00,0x03,0x06,0xf4 + +# GFX908: v_mfma_i32_16x16x16i8 a[0:3], a0, v1, a[1:4] ; encoding: [0x00,0x00,0xd5,0xd3,0x00,0x03,0x06,0x0c] +0x00,0x00,0xd5,0xd3,0x00,0x03,0x06,0x0c + +# GFX908: v_mfma_i32_16x16x16i8 a[0:3], a0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd5,0xd3,0x00,0x03,0x06,0xec] +0x00,0x13,0xd5,0xd3,0x00,0x03,0x06,0xec + +# GFX908: v_mfma_i32_16x16x16i8 a[0:3], a0, a1, a[1:4] ; encoding: [0x00,0x00,0xd5,0xd3,0x00,0x03,0x06,0x1c] +0x00,0x00,0xd5,0xd3,0x00,0x03,0x06,0x1c + +# GFX908: v_mfma_i32_16x16x16i8 a[0:3], a0, a1, a[1:4] cbsz:3 
abid:2 blgp:7 ; encoding: [0x00,0x13,0xd5,0xd3,0x00,0x03,0x06,0xfc] +0x00,0x13,0xd5,0xd3,0x00,0x03,0x06,0xfc + +# GFX908: v_mfma_i32_16x16x16i8 a[0:3], v0, v1, 2 ; encoding: [0x00,0x00,0xd5,0xd3,0x00,0x03,0x0a,0x02] +0x00,0x00,0xd5,0xd3,0x00,0x03,0x0a,0x02 + +# GFX908: v_mfma_i32_16x16x16i8 a[0:3], v0, v1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd5,0xd3,0x00,0x03,0x0a,0xe2] +0x00,0x13,0xd5,0xd3,0x00,0x03,0x0a,0xe2 + +# GFX908: v_mfma_i32_16x16x16i8 a[0:3], v0, a1, 2 ; encoding: [0x00,0x00,0xd5,0xd3,0x00,0x03,0x0a,0x12] +0x00,0x00,0xd5,0xd3,0x00,0x03,0x0a,0x12 + +# GFX908: v_mfma_i32_16x16x16i8 a[0:3], v0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd5,0xd3,0x00,0x03,0x0a,0xf2] +0x00,0x13,0xd5,0xd3,0x00,0x03,0x0a,0xf2 + +# GFX908: v_mfma_i32_16x16x16i8 a[0:3], a0, v1, 2 ; encoding: [0x00,0x00,0xd5,0xd3,0x00,0x03,0x0a,0x0a] +0x00,0x00,0xd5,0xd3,0x00,0x03,0x0a,0x0a + +# GFX908: v_mfma_i32_16x16x16i8 a[0:3], a0, v1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd5,0xd3,0x00,0x03,0x0a,0xea] +0x00,0x13,0xd5,0xd3,0x00,0x03,0x0a,0xea + +# GFX908: v_mfma_i32_16x16x16i8 a[0:3], a0, a1, 2 ; encoding: [0x00,0x00,0xd5,0xd3,0x00,0x03,0x0a,0x1a] +0x00,0x00,0xd5,0xd3,0x00,0x03,0x0a,0x1a + +# GFX908: v_mfma_i32_16x16x16i8 a[0:3], a0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd5,0xd3,0x00,0x03,0x0a,0xfa] +0x00,0x13,0xd5,0xd3,0x00,0x03,0x0a,0xfa + +# GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[1:32] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x06,0x04] +0x00,0x00,0xe8,0xd3,0x00,0x03,0x06,0x04 + +# GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x06,0xe4] +0x00,0x13,0xe8,0xd3,0x00,0x03,0x06,0xe4 + +# GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, a[1:32] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x06,0x14] +0x00,0x00,0xe8,0xd3,0x00,0x03,0x06,0x14 + +# GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: 
[0x00,0x13,0xe8,0xd3,0x00,0x03,0x06,0xf4] +0x00,0x13,0xe8,0xd3,0x00,0x03,0x06,0xf4 + +# GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, a[1:32] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x06,0x0c] +0x00,0x00,0xe8,0xd3,0x00,0x03,0x06,0x0c + +# GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x06,0xec] +0x00,0x13,0xe8,0xd3,0x00,0x03,0x06,0xec + +# GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, a[1:32] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x06,0x1c] +0x00,0x00,0xe8,0xd3,0x00,0x03,0x06,0x1c + +# GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x06,0xfc] +0x00,0x13,0xe8,0xd3,0x00,0x03,0x06,0xfc + +# GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, -2.0 ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0xd6,0x03] +0x00,0x00,0xe8,0xd3,0x00,0x03,0xd6,0x03 + +# GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0xd6,0xe3] +0x00,0x13,0xe8,0xd3,0x00,0x03,0xd6,0xe3 + +# GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, -2.0 ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0xd6,0x13] +0x00,0x00,0xe8,0xd3,0x00,0x03,0xd6,0x13 + +# GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0xd6,0xf3] +0x00,0x13,0xe8,0xd3,0x00,0x03,0xd6,0xf3 + +# GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, -2.0 ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0xd6,0x0b] +0x00,0x00,0xe8,0xd3,0x00,0x03,0xd6,0x0b + +# GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0xd6,0xeb] +0x00,0x13,0xe8,0xd3,0x00,0x03,0xd6,0xeb + +# GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, -2.0 ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0xd6,0x1b] +0x00,0x00,0xe8,0xd3,0x00,0x03,0xd6,0x1b + +# GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: 
[0x00,0x13,0xe8,0xd3,0x00,0x03,0xd6,0xfb] +0x00,0x13,0xe8,0xd3,0x00,0x03,0xd6,0xfb + +# GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, a[1:16] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x06,0x04] +0x00,0x00,0xe9,0xd3,0x00,0x03,0x06,0x04 + +# GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x06,0xe4] +0x00,0x13,0xe9,0xd3,0x00,0x03,0x06,0xe4 + +# GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, a[1:16] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x06,0x14] +0x00,0x00,0xe9,0xd3,0x00,0x03,0x06,0x14 + +# GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x06,0xf4] +0x00,0x13,0xe9,0xd3,0x00,0x03,0x06,0xf4 + +# GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, a[1:16] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x06,0x0c] +0x00,0x00,0xe9,0xd3,0x00,0x03,0x06,0x0c + +# GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x06,0xec] +0x00,0x13,0xe9,0xd3,0x00,0x03,0x06,0xec + +# GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, a[1:16] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x06,0x1c] +0x00,0x00,0xe9,0xd3,0x00,0x03,0x06,0x1c + +# GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x06,0xfc] +0x00,0x13,0xe9,0xd3,0x00,0x03,0x06,0xfc + +# GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, -2.0 ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0xd6,0x03] +0x00,0x00,0xe9,0xd3,0x00,0x03,0xd6,0x03 + +# GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0xd6,0xe3] +0x00,0x13,0xe9,0xd3,0x00,0x03,0xd6,0xe3 + +# GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, -2.0 ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0xd6,0x13] +0x00,0x00,0xe9,0xd3,0x00,0x03,0xd6,0x13 + +# GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: 
[0x00,0x13,0xe9,0xd3,0x00,0x03,0xd6,0xf3] +0x00,0x13,0xe9,0xd3,0x00,0x03,0xd6,0xf3 + +# GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, -2.0 ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0xd6,0x0b] +0x00,0x00,0xe9,0xd3,0x00,0x03,0xd6,0x0b + +# GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0xd6,0xeb] +0x00,0x13,0xe9,0xd3,0x00,0x03,0xd6,0xeb + +# GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, -2.0 ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0xd6,0x1b] +0x00,0x00,0xe9,0xd3,0x00,0x03,0xd6,0x1b + +# GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0xd6,0xfb] +0x00,0x13,0xe9,0xd3,0x00,0x03,0xd6,0xfb + +# GFX908: v_mfma_f32_4x4x2bf16 a[0:3], v0, v1, a[1:4] ; encoding: [0x00,0x00,0xeb,0xd3,0x00,0x03,0x06,0x04] +0x00,0x00,0xeb,0xd3,0x00,0x03,0x06,0x04 + +# GFX908: v_mfma_f32_4x4x2bf16 a[0:3], v0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xeb,0xd3,0x00,0x03,0x06,0xe4] +0x00,0x13,0xeb,0xd3,0x00,0x03,0x06,0xe4 + +# GFX908: v_mfma_f32_4x4x2bf16 a[0:3], v0, a1, a[1:4] ; encoding: [0x00,0x00,0xeb,0xd3,0x00,0x03,0x06,0x14] +0x00,0x00,0xeb,0xd3,0x00,0x03,0x06,0x14 + +# GFX908: v_mfma_f32_4x4x2bf16 a[0:3], v0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xeb,0xd3,0x00,0x03,0x06,0xf4] +0x00,0x13,0xeb,0xd3,0x00,0x03,0x06,0xf4 + +# GFX908: v_mfma_f32_4x4x2bf16 a[0:3], a0, v1, a[1:4] ; encoding: [0x00,0x00,0xeb,0xd3,0x00,0x03,0x06,0x0c] +0x00,0x00,0xeb,0xd3,0x00,0x03,0x06,0x0c + +# GFX908: v_mfma_f32_4x4x2bf16 a[0:3], a0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xeb,0xd3,0x00,0x03,0x06,0xec] +0x00,0x13,0xeb,0xd3,0x00,0x03,0x06,0xec + +# GFX908: v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, a[1:4] ; encoding: [0x00,0x00,0xeb,0xd3,0x00,0x03,0x06,0x1c] +0x00,0x00,0xeb,0xd3,0x00,0x03,0x06,0x1c + +# GFX908: v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xeb,0xd3,0x00,0x03,0x06,0xfc] 
+0x00,0x13,0xeb,0xd3,0x00,0x03,0x06,0xfc + +# GFX908: v_mfma_f32_4x4x2bf16 a[0:3], v0, v1, -2.0 ; encoding: [0x00,0x00,0xeb,0xd3,0x00,0x03,0xd6,0x03] +0x00,0x00,0xeb,0xd3,0x00,0x03,0xd6,0x03 + +# GFX908: v_mfma_f32_4x4x2bf16 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xeb,0xd3,0x00,0x03,0xd6,0xe3] +0x00,0x13,0xeb,0xd3,0x00,0x03,0xd6,0xe3 + +# GFX908: v_mfma_f32_4x4x2bf16 a[0:3], v0, a1, -2.0 ; encoding: [0x00,0x00,0xeb,0xd3,0x00,0x03,0xd6,0x13] +0x00,0x00,0xeb,0xd3,0x00,0x03,0xd6,0x13 + +# GFX908: v_mfma_f32_4x4x2bf16 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xeb,0xd3,0x00,0x03,0xd6,0xf3] +0x00,0x13,0xeb,0xd3,0x00,0x03,0xd6,0xf3 + +# GFX908: v_mfma_f32_4x4x2bf16 a[0:3], a0, v1, -2.0 ; encoding: [0x00,0x00,0xeb,0xd3,0x00,0x03,0xd6,0x0b] +0x00,0x00,0xeb,0xd3,0x00,0x03,0xd6,0x0b + +# GFX908: v_mfma_f32_4x4x2bf16 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xeb,0xd3,0x00,0x03,0xd6,0xeb] +0x00,0x13,0xeb,0xd3,0x00,0x03,0xd6,0xeb + +# GFX908: v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, -2.0 ; encoding: [0x00,0x00,0xeb,0xd3,0x00,0x03,0xd6,0x1b] +0x00,0x00,0xeb,0xd3,0x00,0x03,0xd6,0x1b + +# GFX908: v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xeb,0xd3,0x00,0x03,0xd6,0xfb] +0x00,0x13,0xeb,0xd3,0x00,0x03,0xd6,0xfb + +# GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, a[1:16] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x06,0x04] +0x00,0x00,0xec,0xd3,0x00,0x03,0x06,0x04 + +# GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x06,0xe4] +0x00,0x13,0xec,0xd3,0x00,0x03,0x06,0xe4 + +# GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, a[1:16] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x06,0x14] +0x00,0x00,0xec,0xd3,0x00,0x03,0x06,0x14 + +# GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x06,0xf4] +0x00,0x13,0xec,0xd3,0x00,0x03,0x06,0xf4 + +# 
GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, a[1:16] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x06,0x0c] +0x00,0x00,0xec,0xd3,0x00,0x03,0x06,0x0c + +# GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x06,0xec] +0x00,0x13,0xec,0xd3,0x00,0x03,0x06,0xec + +# GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, a[1:16] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x06,0x1c] +0x00,0x00,0xec,0xd3,0x00,0x03,0x06,0x1c + +# GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x06,0xfc] +0x00,0x13,0xec,0xd3,0x00,0x03,0x06,0xfc + +# GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, -2.0 ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0xd6,0x03] +0x00,0x00,0xec,0xd3,0x00,0x03,0xd6,0x03 + +# GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0xd6,0xe3] +0x00,0x13,0xec,0xd3,0x00,0x03,0xd6,0xe3 + +# GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, -2.0 ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0xd6,0x13] +0x00,0x00,0xec,0xd3,0x00,0x03,0xd6,0x13 + +# GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0xd6,0xf3] +0x00,0x13,0xec,0xd3,0x00,0x03,0xd6,0xf3 + +# GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, -2.0 ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0xd6,0x0b] +0x00,0x00,0xec,0xd3,0x00,0x03,0xd6,0x0b + +# GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0xd6,0xeb] +0x00,0x13,0xec,0xd3,0x00,0x03,0xd6,0xeb + +# GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, -2.0 ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0xd6,0x1b] +0x00,0x00,0xec,0xd3,0x00,0x03,0xd6,0x1b + +# GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0xd6,0xfb] +0x00,0x13,0xec,0xd3,0x00,0x03,0xd6,0xfb + +# GFX908: 
v_mfma_f32_16x16x8bf16 a[0:3], v0, v1, a[1:4] ; encoding: [0x00,0x00,0xed,0xd3,0x00,0x03,0x06,0x04] +0x00,0x00,0xed,0xd3,0x00,0x03,0x06,0x04 + +# GFX908: v_mfma_f32_16x16x8bf16 a[0:3], v0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xed,0xd3,0x00,0x03,0x06,0xe4] +0x00,0x13,0xed,0xd3,0x00,0x03,0x06,0xe4 + +# GFX908: v_mfma_f32_16x16x8bf16 a[0:3], v0, a1, a[1:4] ; encoding: [0x00,0x00,0xed,0xd3,0x00,0x03,0x06,0x14] +0x00,0x00,0xed,0xd3,0x00,0x03,0x06,0x14 + +# GFX908: v_mfma_f32_16x16x8bf16 a[0:3], v0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xed,0xd3,0x00,0x03,0x06,0xf4] +0x00,0x13,0xed,0xd3,0x00,0x03,0x06,0xf4 + +# GFX908: v_mfma_f32_16x16x8bf16 a[0:3], a0, v1, a[1:4] ; encoding: [0x00,0x00,0xed,0xd3,0x00,0x03,0x06,0x0c] +0x00,0x00,0xed,0xd3,0x00,0x03,0x06,0x0c + +# GFX908: v_mfma_f32_16x16x8bf16 a[0:3], a0, v1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xed,0xd3,0x00,0x03,0x06,0xec] +0x00,0x13,0xed,0xd3,0x00,0x03,0x06,0xec + +# GFX908: v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, a[1:4] ; encoding: [0x00,0x00,0xed,0xd3,0x00,0x03,0x06,0x1c] +0x00,0x00,0xed,0xd3,0x00,0x03,0x06,0x1c + +# GFX908: v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xed,0xd3,0x00,0x03,0x06,0xfc] +0x00,0x13,0xed,0xd3,0x00,0x03,0x06,0xfc + +# GFX908: v_mfma_f32_16x16x8bf16 a[0:3], v0, v1, -2.0 ; encoding: [0x00,0x00,0xed,0xd3,0x00,0x03,0xd6,0x03] +0x00,0x00,0xed,0xd3,0x00,0x03,0xd6,0x03 + +# GFX908: v_mfma_f32_16x16x8bf16 a[0:3], v0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xed,0xd3,0x00,0x03,0xd6,0xe3] +0x00,0x13,0xed,0xd3,0x00,0x03,0xd6,0xe3 + +# GFX908: v_mfma_f32_16x16x8bf16 a[0:3], v0, a1, -2.0 ; encoding: [0x00,0x00,0xed,0xd3,0x00,0x03,0xd6,0x13] +0x00,0x00,0xed,0xd3,0x00,0x03,0xd6,0x13 + +# GFX908: v_mfma_f32_16x16x8bf16 a[0:3], v0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xed,0xd3,0x00,0x03,0xd6,0xf3] +0x00,0x13,0xed,0xd3,0x00,0x03,0xd6,0xf3 + +# GFX908: v_mfma_f32_16x16x8bf16 a[0:3], 
a0, v1, -2.0 ; encoding: [0x00,0x00,0xed,0xd3,0x00,0x03,0xd6,0x0b] +0x00,0x00,0xed,0xd3,0x00,0x03,0xd6,0x0b + +# GFX908: v_mfma_f32_16x16x8bf16 a[0:3], a0, v1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xed,0xd3,0x00,0x03,0xd6,0xeb] +0x00,0x13,0xed,0xd3,0x00,0x03,0xd6,0xeb + +# GFX908: v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, -2.0 ; encoding: [0x00,0x00,0xed,0xd3,0x00,0x03,0xd6,0x1b] +0x00,0x00,0xed,0xd3,0x00,0x03,0xd6,0x1b + +# GFX908: v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xed,0xd3,0x00,0x03,0xd6,0xfb] +0x00,0x13,0xed,0xd3,0x00,0x03,0xd6,0xfb