diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1648,6 +1648,9 @@ def HasTrue16BitInsts : Predicate<"Subtarget->hasTrue16BitInsts()">, AssemblerPredicate<(all_of FeatureTrue16BitInsts)>; +def UseTrue16BitInsts : Predicate<"Subtarget->useTrue16BitInsts()">; +def UseFake16BitInsts : Predicate<"Subtarget->hasTrue16BitInsts() && " + "!Subtarget->useTrue16BitInsts()">; def NotHasTrue16BitInsts : Predicate<"!Subtarget->hasTrue16BitInsts()">; def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">, diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -144,6 +144,17 @@ return MCDisassembler::Fail; } + template <typename InsnType> + DecodeStatus tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2, + MCInst &MI, InsnType Inst, uint64_t Address, + raw_ostream &Comments) const { + for (const uint8_t *T : {Table1, Table2}) { + if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments)) + return Res; + } + return MCDisassembler::Fail; + } + std::optional<DecodeStatus> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, raw_ostream &CStream) const override; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -418,11 +418,14 @@ // encodings if (isGFX11Plus() && Bytes.size() >= 12 ) { DecoderUInt128 DecW = eat12Bytes(Bytes); - Res = tryDecodeInst(DecoderTableDPP8GFX1196, MI, DecW, Address, CS); + Res = + tryDecodeInst(DecoderTableDPP8GFX1196, DecoderTableDPP8GFX11_FAKE1696, + MI, DecW, Address, CS); if (Res && convertDPP8Inst(MI) == 
MCDisassembler::Success) break; MI = MCInst(); // clear - Res = tryDecodeInst(DecoderTableDPPGFX1196, MI, DecW, Address, CS); + Res = tryDecodeInst(DecoderTableDPPGFX1196, DecoderTableDPPGFX11_FAKE1696, + MI, DecW, Address, CS); if (Res) { if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P) convertVOP3PDPPInst(MI); @@ -461,7 +464,8 @@ break; MI = MCInst(); // clear - Res = tryDecodeInst(DecoderTableDPP8GFX1164, MI, QW, Address, CS); + Res = tryDecodeInst(DecoderTableDPP8GFX1164, + DecoderTableDPP8GFX11_FAKE1664, MI, QW, Address, CS); if (Res && convertDPP8Inst(MI) == MCDisassembler::Success) break; MI = MCInst(); // clear @@ -469,7 +473,8 @@ Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS); if (Res) break; - Res = tryDecodeInst(DecoderTableDPPGFX1164, MI, QW, Address, CS); + Res = tryDecodeInst(DecoderTableDPPGFX1164, DecoderTableDPPGFX11_FAKE1664, + MI, QW, Address, CS); if (Res) { if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) convertVOPCDPPInst(MI); @@ -530,7 +535,8 @@ Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS); if (Res) break; - Res = tryDecodeInst(DecoderTableGFX1132, MI, DW, Address, CS); + Res = tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW, + Address, CS); if (Res) break; if (Bytes.size() < 4) break; @@ -560,7 +566,8 @@ Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS); if (Res) break; - Res = tryDecodeInst(DecoderTableGFX1164, MI, QW, Address, CS); + Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW, + Address, CS); if (Res) break; diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1340,6 +1340,7 @@ case AMDGPU::V_MAX_F32_e64: case AMDGPU::V_MAX_F16_e64: case AMDGPU::V_MAX_F16_t16_e64: + case AMDGPU::V_MAX_F16_not16_e64: case AMDGPU::V_MAX_F64_e64: case AMDGPU::V_PK_MAX_F16: { if 
(!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm()) @@ -1435,7 +1436,8 @@ } } case AMDGPU::V_MUL_F16_e64: - case AMDGPU::V_MUL_F16_t16_e64: { + case AMDGPU::V_MUL_F16_t16_e64: + case AMDGPU::V_MUL_F16_not16_e64: { switch (static_cast<uint16_t>(Val)) { case 0x3800: // 0.5 return SIOutMods::DIV2; @@ -1462,12 +1464,14 @@ case AMDGPU::V_MUL_F64_e64: case AMDGPU::V_MUL_F32_e64: case AMDGPU::V_MUL_F16_t16_e64: + case AMDGPU::V_MUL_F16_not16_e64: case AMDGPU::V_MUL_F16_e64: { // If output denormals are enabled, omod is ignored. if ((Op == AMDGPU::V_MUL_F32_e64 && MFI->getMode().FP32Denormals.Output != DenormalMode::PreserveSign) || ((Op == AMDGPU::V_MUL_F64_e64 || Op == AMDGPU::V_MUL_F16_e64 || - Op == AMDGPU::V_MUL_F16_t16_e64) && + Op == AMDGPU::V_MUL_F16_t16_e64 || + Op == AMDGPU::V_MUL_F16_not16_e64) && MFI->getMode().FP64FP16Denormals.Output != DenormalMode::PreserveSign)) return std::pair(nullptr, SIOutMods::NONE); @@ -1497,12 +1501,14 @@ case AMDGPU::V_ADD_F64_e64: case AMDGPU::V_ADD_F32_e64: case AMDGPU::V_ADD_F16_e64: - case AMDGPU::V_ADD_F16_t16_e64: { + case AMDGPU::V_ADD_F16_t16_e64: + case AMDGPU::V_ADD_F16_not16_e64: { // If output denormals are enabled, omod is ignored. 
if ((Op == AMDGPU::V_ADD_F32_e64 && MFI->getMode().FP32Denormals.Output != DenormalMode::PreserveSign) || ((Op == AMDGPU::V_ADD_F64_e64 || Op == AMDGPU::V_ADD_F16_e64 || - Op == AMDGPU::V_ADD_F16_t16_e64) && + Op == AMDGPU::V_ADD_F16_t16_e64 || + Op == AMDGPU::V_ADD_F16_not16_e64) && MFI->getMode().FP64FP16Denormals.Output != DenormalMode::PreserveSign)) return std::pair(nullptr, SIOutMods::NONE); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2253,6 +2253,7 @@ field list ArgVT = _ArgVT; field bit EnableClamp = _EnableClamp; field bit IsTrue16 = 0; + field bit IsFake16 = 0; field ValueType DstVT = ArgVT[0]; field ValueType Src0VT = ArgVT[1]; @@ -2457,6 +2458,21 @@ let Src2ModDPP = getSrcModDPP_t16.ret; } +class VOPProfile_Fake16 : VOPProfile { + let IsTrue16 = 1; + let IsFake16 = 1; + // Most DstVT are 16-bit, but not all + let DstRC = getVALUDstForVT_t16.ret; + let DstRC64 = getVALUDstForVT.ret; + let Src1RC32 = RegisterOperand.ret>; + let Src0DPP = getVregSrcForVT_t16.ret; + let Src1DPP = getVregSrcForVT_t16.ret; + let Src2DPP = getVregSrcForVT_t16.ret; + let Src0ModDPP = getSrcModDPP_t16.ret; + let Src1ModDPP = getSrcModDPP_t16.ret; + let Src2ModDPP = getSrcModDPP_t16.ret; +} + def VOP_F16_F16 : VOPProfile<[f16, f16, untyped, untyped]>; def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>; def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1627,8 +1627,10 @@ def : ClampPat; let SubtargetPredicate = NotHasTrue16BitInsts in def : ClampPat; -let SubtargetPredicate = HasTrue16BitInsts in +let SubtargetPredicate = UseTrue16BitInsts in def : ClampPat; +let SubtargetPredicate = UseFake16BitInsts in +def : ClampPat; let 
SubtargetPredicate = HasVOP3PInsts in { def : GCNPat < @@ -2674,12 +2676,12 @@ let OtherPredicates = [HasTrue16BitInsts] in { def : GCNPat< (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))), - (V_MUL_F16_t16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src) + (V_MUL_F16_not16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src) >; def : GCNPat< (fcanonicalize (f16 (fneg (VOP3Mods f16:$src, i32:$src_mods)))), - (V_MUL_F16_t16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src) + (V_MUL_F16_not16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src) >; } // End OtherPredicates diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -152,7 +152,7 @@ defm NAME : VOP1Inst; } let OtherPredicates = [HasTrue16BitInsts] in { - defm _t16 : VOP1Inst, node>; + defm _t16 : VOP1Inst, node>; } } @@ -170,7 +170,7 @@ } class VOPProfileI2F_True16 : - VOPProfile_True16> { + VOPProfile_Fake16> { let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod); let InsVOP3Base = (ins Src0VOP3DPP:$src0, clampmod:$clamp, omod:$omod); @@ -199,7 +199,7 @@ def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF; def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF; def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF; -def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_True16 { +def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_Fake16 { let HasOMod = 1; } @@ -292,13 +292,13 @@ let OtherPredicates = [NotHasTrue16BitInsts] in defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>; let OtherPredicates = [HasTrue16BitInsts] in - defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_True16, any_fpround>; + defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_Fake16, any_fpround>; } // End FPDPRounding = 1, isReMaterializable = 0 let OtherPredicates = [NotHasTrue16BitInsts] in defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", 
VOP_F32_F16, any_fpextend>; let OtherPredicates = [HasTrue16BitInsts] in -defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_True16, any_fpextend>; +defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_Fake16, any_fpextend>; let ReadsModeReg = 0, mayRaiseFPException = 0 in { defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>; diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -194,9 +194,12 @@ let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in { defm NAME : VOP2Inst; } - let SubtargetPredicate = HasTrue16BitInsts in { + let SubtargetPredicate = UseTrue16BitInsts in { defm _t16 : VOP2Inst, node, revOp#"_t16", GFX9Renamed>; } + let SubtargetPredicate = UseFake16BitInsts in { + defm _not16 : VOP2Inst, node, revOp#"_not16", GFX9Renamed>; + } } // Creating a _t16_e32 pseudo when there is no corresponding real instruction on @@ -212,7 +215,7 @@ defm NAME : VOP2Inst; } let SubtargetPredicate = HasTrue16BitInsts in { - defm _t16 : VOP2Inst_e64, node, revOp#"_t16", GFX9Renamed>; + defm _t16 : VOP2Inst_e64, node, revOp#"_t16", GFX9Renamed>; } } @@ -874,7 +877,7 @@ let HasSrc1FloatMods = 0; let Src1ModSDWA = Int16SDWAInputMods; } -def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16 { +def LDEXP_F16_VOPProfile_True16 : VOPProfile_Fake16 { let Src1RC32 = RegisterOperand; let Src1DPP = VGPR_32_Lo128; let Src1ModDPP = IntT16VRegInputMods; @@ -925,9 +928,9 @@ let SubtargetPredicate = isGFX11Plus in { let isCommutable = 1 in { - defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_True16, and>; - defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_True16, or>; - defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_True16, xor>; + defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_Fake16, and>; + 
defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_Fake16, or>; + defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_Fake16, xor>; } // End isCommutable = 1 } // End SubtargetPredicate = isGFX11Plus @@ -1307,6 +1310,7 @@ multiclass VOP2_Real_e32_with_name_gfx11 op, string opName, string asmName, bit single = 0> { defvar ps = !cast(opName#"_e32"); + let DecoderNamespace = !if(ps.Pfl.IsFake16, "GFX11_FAKE16", "GFX11") in def _e32_gfx11 : VOP2_Real, VOP2e { @@ -1331,7 +1335,7 @@ def _dpp_gfx11 : VOP2_DPP16(opName#"_dpp"), SIEncodingFamily.GFX11> { let AsmString = asmName # ps.Pfl.AsmDPP16; - let DecoderNamespace = "DPPGFX11"; + let DecoderNamespace = !if(ps.Pfl.IsFake16, "DPPGFX11_FAKE16", "DPPGFX11"); } } multiclass VOP2_Real_dpp8_with_name_gfx11 op, string opName, @@ -1340,7 +1344,7 @@ if ps.Pfl.HasExtDPP then def _dpp8_gfx11 : VOP2_DPP8 { let AsmString = asmName # ps.Pfl.AsmDPP8; - let DecoderNamespace = "DPP8GFX11"; + let DecoderNamespace = !if(ps.Pfl.IsFake16, "DPP8GFX11_FAKE16", "DPP8GFX11"); } } @@ -1491,13 +1495,19 @@ defm V_PK_FMAC_F16 : VOP2Only_Real_gfx11<0x03c>; defm V_ADD_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x032, "v_add_f16">; +defm V_ADD_F16_not16 : VOP2_Real_FULL_t16_gfx11<0x032, "v_add_f16">; defm V_SUB_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x033, "v_sub_f16">; +defm V_SUB_F16_not16 : VOP2_Real_FULL_t16_gfx11<0x033, "v_sub_f16">; defm V_SUBREV_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x034, "v_subrev_f16">; +defm V_SUBREV_F16_not16 : VOP2_Real_FULL_t16_gfx11<0x034, "v_subrev_f16">; defm V_MUL_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x035, "v_mul_f16">; +defm V_MUL_F16_not16 : VOP2_Real_FULL_t16_gfx11<0x035, "v_mul_f16">; defm V_FMAC_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x036, "v_fmac_f16">; defm V_LDEXP_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03b, "v_ldexp_f16">; defm V_MAX_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">; +defm V_MAX_F16_not16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">; defm V_MIN_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03a, 
"v_min_f16">; +defm V_MIN_F16_not16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">; defm V_FMAMK_F16_t16 : VOP2Only_Real_MADK_gfx11_with_name<0x037, "v_fmamk_f16">; defm V_FMAAK_F16_t16 : VOP2Only_Real_MADK_gfx11_with_name<0x038, "v_fmaak_f16">; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -1358,6 +1358,7 @@ VOP3_Real, VOP3OpSel_gfx11; if !not(ps.Pfl.HasOpSel) then + let DecoderNamespace = !if(ps.Pfl.IsFake16, "GFX11_FAKE16", "GFX11") in def _e64_gfx11 : VOP3_Real, VOP3e_gfx11; @@ -1388,7 +1389,8 @@ multiclass VOP3_Real_dpp_with_name_gfx11 op, string opName, string asmName> { defvar ps = !cast(opName#"_e64"); - let AsmString = asmName # ps.Pfl.AsmVOP3DPP16, DecoderNamespace = "DPPGFX11" in { + let AsmString = asmName # ps.Pfl.AsmVOP3DPP16, + DecoderNamespace = !if(ps.Pfl.IsFake16, "DPPGFX11_FAKE16", "DPPGFX11") in { defm NAME : VOP3_Real_dpp_Base_gfx11; } } @@ -1411,7 +1413,8 @@ multiclass VOP3_Real_dpp8_with_name_gfx11 op, string opName, string asmName> { defvar ps = !cast(opName#"_e64"); - let AsmString = asmName # ps.Pfl.AsmVOP3DPP8, DecoderNamespace = "DPP8GFX11" in { + let AsmString = asmName # ps.Pfl.AsmVOP3DPP8, + DecoderNamespace = !if(ps.Pfl.IsFake16, "DPP8GFX11_FAKE16", "DPP8GFX11") in { defm NAME : VOP3_Real_dpp8_Base_gfx11; } } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir @@ -18,15 +18,16 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: 
S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] + ; ; GFX11-LABEL: name: fmaxnum_ieee_f16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[V_MAX_F16_not16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_not16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_not16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -49,15 +50,16 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %5 + ; CHECK-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] + ; ; GFX11-LABEL: name: fmaxnum_ieee_f16_v_fneg_v ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %5 + ; GFX11-NEXT: [[V_MAX_F16_not16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_not16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_not16_e64_]] %0:vgpr(s32) = COPY 
$vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir @@ -18,15 +18,16 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] + ; ; GFX11-LABEL: name: fmaxnum_f16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[V_MAX_F16_not16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_not16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_not16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -49,15 +50,16 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %5 + ; CHECK-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit 
[[V_MAX_F16_e64_]] + ; ; GFX11-LABEL: name: fmaxnum_f16_v_fneg_v ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %5 + ; GFX11-NEXT: [[V_MAX_F16_not16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_not16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAX_F16_not16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir @@ -18,15 +18,16 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_e64_]] + ; ; GFX11-LABEL: name: fminnum_ieee_f16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[V_MIN_F16_not16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_not16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, 
implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_not16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -49,15 +50,16 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %5 + ; CHECK-NEXT: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_e64_]] + ; ; GFX11-LABEL: name: fminnum_ieee_f16_v_fneg_v ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %5 + ; GFX11-NEXT: [[V_MIN_F16_not16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_not16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_not16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir @@ -18,15 +18,16 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %4 + ; CHECK-NEXT: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 
0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_e64_]] + ; ; GFX11-LABEL: name: fminnum_f16_vv ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %4:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %4 + ; GFX11-NEXT: [[V_MIN_F16_not16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_not16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_not16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -49,15 +50,16 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit %5 + ; CHECK-NEXT: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_e64_]] + ; ; GFX11-LABEL: name: fminnum_f16_v_fneg_v ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: %5:vgpr_32 = nofpexcept V_MIN_F16_t16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit %5 + ; GFX11-NEXT: [[V_MIN_F16_not16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_not16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MIN_F16_not16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0