diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -6894,6 +6894,7 @@ Opc == AMDGPU::V_MAC_F16_e64_vi || Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || Opc == AMDGPU::V_FMAC_F32_e64_vi || + Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { auto it = Inst.begin(); std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -390,6 +390,7 @@ MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi || MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi || MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 || + MI.getOpcode() == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10)) { // Insert dummy unused src2_modifiers. insertNamedMCOperand(MI, MCOperand::createImm(0), diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -672,14 +672,23 @@ defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32>; let Constraints = "$vdst = $src2", - DisableEncoding="$src2", + DisableEncoding = "$src2", isConvertibleToThreeAddress = 1, - isCommutable = 1 in { + isCommutable = 1 in defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>; -} } // End SubtargetPredicate = HasDLInsts +let SubtargetPredicate = HasNoMadMacF32Insts in { + +let Constraints = "$vdst = $src2", + DisableEncoding = "$src2", + isConvertibleToThreeAddress = 1, + isCommutable = 1 in +defm V_FMAC_LEGACY_F32 : VOP2Inst <"v_fmac_legacy_f32", VOP_MAC_LEGACY_F32>; + +} // End SubtargetPredicate = HasNoMadMacF32Insts + let Constraints = "$vdst = $src2", DisableEncoding="$src2", isConvertibleToThreeAddress = 1, @@ -1147,21 +1156,25 @@ VOP2_Real_dpp_gfx10_with_name, VOP2_Real_dpp8_gfx10_with_name; -defm V_XNOR_B32 : VOP2_Real_gfx10<0x01e>; -defm V_FMAC_F32 : VOP2_Real_gfx10<0x02b>; -defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10<0x02c>; -defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10<0x02d>; -defm V_ADD_F16 : VOP2_Real_gfx10<0x032>; -defm V_SUB_F16 : VOP2_Real_gfx10<0x033>; -defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>; -defm V_MUL_F16 : VOP2_Real_gfx10<0x035>; -defm V_FMAC_F16 : VOP2_Real_gfx10<0x036>; -defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10<0x037>; -defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10<0x038>; -defm V_MAX_F16 : VOP2_Real_gfx10<0x039>; -defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>; -defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>; -defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>; +// NB: Same opcode as v_mac_legacy_f32 +let DecoderNamespace = "GFX10_B" in +defm V_FMAC_LEGACY_F32 : VOP2_Real_gfx10<0x006>; + +defm V_XNOR_B32 : VOP2_Real_gfx10<0x01e>; +defm V_FMAC_F32 : VOP2_Real_gfx10<0x02b>; +defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10<0x02c>; +defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10<0x02d>; +defm V_ADD_F16 : VOP2_Real_gfx10<0x032>; +defm V_SUB_F16 : VOP2_Real_gfx10<0x033>; +defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>; +defm V_MUL_F16 : VOP2_Real_gfx10<0x035>; +defm V_FMAC_F16 : VOP2_Real_gfx10<0x036>; +defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10<0x037>; +defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10<0x038>; +defm V_MAX_F16 : VOP2_Real_gfx10<0x039>; +defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>; +defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>; +defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>; // VOP2 no carry-in, carry-out. defm V_ADD_NC_U32 : diff --git a/llvm/test/MC/AMDGPU/gfx1011_err.s b/llvm/test/MC/AMDGPU/gfx1011_err.s --- a/llvm/test/MC/AMDGPU/gfx1011_err.s +++ b/llvm/test/MC/AMDGPU/gfx1011_err.s @@ -22,6 +22,9 @@ v_fma_legacy_f32 v0, v1, v2, v3 // GFX10: error: instruction not supported on this GPU +v_fmac_legacy_f32 v0, v1, v2 +// GFX10: error: instruction not supported on this GPU + image_bvh_intersect_ray v[4:7], v[9:24], s[4:7] // GFX10: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1030_new.s b/llvm/test/MC/AMDGPU/gfx1030_new.s --- a/llvm/test/MC/AMDGPU/gfx1030_new.s +++ b/llvm/test/MC/AMDGPU/gfx1030_new.s @@ -61,6 +61,15 @@ v_fma_legacy_f32 v0, s1, 2.0, -v3 // GFX10: encoding: [0x00,0x00,0x40,0xd5,0x01,0xe8,0x0d,0x84] +v_fmac_legacy_f32 v0, v1, v2 +// GFX10: encoding: [0x01,0x05,0x00,0x0c] + +v_fmac_legacy_f32 v0, |v1|, -v2 +// GFX10: encoding: [0x00,0x01,0x06,0xd5,0x01,0x05,0x02,0x40] + +v_fmac_legacy_f32 v0, s1, 2.0 +// GFX10: encoding: [0x00,0x00,0x06,0xd5,0x01,0xe8,0x01,0x00] + image_bvh_intersect_ray v[4:7], v[9:24], s[4:7] // GFX10: encoding: [0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1030_unsupported.s b/llvm/test/MC/AMDGPU/gfx1030_unsupported.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1030_unsupported.s @@ -0,0 +1,16 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1030 -mattr=+WavefrontSize32,-WavefrontSize64 %s 2>&1 | FileCheck --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1030 -mattr=-WavefrontSize32,+WavefrontSize64 %s 2>&1 | FileCheck --implicit-check-not=error: %s + +//===----------------------------------------------------------------------===// +// Unsupported dpp variants. +//===----------------------------------------------------------------------===// + +v_fmac_legacy_f32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: dpp variant of this instruction is not supported + +//===----------------------------------------------------------------------===// +// Unsupported sdwa variants. +//===----------------------------------------------------------------------===// + +v_fmac_legacy_f32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1030_dasm_new.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1030_dasm_new.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1030_dasm_new.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1030_dasm_new.txt @@ -52,6 +52,15 @@ # GFX10: v_fma_legacy_f32 v0, s1, 2.0, -v3 0x00,0x00,0x40,0xd5,0x01,0xe8,0x0d,0x84 +# GFX10: v_fmac_legacy_f32_e32 v0, v1, v2 +0x01,0x05,0x00,0x0c + +# GFX10: v_fmac_legacy_f32_e64 v0, |v1|, -v2 +0x00,0x01,0x06,0xd5,0x01,0x05,0x02,0x40 + +# GFX10: v_fmac_legacy_f32_e64 v0, s1, 2.0 +0x00,0x00,0x06,0xd5,0x01,0xe8,0x01,0x00 + # GFX10: image_bvh_intersect_ray v[4:7], v[9:24], s[4:7] 0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x00