diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -1552,6 +1552,10 @@ VOP2_Real(NAME#"_e32"), SIEncodingFamily.GFX10>, VOP2e(NAME#"_e32").Pfl>; } + multiclass VOP2Only_Real_e32_gfx10 op> { + let IsSingle = 1 in + defm NAME: VOP2_Real_e32_gfx10; + } multiclass VOP2_Real_e64_gfx10 op> { def _e64_gfx10 : VOP3_Real(NAME#"_e64"), SIEncodingFamily.GFX10>, @@ -1763,6 +1767,11 @@ VOP2be_Real_dpp_gfx10, VOP2be_Real_dpp8_gfx10; +multiclass VOP2Only_Real_gfx10 op> : + VOP2Only_Real_e32_gfx10, + VOP2_Real_dpp_gfx10, + VOP2_Real_dpp8_gfx10; + multiclass VOP2_Real_gfx10 op> : VOP2_Real_e32_gfx10, VOP2_Real_e64_gfx10, VOP2_Real_sdwa_gfx10, VOP2_Real_dpp_gfx10, VOP2_Real_dpp8_gfx10; @@ -1802,9 +1811,9 @@ defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>; defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>; -let IsSingle = 1 in { - defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>; -} +let SubtargetPredicate = HasPkFmacF16Inst in { +defm V_PK_FMAC_F16 : VOP2Only_Real_gfx10<0x03c>; +} // End SubtargetPredicate = HasPkFmacF16Inst // VOP2 no carry-in, carry-out. defm V_ADD_NC_U32 : @@ -2144,6 +2153,26 @@ } } +multiclass VOP2_Real_e32_gfx9 op> { + def _e32_gfx9 : + VOP2_Real(NAME#"_e32"), SIEncodingFamily.GFX9>, + VOP2e(NAME#"_e32").Pfl>{ + let DecoderNamespace = "GFX9"; + } +} + +multiclass VOP2Only_Real_e32_gfx9 op> { + let IsSingle = 1 in + defm NAME: VOP2_Real_e32_gfx9; +} + +multiclass VOP2_Real_dpp_gfx9 op> { + if !cast(NAME#"_e32").Pfl.HasExt32BitDPP then + def _dpp_gfx9 : VOP2_DPP16(NAME#"_dpp"), SIEncodingFamily.GFX9> { + let DecoderNamespace = "SDWA9"; + } +} + } // AssemblerPredicate = isGFX9Only multiclass VOP2_Real_e32e64_vi op> : @@ -2155,6 +2184,11 @@ VOP2_DPPe(NAME#"_dpp")>; } +multiclass VOP2Only_Real_gfx9 op> : + VOP2Only_Real_e32_gfx9, + VOP2_Real_dpp_gfx9; + + defm V_CNDMASK_B32 : VOP2_Real_e32e64_vi <0x0>; defm V_ADD_F32 : VOP2_Real_e32e64_vi <0x1>; defm V_SUB_F32 : VOP2_Real_e32e64_vi <0x2>; @@ -2234,6 +2268,10 @@ defm V_MIN_I16 : VOP2_Real_e32e64_vi <0x32>; defm V_LDEXP_F16 : VOP2_Real_e32e64_vi <0x33>; +let SubtargetPredicate = HasPkFmacF16Inst in { +defm V_PK_FMAC_F16 : VOP2Only_Real_gfx9<0x03c>; +} // End SubtargetPredicate = HasPkFmacF16Inst + let SubtargetPredicate = isGFX8GFX9 in { // Aliases to simplify matching of floating-point instructions that diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s b/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s --- a/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_dpp16.s @@ -309,6 +309,9 @@ v_or_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 // GFX10: [0xfa,0x04,0x0a,0x38,0x01,0x1b,0x00,0x00] +v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX10: [0xfa,0x04,0x0a,0x78,0x01,0x1b,0x00,0x00] + v_xor_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 // GFX10: [0xfa,0x04,0x0a,0x3a,0x01,0x1b,0x00,0x00] @@ -597,6 +600,9 @@ v_or_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 // GFX10: [0xfa,0x04,0x0a,0x38,0x01,0x1b,0x04,0x00] +v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 +// GFX10: [0xfa,0x04,0x0a,0x78,0x01,0x1b,0x04,0x00] + v_xor_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 // GFX10: [0xfa,0x04,0x0a,0x3a,0x01,0x1b,0x04,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s b/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s --- a/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_dpp8.s @@ -225,6 +225,9 @@ v_or_b32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] // GFX10: encoding: [0xe9,0x04,0x0a,0x38,0x01,0x88,0xc6,0xfa] +v_pk_fmac_f16_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] +// GFX10: encoding: [0xe9,0x04,0x0a,0x78,0x01,0x88,0xc6,0xfa] + v_xor_b32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] // GFX10: encoding: [0xe9,0x04,0x0a,0x3a,0x01,0x88,0xc6,0xfa] @@ -477,6 +480,9 @@ v_or_b32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 // GFX10: encoding: [0xea,0x04,0x0a,0x38,0x01,0x88,0xc6,0xfa] +v_pk_fmac_f16_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX10: encoding: [0xea,0x04,0x0a,0x78,0x01,0x88,0xc6,0xfa] + v_xor_b32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 // GFX10: encoding: [0xea,0x04,0x0a,0x3a,0x01,0x88,0xc6,0xfa] diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s --- a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s @@ -13185,3 +13185,81 @@ v_pk_fmac_f16 v5, v1, v255 // GFX10: encoding: [0x01,0xff,0x0b,0x78] + +v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00] + +v_pk_fmac_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX10: encoding: [0xfa,0x04,0xfe,0x79,0x01,0xe4,0x00,0x00] + +v_pk_fmac_f16_dpp v5, v255, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX10: encoding: [0xfa,0x04,0x0a,0x78,0xff,0xe4,0x00,0x00] + +v_pk_fmac_f16_dpp v5, v1, v255 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX10: encoding: [0xfa,0xfe,0x0b,0x78,0x01,0xe4,0x00,0x00] + +v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 +// GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x1b,0x00,0x00] + +v_pk_fmac_f16_dpp v5, v1, v2 row_mirror row_mask:0x0 bank_mask:0x0 +// GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x40,0x01,0x00] + +v_pk_fmac_f16_dpp v5, v1, v2 row_half_mirror row_mask:0x0 bank_mask:0x0 +// GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x41,0x01,0x00] + +v_pk_fmac_f16_dpp v5, v1, v2 row_shl:1 row_mask:0x0 bank_mask:0x0 +// GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x01,0x01,0x00] + + v_pk_fmac_f16_dpp v5, v1, v2 row_shl:15 row_mask:0x0 bank_mask:0x0 + // GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x0f,0x01,0x00] + + v_pk_fmac_f16_dpp v5, v1, v2 row_shr:1 row_mask:0x0 bank_mask:0x0 + // GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x11,0x01,0x00] + + v_pk_fmac_f16_dpp v5, v1, v2 row_shr:15 row_mask:0x0 bank_mask:0x0 + // GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x1f,0x01,0x00] + + v_pk_fmac_f16_dpp v5, v1, v2 row_ror:1 row_mask:0x0 bank_mask:0x0 + // GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x21,0x01,0x00] + + v_pk_fmac_f16_dpp v5, v1, v2 row_ror:15 row_mask:0x0 bank_mask:0x0 + // GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x2f,0x01,0x00] + + v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x1 bank_mask:0x0 + // GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x10] + + v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 + // GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x30] + + v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 + // GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xf0] + + v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 + // GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x01] + + v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3 + // GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x03] + + v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf + // GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x0f] + + v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:1 + // GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x08,0x00] + + v_pk_fmac_f16_dpp v5, -v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 + // GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x10,0x00] + + v_pk_fmac_f16_dpp v5, |v1|, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 + // GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x20,0x00] + + v_pk_fmac_f16_dpp v5, v1, v2 row_share:1 row_mask:0x0 bank_mask:0x0 + // GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x51,0x01,0x00] + + v_pk_fmac_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x0 + // GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x5f,0x01,0x00] + + v_pk_fmac_f16_dpp v5, v1, v2 row_xmask:1 row_mask:0x0 bank_mask:0x0 + // GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x61,0x01,0x00] + + v_pk_fmac_f16_dpp v5, v1, v2 row_xmask:15 row_mask:0x0 bank_mask:0x0 + // GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x6f,0x01,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx10_unsupported_e64_dpp.s b/llvm/test/MC/AMDGPU/gfx10_unsupported_e64_dpp.s --- a/llvm/test/MC/AMDGPU/gfx10_unsupported_e64_dpp.s +++ b/llvm/test/MC/AMDGPU/gfx10_unsupported_e64_dpp.s @@ -818,6 +818,9 @@ v_perm_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported +v_pk_fmac_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + v_rcp_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] // CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s @@ -1553,6 +1553,48 @@ v_or_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: encoding: [0xfa,0xfe,0xff,0x39,0xff,0x6f,0x05,0x30] +v_pk_fmac_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x1b,0x00,0xff] + +v_pk_fmac_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff] + +v_pk_fmac_f16 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x40,0x01,0xff] + +v_pk_fmac_f16 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x41,0x01,0xff] + +v_pk_fmac_f16 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x01,0x01,0xff] + +v_pk_fmac_f16 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x0f,0x01,0xff] + +v_pk_fmac_f16 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x11,0x01,0xff] + +v_pk_fmac_f16 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x1f,0x01,0xff] + +v_pk_fmac_f16 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x21,0x01,0xff] + +v_pk_fmac_f16 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x2f,0x01,0xff] + +v_pk_fmac_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x50,0x01,0xff] + +v_pk_fmac_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x5f,0x01,0x01] + +v_pk_fmac_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x60,0x09,0x13] + +v_pk_fmac_f16 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x79,0xff,0x6f,0x05,0x30] + v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] // W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s @@ -330,6 +330,15 @@ v_or_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: encoding: [0xe9,0xfe,0xff,0x39,0xff,0x00,0x00,0x00] +v_pk_fmac_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x78,0x01,0x77,0x39,0x05] + +v_pk_fmac_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x78,0x01,0x77,0x39,0x05] + +v_pk_fmac_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xfe,0x78,0x7f,0x00,0x00,0x00] + v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] // W32: encoding: [0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx8_unsupported.s b/llvm/test/MC/AMDGPU/gfx8_unsupported.s --- a/llvm/test/MC/AMDGPU/gfx8_unsupported.s +++ b/llvm/test/MC/AMDGPU/gfx8_unsupported.s @@ -1567,6 +1567,9 @@ v_pk_fmac_f16 v0, v1, v2 // CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU +v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + v_pk_lshlrev_b16 v0, lds_direct, v0 // CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/xdl-insts-gfx908.s b/llvm/test/MC/AMDGPU/xdl-insts-gfx908.s --- a/llvm/test/MC/AMDGPU/xdl-insts-gfx908.s +++ b/llvm/test/MC/AMDGPU/xdl-insts-gfx908.s @@ -502,3 +502,81 @@ // CHECK: encoding: [0x01,0xff,0x0b,0x78] v_pk_fmac_f16 v5, v1, v255 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00] +v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0xfe,0x79,0x01,0xe4,0x00,0x00] +v_pk_fmac_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0xff,0xe4,0x00,0x00] +v_pk_fmac_f16_dpp v5, v255, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0xfe,0x0b,0x78,0x01,0xe4,0x00,0x00] +v_pk_fmac_f16_dpp v5, v1, v255 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x1b,0x00,0x00] +v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x40,0x01,0x00] +v_pk_fmac_f16_dpp v5, v1, v2 row_mirror row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x41,0x01,0x00] +v_pk_fmac_f16_dpp v5, v1, v2 row_half_mirror row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x01,0x01,0x00] +v_pk_fmac_f16_dpp v5, v1, v2 row_shl:1 row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x0f,0x01,0x00] +v_pk_fmac_f16_dpp v5, v1, v2 row_shl:15 row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x11,0x01,0x00] +v_pk_fmac_f16_dpp v5, v1, v2 row_shr:1 row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x1f,0x01,0x00] +v_pk_fmac_f16_dpp v5, v1, v2 row_shr:15 row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x21,0x01,0x00] +v_pk_fmac_f16_dpp v5, v1, v2 row_ror:1 row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0x2f,0x01,0x00] +v_pk_fmac_f16_dpp v5, v1, v2 row_ror:15 row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x10] +v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x1 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x30] +v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xf0] +v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xf0] +v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x01] +v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x03] +v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x0f] +v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x0f] +v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x08,0x00] +v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x10,0x00] +v_pk_fmac_f16_dpp v5, -v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x20,0x00] +v_pk_fmac_f16_dpp v5, |v1|, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x40,0x00] +v_pk_fmac_f16_dpp v5, v1, -v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 + +// CHECK: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x80,0x00] +v_pk_fmac_f16_dpp v5, v1, |v2| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp16.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp16.txt @@ -1898,6 +1898,87 @@ # GFX10: v_or_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1b,0x04,0x00] 0xfa,0x04,0x0a,0x38,0x01,0x1b,0x04,0x00 +# GFX10: v_pk_fmac_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0xfe,0x79,0x01,0xe4,0x00,0x00] +0xfa,0x04,0xfe,0x79,0x01,0xe4,0x00,0x00 + +# GFX10: v_pk_fmac_f16_dpp v5, -v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x10,0x00] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x10,0x00 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x08,0x00] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x08,0x00 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x01] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x01 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x03] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x03 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x0f] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x0f + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x1 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x10] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x10 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x30] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x30 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xf0] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xf0 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x1b,0x00,0x00 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 row_half_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x41,0x01,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x41,0x01,0x00 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 row_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x40,0x01,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x40,0x01,0x00 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 row_ror:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x21,0x01,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x21,0x01,0x00 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 row_ror:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x2f,0x01,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x2f,0x01,0x00 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 row_share:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x51,0x01,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x51,0x01,0x00 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x5f,0x01,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x5f,0x01,0x00 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 row_shl:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x01,0x01,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x01,0x01,0x00 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 row_shl:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x0f,0x01,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x0f,0x01,0x00 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 row_shr:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x11,0x01,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x11,0x01,0x00 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 row_shr:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x1f,0x01,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x1f,0x01,0x00 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 row_xmask:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x61,0x01,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x61,0x01,0x00 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 row_xmask:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x6f,0x01,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x6f,0x01,0x00 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v255 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xfe,0x0b,0x78,0x01,0xe4,0x00,0x00] +0xfa,0xfe,0x0b,0x78,0x01,0xe4,0x00,0x00 + +# GFX10: v_pk_fmac_f16_dpp v5, v255, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0xff,0xe4,0x00,0x00] +0xfa,0x04,0x0a,0x78,0xff,0xe4,0x00,0x00 + +# GFX10: v_pk_fmac_f16_dpp v5, |v1|, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x20,0x00] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x20,0x00 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x1b,0x00,0x00 + # GFX10: v_sub_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0xfe,0x67,0x01,0xe4,0x00,0x00] 0xfa,0x04,0xfe,0x67,0x01,0xe4,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp8.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp8.txt @@ -148,6 +148,12 @@ # GFX10: v_or_b32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x38,0x01,0x88,0xc6,0xfa] 0xea,0x04,0x0a,0x38,0x01,0x88,0xc6,0xfa +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x78,0x01,0x77,0x39,0x05] +0xe9,0x04,0x0a,0x78,0x01,0x77,0x39,0x05 + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x78,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x0a,0x78,0x01,0x88,0xc6,0xfa + # GFX10: v_xor_b32_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x0a,0x3a,0x01,0x88,0xc6,0xfa] 0xea,0x04,0x0a,0x3a,0x01,0x88,0xc6,0xfa diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp16.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp16.txt @@ -1301,6 +1301,48 @@ # GFX11: v_or_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x39,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x39,0xff,0x6f,0x0d,0x30 +# GFX11: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x1b,0x00,0xff] +0xfa,0x04,0x0a,0x78,0x01,0x1b,0x00,0xff + +# GFX11: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff + +# GFX11: v_pk_fmac_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x40,0x01,0xff] +0xfa,0x04,0x0a,0x78,0x01,0x40,0x01,0xff + +# GFX11: v_pk_fmac_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x41,0x01,0xff] +0xfa,0x04,0x0a,0x78,0x01,0x41,0x01,0xff + +# GFX11: v_pk_fmac_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x01,0x01,0xff] +0xfa,0x04,0x0a,0x78,0x01,0x01,0x01,0xff + +# GFX11: v_pk_fmac_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x0f,0x01,0xff] +0xfa,0x04,0x0a,0x78,0x01,0x0f,0x01,0xff + +# GFX11: v_pk_fmac_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x11,0x01,0xff] +0xfa,0x04,0x0a,0x78,0x01,0x11,0x01,0xff + +# GFX11: v_pk_fmac_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x1f,0x01,0xff] +0xfa,0x04,0x0a,0x78,0x01,0x1f,0x01,0xff + +# GFX11: v_pk_fmac_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x21,0x01,0xff] +0xfa,0x04,0x0a,0x78,0x01,0x21,0x01,0xff + +# GFX11: v_pk_fmac_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x2f,0x01,0xff] +0xfa,0x04,0x0a,0x78,0x01,0x2f,0x01,0xff + +# GFX11: v_pk_fmac_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x50,0x01,0xff] +0xfa,0x04,0x0a,0x78,0x01,0x50,0x01,0xff + +# GFX11: v_pk_fmac_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x5f,0x01,0x01] +0xfa,0x04,0x0a,0x78,0x01,0x5f,0x01,0x01 + +# GFX11: v_pk_fmac_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x60,0x01,0x13] +0xfa,0x04,0x0a,0x78,0x01,0x60,0x01,0x13 + +# GFX11: v_pk_fmac_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x78,0x7f,0x6f,0xfd,0x30] +0xfa,0xfe,0xfe,0x78,0x7f,0x6f,0xfd,0x30 + # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp8.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp8.txt @@ -185,6 +185,12 @@ # GFX11: v_or_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x39,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x39,0xff,0x00,0x00,0x00 +# GFX11: v_pk_fmac_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x78,0x01,0x77,0x39,0x05] +0xe9,0x04,0x0a,0x78,0x01,0x77,0x39,0x05 + +# GFX11: v_pk_fmac_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x78,0x7f,0x00,0x00,0x00] +0xea,0xfe,0xfe,0x78,0x7f,0x00,0x00,0x00 + # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx908-xdl-insts.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx908-xdl-insts.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx908-xdl-insts.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx908-xdl-insts.txt @@ -478,3 +478,63 @@ # CHECK: v_pk_fmac_f16_e32 v5, v1, v255 ; encoding: [0x01,0xff,0x0b,0x78] 0x01,0xff,0x0b,0x78 + +# CHECK: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00 + +# CHECK: v_pk_fmac_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0xfe,0x79,0x01,0xe4,0x00,0x00] +0xfa,0x04,0xfe,0x79,0x01,0xe4,0x00,0x00 + +# CHECK: v_pk_fmac_f16_dpp v5, v255, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0xff,0xe4,0x00,0x00] +0xfa,0x04,0x0a,0x78,0xff,0xe4,0x00,0x00 + +# CHECK: v_pk_fmac_f16_dpp v5, v1, v255 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0xfe,0x0b,0x78,0x01,0xe4,0x00,0x00] +0xfa,0xfe,0x0b,0x78,0x01,0xe4,0x00,0x00 + +# CHECK: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x1b,0x00,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x1b,0x00,0x00 + +# CHECK: v_pk_fmac_f16_dpp v5, v1, v2 row_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x40,0x01,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x40,0x01,0x00 + +# CHECK: v_pk_fmac_f16_dpp v5, v1, v2 row_half_mirror row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x41,0x01,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x41,0x01,0x00 + +# CHECK: v_pk_fmac_f16_dpp v5, v1, v2 row_shl:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x01,0x01,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x01,0x01,0x00 + +# CHECK: v_pk_fmac_f16_dpp v5, v1, v2 row_shl:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x0f,0x01,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x0f,0x01,0x00 + +# CHECK: v_pk_fmac_f16_dpp v5, v1, v2 row_shr:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x11,0x01,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x11,0x01,0x00 + +# CHECK: v_pk_fmac_f16_dpp v5, v1, v2 row_shr:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x1f,0x01,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x1f,0x01,0x00 + +# CHECK: v_pk_fmac_f16_dpp v5, v1, v2 row_ror:1 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x21,0x01,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x21,0x01,0x00 + +# CHECK: v_pk_fmac_f16_dpp v5, v1, v2 row_ror:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0x2f,0x01,0x00] +0xfa,0x04,0x0a,0x78,0x01,0x2f,0x01,0x00 + +# CHECK: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x1 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x10] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x10 + +# CHECK: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x30] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x30 + +# CHECK: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xf0] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xf0 + +# CHECK: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x01] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x01 + +# CHECK: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x03] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x03 + +# CHECK: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x0f] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x0f + +# CHECK: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x08,0x00] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x08,0x00