diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -863,9 +863,18 @@ // 16-Bit Operand Instructions //===----------------------------------------------------------------------===// -def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16 { - // The ldexp.f16 intrinsic expects a i32 src1 operand, though the hardware - // encoding treats src1 as an f16 +// The ldexp.f16 intrinsic expects an integer src1 operand, though the hardware +// encoding treats src1 as an f16 +def LDEXP_F16_VOPProfile : VOPProfile <[f16, f16, f16, untyped]> { + let Src1Mod = Int32InputMods; + let Src1ModDPP = IntVRegInputMods; + let Src1ModVOP3DPP = IntVRegInputMods; + // SDWA sext is the only modifier allowed. + let HasSrc1IntMods = 1; + let HasSrc1FloatMods = 0; + let Src1ModSDWA = Int16SDWAInputMods; +} +def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16 { let Src1RC32 = RegisterOperand; let Src1DPP = VGPR_32_Lo128; let Src1ModDPP = IntT16VRegInputMods; @@ -874,9 +883,9 @@ let isReMaterializable = 1 in { let FPDPRounding = 1 in { let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in - defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I16, any_fldexp>; + defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", LDEXP_F16_VOPProfile>; let SubtargetPredicate = HasTrue16BitInsts in - defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16, any_fldexp>; + defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16>; } // End FPDPRounding = 1 // FIXME VOP3 Only instructions. 
NFC using VOPProfile_True16 for these until a planned change to use a new register class for VOP3 encoded True16 instuctions defm V_LSHLREV_B16 : VOP2Inst_e64_t16 <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>; @@ -899,6 +908,21 @@ } // End isCommutable = 1 } // End isReMaterializable = 1 +class LDEXP_F16_Pat : GCNPat < + (P.DstVT (op (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), + (i16 (VOP3Mods0 P.Src1VT:$src1, i32:$src1_modifiers)))), + (inst $src0_modifiers, $src0, + $src1_modifiers, $src1, + $clamp, /* clamp */ + $omod /* omod */) +>; + +let OtherPredicates = [NotHasTrue16BitInsts] in +def : LDEXP_F16_Pat; + +let OtherPredicates = [HasTrue16BitInsts] in +def : LDEXP_F16_Pat; + let SubtargetPredicate = isGFX11Plus in { let isCommutable = 1 in { defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_True16, and>; diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s --- a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s @@ -12931,11 +12931,11 @@ v_ldexp_f16_e64 v5, v1, -1 // GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0x83,0x01,0x00] -v_ldexp_f16_e64 v5, v1, 0x3800 -// GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] +v_ldexp_f16_e64 v5, v1, 0.5 +// GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0xe1,0x01,0x00] v_ldexp_f16_e64 v5, v1, -4.0 -// GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] +// GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0xef,0x01,0x00] v_ldexp_f16_e64 v5, -v1, v2 // GFX10: encoding: [0x05,0x00,0x3b,0xd5,0x01,0x05,0x02,0x20] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s @@ -734,7 +734,7 @@ // GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00] v_ldexp_f16_e64 v5, m0, 0.5 -// GFX11: encoding: 
[0x05,0x00,0x3b,0xd5,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00] v_ldexp_f16_e64 v5, exec_lo, -1 // GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7e,0x82,0x01,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx8_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx8_asm_vop3.s --- a/llvm/test/MC/AMDGPU/gfx8_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx8_asm_vop3.s @@ -1,5 +1,4 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding -filetype=null 2>&1 %s | FileCheck -check-prefix=ERR --implicit-check-not=error %s +// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s v_interp_p1_f32_e64 v5, v2, attr0.x // CHECK: [0x05,0x00,0x70,0xd2,0x00,0x04,0x02,0x00] @@ -12827,10 +12826,10 @@ // CHECK: [0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00] v_ldexp_f16_e64 v5, v1, 0.5 -// ERR: [[@LINE-1]]:25: error: literal operands are not supported +// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00] v_ldexp_f16_e64 v5, v1, -4.0 -// ERR: [[@LINE-1]]:25: error: literal operands are not supported +// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00] v_ldexp_f16_e64 v5, v1, src_vccz // CHECK: [0x05,0x00,0x33,0xd1,0x01,0xf7,0x01,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx9_asm_vop3.s --- a/llvm/test/MC/AMDGPU/gfx9_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx9_asm_vop3.s @@ -1,5 +1,4 @@ -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=ERR --implicit-check-not=error %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s v_interp_p1_f32_e64 v5, v2, attr0.x // CHECK: [0x05,0x00,0x70,0xd2,0x00,0x04,0x02,0x00] @@ -11240,10 +11239,10 @@ // CHECK: [0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00] v_ldexp_f16_e64 v5, v1, 0.5 -// ERR: [[@LINE-1]]:25: error: literal operands are not supported +// CHECK: 
[0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00] v_ldexp_f16_e64 v5, v1, -4.0 -// ERR: [[@LINE-1]]:25: error: literal operands are not supported +// CHECK: [0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00] v_ldexp_f16_e64 v5, v1, src_vccz // CHECK: [0x05,0x00,0x33,0xd1,0x01,0xf7,0x01,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt @@ -7520,13 +7520,13 @@ # GFX10: v_ldexp_f16_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0x83,0x01,0x00] 0x05,0x00,0x3b,0xd5,0x01,0x83,0x01,0x00 -# GFX10: v_ldexp_f16_e64 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] +# GFX10: v_ldexp_f16_e64 v5, v1, -4.0 ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0xef,0x01,0x00] 0x05,0x00,0x3b,0xd5,0x01,0xef,0x01,0x00 # GFX10: v_ldexp_f16_e64 v5, v1, 0 ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0x01,0x01,0x00] 0x05,0x00,0x3b,0xd5,0x01,0x01,0x01,0x00 -# GFX10: v_ldexp_f16_e64 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] +# GFX10: v_ldexp_f16_e64 v5, v1, 0.5 ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0xe1,0x01,0x00] 0x05,0x00,0x3b,0xd5,0x01,0xe1,0x01,0x00 # GFX10: v_ldexp_f16_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0xff,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop2.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop2.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop2.txt @@ -543,7 +543,7 @@ # GFX11: v_ldexp_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00] 0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00 -# GFX11: v_ldexp_f16_e64 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x3b,0xd5,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# GFX11: v_ldexp_f16_e64 v5, m0, 0.5 ; encoding: 
[0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00] 0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00 # GFX11: v_ldexp_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x3b,0xd5,0x7e,0x82,0x01,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx8_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx8_vop3.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx8_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx8_vop3.txt @@ -11178,10 +11178,10 @@ # CHECK: v_ldexp_f16_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00] 0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00 -# CHECK: v_ldexp_f16_e64 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xff,0x01,0x00] +# CHECK: v_ldexp_f16_e64 v5, v1, 0.5 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00] 0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00 -# CHECK: v_ldexp_f16_e64 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xff,0x01,0x00] +# CHECK: v_ldexp_f16_e64 v5, v1, -4.0 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00] 0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00 # CHECK: v_ldexp_f16_e64 v5, -v1, v2 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0x05,0x02,0x20] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt @@ -8814,10 +8814,10 @@ # CHECK: v_ldexp_f16_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00] 0x05,0x00,0x33,0xd1,0x01,0x83,0x01,0x00 -# CHECK: v_ldexp_f16_e64 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xff,0x01,0x00] +# CHECK: v_ldexp_f16_e64 v5, v1, 0.5 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00] 0x05,0x00,0x33,0xd1,0x01,0xe1,0x01,0x00 -# CHECK: v_ldexp_f16_e64 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xff,0x01,0x00] +# CHECK: v_ldexp_f16_e64 v5, v1, -4.0 ; encoding: [0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00] 0x05,0x00,0x33,0xd1,0x01,0xef,0x01,0x00 # CHECK: v_ldexp_f16_e64 v5, -v1, v2 ; encoding: 
[0x05,0x00,0x33,0xd1,0x01,0x05,0x02,0x20]