Index: llvm/trunk/include/llvm/IR/IntrinsicsX86.td =================================================================== --- llvm/trunk/include/llvm/IR/IntrinsicsX86.td +++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td @@ -3854,6 +3854,12 @@ def int_x86_avx512_mask_vcvtph2ps_512 : GCCBuiltin<"__builtin_ia32_vcvtph2ps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16i16_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vcvtph2ps_256 : GCCBuiltin<"__builtin_ia32_vcvtph2ps256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8i16_ty, llvm_v8f32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vcvtph2ps_128 : GCCBuiltin<"__builtin_ia32_vcvtph2ps128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty, llvm_v4f32_ty, + llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vcvtps2ph_512 : GCCBuiltin<"__builtin_ia32_vcvtps2ph512_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td @@ -5331,14 +5331,39 @@ //===----------------------------------------------------------------------===// // Half precision conversion instructions //===----------------------------------------------------------------------===// -multiclass avx512_cvtph2ps { - def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src), - "vcvtph2ps\t{$src, $dst|$dst, $src}", - []>, EVEX; - let hasSideEffects = 0, mayLoad = 1 in - def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins x86memop:$src), - "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX; +multiclass avx512_cvtph2ps { + defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src), + "vcvtph2ps", "$src", "$src", + (X86cvtph2ps (_src.VT _src.RC:$src), + (i32 FROUND_CURRENT))>, T8PD; + let hasSideEffects = 0, mayLoad = 1 in { + defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), (ins x86memop:$src), + "vcvtph2ps", "$src", "$src", + (X86cvtph2ps (_src.VT (bitconvert (ld_frag addr:$src))), + (i32 FROUND_CURRENT))>, T8PD; + } +} + +multiclass avx512_cvtph2ps_sae { + defm rb : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src), + "vcvtph2ps", "{sae}, $src", "$src, {sae}", + (X86cvtph2ps (_src.VT _src.RC:$src), + (i32 FROUND_NO_EXC))>, T8PD, EVEX_B; + +} + +let Predicates = [HasAVX512] in { + defm VCVTPH2PSZ : avx512_cvtph2ps, + avx512_cvtph2ps_sae, + EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; + let Predicates = [HasVLX] in { + defm VCVTPH2PSZ256 : avx512_cvtph2ps,EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; + defm VCVTPH2PSZ128 : avx512_cvtph2ps, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; + } } multiclass avx512_cvtps2ph, EVEX; } -defm VCVTPH2PSZ : avx512_cvtph2ps, EVEX_V512, - EVEX_CD8<32, CD8VH>; defm VCVTPS2PHZ : avx512_cvtps2ph, EVEX_V512, EVEX_CD8<32, CD8VH>; @@ -5362,10 +5385,6 @@ imm:$rc, (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))), (VCVTPS2PHZrr VR512:$src, imm:$rc)>; -def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512 (v16i16 VR256X:$src), - (bc_v16f32(v16i32 immAllZerosV)), (i16 -1), (i32 FROUND_CURRENT))), - (VCVTPH2PSZrr VR256X:$src)>; - let Defs = [EFLAGS], Predicates = [HasAVX512] in { defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, "ucomiss">, PS, EVEX, VEX_LIG, Index: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -507,6 +507,12 @@ def X86cvtpd2Int : SDNode<"X86ISD::FP_TO_SINT_RND", SDTDoubleToInt>; def X86cvtpd2UInt : SDNode<"X86ISD::FP_TO_UINT_RND", SDTDoubleToInt>; +def X86cvtph2ps : SDNode<"ISD::FP16_TO_FP", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCVecEltisVT<0, f32>, + SDTCVecEltisVT<1, i16>, + SDTCisFP<0>, SDTCisInt<2>]> >; + def X86vfpextRnd : SDNode<"X86ISD::VFPEXT", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisFP<0>, SDTCisFP<1>, Index: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h @@ -1356,6 +1356,12 @@ X86ISD::VALIGN, 0), X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VALIGN, 0), + X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_128, INTR_TYPE_1OP_MASK_RM, + ISD::FP16_TO_FP, 0), + X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_256, INTR_TYPE_1OP_MASK_RM, + ISD::FP16_TO_FP, 0), + X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK_RM, + ISD::FP16_TO_FP, 0), X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0), X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0), X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD, Index: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll @@ -322,10 +322,40 @@ declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) { + ; CHECK: test_x86_vcvtph2ps_512 ; CHECK: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0] %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) ret <16 x float> %res } + +define <16 x float> @test_x86_vcvtph2ps_512_sae(<16 x i16> %a0) { +; CHECK: test_x86_vcvtph2ps_512_sae + ; CHECK: vcvtph2ps {sae}, %ymm0, %zmm0 + %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) + ret <16 x float> %res +} + +define <16 x float> @test_x86_vcvtph2ps_512_rrk(<16 x i16> %a0,<16 x float> %a1, i16 %mask) { + ; CHECK: test_x86_vcvtph2ps_512_rrk + ; CHECK: vcvtph2ps %ymm0, %zmm1 {%k1} + %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> %a1, i16 %mask, i32 4) + ret <16 x float> %res +} + +define <16 x float> @test_x86_vcvtph2ps_512_sae_rrkz(<16 x i16> %a0, i16 %mask) { + ; CHECK: test_x86_vcvtph2ps_512_sae_rrkz + ; CHECK: vcvtph2ps {sae}, %ymm0, %zmm0 {%k1} {z} + %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 8) + ret <16 x float> %res +} + +define <16 x float> @test_x86_vcvtph2ps_512_rrkz(<16 x i16> %a0, i16 %mask) { + ; CHECK: test_x86_vcvtph2ps_512_rrkz + ; CHECK: vcvtph2ps %ymm0, %zmm0 {%k1} {z} + %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4) + ret <16 x float> %res +} + declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly Index: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -5269,3 +5269,49 @@ ret <2 x i64> %res4 } +define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) { + ; CHECK: test_x86_vcvtph2ps_128 + ; CHECK: vcvtph2ps %xmm0, %xmm0 + %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 -1) + ret <4 x float> %res +} + +define <4 x float> @test_x86_vcvtph2ps_128_rrk(<8 x i16> %a0,<4 x float> %a1, i8 %mask) { + ; CHECK: test_x86_vcvtph2ps_128_rrk + ; CHECK: vcvtph2ps %xmm0, %xmm1 {%k1} + %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> %a1, i8 %mask) + ret <4 x float> %res +} + + +define <4 x float> @test_x86_vcvtph2ps_128_rrkz(<8 x i16> %a0, i8 %mask) { + ; CHECK: test_x86_vcvtph2ps_128_rrkz + ; CHECK: vcvtph2ps %xmm0, %xmm0 {%k1} {z} + %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 %mask) + ret <4 x float> %res +} + +declare <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16>, <4 x float>, i8) nounwind readonly + +define <8 x float> @test_x86_vcvtph2ps_256(<8 x i16> %a0) { + ; CHECK: test_x86_vcvtph2ps_256 + ; CHECK: vcvtph2ps %xmm0, %ymm0 + %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 -1) + ret <8 x float> %res +} + +define <8 x float> @test_x86_vcvtph2ps_256_rrk(<8 x i16> %a0,<8 x float> %a1, i8 %mask) { + ; CHECK: test_x86_vcvtph2ps_256_rrk + ; CHECK: vcvtph2ps %xmm0, %ymm1 {%k1} + %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> %a1, i8 %mask) + ret <8 x float> %res +} + +define <8 x float> @test_x86_vcvtph2ps_256_rrkz(<8 x i16> %a0, i8 %mask) { + ; CHECK: test_x86_vcvtph2ps_256_rrkz + ; CHECK: vcvtph2ps %xmm0, %ymm0 {%k1} {z} + %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 %mask) + ret <8 x float> %res +} + +declare <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16>, <8 x float>, i8) nounwind readonly Index: llvm/trunk/test/MC/X86/avx512-encodings.s =================================================================== --- llvm/trunk/test/MC/X86/avx512-encodings.s +++ llvm/trunk/test/MC/X86/avx512-encodings.s @@ -17857,3 +17857,42 @@ // CHECK: encoding: [0x62,0xd2,0xfd,0x48,0x7c,0xc8] vpbroadcastq %r8, %zmm1 +// CHECK: vcvtph2ps %ymm27, %zmm13 +// CHECK: encoding: [0x62,0x12,0x7d,0x48,0x13,0xeb] + vcvtph2ps %ymm27, %zmm13 + +// CHECK: vcvtph2ps %ymm27, %zmm13 {%k3} +// CHECK: encoding: [0x62,0x12,0x7d,0x4b,0x13,0xeb] + vcvtph2ps %ymm27, %zmm13 {%k3} + +// CHECK: vcvtph2ps %ymm27, %zmm13 {%k3} {z} +// CHECK: encoding: [0x62,0x12,0x7d,0xcb,0x13,0xeb] + vcvtph2ps %ymm27, %zmm13 {%k3} {z} + +// CHECK: vcvtph2ps {sae}, %ymm27, %zmm13 +// CHECK: encoding: [0x62,0x12,0x7d,0x18,0x13,0xeb] + vcvtph2ps {sae}, %ymm27, %zmm13 + +// CHECK: vcvtph2ps (%rcx), %zmm13 +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x13,0x29] + vcvtph2ps (%rcx), %zmm13 + +// CHECK: vcvtph2ps 291(%rax,%r14,8), %zmm13 +// CHECK: encoding: [0x62,0x32,0x7d,0x48,0x13,0xac,0xf0,0x23,0x01,0x00,0x00] + vcvtph2ps 291(%rax,%r14,8), %zmm13 + +// CHECK: vcvtph2ps 4064(%rdx), %zmm13 +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x13,0x6a,0x7f] + vcvtph2ps 4064(%rdx), %zmm13 + +// CHECK: vcvtph2ps 4096(%rdx), %zmm13 +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x13,0xaa,0x00,0x10,0x00,0x00] + vcvtph2ps 4096(%rdx), %zmm13 + +// CHECK: vcvtph2ps -4096(%rdx), %zmm13 +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x13,0x6a,0x80] + vcvtph2ps -4096(%rdx), %zmm13 + +// CHECK: vcvtph2ps -4128(%rdx), %zmm13 +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x13,0xaa,0xe0,0xef,0xff,0xff] + vcvtph2ps -4128(%rdx), %zmm13 Index: llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s =================================================================== --- llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s +++ llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s @@ -21811,3 +21811,74 @@ // CHECK: encoding: [0x62,0xc2,0xfd,0x28,0x7c,0xd8] vpbroadcastq %r8, %ymm19 +// CHECK: vcvtph2ps %xmm17, %xmm27 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x13,0xd9] + vcvtph2ps %xmm17, %xmm27 + +// CHECK: vcvtph2ps %xmm17, %xmm27 {%k2} +// CHECK: encoding: [0x62,0x22,0x7d,0x0a,0x13,0xd9] + vcvtph2ps %xmm17, %xmm27 {%k2} + +// CHECK: vcvtph2ps %xmm17, %xmm27 {%k2} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0x8a,0x13,0xd9] + vcvtph2ps %xmm17, %xmm27 {%k2} {z} + +// CHECK: vcvtph2ps (%rcx), %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x13,0x19] + vcvtph2ps (%rcx), %xmm27 + +// CHECK: vcvtph2ps 291(%rax,%r14,8), %xmm27 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x13,0x9c,0xf0,0x23,0x01,0x00,0x00] + vcvtph2ps 291(%rax,%r14,8), %xmm27 + +// CHECK: vcvtph2ps 1016(%rdx), %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x13,0x5a,0x7f] + vcvtph2ps 1016(%rdx), %xmm27 + +// CHECK: vcvtph2ps 1024(%rdx), %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x13,0x9a,0x00,0x04,0x00,0x00] + vcvtph2ps 1024(%rdx), %xmm27 + +// CHECK: vcvtph2ps -1024(%rdx), %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x13,0x5a,0x80] + vcvtph2ps -1024(%rdx), %xmm27 + +// CHECK: vcvtph2ps -1032(%rdx), %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x13,0x9a,0xf8,0xfb,0xff,0xff] + vcvtph2ps -1032(%rdx), %xmm27 + +// CHECK: vcvtph2ps %xmm22, %ymm30 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x13,0xf6] + vcvtph2ps %xmm22, %ymm30 + +// CHECK: vcvtph2ps %xmm22, %ymm30 {%k7} +// CHECK: encoding: [0x62,0x22,0x7d,0x2f,0x13,0xf6] + vcvtph2ps %xmm22, %ymm30 {%k7} + +// CHECK: vcvtph2ps %xmm22, %ymm30 {%k7} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xaf,0x13,0xf6] + vcvtph2ps %xmm22, %ymm30 {%k7} {z} + +// CHECK: vcvtph2ps (%rcx), %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x13,0x31] + vcvtph2ps (%rcx), %ymm30 + +// CHECK: vcvtph2ps 291(%rax,%r14,8), %ymm30 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x13,0xb4,0xf0,0x23,0x01,0x00,0x00] + vcvtph2ps 291(%rax,%r14,8), %ymm30 + +// CHECK: vcvtph2ps 2032(%rdx), %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x13,0x72,0x7f] + vcvtph2ps 2032(%rdx), %ymm30 + +// CHECK: vcvtph2ps 2048(%rdx), %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x13,0xb2,0x00,0x08,0x00,0x00] + vcvtph2ps 2048(%rdx), %ymm30 + +// CHECK: vcvtph2ps -2048(%rdx), %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x13,0x72,0x80] + vcvtph2ps -2048(%rdx), %ymm30 + +// CHECK: vcvtph2ps -2064(%rdx), %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x13,0xb2,0xf0,0xf7,0xff,0xff] + vcvtph2ps -2064(%rdx), %ymm30