Index: ../commit/llvm_clean/include/llvm/IR/IntrinsicsX86.td =================================================================== --- ../commit/llvm_clean/include/llvm/IR/IntrinsicsX86.td +++ ../commit/llvm_clean/include/llvm/IR/IntrinsicsX86.td @@ -3857,12 +3857,18 @@ def int_x86_avx512_mask_vcvtph2ps_256 : GCCBuiltin<"__builtin_ia32_vcvtph2ps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8i16_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vcvtph2ps_128 : GCCBuiltin<"__builtin_ia32_vcvtph2ps128_mask">, + def int_x86_avx512_mask_vcvtph2ps_128 : GCCBuiltin<"__builtin_ia32_vcvtph2ps_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vcvtps2ph_512 : GCCBuiltin<"__builtin_ia32_vcvtps2ph512_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// Index: ../commit/llvm_clean/lib/Target/X86/X86InstrAVX512.td =================================================================== --- ../commit/llvm_clean/lib/Target/X86/X86InstrAVX512.td +++ ../commit/llvm_clean/lib/Target/X86/X86InstrAVX512.td @@ -5345,8 +5345,7 @@ } } -multiclass avx512_cvtph2ps_sae { +multiclass avx512_cvtph2ps_sae { defm rb : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src), "vcvtph2ps", "{sae}, $src", "$src, {sae}", (X86cvtph2ps (_src.VT _src.RC:$src), @@ -5356,7 +5355,7 @@ let Predicates = [HasAVX512] in { defm VCVTPH2PSZ : avx512_cvtph2ps, - avx512_cvtph2ps_sae, + avx512_cvtph2ps_sae, EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; let Predicates = [HasVLX] in { defm VCVTPH2PSZ256 : avx512_cvtph2ps { - def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst), - (ins srcRC:$src1, i32u8imm:$src2), - "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX; - let hasSideEffects = 0, mayStore = 1 in - def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), - (ins x86memop:$dst, srcRC:$src1, i32u8imm:$src2), - "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX; +multiclass avx512_cvtps2ph { + defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst), + (ins _src.RC:$src1, i32u8imm:$src2), + "vcvtps2ph", "$src2, $src1", "$src1, $src2", + (X86cvtps2ph (_src.VT _src.RC:$src1), + (i32 imm:$src2), + (i32 FROUND_CURRENT))>, AVX512AIi8Base; + let hasSideEffects = 0, mayStore = 1 in { + def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), + (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(store (_dest.VT (X86cvtps2ph (_src.VT _src.RC:$src1), + (i32 imm:$src2), (i32 FROUND_CURRENT) )), + addr:$dst)]>; + def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs), + (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", + []>, EVEX_K; + } +} +multiclass avx512_cvtps2ph_sae { + defm rb : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst), + (ins _src.RC:$src1, i32u8imm:$src2), + "vcvtps2ph", "$src2, {sae}, $src1", "$src1, $src2, {sae}", + (X86cvtps2ph (_src.VT _src.RC:$src1), + (i32 imm:$src2), + (i32 FROUND_NO_EXC))>, EVEX_B, AVX512AIi8Base; +} +let Predicates = [HasAVX512] in { + defm VCVTPS2PHZ : avx512_cvtps2ph, + avx512_cvtps2ph_sae, + EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; + let Predicates = [HasVLX] in { + defm VCVTPS2PHZ256 : avx512_cvtps2ph, + EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; + defm VCVTPS2PHZ128 : avx512_cvtps2ph, + EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; + } } - -defm VCVTPS2PHZ : avx512_cvtps2ph, EVEX_V512, - EVEX_CD8<32, CD8VH>; - -def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512 (v16f32 VR512:$src), - imm:$rc, (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))), - (VCVTPS2PHZrr VR512:$src, imm:$rc)>; - let Defs = [EFLAGS], Predicates = [HasAVX512] in { defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, "ucomiss">, PS, EVEX, VEX_LIG, Index: ../commit/llvm_clean/lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- ../commit/llvm_clean/lib/Target/X86/X86InstrFragmentsSIMD.td +++ ../commit/llvm_clean/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -513,6 +513,11 @@ SDTCVecEltisVT<1, i16>, SDTCisFP<0>, SDTCisInt<2>]> >; +def X86cvtps2ph : SDNode<"ISD::FP_TO_FP16", + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, + SDTCVecEltisVT<0, i16>, + SDTCVecEltisVT<1, f32>, + SDTCisFP<1>, SDTCisInt<2>, SDTCisInt<3>]> >; def X86vfpextRnd : SDNode<"X86ISD::VFPEXT", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisFP<0>, SDTCisFP<1>, Index: ../commit/llvm_clean/lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- ../commit/llvm_clean/lib/Target/X86/X86IntrinsicsInfo.h +++ ../commit/llvm_clean/lib/Target/X86/X86IntrinsicsInfo.h @@ -1362,6 +1362,12 @@ ISD::FP16_TO_FP, 0), X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK_RM, ISD::FP16_TO_FP, 0), + X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, INTR_TYPE_2OP_MASK_RM, + ISD::FP_TO_FP16, 0), + X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_256, INTR_TYPE_2OP_MASK_RM, + ISD::FP_TO_FP16, 0), + X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, INTR_TYPE_2OP_MASK_RM, + ISD::FP_TO_FP16, 0), X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0), X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0), X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD, Index: ../commit/llvm_clean/test/CodeGen/X86/avx512vl-intrinsics.ll =================================================================== --- ../commit/llvm_clean/test/CodeGen/X86/avx512vl-intrinsics.ll +++ ../commit/llvm_clean/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -5315,3 +5315,35 @@ } declare <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16>, <8 x float>, i8) nounwind readonly + +define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0) { + ; CHECK: test_x86_vcvtps2ph_128 + ; CHECK: vcvtps2ph $2, %xmm0, %xmm0 + %res = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_x86_vcvtps2ph_128_rrkz(<4 x float> %a0, i8 %mask) { + ; CHECK: test_x86_vcvtps2ph_128_rrkz + ; CHECK: vcvtps2ph $2, %xmm0, %xmm0 {%k1} {z} + %res = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float>, i32, <8 x i16>, i8) nounwind readonly + +define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0) { + ; CHECK: test_x86_vcvtps2ph_256 + ; CHECK: vcvtps2ph $2, %ymm0, %xmm0 + %res = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_x86_vcvtps2ph_256_rrkz(<8 x float> %a0, i8 %mask) { + ; CHECK: test_x86_vcvtps2ph_256_rrkz + ; CHECK: vcvtps2ph $2, %ymm0, %xmm0 {%k1} {z} + %res = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float>, i32, <8 x i16>, i8) nounwind readonly Index: ../commit/llvm_clean/test/MC/X86/avx512-encodings.s =================================================================== --- ../commit/llvm_clean/test/MC/X86/avx512-encodings.s +++ ../commit/llvm_clean/test/MC/X86/avx512-encodings.s @@ -17896,3 +17896,59 @@ // CHECK: vcvtph2ps -4128(%rdx), %zmm13 // CHECK: encoding: [0x62,0x72,0x7d,0x48,0x13,0xaa,0xe0,0xef,0xff,0xff] vcvtph2ps -4128(%rdx), %zmm13 + +// CHECK: vcvtps2ph $171, %zmm14, %ymm11 +// CHECK: encoding: [0x62,0x53,0x7d,0x48,0x1d,0xf3,0xab] + vcvtps2ph $0xab, %zmm14, %ymm11 + +// CHECK: vcvtps2ph $171, %zmm14, %ymm11 {%k6} +// CHECK: encoding: [0x62,0x53,0x7d,0x4e,0x1d,0xf3,0xab] + vcvtps2ph $0xab, %zmm14, %ymm11 {%k6} + +// CHECK: vcvtps2ph $171, %zmm14, %ymm11 {%k6} {z} +// CHECK: encoding: [0x62,0x53,0x7d,0xce,0x1d,0xf3,0xab] + vcvtps2ph $0xab, %zmm14, %ymm11 {%k6} {z} + +// CHECK: vcvtps2ph $171,{sae}, %zmm14, %ymm11 +// CHECK: encoding: [0x62,0x53,0x7d,0x18,0x1d,0xf3,0xab] + vcvtps2ph $0xab,{sae}, %zmm14, %ymm11 + +// CHECK: vcvtps2ph $123, %zmm14, %ymm11 +// CHECK: encoding: [0x62,0x53,0x7d,0x48,0x1d,0xf3,0x7b] + vcvtps2ph $0x7b, %zmm14, %ymm11 + +// CHECK: vcvtps2ph $123,{sae}, %zmm14, %ymm11 +// CHECK: encoding: [0x62,0x53,0x7d,0x18,0x1d,0xf3,0x7b] + vcvtps2ph $0x7b,{sae}, %zmm14, %ymm11 + +// CHECK: vcvtps2ph $171, %zmm19, (%rcx) +// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1d,0x19,0xab] + vcvtps2ph $0xab, %zmm19, (%rcx) + +// CHECK: vcvtps2ph $171, %zmm19, (%rcx) {%k6} +// CHECK: encoding: [0x62,0xe3,0x7d,0x4e,0x1d,0x19,0xab] + vcvtps2ph $0xab, %zmm19, (%rcx) {%k6} + +// CHECK: vcvtps2ph $123, %zmm19, (%rcx) +// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1d,0x19,0x7b] + vcvtps2ph $0x7b, %zmm19, (%rcx) + +// CHECK: vcvtps2ph $123, %zmm19, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa3,0x7d,0x48,0x1d,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vcvtps2ph $0x7b, %zmm19, 291(%rax,%r14,8) + +// CHECK: vcvtps2ph $123, %zmm19, 4064(%rdx) +// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1d,0x5a,0x7f,0x7b] + vcvtps2ph $0x7b, %zmm19, 4064(%rdx) + +// CHECK: vcvtps2ph $123, %zmm19, 4096(%rdx) +// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1d,0x9a,0x00,0x10,0x00,0x00,0x7b] + vcvtps2ph $0x7b, %zmm19, 4096(%rdx) + +// CHECK: vcvtps2ph $123, %zmm19, -4096(%rdx) +// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1d,0x5a,0x80,0x7b] + vcvtps2ph $0x7b, %zmm19, -4096(%rdx) + +// CHECK: vcvtps2ph $123, %zmm19, -4128(%rdx) +// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1d,0x9a,0xe0,0xef,0xff,0xff,0x7b] + vcvtps2ph $0x7b, %zmm19, -4128(%rdx) \ No newline at end of file Index: ../commit/llvm_clean/test/MC/X86/x86-64-avx512f_vl.s =================================================================== --- ../commit/llvm_clean/test/MC/X86/x86-64-avx512f_vl.s +++ ../commit/llvm_clean/test/MC/X86/x86-64-avx512f_vl.s @@ -21882,3 +21882,99 @@ // CHECK: vcvtph2ps -2064(%rdx), %ymm30 // CHECK: encoding: [0x62,0x62,0x7d,0x28,0x13,0xb2,0xf0,0xf7,0xff,0xff] vcvtph2ps -2064(%rdx), %ymm30 + +// CHECK: vcvtps2ph $171, %xmm25, %xmm21 +// CHECK: encoding: [0x62,0x23,0x7d,0x08,0x1d,0xcd,0xab] + vcvtps2ph $0xab, %xmm25, %xmm21 + +// CHECK: vcvtps2ph $171, %xmm25, %xmm21 {%k5} +// CHECK: encoding: [0x62,0x23,0x7d,0x0d,0x1d,0xcd,0xab] + vcvtps2ph $0xab, %xmm25, %xmm21 {%k5} + +// CHECK: vcvtps2ph $171, %xmm25, %xmm21 {%k5} {z} +// CHECK: encoding: [0x62,0x23,0x7d,0x8d,0x1d,0xcd,0xab] + vcvtps2ph $0xab, %xmm25, %xmm21 {%k5} {z} + +// CHECK: vcvtps2ph $123, %xmm25, %xmm21 +// CHECK: encoding: [0x62,0x23,0x7d,0x08,0x1d,0xcd,0x7b] + vcvtps2ph $0x7b, %xmm25, %xmm21 + +// CHECK: vcvtps2ph $171, %ymm28, %xmm23 +// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x1d,0xe7,0xab] + vcvtps2ph $0xab, %ymm28, %xmm23 + +// CHECK: vcvtps2ph $171, %ymm28, %xmm23 {%k3} +// CHECK: encoding: [0x62,0x23,0x7d,0x2b,0x1d,0xe7,0xab] + vcvtps2ph $0xab, %ymm28, %xmm23 {%k3} + +// CHECK: vcvtps2ph $171, %ymm28, %xmm23 {%k3} {z} +// CHECK: encoding: [0x62,0x23,0x7d,0xab,0x1d,0xe7,0xab] + vcvtps2ph $0xab, %ymm28, %xmm23 {%k3} {z} + +// CHECK: vcvtps2ph $123, %ymm28, %xmm23 +// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x1d,0xe7,0x7b] + vcvtps2ph $0x7b, %ymm28, %xmm23 + +// CHECK: vcvtps2ph $171, %xmm27, (%rcx) +// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x1d,0x19,0xab] + vcvtps2ph $0xab, %xmm27, (%rcx) + +// CHECK: vcvtps2ph $171, %xmm27, (%rcx) {%k4} +// CHECK: encoding: [0x62,0x63,0x7d,0x0c,0x1d,0x19,0xab] + vcvtps2ph $0xab, %xmm27, (%rcx) {%k4} + +// CHECK: vcvtps2ph $123, %xmm27, (%rcx) +// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x1d,0x19,0x7b] + vcvtps2ph $0x7b, %xmm27, (%rcx) + +// CHECK: vcvtps2ph $123, %xmm27, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x23,0x7d,0x08,0x1d,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vcvtps2ph $0x7b, %xmm27, 291(%rax,%r14,8) + +// CHECK: vcvtps2ph $123, %xmm27, 1016(%rdx) +// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x1d,0x5a,0x7f,0x7b] + vcvtps2ph $0x7b, %xmm27, 1016(%rdx) + +// CHECK: vcvtps2ph $123, %xmm27, 1024(%rdx) +// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x1d,0x9a,0x00,0x04,0x00,0x00,0x7b] + vcvtps2ph $0x7b, %xmm27, 1024(%rdx) + +// CHECK: vcvtps2ph $123, %xmm27, -1024(%rdx) +// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x1d,0x5a,0x80,0x7b] + vcvtps2ph $0x7b, %xmm27, -1024(%rdx) + +// CHECK: vcvtps2ph $123, %xmm27, -1032(%rdx) +// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x1d,0x9a,0xf8,0xfb,0xff,0xff,0x7b] + vcvtps2ph $0x7b, %xmm27, -1032(%rdx) + +// CHECK: vcvtps2ph $171, %ymm30, (%rcx) +// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x1d,0x31,0xab] + vcvtps2ph $0xab, %ymm30, (%rcx) + +// CHECK: vcvtps2ph $171, %ymm30, (%rcx) {%k2} +// CHECK: encoding: [0x62,0x63,0x7d,0x2a,0x1d,0x31,0xab] + vcvtps2ph $0xab, %ymm30, (%rcx) {%k2} + +// CHECK: vcvtps2ph $123, %ymm30, (%rcx) +// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x1d,0x31,0x7b] + vcvtps2ph $0x7b, %ymm30, (%rcx) + +// CHECK: vcvtps2ph $123, %ymm30, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x1d,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b] + vcvtps2ph $0x7b, %ymm30, 291(%rax,%r14,8) + +// CHECK: vcvtps2ph $123, %ymm30, 2032(%rdx) +// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x1d,0x72,0x7f,0x7b] + vcvtps2ph $0x7b, %ymm30, 2032(%rdx) + +// CHECK: vcvtps2ph $123, %ymm30, 2048(%rdx) +// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x1d,0xb2,0x00,0x08,0x00,0x00,0x7b] + vcvtps2ph $0x7b, %ymm30, 2048(%rdx) + +// CHECK: vcvtps2ph $123, %ymm30, -2048(%rdx) +// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x1d,0x72,0x80,0x7b] + vcvtps2ph $0x7b, %ymm30, -2048(%rdx) + +// CHECK: vcvtps2ph $123, %ymm30, -2064(%rdx) +// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x1d,0xb2,0xf0,0xf7,0xff,0xff,0x7b] + vcvtps2ph $0x7b, %ymm30, -2064(%rdx)