Add AVX-512 scalar VGETEXPSS/VGETEXPSD support: masked intrinsic declarations,
instruction definition, a scalar SDNode, intrinsic-lowering table entries, and
CodeGen + MC encoding tests.
NOTE(review): indentation/column alignment inside the hunks below was
reconstructed from a whitespace-collapsed copy; confirm against the upstream
revision (or apply with whitespace-insensitive matching) before committing.

Index: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
===================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td
@@ -4860,6 +4860,13 @@
          Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
                     llvm_i32_ty], [IntrNoMem]>;
 
+  def int_x86_avx512_mask_getexp_ss : GCCBuiltin<"__builtin_ia32_getexpss_mask">,
+        Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
+                  llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_getexp_sd : GCCBuiltin<"__builtin_ia32_getexpsd_mask">,
+        Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
+                  llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
   def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss_mask">,
         Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
                   llvm_i8_ty], [IntrNoMem]>;
Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td
@@ -5210,6 +5210,8 @@
   defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s>, T8PD, EVEX_4V;
   defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s>, T8PD, EVEX_4V;
 }
+
+defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds>, T8PD, EVEX_4V;
 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
 
 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
Index: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -326,7 +326,8 @@
 def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>;
 def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>;
 def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>;
-def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>;
+def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>;
+def X86fgetexpRnds : SDNode<"X86ISD::FGETEXP_RND", STDFp2SrcRm>;
 
 def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
 def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
Index: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
@@ -659,6 +659,10 @@
                      X86ISD::FGETEXP_RND, 0),
   X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_RM,
                      X86ISD::FGETEXP_RND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_getexp_sd, INTR_TYPE_SCALAR_MASK_RM,
+                     X86ISD::FGETEXP_RND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_getexp_ss, INTR_TYPE_SCALAR_MASK_RM,
+                     X86ISD::FGETEXP_RND, 0),
   X86_INTRINSIC_DATA(avx512_mask_max_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
   X86_INTRINSIC_DATA(avx512_mask_max_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
   X86_INTRINSIC_DATA(avx512_mask_max_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
Index: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
@@ -3896,3 +3896,58 @@
   %res2 = fadd <2 x double> %res, %res1
   ret <2 x double> %res2
 }
+
+declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
+
+define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
+; CHECK-LABEL: test_getexp_ss:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vgetexpss %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
+; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1
+; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+  %res0 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
+  %res1 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
+  %res2 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
+  %res3 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
+
+  %res.1 = fadd <4 x float> %res0, %res1
+  %res.2 = fadd <4 x float> %res2, %res3
+  %res = fadd <4 x float> %res.1, %res.2
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
+
+define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
+; CHECK-LABEL: test_getexp_sd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4
+; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1
+; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+  %res0 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
+  %res1 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
+  %res2 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
+  %res3 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
+
+  %res.1 = fadd <2 x double> %res0, %res1
+  %res.2 = fadd <2 x double> %res2, %res3
+  %res = fadd <2 x double> %res.1, %res.2
+  ret <2 x double> %res
+}
+
Index: llvm/trunk/test/MC/X86/avx512-encodings.s
===================================================================
--- llvm/trunk/test/MC/X86/avx512-encodings.s
+++ llvm/trunk/test/MC/X86/avx512-encodings.s
@@ -14878,3 +14878,83 @@
 // CHECK: encoding: [0x62,0x61,0x85,0x58,0x6d,0x9a,0xf8,0xfb,0xff,0xff]
           vpunpckhqdq -1032(%rdx){1to8}, %zmm15, %zmm27
 
+// CHECK: vgetexpss %xmm26, %xmm1, %xmm20
+// CHECK: encoding: [0x62,0x82,0x75,0x08,0x43,0xe2]
+          vgetexpss %xmm26, %xmm1, %xmm20
+
+// CHECK: vgetexpss %xmm26, %xmm1, %xmm20 {%k7}
+// CHECK: encoding: [0x62,0x82,0x75,0x0f,0x43,0xe2]
+          vgetexpss %xmm26, %xmm1, %xmm20 {%k7}
+
+// CHECK: vgetexpss %xmm26, %xmm1, %xmm20 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x75,0x8f,0x43,0xe2]
+          vgetexpss %xmm26, %xmm1, %xmm20 {%k7} {z}
+
+// CHECK: vgetexpss {sae}, %xmm26, %xmm1, %xmm20
+// CHECK: encoding: [0x62,0x82,0x75,0x18,0x43,0xe2]
+          vgetexpss {sae}, %xmm26, %xmm1, %xmm20
+
+// CHECK: vgetexpss (%rcx), %xmm1, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x75,0x08,0x43,0x21]
+          vgetexpss (%rcx), %xmm1, %xmm20
+
+// CHECK: vgetexpss 291(%rax,%r14,8), %xmm1, %xmm20
+// CHECK: encoding: [0x62,0xa2,0x75,0x08,0x43,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vgetexpss 291(%rax,%r14,8), %xmm1, %xmm20
+
+// CHECK: vgetexpss 508(%rdx), %xmm1, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x75,0x08,0x43,0x62,0x7f]
+          vgetexpss 508(%rdx), %xmm1, %xmm20
+
+// CHECK: vgetexpss 512(%rdx), %xmm1, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x75,0x08,0x43,0xa2,0x00,0x02,0x00,0x00]
+          vgetexpss 512(%rdx), %xmm1, %xmm20
+
+// CHECK: vgetexpss -512(%rdx), %xmm1, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x75,0x08,0x43,0x62,0x80]
+          vgetexpss -512(%rdx), %xmm1, %xmm20
+
+// CHECK: vgetexpss -516(%rdx), %xmm1, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x75,0x08,0x43,0xa2,0xfc,0xfd,0xff,0xff]
+          vgetexpss -516(%rdx), %xmm1, %xmm20
+
+// CHECK: vgetexpsd %xmm2, %xmm7, %xmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x08,0x43,0xd2]
+          vgetexpsd %xmm2, %xmm7, %xmm2
+
+// CHECK: vgetexpsd %xmm2, %xmm7, %xmm2 {%k5}
+// CHECK: encoding: [0x62,0xf2,0xc5,0x0d,0x43,0xd2]
+          vgetexpsd %xmm2, %xmm7, %xmm2 {%k5}
+
+// CHECK: vgetexpsd %xmm2, %xmm7, %xmm2 {%k5} {z}
+// CHECK: encoding: [0x62,0xf2,0xc5,0x8d,0x43,0xd2]
+          vgetexpsd %xmm2, %xmm7, %xmm2 {%k5} {z}
+
+// CHECK: vgetexpsd {sae}, %xmm2, %xmm7, %xmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x18,0x43,0xd2]
+          vgetexpsd {sae}, %xmm2, %xmm7, %xmm2
+
+// CHECK: vgetexpsd (%rcx), %xmm7, %xmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x08,0x43,0x11]
+          vgetexpsd (%rcx), %xmm7, %xmm2
+
+// CHECK: vgetexpsd 291(%rax,%r14,8), %xmm7, %xmm2
+// CHECK: encoding: [0x62,0xb2,0xc5,0x08,0x43,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vgetexpsd 291(%rax,%r14,8), %xmm7, %xmm2
+
+// CHECK: vgetexpsd 1016(%rdx), %xmm7, %xmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x08,0x43,0x52,0x7f]
+          vgetexpsd 1016(%rdx), %xmm7, %xmm2
+
+// CHECK: vgetexpsd 1024(%rdx), %xmm7, %xmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x08,0x43,0x92,0x00,0x04,0x00,0x00]
+          vgetexpsd 1024(%rdx), %xmm7, %xmm2
+
+// CHECK: vgetexpsd -1024(%rdx), %xmm7, %xmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x08,0x43,0x52,0x80]
+          vgetexpsd -1024(%rdx), %xmm7, %xmm2
+
+// CHECK: vgetexpsd -1032(%rdx), %xmm7, %xmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x08,0x43,0x92,0xf8,0xfb,0xff,0xff]
+          vgetexpsd -1032(%rdx), %xmm7, %xmm2
+