Index: include/llvm/IR/IntrinsicsX86.td =================================================================== --- include/llvm/IR/IntrinsicsX86.td +++ include/llvm/IR/IntrinsicsX86.td @@ -3074,6 +3074,14 @@ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_exp2_ps : GCCBuiltin<"__builtin_ia32_exp2ps_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_exp2_pd : GCCBuiltin<"__builtin_ia32_exp2pd_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, + llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; } // Integer shift ops. Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -4198,7 +4198,7 @@ (COPY_TO_REGCLASS (VRSQRT28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X), (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>; -/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd +/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd, exp2pd, exp2ps multiclass avx512_fp28_p opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop> { let hasSideEffects = 0, Predicates = [HasERI] in { @@ -4223,6 +4223,10 @@ EVEX_V512, EVEX_CD8<32, CD8VF>; defm VRCP28PDZ : avx512_fp28_p<0xCA, "vrcp28pd", VR512, f512mem>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; +defm VEXP2PSZ : avx512_fp28_p<0xC8, "vexp2ps", VR512, f512mem>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VEXP2PDZ : avx512_fp28_p<0xC8, "vexp2pd", VR512, f512mem>, + VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; def : Pat <(v16f32 (int_x86_avx512_rsqrt28_ps (v16f32 VR512:$src), (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)), @@ -4238,6 +4242,19 @@ (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)), (VRCP28PDZrb VR512:$src)>; +def : Pat <(v16f32 (int_x86_avx512_exp2_ps (v16f32 VR512:$src), + (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)), + (VEXP2PSZrb VR512:$src)>; +def : Pat <(v16f32 (int_x86_avx512_exp2_ps (v16f32 VR512:$src), + (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)), + (VEXP2PSZr VR512:$src)>; +def : Pat <(v8f64 (int_x86_avx512_exp2_pd (v8f64 VR512:$src), + (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)), + (VEXP2PDZrb VR512:$src)>; +def : Pat <(v8f64 (int_x86_avx512_exp2_pd (v8f64 VR512:$src), + (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)), + (VEXP2PDZr VR512:$src)>; + multiclass avx512_sqrt_packed opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _>{ defm r: AVX512_maskable @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone +define <16 x float> @test_exp2_ps_round_no_exc(<16 x float> %a0) { + ; CHECK: vexp2ps {sae}, {{.*}}encoding: [0x62,0xf2,0x7d,0x18,0xc8,0xc0] + %res = call <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) ; <<16 x float>> [#uses=1] + ret <16 x float> %res +} +define <16 x float> @test_exp2_ps_round_current(<16 x float> %a0) { + ; CHECK: vexp2ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0xc8,0xc0] + %res = call <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) ; <<16 x float>> [#uses=1] + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone + +define <8 x double> @test_exp2_pd_round_no_exc(<8 x double> %a0) { + ; CHECK: vexp2pd {sae}, {{.*}}encoding: [0x62,0xf2,0xfd,0x18,0xc8,0xc0] + %res = call <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8) ; <<8 x double>> [#uses=1] + ret <8 x double> %res +} + +define <8 x double> @test_exp2_pd_round_current(<8 x double> %a0) { + ; CHECK: vexp2pd {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0xc8,0xc0] + %res = call <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) ; <<8 x double>> [#uses=1] + ret <8 x double> %res +} +declare <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone + define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) { ; CHECK: vrsqrt14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4f,0xc0] %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]