Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h @@ -250,7 +250,8 @@ /// Note that these typically require refinement /// in order to obtain suitable precision. FRSQRT, FRCP, - + FRSQRTS, FRCPS, + // Thread Local Storage. TLSADDR, Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -21753,7 +21753,9 @@ case X86ISD::FMAXC: return "X86ISD::FMAXC"; case X86ISD::FMINC: return "X86ISD::FMINC"; case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; + case X86ISD::FRSQRTS: return "X86ISD::FRSQRTS"; case X86ISD::FRCP: return "X86ISD::FRCP"; + case X86ISD::FRCPS: return "X86ISD::FRCPS"; case X86ISD::EXTRQI: return "X86ISD::EXTRQI"; case X86ISD::INSERTQI: return "X86ISD::INSERTQI"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; Index: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -60,8 +60,8 @@ [SDNPCommutative, SDNPAssociative]>; def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>; def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>; -def X86frsqrt14s: SDNode<"X86ISD::FRSQRT", SDTFPBinOp>; -def X86frcp14s : SDNode<"X86ISD::FRCP", SDTFPBinOp>; +def X86frsqrt14s: SDNode<"X86ISD::FRSQRTS", SDTFPBinOp>; +def X86frcp14s : SDNode<"X86ISD::FRCPS", SDTFPBinOp>; def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>; def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>; def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>; Index: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h @@ -2125,8 +2125,8 @@ X86_INTRINSIC_DATA(avx512_rcp14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), X86_INTRINSIC_DATA(avx512_rcp14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), - X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRCP, 0), - X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRCP, 0), + X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRCPS, 0), + X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRCPS, 0), X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0), X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0), X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0), @@ -2137,8 +2137,8 @@ X86_INTRINSIC_DATA(avx512_rsqrt14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), X86_INTRINSIC_DATA(avx512_rsqrt14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRT, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRT, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRTS, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRTS, 0), X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0), X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0), X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0), Index: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll @@ -126,26 +126,6 @@ } declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone -define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) { -; CHECK-LABEL: test_rsqrt14_ss: -; CHECK: ## BB#0: -; CHECK-NEXT: vrsqrt14ss %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: retq - %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1] - ret <4 x float> %res -} -declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone - -define <4 x float> @test_rcp14_ss(<4 x float> %a0) { -; CHECK-LABEL: test_rcp14_ss: -; CHECK: ## BB#0: -; CHECK-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: retq - %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1] - ret <4 x float> %res -} -declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone - define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) { ; CHECK-LABEL: test_sqrt_pd_512: ; CHECK: ## BB#0: Index: llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll @@ -1,6 +1,48 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s + +define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) { + ; CHECK-LABEL: test_rsqrt14_ss: + ; CHECK: ## BB#0: + ; CHECK-NEXT: vrsqrt14ss %xmm0, %xmm0, %xmm0 + ; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone + +define <4 x float> @test_rcp14_ss(<4 x float> %a0) { + ; CHECK-LABEL: test_rcp14_ss: + ; CHECK: ## BB#0: + ; CHECK-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0 + ; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone + +define <2 x double> @test_rsqrt14_sd(<2 x double> %a0) { + ; CHECK-LABEL: test_rsqrt14_sd: + ; CHECK: ## BB#0: + ; CHECK-NEXT: vrsqrt14sd %xmm0, %xmm0, %xmm0 + ; CHECK-NEXT: retq + %res = call <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 -1) ; + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx512.rsqrt14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone + +define <2 x double> @test_rcp14_sd(<2 x double> %a0) { + ; CHECK-LABEL: test_rcp14_sd: + ; CHECK: ## BB#0: + ; CHECK-NEXT: vrcp14sd %xmm0, %xmm0, %xmm0 + ; CHECK-NEXT: retq + %res = call <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 -1) ; + ret <2 x double> %res + +} +declare <2 x double> @llvm.x86.avx512.rcp14.sd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone + declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32) define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss: