Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -249,7 +249,8 @@ /// Note that these typically require refinement /// in order to obtain suitable precision. FRSQRT, FRCP, - + FRSQRTS, FRCPS, + // Thread Local Storage. TLSADDR, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -21782,7 +21782,9 @@ case X86ISD::FMAXC: return "X86ISD::FMAXC"; case X86ISD::FMINC: return "X86ISD::FMINC"; case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; + case X86ISD::FRSQRTS: return "X86ISD::FRSQRTS"; case X86ISD::FRCP: return "X86ISD::FRCP"; + case X86ISD::FRCPS: return "X86ISD::FRCPS"; case X86ISD::EXTRQI: return "X86ISD::EXTRQI"; case X86ISD::INSERTQI: return "X86ISD::INSERTQI"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; Index: lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- lib/Target/X86/X86InstrFragmentsSIMD.td +++ lib/Target/X86/X86InstrFragmentsSIMD.td @@ -60,8 +60,8 @@ [SDNPCommutative, SDNPAssociative]>; def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>; def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>; -def X86frsqrt14s: SDNode<"X86ISD::FRSQRT", SDTFPBinOp>; -def X86frcp14s : SDNode<"X86ISD::FRCP", SDTFPBinOp>; +def X86frsqrt14s: SDNode<"X86ISD::FRSQRTS", SDTFPBinOp>; +def X86frcp14s : SDNode<"X86ISD::FRCPS", SDTFPBinOp>; def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>; def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>; def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>; Index: lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -2119,8 +2119,8 @@ X86_INTRINSIC_DATA(avx512_rcp14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), X86_INTRINSIC_DATA(avx512_rcp14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0), - X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRCP, 0), - X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRCP, 0), + X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRCPS, 0), + X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRCPS, 0), X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0), X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0), X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0), @@ -2131,8 +2131,8 @@ X86_INTRINSIC_DATA(avx512_rsqrt14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), X86_INTRINSIC_DATA(avx512_rsqrt14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRT, 0), - X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRT, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRTS, 0), + X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRTS, 0), X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0), X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0), X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0), Index: test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics.ll +++ test/CodeGen/X86/avx512-intrinsics.ll @@ -126,26 +126,6 @@ } declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone -define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) { -; CHECK-LABEL: test_rsqrt14_ss: -; CHECK: ## BB#0: -; CHECK-NEXT: vrsqrt14ss %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: retq - %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1] - ret <4 x float> %res -} -declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone - -define <4 x float> @test_rcp14_ss(<4 x float> %a0) { -; CHECK-LABEL: test_rcp14_ss: -; CHECK: ## BB#0: -; CHECK-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: retq - %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1] - ret <4 x float> %res -} -declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone - define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) { ; CHECK-LABEL: test_sqrt_pd_512: ; CHECK: ## BB#0: Index: test/CodeGen/X86/avx512-scalarIntrinsics.ll =================================================================== --- test/CodeGen/X86/avx512-scalarIntrinsics.ll +++ test/CodeGen/X86/avx512-scalarIntrinsics.ll @@ -0,0 +1,43 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s + + +define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) { + ; CHECK-LABEL: test_rsqrt14_ss: + ; CHECK: ## BB#0: + ; CHECK-NEXT: vrsqrt14ss %xmm0, %xmm0, %xmm0 + ; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone + +define <4 x float> @test_rcp14_ss(<4 x float> %a0) { + ; CHECK-LABEL: test_rcp14_ss: + ; CHECK: ## BB#0: + ; CHECK-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0 + ; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} + +define <4 x float> @test_rsqrt14_sd(<4 x float> %a0) { + ; CHECK-LABEL: test_rsqrt14_sd: + ; CHECK: ## BB#0: + ; CHECK-NEXT: vrsqrt14sd %xmm0, %xmm0, %xmm0 + ; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.rsqrt14.sd(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.rsqrt14.sd(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone + +define <4 x float> @test_rcp14_sd(<4 x float> %a0) { + ; CHECK-LABEL: test_rcp14_sd: + ; CHECK: ## BB#0: + ; CHECK-NEXT: vrcp14sd %xmm0, %xmm0, %xmm0 + ; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.rcp14.sd(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} + +