Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h @@ -302,8 +302,8 @@ // Vector FP round. VFPROUND, VFPROUND_RND, VFPROUNDS_RND, - // Vector double to signed integer (truncated). - CVTTPD2DQ, + // Vector double to signed/unsigned integer (truncated). + CVTTPD2DQ, CVTTPD2UDQ, // Vector signed/unsigned integer to double. CVTDQ2PD, CVTUDQ2PD, Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -1193,6 +1193,7 @@ setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom); @@ -22358,12 +22359,16 @@ case ISD::FP_TO_UINT: { bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT; - if (IsSigned && N->getValueType(0) == MVT::v2i32) { + if (N->getValueType(0) == MVT::v2i32) { + assert((IsSigned || Subtarget.hasAVX512()) && + "Can only handle signed conversion without AVX512"); assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); SDValue Src = N->getOperand(0); if (Src.getValueType() == MVT::v2f64) { SDValue Idx = DAG.getIntPtrConstant(0, dl); - SDValue Res = DAG.getNode(X86ISD::CVTTPD2DQ, dl, MVT::v4i32, Src); + SDValue Res = DAG.getNode(IsSigned ? X86ISD::CVTTPD2DQ + : X86ISD::CVTTPD2UDQ, + dl, MVT::v4i32, Src); Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx); Results.push_back(Res); return; @@ -22372,7 +22377,8 @@ SDValue Idx = DAG.getIntPtrConstant(0, dl); SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, DAG.getUNDEF(MVT::v2f32)); - Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, Res); + Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT + : ISD::FP_TO_UINT, dl, MVT::v4i32, Res); Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx); Results.push_back(Res); return; @@ -22700,6 +22706,7 @@ case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND"; case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND"; case X86ISD::CVTTPD2DQ: return "X86ISD::CVTTPD2DQ"; + case X86ISD::CVTTPD2UDQ: return "X86ISD::CVTTPD2UDQ"; case X86ISD::CVTDQ2PD: return "X86ISD::CVTDQ2PD"; case X86ISD::CVTUDQ2PD: return "X86ISD::CVTUDQ2PD"; case X86ISD::CVT2MASK: return "X86ISD::CVT2MASK"; Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td @@ -6314,8 +6314,8 @@ X86cvttp2uiRnd>, PS, EVEX_CD8<32, CD8VF>; -defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint, fp_to_uint, - X86cvttp2uiRnd>, PS, VEX_W, +defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint, + X86cvttpd2udq, X86cvttp2uiRnd>, PS, VEX_W, EVEX_CD8<64, CD8VF>; defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, X86cvtudq2pd>, @@ -6395,6 +6395,11 @@ (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_xmm)>; +def : Pat<(v4i32 (X86cvttpd2udq (v2f64 VR128X:$src))), + (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr + (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), + VR128X:$src, sub_xmm)))), sub_xmm)>; + def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))), (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), @@ -6416,6 +6421,9 @@ def : Pat<(X86vzmovl (v2i64 (bitconvert (v4i32 (X86cvttpd2dq (v2f64 VR128X:$src)))))), (VCVTTPD2DQZ128rr VR128:$src)>; + def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert + (v4i32 (X86cvttpd2udq (v2f64 VR128X:$src)))))))), + (VCVTTPD2UDQZ128rr VR128:$src)>; } let Predicates = [HasAVX512] in { Index: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -70,6 +70,9 @@ def X86cvttpd2dq: SDNode<"X86ISD::CVTTPD2DQ", SDTypeProfile<1, 1, [SDTCisVT<0, v4i32>, SDTCisVT<1, v2f64>]>>; +def X86cvttpd2udq: SDNode<"X86ISD::CVTTPD2UDQ", + SDTypeProfile<1, 1, [SDTCisVT<0, v4i32>, + SDTCisVT<1, v2f64>]>>; def X86cvtdq2pd: SDNode<"X86ISD::CVTDQ2PD", SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>, SDTCisVT<1, v4i32>]>>; Index: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h @@ -586,7 +586,7 @@ X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_512, INTR_TYPE_1OP_MASK, ISD::FP_TO_SINT, X86ISD::CVTTP2SI_RND), X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, INTR_TYPE_1OP_MASK, - ISD::FP_TO_UINT, 0), + X86ISD::CVTTPD2UDQ, 0), X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_256, INTR_TYPE_1OP_MASK, ISD::FP_TO_UINT, 0), X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_512, INTR_TYPE_1OP_MASK, Index: llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll +++ llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll @@ -407,33 +407,19 @@ ; ; AVX512F-LABEL: fptoui_2f64_to_4i32: ; AVX512F: # BB#0: -; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax -; AVX512F-NEXT: vmovq %rax, %xmm1 -; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax -; AVX512F-NEXT: vmovq %rax, %xmm0 -; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512F-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0 ; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: fptoui_2f64_to_4i32: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax -; AVX512VL-NEXT: vmovq %rax, %xmm1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax -; AVX512VL-NEXT: vmovq %rax, %xmm0 -; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512VLDQ-LABEL: fptoui_2f64_to_4i32: ; AVX512VLDQ: # BB#0: -; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0 -; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0 ; AVX512VLDQ-NEXT: retq %cvt = fptoui <2 x double> %a to <2 x i32> %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> @@ -491,30 +477,19 @@ ; ; AVX512F-LABEL: fptoui_2f64_to_2i32: ; AVX512F: # BB#0: -; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax -; AVX512F-NEXT: vmovq %rax, %xmm1 -; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax -; AVX512F-NEXT: vmovq %rax, %xmm0 -; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512F-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0 +; AVX512F-NEXT: # kill: %XMM0 %XMM0 %YMM0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: fptoui_2f64_to_2i32: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax -; AVX512VL-NEXT: vmovq %rax, %xmm1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax -; AVX512VL-NEXT: vmovq %rax, %xmm0 -; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512VLDQ-LABEL: fptoui_2f64_to_2i32: ; AVX512VLDQ: # BB#0: -; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0 -; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0 ; AVX512VLDQ-NEXT: retq %cvt = fptoui <2 x double> %a to <2 x i32> %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> @@ -1250,15 +1225,24 @@ ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] ; AVX-NEXT: retq ; -; AVX512-LABEL: fptoui_2f32_to_2i32: -; AVX512: # BB#0: -; AVX512-NEXT: vcvttss2usi %xmm0, %rax -; AVX512-NEXT: vmovq %rax, %xmm1 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; AVX512-NEXT: vcvttss2usi %xmm0, %rax -; AVX512-NEXT: vmovq %rax, %xmm0 -; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX512-NEXT: retq +; AVX512F-LABEL: fptoui_2f32_to_2i32: +; AVX512F: # BB#0: +; AVX512F-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0 +; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: fptoui_2f32_to_2i32: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0 +; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX512VL-NEXT: retq +; +; AVX512VLDQ-LABEL: fptoui_2f32_to_2i32: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0 +; AVX512VLDQ-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX512VLDQ-NEXT: retq %cvt = fptoui <2 x float> %a to <2 x i32> ret <2 x i32> %cvt }