Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -6148,8 +6148,8 @@ } // Convert Double to Signed/Unsigned Doubleword with truncation -multiclass avx512_cvttpd2dq opc, string OpcodeStr, - SDNode OpNode, SDNode OpNodeRnd> { +multiclass avx512_cvttpd2dq opc, string OpcodeStr, SDNode OpNode, + SDNode OpNode128, SDNode OpNodeRnd> { let Predicates = [HasAVX512] in { defm Z : avx512_vcvt_fp, avx512_vcvt_fp_sae, EVEX_V128; + defm Z128 : avx512_vcvt_fp, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; } @@ -6302,7 +6302,7 @@ X86cvttp2siRnd>, XS, EVEX_CD8<32, CD8VF>; -defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, +defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86cvttpd2dq, X86cvttp2siRnd>, PD, VEX_W, EVEX_CD8<64, CD8VF>; @@ -6310,7 +6310,7 @@ X86cvttp2uiRnd>, PS, EVEX_CD8<32, CD8VF>; -defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint, +defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint, fp_to_uint, X86cvttp2uiRnd>, PS, VEX_W, EVEX_CD8<64, CD8VF>; @@ -6411,10 +6411,6 @@ def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert (v4i32 (X86cvttpd2dq (v2f64 VR128X:$src)))))))), (VCVTTPD2DQZ128rr VR128:$src)>; - def : Pat<(v4i32 (X86cvttpd2dq (v2f64 VR128X:$src))), - (VCVTTPD2DQZ128rr VR128X:$src)>; - def : Pat<(v4i32 (X86cvttpd2dq (loadv2f64 addr:$src))), - (VCVTTPD2DQZ128rm addr:$src)>; } let Predicates = [HasAVX512] in { Index: lib/Target/X86/X86InstrSSE.td =================================================================== --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -2065,11 +2065,12 @@ (CVTTPS2DQrm addr:$src)>; } +let Predicates = [HasAVX, NoVLX] in def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvttpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>; + (v4i32 (X86cvttpd2dq (v2f64 VR128:$src))))], + IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>; // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. @@ -2078,10 +2079,11 @@ // XMM only def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}", (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0>; +let Predicates = [HasAVX, NoVLX] in def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttpd2dqx\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq - (loadv2f64 addr:$src)))], + [(set VR128:$dst, + (v4i32 (X86cvttpd2dq (loadv2f64 addr:$src))))], IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>; // YMM only @@ -2102,10 +2104,6 @@ def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert (v4i32 (X86cvttpd2dq (v2f64 VR128:$src)))))))), (VCVTTPD2DQrr VR128:$src)>; - def : Pat<(v4i32 (X86cvttpd2dq (v2f64 VR128:$src))), - (VCVTTPD2DQrr VR128:$src)>; - def : Pat<(v4i32 (X86cvttpd2dq (loadv2f64 addr:$src))), - (VCVTTPD2DQXrm addr:$src)>; def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))), (VCVTTPD2DQYrr VR256:$src)>; @@ -2115,23 +2113,19 @@ def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], + [(set VR128:$dst, + (v4i32 (X86cvttpd2dq (v2f64 VR128:$src))))], IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>; def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq - (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, - Sched<[WriteCvtF2ILd]>; + [(set VR128:$dst, + (v4i32 (X86cvttpd2dq (memopv2f64 addr:$src))))], + IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>; let Predicates = [UseSSE2] in { def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert (v4i32 (X86cvttpd2dq (v2f64 VR128:$src)))))))), (CVTTPD2DQrr VR128:$src)>; - def : Pat<(v4i32 (X86cvttpd2dq (v2f64 VR128:$src))), - (CVTTPD2DQrr VR128:$src)>; - def : Pat<(v4i32 (X86cvttpd2dq (memopv2f64 addr:$src))), - (CVTTPD2DQrm addr:$src)>; } // Predicates = [UseSSE2] // Convert packed single to packed double Index: lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -574,7 +574,7 @@ X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VFPEXTS_RND, 0), X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, INTR_TYPE_1OP_MASK, - ISD::FP_TO_SINT, 0), + X86ISD::CVTTPD2DQ, 0), X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_256, INTR_TYPE_1OP_MASK, ISD::FP_TO_SINT, 0), X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_512, INTR_TYPE_1OP_MASK, @@ -1754,6 +1754,7 @@ X86_INTRINSIC_DATA(sse2_comineq_sd, COMI, X86ISD::COMI, ISD::SETNE), X86_INTRINSIC_DATA(sse2_cvtdq2ps, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0), X86_INTRINSIC_DATA(sse2_cvtpd2ps, INTR_TYPE_1OP, X86ISD::VFPROUND, 0), + X86_INTRINSIC_DATA(sse2_cvttpd2dq, INTR_TYPE_1OP, X86ISD::CVTTPD2DQ, 0), X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0), X86_INTRINSIC_DATA(sse2_min_pd, INTR_TYPE_2OP, X86ISD::FMIN, 0), X86_INTRINSIC_DATA(sse2_movmsk_pd, INTR_TYPE_1OP, X86ISD::MOVMSK, 0), Index: test/CodeGen/X86/avx-intrinsics-x86.ll =================================================================== --- test/CodeGen/X86/avx-intrinsics-x86.ll +++ test/CodeGen/X86/avx-intrinsics-x86.ll @@ -338,10 +338,15 @@ define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) { -; CHECK-LABEL: test_x86_sse2_cvttpd2dq: -; CHECK: ## BB#0: -; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0] -; CHECK-NEXT: retl ## encoding: [0xc3] +; AVX-LABEL: test_x86_sse2_cvttpd2dq: +; AVX: ## BB#0: +; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0] +; AVX-NEXT: retl ## encoding: [0xc3] +; +; AVX512VL-LABEL: test_x86_sse2_cvttpd2dq: +; AVX512VL: ## BB#0: +; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xe6,0xc0] +; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } Index: test/CodeGen/X86/sse2-intrinsics-x86.ll =================================================================== --- test/CodeGen/X86/sse2-intrinsics-x86.ll +++ test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -502,10 +502,15 @@ ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0xe6,0xc0] ; SSE-NEXT: retl ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_sse2_cvttpd2dq: -; VCHECK: ## BB#0: -; VCHECK-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; AVX2-LABEL: test_x86_sse2_cvttpd2dq: +; AVX2: ## BB#0: +; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0] +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; SKX-LABEL: test_x86_sse2_cvttpd2dq: +; SKX: ## BB#0: +; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xe6,0xc0] +; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } @@ -529,7 +534,7 @@ ; ; SKX-LABEL: test_mm_cvttpd_epi32_zext: ; SKX: ## BB#0: -; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe6,0xc0] +; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xe6,0xc0] ; SKX-NEXT: vmovq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x7e,0xc0] ; SKX-NEXT: ## xmm0 = xmm0[0],zero ; SKX-NEXT: retl ## encoding: [0xc3]