Fold the zeroing move that follows a 128-bit (V)CVTPD2PS into the conversion.

Summary of the changes carried by this patch:
  * X86InstrSSE.td: under [HasAVX, NoVLX] and under [UseSSE2], add a pattern
    that selects (V)CVTPD2PSrr for an X86vzmovl applied, through v2f64
    bitcasts, to the v4f32 X86vfpround of a v2f64 register.
  * X86IntrinsicsInfo.h: map sse2_cvtpd2ps to X86ISD::VFPROUND as an
    INTR_TYPE_1OP entry.
  * Tests: drop the movq/vmovq "xmm0 = xmm0[0],zero" line that used to be
    expected after each (v)cvtpd2ps in the *_zext tests.

Rationale (presumed; confirm against the Intel SDM entry for CVTPD2PS): the
128-bit form already writes zeros to bits 127:64 of the destination, so the
explicit zeroing move is redundant.

NOTE(review): this file was rebuilt from a copy whose line breaks and column
alignment were lost. Leading indentation was reconstructed, inter-token
alignment was not, and the mask operands on the two trailing shufflevector
context lines were restored from the upstream tests. Verify against the tree,
or apply with whitespace-insensitive matching (e.g. "patch -l").

Index: lib/Target/X86/X86InstrSSE.td
===================================================================
--- lib/Target/X86/X86InstrSSE.td
+++ lib/Target/X86/X86InstrSSE.td
@@ -2282,6 +2282,9 @@
 
 let Predicates = [HasAVX, NoVLX] in {
   // Match fpround and fpextend for 128/256-bit conversions
+  def : Pat<(v4f32 (bitconvert (X86vzmovl (v2f64 (bitconvert
+                               (v4f32 (X86vfpround (v2f64 VR128:$src)))))))),
+            (VCVTPD2PSrr VR128:$src)>;
   def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
             (VCVTPD2PSrr VR128:$src)>;
   def : Pat<(v4f32 (X86vfpround (loadv2f64 addr:$src))),
@@ -2301,6 +2304,9 @@
 
 let Predicates = [UseSSE2] in {
   // Match fpround and fpextend for 128 conversions
+  def : Pat<(v4f32 (bitconvert (X86vzmovl (v2f64 (bitconvert
+                               (v4f32 (X86vfpround (v2f64 VR128:$src)))))))),
+            (CVTPD2PSrr VR128:$src)>;
   def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
             (CVTPD2PSrr VR128:$src)>;
   def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
Index: lib/Target/X86/X86IntrinsicsInfo.h
===================================================================
--- lib/Target/X86/X86IntrinsicsInfo.h
+++ lib/Target/X86/X86IntrinsicsInfo.h
@@ -1884,6 +1884,7 @@
   X86_INTRINSIC_DATA(sse2_comile_sd, COMI, X86ISD::COMI, ISD::SETLE),
   X86_INTRINSIC_DATA(sse2_comilt_sd, COMI, X86ISD::COMI, ISD::SETLT),
   X86_INTRINSIC_DATA(sse2_comineq_sd, COMI, X86ISD::COMI, ISD::SETNE),
+  X86_INTRINSIC_DATA(sse2_cvtpd2ps, INTR_TYPE_1OP, X86ISD::VFPROUND, 0),
   X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0),
   X86_INTRINSIC_DATA(sse2_min_pd, INTR_TYPE_2OP, X86ISD::FMIN, 0),
   X86_INTRINSIC_DATA(sse2_movmsk_pd, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
Index: test/CodeGen/X86/sse2-intrinsics-x86.ll
===================================================================
--- test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -229,13 +229,11 @@
 ; SSE-LABEL: test_x86_sse2_cvtpd2ps_zext:
 ; SSE: ## BB#0:
 ; SSE-NEXT: cvtpd2ps %xmm0, %xmm0
-; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
 ; SSE-NEXT: retl
 ;
 ; KNL-LABEL: test_x86_sse2_cvtpd2ps_zext:
 ; KNL: ## BB#0:
 ; KNL-NEXT: vcvtpd2ps %xmm0, %xmm0
-; KNL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; KNL-NEXT: retl
   %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
   %res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
Index: test/CodeGen/X86/vec_fptrunc.ll
===================================================================
--- test/CodeGen/X86/vec_fptrunc.ll
+++ test/CodeGen/X86/vec_fptrunc.ll
@@ -138,26 +138,22 @@
 ; X32-SSE: # BB#0:
 ; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-SSE-NEXT: cvtpd2ps (%eax), %xmm0
-; X32-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
 ; X32-SSE-NEXT: retl
 ;
 ; X32-AVX-LABEL: fptrunc_frommem2_zext:
 ; X32-AVX: # BB#0:
 ; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-AVX-NEXT: vcvtpd2psx (%eax), %xmm0
-; X32-AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; X32-AVX-NEXT: retl
 ;
 ; X64-SSE-LABEL: fptrunc_frommem2_zext:
 ; X64-SSE: # BB#0:
 ; X64-SSE-NEXT: cvtpd2ps (%rdi), %xmm0
-; X64-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
 ; X64-SSE-NEXT: retq
 ;
 ; X64-AVX-LABEL: fptrunc_frommem2_zext:
 ; X64-AVX: # BB#0:
 ; X64-AVX-NEXT: vcvtpd2psx (%rdi), %xmm0
-; X64-AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; X64-AVX-NEXT: retq
   %arg = load <2 x double>, <2 x double> * %ld, align 16
   %cvt = fptrunc <2 x double> %arg to <2 x float>
@@ -169,25 +165,21 @@
 ; X32-SSE-LABEL: fptrunc_fromreg2_zext:
 ; X32-SSE: # BB#0:
 ; X32-SSE-NEXT: cvtpd2ps %xmm0, %xmm0
-; X32-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
 ; X32-SSE-NEXT: retl
 ;
 ; X32-AVX-LABEL: fptrunc_fromreg2_zext:
 ; X32-AVX: # BB#0:
 ; X32-AVX-NEXT: vcvtpd2ps %xmm0, %xmm0
-; X32-AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; X32-AVX-NEXT: retl
 ;
 ; X64-SSE-LABEL: fptrunc_fromreg2_zext:
 ; X64-SSE: # BB#0:
 ; X64-SSE-NEXT: cvtpd2ps %xmm0, %xmm0
-; X64-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
 ; X64-SSE-NEXT: retq
 ;
 ; X64-AVX-LABEL: fptrunc_fromreg2_zext:
 ; X64-AVX: # BB#0:
 ; X64-AVX-NEXT: vcvtpd2ps %xmm0, %xmm0
-; X64-AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; X64-AVX-NEXT: retq
   %cvt = fptrunc <2 x double> %arg to <2 x float>
   %ret = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>