Index: lib/Target/X86/X86InstrSSE.td
===================================================================
--- lib/Target/X86/X86InstrSSE.td
+++ lib/Target/X86/X86InstrSSE.td
@@ -723,8 +723,6 @@
             (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
   def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
             (MOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
-  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
-            (MOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
   }
 
   let AddedComplexity = 20 in {
@@ -5337,6 +5335,15 @@
   }
 } // isCodeGenOnly, SchedRW
 
+// Keep only the low element of a v4i32 and zero the rest: interleave the
+// source with a zero vector (PUNPCKLDQ), then zero the upper 64 bits
+// (MOVZPQILo2PQI). Both predicates are listed in a single 'let' because a
+// nested 'let Predicates' replaces the outer list instead of extending it.
+let Predicates = [UseSSE2, NoSSE41], AddedComplexity = 15 in {
+  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+            (MOVZPQILo2PQIrr (PUNPCKLDQrr $src, (v4i32 (V_SET0))))>;
+}
+
 let AddedComplexity = 20 in {
   let Predicates = [UseAVX] in {
   def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
Index: test/CodeGen/X86/uint_to_fp-2.ll
===================================================================
--- test/CodeGen/X86/uint_to_fp-2.ll
+++ test/CodeGen/X86/uint_to_fp-2.ll
@@ -25,13 +25,13 @@
 ; CHECK-LABEL: test2:
 ; CHECK:       # BB#0: # %entry
 ; CHECK-NEXT:    pushl %eax
-; CHECK-NEXT:    xorps %xmm1, %xmm1
-; CHECK-NEXT:    movss %xmm0, %xmm1
-; CHECK-NEXT:    movsd .LCPI1_0, %xmm0
-; CHECK-NEXT:    orps %xmm0, %xmm1
-; CHECK-NEXT:    subsd %xmm0, %xmm1
-; CHECK-NEXT:    xorps %xmm0, %xmm0
-; CHECK-NEXT:    cvtsd2ss %xmm1, %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm1
+; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT:    movq %xmm0, %xmm0
+; CHECK-NEXT:    movsd .LCPI1_0, %xmm1
+; CHECK-NEXT:    orps %xmm1, %xmm0
+; CHECK-NEXT:    subsd %xmm1, %xmm0
+; CHECK-NEXT:    cvtsd2ss %xmm0, %xmm0
 ; CHECK-NEXT:    movss %xmm0, (%esp)
 ; CHECK-NEXT:    flds (%esp)
 ; CHECK-NEXT:    popl %eax
Index: test/CodeGen/X86/vector-shuffle-128-v4.ll
===================================================================
--- test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -660,23 +660,23 @@
 define <4 x i32> @shuffle_v4i32_4zzz(<4 x i32> %a) {
 ; SSE2-LABEL: shuffle_v4i32_4zzz:
 ; SSE2:       # BB#0:
-; SSE2-NEXT:    xorps %xmm1, %xmm1
-; SSE2-NEXT:    movss %xmm0, %xmm1
-; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    movq %xmm0, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE3-LABEL: shuffle_v4i32_4zzz:
 ; SSE3:       # BB#0:
-; SSE3-NEXT:    xorps %xmm1, %xmm1
-; SSE3-NEXT:    movss %xmm0, %xmm1
-; SSE3-NEXT:    movaps %xmm1, %xmm0
+; SSE3-NEXT:    pxor %xmm1, %xmm1
+; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE3-NEXT:    movq %xmm0, %xmm0
 ; SSE3-NEXT:    retq
 ;
 ; SSSE3-LABEL: shuffle_v4i32_4zzz:
 ; SSSE3:       # BB#0:
-; SSSE3-NEXT:    xorps %xmm1, %xmm1
-; SSSE3-NEXT:    movss %xmm0, %xmm1
-; SSSE3-NEXT:    movaps %xmm1, %xmm0
+; SSSE3-NEXT:    pxor %xmm1, %xmm1
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT:    movq %xmm0, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: shuffle_v4i32_4zzz:
@@ -697,24 +697,28 @@
 define <4 x i32> @shuffle_v4i32_z4zz(<4 x i32> %a) {
 ; SSE2-LABEL: shuffle_v4i32_z4zz:
 ; SSE2:       # BB#0:
-; SSE2-NEXT:    xorps %xmm1, %xmm1
-; SSE2-NEXT:    movss %xmm0, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    movq %xmm0, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
 ; SSE2-NEXT:    retq
 ;
 ; SSE3-LABEL: shuffle_v4i32_z4zz:
 ; SSE3:       # BB#0:
-; SSE3-NEXT:    xorps %xmm1, %xmm1
-; SSE3-NEXT:    movss %xmm0, %xmm1
-; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
+; SSE3-NEXT:    pxor %xmm1, %xmm1
+; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE3-NEXT:    movq %xmm0, %xmm0
+; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
 ; SSE3-NEXT:    retq
 ;
 ; SSSE3-LABEL: shuffle_v4i32_z4zz:
 ; SSSE3:       # BB#0:
-; SSSE3-NEXT:    xorps %xmm1, %xmm1
-; SSSE3-NEXT:    movss %xmm0, %xmm1
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
-; SSSE3-NEXT:    retq
+; SSSE3-NEXT:    pxor %xmm1, %xmm1
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT:    movq %xmm0, %xmm0
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; SSSE3-NEXT:    retq
+
 ;
 ; SSE41-LABEL: shuffle_v4i32_z4zz:
 ; SSE41:       # BB#0:
@@ -736,24 +740,27 @@
 define <4 x i32> @shuffle_v4i32_zz4z(<4 x i32> %a) {
 ; SSE2-LABEL: shuffle_v4i32_zz4z:
 ; SSE2:       # BB#0:
-; SSE2-NEXT:    xorps %xmm1, %xmm1
-; SSE2-NEXT:    movss %xmm0, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    movq %xmm0, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,0,1]
 ; SSE2-NEXT:    retq
 ;
 ; SSE3-LABEL: shuffle_v4i32_zz4z:
 ; SSE3:       # BB#0:
-; SSE3-NEXT:    xorps %xmm1, %xmm1
-; SSE3-NEXT:    movss %xmm0, %xmm1
-; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
+; SSE3-NEXT:    pxor %xmm1, %xmm1
+; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE3-NEXT:    movq %xmm0, %xmm0
+; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,0,1]
 ; SSE3-NEXT:    retq
 ;
 ; SSSE3-LABEL: shuffle_v4i32_zz4z:
 ; SSSE3:       # BB#0:
-; SSSE3-NEXT:    xorps %xmm1, %xmm1
-; SSSE3-NEXT:    movss %xmm0, %xmm1
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
-; SSSE3-NEXT:    retq
+; SSSE3-NEXT:    pxor %xmm1, %xmm1
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT:    movq %xmm0, %xmm0
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,0,1]
+; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: shuffle_v4i32_zz4z:
 ; SSE41:       # BB#0: