Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -6472,9 +6472,20 @@
     if (LoadMask[i]) {
       SDValue Elt = peekThroughBitcasts(Elts[i]);
       LoadSDNode *LD = cast<LoadSDNode>(Elt);
-      if (!DAG.areNonVolatileConsecutiveLoads(
-              LD, LDBase, Elt.getValueType().getStoreSizeInBits() / 8,
-              i - FirstLoadedElt)) {
+      // See if this load is consecutive to any previously confirmed
+      // consecutive load. We can't just test against LDBase as we have cases
+      // where loads have been further split and offset from each other.
+      bool IsConsecutive = false;
+      for (int j = FirstLoadedElt; j < i; j = LoadMask.find_next(j)) {
+        LoadSDNode *LocalBase = cast<LoadSDNode>(peekThroughBitcasts(Elts[j]));
+        if (DAG.areNonVolatileConsecutiveLoads(
+                LD, LocalBase, Elt.getValueType().getStoreSizeInBits() / 8,
+                i - j)) {
+          IsConsecutive = true;
+          break;
+        }
+      }
+      if (!IsConsecutive) {
         IsConsecutiveLoad = false;
         IsConsecutiveLoadWithZeros = false;
         break;
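The shape of the new check: instead of testing element i only against the first loaded element (LDBase), it is tested against every previously confirmed loaded element until one proves consecutive. Below is a minimal standalone C++ sketch of that scan, using simplified stand-in types; the real code uses SelectionDAG::areNonVolatileConsecutiveLoads, LoadSDNode and BitVector::find_next, and the names here (Load, isConsecutiveWithAnyEarlier) are hypothetical.

#include <vector>

struct Load {
  int Base;    // identity of the base pointer
  long Offset; // byte offset from that base
};

// True if element I sits exactly ByteSize * (I - J) bytes past some earlier
// confirmed load J. Scanning all earlier elements, not just the first one,
// handles loads that have been split and offset from each other.
static bool isConsecutiveWithAnyEarlier(const std::vector<Load> &Elts,
                                        const std::vector<bool> &LoadMask,
                                        int I, int FirstLoadedElt,
                                        long ByteSize) {
  for (int J = FirstLoadedElt; J < I; ++J) {
    if (!LoadMask[J])
      continue; // stands in for LoadMask.find_next(j)
    if (Elts[I].Base == Elts[J].Base &&
        Elts[I].Offset == Elts[J].Offset + ByteSize * (I - J))
      return true; // plays the role of DAG.areNonVolatileConsecutiveLoads
  }
  return false;
}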
Index: test/CodeGen/X86/build-vector-128.ll
===================================================================
--- test/CodeGen/X86/build-vector-128.ll
+++ test/CodeGen/X86/build-vector-128.ll
@@ -72,12 +72,10 @@
 }

 define <2 x i64> @test_buildvector_v2i64(i64 %a0, i64 %a1) {
-; SSE2-32-LABEL: test_buildvector_v2i64:
-; SSE2-32:       # BB#0:
-; SSE2-32-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; SSE2-32-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; SSE2-32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE2-32-NEXT:    retl
+; SSE-32-LABEL: test_buildvector_v2i64:
+; SSE-32:       # BB#0:
+; SSE-32-NEXT:    movups {{[0-9]+}}(%esp), %xmm0
+; SSE-32-NEXT:    retl
 ;
 ; SSE-64-LABEL: test_buildvector_v2i64:
 ; SSE-64:       # BB#0:
@@ -86,20 +84,9 @@
 ; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-64-NEXT:    retq
 ;
-; SSE41-32-LABEL: test_buildvector_v2i64:
-; SSE41-32:       # BB#0:
-; SSE41-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE41-32-NEXT:    pinsrd $1, {{[0-9]+}}(%esp), %xmm0
-; SSE41-32-NEXT:    pinsrd $2, {{[0-9]+}}(%esp), %xmm0
-; SSE41-32-NEXT:    pinsrd $3, {{[0-9]+}}(%esp), %xmm0
-; SSE41-32-NEXT:    retl
-;
 ; AVX-32-LABEL: test_buildvector_v2i64:
 ; AVX-32:       # BB#0:
-; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX-32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
 ; AVX-32-NEXT:    retl
 ;
 ; AVX-64-LABEL: test_buildvector_v2i64:
Index: test/CodeGen/X86/build-vector-256.ll
===================================================================
--- test/CodeGen/X86/build-vector-256.ll
+++ test/CodeGen/X86/build-vector-256.ll
@@ -51,18 +51,10 @@
 }

 define <4 x i64> @test_buildvector_v4i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
-; AVX1-32-LABEL: test_buildvector_v4i64:
-; AVX1-32:       # BB#0:
-; AVX1-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX1-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX1-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX1-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX1-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX1-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX1-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX1-32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-32-NEXT:    retl
+; AVX-32-LABEL: test_buildvector_v4i64:
+; AVX-32:       # BB#0:
+; AVX-32-NEXT:    vmovups {{[0-9]+}}(%esp), %ymm0
+; AVX-32-NEXT:    retl
 ;
 ; AVX1-64-LABEL: test_buildvector_v4i64:
 ; AVX1-64:       # BB#0:
@@ -75,19 +67,6 @@
 ; AVX1-64-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-64-NEXT:    retq
 ;
-; AVX2-32-LABEL: test_buildvector_v4i64:
-; AVX2-32:       # BB#0:
-; AVX2-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX2-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX2-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX2-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX2-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX2-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX2-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX2-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX2-32-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-32-NEXT:    retl
-;
 ; AVX2-64-LABEL: test_buildvector_v4i64:
 ; AVX2-64:       # BB#0:
 ; AVX2-64-NEXT:    vmovq %rcx, %xmm0
Index: test/CodeGen/X86/build-vector-512.ll
===================================================================
--- test/CodeGen/X86/build-vector-512.ll
+++ test/CodeGen/X86/build-vector-512.ll
@@ -79,25 +79,7 @@
 define <8 x i64> @test_buildvector_v8i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7) {
 ; AVX-32-LABEL: test_buildvector_v8i64:
 ; AVX-32:       # BB#0:
-; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX-32-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX-32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
-; AVX-32-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm2, %xmm2
-; AVX-32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2
-; AVX-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm2, %xmm2
-; AVX-32-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
-; AVX-32-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; AVX-32-NEXT:    vmovups {{[0-9]+}}(%esp), %zmm0
 ; AVX-32-NEXT:    retl
 ;
 ; AVX-64-LABEL: test_buildvector_v8i64:
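All three test updates show the same payoff: on 32-bit targets the i64 arguments arrive in adjacent stack slots, so the per-element loads are consecutive and the whole build_vector collapses into a single unaligned vector load (movups/vmovups). The following standalone C++ sketch illustrates why the two forms are byte-for-byte equivalent on a little-endian target such as x86; the struct and function names are hypothetical, for illustration only.

#include <cstdint>
#include <cstring>

struct V2i64 { uint64_t Elt[2]; }; // stand-in for the <2 x i64> result

// Element-by-element form: four 32-bit slot loads feeding a build_vector,
// mirroring the old movd/pinsrd sequence.
V2i64 buildByElement(const uint32_t *Slots) {
  V2i64 V;
  V.Elt[0] = uint64_t(Slots[0]) | (uint64_t(Slots[1]) << 32);
  V.Elt[1] = uint64_t(Slots[2]) | (uint64_t(Slots[3]) << 32);
  return V;
}

// Merged form: one 16-byte, possibly unaligned load, mirroring the new
// movups codegen. memcpy expresses the unaligned access portably; the
// result matches buildByElement on little-endian layouts.
V2i64 buildByWideLoad(const uint32_t *Slots) {
  V2i64 V;
  std::memcpy(&V, Slots, sizeof(V));
  return V;
}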