Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15154,6 +15154,29 @@
   // Skip bitcasting
   V = peekThroughBitcast(V);
 
+  // If the input is a build vector, try to make a smaller build vector.
+  if (V->getOpcode() == ISD::BUILD_VECTOR) {
+    if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+      EVT InVT = V->getValueType(0);
+      unsigned NumElems = NVT.getSizeInBits() / InVT.getScalarSizeInBits();
+      if (NumElems > 0) {
+        EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(),
+                                         InVT.getVectorElementType(), NumElems);
+        if (!LegalOperations ||
+            TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT)) {
+          unsigned IdxVal = Idx->getZExtValue() * NVT.getScalarSizeInBits() /
+                            InVT.getScalarSizeInBits();
+
+          // Extract the pieces from the original build_vector.
+          SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
+                                                makeArrayRef(V->op_begin() + IdxVal,
+                                                             NumElems));
+          return DAG.getBitcast(NVT, BuildVec);
+        }
+      }
+    }
+  }
+
   if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
     // Handle only simple case where vector being inserted and vector
     // being extracted are of same size.
Index: test/CodeGen/X86/2012-1-10-buildvector.ll
===================================================================
--- test/CodeGen/X86/2012-1-10-buildvector.ll
+++ test/CodeGen/X86/2012-1-10-buildvector.ll
@@ -7,7 +7,6 @@
 ; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    vmovaps %xmm0, (%eax)
 ; CHECK-NEXT:    movl $0, (%eax)
-; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retl
   %vext.i = shufflevector <2 x i64> undef, <2 x i64> undef, <3 x i32>
   %vecinit8.i = shufflevector <3 x i64> zeroinitializer, <3 x i64> %vext.i, <3 x i32>
Index: test/CodeGen/X86/fold-vector-sext-zext.ll
===================================================================
--- test/CodeGen/X86/fold-vector-sext-zext.ll
+++ test/CodeGen/X86/fold-vector-sext-zext.ll
@@ -83,8 +83,7 @@
 define <4 x i64> @test_sext_4i8_4i64() {
 ; X32-LABEL: test_sext_4i8_4i64:
 ; X32:       # BB#0:
-; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,4294967295,4294967295]
-; X32-NEXT:    vinsertf128 $1, {{\.LCPI.*}}, %ymm0, %ymm0
+; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,4294967295,4294967295,2,0,4294967293,4294967295]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_sext_4i8_4i64:
@@ -102,8 +101,7 @@
 define <4 x i64> @test_sext_4i8_4i64_undef() {
 ; X32-LABEL: test_sext_4i8_4i64_undef:
 ; X32:       # BB#0:
-; X32-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-NEXT:    vinsertf128 $1, {{\.LCPI.*}}, %ymm0, %ymm0
+; X32-NEXT:    vmovaps {{.*#+}} ymm0 =
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_sext_4i8_4i64_undef:
@@ -245,8 +243,7 @@
 define <4 x i64> @test_zext_4i8_4i64() {
 ; X32-LABEL: test_zext_4i8_4i64:
 ; X32:       # BB#0:
-; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,255,0]
-; X32-NEXT:    vinsertf128 $1, {{\.LCPI.*}}, %ymm0, %ymm0
+; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,255,0,2,0,253,0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_zext_4i8_4i64:
@@ -300,10 +297,7 @@
 define <4 x i64> @test_zext_4i8_4i64_undef() {
 ; X32-LABEL: test_zext_4i8_4i64_undef:
 ; X32:       # BB#0:
-; X32-NEXT:    vmovaps {{.*#+}} xmm0 =
-; X32-NEXT:    movl $2, %eax
-; X32-NEXT:    vmovd %eax, %xmm1
-; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X32-NEXT:    vmovaps {{.*#+}} ymm0 =
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_zext_4i8_4i64_undef:
Index: test/CodeGen/X86/masked_gather_scatter.ll
===================================================================
--- test/CodeGen/X86/masked_gather_scatter.ll
+++ test/CodeGen/X86/masked_gather_scatter.ll
@@ -722,10 +722,8 @@
 ; KNL_64-NEXT:    vpsllq $2, %zmm1, %zmm1
 ; KNL_64-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
 ; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
-; KNL_64-NEXT:    kshiftrw $8, %k1, %k2
-; KNL_64-NEXT:    vgatherqps (,%zmm0), %ymm1 {%k2}
-; KNL_64-NEXT:    vgatherqps (,%zmm0), %ymm2 {%k1}
-; KNL_64-NEXT:    vinsertf64x4 $1, %ymm1, %zmm2, %zmm0
+; KNL_64-NEXT:    vgatherqps (,%zmm0), %ymm1 {%k1}
+; KNL_64-NEXT:    vinsertf64x4 $1, %ymm1, %zmm1, %zmm0
 ; KNL_64-NEXT:    retq
 ;
 ; KNL_32-LABEL: test14:
@@ -747,10 +745,8 @@
 ; SKX-NEXT:    vpsllq $2, %zmm1, %zmm1
 ; SKX-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
 ; SKX-NEXT:    kxnorw %k0, %k0, %k1
-; SKX-NEXT:    kshiftrw $8, %k1, %k2
-; SKX-NEXT:    vgatherqps (,%zmm0), %ymm1 {%k2}
-; SKX-NEXT:    vgatherqps (,%zmm0), %ymm2 {%k1}
-; SKX-NEXT:    vinsertf64x4 $1, %ymm1, %zmm2, %zmm0
+; SKX-NEXT:    vgatherqps (,%zmm0), %ymm1 {%k1}
+; SKX-NEXT:    vinsertf64x4 $1, %ymm1, %zmm1, %zmm0
 ; SKX-NEXT:    retq
 ;
 ; SKX_32-LABEL: test14:
@@ -1624,7 +1620,6 @@
 ; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
 ; KNL_64-NEXT:    kxnorw %k0, %k0, %k2
 ; KNL_64-NEXT:    vpgatherqq (,%zmm0), %zmm2 {%k2}
-; KNL_64-NEXT:    kshiftrw $8, %k1, %k1
 ; KNL_64-NEXT:    vpgatherqq (,%zmm1), %zmm3 {%k1}
 ; KNL_64-NEXT:    vmovdqa64 %zmm2, %zmm0
 ; KNL_64-NEXT:    vmovdqa64 %zmm3, %zmm1
@@ -1642,7 +1637,6 @@
 ; SKX-NEXT:    kxnorw %k0, %k0, %k1
 ; SKX-NEXT:    kxnorw %k0, %k0, %k2
 ; SKX-NEXT:    vpgatherqq (,%zmm0), %zmm2 {%k2}
-; SKX-NEXT:    kshiftrw $8, %k1, %k1
 ; SKX-NEXT:    vpgatherqq (,%zmm1), %zmm3 {%k1}
 ; SKX-NEXT:    vmovdqa64 %zmm2, %zmm0
 ; SKX-NEXT:    vmovdqa64 %zmm3, %zmm1
Index: test/CodeGen/X86/pr34139.ll
===================================================================
--- test/CodeGen/X86/pr34139.ll
+++ test/CodeGen/X86/pr34139.ll
@@ -6,14 +6,6 @@
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    vmovdqa %xmm0, (%rax)
-; CHECK-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
-; CHECK-NEXT:    vmovapd (%rdi), %zmm1
-; CHECK-NEXT:    vmovapd 64(%rdi), %zmm2
-; CHECK-NEXT:    vptestmq %zmm0, %zmm0, %k1
-; CHECK-NEXT:    vmovapd %zmm0, %zmm1 {%k1}
-; CHECK-NEXT:    vmovapd %zmm0, %zmm2 {%k1}
-; CHECK-NEXT:    vmovapd %zmm2, 64(%rdi)
-; CHECK-NEXT:    vmovapd %zmm1, (%rdi)
   store <16 x i8> , <16 x i8>* undef
   %load_mask8.i.i.i = load <16 x i8>, <16 x i8>* undef
   %v.i.i.i.i = load <16 x double>, <16 x double>* %ptr
Index: test/CodeGen/X86/widen_extract-1.ll
===================================================================
--- test/CodeGen/X86/widen_extract-1.ll
+++ test/CodeGen/X86/widen_extract-1.ll
@@ -7,8 +7,8 @@
 define void @convert(<2 x double>* %dst.addr, <3 x double> %src) {
 ; X32-LABEL: convert:
 ; X32:       # BB#0: # %entry
-; X32-NEXT:    movups {{[0-9]+}}(%esp), %xmm0
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movups {{[0-9]+}}(%esp), %xmm0
 ; X32-NEXT:    movaps %xmm0, (%eax)
 ; X32-NEXT:    retl
 ;