Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16128,6 +16128,9 @@
   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
     return NarrowBOp;
 
+  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+    return SDValue(N, 0);
+
   return SDValue();
 }
 
Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1479,6 +1479,25 @@
     KnownZero.insertBits(SubZero, SubIdx);
     break;
   }
+  case ISD::EXTRACT_SUBVECTOR: {
+    if (!isa<ConstantSDNode>(Op.getOperand(1)))
+      break;
+    SDValue Src = Op.getOperand(0);
+    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+    APInt Idx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
+    if (Idx.uge(NumSrcElts - NumElts))
+      break;
+    // Offset the demanded elts by the subvector index.
+    uint64_t SubIdx = Idx.getZExtValue();
+    APInt SrcElts = DemandedElts.zext(NumSrcElts).shl(SubIdx);
+    APInt SrcUndef, SrcZero;
+    if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
+                                   Depth + 1))
+      return true;
+    KnownUndef = SrcUndef.extractBits(NumElts, SubIdx);
+    KnownZero = SrcZero.extractBits(NumElts, SubIdx);
+    break;
+  }
   case ISD::INSERT_VECTOR_ELT: {
     SDValue Vec = Op.getOperand(0);
     SDValue Scl = Op.getOperand(1);
Index: test/CodeGen/X86/avx-vperm2x128.ll
===================================================================
--- test/CodeGen/X86/avx-vperm2x128.ll
+++ test/CodeGen/X86/avx-vperm2x128.ll
@@ -505,7 +505,7 @@
 define <4 x i64> @shuffle_v4i64_67zz(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: shuffle_v4i64_67zz:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; AVX1-NEXT: retq
Index: test/CodeGen/X86/avx512-hadd-hsub.ll
===================================================================
--- test/CodeGen/X86/avx512-hadd-hsub.ll
+++ test/CodeGen/X86/avx512-hadd-hsub.ll
@@ -225,16 +225,16 @@
 define <8 x i32> @hadd_16_3_sv(<16 x i32> %x225, <16 x i32> %x227) {
 ; KNL-LABEL: hadd_16_3_sv:
 ; KNL: # %bb.0:
-; KNL-NEXT: vshufps {{.*#+}} zmm2 = zmm0[0,2],zmm1[0,2],zmm0[4,6],zmm1[4,6],zmm0[8,10],zmm1[8,10],zmm0[12,14],zmm1[12,14]
-; KNL-NEXT: vshufps {{.*#+}} zmm0 = zmm0[1,3],zmm1[1,3],zmm0[5,7],zmm1[5,7],zmm0[9,11],zmm1[9,11],zmm0[13,15],zmm1[13,15]
+; KNL-NEXT: vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
+; KNL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
 ; KNL-NEXT: vpaddd %zmm0, %zmm2, %zmm0
 ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; KNL-NEXT: retq
 ;
 ; SKX-LABEL: hadd_16_3_sv:
 ; SKX: # %bb.0:
-; SKX-NEXT: vshufps {{.*#+}} zmm2 = zmm0[0,2],zmm1[0,2],zmm0[4,6],zmm1[4,6],zmm0[8,10],zmm1[8,10],zmm0[12,14],zmm1[12,14]
-; SKX-NEXT: vshufps {{.*#+}} zmm0 = zmm0[1,3],zmm1[1,3],zmm0[5,7],zmm1[5,7],zmm0[9,11],zmm1[9,11],zmm0[13,15],zmm1[13,15]
+; SKX-NEXT: vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
+; SKX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
 ; SKX-NEXT: vpaddd %zmm0, %zmm2, %zmm0
 ; SKX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; SKX-NEXT: retq
Index: test/CodeGen/X86/oddshuffles.ll
===================================================================
--- test/CodeGen/X86/oddshuffles.ll
+++ test/CodeGen/X86/oddshuffles.ll
@@ -22,22 +22,12 @@
 ; SSE42-NEXT: movdqa %xmm0, (%rdi)
 ; SSE42-NEXT: retq
 ;
-; AVX1-LABEL: v3i64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm0[0],xmm1[0]
-; AVX1-NEXT: vpextrq $1, %xmm0, 16(%rdi)
-; AVX1-NEXT: vmovdqa %xmm1, (%rdi)
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: v3i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3]
-; AVX2-NEXT: vpextrq $1, %xmm0, 16(%rdi)
-; AVX2-NEXT: vmovdqa %xmm1, (%rdi)
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; AVX-LABEL: v3i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm0[0],xmm1[0]
+; AVX-NEXT: vpextrq $1, %xmm0, 16(%rdi)
+; AVX-NEXT: vmovdqa %xmm1, (%rdi)
+; AVX-NEXT: retq
 ;
 ; XOP-LABEL: v3i64:
 ; XOP: # %bb.0:
@@ -57,22 +47,12 @@
 ; SSE-NEXT: movapd %xmm0, (%rdi)
 ; SSE-NEXT: retq
 ;
-; AVX1-LABEL: v3f64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
-; AVX1-NEXT: vmovhpd %xmm0, 16(%rdi)
-; AVX1-NEXT: vmovapd %xmm1, (%rdi)
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: v3f64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
-; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,1,3]
-; AVX2-NEXT: vmovhpd %xmm0, 16(%rdi)
-; AVX2-NEXT: vmovapd %xmm1, (%rdi)
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; AVX-LABEL: v3f64:
+; AVX: # %bb.0:
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovhpd %xmm0, 16(%rdi)
+; AVX-NEXT: vmovapd %xmm1, (%rdi)
+; AVX-NEXT: retq
 ;
 ; XOP-LABEL: v3f64:
 ; XOP: # %bb.0:
@@ -218,29 +198,27 @@
 ;
 ; AVX1-LABEL: v5i32:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,1],xmm1[1,2]
-; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,3]
-; AVX1-NEXT: vextractps $3, %xmm0, 16(%rdi)
-; AVX1-NEXT: vmovaps %xmm1, (%rdi)
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,2]
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; AVX1-NEXT: vpextrd $3, %xmm0, 16(%rdi)
+; AVX1-NEXT: vmovdqa %xmm1, (%rdi)
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: v5i32:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
-; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,5,1,6,3,u,u,u>
-; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
-; AVX2-NEXT: vextractps $3, %xmm0, 16(%rdi)
-; AVX2-NEXT: vmovaps %xmm1, (%rdi)
-; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,2]
+; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
+; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; AVX2-NEXT: vpextrd $3, %xmm0, 16(%rdi)
+; AVX2-NEXT: vmovdqa %xmm1, (%rdi)
 ; AVX2-NEXT: retq
 ;
 ; XOP-LABEL: v5i32:
 ; XOP: # %bb.0:
-; XOP-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,1],xmm1[1,2]
-; XOP-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,3]
-; XOP-NEXT: vextractps $3, %xmm0, 16(%rdi)
-; XOP-NEXT: vmovaps %xmm1, (%rdi)
+; XOP-NEXT: vpperm {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7],xmm0[4,5,6,7],xmm1[8,9,10,11]
+; XOP-NEXT: vpextrd $3, %xmm0, 16(%rdi)
+; XOP-NEXT: vmovdqa %xmm1, (%rdi)
 ; XOP-NEXT: retq
   %r = shufflevector <4 x i32> %a, <4 x i32> %b, <5 x i32> <i32 0, i32 5, i32 1, i32 6, i32 3>
   store <5 x i32> %r, <5 x i32>* %p
@@ -266,24 +244,13 @@
 ; SSE42-NEXT: movaps %xmm0, (%rdi)
 ; SSE42-NEXT: retq
 ;
-; AVX1-LABEL: v5f32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,1],xmm1[1,2]
-; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,3]
-; AVX1-NEXT: vextractps $3, %xmm0, 16(%rdi)
-; AVX1-NEXT: vmovaps %xmm1, (%rdi)
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: v5f32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
-; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <0,5,1,6,3,u,u,u>
-; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
-; AVX2-NEXT: vextractps $3, %xmm0, 16(%rdi)
-; AVX2-NEXT: vmovaps %xmm1, (%rdi)
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; AVX-LABEL: v5f32:
+; AVX: # %bb.0:
+; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm0[0,1],xmm1[1,2]
+; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; AVX-NEXT: vextractps $3, %xmm0, 16(%rdi)
+; AVX-NEXT: vmovaps %xmm1, (%rdi)
+; AVX-NEXT: retq
 ;
 ; XOP-LABEL: v5f32:
 ; XOP: # %bb.0:
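
Note on the new ISD::EXTRACT_SUBVECTOR case in SimplifyDemandedVectorElts: the demanded mask of the narrow result maps into the wide source by a left shift of the subvector index, and the source's known undef/zero element bits map back down with the inverse shift; this works because a subvector occupies a contiguous run of source elements. Below is a minimal standalone sketch of that bookkeeping, using one uint64_t bit per vector element in place of llvm::APInt; mapExtractSubvector and its parameter names are illustrative only and are not part of this patch.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Sketch of the demanded-elements bookkeeping for EXTRACT_SUBVECTOR,
// with one bit per vector element. Hypothetical helper, not LLVM API.
struct DemandedInfo {
  uint64_t SrcDemanded; // source elements the extract actually needs
  uint64_t KnownUndef;  // source undef bits mapped back to the result
};

static DemandedInfo mapExtractSubvector(uint64_t DemandedElts,
                                        unsigned NumElts, unsigned NumSrcElts,
                                        unsigned SubIdx, uint64_t SrcUndef) {
  assert(SubIdx + NumElts <= NumSrcElts && "subvector out of range");
  // Offset the demanded elts by the subvector index, as in the patch's
  // DemandedElts.zext(NumSrcElts).shl(SubIdx).
  uint64_t SrcDemanded = DemandedElts << SubIdx;
  // Map the source's known-undef elements back to the narrow result, as in
  // the patch's SrcUndef.extractBits(NumElts, SubIdx).
  uint64_t ResultMask = (NumElts >= 64) ? ~0ull : ((1ull << NumElts) - 1);
  uint64_t KnownUndef = (SrcUndef >> SubIdx) & ResultMask;
  return {SrcDemanded, KnownUndef};
}

int main() {
  // Extract the upper <4 x i32> of an <8 x i32> (SubIdx = 4) while only
  // element 0 of the result is demanded; source element 4 is undef.
  DemandedInfo DI = mapExtractSubvector(/*DemandedElts=*/0x1, /*NumElts=*/4,
                                        /*NumSrcElts=*/8, /*SubIdx=*/4,
                                        /*SrcUndef=*/0x10);
  // Prints SrcDemanded=0x10 KnownUndef=0x1: only source element 4 is
  // demanded, and result element 0 is known undef.
  std::printf("SrcDemanded=%#llx KnownUndef=%#llx\n",
              (unsigned long long)DI.SrcDemanded,
              (unsigned long long)DI.KnownUndef);
  return 0;
}

The shift/extract pair is the whole trick: once only a few source elements are demanded, the recursive SimplifyDemandedVectorElts call can rewrite the source node (e.g. turn the vperm2f128 in shuffle_v4i64_67zz into a plain vextractf128), which is exactly what the test updates above show.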