Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1532,12 +1532,20 @@
     break;
   }
   case ISD::VSELECT: {
-    APInt DemandedLHS(DemandedElts);
-    APInt DemandedRHS(DemandedElts);
-
-    // TODO - add support for constant vselect masks.
+    // Try to transform the select condition based on the current demanded
+    // elements.
+    // TODO: If a condition element is undef, we can choose from one arm of the
+    // select (and if one arm is undef, then we can propagate that to the
+    // result).
+    // TODO - add support for constant vselect masks (see IR version of this).
+    APInt UnusedUndef, UnusedZero;
+    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
+                                   UnusedZero, TLO, Depth + 1))
+      return true;

     // See if we can simplify either vselect operand.
+    APInt DemandedLHS(DemandedElts);
+    APInt DemandedRHS(DemandedElts);
     APInt UndefLHS, ZeroLHS;
     APInt UndefRHS, ZeroRHS;
     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
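Illustrative note (not part of the patch): a minimal hand-written LLVM IR sketch of the kind of pattern the hunk above helps. The function name @smax_lane0 and the vector width are hypothetical choices for illustration. Because only element 0 of the select result is demanded by the extractelement, SimplifyDemandedVectorElts can now also narrow what is demanded from the select condition (operand 0), so anything that only feeds the unused condition lanes can be dropped.

; Only lane 0 of %s is used, so only lane 0 of the compare %c is really needed.
define i64 @smax_lane0(<4 x i64> %a, <4 x i64> %b) {
  %c = icmp sgt <4 x i64> %a, %b
  %s = select <4 x i1> %c, <4 x i64> %a, <4 x i64> %b
  %r = extractelement <4 x i64> %s, i32 0
  ret i64 %r
}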
Index: test/CodeGen/X86/horizontal-reduce-smax.ll
===================================================================
--- test/CodeGen/X86/horizontal-reduce-smax.ll
+++ test/CodeGen/X86/horizontal-reduce-smax.ll
@@ -469,9 +469,6 @@
 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
 ; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -548,9 +545,6 @@
 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT: vmovq %xmm0, %rax
 ; X64-AVX1-NEXT: vzeroupper
@@ -1159,9 +1153,6 @@
 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
 ; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -1283,9 +1274,6 @@
 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT: vmovq %xmm0, %rax
 ; X64-AVX1-NEXT: vzeroupper
Index: test/CodeGen/X86/horizontal-reduce-smin.ll
===================================================================
--- test/CodeGen/X86/horizontal-reduce-smin.ll
+++ test/CodeGen/X86/horizontal-reduce-smin.ll
@@ -472,9 +472,6 @@
 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
 ; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -552,9 +549,6 @@
 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT: vmovq %xmm0, %rax
 ; X64-AVX1-NEXT: vzeroupper
@@ -1163,9 +1157,6 @@
 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
 ; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -1287,9 +1278,6 @@
 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT: vmovq %xmm0, %rax
 ; X64-AVX1-NEXT: vzeroupper
Index: test/CodeGen/X86/horizontal-reduce-umax.ll
===================================================================
--- test/CodeGen/X86/horizontal-reduce-umax.ll
+++ test/CodeGen/X86/horizontal-reduce-umax.ll
@@ -535,12 +535,8 @@
 ; X86-AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
 ; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
-; X86-AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
-; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; X86-AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
-; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm2
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
 ; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -631,12 +627,8 @@
 ; X64-AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X64-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
-; X64-AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
-; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; X64-AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
-; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm2
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT: vmovq %xmm0, %rax
 ; X64-AVX1-NEXT: vzeroupper
@@ -1270,12 +1262,8 @@
 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X86-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
-; X86-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
-; X86-AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm2
-; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; X86-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; X86-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
+; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
 ; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -1422,12 +1410,8 @@
 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X64-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
-; X64-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
-; X64-AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm2
-; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; X64-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; X64-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
+; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT: vmovq %xmm0, %rax
 ; X64-AVX1-NEXT: vzeroupper
Index: test/CodeGen/X86/horizontal-reduce-umin.ll
===================================================================
--- test/CodeGen/X86/horizontal-reduce-umin.ll
+++ test/CodeGen/X86/horizontal-reduce-umin.ll
@@ -473,12 +473,8 @@
 ; X86-AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
 ; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
-; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; X86-AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
-; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm2
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
 ; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -571,12 +567,8 @@
 ; X64-AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X64-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
-; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; X64-AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
-; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm2
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT: vmovq %xmm0, %rax
 ; X64-AVX1-NEXT: vzeroupper
@@ -1172,12 +1164,8 @@
 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X86-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
-; X86-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
-; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
-; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; X86-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; X86-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
+; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
 ; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -1326,12 +1314,8 @@
 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X64-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
-; X64-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
-; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
-; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; X64-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; X64-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
+; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT: vmovq %xmm0, %rax
 ; X64-AVX1-NEXT: vzeroupper
Index: test/CodeGen/X86/pr34592.ll
===================================================================
--- test/CodeGen/X86/pr34592.ll
+++ test/CodeGen/X86/pr34592.ll
@@ -19,31 +19,30 @@
 ; CHECK-NEXT: vmovaps 80(%rbp), %ymm13
 ; CHECK-NEXT: vmovaps 48(%rbp), %ymm14
 ; CHECK-NEXT: vmovaps 16(%rbp), %ymm15
-; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3],ymm2[4,5,6,7]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3,4,5],ymm2[6,7]
 ; CHECK-NEXT: vxorps %xmm6, %xmm6, %xmm6
-; CHECK-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1],ymm8[2,3,4,5,6,7]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm6 = ymm6[0,1],ymm11[2,3,4,5,6,7]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm11 = ymm6[0,1,2,3],ymm11[4,5],ymm6[6,7]
 ; CHECK-NEXT: # kill: def $xmm9 killed $xmm9 killed $ymm9
-; CHECK-NEXT: vmovdqa %xmm9, %xmm11
-; CHECK-NEXT: # kill: def $ymm11 killed $xmm11
-; CHECK-NEXT: vpalignr {{.*#+}} ymm6 = ymm2[8,9,10,11,12,13,14,15],ymm6[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm6[16,17,18,19,20,21,22,23]
-; CHECK-NEXT: vpermq {{.*#+}} ymm6 = ymm6[2,3,2,0]
+; CHECK-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vmovdqa %xmm9, %xmm0
+; CHECK-NEXT: # kill: def $ymm0 killed $xmm0
+; CHECK-NEXT: vpalignr {{.*#+}} ymm11 = ymm2[8,9,10,11,12,13,14,15],ymm11[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm11[16,17,18,19,20,21,22,23]
+; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[2,3,2,0]
 ; CHECK-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
 ; CHECK-NEXT: # implicit-def: $ymm0
 ; CHECK-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm0
-; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5],ymm6[6,7]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm11[0,1,2,3],ymm0[4,5],ymm11[6,7]
 ; CHECK-NEXT: vmovaps %xmm2, %xmm9
 ; CHECK-NEXT: # implicit-def: $ymm2
 ; CHECK-NEXT: vinserti128 $1, %xmm9, %ymm2, %ymm2
-; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm6 # 32-byte Reload
-; CHECK-NEXT: vpunpcklqdq {{.*#+}} ymm6 = ymm7[0],ymm6[0],ymm7[2],ymm6[2]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm6 = ymm7[0,1],ymm6[2,3],ymm7[4,5],ymm6[6,7]
 ; CHECK-NEXT: vpermq {{.*#+}} ymm6 = ymm6[2,1,2,3]
 ; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3],ymm2[4,5,6,7]
 ; CHECK-NEXT: vmovaps %xmm7, %xmm9
 ; CHECK-NEXT: vpslldq {{.*#+}} xmm9 = zero,zero,zero,zero,zero,zero,zero,zero,xmm9[0,1,2,3,4,5,6,7]
 ; CHECK-NEXT: # implicit-def: $ymm6
 ; CHECK-NEXT: vmovaps %xmm9, %xmm6
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm11 # 32-byte Reload
 ; CHECK-NEXT: vpalignr {{.*#+}} ymm11 = ymm11[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm11[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23]
 ; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[0,1,0,3]
 ; CHECK-NEXT: vpblendd {{.*#+}} ymm6 = ymm6[0,1,2,3],ymm11[4,5,6,7]
@@ -56,9 +55,9 @@
 ; CHECK-NEXT: vmovaps %ymm3, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
 ; CHECK-NEXT: vmovaps %ymm6, %ymm3
 ; CHECK-NEXT: vmovaps %ymm15, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm12, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
 ; CHECK-NEXT: vmovaps %ymm13, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
 ; CHECK-NEXT: vmovaps %ymm10, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vmovaps %ymm12, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
 ; CHECK-NEXT: vmovaps %ymm4, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
 ; CHECK-NEXT: vmovaps %ymm14, (%rsp) # 32-byte Spill
 ; CHECK-NEXT: movq %rbp, %rsp
Index: test/CodeGen/X86/vector-reduce-smax.ll
===================================================================
--- test/CodeGen/X86/vector-reduce-smax.ll
+++ test/CodeGen/X86/vector-reduce-smax.ll
@@ -158,9 +158,6 @@
 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vmovq %xmm0, %rax
 ; AVX1-NEXT: vzeroupper
@@ -343,9 +340,6 @@
 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vmovq %xmm0, %rax
 ; AVX1-NEXT: vzeroupper
@@ -645,9 +639,6 @@
 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vmovq %xmm0, %rax
 ; AVX1-NEXT: vzeroupper
Index: test/CodeGen/X86/vector-reduce-smin.ll
===================================================================
--- test/CodeGen/X86/vector-reduce-smin.ll
+++ test/CodeGen/X86/vector-reduce-smin.ll
@@ -157,9 +157,6 @@
 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vmovq %xmm0, %rax
 ; AVX1-NEXT: vzeroupper
@@ -342,9 +339,6 @@
 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vmovq %xmm0, %rax
 ; AVX1-NEXT: vzeroupper
@@ -644,9 +638,6 @@
 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vmovq %xmm0, %rax
 ; AVX1-NEXT: vzeroupper
Index: test/CodeGen/X86/vector-reduce-umax.ll
===================================================================
--- test/CodeGen/X86/vector-reduce-umax.ll
+++ test/CodeGen/X86/vector-reduce-umax.ll
@@ -164,12 +164,8 @@
 ; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
-; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vmovq %xmm0, %rax
 ; AVX1-NEXT: vzeroupper
@@ -364,12 +360,8 @@
 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
-; AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
-; AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vmovq %xmm0, %rax
 ; AVX1-NEXT: vzeroupper
@@ -693,10 +685,6 @@
 ; AVX1-NEXT: vxorpd %xmm4, %xmm0, %xmm2
 ; AVX1-NEXT: vxorpd %xmm4, %xmm1, %xmm3
 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vmovq %xmm0, %rax
 ; AVX1-NEXT: vzeroupper
Index: test/CodeGen/X86/vector-reduce-umin.ll
===================================================================
--- test/CodeGen/X86/vector-reduce-umin.ll
+++ test/CodeGen/X86/vector-reduce-umin.ll
@@ -163,12 +163,8 @@
 ; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vmovq %xmm0, %rax
 ; AVX1-NEXT: vzeroupper
@@ -363,12 +359,8 @@
 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
-; AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vmovq %xmm0, %rax
 ; AVX1-NEXT: vzeroupper
@@ -692,10 +684,6 @@
 ; AVX1-NEXT: vxorpd %xmm4, %xmm0, %xmm2
 ; AVX1-NEXT: vxorpd %xmm4, %xmm1, %xmm3
 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: vmovq %xmm0, %rax
 ; AVX1-NEXT: vzeroupper
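Illustrative note (not part of the patch): the AVX1 check-line deletions above all stem from the same effect. In these v4i64 min/max reductions the final vselect only feeds an extract of element 0, so the upper 128-bit half of its condition is never demanded, and the vextractf128 / compare / vinsertf128 sequence that computed it becomes dead. A hand-written sketch of the reduction idiom, with names and shuffle masks assumed rather than copied from the test files:

; v4i64 signed-max reduction; only element 0 of the last select is extracted.
define i64 @reduce_smax_v4i64(<4 x i64> %v) {
  %hi  = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %c1  = icmp sgt <4 x i64> %v, %hi
  %m1  = select <4 x i1> %c1, <4 x i64> %v, <4 x i64> %hi
  %odd = shufflevector <4 x i64> %m1, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %c2  = icmp sgt <4 x i64> %m1, %odd
  %m2  = select <4 x i1> %c2, <4 x i64> %m1, <4 x i64> %odd
  %r   = extractelement <4 x i64> %m2, i32 0
  ret i64 %r
}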