Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7253,6 +7253,35 @@ AddToWorklist(Add.getNode()); return DAG.getNode(ISD::XOR, DL, VT, Add, Shift); } + + // If this select has a condition (setcc) with narrower operands than the + // select, try to widen the compare to match the select width. + // TODO: This should be extended to handle any constant. + // TODO: This could be extended to handle non-loading patterns, but that + // requires thorough testing to avoid regressions. + if (isNullConstantOrNullSplatConstant(RHS)) { + EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger(); + EVT SetCCVT = getSetCCResultType(LHS.getValueType()); + unsigned SetCCWidth = SetCCVT.getScalarSizeInBits(); + unsigned WideWidth = WideVT.getScalarSizeInBits(); + bool IsSigned = isSignedIntSetCC(CC); + auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD; + if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() && + SetCCWidth != 1 && SetCCWidth < WideWidth && + TLI.isOperationLegalOrCustom(LoadExtOpcode, WideVT) && + TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) { + // Both compare operands can be widened for free. The LHS can use an + // extended load, and the RHS is a constant: + // vselect (ext (setcc load(X), C)), N1, N2 --> + // vselect (setcc extload(X), C'), N1, N2 + auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS); + SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS); + EVT WideSetCCVT = getSetCCResultType(WideVT); + SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC); + return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2); + } + } } if (SimplifySelectOps(N, N1, N2)) Index: test/CodeGen/X86/vsel-cmp-load.ll =================================================================== --- test/CodeGen/X86/vsel-cmp-load.ll +++ test/CodeGen/X86/vsel-cmp-load.ll @@ -8,24 +8,20 @@ define <8 x i32> @eq_zero(<8 x i8>* %p, <8 x i32> %x, <8 x i32> %y) { ; AVX1-LABEL: eq_zero: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero -; AVX1-NEXT: vpslld $24, %xmm3, %xmm3 -; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; AVX1-NEXT: vpslld $24, %xmm2, %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 +; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero +; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpcmpeqd %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpeqd %xmm4, %xmm2, %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 ; AVX1-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: eq_zero: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX2-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; AVX2-NEXT: vpslld $24, %ymm2, %ymm2 +; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2 ; AVX2-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; @@ -44,26 +40,25 @@ define <4 x i64> @ne_zero(<4 x i16>* %p, <4 x i64> %x, <4 x i64> %y) { ; AVX1-LABEL: ne_zero: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpmovsxdq %xmm2, %xmm3 -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] -; AVX1-NEXT: vpmovsxdq %xmm2, %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 +; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero +; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = mem[0],zero,zero,zero,mem[1],zero,zero,zero +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpcmpeqq %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpeqq %xmm4, %xmm2, %xmm2 +; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: ne_zero: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX2-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpmovsxdq %xmm2, %ymm2 +; AVX2-NEXT: vpcmpeqq %ymm3, %ymm2, %ymm2 +; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 +; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; @@ -82,13 +77,12 @@ define <16 x i16> @sgt_zero(<16 x i8>* %p, <16 x i16> %x, <16 x i16> %y) { ; AVX1-LABEL: sgt_zero: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa (%rdi), %xmm2 -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpmovsxbw %xmm2, %xmm3 -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] -; AVX1-NEXT: vpmovsxbw %xmm2, %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 +; AVX1-NEXT: vpmovsxbw (%rdi), %xmm2 +; AVX1-NEXT: vpmovsxbw 8(%rdi), %xmm3 +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpcmpgtw %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpgtw %xmm4, %xmm2, %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 @@ -96,10 +90,9 @@ ; ; AVX2-LABEL: sgt_zero: ; AVX2: # %bb.0: -; AVX2-NEXT: vmovdqa (%rdi), %xmm2 +; AVX2-NEXT: vpmovsxbw (%rdi), %ymm2 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX2-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpmovsxbw %xmm2, %ymm2 +; AVX2-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; @@ -119,23 +112,21 @@ ; AVX1-LABEL: slt_zero: ; AVX1: # %bb.0: ; AVX1-NEXT: vpmovsxbw (%rdi), %xmm2 -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero -; AVX1-NEXT: vpslld $24, %xmm3, %xmm3 -; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; AVX1-NEXT: vpslld $24, %xmm2, %xmm2 +; AVX1-NEXT: vpmovsxwd %xmm2, %xmm3 +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] +; AVX1-NEXT: vpmovsxwd %xmm2, %xmm2 +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpcmpgtd %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vpcmpgtd %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 ; AVX1-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: slt_zero: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmovsxbw (%rdi), %xmm2 +; AVX2-NEXT: vpmovsxbd (%rdi), %ymm2 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX2-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 -; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; AVX2-NEXT: vpslld $24, %ymm2, %ymm2 +; AVX2-NEXT: vpcmpgtd %ymm2, %ymm3, %ymm2 ; AVX2-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; @@ -154,22 +145,20 @@ define <4 x double> @eq_zero_fp_select(<4 x i8>* %p, <4 x double> %x, <4 x double> %y) { ; AVX1-LABEL: eq_zero_fp_select: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpmovsxdq %xmm2, %xmm3 -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] -; AVX1-NEXT: vpmovsxdq %xmm2, %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 +; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero +; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpcmpeqq %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpeqq %xmm4, %xmm2, %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: eq_zero_fp_select: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero +; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX2-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpmovsxdq %xmm2, %ymm2 +; AVX2-NEXT: vpcmpeqq %ymm3, %ymm2, %ymm2 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; @@ -188,28 +177,25 @@ define <8 x float> @ne_zero_fp_select(<8 x i8>* %p, <8 x float> %x, <8 x float> %y) { ; AVX1-LABEL: ne_zero_fp_select: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero -; AVX1-NEXT: vpslld $24, %xmm3, %xmm3 -; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; AVX1-NEXT: vpslld $24, %xmm2, %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 +; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero +; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpcmpeqd %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpeqd %xmm4, %xmm2, %xmm2 +; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 ; AVX1-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: ne_zero_fp_select: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX2-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; AVX2-NEXT: vpslld $24, %ymm2, %ymm2 +; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2 +; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 +; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2 ; AVX2-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; @@ -229,21 +215,21 @@ ; AVX1-LABEL: sgt_zero_fp_select: ; AVX1: # %bb.0: ; AVX1-NEXT: vpmovsxbd (%rdi), %xmm2 -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpmovsxdq %xmm2, %xmm3 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] ; AVX1-NEXT: vpmovsxdq %xmm2, %xmm2 +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm2 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 ; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: sgt_zero_fp_select: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmovsxbd (%rdi), %xmm2 +; AVX2-NEXT: vpmovsxbq (%rdi), %ymm2 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX2-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpmovsxdq %xmm2, %ymm2 +; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 ; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; @@ -263,15 +249,20 @@ define <8 x float> @slt_zero_fp_select(<8 x i16>* %p, <8 x float> %x, <8 x float> %y) { ; AVX1-LABEL: slt_zero_fp_select: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmovsxwd 8(%rdi), %xmm2 -; AVX1-NEXT: vpmovsxwd (%rdi), %xmm3 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 +; AVX1-NEXT: vpmovsxwd (%rdi), %xmm2 +; AVX1-NEXT: vpmovsxwd 8(%rdi), %xmm3 +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpcmpgtd %xmm3, %xmm4, %xmm3 +; AVX1-NEXT: vpcmpgtd %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 ; AVX1-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: slt_zero_fp_select: ; AVX2: # %bb.0: ; AVX2-NEXT: vpmovsxwd (%rdi), %ymm2 +; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vpcmpgtd %ymm2, %ymm3, %ymm2 ; AVX2-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ;