diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -39898,6 +39898,65 @@
   return DAG.getNode(X86ISD::ANDNP, SDLoc(N), VT, X, Y);
 }
 
+// Try to widen AND, OR and XOR nodes to VT in order to remove casts around
+// logical operations, like in the example below.
+//   or (and (truncate x, truncate y)),
+//      (xor (truncate z, build_vector (constants)))
+// Given a target type \p VT, we generate
+//   or (and x, y), (xor z, zext(build_vector (constants)))
+// given x, y and z are of type \p VT. We can do so, if operands are either
+// truncates from VT types, the second operand is a vector of constants or can
+// be recursively promoted.
+static SDValue PromoteMaskArithmetic(SDNode *N, EVT VT, SelectionDAG &DAG,
+                                     unsigned Depth) {
+  // Limit recursion to avoid excessive compile times.
+  if (Depth >= SelectionDAG::MaxRecursionDepth)
+    return SDValue();
+
+  if (N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND &&
+      N->getOpcode() != ISD::OR)
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDLoc DL(N);
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isOperationLegalOrPromote(N->getOpcode(), VT))
+    return SDValue();
+
+  if (SDValue NN0 = PromoteMaskArithmetic(N0.getNode(), VT, DAG, Depth + 1))
+    N0 = NN0;
+  else {
+    // The Left side has to be a trunc.
+    if (N0.getOpcode() != ISD::TRUNCATE)
+      return SDValue();
+
+    // The type of the truncated inputs.
+    if (N0.getOperand(0).getValueType() != VT)
+      return SDValue();
+
+    N0 = N0.getOperand(0);
+  }
+
+  if (SDValue NN1 = PromoteMaskArithmetic(N1.getNode(), VT, DAG, Depth + 1))
+    N1 = NN1;
+  else {
+    // The right side has to be a 'trunc' or a constant vector.
+    bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
+                    N1.getOperand(0).getValueType() == VT;
+    if (!RHSTrunc && !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()))
+      return SDValue();
+
+    if (RHSTrunc)
+      N1 = N1.getOperand(0);
+    else
+      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N1);
+  }
+
+  return DAG.getNode(N->getOpcode(), DL, VT, N0, N1);
+}
+
 // On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM sized
 // register. In most cases we actually compare or select YMM-sized registers
 // and mixing the two types creates horrible code. This method optimizes
@@ -39909,6 +39968,7 @@
   EVT VT = N->getValueType(0);
   assert(VT.isVector() && "Expected vector type");
 
+  SDLoc DL(N);
   assert((N->getOpcode() == ISD::ANY_EXTEND ||
           N->getOpcode() == ISD::ZERO_EXTEND ||
           N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node");
@@ -39916,46 +39976,11 @@
   SDValue Narrow = N->getOperand(0);
   EVT NarrowVT = Narrow.getValueType();
 
-  if (Narrow->getOpcode() != ISD::XOR &&
-      Narrow->getOpcode() != ISD::AND &&
-      Narrow->getOpcode() != ISD::OR)
-    return SDValue();
-
-  SDValue N0 = Narrow->getOperand(0);
-  SDValue N1 = Narrow->getOperand(1);
-  SDLoc DL(Narrow);
-
-  // The Left side has to be a trunc.
-  if (N0.getOpcode() != ISD::TRUNCATE)
-    return SDValue();
-
-  // The type of the truncated inputs.
-  if (N0.getOperand(0).getValueType() != VT)
-    return SDValue();
-
-  // The right side has to be a 'trunc' or a constant vector.
-  bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
-                  N1.getOperand(0).getValueType() == VT;
-  if (!RHSTrunc &&
-      !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()))
-    return SDValue();
-
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-
-  if (!TLI.isOperationLegalOrPromote(Narrow->getOpcode(), VT))
-    return SDValue();
-
-  // Set N0 and N1 to hold the inputs to the new wide operation.
-  N0 = N0.getOperand(0);
-  if (RHSTrunc)
-    N1 = N1.getOperand(0);
-  else
-    N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N1);
-
   // Generate the wide operation.
-  SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, VT, N0, N1);
-  unsigned Opcode = N->getOpcode();
-  switch (Opcode) {
+  SDValue Op = PromoteMaskArithmetic(Narrow.getNode(), VT, DAG, 0);
+  if (!Op)
+    return SDValue();
+  switch (N->getOpcode()) {
   default: llvm_unreachable("Unexpected opcode");
   case ISD::ANY_EXTEND:
     return Op;
diff --git a/llvm/test/CodeGen/X86/v8i1-masks.ll b/llvm/test/CodeGen/X86/v8i1-masks.ll
--- a/llvm/test/CodeGen/X86/v8i1-masks.ll
+++ b/llvm/test/CodeGen/X86/v8i1-masks.ll
@@ -212,18 +212,10 @@
 ; X32-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
 ; X32-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
 ; X32-NEXT:    vcmpltps LCPI4_1, %ymm0, %ymm2
-; X32-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; X32-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; X32-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
-; X32-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; X32-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm0
-; X32-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; X32-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
-; X32-NEXT:    vpand %xmm0, %xmm1, %xmm0
-; X32-NEXT:    vpmovsxwd %xmm0, %xmm1
-; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X32-NEXT:    vpmovsxwd %xmm0, %xmm0
-; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X32-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X32-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm0
+; X32-NEXT:    vandps %ymm0, %ymm2, %ymm0
+; X32-NEXT:    vandps %ymm0, %ymm1, %ymm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: three_ands:
@@ -231,18 +223,10 @@
 ; X64-NEXT:    vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
 ; X64-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
 ; X64-NEXT:    vcmpltps {{.*}}(%rip), %ymm0, %ymm2
-; X64-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; X64-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; X64-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
-; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; X64-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm0
-; X64-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; X64-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
-; X64-NEXT:    vpand %xmm0, %xmm1, %xmm0
-; X64-NEXT:    vpmovsxwd %xmm0, %xmm1
-; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X64-NEXT:    vpmovsxwd %xmm0, %xmm0
-; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X64-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X64-NEXT:    vcmpneqps %ymm3, %ymm0, %ymm0
+; X64-NEXT:    vandps %ymm0, %ymm2, %ymm0
+; X64-NEXT:    vandps %ymm0, %ymm1, %ymm0
 ; X64-NEXT:    retq
 ;
 ; X32-AVX2-LABEL: three_ands:
@@ -251,15 +235,10 @@
 ; X32-AVX2-NEXT:    vcmpleps %ymm0, %ymm1, %ymm1
 ; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
 ; X32-AVX2-NEXT:    vcmpltps %ymm2, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; X32-AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
-; X32-AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT:    vcmpneqps %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; X32-AVX2-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
-; X32-AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
-; X32-AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
+;
X32-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 +; X32-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X32-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X32-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: three_ands: @@ -268,15 +247,10 @@ ; X64-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; X64-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X64-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X64-AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X64-AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X64-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 -; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X64-AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 -; X64-AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; X64-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 +; X64-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X64-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, @@ -294,22 +268,12 @@ ; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X32-NEXT: vcmpltps LCPI5_1, %ymm0, %ymm2 -; X32-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X32-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X32-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X32-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; X32-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X32-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X32-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 +; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X32-NEXT: vandps %ymm3, %ymm2, %ymm2 ; X32-NEXT: vcmpneqps LCPI5_2, %ymm0, %ymm0 -; X32-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X32-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 -; X32-NEXT: vpand %xmm0, %xmm2, %xmm0 -; X32-NEXT: vpand %xmm0, %xmm1, %xmm0 -; X32-NEXT: vpmovsxwd %xmm0, %xmm1 -; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; X32-NEXT: vpmovsxwd %xmm0, %xmm0 -; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X32-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X32-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: four_ands: @@ -317,22 +281,12 @@ ; X64-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X64-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X64-NEXT: vcmpltps {{.*}}(%rip), %ymm0, %ymm2 -; X64-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X64-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X64-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; X64-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X64-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X64-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 +; X64-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X64-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X64-NEXT: vandps %ymm3, %ymm2, %ymm2 ; X64-NEXT: vcmpneqps {{.*}}(%rip), %ymm0, %ymm0 -; X64-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X64-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 -; X64-NEXT: vpand %xmm0, %xmm2, %xmm0 -; X64-NEXT: vpand %xmm0, %xmm1, %xmm0 -; X64-NEXT: vpmovsxwd %xmm0, %xmm1 -; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; X64-NEXT: vpmovsxwd %xmm0, %xmm0 -; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X64-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X64-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; ; X32-AVX2-LABEL: four_ands: @@ -341,20 +295,13 @@ ; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] 
; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X32-AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X32-AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X32-AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 +; X32-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X32-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2 ; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] ; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X32-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X32-AVX2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 -; X32-AVX2-NEXT: vpand %xmm0, %xmm2, %xmm0 -; X32-AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 -; X32-AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; X32-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X32-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X32-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: four_ands: @@ -363,20 +310,13 @@ ; X64-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; X64-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X64-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X64-AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X64-AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X64-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X64-AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X64-AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 +; X64-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X64-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2 ; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] ; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X64-AVX2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpand %xmm0, %xmm2, %xmm0 -; X64-AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 -; X64-AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; X64-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X64-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, @@ -396,26 +336,14 @@ ; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X32-NEXT: vcmpltps LCPI6_1, %ymm0, %ymm2 -; X32-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X32-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X32-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X32-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; X32-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X32-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X32-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 +; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X32-NEXT: vandps %ymm3, %ymm2, %ymm2 ; X32-NEXT: vcmpneqps LCPI6_2, %ymm0, %ymm3 -; X32-NEXT: vextractf128 $1, %ymm3, %xmm4 -; X32-NEXT: vpackssdw %xmm4, %xmm3, %xmm3 -; X32-NEXT: vpand %xmm3, %xmm2, %xmm2 +; X32-NEXT: vandps %ymm3, %ymm2, %ymm2 ; X32-NEXT: vcmpneqps LCPI6_3, %ymm0, %ymm0 -; X32-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X32-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 -; X32-NEXT: vpand %xmm0, %xmm2, %xmm0 -; X32-NEXT: vpand %xmm0, %xmm1, %xmm0 -; X32-NEXT: vpmovsxwd %xmm0, %xmm1 -; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; X32-NEXT: vpmovsxwd %xmm0, %xmm0 -; X32-NEXT: vinsertf128 $1, 
%xmm0, %ymm1, %ymm0 +; X32-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X32-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: five_ands: @@ -423,26 +351,14 @@ ; X64-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X64-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X64-NEXT: vcmpltps {{.*}}(%rip), %ymm0, %ymm2 -; X64-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X64-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X64-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; X64-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X64-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X64-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 +; X64-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X64-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X64-NEXT: vandps %ymm3, %ymm2, %ymm2 ; X64-NEXT: vcmpneqps {{.*}}(%rip), %ymm0, %ymm3 -; X64-NEXT: vextractf128 $1, %ymm3, %xmm4 -; X64-NEXT: vpackssdw %xmm4, %xmm3, %xmm3 -; X64-NEXT: vpand %xmm3, %xmm2, %xmm2 +; X64-NEXT: vandps %ymm3, %ymm2, %ymm2 ; X64-NEXT: vcmpneqps {{.*}}(%rip), %ymm0, %ymm0 -; X64-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X64-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 -; X64-NEXT: vpand %xmm0, %xmm2, %xmm0 -; X64-NEXT: vpand %xmm0, %xmm1, %xmm0 -; X64-NEXT: vpmovsxwd %xmm0, %xmm1 -; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; X64-NEXT: vpmovsxwd %xmm0, %xmm0 -; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X64-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X64-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; ; X32-AVX2-LABEL: five_ands: @@ -451,25 +367,16 @@ ; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X32-AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X32-AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X32-AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 +; X32-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X32-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2 ; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] ; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X32-AVX2-NEXT: vextractf128 $1, %ymm3, %xmm4 -; X32-AVX2-NEXT: vpackssdw %xmm4, %xmm3, %xmm3 -; X32-AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 +; X32-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2 ; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] ; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X32-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X32-AVX2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 -; X32-AVX2-NEXT: vpand %xmm0, %xmm2, %xmm0 -; X32-AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 -; X32-AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; X32-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X32-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X32-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: five_ands: @@ -478,25 +385,16 @@ ; X64-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; X64-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X64-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X64-AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X64-AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X64-AVX2-NEXT: vxorps %xmm2, %xmm2, 
%xmm2 -; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X64-AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X64-AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 +; X64-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X64-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2 ; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] ; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X64-AVX2-NEXT: vextractf128 $1, %ymm3, %xmm4 -; X64-AVX2-NEXT: vpackssdw %xmm4, %xmm3, %xmm3 -; X64-AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 +; X64-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2 ; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] ; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X64-AVX2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpand %xmm0, %xmm2, %xmm0 -; X64-AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 -; X64-AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; X64-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 +; X64-AVX2-NEXT: vandps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, @@ -560,18 +458,10 @@ ; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X32-NEXT: vcmpltps LCPI8_1, %ymm0, %ymm2 -; X32-NEXT: vorps %ymm2, %ymm1, %ymm1 -; X32-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X32-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X32-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; X32-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 -; X32-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X32-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; X32-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X32-NEXT: vpmovsxwd %xmm0, %xmm1 -; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; X32-NEXT: vpmovsxwd %xmm0, %xmm0 -; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 +; X32-NEXT: vorps %ymm0, %ymm2, %ymm0 +; X32-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: three_or: @@ -579,18 +469,10 @@ ; X64-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X64-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X64-NEXT: vcmpltps {{.*}}(%rip), %ymm0, %ymm2 -; X64-NEXT: vorps %ymm2, %ymm1, %ymm1 -; X64-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X64-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; X64-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 -; X64-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X64-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; X64-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X64-NEXT: vpmovsxwd %xmm0, %xmm1 -; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; X64-NEXT: vpmovsxwd %xmm0, %xmm0 -; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X64-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X64-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 +; X64-NEXT: vorps %ymm0, %ymm2, %ymm0 +; X64-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; ; X32-AVX2-LABEL: three_or: @@ -599,15 +481,10 @@ ; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1 -; X32-AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X32-AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 -; 
X32-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X32-AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; X32-AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X32-AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; X32-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 +; X32-AVX2-NEXT: vorps %ymm0, %ymm2, %ymm0 +; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X32-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: three_or: @@ -616,15 +493,10 @@ ; X64-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; X64-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X64-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1 -; X64-AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X64-AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X64-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 -; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X64-AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X64-AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; X64-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 +; X64-AVX2-NEXT: vorps %ymm0, %ymm2, %ymm0 +; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, @@ -642,22 +514,12 @@ ; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X32-NEXT: vcmpltps LCPI9_1, %ymm0, %ymm2 -; X32-NEXT: vorps %ymm2, %ymm1, %ymm1 -; X32-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X32-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X32-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; X32-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X32-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X32-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 +; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X32-NEXT: vorps %ymm3, %ymm2, %ymm2 ; X32-NEXT: vcmpneqps LCPI9_2, %ymm0, %ymm0 -; X32-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X32-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 -; X32-NEXT: vpor %xmm0, %xmm2, %xmm0 -; X32-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X32-NEXT: vpmovsxwd %xmm0, %xmm1 -; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; X32-NEXT: vpmovsxwd %xmm0, %xmm0 -; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X32-NEXT: vorps %ymm0, %ymm2, %ymm0 +; X32-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: four_or: @@ -665,22 +527,12 @@ ; X64-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X64-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X64-NEXT: vcmpltps {{.*}}(%rip), %ymm0, %ymm2 -; X64-NEXT: vorps %ymm2, %ymm1, %ymm1 -; X64-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X64-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; X64-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X64-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X64-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 +; X64-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X64-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X64-NEXT: vorps %ymm3, %ymm2, %ymm2 ; X64-NEXT: vcmpneqps {{.*}}(%rip), %ymm0, %ymm0 -; X64-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X64-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 -; X64-NEXT: vpor %xmm0, %xmm2, %xmm0 -; X64-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X64-NEXT: vpmovsxwd %xmm0, %xmm1 -; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; X64-NEXT: vpmovsxwd %xmm0, %xmm0 -; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X64-NEXT: vorps %ymm0, %ymm2, %ymm0 +; X64-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; ; X32-AVX2-LABEL: four_or: @@ -689,20 
+541,13 @@ ; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1 -; X32-AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X32-AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X32-AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 +; X32-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X32-AVX2-NEXT: vorps %ymm3, %ymm2, %ymm2 ; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] ; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X32-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X32-AVX2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 -; X32-AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0 -; X32-AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X32-AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; X32-AVX2-NEXT: vorps %ymm0, %ymm2, %ymm0 +; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X32-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: four_or: @@ -711,20 +556,13 @@ ; X64-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; X64-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X64-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1 -; X64-AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X64-AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X64-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X64-AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X64-AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 +; X64-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X64-AVX2-NEXT: vorps %ymm3, %ymm2, %ymm2 ; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] ; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X64-AVX2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0 -; X64-AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X64-AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; X64-AVX2-NEXT: vorps %ymm0, %ymm2, %ymm0 +; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, @@ -744,26 +582,14 @@ ; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X32-NEXT: vcmpltps LCPI10_1, %ymm0, %ymm2 -; X32-NEXT: vorps %ymm2, %ymm1, %ymm1 -; X32-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X32-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X32-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; X32-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X32-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X32-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 +; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X32-NEXT: vorps %ymm3, %ymm2, %ymm2 ; X32-NEXT: vcmpneqps LCPI10_2, %ymm0, %ymm3 -; X32-NEXT: vextractf128 $1, %ymm3, %xmm4 -; X32-NEXT: vpackssdw %xmm4, %xmm3, %xmm3 -; X32-NEXT: vpor %xmm3, %xmm2, %xmm2 +; X32-NEXT: vorps %ymm3, %ymm2, %ymm2 ; X32-NEXT: vcmpneqps LCPI10_3, %ymm0, %ymm0 -; X32-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X32-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 -; X32-NEXT: vpor %xmm0, %xmm2, %xmm0 -; X32-NEXT: vpor %xmm0, %xmm1, %xmm0 -; 
X32-NEXT: vpmovsxwd %xmm0, %xmm1 -; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; X32-NEXT: vpmovsxwd %xmm0, %xmm0 -; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X32-NEXT: vorps %ymm0, %ymm2, %ymm0 +; X32-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: five_or: @@ -771,26 +597,14 @@ ; X64-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X64-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X64-NEXT: vcmpltps {{.*}}(%rip), %ymm0, %ymm2 -; X64-NEXT: vorps %ymm2, %ymm1, %ymm1 -; X64-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X64-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; X64-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X64-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X64-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 +; X64-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X64-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X64-NEXT: vorps %ymm3, %ymm2, %ymm2 ; X64-NEXT: vcmpneqps {{.*}}(%rip), %ymm0, %ymm3 -; X64-NEXT: vextractf128 $1, %ymm3, %xmm4 -; X64-NEXT: vpackssdw %xmm4, %xmm3, %xmm3 -; X64-NEXT: vpor %xmm3, %xmm2, %xmm2 +; X64-NEXT: vorps %ymm3, %ymm2, %ymm2 ; X64-NEXT: vcmpneqps {{.*}}(%rip), %ymm0, %ymm0 -; X64-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X64-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 -; X64-NEXT: vpor %xmm0, %xmm2, %xmm0 -; X64-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X64-NEXT: vpmovsxwd %xmm0, %xmm1 -; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; X64-NEXT: vpmovsxwd %xmm0, %xmm0 -; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X64-NEXT: vorps %ymm0, %ymm2, %ymm0 +; X64-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; ; X32-AVX2-LABEL: five_or: @@ -799,25 +613,16 @@ ; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1 -; X32-AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X32-AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X32-AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 +; X32-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X32-AVX2-NEXT: vorps %ymm3, %ymm2, %ymm2 ; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] ; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X32-AVX2-NEXT: vextractf128 $1, %ymm3, %xmm4 -; X32-AVX2-NEXT: vpackssdw %xmm4, %xmm3, %xmm3 -; X32-AVX2-NEXT: vpor %xmm3, %xmm2, %xmm2 +; X32-AVX2-NEXT: vorps %ymm3, %ymm2, %ymm2 ; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] ; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X32-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X32-AVX2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 -; X32-AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0 -; X32-AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X32-AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; X32-AVX2-NEXT: vorps %ymm0, %ymm2, %ymm0 +; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X32-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: five_or: @@ -826,25 +631,16 @@ ; X64-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; X64-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 -; X64-AVX2-NEXT: vorps %ymm2, %ymm1, %ymm1 -; 
X64-AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X64-AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X64-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X64-AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X64-AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 +; X64-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X64-AVX2-NEXT: vorps %ymm3, %ymm2, %ymm2 ; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] ; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X64-AVX2-NEXT: vextractf128 $1, %ymm3, %xmm4 -; X64-AVX2-NEXT: vpackssdw %xmm4, %xmm3, %xmm3 -; X64-AVX2-NEXT: vpor %xmm3, %xmm2, %xmm2 +; X64-AVX2-NEXT: vorps %ymm3, %ymm2, %ymm2 ; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] ; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X64-AVX2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0 -; X64-AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X64-AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; X64-AVX2-NEXT: vorps %ymm0, %ymm2, %ymm0 +; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, @@ -923,19 +719,11 @@ ; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X32-NEXT: vcmpltps LCPI12_1, %ymm0, %ymm2 ; X32-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X32-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X32-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X32-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; X32-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; X32-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 ; X32-NEXT: vcmpneqps LCPI12_2, %ymm0, %ymm0 ; X32-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X32-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X32-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; X32-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X32-NEXT: vpmovsxwd %xmm0, %xmm1 -; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; X32-NEXT: vpmovsxwd %xmm0, %xmm0 -; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X32-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: four_or_and: @@ -944,19 +732,11 @@ ; X64-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X64-NEXT: vcmpltps {{.*}}(%rip), %ymm0, %ymm2 ; X64-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X64-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X64-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; X64-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 ; X64-NEXT: vcmpneqps {{.*}}(%rip), %ymm0, %ymm0 ; X64-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X64-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X64-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; X64-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X64-NEXT: vpmovsxwd %xmm0, %xmm1 -; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; X64-NEXT: vpmovsxwd %xmm0, %xmm0 -; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X64-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; ; X32-AVX2-LABEL: four_or_and: @@ -966,17 +746,12 @@ ; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 ; X32-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X32-AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X32-AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 ; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 ; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = 
[1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] ; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 ; X32-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X32-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X32-AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; X32-AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X32-AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X32-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: four_or_and: @@ -986,17 +761,12 @@ ; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; X64-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 ; X64-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1 -; X64-AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X64-AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 ; X64-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 ; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] ; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 ; X64-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X64-AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X64-AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, @@ -1019,19 +789,11 @@ ; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 ; X32-NEXT: vandps %ymm3, %ymm2, %ymm2 -; X32-NEXT: vorps %ymm1, %ymm2, %ymm1 -; X32-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X32-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X32-NEXT: vcmpneqps LCPI13_2, %ymm0, %ymm2 +; X32-NEXT: vcmpneqps LCPI13_2, %ymm0, %ymm3 ; X32-NEXT: vcmpneqps LCPI13_3, %ymm0, %ymm0 -; X32-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X32-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X32-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; X32-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X32-NEXT: vpmovsxwd %xmm0, %xmm1 -; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; X32-NEXT: vpmovsxwd %xmm0, %xmm0 -; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X32-NEXT: vandps %ymm0, %ymm3, %ymm0 +; X32-NEXT: vorps %ymm0, %ymm1, %ymm0 +; X32-NEXT: vorps %ymm0, %ymm2, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: five_or_and: @@ -1042,19 +804,11 @@ ; X64-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; X64-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 ; X64-NEXT: vandps %ymm3, %ymm2, %ymm2 -; X64-NEXT: vorps %ymm1, %ymm2, %ymm1 -; X64-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X64-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X64-NEXT: vcmpneqps {{.*}}(%rip), %ymm0, %ymm2 +; X64-NEXT: vcmpneqps {{.*}}(%rip), %ymm0, %ymm3 ; X64-NEXT: vcmpneqps {{.*}}(%rip), %ymm0, %ymm0 -; X64-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X64-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X64-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; X64-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X64-NEXT: vpmovsxwd %xmm0, %xmm1 -; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; X64-NEXT: vpmovsxwd %xmm0, %xmm0 -; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X64-NEXT: vandps %ymm0, %ymm3, %ymm0 +; X64-NEXT: vorps %ymm0, %ymm1, %ymm0 +; X64-NEXT: vorps %ymm0, %ymm2, %ymm0 ; X64-NEXT: retq ; ; X32-AVX2-LABEL: five_or_and: @@ -1066,18 +820,13 @@ ; X32-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 ; X32-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2 -; X32-AVX2-NEXT: vorps %ymm1, %ymm2, %ymm1 -; X32-AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X32-AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; 
X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] -; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] -; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X32-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X32-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X32-AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; X32-AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X32-AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] +; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm4 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] +; X32-AVX2-NEXT: vcmpneqps %ymm4, %ymm0, %ymm0 +; X32-AVX2-NEXT: vandps %ymm0, %ymm3, %ymm0 +; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 +; X32-AVX2-NEXT: vorps %ymm0, %ymm2, %ymm0 ; X32-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: five_or_and: @@ -1089,18 +838,13 @@ ; X64-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 ; X64-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2 -; X64-AVX2-NEXT: vorps %ymm1, %ymm2, %ymm1 -; X64-AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X64-AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] -; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 -; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] -; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 -; X64-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X64-AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X64-AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] +; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm4 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] +; X64-AVX2-NEXT: vcmpneqps %ymm4, %ymm0, %ymm0 +; X64-AVX2-NEXT: vandps %ymm0, %ymm3, %ymm0 +; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 +; X64-AVX2-NEXT: vorps %ymm0, %ymm2, %ymm0 ; X64-AVX2-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, @@ -1123,19 +867,11 @@ ; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X32-NEXT: vcmpltps LCPI14_1, %ymm0, %ymm2 ; X32-NEXT: vxorps %ymm2, %ymm1, %ymm1 -; X32-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X32-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X32-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; X32-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; X32-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 ; X32-NEXT: vcmpneqps LCPI14_2, %ymm0, %ymm0 ; X32-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X32-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X32-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; X32-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X32-NEXT: vpmovsxwd %xmm0, %xmm1 -; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; X32-NEXT: vpmovsxwd %xmm0, %xmm0 -; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X32-NEXT: vorps 
%ymm0, %ymm1, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: four_or_and_xor: @@ -1144,19 +880,11 @@ ; X64-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 ; X64-NEXT: vcmpltps {{.*}}(%rip), %ymm0, %ymm2 ; X64-NEXT: vxorps %ymm2, %ymm1, %ymm1 -; X64-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X64-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 -; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; X64-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 ; X64-NEXT: vcmpneqps {{.*}}(%rip), %ymm0, %ymm0 ; X64-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X64-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X64-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; X64-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X64-NEXT: vpmovsxwd %xmm0, %xmm1 -; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; X64-NEXT: vpmovsxwd %xmm0, %xmm0 -; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X64-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; ; X32-AVX2-LABEL: four_or_and_xor: @@ -1166,17 +894,12 @@ ; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 ; X32-AVX2-NEXT: vxorps %ymm2, %ymm1, %ymm1 -; X32-AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X32-AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 ; X32-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 ; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] ; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 ; X32-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X32-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X32-AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; X32-AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X32-AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X32-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: four_or_and_xor: @@ -1186,17 +909,12 @@ ; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; X64-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 ; X64-AVX2-NEXT: vxorps %ymm2, %ymm1, %ymm1 -; X64-AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X64-AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 ; X64-AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 ; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] ; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm0 ; X64-AVX2-NEXT: vandps %ymm0, %ymm2, %ymm0 -; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X64-AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X64-AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, @@ -1215,100 +933,66 @@ ; X32: ## %bb.0: ## %entry ; X32-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X32-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X32-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 ; X32-NEXT: vcmpltps LCPI15_1, %ymm0, %ymm2 ; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X32-NEXT: vxorps %ymm3, %ymm2, %ymm2 -; X32-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X32-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 -; X32-NEXT: vcmpneqps LCPI15_2, %ymm0, %ymm3 +; X32-NEXT: vcmpneqps LCPI15_2, %ymm0, %ymm4 ; X32-NEXT: vcmpneqps LCPI15_3, %ymm0, %ymm0 -; X32-NEXT: vandps %ymm0, %ymm3, %ymm0 -; X32-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X32-NEXT: 
vpackssdw %xmm3, %xmm0, %xmm0 -; X32-NEXT: vpxor %xmm0, %xmm2, %xmm0 -; X32-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X32-NEXT: vpmovsxwd %xmm0, %xmm1 -; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; X32-NEXT: vpmovsxwd %xmm0, %xmm0 -; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X32-NEXT: vandps %ymm0, %ymm4, %ymm0 +; X32-NEXT: vxorps %ymm0, %ymm3, %ymm0 +; X32-NEXT: vxorps %ymm0, %ymm2, %ymm0 +; X32-NEXT: vorps %ymm1, %ymm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: five_or_and_xor: ; X64: ## %bb.0: ## %entry ; X64-NEXT: vmovaps {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X64-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X64-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X64-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 ; X64-NEXT: vcmpltps {{.*}}(%rip), %ymm0, %ymm2 ; X64-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; X64-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X64-NEXT: vxorps %ymm3, %ymm2, %ymm2 -; X64-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X64-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 -; X64-NEXT: vcmpneqps {{.*}}(%rip), %ymm0, %ymm3 +; X64-NEXT: vcmpneqps {{.*}}(%rip), %ymm0, %ymm4 ; X64-NEXT: vcmpneqps {{.*}}(%rip), %ymm0, %ymm0 -; X64-NEXT: vandps %ymm0, %ymm3, %ymm0 -; X64-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X64-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 -; X64-NEXT: vpxor %xmm0, %xmm2, %xmm0 -; X64-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-NEXT: vpmovsxwd %xmm0, %xmm1 -; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; X64-NEXT: vpmovsxwd %xmm0, %xmm0 -; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X64-NEXT: vandps %ymm0, %ymm4, %ymm0 +; X64-NEXT: vxorps %ymm0, %ymm3, %ymm0 +; X64-NEXT: vxorps %ymm0, %ymm2, %ymm0 +; X64-NEXT: vorps %ymm1, %ymm0, %ymm0 ; X64-NEXT: retq ; ; X32-AVX2-LABEL: five_or_and_xor: ; X32-AVX2: ## %bb.0: ## %entry ; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X32-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X32-AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X32-AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 ; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 ; X32-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X32-AVX2-NEXT: vxorps %ymm3, %ymm2, %ymm2 -; X32-AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X32-AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] -; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm4 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] -; X32-AVX2-NEXT: vcmpneqps %ymm4, %ymm0, %ymm0 -; X32-AVX2-NEXT: vandps %ymm0, %ymm3, %ymm0 -; X32-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X32-AVX2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 -; X32-AVX2-NEXT: vpxor %xmm0, %xmm2, %xmm0 -; X32-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X32-AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] +; X32-AVX2-NEXT: vcmpneqps %ymm4, %ymm0, %ymm4 +; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm5 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] +; X32-AVX2-NEXT: vcmpneqps %ymm5, %ymm0, %ymm0 +; X32-AVX2-NEXT: vandps %ymm0, %ymm4, %ymm0 
+; X32-AVX2-NEXT: vxorps %ymm0, %ymm3, %ymm0 +; X32-AVX2-NEXT: vxorps %ymm0, %ymm2, %ymm0 +; X32-AVX2-NEXT: vorps %ymm1, %ymm0, %ymm0 ; X32-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: five_or_and_xor: ; X64-AVX2: ## %bb.0: ## %entry ; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] ; X64-AVX2-NEXT: vcmpleps %ymm0, %ymm1, %ymm1 -; X64-AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2 -; X64-AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 ; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; X64-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 ; X64-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X64-AVX2-NEXT: vxorps %ymm3, %ymm2, %ymm2 -; X64-AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X64-AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 -; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] -; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm4 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] -; X64-AVX2-NEXT: vcmpneqps %ymm4, %ymm0, %ymm0 -; X64-AVX2-NEXT: vandps %ymm0, %ymm3, %ymm0 -; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm3 -; X64-AVX2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpxor %xmm0, %xmm2, %xmm0 -; X64-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] +; X64-AVX2-NEXT: vcmpneqps %ymm4, %ymm0, %ymm4 +; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm5 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] +; X64-AVX2-NEXT: vcmpneqps %ymm5, %ymm0, %ymm0 +; X64-AVX2-NEXT: vandps %ymm0, %ymm4, %ymm0 +; X64-AVX2-NEXT: vxorps %ymm0, %ymm3, %ymm0 +; X64-AVX2-NEXT: vxorps %ymm0, %ymm2, %ymm0 +; X64-AVX2-NEXT: vorps %ymm1, %ymm0, %ymm0 ; X64-AVX2-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x, @@ -1331,26 +1015,14 @@ ; X32-NEXT: vcmpltps LCPI16_1, %ymm0, %ymm2 ; X32-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; X32-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X32-NEXT: vcmpneqps LCPI16_2, %ymm0, %ymm4 +; X32-NEXT: vandps %ymm4, %ymm3, %ymm3 ; X32-NEXT: vandps %ymm3, %ymm2, %ymm2 -; X32-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X32-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 -; X32-NEXT: vcmpneqps LCPI16_2, %ymm0, %ymm3 -; X32-NEXT: vextractf128 $1, %ymm3, %xmm4 -; X32-NEXT: vpackssdw %xmm4, %xmm3, %xmm3 -; X32-NEXT: vpand %xmm3, %xmm2, %xmm2 -; X32-NEXT: vcmpneqps LCPI16_3, %ymm0, %ymm3 -; X32-NEXT: vxorps %ymm1, %ymm3, %ymm1 -; X32-NEXT: vextractf128 $1, %ymm1, %xmm3 -; X32-NEXT: vpackssdw %xmm3, %xmm1, %xmm1 -; X32-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; X32-NEXT: vxorps %ymm2, %ymm1, %ymm1 +; X32-NEXT: vcmpneqps LCPI16_3, %ymm0, %ymm2 +; X32-NEXT: vxorps %ymm1, %ymm2, %ymm1 ; X32-NEXT: vcmpneqps LCPI16_4, %ymm0, %ymm0 -; X32-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X32-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; X32-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X32-NEXT: vpmovsxwd %xmm0, %xmm1 -; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; X32-NEXT: vpmovsxwd %xmm0, %xmm0 -; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X32-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: six_or_and_xor: @@ -1360,26 +1032,14 @@ ; X64-NEXT: vcmpltps 
{{.*}}(%rip), %ymm0, %ymm2 ; X64-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; X64-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X64-NEXT: vcmpneqps {{.*}}(%rip), %ymm0, %ymm4 +; X64-NEXT: vandps %ymm4, %ymm3, %ymm3 ; X64-NEXT: vandps %ymm3, %ymm2, %ymm2 -; X64-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X64-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 -; X64-NEXT: vcmpneqps {{.*}}(%rip), %ymm0, %ymm3 -; X64-NEXT: vextractf128 $1, %ymm3, %xmm4 -; X64-NEXT: vpackssdw %xmm4, %xmm3, %xmm3 -; X64-NEXT: vpand %xmm3, %xmm2, %xmm2 -; X64-NEXT: vcmpneqps {{.*}}(%rip), %ymm0, %ymm3 -; X64-NEXT: vxorps %ymm1, %ymm3, %ymm1 -; X64-NEXT: vextractf128 $1, %ymm1, %xmm3 -; X64-NEXT: vpackssdw %xmm3, %xmm1, %xmm1 -; X64-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; X64-NEXT: vxorps %ymm2, %ymm1, %ymm1 +; X64-NEXT: vcmpneqps {{.*}}(%rip), %ymm0, %ymm2 +; X64-NEXT: vxorps %ymm1, %ymm2, %ymm1 ; X64-NEXT: vcmpneqps {{.*}}(%rip), %ymm0, %ymm0 -; X64-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X64-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; X64-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X64-NEXT: vpmovsxwd %xmm0, %xmm1 -; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; X64-NEXT: vpmovsxwd %xmm0, %xmm0 -; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X64-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq ; ; X32-AVX2-LABEL: six_or_and_xor: @@ -1390,26 +1050,17 @@ ; X32-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 ; X32-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] +; X32-AVX2-NEXT: vcmpneqps %ymm4, %ymm0, %ymm4 +; X32-AVX2-NEXT: vandps %ymm4, %ymm3, %ymm3 ; X32-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2 -; X32-AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X32-AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] -; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X32-AVX2-NEXT: vextractf128 $1, %ymm3, %xmm4 -; X32-AVX2-NEXT: vpackssdw %xmm4, %xmm3, %xmm3 -; X32-AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 -; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] -; X32-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X32-AVX2-NEXT: vxorps %ymm1, %ymm3, %ymm1 -; X32-AVX2-NEXT: vextractf128 $1, %ymm1, %xmm3 -; X32-AVX2-NEXT: vpackssdw %xmm3, %xmm1, %xmm1 -; X32-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; X32-AVX2-NEXT: vxorps %ymm2, %ymm1, %ymm1 +; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] +; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 +; X32-AVX2-NEXT: vxorps %ymm1, %ymm2, %ymm1 ; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1] ; X32-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 -; X32-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X32-AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; X32-AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X32-AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; X32-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X32-AVX2-NEXT: retl ; ; X64-AVX2-LABEL: six_or_and_xor: @@ -1420,26 +1071,17 @@ ; X64-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 ; X64-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3 ; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 +; X64-AVX2-NEXT: 
vbroadcastss {{.*#+}} ymm4 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] +; X64-AVX2-NEXT: vcmpneqps %ymm4, %ymm0, %ymm4 +; X64-AVX2-NEXT: vandps %ymm4, %ymm3, %ymm3 ; X64-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2 -; X64-AVX2-NEXT: vextractf128 $1, %ymm2, %xmm3 -; X64-AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2 -; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1] -; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X64-AVX2-NEXT: vextractf128 $1, %ymm3, %xmm4 -; X64-AVX2-NEXT: vpackssdw %xmm4, %xmm3, %xmm3 -; X64-AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2 -; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] -; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3 -; X64-AVX2-NEXT: vxorps %ymm1, %ymm3, %ymm1 -; X64-AVX2-NEXT: vextractf128 $1, %ymm1, %xmm3 -; X64-AVX2-NEXT: vpackssdw %xmm3, %xmm1, %xmm1 -; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; X64-AVX2-NEXT: vxorps %ymm2, %ymm1, %ymm1 +; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1] +; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm2 +; X64-AVX2-NEXT: vxorps %ymm1, %ymm2, %ymm1 ; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1,4.00000006E-1] ; X64-AVX2-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0 -; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2 -; X64-AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 -; X64-AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; X64-AVX2-NEXT: vorps %ymm0, %ymm1, %ymm0 ; X64-AVX2-NEXT: retq entry: %cmp = fcmp oge <8 x float> %x,