Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -35121,6 +35121,23 @@
   return false;
 }
 
+// Helper to push sign extension of vXi1 SETCC result through bitops.
+static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
+                                          SDValue Src, const SDLoc &DL) {
+  switch (Src.getOpcode()) {
+  case ISD::SETCC:
+    return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
+  case ISD::AND:
+  case ISD::XOR:
+  case ISD::OR:
+    return DAG.getNode(
+        Src.getOpcode(), DL, SExtVT,
+        signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
+        signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
+  }
+  llvm_unreachable("Unexpected node type for vXi1 sign extension");
+}
+
 // Try to match patterns such as
 // (i16 bitcast (v16i1 x))
 // ->
@@ -35159,6 +35176,7 @@
   // For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as:
   // (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef)
   MVT SExtVT;
+  bool PropagateSExt = false;
   switch (SrcVT.getSimpleVT().SimpleTy) {
   default:
     return SDValue();
@@ -35169,8 +35187,10 @@
     SExtVT = MVT::v4i32;
     // For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2))
     // sign-extend to a 256-bit operation to avoid truncation.
-    if (Subtarget.hasAVX() && checkBitcastSrcVectorSize(Src, 256))
+    if (Subtarget.hasAVX() && checkBitcastSrcVectorSize(Src, 256)) {
       SExtVT = MVT::v4i64;
+      PropagateSExt = true;
+    }
     break;
   case MVT::v8i1:
     SExtVT = MVT::v8i16;
@@ -35179,11 +35199,10 @@
     // If the setcc operand is 128-bit, prefer sign-extending to 128-bit over
     // 256-bit because the shuffle is cheaper than sign extending the result of
    // the compare.
-    // TODO : use checkBitcastSrcVectorSize
-    if (Src.getOpcode() == ISD::SETCC && Subtarget.hasAVX() &&
-        (Src.getOperand(0).getValueType().is256BitVector() ||
-         Src.getOperand(0).getValueType().is512BitVector())) {
+    if (Subtarget.hasAVX() && (checkBitcastSrcVectorSize(Src, 256) ||
+                               checkBitcastSrcVectorSize(Src, 512))) {
       SExtVT = MVT::v8i32;
+      PropagateSExt = true;
     }
     break;
   case MVT::v16i1:
@@ -35206,7 +35225,8 @@
     return SDValue();
   };
 
-  SDValue V = DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
+  SDValue V = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
+                            : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
   if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8 || SExtVT == MVT::v64i8) {
     V = getPMOVMSKB(DL, V, DAG, Subtarget);
Index: llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-256.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-256.ll
+++ llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-256.ll
@@ -55,17 +55,17 @@
 ;
 ; AVX1-LABEL: v4i64:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
 ; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
-; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
-; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm4, %xmm1
 ; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
-; AVX1-NEXT:    vandpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT:    vandpd %ymm2, %ymm0, %ymm0
 ; AVX1-NEXT:    vmovmskpd %ymm0, %eax
 ; AVX1-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX1-NEXT:    vzeroupper
@@ -73,9 +73,9 @@
 ;
 ; AVX2-LABEL: v4i64:
 ; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
 ; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm1
-; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vmovmskpd %ymm0, %eax
 ; AVX2-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX2-NEXT:    vzeroupper
@@ -121,9 +121,9 @@
 ;
 ; AVX12-LABEL: v4f64:
 ; AVX12:       # %bb.0:
+; AVX12-NEXT:    vcmpltpd %ymm2, %ymm3, %ymm2
 ; AVX12-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
-; AVX12-NEXT:    vcmpltpd %ymm2, %ymm3, %ymm1
-; AVX12-NEXT:    vandpd %ymm1, %ymm0, %ymm0
+; AVX12-NEXT:    vandpd %ymm2, %ymm0, %ymm0
 ; AVX12-NEXT:    vmovmskpd %ymm0, %eax
 ; AVX12-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX12-NEXT:    vzeroupper
@@ -241,32 +241,28 @@
 ;
 ; AVX1-LABEL: v8i32_and:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm4
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm2, %xmm1
-; AVX1-NEXT:    vpand %xmm1, %xmm4, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
+; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpackssdw %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskps %ymm0, %eax
 ; AVX1-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: v8i32_and:
 ; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm2, %ymm2
 ; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm2, %ymm1
-; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
 ; AVX2-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -314,32 +310,28 @@
 ;
 ; AVX1-LABEL: v8i32_or:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm4
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm2, %xmm1
-; AVX1-NEXT:    vpor %xmm1, %xmm4, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
+; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpackssdw %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT:    vorps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskps %ymm0, %eax
 ; AVX1-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: v8i32_or:
 ; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm2, %ymm2
 ; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm2, %ymm1
-; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
 ; AVX2-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -395,42 +387,36 @@
 ;
 ; AVX1-LABEL: v8i32_or_and:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm6
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm1
-; AVX1-NEXT:    vpor %xmm1, %xmm6, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm6
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm7
+; AVX1-NEXT:    vpcmpgtd %xmm6, %xmm7, %xmm6
 ; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpackssdw %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm2, %ymm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm6, %xmm3
+; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT:    vorps %ymm2, %ymm0, %ymm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm2
 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm2
-; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskps %ymm0, %eax
 ; AVX1-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: v8i32_or_and:
 ; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm3, %ymm2
 ; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm3, %ymm1
-; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpor %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpcmpeqd %ymm5, %ymm4, %ymm1
-; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
 ; AVX2-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -482,13 +468,10 @@
 ;
 ; AVX12-LABEL: v8f32_and:
 ; AVX12:       # %bb.0:
+; AVX12-NEXT:    vcmpltps %ymm2, %ymm3, %ymm2
 ; AVX12-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
-; AVX12-NEXT:    vcmpltps %ymm2, %ymm3, %ymm1
-; AVX12-NEXT:    vandps %ymm1, %ymm0, %ymm0
-; AVX12-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX12-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX12-NEXT:    vpmovmskb %xmm0, %eax
+; AVX12-NEXT:    vandps %ymm2, %ymm0, %ymm0
+; AVX12-NEXT:    vmovmskps %ymm0, %eax
 ; AVX12-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX12-NEXT:    vzeroupper
 ; AVX12-NEXT:    retq
@@ -536,13 +519,10 @@
 ;
 ; AVX12-LABEL: v8f32_xor:
 ; AVX12:       # %bb.0:
+; AVX12-NEXT:    vcmpltps %ymm2, %ymm3, %ymm2
 ; AVX12-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
-; AVX12-NEXT:    vcmpltps %ymm2, %ymm3, %ymm1
-; AVX12-NEXT:    vxorps %ymm1, %ymm0, %ymm0
-; AVX12-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX12-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX12-NEXT:    vpmovmskb %xmm0, %eax
+; AVX12-NEXT:    vxorps %ymm2, %ymm0, %ymm0
+; AVX12-NEXT:    vmovmskps %ymm0, %eax
 ; AVX12-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX12-NEXT:    vzeroupper
 ; AVX12-NEXT:    retq
@@ -604,17 +584,12 @@
 ;
 ; AVX12-LABEL: v8f32_xor_and:
 ; AVX12:       # %bb.0:
+; AVX12-NEXT:    vcmpeq_uqps %ymm3, %ymm2, %ymm2
 ; AVX12-NEXT:    vcmpnleps %ymm1, %ymm0, %ymm0
-; AVX12-NEXT:    vcmpeq_uqps %ymm3, %ymm2, %ymm1
-; AVX12-NEXT:    vxorps %ymm1, %ymm0, %ymm0
+; AVX12-NEXT:    vxorps %ymm2, %ymm0, %ymm0
 ; AVX12-NEXT:    vcmpltps %ymm4, %ymm5, %ymm1
-; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX12-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
-; AVX12-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX12-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
-; AVX12-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX12-NEXT:    vpmovmskb %xmm0, %eax
+; AVX12-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; AVX12-NEXT:    vmovmskps %ymm0, %eax
 ; AVX12-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX12-NEXT:    vzeroupper
 ; AVX12-NEXT:    retq
Index: llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-512.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-512.ll
+++ llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-512.ll
@@ -50,56 +50,45 @@
 ;
 ; AVX1-LABEL: v8i64:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm9
+; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm8
+; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm9
 ; AVX1-NEXT:    vpcmpgtq %xmm8, %xmm9, %xmm8
+; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm5, %xmm5
+; AVX1-NEXT:    vpackssdw %xmm8, %xmm5, %xmm8
+; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm7
+; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm5
+; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm5, %xmm5
+; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm4, %xmm4
+; AVX1-NEXT:    vpackssdw %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vinsertf128 $1, %xmm8, %ymm4, %ymm4
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm5
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm6
+; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm6, %xmm5
 ; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpackssdw %xmm8, %xmm1, %xmm8
+; AVX1-NEXT:    vpackssdw %xmm5, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm5, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpackssdw %xmm8, %xmm0, %xmm0
-; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm2
-; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm5, %xmm2
-; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm3
-; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpcmpgtq %xmm6, %xmm4, %xmm3
-; AVX1-NEXT:    vpackssdw %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsllw $15, %xmm0, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vandps %ymm4, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskps %ymm0, %eax
 ; AVX1-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: v8i64:
 ; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpgtq %ymm7, %ymm5, %ymm5
+; AVX2-NEXT:    vpcmpgtq %ymm6, %ymm4, %ymm4
+; AVX2-NEXT:    vpackssdw %ymm5, %ymm4, %ymm4
 ; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
-; AVX2-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtq %ymm7, %ymm5, %ymm1
-; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
-; AVX2-NEXT:    vpcmpgtq %ymm6, %ymm4, %ymm2
-; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
-; AVX2-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
-; AVX2-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
-; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpsllw $15, %xmm0, %xmm0
-; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpand %ymm4, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
 ; AVX2-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -171,29 +160,42 @@
 ; SSE-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE-NEXT:    retq
 ;
-; AVX12-LABEL: v8f64:
-; AVX12:       # %bb.0:
-; AVX12-NEXT:    vcmpltpd %ymm1, %ymm3, %ymm1
-; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX12-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
-; AVX12-NEXT:    vcmpltpd %ymm0, %ymm2, %ymm0
-; AVX12-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX12-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
-; AVX12-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX12-NEXT:    vcmpltpd %ymm5, %ymm7, %ymm1
-; AVX12-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX12-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
-; AVX12-NEXT:    vcmpltpd %ymm4, %ymm6, %ymm2
-; AVX12-NEXT:    vextractf128 $1, %ymm2, %xmm3
-; AVX12-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
-; AVX12-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
-; AVX12-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX12-NEXT:    vpsllw $15, %xmm0, %xmm0
-; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX12-NEXT:    vpmovmskb %xmm0, %eax
-; AVX12-NEXT:    # kill: def $al killed $al killed $eax
-; AVX12-NEXT:    vzeroupper
-; AVX12-NEXT:    retq
+; AVX1-LABEL: v8f64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vcmpltpd %ymm5, %ymm7, %ymm5
+; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm7
+; AVX1-NEXT:    vpackssdw %xmm7, %xmm5, %xmm5
+; AVX1-NEXT:    vcmpltpd %ymm4, %ymm6, %ymm4
+; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm6
+; AVX1-NEXT:    vpackssdw %xmm6, %xmm4, %xmm4
+; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm4, %ymm4
+; AVX1-NEXT:    vcmpltpd %ymm1, %ymm3, %ymm1
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vcmpltpd %ymm0, %ymm2, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vandps %ymm4, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskps %ymm0, %eax
+; AVX1-NEXT:    # kill: def $al killed $al killed $eax
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: v8f64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vcmpltpd %ymm5, %ymm7, %ymm5
+; AVX2-NEXT:    vcmpltpd %ymm4, %ymm6, %ymm4
+; AVX2-NEXT:    vpackssdw %ymm5, %ymm4, %ymm4
+; AVX2-NEXT:    vcmpltpd %ymm1, %ymm3, %ymm1
+; AVX2-NEXT:    vcmpltpd %ymm0, %ymm2, %ymm0
+; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpand %ymm4, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
+; AVX2-NEXT:    # kill: def $al killed $al killed $eax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: v8f64:
 ; AVX512F:       # %bb.0:
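
Note on the transform: PropagateSExt is only set on paths guarded by checkBitcastSrcVectorSize, so by the time signExtendBitcastSrcVector runs, every leaf of the AND/OR/XOR tree is known to be a SETCC; any other opcode is llvm_unreachable. As a rough illustration of the recursion, here is a minimal standalone C++ model; Opc, Expr, and propagateSExt are hypothetical names for this sketch and are not SelectionDAG API:

#include <cassert>
#include <iostream>
#include <memory>
#include <string>

// Hypothetical stand-ins for the SelectionDAG opcodes the helper accepts.
enum class Opc { SetCC, And, Or, Xor };

struct Expr {
  Opc Opcode;
  std::string Leaf;                // printable form of a SETCC leaf
  std::unique_ptr<Expr> LHS, RHS;  // null for SetCC leaves
};

// Mirrors the shape of signExtendBitcastSrcVector: sign-extend each SETCC
// leaf, recurse through the bitwise operators, treat anything else as
// unreachable.
static std::string propagateSExt(const Expr &E) {
  switch (E.Opcode) {
  case Opc::SetCC:
    return "sext(" + E.Leaf + ")";
  case Opc::And:
  case Opc::Or:
  case Opc::Xor: {
    const char *Name = E.Opcode == Opc::And  ? "and"
                       : E.Opcode == Opc::Or ? "or"
                                             : "xor";
    return std::string(Name) + "(" + propagateSExt(*E.LHS) + ", " +
           propagateSExt(*E.RHS) + ")";
  }
  }
  assert(false && "Unexpected node type for vXi1 sign extension");
  return "";
}

int main() {
  // (and (setcc a, b), (setcc c, d)) -- the v8i32_and shape tested above.
  Expr Root{Opc::And, "",
            std::unique_ptr<Expr>(new Expr{Opc::SetCC, "a > b", nullptr, nullptr}),
            std::unique_ptr<Expr>(new Expr{Opc::SetCC, "c > d", nullptr, nullptr})};
  // Prints: and(sext(a > b), sext(c > d))
  std::cout << propagateSExt(Root) << "\n";
}

With the extension distributed to the leaves this way, each compare already produces a full-width mask, which is why the updated v8i32_and checks show two ymm VPCMPGTD compares feeding a single VPAND and VMOVMSKPS, rather than packing both results down to 128 bits before the bitwise op and movmsk.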