Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -35996,6 +35996,45 @@ return SDValue(); } +// Attempt to combine a (sext/zext (setcc)) to a setcc with a xmm/ymm/zmm +// result type. +static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + SDLoc dl(N); + + // Only do this combine with AVX512 for vector extends. + if (!Subtarget.hasAVX512() || !VT.isVector() || N0->getOpcode() != ISD::SETCC) + return SDValue(); + + // Only combine legal element types. + EVT SVT = VT.getVectorElementType(); + if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32 && + SVT != MVT::i64 && SVT != MVT::f32 && SVT != MVT::f64) + return SDValue(); + + // We can only do this if the vector size is 256 bits or less. + unsigned Size = VT.getSizeInBits(); + if (Size > 256) + return SDValue(); + + // Don't fold if the condition code can't be handled by PCMPEQ/PCMPGT since + // those are the only integer compares we have. + ISD::CondCode CC = cast<CondCodeSDNode>(N0->getOperand(2))->get(); + if (ISD::isUnsignedIntSetCC(CC) || CC == ISD::SETLE || CC == ISD::SETGE || + CC == ISD::SETNE) + return SDValue(); + + // Only do this combine if the extension will be fully consumed by the setcc.
+ EVT N00VT = N0.getOperand(0).getValueType(); + EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger(); + if (Size != MatchingVecType.getSizeInBits()) + return SDValue(); + + return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); +} + static SDValue combineSext(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { @@ -36013,6 +36052,9 @@ if (!DCI.isBeforeLegalizeOps()) return SDValue(); + if (SDValue V = combineExtSetcc(N, DAG, Subtarget)) + return V; + if (InVT == MVT::i1 && N0.getOpcode() == ISD::XOR && isAllOnesConstant(N0.getOperand(1)) && N0.hasOneUse()) { // Invert and sign-extend a boolean is the same as zero-extend and subtract Index: llvm/trunk/test/CodeGen/X86/avx-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx-schedule.ll +++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll @@ -947,11 +947,9 @@ ; ; SKX-LABEL: test_cmppd: ; SKX: # %bb.0: -; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vcmpeqpd (%rdi), %ymm0, %k1 # sched: [10:1.00] -; SKX-NEXT: vpmovm2q %k0, %ymm0 # sched: [1:0.25] -; SKX-NEXT: vpmovm2q %k1, %ymm1 # sched: [1:0.25] -; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.33] +; SKX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKX-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cmppd: @@ -1015,11 +1013,9 @@ ; ; SKX-LABEL: test_cmpps: ; SKX: # %bb.0: -; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vcmpeqps (%rdi), %ymm0, %k1 # sched: [10:1.00] -; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25] -; SKX-NEXT: vpmovm2d %k1, %ymm1 # sched: [1:0.25] -; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.33] +; SKX-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKX-NEXT: vorps 
%ymm0, %ymm1, %ymm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cmpps: Index: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll +++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll @@ -2159,10 +2159,8 @@ ; ; SKX-LABEL: test_pcmpeqb: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpmovm2b %k0, %ymm0 # sched: [1:0.25] -; SKX-NEXT: vpcmpeqb (%rdi), %ymm0, %k0 # sched: [10:1.00] -; SKX-NEXT: vpmovm2b %k0, %ymm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pcmpeqb: @@ -2205,10 +2203,8 @@ ; ; SKX-LABEL: test_pcmpeqd: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25] -; SKX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 # sched: [10:1.00] -; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pcmpeqd: @@ -2251,10 +2247,8 @@ ; ; SKX-LABEL: test_pcmpeqq: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpmovm2q %k0, %ymm0 # sched: [1:0.25] -; SKX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 # sched: [10:1.00] -; SKX-NEXT: vpmovm2q %k0, %ymm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pcmpeqq: @@ -2297,10 +2291,8 @@ ; ; SKX-LABEL: test_pcmpeqw: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpmovm2w %k0, %ymm0 # sched: [1:0.25] -; SKX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0 # sched: [10:1.00] -; SKX-NEXT: 
vpmovm2w %k0, %ymm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pcmpeqw: @@ -2343,10 +2335,8 @@ ; ; SKX-LABEL: test_pcmpgtb: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpmovm2b %k0, %ymm0 # sched: [1:0.25] -; SKX-NEXT: vpcmpgtb (%rdi), %ymm0, %k0 # sched: [10:1.00] -; SKX-NEXT: vpmovm2b %k0, %ymm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pcmpgtb: @@ -2389,10 +2379,8 @@ ; ; SKX-LABEL: test_pcmpgtd: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25] -; SKX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 # sched: [10:1.00] -; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pcmpgtd: @@ -2435,10 +2423,8 @@ ; ; SKX-LABEL: test_pcmpgtq: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpmovm2q %k0, %ymm0 # sched: [1:0.25] -; SKX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 # sched: [10:1.00] -; SKX-NEXT: vpmovm2q %k0, %ymm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; SKX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pcmpgtq: @@ -2481,10 +2467,8 @@ ; ; SKX-LABEL: test_pcmpgtw: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpmovm2w %k0, %ymm0 # sched: [1:0.25] -; SKX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0 # sched: [10:1.00] -; SKX-NEXT: vpmovm2w %k0, %ymm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: 
[1:0.50] +; SKX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pcmpgtw: Index: llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll +++ llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll @@ -848,22 +848,13 @@ } define <4 x i32> @test44(<4 x i16> %x, <4 x i16> %y) #0 { -; KNL-LABEL: test44: -; KNL: ## %bb.0: -; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; KNL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] -; KNL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] -; KNL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; KNL-NEXT: retq -; -; SKX-LABEL: test44: -; SKX: ## %bb.0: -; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; SKX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] -; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] -; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 -; SKX-NEXT: vpmovm2d %k0, %xmm0 -; SKX-NEXT: retq +; CHECK-LABEL: test44: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] +; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] +; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq %mask = icmp eq <4 x i16> %x, %y %1 = sext <4 x i1> %mask to <4 x i32> ret <4 x i32> %1 Index: llvm/trunk/test/CodeGen/X86/avx512-vec3-crash.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-vec3-crash.ll +++ llvm/trunk/test/CodeGen/X86/avx512-vec3-crash.ll @@ -9,14 +9,13 @@ ; CHECK-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 ; CHECK-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 ; CHECK-NEXT: vpslld $24, %xmm0, %xmm0 -; CHECK-NEXT: 
vpsrad $24, %xmm0, %xmm0 ; CHECK-NEXT: vmovd %ecx, %xmm1 ; CHECK-NEXT: vpinsrd $1, %r8d, %xmm1, %xmm1 +; CHECK-NEXT: vpsrad $24, %xmm0, %xmm0 ; CHECK-NEXT: vpinsrd $2, %r9d, %xmm1, %xmm1 ; CHECK-NEXT: vpslld $24, %xmm1, %xmm1 ; CHECK-NEXT: vpsrad $24, %xmm1, %xmm1 -; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k0 -; CHECK-NEXT: vpmovm2d %k0, %xmm0 +; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: vpextrb $0, %xmm0, %eax ; CHECK-NEXT: vpextrb $4, %xmm0, %edx ; CHECK-NEXT: vpextrb $8, %xmm0, %ecx Index: llvm/trunk/test/CodeGen/X86/commute-fcmp.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/commute-fcmp.ll +++ llvm/trunk/test/CodeGen/X86/commute-fcmp.ll @@ -21,9 +21,7 @@ ; ; AVX512-LABEL: commute_cmpps_eq: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpeqps (%rdi), %xmm0, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = load <4 x float>, <4 x float>* %a0 %2 = fcmp oeq <4 x float> %1, %a1 @@ -44,9 +42,7 @@ ; ; AVX512-LABEL: commute_cmpps_ne: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpneqps (%rdi), %xmm0, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vcmpneqps (%rdi), %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = load <4 x float>, <4 x float>* %a0 %2 = fcmp une <4 x float> %1, %a1 @@ -67,9 +63,7 @@ ; ; AVX512-LABEL: commute_cmpps_ord: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpordps (%rdi), %xmm0, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vcmpordps (%rdi), %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = load <4 x float>, <4 x float>* %a0 %2 = fcmp ord <4 x float> %1, %a1 @@ -90,9 +84,7 @@ ; ; AVX512-LABEL: commute_cmpps_uno: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpunordps (%rdi), %xmm0, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; 
AVX512-NEXT: vcmpunordps (%rdi), %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = load <4 x float>, <4 x float>* %a0 %2 = fcmp uno <4 x float> %1, %a1 @@ -117,9 +109,7 @@ ; ; AVX512-LABEL: commute_cmpps_ueq: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpeq_uqps (%rdi), %xmm0, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vcmpeq_uqps (%rdi), %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = load <4 x float>, <4 x float>* %a0 %2 = fcmp ueq <4 x float> %1, %a1 @@ -144,9 +134,7 @@ ; ; AVX512-LABEL: commute_cmpps_one: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpneq_oqps (%rdi), %xmm0, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vcmpneq_oqps (%rdi), %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = load <4 x float>, <4 x float>* %a0 %2 = fcmp one <4 x float> %1, %a1 @@ -171,9 +159,7 @@ ; AVX512-LABEL: commute_cmpps_lt: ; AVX512: # %bb.0: ; AVX512-NEXT: vmovaps (%rdi), %xmm1 -; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: retq %1 = load <4 x float>, <4 x float>* %a0 %2 = fcmp olt <4 x float> %1, %a1 @@ -198,9 +184,7 @@ ; AVX512-LABEL: commute_cmpps_le: ; AVX512: # %bb.0: ; AVX512-NEXT: vmovaps (%rdi), %xmm1 -; AVX512-NEXT: vcmpleps %xmm0, %xmm1, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vcmpleps %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: retq %1 = load <4 x float>, <4 x float>* %a0 %2 = fcmp ole <4 x float> %1, %a1 @@ -222,9 +206,7 @@ ; ; AVX512-LABEL: commute_cmpps_eq_ymm: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpeqps (%rdi), %ymm0, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 ; AVX512-NEXT: retq %1 = load <8 x float>, <8 x float>* %a0 %2 = fcmp oeq <8 x float> %1, %a1 @@ 
-246,9 +228,7 @@ ; ; AVX512-LABEL: commute_cmpps_ne_ymm: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpneqps (%rdi), %ymm0, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: vcmpneqps (%rdi), %ymm0, %ymm0 ; AVX512-NEXT: retq %1 = load <8 x float>, <8 x float>* %a0 %2 = fcmp une <8 x float> %1, %a1 @@ -270,9 +250,7 @@ ; ; AVX512-LABEL: commute_cmpps_ord_ymm: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpordps (%rdi), %ymm0, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: vcmpordps (%rdi), %ymm0, %ymm0 ; AVX512-NEXT: retq %1 = load <8 x float>, <8 x float>* %a0 %2 = fcmp ord <8 x float> %1, %a1 @@ -294,9 +272,7 @@ ; ; AVX512-LABEL: commute_cmpps_uno_ymm: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpunordps (%rdi), %ymm0, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: vcmpunordps (%rdi), %ymm0, %ymm0 ; AVX512-NEXT: retq %1 = load <8 x float>, <8 x float>* %a0 %2 = fcmp uno <8 x float> %1, %a1 @@ -326,9 +302,7 @@ ; ; AVX512-LABEL: commute_cmpps_ueq_ymm: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpeq_uqps (%rdi), %ymm0, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: vcmpeq_uqps (%rdi), %ymm0, %ymm0 ; AVX512-NEXT: retq %1 = load <8 x float>, <8 x float>* %a0 %2 = fcmp ueq <8 x float> %1, %a1 @@ -358,9 +332,7 @@ ; ; AVX512-LABEL: commute_cmpps_one_ymm: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpneq_oqps (%rdi), %ymm0, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: vcmpneq_oqps (%rdi), %ymm0, %ymm0 ; AVX512-NEXT: retq %1 = load <8 x float>, <8 x float>* %a0 %2 = fcmp one <8 x float> %1, %a1 @@ -388,9 +360,7 @@ ; AVX512-LABEL: commute_cmpps_lt_ymm: ; AVX512: # %bb.0: ; AVX512-NEXT: vmovaps (%rdi), %ymm1 -; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, 
%ymm0, %ymm0 -; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 ; AVX512-NEXT: retq %1 = load <8 x float>, <8 x float>* %a0 %2 = fcmp olt <8 x float> %1, %a1 @@ -418,9 +388,7 @@ ; AVX512-LABEL: commute_cmpps_le_ymm: ; AVX512: # %bb.0: ; AVX512-NEXT: vmovaps (%rdi), %ymm1 -; AVX512-NEXT: vcmpleps %ymm0, %ymm1, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: vcmpleps %ymm0, %ymm1, %ymm0 ; AVX512-NEXT: retq %1 = load <8 x float>, <8 x float>* %a0 %2 = fcmp ole <8 x float> %1, %a1 @@ -446,9 +414,7 @@ ; ; AVX512-LABEL: commute_cmppd_eq: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpeqpd (%rdi), %xmm0, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = load <2 x double>, <2 x double>* %a0 %2 = fcmp oeq <2 x double> %1, %a1 @@ -469,9 +435,7 @@ ; ; AVX512-LABEL: commute_cmppd_ne: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpneqpd (%rdi), %xmm0, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vcmpneqpd (%rdi), %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = load <2 x double>, <2 x double>* %a0 %2 = fcmp une <2 x double> %1, %a1 @@ -492,9 +456,7 @@ ; ; AVX512-LABEL: commute_cmppd_ord: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpordpd (%rdi), %xmm0, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vcmpordpd (%rdi), %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = load <2 x double>, <2 x double>* %a0 %2 = fcmp ord <2 x double> %1, %a1 @@ -519,9 +481,7 @@ ; ; AVX512-LABEL: commute_cmppd_ueq: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpeq_uqpd (%rdi), %xmm0, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vcmpeq_uqpd (%rdi), %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = load <2 x double>, <2 x double>* %a0 %2 = 
fcmp ueq <2 x double> %1, %a1 @@ -546,9 +506,7 @@ ; ; AVX512-LABEL: commute_cmppd_one: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpneq_oqpd (%rdi), %xmm0, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vcmpneq_oqpd (%rdi), %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = load <2 x double>, <2 x double>* %a0 %2 = fcmp one <2 x double> %1, %a1 @@ -569,9 +527,7 @@ ; ; AVX512-LABEL: commute_cmppd_uno: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpunordpd (%rdi), %xmm0, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vcmpunordpd (%rdi), %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = load <2 x double>, <2 x double>* %a0 %2 = fcmp uno <2 x double> %1, %a1 @@ -596,9 +552,7 @@ ; AVX512-LABEL: commute_cmppd_lt: ; AVX512: # %bb.0: ; AVX512-NEXT: vmovapd (%rdi), %xmm1 -; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: retq %1 = load <2 x double>, <2 x double>* %a0 %2 = fcmp olt <2 x double> %1, %a1 @@ -623,9 +577,7 @@ ; AVX512-LABEL: commute_cmppd_le: ; AVX512: # %bb.0: ; AVX512-NEXT: vmovapd (%rdi), %xmm1 -; AVX512-NEXT: vcmplepd %xmm0, %xmm1, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vcmplepd %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: retq %1 = load <2 x double>, <2 x double>* %a0 %2 = fcmp ole <2 x double> %1, %a1 @@ -647,9 +599,7 @@ ; ; AVX512-LABEL: commute_cmppd_eq_ymmm: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpeqpd (%rdi), %ymm0, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 ; AVX512-NEXT: retq %1 = load <4 x double>, <4 x double>* %a0 %2 = fcmp oeq <4 x double> %1, %a1 @@ -671,9 +621,7 @@ ; ; AVX512-LABEL: commute_cmppd_ne_ymmm: ; AVX512: # %bb.0: -; AVX512-NEXT: 
vcmpneqpd (%rdi), %ymm0, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: vcmpneqpd (%rdi), %ymm0, %ymm0 ; AVX512-NEXT: retq %1 = load <4 x double>, <4 x double>* %a0 %2 = fcmp une <4 x double> %1, %a1 @@ -695,9 +643,7 @@ ; ; AVX512-LABEL: commute_cmppd_ord_ymmm: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpordpd (%rdi), %ymm0, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: vcmpordpd (%rdi), %ymm0, %ymm0 ; AVX512-NEXT: retq %1 = load <4 x double>, <4 x double>* %a0 %2 = fcmp ord <4 x double> %1, %a1 @@ -719,9 +665,7 @@ ; ; AVX512-LABEL: commute_cmppd_uno_ymmm: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpunordpd (%rdi), %ymm0, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: vcmpunordpd (%rdi), %ymm0, %ymm0 ; AVX512-NEXT: retq %1 = load <4 x double>, <4 x double>* %a0 %2 = fcmp uno <4 x double> %1, %a1 @@ -751,9 +695,7 @@ ; ; AVX512-LABEL: commute_cmppd_ueq_ymmm: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpeq_uqpd (%rdi), %ymm0, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: vcmpeq_uqpd (%rdi), %ymm0, %ymm0 ; AVX512-NEXT: retq %1 = load <4 x double>, <4 x double>* %a0 %2 = fcmp ueq <4 x double> %1, %a1 @@ -783,9 +725,7 @@ ; ; AVX512-LABEL: commute_cmppd_one_ymmm: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpneq_oqpd (%rdi), %ymm0, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: vcmpneq_oqpd (%rdi), %ymm0, %ymm0 ; AVX512-NEXT: retq %1 = load <4 x double>, <4 x double>* %a0 %2 = fcmp one <4 x double> %1, %a1 @@ -813,9 +753,7 @@ ; AVX512-LABEL: commute_cmppd_lt_ymmm: ; AVX512: # %bb.0: ; AVX512-NEXT: vmovapd (%rdi), %ymm1 -; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} +; 
AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 ; AVX512-NEXT: retq %1 = load <4 x double>, <4 x double>* %a0 %2 = fcmp olt <4 x double> %1, %a1 @@ -843,9 +781,7 @@ ; AVX512-LABEL: commute_cmppd_le_ymmm: ; AVX512: # %bb.0: ; AVX512-NEXT: vmovapd (%rdi), %ymm1 -; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %ymm0 ; AVX512-NEXT: retq %1 = load <4 x double>, <4 x double>* %a0 %2 = fcmp ole <4 x double> %1, %a1 Index: llvm/trunk/test/CodeGen/X86/sse41-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse41-schedule.ll +++ llvm/trunk/test/CodeGen/X86/sse41-schedule.ll @@ -860,10 +860,8 @@ ; ; SKX-LABEL: test_pcmpeqq: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25] -; SKX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 # sched: [9:1.00] -; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pcmpeqq: Index: llvm/trunk/test/CodeGen/X86/sse42-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse42-schedule.ll +++ llvm/trunk/test/CodeGen/X86/sse42-schedule.ll @@ -771,10 +771,8 @@ ; ; SKX-LABEL: test_pcmpgtq: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25] -; SKX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 # sched: [9:1.00] -; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SKX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pcmpgtq: Index: llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll 
=================================================================== --- llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll +++ llvm/trunk/test/CodeGen/X86/vector-compare-all_of.ll @@ -23,10 +23,8 @@ ; ; AVX512-LABEL: test_v2f64_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} -; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: retq @@ -62,10 +60,8 @@ ; ; AVX512-LABEL: test_v4f64_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 @@ -157,10 +153,8 @@ ; ; AVX512-LABEL: test_v4f32_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 @@ -202,10 +196,8 @@ ; ; AVX512-LABEL: test_v8f32_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpand 
%ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 @@ -298,9 +290,7 @@ ; ; AVX512-LABEL: test_v2i64_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovq %xmm0, %rax @@ -352,9 +342,7 @@ ; ; AVX512-LABEL: test_v4i64_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] @@ -463,9 +451,7 @@ ; ; AVX512-LABEL: test_v4i32_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] @@ -523,9 +509,7 @@ ; ; AVX512-LABEL: test_v8i32_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] @@ -640,8 +624,7 @@ ; ; AVX512-LABEL: test_v8i16_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 -; AVX512-NEXT: vpmovm2w %k0, %xmm0 +; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: 
vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] @@ -709,8 +692,7 @@ ; ; AVX512-LABEL: test_v16i16_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 -; AVX512-NEXT: vpmovm2w %k0, %ymm0 +; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] @@ -838,8 +820,7 @@ ; ; AVX512-LABEL: test_v16i8_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 -; AVX512-NEXT: vpmovm2b %k0, %xmm0 +; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] @@ -913,8 +894,7 @@ ; ; AVX512-LABEL: test_v32i8_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 -; AVX512-NEXT: vpmovm2b %k0, %ymm0 +; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] Index: llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll +++ llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll @@ -23,10 +23,8 @@ ; ; AVX512-LABEL: test_v2f64_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} -; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovq %xmm0, %rax ; AVX512-NEXT: retq @@ -60,10 +58,8 @@ ; ; AVX512-LABEL: test_v4f64_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} -; AVX512-NEXT: vextracti128 
$1, %ymm0, %xmm1 +; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 @@ -147,10 +143,8 @@ ; ; AVX512-LABEL: test_v4f32_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -188,10 +182,8 @@ ; ; AVX512-LABEL: test_v8f32_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 @@ -280,9 +272,7 @@ ; ; AVX512-LABEL: test_v2i64_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovq %xmm0, %rax @@ -330,9 +320,7 @@ ; ; AVX512-LABEL: test_v4i64_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 
@@ -431,9 +419,7 @@ ; ; AVX512-LABEL: test_v4i32_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k1 -; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] @@ -485,9 +471,7 @@ ; ; AVX512-LABEL: test_v8i32_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] @@ -592,8 +576,7 @@ ; ; AVX512-LABEL: test_v8i16_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 -; AVX512-NEXT: vpmovm2w %k0, %xmm0 +; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] @@ -658,8 +641,7 @@ ; ; AVX512-LABEL: test_v16i16_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 -; AVX512-NEXT: vpmovm2w %k0, %ymm0 +; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] @@ -777,8 +759,7 @@ ; ; AVX512-LABEL: test_v16i8_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 -; AVX512-NEXT: vpmovm2b %k0, %xmm0 +; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] @@ -849,8 +830,7 @@ ; ; AVX512-LABEL: test_v32i8_sext: ; AVX512: # %bb.0: -; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 -; AVX512-NEXT: vpmovm2b %k0, %ymm0 +; AVX512-NEXT: vpcmpgtb 
%ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] Index: llvm/trunk/test/CodeGen/X86/vselect-packss.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vselect-packss.ll +++ llvm/trunk/test/CodeGen/X86/vselect-packss.ll @@ -377,33 +377,14 @@ ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512NOBW-LABEL: vselect_packss: -; AVX512NOBW: # %bb.0: -; AVX512NOBW-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; AVX512NOBW-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512NOBW-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 -; AVX512NOBW-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512NOBW-NEXT: vzeroupper -; AVX512NOBW-NEXT: retq -; -; AVX512BWNOVL-LABEL: vselect_packss: -; AVX512BWNOVL: # %bb.0: -; AVX512BWNOVL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; AVX512BWNOVL-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512BWNOVL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 -; AVX512BWNOVL-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512BWNOVL-NEXT: vzeroupper -; AVX512BWNOVL-NEXT: retq -; -; AVX512BWVL-LABEL: vselect_packss: -; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 -; AVX512BWVL-NEXT: vpmovm2w %k0, %ymm0 -; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512BWVL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0 -; AVX512BWVL-NEXT: vzeroupper -; AVX512BWVL-NEXT: retq +; AVX512-LABEL: vselect_packss: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %1 = icmp eq <16 x i16> %a0, %a1 %2 = sext <16 x i1> %1 to <16 x i16> %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <8 x i32>