Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1482,8 +1482,9 @@
   }
 
   // Ensure that the constant occurs on the RHS and fold constant comparisons.
+  // TODO: Handle non-splat vector constants. All undef causes trouble.
   ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
-  if (isa<ConstantSDNode>(N0.getNode()) &&
+  if (isConstOrConstSplat(N0) &&
      (DCI.isBeforeLegalizeOps() ||
       isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
    return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
@@ -1858,9 +1859,15 @@
          Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
      }
    }
+  }
+
+  // These simplifications apply to splat vectors as well.
+  // TODO: Handle more splat vector cases.
+  if (auto *N1C = isConstOrConstSplat(N1)) {
+    const APInt &C1 = N1C->getAPIntValue();
 
    APInt MinVal, MaxVal;
-    unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
+    unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
    if (ISD::isSignedIntSetCC(Cond)) {
      MinVal = APInt::getSignedMinValue(OperandBitSize);
      MaxVal = APInt::getSignedMaxValue(OperandBitSize);
@@ -1875,16 +1882,18 @@
      if (C1 == MinVal)
        return DAG.getBoolConstant(true, dl, VT, OpVT);
 
-      // X >= C0 --> X > (C0 - 1)
-      APInt C = C1 - 1;
-      ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
-      if ((DCI.isBeforeLegalizeOps() ||
-           isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
-          (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 &&
-                                isLegalICmpImmediate(C.getSExtValue())))) {
-        return DAG.getSetCC(dl, VT, N0,
-                            DAG.getConstant(C, dl, N1.getValueType()),
-                            NewCC);
+      if (!VT.isVector()) { // TODO: Support this for vectors.
+        // X >= C0 --> X > (C0 - 1)
+        APInt C = C1 - 1;
+        ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
+        if ((DCI.isBeforeLegalizeOps() ||
+             isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
+            (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 &&
+                                  isLegalICmpImmediate(C.getSExtValue())))) {
+          return DAG.getSetCC(dl, VT, N0,
+                              DAG.getConstant(C, dl, N1.getValueType()),
+                              NewCC);
+        }
      }
    }
 
@@ -1894,15 +1903,17 @@
        return DAG.getBoolConstant(true, dl, VT, OpVT);
 
      // X <= C0 --> X < (C0 + 1)
-      APInt C = C1 + 1;
-      ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
-      if ((DCI.isBeforeLegalizeOps() ||
-           isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
-          (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 &&
-                                isLegalICmpImmediate(C.getSExtValue())))) {
-        return DAG.getSetCC(dl, VT, N0,
-                            DAG.getConstant(C, dl, N1.getValueType()),
-                            NewCC);
+      if (!VT.isVector()) { // TODO: Support this for vectors.
+        APInt C = C1 + 1;
+        ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
+        if ((DCI.isBeforeLegalizeOps() ||
+             isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
+            (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 &&
+                                  isLegalICmpImmediate(C.getSExtValue())))) {
+          return DAG.getSetCC(dl, VT, N0,
+                              DAG.getConstant(C, dl, N1.getValueType()),
+                              NewCC);
+        }
      }
    }
 
@@ -1951,6 +1962,12 @@
                                      N1.getValueType());
      return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
    }
+  }
+
+  // Back to non-vector simplifications.
+  // TODO: Can we do these for vector splats?
+  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+    const APInt &C1 = N1C->getAPIntValue();
 
    // Fold bit comparisons when we can.
    if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
Index: test/CodeGen/X86/avx512-insert-extract.ll
===================================================================
--- test/CodeGen/X86/avx512-insert-extract.ll
+++ test/CodeGen/X86/avx512-insert-extract.ll
@@ -1706,7 +1706,6 @@
 ; KNL-NEXT:    subq $64, %rsp
 ; KNL-NEXT:    ## kill: def $esi killed $esi def $rsi
 ; KNL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT:    vpminub %ymm1, %ymm0, %ymm1
 ; KNL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; KNL-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; KNL-NEXT:    andl $31, %esi
@@ -1738,8 +1737,7 @@
 ; SKX-NEXT:    andq $-32, %rsp
 ; SKX-NEXT:    subq $64, %rsp
 ; SKX-NEXT:    ## kill: def $esi killed $esi def $rsi
-; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; SKX-NEXT:    vpcmpnleub %ymm1, %ymm0, %k0
+; SKX-NEXT:    vptestmb %ymm0, %ymm0, %k0
 ; SKX-NEXT:    andl $31, %esi
 ; SKX-NEXT:    testb %dil, %dil
 ; SKX-NEXT:    vpmovm2b %k0, %ymm0
@@ -1771,10 +1769,8 @@
 ; KNL-NEXT:    subq $128, %rsp
 ; KNL-NEXT:    ## kill: def $esi killed $esi def $rsi
 ; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; KNL-NEXT:    vpminub %ymm2, %ymm0, %ymm3
-; KNL-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
+; KNL-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
 ; KNL-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
-; KNL-NEXT:    vpminub %ymm2, %ymm1, %ymm2
 ; KNL-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
 ; KNL-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; KNL-NEXT:    andl $63, %esi
@@ -1823,8 +1819,7 @@
 ; SKX-NEXT:    andq $-64, %rsp
 ; SKX-NEXT:    subq $128, %rsp
 ; SKX-NEXT:    ## kill: def $esi killed $esi def $rsi
-; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; SKX-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0
+; SKX-NEXT:    vptestmb %zmm0, %zmm0, %k0
 ; SKX-NEXT:    andl $63, %esi
 ; SKX-NEXT:    testb %dil, %dil
 ; SKX-NEXT:    vpmovm2b %k0, %zmm0
@@ -1954,14 +1949,11 @@
 ; KNL-NEXT:    vpinsrb $15, 216(%rbp), %xmm3, %xmm3
 ; KNL-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
 ; KNL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; KNL-NEXT:    vpminub %ymm3, %ymm2, %ymm4
-; KNL-NEXT:    vpcmpeqb %ymm4, %ymm2, %ymm2
+; KNL-NEXT:    vpcmpeqb %ymm3, %ymm2, %ymm2
 ; KNL-NEXT:    vpternlogq $15, %zmm2, %zmm2, %zmm2
-; KNL-NEXT:    vpminub %ymm3, %ymm1, %ymm4
-; KNL-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm1
+; KNL-NEXT:    vpcmpeqb %ymm3, %ymm1, %ymm1
 ; KNL-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
-; KNL-NEXT:    vpminub %ymm3, %ymm0, %ymm4
-; KNL-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
+; KNL-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
 ; KNL-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; KNL-NEXT:    movl 744(%rbp), %eax
 ; KNL-NEXT:    andl $127, %eax
@@ -2137,9 +2129,8 @@
 ; SKX-NEXT:    vpinsrb $14, 720(%rbp), %xmm2, %xmm2
 ; SKX-NEXT:    vpinsrb $15, 728(%rbp), %xmm2, %xmm2
 ; SKX-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
-; SKX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; SKX-NEXT:    vpcmpnleub %zmm2, %zmm0, %k0
-; SKX-NEXT:    vpcmpnleub %zmm2, %zmm1, %k1
+; SKX-NEXT:    vptestmb %zmm0, %zmm0, %k0
+; SKX-NEXT:    vptestmb %zmm1, %zmm1, %k1
 ; SKX-NEXT:    movl 744(%rbp), %eax
 ; SKX-NEXT:    andl $127, %eax
 ; SKX-NEXT:    cmpb $0, 736(%rbp)
@@ -2177,16 +2168,12 @@
 ; KNL-NEXT:    subq $256, %rsp ## imm = 0x100
 ; KNL-NEXT:    ## kill: def $esi killed $esi def $rsi
 ; KNL-NEXT:    vpxor %xmm4, %xmm4, %xmm4
-; KNL-NEXT:    vpminub %ymm4, %ymm0, %ymm5
-; KNL-NEXT:    vpcmpeqb %ymm5, %ymm0, %ymm0
+; KNL-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
 ; KNL-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
-; KNL-NEXT:    vpminub %ymm4, %ymm1, %ymm5
-; KNL-NEXT:    vpcmpeqb %ymm5, %ymm1, %ymm1
+; KNL-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm1
 ; KNL-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
-; KNL-NEXT:    vpminub %ymm4, %ymm2, %ymm5
-; KNL-NEXT:    vpcmpeqb %ymm5, %ymm2, %ymm2
+; KNL-NEXT:    vpcmpeqb %ymm4, %ymm2, %ymm2
 ; KNL-NEXT:    vpternlogq $15, %zmm2, %zmm2, %zmm2
-; KNL-NEXT:    vpminub %ymm4, %ymm3, %ymm4
 ; KNL-NEXT:    vpcmpeqb %ymm4, %ymm3, %ymm3
 ; KNL-NEXT:    vpternlogq $15, %zmm3, %zmm3, %zmm3
 ; KNL-NEXT:    andl $127, %esi
@@ -2263,9 +2250,8 @@
 ; SKX-NEXT:    andq $-128, %rsp
 ; SKX-NEXT:    subq $256, %rsp ## imm = 0x100
 ; SKX-NEXT:    ## kill: def $esi killed $esi def $rsi
-; SKX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; SKX-NEXT:    vpcmpnleub %zmm2, %zmm0, %k0
-; SKX-NEXT:    vpcmpnleub %zmm2, %zmm1, %k1
+; SKX-NEXT:    vptestmb %zmm0, %zmm0, %k0
+; SKX-NEXT:    vptestmb %zmm1, %zmm1, %k1
 ; SKX-NEXT:    andl $127, %esi
 ; SKX-NEXT:    testb %dil, %dil
 ; SKX-NEXT:    vpmovm2b %k1, %zmm0
Index: test/CodeGen/X86/avx512-mask-op.ll
===================================================================
--- test/CodeGen/X86/avx512-mask-op.ll
+++ test/CodeGen/X86/avx512-mask-op.ll
@@ -560,13 +560,13 @@
 ; KNL-LABEL: test8:
 ; KNL:       ## %bb.0:
 ; KNL-NEXT:    cmpl %esi, %edi
-; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; KNL-NEXT:    jg LBB17_1
 ; KNL-NEXT:    ## %bb.2:
-; KNL-NEXT:    vpcmpltud %zmm2, %zmm1, %k1
+; KNL-NEXT:    kxorw %k0, %k0, %k1
 ; KNL-NEXT:    jmp LBB17_3
 ; KNL-NEXT:  LBB17_1:
-; KNL-NEXT:    vpcmpgtd %zmm2, %zmm0, %k1
+; KNL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; KNL-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
 ; KNL-NEXT:  LBB17_3:
 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
@@ -576,15 +576,15 @@
 ; SKX-LABEL: test8:
 ; SKX:       ## %bb.0:
 ; SKX-NEXT:    cmpl %esi, %edi
-; SKX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; SKX-NEXT:    jg LBB17_1
 ; SKX-NEXT:    ## %bb.2:
-; SKX-NEXT:    vpcmpltud %zmm2, %zmm1, %k0
+; SKX-NEXT:    kxorw %k0, %k0, %k0
 ; SKX-NEXT:    vpmovm2b %k0, %xmm0
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
 ; SKX-NEXT:  LBB17_1:
-; SKX-NEXT:    vpcmpgtd %zmm2, %zmm0, %k0
+; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; SKX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
 ; SKX-NEXT:    vpmovm2b %k0, %xmm0
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
@@ -592,16 +592,16 @@
 ; AVX512BW-LABEL: test8:
 ; AVX512BW:       ## %bb.0:
 ; AVX512BW-NEXT:    cmpl %esi, %edi
-; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512BW-NEXT:    jg LBB17_1
 ; AVX512BW-NEXT:    ## %bb.2:
-; AVX512BW-NEXT:    vpcmpltud %zmm2, %zmm1, %k0
+; AVX512BW-NEXT:    kxorw %k0, %k0, %k0
 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ; AVX512BW-NEXT:  LBB17_1:
-; AVX512BW-NEXT:    vpcmpgtd %zmm2, %zmm0, %k0
+; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX512BW-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512BW-NEXT:    vzeroupper
@@ -610,13 +610,13 @@
 ; AVX512DQ-LABEL: test8:
 ; AVX512DQ:       ## %bb.0:
 ; AVX512DQ-NEXT:    cmpl %esi, %edi
-; AVX512DQ-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512DQ-NEXT:    jg LBB17_1
 ; AVX512DQ-NEXT:    ## %bb.2:
-; AVX512DQ-NEXT:    vpcmpltud %zmm2, %zmm1, %k0
+; AVX512DQ-NEXT:    kxorw %k0, %k0, %k0
 ; AVX512DQ-NEXT:    jmp LBB17_3
 ; AVX512DQ-NEXT:  LBB17_1:
-; AVX512DQ-NEXT:    vpcmpgtd %zmm2, %zmm0, %k0
+; AVX512DQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX512DQ-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
 ; AVX512DQ-NEXT:  LBB17_3:
 ; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
 ; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
Index: test/CodeGen/X86/avx512-schedule.ll
===================================================================
--- test/CodeGen/X86/avx512-schedule.ll
+++ test/CodeGen/X86/avx512-schedule.ll
@@ -7054,32 +7054,32 @@
 define <16 x i8> @vcmp_test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
 ; GENERIC-LABEL: vcmp_test8:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
 ; GENERIC-NEXT:    cmpl %esi, %edi # sched: [1:0.33]
 ; GENERIC-NEXT:    jg .LBB386_1 # sched: [1:1.00]
 ; GENERIC-NEXT:  # %bb.2:
-; GENERIC-NEXT:    vpcmpltud %zmm2, %zmm1, %k0 # sched: [3:1.00]
+; GENERIC-NEXT:    kxorw %k0, %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ; GENERIC-NEXT:  .LBB386_1:
-; GENERIC-NEXT:    vpcmpgtd %zmm2, %zmm0, %k0 # sched: [3:1.00]
+; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
+; GENERIC-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vcmp_test8:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    cmpl %esi, %edi # sched: [1:0.25]
 ; SKX-NEXT:    jg .LBB386_1 # sched: [1:0.50]
 ; SKX-NEXT:  # %bb.2:
-; SKX-NEXT:    vpcmpltud %zmm2, %zmm1, %k0 # sched: [3:1.00]
+; SKX-NEXT:    kxorw %k0, %k0, %k0 # sched: [1:1.00]
 ; SKX-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ; SKX-NEXT:  .LBB386_1:
-; SKX-NEXT:    vpcmpgtd %zmm2, %zmm0, %k0 # sched: [3:1.00]
+; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
+; SKX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 # sched: [3:1.00]
 ; SKX-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
Index: test/CodeGen/X86/vector-compare-simplify.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/vector-compare-simplify.ll
@@ -0,0 +1,330 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s
+
+; Test simplifications of vector compares that should simplify to true, false or equality.
+
+define <4 x i32> @slt_min(<4 x i32> %x) {
+; CHECK-LABEL: slt_min:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp slt <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @sge_min(<4 x i32> %x) {
+; CHECK-LABEL: sge_min:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp sge <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @sgt_min(<4 x i32> %x) {
+; CHECK-LABEL: sgt_min:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp sgt <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @sle_min(<4 x i32> %x) {
+; CHECK-LABEL: sle_min:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp sle <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @sgt_max(<4 x i32> %x) {
+; CHECK-LABEL: sgt_max:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp sgt <4 x i32> %x, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @sle_max(<4 x i32> %x) {
+; CHECK-LABEL: sle_max:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp sle <4 x i32> %x, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @slt_max(<4 x i32> %x) {
+; CHECK-LABEL: slt_max:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp slt <4 x i32> %x, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @sge_max(<4 x i32> %x) {
+; CHECK-LABEL: sge_max:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp slt <4 x i32> %x, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @ult_min(<4 x i32> %x) {
+; CHECK-LABEL: ult_min:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp ult <4 x i32> %x, zeroinitializer
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @uge_min(<4 x i32> %x) {
+; CHECK-LABEL: uge_min:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp uge <4 x i32> %x, zeroinitializer
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @ugt_min(<4 x i32> %x) {
+; CHECK-LABEL: ugt_min:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pxor %xmm1, %xmm1
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp ugt <4 x i32> %x, zeroinitializer
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @ule_min(<4 x i32> %x) {
+; CHECK-LABEL: ule_min:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    pcmpgtd %xmm1, %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp ule <4 x i32> %x, zeroinitializer
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @ugt_max(<4 x i32> %x) {
+; CHECK-LABEL: ugt_max:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorps %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp ugt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @ule_max(<4 x i32> %x) {
+; CHECK-LABEL: ule_max:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp ule <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @ult_max(<4 x i32> %x) {
+; CHECK-LABEL: ult_max:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp ult <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @uge_max(<4 x i32> %x) {
+; CHECK-LABEL: uge_max:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT:    pxor {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [2147483647,2147483647,2147483647,2147483647]
+; CHECK-NEXT:    pcmpgtd %xmm0, %xmm1
+; CHECK-NEXT:    pxor %xmm2, %xmm1
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp uge <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @slt_min_plus1(<4 x i32> %x) {
+; CHECK-LABEL: slt_min_plus1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp slt <4 x i32> %x, <i32 -2147483647, i32 -2147483647, i32 -2147483647, i32 -2147483647>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @sge_min_plus1(<4 x i32> %x) {
+; CHECK-LABEL: sge_min_plus1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [2147483649,2147483649,2147483649,2147483649]
+; CHECK-NEXT:    pcmpgtd %xmm0, %xmm1
+; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp sge <4 x i32> %x, <i32 -2147483647, i32 -2147483647, i32 -2147483647, i32 -2147483647>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @sgt_max_minus1(<4 x i32> %x) {
+; CHECK-LABEL: sgt_max_minus1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp sgt <4 x i32> %x, <i32 2147483646, i32 2147483646, i32 2147483646, i32 2147483646>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @sle_max_minus1(<4 x i32> %x) {
+; CHECK-LABEL: sle_max_minus1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp sle <4 x i32> %x, <i32 2147483646, i32 2147483646, i32 2147483646, i32 2147483646>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @ult_one(<4 x i32> %x) {
+; CHECK-LABEL: ult_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pxor %xmm1, %xmm1
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp ult <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @uge_one(<4 x i32> %x) {
+; CHECK-LABEL: uge_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pxor {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [2147483649,2147483649,2147483649,2147483649]
+; CHECK-NEXT:    pcmpgtd %xmm0, %xmm1
+; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp uge <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @ugt_max_minus1(<4 x i32> %x) {
+; CHECK-LABEL: ugt_max_minus1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp ugt <4 x i32> %x, <i32 -2, i32 -2, i32 -2, i32 -2>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @ule_max_minus1(<4 x i32> %x) {
+; CHECK-LABEL: ule_max_minus1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pxor {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp ule <4 x i32> %x, <i32 -2, i32 -2, i32 -2, i32 -2>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @ugt_smax(<4 x i32> %x) {
+; CHECK-LABEL: ugt_smax:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pxor %xmm1, %xmm1
+; CHECK-NEXT:    pcmpgtd %xmm0, %xmm1
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp ugt <4 x i32> %x, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @ule_smax(<4 x i32> %x) {
+; CHECK-LABEL: ule_smax:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pxor {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT:    pcmpgtd %xmm1, %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp ule <4 x i32> %x, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @ult_smin(<4 x i32> %x) {
+; CHECK-LABEL: ult_smin:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT:    pcmpgtd %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp ult <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @uge_smin(<4 x i32> %x) {
+; CHECK-LABEL: uge_smin:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pxor {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm1
+; CHECK-NEXT:    pcmpgtd %xmm0, %xmm1
+; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp uge <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
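
Note (editorial illustration, not part of the patch): the kind of IR this change affects is a setcc whose RHS is a splat vector constant rather than a scalar ConstantSDNode. Because the MIN/MAX reasoning is per-element, a fold that holds for a scalar holds for every lane of a splat. The function below is a hypothetical sketch mirroring the @sge_min test above: x >= INT_MIN is true in each i32 lane, so SimplifySetCC can now fold the whole compare before legalization, and the test expects a single pcmpeqd %xmm0, %xmm0 (materializing all-ones) instead of a real vector compare.

; Hypothetical example (mirrors @sge_min above): every lane satisfies
; x >= INT_MIN, so the setcc folds to a constant all-true mask, and the
; sext of that mask is just the all-ones vector <i32 -1, i32 -1, i32 -1, i32 -1>.
define <4 x i32> @splat_sge_min_example(<4 x i32> %x) {
  %cmp = icmp sge <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
  %r = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %r
}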