Index: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1764,8 +1764,9 @@ } // Ensure that the constant occurs on the RHS and fold constant comparisons. + // TODO: Handle non-splat vector constants. All undef causes trouble. ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond); - if (isa<ConstantSDNode>(N0.getNode()) && + if (isConstOrConstSplat(N0) && (DCI.isBeforeLegalizeOps() || isCondCodeLegal(SwappedCC, N0.getSimpleValueType()))) return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); @@ -2140,9 +2141,15 @@ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); } } + } + + // These simplifications apply to splat vectors as well. + // TODO: Handle more splat vector cases. + if (auto *N1C = isConstOrConstSplat(N1)) { + const APInt &C1 = N1C->getAPIntValue(); APInt MinVal, MaxVal; - unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits(); + unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits(); if (ISD::isSignedIntSetCC(Cond)) { MinVal = APInt::getSignedMinValue(OperandBitSize); MaxVal = APInt::getSignedMaxValue(OperandBitSize); @@ -2157,16 +2164,18 @@ if (C1 == MinVal) return DAG.getBoolConstant(true, dl, VT, OpVT); - // X >= C0 --> X > (C0 - 1) - APInt C = C1 - 1; - ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT; - if ((DCI.isBeforeLegalizeOps() || - isCondCodeLegal(NewCC, VT.getSimpleVT())) && - (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && - isLegalICmpImmediate(C.getSExtValue())))) { - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C, dl, N1.getValueType()), - NewCC); + if (!VT.isVector()) { // TODO: Support this for vectors. + // X >= C0 --> X > (C0 - 1) + APInt C = C1 - 1; + ISD::CondCode NewCC = (Cond == ISD::SETGE) ? 
ISD::SETGT : ISD::SETUGT; + if ((DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCC, VT.getSimpleVT())) && + (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && + isLegalICmpImmediate(C.getSExtValue())))) { + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C, dl, N1.getValueType()), + NewCC); + } } } @@ -2176,59 +2185,79 @@ return DAG.getBoolConstant(true, dl, VT, OpVT); // X <= C0 --> X < (C0 + 1) - APInt C = C1 + 1; - ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT; - if ((DCI.isBeforeLegalizeOps() || - isCondCodeLegal(NewCC, VT.getSimpleVT())) && - (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && - isLegalICmpImmediate(C.getSExtValue())))) { - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C, dl, N1.getValueType()), - NewCC); + if (!VT.isVector()) { // TODO: Support this for vectors. + APInt C = C1 + 1; + ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT; + if ((DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCC, VT.getSimpleVT())) && + (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && + isLegalICmpImmediate(C.getSExtValue())))) { + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C, dl, N1.getValueType()), + NewCC); + } } } - if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) - return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false - if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal) - return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false - - // Canonicalize setgt X, Min --> setne X, Min - if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); - // Canonicalize setlt X, Max --> setne X, Max - if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); - - // If we have setult X, 1, turn it into seteq X, 0 - if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) - return 
DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MinVal, dl, N0.getValueType()), - ISD::SETEQ); - // If we have setugt X, Max-1, turn it into seteq X, Max - if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MaxVal, dl, N0.getValueType()), - ISD::SETEQ); + if (Cond == ISD::SETLT || Cond == ISD::SETULT) { + if (C1 == MinVal) + return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false + + if (!VT.isVector()) { // TODO: Support this for vectors. + // Canonicalize setlt X, Max --> setne X, Max + if (C1 == MaxVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + + // If we have setult X, 1, turn it into seteq X, 0 + if (C1 == MinVal+1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MinVal, dl, N0.getValueType()), + ISD::SETEQ); + } + } + + if (Cond == ISD::SETGT || Cond == ISD::SETUGT) { + if (C1 == MaxVal) + return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false + + if (!VT.isVector()) { // TODO: Support this for vectors. + // Canonicalize setgt X, Min --> setne X, Min + if (C1 == MinVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + + // If we have setugt X, Max-1, turn it into seteq X, Max + if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MaxVal, dl, N0.getValueType()), + ISD::SETEQ); + } + } // If we have "setcc X, C0", check to see if we can shrink the immediate // by changing cc. + if (!VT.isVector()) { // TODO: Support this for vectors. 
+ // SETUGT X, SINTMAX -> SETLT X, 0 + if (Cond == ISD::SETUGT && + C1 == APInt::getSignedMaxValue(OperandBitSize)) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(0, dl, N1.getValueType()), + ISD::SETLT); - // SETUGT X, SINTMAX -> SETLT X, 0 - if (Cond == ISD::SETUGT && - C1 == APInt::getSignedMaxValue(OperandBitSize)) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(0, dl, N1.getValueType()), - ISD::SETLT); - - // SETULT X, SINTMIN -> SETGT X, -1 - if (Cond == ISD::SETULT && - C1 == APInt::getSignedMinValue(OperandBitSize)) { - SDValue ConstMinusOne = - DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl, - N1.getValueType()); - return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); + // SETULT X, SINTMIN -> SETGT X, -1 + if (Cond == ISD::SETULT && + C1 == APInt::getSignedMinValue(OperandBitSize)) { + SDValue ConstMinusOne = + DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl, + N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); + } } + } + + // Back to non-vector simplifications. + // TODO: Can we do these for vector splats? + if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + const APInt &C1 = N1C->getAPIntValue(); // Fold bit comparisons when we can. 
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && Index: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll +++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll @@ -560,13 +560,13 @@ ; KNL-LABEL: test8: ; KNL: ## %bb.0: ; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; KNL-NEXT: jg LBB17_1 ; KNL-NEXT: ## %bb.2: -; KNL-NEXT: vpcmpltud %zmm2, %zmm1, %k1 +; KNL-NEXT: kxorw %k0, %k0, %k1 ; KNL-NEXT: jmp LBB17_3 ; KNL-NEXT: LBB17_1: -; KNL-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 +; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 ; KNL-NEXT: LBB17_3: ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 @@ -576,15 +576,15 @@ ; SKX-LABEL: test8: ; SKX: ## %bb.0: ; SKX-NEXT: cmpl %esi, %edi -; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; SKX-NEXT: jg LBB17_1 ; SKX-NEXT: ## %bb.2: -; SKX-NEXT: vpcmpltud %zmm2, %zmm1, %k0 +; SKX-NEXT: kxorw %k0, %k0, %k0 ; SKX-NEXT: vpmovm2b %k0, %xmm0 ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; SKX-NEXT: LBB17_1: -; SKX-NEXT: vpcmpgtd %zmm2, %zmm0, %k0 +; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; SKX-NEXT: vpmovm2b %k0, %xmm0 ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq @@ -592,16 +592,16 @@ ; AVX512BW-LABEL: test8: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: cmpl %esi, %edi -; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512BW-NEXT: jg LBB17_1 ; AVX512BW-NEXT: ## %bb.2: -; AVX512BW-NEXT: vpcmpltud %zmm2, %zmm1, %k0 +; AVX512BW-NEXT: kxorw %k0, %k0, %k0 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; AVX512BW-NEXT: LBB17_1: -; AVX512BW-NEXT: vpcmpgtd %zmm2, %zmm0, %k0 +; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 ; AVX512BW-NEXT: ## kill: def $xmm0 killed 
$xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper @@ -610,13 +610,13 @@ ; AVX512DQ-LABEL: test8: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: cmpl %esi, %edi -; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512DQ-NEXT: jg LBB17_1 ; AVX512DQ-NEXT: ## %bb.2: -; AVX512DQ-NEXT: vpcmpltud %zmm2, %zmm1, %k0 +; AVX512DQ-NEXT: kxorw %k0, %k0, %k0 ; AVX512DQ-NEXT: jmp LBB17_3 ; AVX512DQ-NEXT: LBB17_1: -; AVX512DQ-NEXT: vpcmpgtd %zmm2, %zmm0, %k0 +; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512DQ-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; AVX512DQ-NEXT: LBB17_3: ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 Index: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll +++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll @@ -7054,32 +7054,32 @@ define <16 x i8> @vcmp_test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) { ; GENERIC-LABEL: vcmp_test8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] ; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] ; GENERIC-NEXT: jg .LBB386_1 # sched: [1:1.00] ; GENERIC-NEXT: # %bb.2: -; GENERIC-NEXT: vpcmpltud %zmm2, %zmm1, %k0 # sched: [3:1.00] +; GENERIC-NEXT: kxorw %k0, %k0, %k0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: .LBB386_1: -; GENERIC-NEXT: vpcmpgtd %zmm2, %zmm0, %k0 # sched: [3:1.00] +; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; GENERIC-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [3:1.00] ; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vcmp_test8: ; SKX: # %bb.0: -; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] ; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] ; SKX-NEXT: jg .LBB386_1 # sched: [1:0.50] ; SKX-NEXT: # %bb.2: 
-; SKX-NEXT: vpcmpltud %zmm2, %zmm1, %k0 # sched: [3:1.00] +; SKX-NEXT: kxorw %k0, %k0, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] ; SKX-NEXT: vzeroupper # sched: [4:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; SKX-NEXT: .LBB386_1: -; SKX-NEXT: vpcmpgtd %zmm2, %zmm0, %k0 # sched: [3:1.00] +; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [3:1.00] ; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] ; SKX-NEXT: vzeroupper # sched: [4:1.00] ; SKX-NEXT: retq # sched: [7:1.00] Index: llvm/trunk/test/CodeGen/X86/vector-compare-simplify.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-compare-simplify.ll +++ llvm/trunk/test/CodeGen/X86/vector-compare-simplify.ll @@ -6,9 +6,7 @@ define <4 x i32> @slt_min(<4 x i32> %x) { ; CHECK-LABEL: slt_min: ; CHECK: # %bb.0: -; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; CHECK-NEXT: pcmpgtd %xmm0, %xmm1 -; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: retq %cmp = icmp slt <4 x i32> %x, %r = sext <4 x i1> %cmp to <4 x i32> @@ -18,10 +16,7 @@ define <4 x i32> @sge_min(<4 x i32> %x) { ; CHECK-LABEL: sge_min: ; CHECK: # %bb.0: -; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; CHECK-NEXT: pcmpgtd %xmm0, %xmm1 ; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 -; CHECK-NEXT: pxor %xmm1, %xmm0 ; CHECK-NEXT: retq %cmp = icmp sge <4 x i32> %x, %r = sext <4 x i1> %cmp to <4 x i32> @@ -53,7 +48,7 @@ define <4 x i32> @sgt_max(<4 x i32> %x) { ; CHECK-LABEL: sgt_max: ; CHECK: # %bb.0: -; CHECK-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: retq %cmp = icmp sgt <4 x i32> %x, %r = sext <4 x i1> %cmp to <4 x i32> @@ -63,9 +58,7 @@ define <4 x i32> @sle_max(<4 x i32> %x) { ; CHECK-LABEL: sle_max: ; CHECK: # %bb.0: -; CHECK-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 -; CHECK-NEXT: pcmpeqd %xmm1, 
%xmm1 -; CHECK-NEXT: pxor %xmm1, %xmm0 +; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 ; CHECK-NEXT: retq %cmp = icmp sle <4 x i32> %x, %r = sext <4 x i1> %cmp to <4 x i32> @@ -99,10 +92,7 @@ define <4 x i32> @ult_min(<4 x i32> %x) { ; CHECK-LABEL: ult_min: ; CHECK: # %bb.0: -; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; CHECK-NEXT: pxor %xmm1, %xmm0 -; CHECK-NEXT: pcmpgtd %xmm0, %xmm1 -; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: retq %cmp = icmp ult <4 x i32> %x, zeroinitializer %r = sext <4 x i1> %cmp to <4 x i32> @@ -112,11 +102,7 @@ define <4 x i32> @uge_min(<4 x i32> %x) { ; CHECK-LABEL: uge_min: ; CHECK: # %bb.0: -; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; CHECK-NEXT: pxor %xmm1, %xmm0 -; CHECK-NEXT: pcmpgtd %xmm0, %xmm1 ; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 -; CHECK-NEXT: pxor %xmm1, %xmm0 ; CHECK-NEXT: retq %cmp = icmp uge <4 x i32> %x, zeroinitializer %r = sext <4 x i1> %cmp to <4 x i32> @@ -152,8 +138,7 @@ define <4 x i32> @ugt_max(<4 x i32> %x) { ; CHECK-LABEL: ugt_max: ; CHECK: # %bb.0: -; CHECK-NEXT: pxor {{.*}}(%rip), %xmm0 -; CHECK-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: retq %cmp = icmp ugt <4 x i32> %x, %r = sext <4 x i1> %cmp to <4 x i32> @@ -163,10 +148,7 @@ define <4 x i32> @ule_max(<4 x i32> %x) { ; CHECK-LABEL: ule_max: ; CHECK: # %bb.0: -; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 -; CHECK-NEXT: pxor {{.*}}(%rip), %xmm0 -; CHECK-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 -; CHECK-NEXT: pxor %xmm1, %xmm0 +; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 ; CHECK-NEXT: retq %cmp = icmp ule <4 x i32> %x, %r = sext <4 x i1> %cmp to <4 x i32>