diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -53454,27 +53454,39 @@ return DAG.getSetCC(DL, VT, LHS.getOperand(0), DAG.getConstant(0, DL, SrcVT), CC); } + } - // With C as a power of 2 and C != 0 and C != INT_MIN: - // icmp eq Abs(X) C -> - // (icmp eq A, C) | (icmp eq A, -C) - // icmp ne Abs(X) C -> - // (icmp ne A, C) & (icmp ne A, -C) - // Both of these patterns can be better optimized in - // DAGCombiner::foldAndOrOfSETCC. Note this only applies for scalar - // integers which is checked above. - if (LHS.getOpcode() == ISD::ABS && LHS.hasOneUse()) { - if (auto *C = dyn_cast<ConstantSDNode>(RHS)) { - const APInt &CInt = C->getAPIntValue(); + if (OpVT.isInteger() && LHS.getOpcode() == ISD::ABS && LHS.hasOneUse()) { + if (auto *C = isConstOrConstSplat(RHS)) { + const APInt &CInt = C->getAPIntValue(); + bool ConvertToLogicOpOfSETCC = false; + if (OpVT.isVector() && OpVT.getVectorElementType() == MVT::i64 && + !Subtarget.hasAVX512()) { + // ABS(vNxi64) requires avx512 even for xmm/ymm whereas SETCC/ALU + // are available with (sse2/sse4.1)/avx2. If ABS is not available, + // check if SETCC/ALU are, and if so, fold. + if (OpVT.getSizeInBits() == 128) + ConvertToLogicOpOfSETCC = Subtarget.hasSSE41(); + else if (OpVT.getSizeInBits() == 256) + ConvertToLogicOpOfSETCC = Subtarget.hasAVX2(); + } else if (OpVT.isScalarInteger()) { + // With C as a power of 2 and C != 0 and C != INT_MIN: + // icmp eq Abs(X) C -> + // (icmp eq A, C) | (icmp eq A, -C) + // icmp ne Abs(X) C -> + // (icmp ne A, C) & (icmp ne A, -C) // We can better optimize this case in DAGCombiner::foldAndOrOfSETCC. 
- if (CInt.isPowerOf2() && !CInt.isMinSignedValue()) { - SDValue BaseOp = LHS.getOperand(0); - SDValue SETCC0 = DAG.getSetCC(DL, VT, BaseOp, RHS, CC); - SDValue SETCC1 = DAG.getSetCC( - DL, VT, BaseOp, DAG.getConstant(-CInt, DL, OpVT), CC); - return DAG.getNode(CC == ISD::SETEQ ? ISD::OR : ISD::AND, DL, VT, - SETCC0, SETCC1); - } + ConvertToLogicOpOfSETCC = + CInt.isPowerOf2() && !CInt.isMinSignedValue(); + } + + if (ConvertToLogicOpOfSETCC) { + SDValue BaseOp = LHS.getOperand(0); + SDValue SETCC0 = DAG.getSetCC(DL, VT, BaseOp, RHS, CC); + SDValue SETCC1 = DAG.getSetCC(DL, VT, BaseOp, + DAG.getConstant(-CInt, DL, OpVT), CC); + return DAG.getNode(CC == ISD::SETEQ ? ISD::OR : ISD::AND, DL, VT, + SETCC0, SETCC1); } } } diff --git a/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll --- a/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll +++ b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll @@ -100,11 +100,11 @@ ; ; AVX2-LABEL: illegal_abs_to_eq_or: ; AVX2: # %bb.0: -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm1 -; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487] +; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm2 ; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vzeroupper @@ -112,19 +112,16 @@ ; ; SSE41-LABEL: illegal_abs_to_eq_or: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pxor %xmm3, %xmm3 -; SSE41-NEXT: pxor %xmm4, %xmm4 -; SSE41-NEXT: psubq %xmm0, %xmm4 -; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 -; SSE41-NEXT: psubq %xmm1, %xmm3 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [129,129] -; SSE41-NEXT: pcmpeqq %xmm0, %xmm1 
-; SSE41-NEXT: pcmpeqq %xmm0, %xmm2 -; SSE41-NEXT: packssdw %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [129,129] +; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551487,18446744073709551487] +; SSE41-NEXT: movdqa %xmm1, %xmm4 +; SSE41-NEXT: pcmpeqq %xmm3, %xmm4 +; SSE41-NEXT: pcmpeqq %xmm2, %xmm1 +; SSE41-NEXT: por %xmm4, %xmm1 +; SSE41-NEXT: pcmpeqq %xmm0, %xmm3 +; SSE41-NEXT: pcmpeqq %xmm2, %xmm0 +; SSE41-NEXT: por %xmm3, %xmm0 +; SSE41-NEXT: packssdw %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; SSE2-LABEL: illegal_abs_to_eq_or: @@ -163,28 +160,24 @@ ; ; AVX2-LABEL: illegal_abs_to_eq_or_sext: ; AVX2: # %bb.0: -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm1 -; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487] +; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm2 ; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; SSE41-LABEL: illegal_abs_to_eq_or_sext: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pxor %xmm3, %xmm3 -; SSE41-NEXT: pxor %xmm4, %xmm4 -; SSE41-NEXT: psubq %xmm1, %xmm4 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1 -; SSE41-NEXT: psubq %xmm2, %xmm3 -; SSE41-NEXT: movdqa %xmm2, %xmm0 -; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [129,129] -; SSE41-NEXT: pcmpeqq %xmm0, %xmm2 -; SSE41-NEXT: pcmpeqq %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [129,129] +; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551487,18446744073709551487] +; SSE41-NEXT: movdqa %xmm0, %xmm4 +; SSE41-NEXT: pcmpeqq %xmm3, %xmm4 +; SSE41-NEXT: pcmpeqq %xmm2, %xmm0 +; SSE41-NEXT: por %xmm4, %xmm0 +; SSE41-NEXT: pcmpeqq %xmm1, %xmm3 +; SSE41-NEXT: pcmpeqq %xmm2, 
%xmm1 +; SSE41-NEXT: por %xmm3, %xmm1 ; SSE41-NEXT: retq ; ; SSE2-LABEL: illegal_abs_to_eq_or_sext: @@ -225,13 +218,13 @@ ; ; AVX2-LABEL: illegal_abs_to_ne_and: ; AVX2: # %bb.0: -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm1 -; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] -; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487] +; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 +; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpandn %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vzeroupper @@ -239,21 +232,19 @@ ; ; SSE41-LABEL: illegal_abs_to_ne_and: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pxor %xmm3, %xmm3 -; SSE41-NEXT: pxor %xmm4, %xmm4 -; SSE41-NEXT: psubq %xmm0, %xmm4 -; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 -; SSE41-NEXT: psubq %xmm1, %xmm3 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [129,129] -; SSE41-NEXT: pcmpeqq %xmm0, %xmm1 -; SSE41-NEXT: pcmpeqd %xmm3, %xmm3 -; SSE41-NEXT: pxor %xmm3, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [129,129] +; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: pcmpeqq %xmm2, %xmm3 +; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487] +; SSE41-NEXT: pcmpeqq %xmm4, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm5, %xmm5 +; SSE41-NEXT: pxor %xmm5, %xmm1 +; SSE41-NEXT: pandn %xmm1, %xmm3 ; SSE41-NEXT: pcmpeqq %xmm0, %xmm2 -; SSE41-NEXT: pxor %xmm3, %xmm2 -; SSE41-NEXT: packssdw %xmm1, %xmm2 +; SSE41-NEXT: pcmpeqq %xmm4, %xmm0 +; SSE41-NEXT: pxor %xmm5, %xmm0 +; SSE41-NEXT: 
pandn %xmm0, %xmm2 +; SSE41-NEXT: packssdw %xmm3, %xmm2 ; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: retq ; @@ -297,33 +288,31 @@ ; ; AVX2-LABEL: illegal_abs_to_ne_and_sext: ; AVX2: # %bb.0: -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm1 -; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] -; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487] +; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 +; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpandn %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; ; SSE41-LABEL: illegal_abs_to_ne_and_sext: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: pxor %xmm3, %xmm3 -; SSE41-NEXT: pxor %xmm4, %xmm4 -; SSE41-NEXT: psubq %xmm1, %xmm4 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1 -; SSE41-NEXT: psubq %xmm2, %xmm3 -; SSE41-NEXT: movdqa %xmm2, %xmm0 -; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [129,129] -; SSE41-NEXT: pcmpeqq %xmm0, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm3, %xmm3 -; SSE41-NEXT: pxor %xmm3, %xmm2 -; SSE41-NEXT: pcmpeqq %xmm0, %xmm1 -; SSE41-NEXT: pxor %xmm3, %xmm1 -; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [129,129] +; SSE41-NEXT: movdqa %xmm0, %xmm3 +; SSE41-NEXT: pcmpeqq %xmm2, %xmm3 +; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487] +; SSE41-NEXT: pcmpeqq %xmm4, %xmm0 +; SSE41-NEXT: pcmpeqd %xmm5, %xmm5 +; SSE41-NEXT: pxor %xmm5, %xmm0 +; SSE41-NEXT: pandn %xmm0, %xmm3 +; SSE41-NEXT: pcmpeqq %xmm1, %xmm2 +; SSE41-NEXT: pcmpeqq %xmm4, %xmm1 +; SSE41-NEXT: pxor %xmm5, %xmm1 +; SSE41-NEXT: pandn %xmm1, %xmm2 +; 
SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: movdqa %xmm2, %xmm1 ; SSE41-NEXT: retq ; ; SSE2-LABEL: illegal_abs_to_ne_and_sext: