diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1064,6 +1064,70 @@ Known.setAllZero(); } +static void computeKnownBitsFromAndXorOr(bool IsAnd, const Operator *I, + const APInt &DemandedElts, + KnownBits &Known, unsigned Depth, + const Query &Q) { + unsigned BitWidth = Known.getBitWidth(); + KnownBits Known2(BitWidth); + computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); + computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); + + Value *X = nullptr, *Y = nullptr; + // and(x, -x)/xor(x, x-1) are common idioms that will clearing all but + // lowest set bit. If we have a single known bit in x, we can clear all bits + // above it. + // TODO: instcombine often reassociates independent `and` which can hide + // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x). + if ((!Known.One.isZero() || !Known2.One.isZero()) && + (IsAnd ? match(I, m_c_And(m_Value(X), m_Neg(m_Deferred(X)))) + : match(I, m_c_Xor(m_Value(X), + m_c_Add(m_Deferred(X), m_AllOnes()))))) { + unsigned MinBit = std::min(Known.One.countTrailingZeros(), + Known2.One.countTrailingZeros()); + APInt Mask = APInt::getBitsSetFrom(BitWidth, MinBit + 1); + // Must do this AFTER computing MinBit. + Known = IsAnd ? (Known & Known2) : (Known ^ Known2); + Known.Zero |= Mask; + Known.One &= ~Mask; + } else { + // We don't want to put this before the Known.One.isZero check. + switch (I->getOpcode()) { + case Instruction::And: + Known &= Known2; + break; + case Instruction::Or: + Known |= Known2; + break; + case Instruction::Xor: + Known ^= Known2; + break; + default: + llvm_unreachable("Invalid Op used in 'computeKnownBitsFromAndXorOr'"); + } + } + + // and(x, add (x, -1)) is a common idiom that always clears the low bit; + // xor/or(x, add (x, -1)) is an idiom that will always set the low bit. + // here we handle the more general case of adding any odd number by + // matching the form and/xor/or(x, add(x, y)) where y is odd. + // TODO: This could be generalized to clearing any bit set in y where the + // following bit is known to be unset in y. + if (!Known.Zero[0] && !Known.One[0] && + (match(I, m_c_BinOp(m_Value(X), m_c_Add(m_Deferred(X), m_Value(Y)))) || + match(I, m_c_BinOp(m_Value(X), m_Sub(m_Deferred(X), m_Value(Y)))) || + match(I, m_c_BinOp(m_Value(X), m_Sub(m_Value(Y), m_Deferred(X)))))) { + Known2.resetAll(); + computeKnownBits(Y, DemandedElts, Known2, Depth + 1, Q); + if (Known2.countMinTrailingOnes() > 0) { + if (IsAnd) + Known.Zero.setBit(0); + else + Known.One.setBit(0); + } + } +} + static void computeKnownBitsFromOperator(const Operator *I, const APInt &DemandedElts, KnownBits &Known, unsigned Depth, @@ -1078,70 +1142,18 @@ Q.IIQ.getMetadata(cast(I), LLVMContext::MD_range)) computeKnownBitsFromRangeMetadata(*MD, Known); break; - case Instruction::And: { - // If either the LHS or the RHS are Zero, the result is zero. - computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); - computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); - - Value *X = nullptr, *Y = nullptr; - // and(x, -x) is a common idiom for clearing all but lowest set bit. If we - // have a single known bit in x, we can clear all bits above it. - // TODO: instcombine often reassociates independent `and` which can hide - // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x). - if (!Known.One.isZero() || !Known2.One.isZero()) { - if (match(I, m_c_BinOp(m_Value(X), m_Neg(m_Deferred(X))))) { - unsigned MinBit = std::min(Known.One.countTrailingZeros(), - Known2.One.countTrailingZeros()); - APInt Mask = APInt::getBitsSetFrom(BitWidth, MinBit + 1); - Known.Zero |= Mask; - Known.One &= ~Mask; - } - } - Known &= Known2; - - // and(x, add (x, -1)) is a common idiom that always clears the low bit; - // here we handle the more general case of adding any odd number by - // matching the form add(x, add(x, y)) where y is odd. - // TODO: This could be generalized to clearing any bit set in y where the - // following bit is known to be unset in y. - if (!Known.Zero[0] && !Known.One[0] && - match(I, m_c_BinOp(m_Value(X), m_Add(m_Deferred(X), m_Value(Y))))) { - Known2.resetAll(); - computeKnownBits(Y, DemandedElts, Known2, Depth + 1, Q); - if (Known2.countMinTrailingOnes() > 0) - Known.Zero.setBit(0); - } + case Instruction::And: + computeKnownBitsFromAndXorOr(/*IsAnd*/ true, I, DemandedElts, Known, Depth, + Q); break; - } case Instruction::Or: - computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); - computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); - - Known |= Known2; + computeKnownBitsFromAndXorOr(/*IsAnd*/ false, I, DemandedElts, Known, Depth, + Q); + break; + case Instruction::Xor: + computeKnownBitsFromAndXorOr(/*IsAnd*/ false, I, DemandedElts, Known, Depth, + Q); break; - case Instruction::Xor: { - computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); - computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); - - Value *X = nullptr; - // xor(x, x + -1) is a common idiom that will clear all bits above - // the lowest set bit. We can safely say any bit past the lowest - // known one must be zero. - // TODO: `x + -1` is often shrunk `x + C` which `C` is minimum bits needed - // for demanded. This can cause us to miss this pattern. Expand to account - // for `x + -1` in the context of demanded bits. - if ((!Known.One.isZero() || !Known2.One.isZero()) && - match(I, m_c_BinOp(m_Value(X), m_c_Add(m_Deferred(X), m_AllOnes())))) { - unsigned MinBit = std::min(Known.One.countTrailingZeros(), - Known2.One.countTrailingZeros()); - APInt Mask = APInt::getBitsSetFrom(BitWidth, MinBit + 1); - Known ^= Known2; - Known.Zero |= Mask; - Known.One &= ~Mask; - } else { - Known ^= Known2; - } - } break; case Instruction::Mul: { bool NSW = Q.IIQ.hasNoSignedWrap(cast(I)); computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, DemandedElts, diff --git a/llvm/test/Analysis/ValueTracking/knownbits-and-or-xor-lowbit.ll b/llvm/test/Analysis/ValueTracking/knownbits-and-or-xor-lowbit.ll --- a/llvm/test/Analysis/ValueTracking/knownbits-and-or-xor-lowbit.ll +++ b/llvm/test/Analysis/ValueTracking/knownbits-and-or-xor-lowbit.ll @@ -36,10 +36,7 @@ define i32 @cmp_eq_0_add_xor_eval(i32 %x, i32 %C) { ; CHECK-LABEL: @cmp_eq_0_add_xor_eval( -; CHECK-NEXT: [[Y:%.*]] = add i32 [[X:%.*]], 1 -; CHECK-NEXT: [[Z:%.*]] = xor i32 [[Y]], [[X]] -; CHECK-NEXT: [[B:%.*]] = and i32 [[Z]], 1 -; CHECK-NEXT: ret i32 [[B]] +; CHECK-NEXT: ret i32 1 ; %C1 = or i32 %C, 17 %y = add i32 %x, %C1 @@ -50,10 +47,7 @@ define i32 @cmp_ne_0_sub_xor_eval(i32 %x, i32 %C) { ; CHECK-LABEL: @cmp_ne_0_sub_xor_eval( -; CHECK-NEXT: [[Y:%.*]] = add i32 [[X:%.*]], 1 -; CHECK-NEXT: [[Z:%.*]] = xor i32 [[Y]], [[X]] -; CHECK-NEXT: [[B:%.*]] = and i32 [[Z]], 1 -; CHECK-NEXT: ret i32 [[B]] +; CHECK-NEXT: ret i32 1 ; %C1 = or i32 %C, 13 %y = sub i32 %x, %C1 @@ -75,10 +69,7 @@ define i32 @cmp_sgt_0_add_or_eval(i32 %x, i32 %C) { ; CHECK-LABEL: @cmp_sgt_0_add_or_eval( -; CHECK-NEXT: [[Y:%.*]] = add i32 [[X:%.*]], 1 -; CHECK-NEXT: [[Z:%.*]] = or i32 [[Y]], [[X]] -; CHECK-NEXT: [[B:%.*]] = and i32 [[Z]], 1 -; CHECK-NEXT: ret i32 [[B]] +; CHECK-NEXT: ret i32 1 ; %C1 = or i32 %C, 9 %y = add i32 %x, %C1 @@ -89,10 +80,7 @@ define i32 @cmp_ne_0_sub_or_eval(i32 %x, i32 %C) { ; CHECK-LABEL: @cmp_ne_0_sub_or_eval( -; CHECK-NEXT: [[Y:%.*]] = add i32 [[X:%.*]], 1 -; CHECK-NEXT: [[Z:%.*]] = or i32 [[Y]], [[X]] -; CHECK-NEXT: [[B:%.*]] = and i32 [[Z]], 1 -; CHECK-NEXT: ret i32 [[B]] +; CHECK-NEXT: ret i32 1 ; %C1 = or i32 %C, 5 %y = sub i32 %x, %C1 diff --git a/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll b/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll --- a/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll +++ b/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll @@ -1248,11 +1248,7 @@ define i1 @blsmsk_eq_no_proof3(i32 %x) { ; CHECK-LABEL: @blsmsk_eq_no_proof3( -; CHECK-NEXT: [[X1:%.*]] = or i32 [[X:%.*]], 10 -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X1]], -3 -; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X1]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = icmp eq i32 [[X3]], 8 -; CHECK-NEXT: ret i1 [[Z]] +; CHECK-NEXT: ret i1 false ; %x1 = or i32 %x, 10 %x2 = sub i32 %x1, 3