diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -98,6 +98,13 @@ /// \p KnownOne the set of bits that are known to be one void computeKnownBitsFromRangeMetadata(const MDNode &Ranges, KnownBits &Known); +/// Using KnownBits LHS/RHS produce the known bits for logic op (and/xor/or). +KnownBits analyzeKnownBitsFromAndXorOr( + const Operator *I, const KnownBits &KnownLHS, const KnownBits &KnownRHS, + unsigned Depth, const DataLayout &DL, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, + OptimizationRemarkEmitter *ORE = nullptr, bool UseInstrInfo = true); + /// Return true if LHS and RHS have no common bits set. bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC = nullptr, diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1064,6 +1064,93 @@ Known.setAllZero(); } +static KnownBits getKnownBitsFromAndXorOr(const Operator *I, + const APInt &DemandedElts, + const KnownBits &KnownLHS, + const KnownBits &KnownRHS, + unsigned Depth, const Query &Q) { + unsigned BitWidth = KnownLHS.getBitWidth(); + KnownBits KnownOut(BitWidth); + bool IsAnd = false; + Value *X = nullptr, *Y = nullptr; + + // We don't want to put this before the Known.One.isZero check. + switch (I->getOpcode()) { + case Instruction::And: + KnownOut = KnownLHS & KnownRHS; + IsAnd = true; + break; + case Instruction::Or: + KnownOut = KnownLHS | KnownRHS; + break; + case Instruction::Xor: + KnownOut = KnownLHS ^ KnownRHS; + break; + default: + llvm_unreachable("Invalid Op used in 'analyzeKnownBitsFromAndXorOr'"); + } + + // and(x, -x)/xor(x, x-1) are common idioms that will clear all but + // lowest set bit. 
If we have a single known bit in x, we can clear all bits + // above it. + // TODO: instcombine often reassociates independent `and` which can hide + // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x). + const APInt *C; + if ((!KnownLHS.One.isZero() || !KnownRHS.One.isZero()) && + (IsAnd ? match(I, m_c_And(m_Value(X), m_Neg(m_Deferred(X)))) + : match(I, m_c_Xor(m_Value(X), + m_c_Add(m_Deferred(X), m_APInt(C)))))) { + if (IsAnd) { + if (KnownLHS.countMaxTrailingZeros() <= KnownRHS.countMaxTrailingZeros()) + KnownOut = KnownLHS.blsi(); + else + KnownOut = KnownRHS.blsi(); + } + // TODO: Extend C to values where (X-C) == (X-1) for the purpose of the + // pattern (based on demanded bits). + else if (C->isAllOnes()) { + const KnownBits &XBits = I->getOperand(0) == X ? KnownLHS : KnownRHS; + KnownOut = XBits.blsmsk(); + } + } + + // and(x, add (x, -1)) is a common idiom that always clears the low bit; + // xor/or(x, add (x, -1)) is an idiom that will always set the low bit. + // Here we handle the more general case of adding any odd number by + // matching and/xor/or(x, add(x, y)/sub(x, y)/sub(y, x)) where y is odd. + // TODO: This could be generalized to clearing any bit set in y where the + // following bit is known to be unset in y. + if (!KnownOut.Zero[0] && !KnownOut.One[0] && + (match(I, m_c_BinOp(m_Value(X), m_c_Add(m_Deferred(X), m_Value(Y)))) || + match(I, m_c_BinOp(m_Value(X), m_Sub(m_Deferred(X), m_Value(Y)))) || + match(I, m_c_BinOp(m_Value(X), m_Sub(m_Value(Y), m_Deferred(X)))))) { + KnownBits KnownY(BitWidth); + computeKnownBits(Y, DemandedElts, KnownY, Depth + 1, Q); + if (KnownY.countMinTrailingOnes() > 0) { + if (IsAnd) + KnownOut.Zero.setBit(0); + else + KnownOut.One.setBit(0); + } + } + return KnownOut; +} + +// Public so this can be used in `SimplifyDemandedUseBits`. 
+KnownBits llvm::analyzeKnownBitsFromAndXorOr( + const Operator *I, const KnownBits &KnownLHS, const KnownBits &KnownRHS, + unsigned Depth, const DataLayout &DL, AssumptionCache *AC, + const Instruction *CxtI, const DominatorTree *DT, + OptimizationRemarkEmitter *ORE, bool UseInstrInfo) { + auto *FVTy = dyn_cast(I->getType()); /* NOTE(review): dyn_cast has no target type here; presumably FixedVectorType was lost in extraction - confirm */ + APInt DemandedElts = + FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1); + + return getKnownBitsFromAndXorOr( + I, DemandedElts, KnownLHS, KnownRHS, Depth, + Query(DL, AC, safeCxtI(I, CxtI), DT, UseInstrInfo, ORE)); +} + static void computeKnownBitsFromOperator(const Operator *I, const APInt &DemandedElts, KnownBits &Known, unsigned Depth, @@ -1078,67 +1165,24 @@ Q.IIQ.getMetadata(cast(I), LLVMContext::MD_range)) computeKnownBitsFromRangeMetadata(*MD, Known); break; - case Instruction::And: { - // If either the LHS or the RHS are Zero, the result is zero. + case Instruction::And: computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); - Value *X = nullptr, *Y = nullptr; - // and(x, -x) is a common idiom for clearing all but lowest set bit. If we - // have a single known bit in x, we can clear all bits above it. - // TODO: instcombine often reassociates independent `and` which can hide - // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x). - if (!Known.One.isZero() || !Known2.One.isZero()) { - if (match(I, m_c_BinOp(m_Value(X), m_Neg(m_Deferred(X))))) { - if (Known.countMaxTrailingZeros() <= Known2.countMaxTrailingZeros()) - Known = Known.blsi(); - else - Known = Known2.blsi(); - } - } - Known &= Known2; - - // and(x, add (x, -1)) is a common idiom that always clears the low bit; - // here we handle the more general case of adding any odd number by - // matching the form add(x, add(x, y)) where y is odd. - // TODO: This could be generalized to clearing any bit set in y where the - // following bit is known to be unset in y. 
- if (!Known.Zero[0] && !Known.One[0] && - match(I, m_c_BinOp(m_Value(X), m_Add(m_Deferred(X), m_Value(Y))))) { - Known2.resetAll(); - computeKnownBits(Y, DemandedElts, Known2, Depth + 1, Q); - if (Known2.countMinTrailingOnes() > 0) - Known.Zero.setBit(0); - } + Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q); break; - } case Instruction::Or: computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); - Known |= Known2; + Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q); break; - case Instruction::Xor: { + case Instruction::Xor: computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); - Value *X = nullptr; - const APInt *C; - // xor(x, x + -1) is a common idiom that will clear all bits above - // the lowest set bit. We can safely say any bit past the lowest - // known one must be zero. - // TODO: `x + -1` is often shrunk `x + C` which `C` is minimum bits needed - // for demanded. This can cause us to miss this pattern. Expand to account - // for `x + -1` in the context of demanded bits. - if ((!Known.One.isZero() || !Known2.One.isZero()) && - match(I, m_c_BinOp(m_Value(X), m_c_Add(m_Deferred(X), m_APInt(C)))) && - C->isAllOnes()) { - const KnownBits &XBits = I->getOperand(0) == X ? 
Known2 : Known; - Known = XBits.blsmsk(); - } else { - Known ^= Known2; - } - } break; + Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q); + break; case Instruction::Mul: { bool NSW = Q.IIQ.hasNoSignedWrap(cast(I)); computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, DemandedElts, diff --git a/llvm/test/Analysis/ValueTracking/knownbits-and-or-xor-lowbit.ll b/llvm/test/Analysis/ValueTracking/knownbits-and-or-xor-lowbit.ll --- a/llvm/test/Analysis/ValueTracking/knownbits-and-or-xor-lowbit.ll +++ b/llvm/test/Analysis/ValueTracking/knownbits-and-or-xor-lowbit.ll @@ -36,10 +36,7 @@ define i32 @cmp_eq_0_add_xor_eval(i32 %x, i32 %C) { ; CHECK-LABEL: @cmp_eq_0_add_xor_eval( -; CHECK-NEXT: [[Y:%.*]] = add i32 [[X:%.*]], 1 -; CHECK-NEXT: [[Z:%.*]] = xor i32 [[Y]], [[X]] -; CHECK-NEXT: [[B:%.*]] = and i32 [[Z]], 1 -; CHECK-NEXT: ret i32 [[B]] +; CHECK-NEXT: ret i32 1 ; %C1 = or i32 %C, 17 %y = add i32 %x, %C1 @@ -50,10 +47,7 @@ define i32 @cmp_ne_0_sub_xor_eval(i32 %x, i32 %C) { ; CHECK-LABEL: @cmp_ne_0_sub_xor_eval( -; CHECK-NEXT: [[Y:%.*]] = add i32 [[X:%.*]], 1 -; CHECK-NEXT: [[Z:%.*]] = xor i32 [[Y]], [[X]] -; CHECK-NEXT: [[B:%.*]] = and i32 [[Z]], 1 -; CHECK-NEXT: ret i32 [[B]] +; CHECK-NEXT: ret i32 1 ; %C1 = or i32 %C, 13 %y = sub i32 %x, %C1 @@ -75,10 +69,7 @@ define i32 @cmp_sgt_0_add_or_eval(i32 %x, i32 %C) { ; CHECK-LABEL: @cmp_sgt_0_add_or_eval( -; CHECK-NEXT: [[Y:%.*]] = add i32 [[X:%.*]], 1 -; CHECK-NEXT: [[Z:%.*]] = or i32 [[Y]], [[X]] -; CHECK-NEXT: [[B:%.*]] = and i32 [[Z]], 1 -; CHECK-NEXT: ret i32 [[B]] +; CHECK-NEXT: ret i32 1 ; %C1 = or i32 %C, 9 %y = add i32 %x, %C1 @@ -89,10 +80,7 @@ define i32 @cmp_ne_0_sub_or_eval(i32 %x, i32 %C) { ; CHECK-LABEL: @cmp_ne_0_sub_or_eval( -; CHECK-NEXT: [[Y:%.*]] = add i32 [[X:%.*]], 1 -; CHECK-NEXT: [[Z:%.*]] = or i32 [[Y]], [[X]] -; CHECK-NEXT: [[B:%.*]] = and i32 [[Z]], 1 -; CHECK-NEXT: ret i32 [[B]] +; CHECK-NEXT: ret i32 1 ; %C1 = or i32 %C, 5 %y = sub i32 %x, %C1 diff --git 
a/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll b/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll --- a/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll +++ b/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll @@ -1248,11 +1248,7 @@ define i1 @blsmsk_eq_no_proof3(i32 %x) { ; CHECK-LABEL: @blsmsk_eq_no_proof3( -; CHECK-NEXT: [[X1:%.*]] = or i32 [[X:%.*]], 10 -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X1]], -3 -; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X1]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = icmp eq i32 [[X3]], 8 -; CHECK-NEXT: ret i1 [[Z]] +; CHECK-NEXT: ret i1 false ; %x1 = or i32 %x, 10 %x2 = sub i32 %x1, 3