diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -2036,6 +2036,63 @@ assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); } +/// Try to detect a recurrence that the value of the induction variable is +/// always a power of two (or zero). +static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero, + unsigned Depth, Query &Q) { + BinaryOperator *BO = nullptr; + Value *Start = nullptr, *Step = nullptr; + if (!matchSimpleRecurrence(PN, BO, Start, Step)) + return false; + + // Initial value must be a power of two. + for (const Use &U : PN->operands()) { + if (U.get() == Start) { + // Initial value comes from a different BB, need to adjust context + // instruction for analysis. + Q.CxtI = PN->getIncomingBlock(U)->getTerminator(); + if (!isKnownToBeAPowerOfTwo(Start, OrZero, Depth, Q)) + return false; + } + } + + // Except for Mul, the induction variable must be on the left side of the + // increment expression, otherwise its value can be arbitrary. + if (BO->getOpcode() != Instruction::Mul && BO->getOperand(1) != Step) + return false; + + Q.CxtI = BO->getParent()->getTerminator(); + switch (BO->getOpcode()) { + case Instruction::Mul: + // Power of two is closed under multiplication. + return (OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || + Q.IIQ.hasNoSignedWrap(BO)) && + isKnownToBeAPowerOfTwo(Step, OrZero, Depth, Q); + case Instruction::SDiv: + // Start value must not be signmask for signed division, so simply being a + // power of two is not sufficient, and it has to be a constant. + if (!match(Start, m_Power2()) || match(Start, m_SignMask())) + return false; + LLVM_FALLTHROUGH; + case Instruction::UDiv: + // Divisor must be a power of two. + // If OrZero is false, cannot guarantee induction variable is non-zero after + // division, same for Shr, unless it is exact division. + return (OrZero || Q.IIQ.isExact(BO)) && + isKnownToBeAPowerOfTwo(Step, false, Depth, Q); + case Instruction::Shl: + return OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO); + case Instruction::AShr: + if (!match(Start, m_Power2()) || match(Start, m_SignMask())) + return false; + LLVM_FALLTHROUGH; + case Instruction::LShr: + return OrZero || Q.IIQ.isExact(BO); + default: + return false; + } +} + /// Return true if the given value is known to have exactly one /// bit set when defined. For vectors return true if every element is known to /// be a power of two when defined. Supports values with integer or pointer @@ -2127,10 +2184,15 @@ } } - // A PHI node is power of two if all incoming values are power of two. + // A PHI node is power of two if all incoming values are power of two, or if + // it is an induction variable where in each step its value is a power of two. if (const PHINode *PN = dyn_cast(V)) { Query RecQ = Q; + // Check if it is an induction variable and always power of two. + if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ)) + return true; + // Recursively check all incoming values. Limit recursion to 2 levels, so // that search complexity is limited to number of operands^2. unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1); diff --git a/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll b/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll --- a/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll +++ b/llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll @@ -119,7 +119,8 @@ ; CHECK: for.body: ; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[I:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[SIZE:%.*]], [[PHI]] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[PHI]], -1 +; CHECK-NEXT: [[UREM:%.*]] = and i64 [[TMP0]], [[SIZE:%.*]] ; CHECK-NEXT: [[ADD]] = add nuw i64 [[SUM]], [[UREM]] ; CHECK-NEXT: [[I]] = shl nuw i64 [[PHI]], 2 ; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], 25000000 @@ -189,7 +190,8 @@ ; CHECK: for.body: ; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[I:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[SIZE:%.*]], [[PHI]] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[PHI]], -1 +; CHECK-NEXT: [[UREM:%.*]] = and i64 [[TMP0]], [[SIZE:%.*]] ; CHECK-NEXT: [[ADD]] = add nuw i64 [[SUM]], [[UREM]] ; CHECK-NEXT: [[I]] = shl nuw i64 [[PHI]], 1 ; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], 50000000 @@ -223,7 +225,8 @@ ; CHECK: for.body: ; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[I:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[SIZE:%.*]], [[PHI]] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[PHI]], -1 +; CHECK-NEXT: [[UREM:%.*]] = and i64 [[TMP0]], [[SIZE:%.*]] ; CHECK-NEXT: [[ADD]] = add nuw i64 [[SUM]], [[UREM]] ; CHECK-NEXT: [[I]] = lshr i64 [[PHI]], 1 ; CHECK-NEXT: [[ICMP_NOT:%.*]] = icmp ult i64 [[PHI]], 2 @@ -257,8 +260,9 @@ ; CHECK: for.body: ; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ 4096, [[ENTRY:%.*]] ], [ [[I:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[SIZE:%.*]], [[PHI]] -; CHECK-NEXT: [[ADD]] = add nuw nsw i64 [[SUM]], [[UREM]] +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[PHI]], -1 +; CHECK-NEXT: [[UREM:%.*]] = and i64 [[TMP0]], [[SIZE:%.*]] +; CHECK-NEXT: [[ADD]] = add nsw i64 [[SUM]], [[UREM]] ; CHECK-NEXT: [[I]] = lshr i64 [[PHI]], [[A:%.*]] ; CHECK-NEXT: [[ICMP_NOT:%.*]] = icmp eq i64 [[I]], 0 ; CHECK-NEXT: br i1 [[ICMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]