Index: llvm/trunk/include/llvm/ADT/APInt.h =================================================================== --- llvm/trunk/include/llvm/ADT/APInt.h +++ llvm/trunk/include/llvm/ADT/APInt.h @@ -869,7 +869,14 @@ /// \brief Logical right-shift function. /// /// Logical right-shift this APInt by shiftAmt. - APInt lshr(unsigned shiftAmt) const; + APInt lshr(unsigned shiftAmt) const { + APInt R(*this); + R.lshrInPlace(shiftAmt); + return R; + } + + /// Logical right-shift this APInt by shiftAmt in place. + void lshrInPlace(unsigned shiftAmt); /// \brief Left-shift function. /// @@ -1949,7 +1956,7 @@ return A.ugt(B) ? A : B; } -/// \brief Compute GCD of two APInt values. +/// \brief Compute GCD of two unsigned APInt values. /// /// This function returns the greatest common divisor of the two APInt values /// using Euclid's algorithm. Index: llvm/trunk/lib/Support/APInt.cpp =================================================================== --- llvm/trunk/lib/Support/APInt.cpp +++ llvm/trunk/lib/Support/APInt.cpp @@ -733,18 +733,6 @@ return Count; } -/// Perform a logical right-shift from Src to Dst, which must be equal or -/// non-overlapping, of Words words, by Shift, which must be less than 64. -static void lshrNear(uint64_t *Dst, uint64_t *Src, unsigned Words, - unsigned Shift) { - uint64_t Carry = 0; - for (int I = Words - 1; I >= 0; --I) { - uint64_t Tmp = Src[I]; - Dst[I] = (Tmp >> Shift) | Carry; - Carry = Tmp << (64 - Shift); - } -} - APInt APInt::byteSwap() const { assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!"); if (BitWidth == 16) @@ -765,8 +753,7 @@ for (unsigned I = 0, N = getNumWords(); I != N; ++I) Result.pVal[I] = ByteSwap_64(pVal[N - I - 1]); if (Result.BitWidth != BitWidth) { - lshrNear(Result.pVal, Result.pVal, getNumWords(), - Result.BitWidth - BitWidth); + Result.lshrInPlace(Result.BitWidth - BitWidth); Result.BitWidth = BitWidth; } return Result; @@ -803,11 +790,45 @@ } APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) { - while (!!B) { - APInt R = A.urem(B); - A = std::move(B); - B = std::move(R); + // Fast-path a common case. + if (A == B) return A; + + // Corner cases: if either operand is zero, the other is the gcd. + if (!A) return B; + if (!B) return A; + + // Count common powers of 2 and remove all other powers of 2. + unsigned Pow2; + { + unsigned Pow2_A = A.countTrailingZeros(); + unsigned Pow2_B = B.countTrailingZeros(); + if (Pow2_A > Pow2_B) { + A.lshrInPlace(Pow2_A - Pow2_B); + Pow2 = Pow2_B; + } else if (Pow2_B > Pow2_A) { + B.lshrInPlace(Pow2_B - Pow2_A); + Pow2 = Pow2_A; + } else { + Pow2 = Pow2_A; + } } + + // Both operands are odd multiples of 2^Pow_2: + // + // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b)) + // + // This is a modified version of Stein's algorithm, taking advantage of + // efficient countTrailingZeros(). + while (A != B) { + if (A.ugt(B)) { + A -= B; + A.lshrInPlace(A.countTrailingZeros() - Pow2); + } else { + B -= A; + B.lshrInPlace(B.countTrailingZeros() - Pow2); + } + } + return A; } @@ -1119,68 +1140,59 @@ return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth)); } -/// Logical right-shift this APInt by shiftAmt. -/// @brief Logical right-shift function. -APInt APInt::lshr(unsigned shiftAmt) const { - if (isSingleWord()) { - if (shiftAmt >= BitWidth) - return APInt(BitWidth, 0); - else - return APInt(BitWidth, this->VAL >> shiftAmt); - } - - // If all the bits were shifted out, the result is 0. This avoids issues - // with shifting by the size of the integer type, which produces undefined - // results. We define these "undefined results" to always be 0. - if (shiftAmt >= BitWidth) - return APInt(BitWidth, 0); - - // If none of the bits are shifted out, the result is *this. This avoids - // issues with shifting by the size of the integer type, which produces - // undefined results in the code below. This is also an optimization. - if (shiftAmt == 0) - return *this; +/// Perform a logical right-shift from Src to Dst of Words words, by Shift, +/// which must be less than 64. If the source and destination ranges overlap, +/// we require that Src >= Dst (put another way, we require that the overall +/// operation is a right shift on the combined range). +static void lshrWords(APInt::WordType *Dst, APInt::WordType *Src, + unsigned Words, unsigned Shift) { + assert(Shift < APInt::APINT_BITS_PER_WORD); - // Create some space for the result. - uint64_t * val = new uint64_t[getNumWords()]; + if (!Words) + return; - // If we are shifting less than a word, compute the shift with a simple carry - if (shiftAmt < APINT_BITS_PER_WORD) { - lshrNear(val, pVal, getNumWords(), shiftAmt); - APInt Result(val, BitWidth); - Result.clearUnusedBits(); - return Result; + if (Shift == 0) { + std::memmove(Dst, Src, Words * APInt::APINT_WORD_SIZE); + return; } - // Compute some values needed by the remaining shift algorithms - unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; - unsigned offset = shiftAmt / APINT_BITS_PER_WORD; + uint64_t Low = Src[0]; + for (unsigned I = 1; I != Words; ++I) { + uint64_t High = Src[I]; + Dst[I - 1] = + (Low >> Shift) | (High << (APInt::APINT_BITS_PER_WORD - Shift)); + Low = High; + } + Dst[Words - 1] = Low >> Shift; +} - // If we are shifting whole words, just move whole words - if (wordShift == 0) { - for (unsigned i = 0; i < getNumWords() - offset; ++i) - val[i] = pVal[i+offset]; - for (unsigned i = getNumWords()-offset; i < getNumWords(); i++) - val[i] = 0; - APInt Result(val, BitWidth); - Result.clearUnusedBits(); - return Result; +/// Logical right-shift this APInt by shiftAmt. +/// @brief Logical right-shift function. +void APInt::lshrInPlace(unsigned shiftAmt) { + if (isSingleWord()) { + if (shiftAmt >= BitWidth) + VAL = 0; + else + VAL >>= shiftAmt; + return; } - // Shift the low order words - unsigned breakWord = getNumWords() - offset -1; - for (unsigned i = 0; i < breakWord; ++i) - val[i] = (pVal[i+offset] >> wordShift) | - (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift)); - // Shift the break word. - val[breakWord] = pVal[breakWord+offset] >> wordShift; + // Don't bother performing a no-op shift. + if (!shiftAmt) + return; - // Remaining words are 0 - for (unsigned i = breakWord+1; i < getNumWords(); ++i) - val[i] = 0; - APInt Result(val, BitWidth); - Result.clearUnusedBits(); - return Result; + // Find number of complete words being shifted out and zeroed. + const unsigned Words = getNumWords(); + const unsigned ShiftFullWords = + std::min(shiftAmt / APINT_BITS_PER_WORD, Words); + + // Fill in first Words - ShiftFullWords by shifting. + lshrWords(pVal, pVal + ShiftFullWords, Words - ShiftFullWords, + shiftAmt - (ShiftFullWords * APINT_BITS_PER_WORD)); + + // The remaining high words are all zero. + for (unsigned I = Words - ShiftFullWords; I != Words; ++I) + pVal[I] = 0; } /// Left-shift this APInt by shiftAmt. Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1178,6 +1178,373 @@ Constant *C = Builder->getInt(CI->getValue()-1); return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C)); } +#if 0 +/// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS +/// and CmpRHS are both known to be integer constants. +Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, + ConstantInt *DivRHS) { + ConstantInt *CmpRHS = cast(ICI.getOperand(1)); + const APInt &CmpRHSV = CmpRHS->getValue(); + + // FIXME: If the operand types don't match the type of the divide + // then don't attempt this transform. The code below doesn't have the + // logic to deal with a signed divide and an unsigned compare (and + // vice versa). This is because (x /s C1) getOpcode() == Instruction::SDiv; + if (!ICI.isEquality() && DivIsSigned != ICI.isSigned()) + return nullptr; + if (DivRHS->isZero()) + return nullptr; // The ProdOV computation fails on divide by zero. + if (DivIsSigned && DivRHS->isAllOnesValue()) + return nullptr; // The overflow computation also screws up here + if (DivRHS->isOne()) { + // This eliminates some funny cases with INT_MIN. + ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X. + return &ICI; + } + + // Compute Prod = CI * DivRHS. We are essentially solving an equation + // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and + // C2 (CI). By solving for X we can turn this into a range check + // instead of computing a divide. + Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS); + + // Determine if the product overflows by seeing if the product is + // not equal to the divide. Make sure we do the same kind of divide + // as in the LHS instruction that we're folding. + bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) : + ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS; + + // Get the ICmp opcode + ICmpInst::Predicate Pred = ICI.getPredicate(); + + /// If the division is known to be exact, then there is no remainder from the + /// divide, so the covered range size is unit, otherwise it is the divisor. + ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS; + + // Figure out the interval that is being checked. For example, a comparison + // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). + // Compute this interval based on the constants involved and the signedness of + // the compare/divide. This computes a half-open interval, keeping track of + // whether either value in the interval overflows. After analysis each + // overflow variable is set to 0 if it's corresponding bound variable is valid + // -1 if overflowed off the bottom end, or +1 if overflowed off the top end. + int LoOverflow = 0, HiOverflow = 0; + Constant *LoBound = nullptr, *HiBound = nullptr; + + if (!DivIsSigned) { // udiv + // e.g. X/5 op 3 --> [15, 20) + LoBound = Prod; + HiOverflow = LoOverflow = ProdOV; + if (!HiOverflow) { + // If this is not an exact divide, then many values in the range collapse + // to the same result value. + HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false); + } + } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0. + if (CmpRHSV == 0) { // (X / pos) op 0 + // Can't overflow. e.g. X/2 op 0 --> [-1, 2) + LoBound = ConstantExpr::getNeg(SubOne(RangeSize)); + HiBound = RangeSize; + } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos + LoBound = Prod; // e.g. X/5 op 3 --> [15, 20) + HiOverflow = LoOverflow = ProdOV; + if (!HiOverflow) + HiOverflow = AddWithOverflow(HiBound, Prod, RangeSize, true); + } else { // (X / pos) op neg + // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14) + HiBound = AddOne(Prod); + LoOverflow = HiOverflow = ProdOV ? -1 : 0; + if (!LoOverflow) { + ConstantInt *DivNeg =cast(ConstantExpr::getNeg(RangeSize)); + LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0; + } + } + } else if (DivRHS->isNegative()) { // Divisor is < 0. + if (DivI->isExact()) + RangeSize = cast(ConstantExpr::getNeg(RangeSize)); + if (CmpRHSV == 0) { // (X / neg) op 0 + // e.g. X/-5 op 0 --> [-4, 5) + LoBound = AddOne(RangeSize); + HiBound = cast(ConstantExpr::getNeg(RangeSize)); + if (HiBound == DivRHS) { // -INTMIN = INTMIN + HiOverflow = 1; // [INTMIN+1, overflow) + HiBound = nullptr; // e.g. X/INTMIN = 0 --> X > INTMIN + } + } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos + // e.g. X/-5 op 3 --> [-19, -14) + HiBound = AddOne(Prod); + HiOverflow = LoOverflow = ProdOV ? -1 : 0; + if (!LoOverflow) + LoOverflow = AddWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0; + } else { // (X / neg) op neg + LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20) + LoOverflow = HiOverflow = ProdOV; + if (!HiOverflow) + HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true); + } + + // Dividing by a negative swaps the condition. LT <-> GT + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + Value *X = DivI->getOperand(0); + switch (Pred) { + default: llvm_unreachable("Unhandled icmp opcode!"); + case ICmpInst::ICMP_EQ: + if (LoOverflow && HiOverflow) + return ReplaceInstUsesWith(ICI, Builder->getFalse()); + if (HiOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : + ICmpInst::ICMP_UGE, X, LoBound); + if (LoOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : + ICmpInst::ICMP_ULT, X, HiBound); + return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound, + DivIsSigned, true)); + case ICmpInst::ICMP_NE: + if (LoOverflow && HiOverflow) + return ReplaceInstUsesWith(ICI, Builder->getTrue()); + if (HiOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : + ICmpInst::ICMP_ULT, X, LoBound); + if (LoOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : + ICmpInst::ICMP_UGE, X, HiBound); + return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound, + DivIsSigned, false)); + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: + if (LoOverflow == +1) // Low bound is greater than input range. + return ReplaceInstUsesWith(ICI, Builder->getTrue()); + if (LoOverflow == -1) // Low bound is less than input range. + return ReplaceInstUsesWith(ICI, Builder->getFalse()); + return new ICmpInst(Pred, X, LoBound); + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: + if (HiOverflow == +1) // High bound greater than input range. + return ReplaceInstUsesWith(ICI, Builder->getFalse()); + if (HiOverflow == -1) // High bound less than input range. + return ReplaceInstUsesWith(ICI, Builder->getTrue()); + if (Pred == ICmpInst::ICMP_UGT) + return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); + return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); + } +} + +/// FoldICmpShrCst - Handle "icmp(([al]shr X, cst1), cst2)". +Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, + ConstantInt *ShAmt) { + const APInt &CmpRHSV = cast(ICI.getOperand(1))->getValue(); + + // Check that the shift amount is in range. If not, don't perform + // undefined shifts. When the shift is visited it will be + // simplified. + uint32_t TypeBits = CmpRHSV.getBitWidth(); + uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); + if (ShAmtVal >= TypeBits || ShAmtVal == 0) + return nullptr; + + if (!ICI.isEquality()) { + // If we have an unsigned comparison and an ashr, we can't simplify this. + // Similarly for signed comparisons with lshr. + if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr)) + return nullptr; + + // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv + // by a power of 2. Since we already have logic to simplify these, + // transform to div and then simplify the resultant comparison. + if (Shr->getOpcode() == Instruction::AShr && + (!Shr->isExact() || ShAmtVal == TypeBits - 1)) + return nullptr; + + // Revisit the shift (to delete it). + Worklist.Add(Shr); + + Constant *DivCst = + ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal)); + + Value *Tmp = + Shr->getOpcode() == Instruction::AShr ? + Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) : + Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()); + + ICI.setOperand(0, Tmp); + + // If the builder folded the binop, just return it. + BinaryOperator *TheDiv = dyn_cast(Tmp); + if (!TheDiv) + return &ICI; + + // Otherwise, fold this div/compare. + assert(TheDiv->getOpcode() == Instruction::SDiv || + TheDiv->getOpcode() == Instruction::UDiv); + + Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast(DivCst)); + assert(Res && "This div/cst should have folded!"); + return Res; + } + + // If we are comparing against bits always shifted out, the + // comparison cannot succeed. + APInt Comp = CmpRHSV << ShAmtVal; + ConstantInt *ShiftedCmpRHS = Builder->getInt(Comp); + if (Shr->getOpcode() == Instruction::LShr) + Comp = Comp.lshr(ShAmtVal); + else + Comp = Comp.ashr(ShAmtVal); + + if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero. + bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; + Constant *Cst = Builder->getInt1(IsICMP_NE); + return ReplaceInstUsesWith(ICI, Cst); + } + + // Otherwise, check to see if the bits shifted out are known to be zero. + // If so, we can compare against the unshifted value: + // (X & 4) >> 1 == 2 --> (X & 4) == 4. + if (Shr->hasOneUse() && Shr->isExact()) + return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS); + + if (Shr->hasOneUse()) { + // Otherwise strength reduce the shift into an and. + APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); + Constant *Mask = Builder->getInt(Val); + + Value *And = Builder->CreateAnd(Shr->getOperand(0), + Mask, Shr->getName()+".mask"); + return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS); + } + return nullptr; +} +#endif +namespace { +/// Models a check that LHS is divisible by Factor. +class DivisibilityCheck { + // Signedness of the check. A bitwise and is a divisibility check, + // if its mask is (equivalent to) a power of 2 mask. + enum { DC_Null, DC_SRem, DC_URem, DC_And } Kind; + Value *Check; + Value *LHS; + ConstantInt *Factor; + +public: + DivisibilityCheck() : Kind(DC_Null) {} + + /// Try to extract a divisibility check from V, on the assumption + /// that it is being compared to 0. + bool match(Value *V) { + Kind = DC_Null; + Check = V; + if (::match(V, m_SRem(m_Value(LHS), m_ConstantInt(Factor)))) + Kind = DC_SRem; + else if (::match(V, m_URem(m_Value(LHS), m_ConstantInt(Factor)))) + Kind = DC_URem; + else if (::match(V, m_And(m_Value(LHS), m_ConstantInt(Factor)))) + Kind = DC_And; + return Kind != DC_Null; + } + + /// Merge another divisibility check into this one. + bool merge(const DivisibilityCheck &O) { + assert(Kind != DC_Null && O.Kind != DC_Null); + if (LHS != O.LHS) + // We don't have two divisibility checks on the same operand. + return false; + + if (!(Check->hasOneUse() && Kind != DC_And) && + !(O.Check->hasOneUse() && O.Kind != DC_And)) + // We would not remove a division: bail out. + return false; + + // Determine the factors we're checking for. + bool Failed = false; + APInt LHS = getFactor(O, Failed); + APInt RHS = O.getFactor(*this, Failed); + if (Failed) + return false; + + // If we don't have a single signedness, we can fold the checks + // together if one of them is for a power of 2, because + // divisibility by a power of 2 is the same for srem and urem. + if (Kind != O.Kind && O.Kind != DC_And && LHS.isPowerOf2()) + Kind = O.Kind; + if (Kind != O.Kind && !RHS.isPowerOf2()) + return false; + assert(Kind == DC_SRem || Kind == DC_URem && "bad kind after merging"); + bool Signed = Kind == DC_SRem; + + // Fold them together. + APInt GCD = APIntOps::GreatestCommonDivisor(LHS, RHS); + APInt LCM = LHS.udiv(GCD); + bool Overflow = false; + // Use a negative signed multiplication: producing INT_MIN should not + // be considered an overflow here. + LCM = Signed ? LCM.smul_ov(-RHS, Overflow) : LCM.umul_ov(RHS, Overflow); + // On overflow, there cannot exist a non-zero value that is divisible by + // both factors at once. + if (Overflow) LCM = 0; + Factor = cast(ConstantInt::get(Factor->getType(), LCM)); + return true; + } + + Value *create(InstCombiner::BuilderTy *Builder) { + // LHS is divisible by zero iff LHS is zero. + if (!Factor->getValue()) + return LHS; + // Checking for divisibility by power of 2 doesn't need a division. + if (Factor->getValue().isPowerOf2()) + return Builder->CreateAnd( + LHS, ConstantInt::get(Factor->getType(), Factor->getValue() - 1)); + return Kind == DC_SRem ? Builder->CreateSRem(LHS, Factor) + : Builder->CreateURem(LHS, Factor); + } + +private: + /// Get the unsigned multiplicative factor we're checking for. + APInt getFactor(const DivisibilityCheck &O, bool &Failed) const { + switch (Kind) { + case DC_Null: + llvm_unreachable("unexpected Kind"); + + case DC_SRem: + if (Factor->getValue().isNegative()) + return -Factor->getValue(); + // Fall through. + case DC_URem: + return Factor->getValue(); + + case DC_And: + assert(O.Kind != DC_And && "bad kind pair"); + // If we're also checking for divisibility by K * 2^N, + // the low N bits of the mask are irrelevant. + APInt Result = + Factor->getValue() | + APInt::getLowBitsSet(Factor->getValue().getBitWidth(), + O.getFactor(*this, Failed).countTrailingZeros()); + ++Result; + if (!!Result && !Result.isPowerOf2()) + Failed = true; + return Result; + } + } +}; + +struct DivisibilityCheck_match { + DivisibilityCheck &Check; + DivisibilityCheck_match(DivisibilityCheck &Check) : Check(Check) {} + bool match(Value *V) { return Check.match(V); } +}; + +/// Matcher for divisibility checks. +DivisibilityCheck_match m_DivisibilityCheck(DivisibilityCheck &Check) { + return DivisibilityCheck_match(Check); +} +} /// Handle "(icmp eq/ne (ashr/lshr AP2, A), AP1)" -> /// (icmp eq/ne A, Log2(AP2/AP1)) -> @@ -1806,6 +2173,42 @@ if (!Cmp.isEquality() || *C != 0 || !Or->hasOneUse()) return nullptr; + DivisibilityCheck DivL, DivR; + if (match(Or, m_Or(m_DivisibilityCheck(DivL), m_DivisibilityCheck(DivR))) && + DivL.merge(DivR)) { + // Simplify icmp eq (or (srem P, M), (srem P, N)), 0 + // -> icmp eq (srem P, lcm(M, N)), 0 + return new ICmpInst(Pred, DivL.create(Builder), + ConstantInt::getNullValue(Or->getType())); + } + +#if 0 + // icmp eq (or X, Y), 0 + // -> and (icmp eq X, 0), (icmp eq Y, 0) + // but only if this allows either subexpression to simplify further. + Instruction *ICmpX = nullptr, *ICmpY = nullptr; + if (auto *X = dyn_cast(LHSI->getOperand(0))) + ICmpX = visitICmpInstWithInstAndIntCst(ICI, X, RHS); + if (auto *Y = dyn_cast(LHSI->getOperand(1))) + ICmpY = visitICmpInstWithInstAndIntCst(ICI, Y, RHS); + if (ICmpX || ICmpX) { + Value *NewX, *NewY; + if (ICmpX) { + Worklist.Add(ICmpX); + NewX = Builder->Insert(ICmpX); + } else + NewX = Builder->CreateICmp(ICI.getPredicate(), LHSI->getOperand(0), + RHS); + if (ICmpY) { + Worklist.Add(ICmpY); + NewY = Builder->Insert(ICmpY); + } else + NewY = Builder->CreateICmp(ICI.getPredicate(), LHSI->getOperand(1), + RHS); + return BinaryOperator::CreateAnd(NewX, NewY); + } +#endif + Value *P, *Q; if (match(Or, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) { // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0 Index: llvm/trunk/test/Transforms/InstCombine/divisibility.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/divisibility.ll +++ llvm/trunk/test/Transforms/InstCombine/divisibility.ll @@ -0,0 +1,297 @@ +; Test that multiple divisibility checks are merged. + +; RUN: opt < %s -instcombine -S | FileCheck %s + +define i1 @test1(i32 %A) { + %B = srem i32 %A, 2 + %C = srem i32 %A, 3 + %D = or i32 %B, %C + %E = icmp eq i32 %D, 0 + ret i1 %E +; CHECK-LABEL: @test1( +; CHECK-NEXT: srem i32 %A, 6 +; CHECK-NEXT: icmp eq i32 %{{.*}}, 0 +; CHECK-NEXT: ret i1 +} + +define i1 @test2(i32 %A) { + %B = urem i32 %A, 2 + %C = urem i32 %A, 3 + %D = or i32 %B, %C + %E = icmp eq i32 %D, 0 + ret i1 %E +; CHECK-LABEL: @test2( +; CHECK-NEXT: urem i32 %A, 6 +; CHECK-NEXT: icmp eq i32 %{{.*}}, 0 +; CHECK-NEXT: ret i1 +} + +define i1 @test3(i32 %A) { + %B = srem i32 %A, 2 + %C = urem i32 %A, 3 + %D = or i32 %B, %C + %E = icmp eq i32 %D, 0 + ret i1 %E +; CHECK-LABEL: @test3( +; CHECK-NEXT: urem i32 %A, 6 +; CHECK-NEXT: icmp eq i32 %{{.*}}, 0 +; CHECK-NEXT: ret i1 +} + +define i1 @test4(i32 %A) { + %B = urem i32 %A, 2 + %C = srem i32 %A, 3 + %D = or i32 %B, %C + %E = icmp eq i32 %D, 0 + ret i1 %E +; CHECK-LABEL: @test4( +; CHECK-NEXT: srem i32 %A, 6 +; CHECK-NEXT: icmp eq i32 %{{.*}}, 0 +; CHECK-NEXT: ret i1 +} + +define i1 @test5(i32 %A) { + %B = srem i32 %A, 8 + %C = srem i32 %A, 12 + %D = or i32 %B, %C + %E = icmp eq i32 %D, 0 + ret i1 %E +; CHECK-LABEL: @test5( +; CHECK-NEXT: srem i32 %A, 24 +; CHECK-NEXT: icmp eq i32 %{{.*}}, 0 +; CHECK-NEXT: ret i1 +} + +define i1 @test6(i32 %A) { + %B = and i32 %A, 6 + %C = srem i32 %A, 12 + %D = or i32 %B, %C + %E = icmp eq i32 %D, 0 + ret i1 %E +; CHECK-LABEL: @test6( +; CHECK-NEXT: srem i32 %A, 24 +; CHECK-NEXT: icmp eq i32 %{{.*}}, 0 +; CHECK-NEXT: ret i1 +} + +define i1 @test7(i32 %A) { + %B = and i32 %A, 8 + %C = srem i32 %A, 12 + %D = or i32 %B, %C + %E = icmp eq i32 %D, 0 + ret i1 %E +; CHECK-LABEL: @test7( +; CHECK-NEXT: and i32 %A, 8 +; CHECK-NEXT: srem i32 %A, 12 +; CHECK-NEXT: or +; CHECK-NEXT: icmp +; CHECK-NEXT: ret i1 +} + +define i1 @test8(i32 %A, i32 %B) { + %C = srem i32 %A, 2 + %D = srem i32 %B, 3 + %E = or i32 %C, %D + %F = icmp eq i32 %E, 0 + ret i1 %F +; CHECK-LABEL: @test8( +; CHECK-NEXT: srem i32 %B, 3 +; CHECK-NEXT: and i32 %A, 1 +; CHECK-NEXT: or +; CHECK-NEXT: icmp +; CHECK-NEXT: ret i1 +} + +define i1 @test9(i32 %A) { + %B = srem i32 %A, 7589 + %C = srem i32 %A, 395309 + %D = or i32 %B, %C + %E = icmp eq i32 %D, 0 + ret i1 %E +; CHECK-LABEL: @test9( +; CHECK-NEXT: icmp eq i32 %A, 0 +; CHECK-NEXT: ret i1 %E +} + +define i1 @test10(i32 %A) { + ; 7589 and 395309 are prime, and + ; 7589 * 395309 == 3000000001 == -1294967295 (2^32) + %B = urem i32 %A, 7589 + %C = urem i32 %A, 395309 + %D = or i32 %B, %C + %E = icmp eq i32 %D, 0 + ret i1 %E +; CHECK-LABEL: @test10( +; CHECK-NEXT: urem i32 %A, -1294967295 +; CHECK-NEXT: icmp eq i32 %{{.*}}, 0 +; CHECK-NEXT: ret i1 +} + +define i1 @test11(i32 %A) { + %B = urem i32 %A, 65535 + %C = urem i32 %A, 65537 + %D = or i32 %B, %C + %E = icmp eq i32 %D, 0 + ret i1 %E +; CHECK-LABEL: @test11( +; CHECK-NEXT: urem i32 %A, -1 +; CHECK-NEXT: icmp eq i32 %{{.*}}, 0 +; CHECK-NEXT: ret i1 +} + +define i1 @test12(i32 %A) { + %B = urem i32 %A, 65536 + %C = urem i32 %A, 65537 + %D = or i32 %B, %C + %E = icmp eq i32 %D, 0 + ret i1 %E +; CHECK-LABEL: @test12( +; CHECK-NEXT: icmp eq i32 %A, 0 +; CHECK-NEXT: ret i1 +} + +define i1 @test13(i32 %A) { + %B = srem i32 %A, 65536 + %C = urem i32 %A, 65535 + %D = or i32 %B, %C + %E = icmp eq i32 %D, 0 + ret i1 %E +; CHECK-LABEL: @test13( +; CHECK-NEXT: urem i32 %A, -65536 +; CHECK-NEXT: icmp eq i32 %{{.*}}, 0 +; CHECK-NEXT: ret i1 +} + +define i1 @test14(i32 %A) { + %B = srem i32 %A, 95 + %C = srem i32 %A, 22605091 + %D = or i32 %B, %C + %E = icmp eq i32 %D, 0 + ret i1 %E +; CHECK-LABEL: @test14( +; CHECK-NEXT: srem i32 %A, 2147483645 +; CHECK-NEXT: icmp eq i32 %{{.*}}, 0 +; CHECK-NEXT: ret i1 +} + +define i1 @test15(i32 %A) { + %B = srem i32 %A, 97 + %C = srem i32 %A, 22605091 + %D = or i32 %B, %C + %E = icmp eq i32 %D, 0 + ret i1 %E +; CHECK-LABEL: @test15( +; CHECK-NEXT: icmp eq i32 %A, 0 +; CHECK-NEXT: ret i1 +} + +define i32 @test16(i32 %A) { + %B = srem i32 %A, 3 + %C = srem i32 %A, 5 + %D = or i32 %B, %C + %E = icmp eq i32 %D, 0 + %F = zext i1 %E to i32 + %G = add i32 %B, %F + ret i32 %G +; CHECK-LABEL: @test16( +; CHECK-NEXT: %B = srem i32 %A, 3 +; CHECK-NEXT: %[[REM:.*]] = srem i32 %A, 15 +; CHECK-NEXT: %E = icmp eq i32 %[[REM]], 0 +; CHECK-NEXT: %F = zext i1 %E to i32 +; CHECK-NEXT: %G = add i32 %B, %F +; CHECK-NEXT: ret i32 %G +} + +define i32 @test17(i32 %A) { + %B = srem i32 %A, 3 + %C = srem i32 %A, 5 + %D = or i32 %B, %C + %E = icmp eq i32 %D, 0 + %F = zext i1 %E to i32 + %G = add i32 %B, %F + %H = add i32 %C, %G + ret i32 %H +; CHECK-LABEL: @test17( +; CHECK-NEXT: %B = srem i32 %A, 3 +; CHECK-NEXT: %C = srem i32 %A, 5 +; CHECK-NOT: srem +; CHECK: ret i32 +} + +define i32 @test18(i32 %A) { + %B = srem i32 %A, 3 + %C = and i32 %A, 7 + %D = or i32 %B, %C + %E = icmp eq i32 %D, 0 + %F = zext i1 %E to i32 + %G = add i32 %C, %F + ret i32 %G +; CHECK-LABEL: @test18( +; CHECK-NEXT: %C = and i32 %A, 7 +; CHECK-NEXT: %[[REM:.*]] = srem i32 %A, 24 +; CHECK-NEXT: %E = icmp eq i32 %[[REM]], 0 +; CHECK-NEXT: %F = zext i1 %E to i32 +; CHECK-NEXT: %G = add +; CHECK-NEXT: ret i32 %G +} + +define i1 @test19(i32 %A) { + %B = srem i32 %A, 6 + %C = srem i32 %A, 10 + %D = icmp eq i32 %B, 0 + %E = icmp eq i32 %C, 0 + %F = and i1 %D, %E + ret i1 %F +; CHECK-LABEL: @test19( +; CHECK-NEXT: %[[REM:.*]] = srem i32 %A, 30 +; CHECK-NEXT: icmp eq i32 %[[REM]], 0 +; CHECK-NEXT: ret i1 +} + +define i1 @test20(i32 %A) { + %B = and i32 %A, 1 + %C = srem i32 %A, 3 + %D = and i32 %A, 3 + %E = srem i32 %A, 5 + %F = srem i32 %A, 6 + %G = icmp eq i32 %B, 0 + %H = icmp eq i32 %C, 0 + %I = icmp eq i32 %D, 0 + %J = icmp eq i32 %E, 0 + %K = icmp eq i32 %F, 0 + %L = and i1 %G, %H + %M = and i1 %L, %I + %N = and i1 %M, %J + %O = and i1 %N, %K + ret i1 %O +; CHECK-LABEL: @test20( +; CHECK-NEXT: srem i32 %A, 60 +; CHECK-NEXT: icmp eq i32 +; CHECK-NEXT: ret i1 +} + +define i1 @test21(i32 %A) { + %B = srem i32 %A, -2147483648 + %C = srem i32 %A, 1024 + %D = icmp eq i32 %B, 0 + %E = icmp eq i32 %C, 0 + %F = and i1 %D, %E + ret i1 %F +; CHECK-LABEL: @test21( +; CHECK-NEXT: and i32 %A, 2147483647 +; CHECK-NEXT: icmp eq i32 +; CHECK-NEXT: ret i1 +} + +define i1 @test22(i32 %A) { + %B = srem i32 %A, 1024 + %C = srem i32 %A, -2147483648 + %D = icmp eq i32 %B, 0 + %E = icmp eq i32 %C, 0 + %F = and i1 %D, %E + ret i1 %F +; CHECK-LABEL: @test22( +; CHECK-NEXT: and i32 %A, 2147483647 +; CHECK-NEXT: icmp eq i32 +; CHECK-NEXT: ret i1 +} Index: llvm/trunk/unittests/ADT/APIntTest.cpp =================================================================== --- llvm/trunk/unittests/ADT/APIntTest.cpp +++ llvm/trunk/unittests/ADT/APIntTest.cpp @@ -1977,3 +1977,46 @@ i128.setHighBits(2); EXPECT_EQ(0xc, i128.getHiBits(4)); } + +TEST(APIntTest, GCD) { + using APIntOps::GreatestCommonDivisor; + + for (unsigned Bits : {1, 2, 32, 63, 64, 65}) { + // Test some corner cases near zero. + APInt Zero(Bits, 0), One(Bits, 1); + EXPECT_EQ(GreatestCommonDivisor(Zero, Zero), Zero); + EXPECT_EQ(GreatestCommonDivisor(Zero, One), One); + EXPECT_EQ(GreatestCommonDivisor(One, Zero), One); + EXPECT_EQ(GreatestCommonDivisor(One, One), One); + + if (Bits > 1) { + APInt Two(Bits, 2); + EXPECT_EQ(GreatestCommonDivisor(Zero, Two), Two); + EXPECT_EQ(GreatestCommonDivisor(One, Two), One); + EXPECT_EQ(GreatestCommonDivisor(Two, Two), Two); + + // Test some corner cases near the highest representable value. + APInt Max(Bits, 0); + Max.setAllBits(); + EXPECT_EQ(GreatestCommonDivisor(Zero, Max), Max); + EXPECT_EQ(GreatestCommonDivisor(One, Max), One); + EXPECT_EQ(GreatestCommonDivisor(Two, Max), One); + EXPECT_EQ(GreatestCommonDivisor(Max, Max), Max); + + APInt MaxOver2 = Max.udiv(Two); + EXPECT_EQ(GreatestCommonDivisor(MaxOver2, Max), One); + // Max - 1 == Max / 2 * 2, because Max is odd. + EXPECT_EQ(GreatestCommonDivisor(MaxOver2, Max - 1), MaxOver2); + } + } + + // Compute the 20th Mersenne prime. + const unsigned BitWidth = 4450; + APInt HugePrime = APInt::getLowBitsSet(BitWidth, 4423); + + // 9931 and 123456 are coprime. + APInt A = HugePrime * APInt(BitWidth, 9931); + APInt B = HugePrime * APInt(BitWidth, 123456); + APInt C = GreatestCommonDivisor(A, B); + EXPECT_EQ(C, HugePrime); +}