Index: llvm/trunk/include/llvm/ADT/APInt.h
===================================================================
--- llvm/trunk/include/llvm/ADT/APInt.h
+++ llvm/trunk/include/llvm/ADT/APInt.h
@@ -869,7 +869,14 @@
   /// \brief Logical right-shift function.
   ///
   /// Logical right-shift this APInt by shiftAmt.
-  APInt lshr(unsigned shiftAmt) const;
+  APInt lshr(unsigned shiftAmt) const {
+    APInt R(*this);
+    R.lshrInPlace(shiftAmt);
+    return R;
+  }
+
+  /// Logical right-shift this APInt by shiftAmt in place.
+  void lshrInPlace(unsigned shiftAmt);
 
   /// \brief Left-shift function.
   ///
@@ -1949,7 +1956,7 @@
   return A.ugt(B) ? A : B;
 }
 
-/// \brief Compute GCD of two APInt values.
+/// \brief Compute GCD of two unsigned APInt values.
 ///
 /// This function returns the greatest common divisor of the two APInt values
 /// using Euclid's algorithm.
Index: llvm/trunk/lib/Support/APInt.cpp
===================================================================
--- llvm/trunk/lib/Support/APInt.cpp
+++ llvm/trunk/lib/Support/APInt.cpp
@@ -733,18 +733,6 @@
   return Count;
 }
 
-/// Perform a logical right-shift from Src to Dst, which must be equal or
-/// non-overlapping, of Words words, by Shift, which must be less than 64.
-static void lshrNear(uint64_t *Dst, uint64_t *Src, unsigned Words,
-                     unsigned Shift) {
-  uint64_t Carry = 0;
-  for (int I = Words - 1; I >= 0; --I) {
-    uint64_t Tmp = Src[I];
-    Dst[I] = (Tmp >> Shift) | Carry;
-    Carry = Tmp << (64 - Shift);
-  }
-}
-
 APInt APInt::byteSwap() const {
   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
   if (BitWidth == 16)
@@ -765,8 +753,7 @@
   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
     Result.pVal[I] = ByteSwap_64(pVal[N - I - 1]);
   if (Result.BitWidth != BitWidth) {
-    lshrNear(Result.pVal, Result.pVal, getNumWords(),
-             Result.BitWidth - BitWidth);
+    Result.lshrInPlace(Result.BitWidth - BitWidth);
     Result.BitWidth = BitWidth;
   }
   return Result;
@@ -803,11 +790,45 @@
 }
 
 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
-  while (!!B) {
-    APInt R = A.urem(B);
-    A = std::move(B);
-    B = std::move(R);
+  // Fast-path a common case.
+  if (A == B) return A;
+
+  // Corner cases: if either operand is zero, the other is the gcd.
+  if (!A) return B;
+  if (!B) return A;
+
+  // Count common powers of 2 and remove all other powers of 2.
+  unsigned Pow2;
+  {
+    unsigned Pow2_A = A.countTrailingZeros();
+    unsigned Pow2_B = B.countTrailingZeros();
+    if (Pow2_A > Pow2_B) {
+      A.lshrInPlace(Pow2_A - Pow2_B);
+      Pow2 = Pow2_B;
+    } else if (Pow2_B > Pow2_A) {
+      B.lshrInPlace(Pow2_B - Pow2_A);
+      Pow2 = Pow2_A;
+    } else {
+      Pow2 = Pow2_A;
+    }
   }
+
+  // Both operands are odd multiples of 2^Pow_2:
+  //
+  //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
+  //
+  // This is a modified version of Stein's algorithm, taking advantage of
+  // efficient countTrailingZeros().
+  while (A != B) {
+    if (A.ugt(B)) {
+      A -= B;
+      A.lshrInPlace(A.countTrailingZeros() - Pow2);
+    } else {
+      B -= A;
+      B.lshrInPlace(B.countTrailingZeros() - Pow2);
+    }
+  }
+
   return A;
 }
 
@@ -1119,68 +1140,59 @@
   return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth));
 }
 
-/// Logical right-shift this APInt by shiftAmt.
-/// @brief Logical right-shift function.
-APInt APInt::lshr(unsigned shiftAmt) const {
-  if (isSingleWord()) {
-    if (shiftAmt >= BitWidth)
-      return APInt(BitWidth, 0);
-    else
-      return APInt(BitWidth, this->VAL >> shiftAmt);
-  }
-
-  // If all the bits were shifted out, the result is 0. This avoids issues
-  // with shifting by the size of the integer type, which produces undefined
-  // results. We define these "undefined results" to always be 0.
-  if (shiftAmt >= BitWidth)
-    return APInt(BitWidth, 0);
-
-  // If none of the bits are shifted out, the result is *this. This avoids
-  // issues with shifting by the size of the integer type, which produces
-  // undefined results in the code below. This is also an optimization.
-  if (shiftAmt == 0)
-    return *this;
+/// Perform a logical right-shift from Src to Dst of Words words, by Shift,
+/// which must be less than 64. If the source and destination ranges overlap,
+/// we require that Src >= Dst (put another way, we require that the overall
+/// operation is a right shift on the combined range).
+static void lshrWords(APInt::WordType *Dst, APInt::WordType *Src,
+                      unsigned Words, unsigned Shift) {
+  assert(Shift < APInt::APINT_BITS_PER_WORD);
 
-  // Create some space for the result.
-  uint64_t * val = new uint64_t[getNumWords()];
+  if (!Words)
+    return;
 
-  // If we are shifting less than a word, compute the shift with a simple carry
-  if (shiftAmt < APINT_BITS_PER_WORD) {
-    lshrNear(val, pVal, getNumWords(), shiftAmt);
-    APInt Result(val, BitWidth);
-    Result.clearUnusedBits();
-    return Result;
+  if (Shift == 0) {
+    std::memmove(Dst, Src, Words * APInt::APINT_WORD_SIZE);
+    return;
   }
 
-  // Compute some values needed by the remaining shift algorithms
-  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
-  unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
+  uint64_t Low = Src[0];
+  for (unsigned I = 1; I != Words; ++I) {
+    uint64_t High = Src[I];
+    Dst[I - 1] =
+        (Low >> Shift) | (High << (APInt::APINT_BITS_PER_WORD - Shift));
+    Low = High;
+  }
+  Dst[Words - 1] = Low >> Shift;
+}
 
-  // If we are shifting whole words, just move whole words
-  if (wordShift == 0) {
-    for (unsigned i = 0; i < getNumWords() - offset; ++i)
-      val[i] = pVal[i+offset];
-    for (unsigned i = getNumWords()-offset; i < getNumWords(); i++)
-      val[i] = 0;
-    APInt Result(val, BitWidth);
-    Result.clearUnusedBits();
-    return Result;
+/// Logical right-shift this APInt by shiftAmt.
+/// @brief Logical right-shift function.
+void APInt::lshrInPlace(unsigned shiftAmt) {
+  if (isSingleWord()) {
+    if (shiftAmt >= BitWidth)
+      VAL = 0;
+    else
+      VAL >>= shiftAmt;
+    return;
   }
 
-  // Shift the low order words
-  unsigned breakWord = getNumWords() - offset -1;
-  for (unsigned i = 0; i < breakWord; ++i)
-    val[i] = (pVal[i+offset] >> wordShift) |
-             (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
-  // Shift the break word.
-  val[breakWord] = pVal[breakWord+offset] >> wordShift;
+  // Don't bother performing a no-op shift.
+  if (!shiftAmt)
+    return;
 
-  // Remaining words are 0
-  for (unsigned i = breakWord+1; i < getNumWords(); ++i)
-    val[i] = 0;
-  APInt Result(val, BitWidth);
-  Result.clearUnusedBits();
-  return Result;
+  // Find number of complete words being shifted out and zeroed.
+  const unsigned Words = getNumWords();
+  const unsigned ShiftFullWords =
+      std::min(shiftAmt / APINT_BITS_PER_WORD, Words);
+
+  // Fill in first Words - ShiftFullWords by shifting.
+  lshrWords(pVal, pVal + ShiftFullWords, Words - ShiftFullWords,
+            shiftAmt - (ShiftFullWords * APINT_BITS_PER_WORD));
+
+  // The remaining high words are all zero.
+  for (unsigned I = Words - ShiftFullWords; I != Words; ++I)
+    pVal[I] = 0;
 }
 
 /// Left-shift this APInt by shiftAmt.
Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1178,6 +1178,373 @@
   Constant *C = Builder->getInt(CI->getValue()-1);
   return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
 }
+#if 0
+/// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS
+/// and CmpRHS are both known to be integer constants.
+Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+                                          ConstantInt *DivRHS) {
+  ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1));
+  const APInt &CmpRHSV = CmpRHS->getValue();
+
+  // FIXME: If the operand types don't match the type of the divide
+  // then don't attempt this transform. The code below doesn't have the
+  // logic to deal with a signed divide and an unsigned compare (and
+  // vice versa). This is because (x /s C1) <s C2  produces different
+  // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
+  // (x /u C1) <u C2.  Simply casting the operands and result won't
+  // work. :(  The if statement below tests that condition and bails
+  // if it finds it.
+  bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
+  if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
+    return nullptr;
+  if (DivRHS->isZero())
+    return nullptr; // The ProdOV computation fails on divide by zero.
+  if (DivIsSigned && DivRHS->isAllOnesValue())
+    return nullptr; // The overflow computation also screws up here
+  if (DivRHS->isOne()) {
+    // This eliminates some funny cases with INT_MIN.
+    ICI.setOperand(0, DivI->getOperand(0));   // X/1 == X.
+    return &ICI;
+  }
+
+  // Compute Prod = CI * DivRHS. We are essentially solving an equation
+  // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
+  // C2 (CI). By solving for X we can turn this into a range check
+  // instead of computing a divide.
+  Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
+
+  // Determine if the product overflows by seeing if the product is
+  // not equal to the divide. Make sure we do the same kind of divide
+  // as in the LHS instruction that we're folding.
+  bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
+                 ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
+
+  // Get the ICmp opcode
+  ICmpInst::Predicate Pred = ICI.getPredicate();
+
+  /// If the division is known to be exact, then there is no remainder from the
+  /// divide, so the covered range size is unit, otherwise it is the divisor.
+  ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS;
+
+  // Figure out the interval that is being checked.  For example, a comparison
+  // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
+  // Compute this interval based on the constants involved and the signedness of
+  // the compare/divide.  This computes a half-open interval, keeping track of
+  // whether either value in the interval overflows.  After analysis each
+  // overflow variable is set to 0 if it's corresponding bound variable is valid
+  // -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
+  int LoOverflow = 0, HiOverflow = 0;
+  Constant *LoBound = nullptr, *HiBound = nullptr;
+
+  if (!DivIsSigned) {  // udiv
+    // e.g. X/5 op 3  --> [15, 20)
+    LoBound = Prod;
+    HiOverflow = LoOverflow = ProdOV;
+    if (!HiOverflow) {
+      // If this is not an exact divide, then many values in the range collapse
+      // to the same result value.
+      HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false);
+    }
+  } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
+    if (CmpRHSV == 0) {       // (X / pos) op 0
+      // Can't overflow.  e.g.  X/2 op 0 --> [-1, 2)
+      LoBound = ConstantExpr::getNeg(SubOne(RangeSize));
+      HiBound = RangeSize;
+    } else if (CmpRHSV.isStrictlyPositive()) {   // (X / pos) op pos
+      LoBound = Prod;     // e.g.   X/5 op 3 --> [15, 20)
+      HiOverflow = LoOverflow = ProdOV;
+      if (!HiOverflow)
+        HiOverflow = AddWithOverflow(HiBound, Prod, RangeSize, true);
+    } else {                       // (X / pos) op neg
+      // e.g. X/5 op -3  --> [-15-4, -15+1) --> [-19, -14)
+      HiBound = AddOne(Prod);
+      LoOverflow = HiOverflow = ProdOV ? -1 : 0;
+      if (!LoOverflow) {
+        ConstantInt *DivNeg =cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+        LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
+      }
+    }
+  } else if (DivRHS->isNegative()) { // Divisor is < 0.
+    if (DivI->isExact())
+      RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+    if (CmpRHSV == 0) {       // (X / neg) op 0
+      // e.g. X/-5 op 0  --> [-4, 5)
+      LoBound = AddOne(RangeSize);
+      HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+      if (HiBound == DivRHS) {     // -INTMIN = INTMIN
+        HiOverflow = 1;            // [INTMIN+1, overflow)
+        HiBound = nullptr;         // e.g. X/INTMIN = 0 --> X > INTMIN
+      }
+    } else if (CmpRHSV.isStrictlyPositive()) {   // (X / neg) op pos
+      // e.g. X/-5 op 3  --> [-19, -14)
+      HiBound = AddOne(Prod);
+      HiOverflow = LoOverflow = ProdOV ? -1 : 0;
+      if (!LoOverflow)
+        LoOverflow = AddWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
+    } else {                       // (X / neg) op neg
+      LoBound = Prod;       // e.g. X/-5 op -3  --> [15, 20)
+      LoOverflow = HiOverflow = ProdOV;
+      if (!HiOverflow)
+        HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true);
+    }
+
+    // Dividing by a negative swaps the condition.  LT <-> GT
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+  }
+
+  Value *X = DivI->getOperand(0);
+  switch (Pred) {
+  default: llvm_unreachable("Unhandled icmp opcode!");
+  case ICmpInst::ICMP_EQ:
+    if (LoOverflow && HiOverflow)
+      return ReplaceInstUsesWith(ICI, Builder->getFalse());
+    if (HiOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+                          ICmpInst::ICMP_UGE, X, LoBound);
+    if (LoOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+                          ICmpInst::ICMP_ULT, X, HiBound);
+    return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
+                                                    DivIsSigned, true));
+  case ICmpInst::ICMP_NE:
+    if (LoOverflow && HiOverflow)
+      return ReplaceInstUsesWith(ICI, Builder->getTrue());
+    if (HiOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+                          ICmpInst::ICMP_ULT, X, LoBound);
+    if (LoOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+                          ICmpInst::ICMP_UGE, X, HiBound);
+    return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
+                                                    DivIsSigned, false));
+  case ICmpInst::ICMP_ULT:
+  case ICmpInst::ICMP_SLT:
+    if (LoOverflow == +1)   // Low bound is greater than input range.
+      return ReplaceInstUsesWith(ICI, Builder->getTrue());
+    if (LoOverflow == -1)   // Low bound is less than input range.
+      return ReplaceInstUsesWith(ICI, Builder->getFalse());
+    return new ICmpInst(Pred, X, LoBound);
+  case ICmpInst::ICMP_UGT:
+  case ICmpInst::ICMP_SGT:
+    if (HiOverflow == +1)       // High bound greater than input range.
+      return ReplaceInstUsesWith(ICI, Builder->getFalse());
+    if (HiOverflow == -1)       // High bound less than input range.
+      return ReplaceInstUsesWith(ICI, Builder->getTrue());
+    if (Pred == ICmpInst::ICMP_UGT)
+      return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
+    return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
+  }
+}
+
+/// FoldICmpShrCst - Handle "icmp(([al]shr X, cst1), cst2)".
+Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
+                                          ConstantInt *ShAmt) {
+  const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue();
+
+  // Check that the shift amount is in range.  If not, don't perform
+  // undefined shifts.  When the shift is visited it will be
+  // simplified.
+  uint32_t TypeBits = CmpRHSV.getBitWidth();
+  uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+  if (ShAmtVal >= TypeBits || ShAmtVal == 0)
+    return nullptr;
+
+  if (!ICI.isEquality()) {
+    // If we have an unsigned comparison and an ashr, we can't simplify this.
+    // Similarly for signed comparisons with lshr.
+    if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr))
+      return nullptr;
+
+    // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv
+    // by a power of 2.  Since we already have logic to simplify these,
+    // transform to div and then simplify the resultant comparison.
+    if (Shr->getOpcode() == Instruction::AShr &&
+        (!Shr->isExact() || ShAmtVal == TypeBits - 1))
+      return nullptr;
+
+    // Revisit the shift (to delete it).
+    Worklist.Add(Shr);
+
+    Constant *DivCst =
+      ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal));
+
+    Value *Tmp =
+      Shr->getOpcode() == Instruction::AShr ?
+      Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) :
+      Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact());
+
+    ICI.setOperand(0, Tmp);
+
+    // If the builder folded the binop, just return it.
+    BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp);
+    if (!TheDiv)
+      return &ICI;
+
+    // Otherwise, fold this div/compare.
+    assert(TheDiv->getOpcode() == Instruction::SDiv ||
+           TheDiv->getOpcode() == Instruction::UDiv);
+
+    Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst));
+    assert(Res && "This div/cst should have folded!");
+    return Res;
+  }
+
+  // If we are comparing against bits always shifted out, the
+  // comparison cannot succeed.
+  APInt Comp = CmpRHSV << ShAmtVal;
+  ConstantInt *ShiftedCmpRHS = Builder->getInt(Comp);
+  if (Shr->getOpcode() == Instruction::LShr)
+    Comp = Comp.lshr(ShAmtVal);
+  else
+    Comp = Comp.ashr(ShAmtVal);
+
+  if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero.
+    bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+    Constant *Cst = Builder->getInt1(IsICMP_NE);
+    return ReplaceInstUsesWith(ICI, Cst);
+  }
+
+  // Otherwise, check to see if the bits shifted out are known to be zero.
+  // If so, we can compare against the unshifted value:
+  //  (X & 4) >> 1 == 2  --> (X & 4) == 4.
+  if (Shr->hasOneUse() && Shr->isExact())
+    return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS);
+
+  if (Shr->hasOneUse()) {
+    // Otherwise strength reduce the shift into an and.
+    APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
+    Constant *Mask = Builder->getInt(Val);
+
+    Value *And = Builder->CreateAnd(Shr->getOperand(0),
+                                    Mask, Shr->getName()+".mask");
+    return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS);
+  }
+  return nullptr;
+}
+#endif
+namespace {
+/// Models a check that LHS is divisible by Factor.
+class DivisibilityCheck {
+  // Signedness of the check. A bitwise and is a divisibility check,
+  // if its mask is (equivalent to) a power of 2 mask.
+  enum { DC_Null, DC_SRem, DC_URem, DC_And } Kind;
+  Value *Check;
+  Value *LHS;
+  ConstantInt *Factor;
+
+public:
+  DivisibilityCheck() : Kind(DC_Null) {}
+
+  /// Try to extract a divisibility check from V, on the assumption
+  /// that it is being compared to 0.
+  bool match(Value *V) {
+    Kind = DC_Null;
+    Check = V;
+    if (::match(V, m_SRem(m_Value(LHS), m_ConstantInt(Factor))))
+      Kind = DC_SRem;
+    else if (::match(V, m_URem(m_Value(LHS), m_ConstantInt(Factor))))
+      Kind = DC_URem;
+    else if (::match(V, m_And(m_Value(LHS), m_ConstantInt(Factor))))
+      Kind = DC_And;
+    return Kind != DC_Null;
+  }
+
+  /// Merge another divisibility check into this one.
+  bool merge(const DivisibilityCheck &O) {
+    assert(Kind != DC_Null && O.Kind != DC_Null);
+    if (LHS != O.LHS)
+      // We don't have two divisibility checks on the same operand.
+      return false;
+
+    if (!(Check->hasOneUse() && Kind != DC_And) &&
+        !(O.Check->hasOneUse() && O.Kind != DC_And))
+      // We would not remove a division: bail out.
+      return false;
+
+    // Determine the factors we're checking for.
+    bool Failed = false;
+    APInt LHS = getFactor(O, Failed);
+    APInt RHS = O.getFactor(*this, Failed);
+    if (Failed)
+      return false;
+
+    // If we don't have a single signedness, we can fold the checks
+    // together if one of them is for a power of 2, because
+    // divisibility by a power of 2 is the same for srem and urem.
+    if (Kind != O.Kind && O.Kind != DC_And && LHS.isPowerOf2())
+      Kind = O.Kind;
+    if (Kind != O.Kind && !RHS.isPowerOf2())
+      return false;
+    assert(Kind == DC_SRem || Kind == DC_URem && "bad kind after merging");
+    bool Signed = Kind == DC_SRem;
+
+    // Fold them together.
+    APInt GCD = APIntOps::GreatestCommonDivisor(LHS, RHS);
+    APInt LCM = LHS.udiv(GCD);
+    bool Overflow = false;
+    // Use a negative signed multiplication: producing INT_MIN should not
+    // be considered an overflow here.
+    LCM = Signed ? LCM.smul_ov(-RHS, Overflow) : LCM.umul_ov(RHS, Overflow);
+    // On overflow, there cannot exist a non-zero value that is divisible by
+    // both factors at once.
+    if (Overflow) LCM = 0;
+    Factor = cast<ConstantInt>(ConstantInt::get(Factor->getType(), LCM));
+    return true;
+  }
+
+  Value *create(InstCombiner::BuilderTy *Builder) {
+    // LHS is divisible by zero iff LHS is zero.
+    if (!Factor->getValue())
+      return LHS;
+    // Checking for divisibility by power of 2 doesn't need a division.
+    if (Factor->getValue().isPowerOf2())
+      return Builder->CreateAnd(
+          LHS, ConstantInt::get(Factor->getType(), Factor->getValue() - 1));
+    return Kind == DC_SRem ? Builder->CreateSRem(LHS, Factor)
+                           : Builder->CreateURem(LHS, Factor);
+  }
+
+private:
+  /// Get the unsigned multiplicative factor we're checking for.
+  APInt getFactor(const DivisibilityCheck &O, bool &Failed) const {
+    switch (Kind) {
+    case DC_Null:
+      llvm_unreachable("unexpected Kind");
+
+    case DC_SRem:
+      if (Factor->getValue().isNegative())
+        return -Factor->getValue();
+      // Fall through.
+    case DC_URem:
+      return Factor->getValue();
+
+    case DC_And:
+      assert(O.Kind != DC_And && "bad kind pair");
+      // If we're also checking for divisibility by K * 2^N,
+      // the low N bits of the mask are irrelevant.
+      APInt Result =
+          Factor->getValue() |
+          APInt::getLowBitsSet(Factor->getValue().getBitWidth(),
+                               O.getFactor(*this, Failed).countTrailingZeros());
+      ++Result;
+      if (!!Result && !Result.isPowerOf2())
+        Failed = true;
+      return Result;
+    }
+  }
+};
+
+struct DivisibilityCheck_match {
+  DivisibilityCheck &Check;
+  DivisibilityCheck_match(DivisibilityCheck &Check) : Check(Check) {}
+  bool match(Value *V) { return Check.match(V); }
+};
+
+/// Matcher for divisibility checks.
+DivisibilityCheck_match m_DivisibilityCheck(DivisibilityCheck &Check) {
+  return DivisibilityCheck_match(Check);
+}
+}
 
 /// Handle "(icmp eq/ne (ashr/lshr AP2, A), AP1)" ->
 /// (icmp eq/ne A, Log2(AP2/AP1)) ->
@@ -1806,6 +2173,42 @@
   if (!Cmp.isEquality() || *C != 0 || !Or->hasOneUse())
     return nullptr;
 
+  DivisibilityCheck DivL, DivR;
+  if (match(Or, m_Or(m_DivisibilityCheck(DivL), m_DivisibilityCheck(DivR))) &&
+      DivL.merge(DivR)) {
+    // Simplify icmp eq (or (srem P, M), (srem P, N)), 0
+    //  -> icmp eq (srem P, lcm(M, N)), 0
+    return new ICmpInst(Pred, DivL.create(Builder),
+                        ConstantInt::getNullValue(Or->getType()));
+  }
+
+#if 0
+    // icmp eq (or X, Y), 0
+    //  -> and (icmp eq X, 0), (icmp eq Y, 0)
+    // but only if this allows either subexpression to simplify further.
+    Instruction *ICmpX = nullptr, *ICmpY = nullptr;
+    if (auto *X = dyn_cast<Instruction>(LHSI->getOperand(0)))
+      ICmpX = visitICmpInstWithInstAndIntCst(ICI, X, RHS);
+    if (auto *Y = dyn_cast<Instruction>(LHSI->getOperand(1)))
+      ICmpY = visitICmpInstWithInstAndIntCst(ICI, Y, RHS);
+    if (ICmpX || ICmpX) {
+      Value *NewX, *NewY;
+      if (ICmpX) {
+        Worklist.Add(ICmpX);
+        NewX = Builder->Insert(ICmpX);
+      } else
+        NewX = Builder->CreateICmp(ICI.getPredicate(), LHSI->getOperand(0),
+                                    RHS);
+      if (ICmpY) {
+        Worklist.Add(ICmpY);
+        NewY = Builder->Insert(ICmpY);
+      } else
+        NewY = Builder->CreateICmp(ICI.getPredicate(), LHSI->getOperand(1),
+                                    RHS);
+      return BinaryOperator::CreateAnd(NewX, NewY);
+    }
+#endif
+
   Value *P, *Q;
   if (match(Or, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
     // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
Index: llvm/trunk/test/Transforms/InstCombine/divisibility.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/divisibility.ll
+++ llvm/trunk/test/Transforms/InstCombine/divisibility.ll
@@ -0,0 +1,297 @@
+; Test that multiple divisibility checks are merged.
+
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i1 @test1(i32 %A) {
+  %B = srem i32 %A, 2
+  %C = srem i32 %A, 3
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: srem i32 %A, 6
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test2(i32 %A) {
+  %B = urem i32 %A, 2
+  %C = urem i32 %A, 3
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: urem i32 %A, 6
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test3(i32 %A) {
+  %B = srem i32 %A, 2
+  %C = urem i32 %A, 3
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: urem i32 %A, 6
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test4(i32 %A) {
+  %B = urem i32 %A, 2
+  %C = srem i32 %A, 3
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: srem i32 %A, 6
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test5(i32 %A) {
+  %B = srem i32 %A, 8
+  %C = srem i32 %A, 12
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: srem i32 %A, 24
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test6(i32 %A) {
+  %B = and i32 %A, 6
+  %C = srem i32 %A, 12
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: srem i32 %A, 24
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test7(i32 %A) {
+  %B = and i32 %A, 8
+  %C = srem i32 %A, 12
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: and i32 %A, 8
+; CHECK-NEXT: srem i32 %A, 12
+; CHECK-NEXT: or
+; CHECK-NEXT: icmp
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test8(i32 %A, i32 %B) {
+  %C = srem i32 %A, 2
+  %D = srem i32 %B, 3
+  %E = or i32 %C, %D
+  %F = icmp eq i32 %E, 0
+  ret i1 %F
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: srem i32 %B, 3
+; CHECK-NEXT: and i32 %A, 1
+; CHECK-NEXT: or
+; CHECK-NEXT: icmp
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test9(i32 %A) {
+  %B = srem i32 %A, 7589
+  %C = srem i32 %A, 395309
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: icmp eq i32 %A, 0
+; CHECK-NEXT: ret i1 %E
+}
+
+define i1 @test10(i32 %A) {
+  ; 7589 and 395309 are prime, and
+  ; 7589 * 395309 == 3000000001 == -1294967295 (2^32)
+  %B = urem i32 %A, 7589
+  %C = urem i32 %A, 395309
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: urem i32 %A, -1294967295
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test11(i32 %A) {
+  %B = urem i32 %A, 65535
+  %C = urem i32 %A, 65537
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: urem i32 %A, -1
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test12(i32 %A) {
+  %B = urem i32 %A, 65536
+  %C = urem i32 %A, 65537
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: icmp eq i32 %A, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test13(i32 %A) {
+  %B = srem i32 %A, 65536
+  %C = urem i32 %A, 65535
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: urem i32 %A, -65536
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test14(i32 %A) {
+  %B = srem i32 %A, 95
+  %C = srem i32 %A, 22605091
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: srem i32 %A, 2147483645
+; CHECK-NEXT: icmp eq i32 %{{.*}}, 0
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test15(i32 %A) {
+  %B = srem i32 %A, 97
+  %C = srem i32 %A, 22605091
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  ret i1 %E
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: icmp eq i32 %A, 0
+; CHECK-NEXT: ret i1
+}
+
+define i32 @test16(i32 %A) {
+  %B = srem i32 %A, 3
+  %C = srem i32 %A, 5
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  %F = zext i1 %E to i32
+  %G = add i32 %B, %F
+  ret i32 %G
+; CHECK-LABEL: @test16(
+; CHECK-NEXT:  %B = srem i32 %A, 3
+; CHECK-NEXT:  %[[REM:.*]] = srem i32 %A, 15
+; CHECK-NEXT:  %E = icmp eq i32 %[[REM]], 0
+; CHECK-NEXT:  %F = zext i1 %E to i32
+; CHECK-NEXT:  %G = add i32 %B, %F
+; CHECK-NEXT:  ret i32 %G
+}
+
+define i32 @test17(i32 %A) {
+  %B = srem i32 %A, 3
+  %C = srem i32 %A, 5
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  %F = zext i1 %E to i32
+  %G = add i32 %B, %F
+  %H = add i32 %C, %G
+  ret i32 %H
+; CHECK-LABEL: @test17(
+; CHECK-NEXT:  %B = srem i32 %A, 3
+; CHECK-NEXT:  %C = srem i32 %A, 5
+; CHECK-NOT: srem
+; CHECK: ret i32
+}
+
+define i32 @test18(i32 %A) {
+  %B = srem i32 %A, 3
+  %C = and i32 %A, 7
+  %D = or i32 %B, %C
+  %E = icmp eq i32 %D, 0
+  %F = zext i1 %E to i32
+  %G = add i32 %C, %F
+  ret i32 %G
+; CHECK-LABEL: @test18(
+; CHECK-NEXT:  %C = and i32 %A, 7
+; CHECK-NEXT:  %[[REM:.*]] = srem i32 %A, 24
+; CHECK-NEXT:  %E = icmp eq i32 %[[REM]], 0
+; CHECK-NEXT:  %F = zext i1 %E to i32
+; CHECK-NEXT:  %G = add
+; CHECK-NEXT:  ret i32 %G
+}
+
+define i1 @test19(i32 %A) {
+  %B = srem i32 %A, 6
+  %C = srem i32 %A, 10
+  %D = icmp eq i32 %B, 0
+  %E = icmp eq i32 %C, 0
+  %F = and i1 %D, %E
+  ret i1 %F
+; CHECK-LABEL: @test19(
+; CHECK-NEXT:  %[[REM:.*]] = srem i32 %A, 30
+; CHECK-NEXT:  icmp eq i32 %[[REM]], 0
+; CHECK-NEXT:  ret i1
+}
+
+define i1 @test20(i32 %A) {
+  %B = and i32 %A, 1
+  %C = srem i32 %A, 3
+  %D = and i32 %A, 3
+  %E = srem i32 %A, 5
+  %F = srem i32 %A, 6
+  %G = icmp eq i32 %B, 0
+  %H = icmp eq i32 %C, 0
+  %I = icmp eq i32 %D, 0
+  %J = icmp eq i32 %E, 0
+  %K = icmp eq i32 %F, 0
+  %L = and i1 %G, %H
+  %M = and i1 %L, %I
+  %N = and i1 %M, %J
+  %O = and i1 %N, %K
+  ret i1 %O
+; CHECK-LABEL: @test20(
+; CHECK-NEXT:  srem i32 %A, 60
+; CHECK-NEXT:  icmp eq i32
+; CHECK-NEXT:  ret i1
+}
+
+define i1 @test21(i32 %A) {
+  %B = srem i32 %A, -2147483648
+  %C = srem i32 %A, 1024
+  %D = icmp eq i32 %B, 0
+  %E = icmp eq i32 %C, 0
+  %F = and i1 %D, %E
+  ret i1 %F
+; CHECK-LABEL: @test21(
+; CHECK-NEXT:  and i32 %A, 2147483647
+; CHECK-NEXT:  icmp eq i32
+; CHECK-NEXT:  ret i1
+}
+
+define i1 @test22(i32 %A) {
+  %B = srem i32 %A, 1024
+  %C = srem i32 %A, -2147483648
+  %D = icmp eq i32 %B, 0
+  %E = icmp eq i32 %C, 0
+  %F = and i1 %D, %E
+  ret i1 %F
+; CHECK-LABEL: @test22(
+; CHECK-NEXT:  and i32 %A, 2147483647
+; CHECK-NEXT:  icmp eq i32
+; CHECK-NEXT:  ret i1
+}
Index: llvm/trunk/unittests/ADT/APIntTest.cpp
===================================================================
--- llvm/trunk/unittests/ADT/APIntTest.cpp
+++ llvm/trunk/unittests/ADT/APIntTest.cpp
@@ -1977,3 +1977,46 @@
   i128.setHighBits(2);
   EXPECT_EQ(0xc, i128.getHiBits(4));
 }
+
+TEST(APIntTest, GCD) {
+  using APIntOps::GreatestCommonDivisor;
+
+  for (unsigned Bits : {1, 2, 32, 63, 64, 65}) {
+    // Test some corner cases near zero.
+    APInt Zero(Bits, 0), One(Bits, 1);
+    EXPECT_EQ(GreatestCommonDivisor(Zero, Zero), Zero);
+    EXPECT_EQ(GreatestCommonDivisor(Zero, One), One);
+    EXPECT_EQ(GreatestCommonDivisor(One, Zero), One);
+    EXPECT_EQ(GreatestCommonDivisor(One, One), One);
+
+    if (Bits > 1) {
+      APInt Two(Bits, 2);
+      EXPECT_EQ(GreatestCommonDivisor(Zero, Two), Two);
+      EXPECT_EQ(GreatestCommonDivisor(One, Two), One);
+      EXPECT_EQ(GreatestCommonDivisor(Two, Two), Two);
+
+      // Test some corner cases near the highest representable value.
+      APInt Max(Bits, 0);
+      Max.setAllBits();
+      EXPECT_EQ(GreatestCommonDivisor(Zero, Max), Max);
+      EXPECT_EQ(GreatestCommonDivisor(One, Max), One);
+      EXPECT_EQ(GreatestCommonDivisor(Two, Max), One);
+      EXPECT_EQ(GreatestCommonDivisor(Max, Max), Max);
+
+      APInt MaxOver2 = Max.udiv(Two);
+      EXPECT_EQ(GreatestCommonDivisor(MaxOver2, Max), One);
+      // Max - 1 == Max / 2 * 2, because Max is odd.
+      EXPECT_EQ(GreatestCommonDivisor(MaxOver2, Max - 1), MaxOver2);
+    }
+  }
+
+  // Compute the 20th Mersenne prime.
+  const unsigned BitWidth = 4450;
+  APInt HugePrime = APInt::getLowBitsSet(BitWidth, 4423);
+
+  // 9931 and 123456 are coprime.
+  APInt A = HugePrime * APInt(BitWidth, 9931);
+  APInt B = HugePrime * APInt(BitWidth, 123456);
+  APInt C = GreatestCommonDivisor(A, B);
+  EXPECT_EQ(C, HugePrime);
+}