Index: include/llvm/ADT/APInt.h
===================================================================
--- include/llvm/ADT/APInt.h
+++ include/llvm/ADT/APInt.h
@@ -16,6 +16,7 @@
 #ifndef LLVM_ADT_APINT_H
 #define LLVM_ADT_APINT_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/MathExtras.h"
 #include <cassert>
@@ -30,7 +31,6 @@
 class raw_ostream;
 
 template <typename T> class SmallVectorImpl;
-template <typename T> class ArrayRef;
 
 // An unsigned host type used as a single part of a multi-part
 // bignum.
@@ -78,10 +78,10 @@
   unsigned BitWidth; ///< The number of bits in this APInt.
 
   /// This union is used to store the integer value. When the
-  /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal.
+  /// integer bit-width <= 128, it uses VAL, otherwise it uses pVal.
   union {
-    uint64_t VAL;   ///< Used to store the <= 64 bits integer value.
-    uint64_t *pVal; ///< Used to store the >64 bits integer value.
+    uint64_t VAL[2]; ///< Used to store the <= 128 bits integer value.
+    uint64_t *pVal;  ///< Used to store the >128 bits integer value.
   };
 
   /// This enum is used to hold the constants we needed for APInt.
@@ -106,6 +106,19 @@
   /// \returns true if the number of bits <= 64, false otherwise.
   bool isSingleWord() const { return BitWidth <= APINT_BITS_PER_WORD; }
 
+  /// \brief Determine if this APInt stores its value in exactly two words.
+  ///
+  /// \returns true if the number of bits is in (64, 128], false otherwise.
+  bool isTwoWords() const {
+    // Too wide for a single word, but still within the in-line storage.
+    return !isSingleWord() && isInline();
+  }
+
+  /// \brief Determine if this APInt uses storage allocated in-line.
+  ///
+  /// \returns true if the number of bits <= 128, false otherwise.
+  bool isInline() const { return BitWidth <= APINT_BITS_PER_WORD * 2; }
+
   /// \brief Determine which word a bit is in.
   ///
   /// \returns the word position for the specified bit position.
@@ -149,7 +162,9 @@
     // Mask out the high bits.
     uint64_t mask = ~uint64_t(0ULL) >> (APINT_BITS_PER_WORD - wordBits);
     if (isSingleWord())
-      VAL &= mask;
+      VAL[0] &= mask;
+    else if (isTwoWords())
+      VAL[1] &= mask;
     else
       pVal[getNumWords() - 1] &= mask;
     return *this;
@@ -158,7 +173,11 @@
   /// \brief Get the word corresponding to a bit position
   /// \returns the corresponding word for the specified bit position.
   uint64_t getWord(unsigned bitPosition) const {
-    return isSingleWord() ? VAL : pVal[whichWord(bitPosition)];
+    if (!isInline())
+      return pVal[whichWord(bitPosition)];
+    // In-line values keep the low word in VAL[0] and the high word in VAL[1].
+    const bool UseHigh = isTwoWords() && bitPosition >= APINT_BITS_PER_WORD;
+    return VAL[UseHigh ? 1 : 0];
   }
 
   /// \brief Convert a char array into an APInt
@@ -223,6 +242,12 @@
   /// out-of-line slow case for countPopulation
   unsigned countPopulationSlowCase() const;
 
+  /// \returns a writable pointer to the first word of the value's storage,
+  /// i.e. the in-line array when isInline() and the pVal buffer otherwise.
+  uint64_t *getMutableRawData() {
+    return isInline() ? &VAL[0] : &pVal[0];
+  }
+
 public:
   /// \name Constructors
   /// @{
@@ -238,12 +263,17 @@
   /// \param val the initial value of the APInt
   /// \param isSigned how to treat signedness of val
   APInt(unsigned numBits, uint64_t val, bool isSigned = false)
-      : BitWidth(numBits), VAL(0) {
+      : BitWidth(numBits) {
     assert(BitWidth && "bitwidth too small");
-    if (isSingleWord())
-      VAL = val;
-    else
+    VAL[0] = 0;
+    if (isSingleWord()) {
+      VAL[0] = val;
+    } else if (isTwoWords()) {
+      VAL[0] = val;
+      VAL[1] = isSigned && int64_t(val) < 0 ? -1ULL : 0;
+    } else {
       initSlowCase(val, isSigned);
+    }
     clearUnusedBits();
   }
 
@@ -280,15 +310,23 @@
 
   /// Simply makes *this a copy of that.
   /// @brief Copy Constructor.
-  APInt(const APInt &that) : BitWidth(that.BitWidth), VAL(0) {
-    if (isSingleWord())
-      VAL = that.VAL;
-    else
+  APInt(const APInt &that) : BitWidth(that.BitWidth) {
+    VAL[0] = 0;
+    if (isSingleWord()) {
+      VAL[0] = that.VAL[0];
+    } else if (isTwoWords()) {
+      VAL[0] = that.VAL[0];
+      VAL[1] = that.VAL[1];
+    } else {
       initSlowCase(that);
+    }
   }
 
   /// \brief Move Constructor.
-  APInt(APInt &&that) : BitWidth(that.BitWidth), VAL(that.VAL) {
+  APInt(APInt &&that) : BitWidth(that.BitWidth) {
+    VAL[0] = that.VAL[0];
+    if (isTwoWords())
+      VAL[1] = that.VAL[1];
     that.BitWidth = 0;
   }
 
@@ -303,10 +341,10 @@
   ///
   /// This is useful for object deserialization (pair this with the static
   ///  method Read).
-  explicit APInt() : BitWidth(1), VAL(0) {}
+  explicit APInt() : BitWidth(1) { VAL[0] = 0; }
 
   /// \brief Returns whether this instance allocated memory.
-  bool needsCleanup() const { return !isSingleWord(); }
+  bool needsCleanup() const { return !isInline(); }
 
   /// Used to insert APInt objects, or objects that contain APInt objects, into
   ///  FoldingSets.
@@ -341,7 +379,10 @@
   /// This checks to see if the value has all bits of the APInt are set or not.
   bool isAllOnesValue() const {
     if (isSingleWord())
-      return VAL == ~integerPart(0) >> (APINT_BITS_PER_WORD - BitWidth);
+      return VAL[0] == ~integerPart(0) >> (APINT_BITS_PER_WORD - BitWidth);
+    if (isTwoWords()) // VAL[1] holds only the top BitWidth - 64 bits.
+      return VAL[0] == ~integerPart(0) &&
+             VAL[1] == ~integerPart(0) >> (APINT_BITS_PER_WORD * 2 - BitWidth);
     return countPopulationSlowCase() == BitWidth;
   }
 
@@ -390,7 +431,9 @@
   /// \returns true if the argument APInt value is a power of two > 0.
   bool isPowerOf2() const {
     if (isSingleWord())
-      return isPowerOf2_64(VAL);
+      return isPowerOf2_64(VAL[0]);
+    if (isTwoWords()) // Words differ and OR is 2^k: one bit set.
+      return VAL[0] != VAL[1] && isPowerOf2_64(VAL[0] | VAL[1]);
     return countPopulationSlowCase() == 1;
   }
 
@@ -576,8 +619,8 @@
   /// This is useful for writing out the APInt in binary form without any
   /// conversions.
   const uint64_t *getRawData() const {
-    if (isSingleWord())
-      return &VAL;
+    if (isInline())
+      return &VAL[0];
     return &pVal[0];
   }
 
@@ -631,7 +674,9 @@
   /// \returns true if *this is zero, false otherwise.
   bool operator!() const {
     if (isSingleWord())
-      return !VAL;
+      return !VAL[0];
+    if (isTwoWords())
+      return !VAL[0] && !VAL[1];
 
     for (unsigned i = 0; i != getNumWords(); ++i)
       if (pVal[i])
@@ -648,8 +693,10 @@
   /// \returns *this after assignment of RHS.
   APInt &operator=(const APInt &RHS) {
     // If the bitwidths are the same, we can avoid mucking with memory
-    if (isSingleWord() && RHS.isSingleWord()) {
-      VAL = RHS.VAL;
+    if (isInline() && RHS.isInline()) {
+      VAL[0] = RHS.VAL[0];
+      if (RHS.isTwoWords())
+        VAL[1] = RHS.VAL[1];
       BitWidth = RHS.BitWidth;
       return clearUnusedBits();
     }
@@ -659,7 +706,7 @@
 
   /// @brief Move assignment operator.
   APInt &operator=(APInt &&that) {
-    if (!isSingleWord()) {
+    if (!isInline()) {
       // The MSVC STL shipped in 2013 requires that self move assignment be a
       // no-op.  Otherwise algorithms like stable_sort will produce answers
       // where half of the output is left in a moved-from state.
@@ -670,7 +717,7 @@
 
     // Use memcpy so that type based alias analysis sees both VAL and pVal
     // as modified.
-    memcpy(&VAL, &that.VAL, sizeof(uint64_t));
+    memcpy(&VAL, &that.VAL, sizeof(uint64_t) * 2);
 
     // If 'this == &that', avoid zeroing our own bitwidth by storing to 'that'
     // first.
@@ -712,8 +759,8 @@
   /// logically zero-extended or truncated to match the bit-width of
   /// the LHS.
   APInt &operator|=(uint64_t RHS) {
-    if (isSingleWord()) {
-      VAL |= RHS;
+    if (isInline()) {
+      VAL[0] |= RHS;
       clearUnusedBits();
     } else {
       pVal[0] |= RHS;
@@ -774,7 +821,12 @@
   APInt operator&(const APInt &RHS) const {
     assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
     if (isSingleWord())
-      return APInt(getBitWidth(), VAL & RHS.VAL);
+      return APInt(getBitWidth(), VAL[0] & RHS.VAL[0]);
+    if (isTwoWords()) {
+      uint64_t Lo = VAL[0] & RHS.VAL[0];
+      uint64_t Hi = VAL[1] & RHS.VAL[1];
+      return APInt(getBitWidth(), {Lo, Hi});
+    }
     return AndSlowCase(RHS);
   }
   APInt LLVM_ATTRIBUTE_UNUSED_RESULT And(const APInt &RHS) const {
@@ -789,7 +841,12 @@
   APInt operator|(const APInt &RHS) const {
     assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
     if (isSingleWord())
-      return APInt(getBitWidth(), VAL | RHS.VAL);
+      return APInt(getBitWidth(), VAL[0] | RHS.VAL[0]);
+    if (isTwoWords()) {
+      uint64_t Lo = VAL[0] | RHS.VAL[0];
+      uint64_t Hi = VAL[1] | RHS.VAL[1];
+      return APInt(getBitWidth(), {Lo, Hi});
+    }
     return OrSlowCase(RHS);
   }
 
@@ -811,7 +868,12 @@
   APInt operator^(const APInt &RHS) const {
     assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
     if (isSingleWord())
-      return APInt(BitWidth, VAL ^ RHS.VAL);
+      return APInt(BitWidth, VAL[0] ^ RHS.VAL[0]);
+    if (isTwoWords()) {
+      uint64_t Lo = VAL[0] ^ RHS.VAL[0];
+      uint64_t Hi = VAL[1] ^ RHS.VAL[1];
+      return APInt(getBitWidth(), {Lo, Hi});
+    }
     return XorSlowCase(RHS);
   }
 
@@ -855,10 +917,25 @@
   /// Left-shift this APInt by shiftAmt.
   APInt LLVM_ATTRIBUTE_UNUSED_RESULT shl(unsigned shiftAmt) const {
     assert(shiftAmt <= BitWidth && "Invalid shift amount");
-    if (isSingleWord()) {
-      if (shiftAmt >= BitWidth)
-        return APInt(BitWidth, 0); // avoid undefined shift results
-      return APInt(BitWidth, VAL << shiftAmt);
+    if (shiftAmt >= BitWidth)
+      return APInt(BitWidth, 0); // avoid undefined shift results
+    if (isSingleWord())
+      return APInt(BitWidth, VAL[0] << shiftAmt);
+    if (isTwoWords()) {
+      if (shiftAmt == 0)
+        return *this;
+      // 0 < shiftAmt < BitWidth here, so every shift count below is in range.
+      uint64_t Hi;
+      uint64_t Lo;
+      if (shiftAmt < APINT_BITS_PER_WORD) {
+        Hi =
+            (VAL[1] << shiftAmt) | (VAL[0] >> (APINT_BITS_PER_WORD - shiftAmt));
+        Lo = VAL[0] << shiftAmt;
+      } else {
+        Hi = VAL[0] << (shiftAmt - APINT_BITS_PER_WORD);
+        Lo = 0;
+      }
+      return APInt(BitWidth, {Lo, Hi});
     }
     return shlSlowCase(shiftAmt);
   }
@@ -948,9 +1025,9 @@
   /// \returns the bit value at bitPosition
   bool operator[](unsigned bitPosition) const {
     assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
-    return (maskBit(bitPosition) &
-            (isSingleWord() ? VAL : pVal[whichWord(bitPosition)])) !=
-           0;
+    uint64_t MaskBit = maskBit(bitPosition);
+    uint64_t Word = getWord(bitPosition);
+    return (MaskBit & Word) != 0;
   }
 
   /// @}
@@ -964,7 +1041,9 @@
   bool operator==(const APInt &RHS) const {
     assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths");
     if (isSingleWord())
-      return VAL == RHS.VAL;
+      return VAL[0] == RHS.VAL[0];
+    if (isTwoWords())
+      return VAL[0] == RHS.VAL[0] && VAL[1] == RHS.VAL[1];
     return EqualSlowCase(RHS);
   }
 
@@ -976,7 +1055,9 @@
   /// \returns true if *this == Val
   bool operator==(uint64_t Val) const {
     if (isSingleWord())
-      return VAL == Val;
+      return VAL[0] == Val;
+    if (isTwoWords())
+      return VAL[0] == Val && VAL[1] == 0;
     return EqualSlowCase(Val);
   }
 
@@ -1207,9 +1288,12 @@
 
   /// \brief Set every bit to 1.
   void setAllBits() {
-    if (isSingleWord())
-      VAL = UINT64_MAX;
-    else {
+    if (isSingleWord()) {
+      VAL[0] = UINT64_MAX;
+    } else if (isTwoWords()) {
+      VAL[0] = UINT64_MAX;
+      VAL[1] = UINT64_MAX;
+    } else {
       // Set all the bits in all the words.
       for (unsigned i = 0; i < getNumWords(); ++i)
         pVal[i] = UINT64_MAX;
@@ -1225,10 +1309,14 @@
 
   /// \brief Set every bit to 0.
   void clearAllBits() {
-    if (isSingleWord())
-      VAL = 0;
-    else
+    if (isInline()) {
+      // Only touch VAL[1] when it is actually part of the value.
+      VAL[0] = 0;
+      if (isTwoWords())
+        VAL[1] = 0;
+    } else {
       memset(pVal, 0, getNumWords() * APINT_WORD_SIZE);
+    }
   }
 
   /// \brief Set a given bit to 0.
@@ -1238,9 +1326,12 @@
 
   /// \brief Toggle every bit to its opposite value.
   void flipAllBits() {
-    if (isSingleWord())
-      VAL ^= UINT64_MAX;
-    else {
+    if (isSingleWord()) {
+      VAL[0] ^= UINT64_MAX;
+    } else if (isTwoWords()) {
+      VAL[0] ^= UINT64_MAX;
+      VAL[1] ^= UINT64_MAX;
+    } else {
       for (unsigned i = 0; i < getNumWords(); ++i)
         pVal[i] ^= UINT64_MAX;
     }
@@ -1314,7 +1405,11 @@
   /// uint64_t. Otherwise an assertion will result.
   uint64_t getZExtValue() const {
     if (isSingleWord())
-      return VAL;
+      return VAL[0];
+    if (isTwoWords()) {
+      assert(VAL[1] == 0 && "Too many bits for uint64_t");
+      return VAL[0];
+    }
     assert(getActiveBits() <= 64 && "Too many bits for uint64_t");
     return pVal[0];
   }
@@ -1326,8 +1421,16 @@
   /// int64_t. Otherwise an assertion will result.
   int64_t getSExtValue() const {
     if (isSingleWord())
-      return int64_t(VAL << (APINT_BITS_PER_WORD - BitWidth)) >>
+      return int64_t(VAL[0] << (APINT_BITS_PER_WORD - BitWidth)) >>
              (APINT_BITS_PER_WORD - BitWidth);
+    if (isTwoWords()) {
+      int64_t Sext = int64_t(VAL[0]);
+      assert((Sext < 0
+                  ? SignExtend64(VAL[1], BitWidth - APINT_BITS_PER_WORD) == -1
+                  : VAL[1] == 0) &&
+             "Too many bits for int64_t");
+      return Sext;
+    }
     assert(getMinSignedBits() <= 64 && "Too many bits for int64_t");
     return int64_t(pVal[0]);
   }
@@ -1349,7 +1452,14 @@
   unsigned countLeadingZeros() const {
     if (isSingleWord()) {
       unsigned unusedBits = APINT_BITS_PER_WORD - BitWidth;
-      return llvm::countLeadingZeros(VAL) - unusedBits;
+      return llvm::countLeadingZeros(VAL[0]) - unusedBits;
+    }
+    if (isTwoWords()) {
+      if (VAL[1] == 0)
+        return llvm::countLeadingZeros(VAL[0]) +
+               (BitWidth - APINT_BITS_PER_WORD);
+      unsigned unusedBits = APINT_BITS_PER_WORD * 2 - BitWidth;
+      return llvm::countLeadingZeros(VAL[1]) - unusedBits;
     }
     return countLeadingZerosSlowCase();
   }
@@ -1390,7 +1500,11 @@
   /// of ones from the least significant bit to the first zero bit.
   unsigned countTrailingOnes() const {
     if (isSingleWord())
-      return llvm::countTrailingOnes(VAL);
+      return llvm::countTrailingOnes(VAL[0]);
+    if (isTwoWords())
+      return VAL[0] == -1ULL
+                 ? APINT_BITS_PER_WORD + llvm::countTrailingOnes(VAL[1])
+                 : llvm::countTrailingOnes(VAL[0]);
     return countTrailingOnesSlowCase();
   }
 
@@ -1402,7 +1516,9 @@
   /// \returns 0 if the value is zero, otherwise returns the number of set bits.
   unsigned countPopulation() const {
     if (isSingleWord())
-      return llvm::countPopulation(VAL);
+      return llvm::countPopulation(VAL[0]);
+    if (isTwoWords())
+      return llvm::countPopulation(VAL[0]) + llvm::countPopulation(VAL[1]);
     return countPopulationSlowCase();
   }
 
@@ -1461,7 +1577,7 @@
       uint64_t I;
       double D;
     } T;
-    T.I = (isSingleWord() ? VAL : pVal[0]);
+    T.I = (isInline() ? VAL[0] : pVal[0]);
     return T.D;
   }
 
@@ -1475,7 +1591,7 @@
       unsigned I;
       float F;
     } T;
-    T.I = unsigned((isSingleWord() ? VAL : pVal[0]));
+    T.I = unsigned((isInline() ? VAL[0] : pVal[0]));
     return T.F;
   }
 
@@ -1533,7 +1649,7 @@
     // get 0. If VAL is 0, we get UINT64_MAX which gets truncated to
     // UINT32_MAX.
     if (BitWidth == 1)
-      return VAL - 1;
+      return VAL[0] - 1;
 
     // Handle the zero case.
     if (!getBoolValue())
Index: lib/IR/LLVMContextImpl.h
===================================================================
--- lib/IR/LLVMContextImpl.h
+++ lib/IR/LLVMContextImpl.h
@@ -50,12 +50,12 @@
 struct DenseMapAPIntKeyInfo {
   static inline APInt getEmptyKey() {
     APInt V(nullptr, 0);
-    V.VAL = 0;
+    V.VAL[0] = 0;
     return V;
   }
   static inline APInt getTombstoneKey() {
     APInt V(nullptr, 0);
-    V.VAL = 1;
+    V.VAL[0] = 1;
     return V;
   }
   static unsigned getHashValue(const APInt &Key) {
Index: lib/Support/APInt.cpp
===================================================================
--- lib/Support/APInt.cpp
+++ lib/Support/APInt.cpp
@@ -91,9 +91,15 @@
 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
   assert(BitWidth && "Bitwidth too small");
   assert(bigVal.data() && "Null pointer detected!");
-  if (isSingleWord())
-    VAL = bigVal[0];
-  else {
+  if (isSingleWord()) {
+    VAL[0] = bigVal[0];
+  } else if (isTwoWords()) {
+    VAL[0] = bigVal[0];
+    if (bigVal.size() > 1)
+      VAL[1] = bigVal[1];
+    else
+      VAL[1] = 0;
+  } else {
     // Get memory, cleared to 0
     pVal = getClearedMemory(getNumWords());
     // Calculate the number of words to copy
@@ -106,18 +112,23 @@
 }
 
 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
-  : BitWidth(numBits), VAL(0) {
+  : BitWidth(numBits) {
+  VAL[0] = 0;
   initFromArray(bigVal);
 }
 
 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
-  : BitWidth(numBits), VAL(0) {
+  : BitWidth(numBits) {
+  VAL[0] = 0;
   initFromArray(makeArrayRef(bigVal, numWords));
 }
 
 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
-  : BitWidth(numbits), VAL(0) {
+  : BitWidth(numbits) {
   assert(BitWidth && "Bitwidth too small");
+  VAL[0] = 0;
+  if (isTwoWords())
+    VAL[1] = 0;
   fromString(numbits, Str, radix);
 }
 
@@ -128,22 +139,24 @@
 
   if (BitWidth == RHS.getBitWidth()) {
-    // assume same bit-width single-word case is already handled
+    // assume same bit-width in-line case is already handled
-    assert(!isSingleWord());
+    assert(!isInline());
     memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
     return *this;
   }
 
-  if (isSingleWord()) {
+  if (isInline()) {
-    // assume case where both are single words is already handled
+    // assume case where both are stored in-line is already handled
-    assert(!RHS.isSingleWord());
-    VAL = 0;
+    assert(!RHS.isInline());
+    VAL[0] = 0;
     pVal = getMemory(RHS.getNumWords());
     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
   } else if (getNumWords() == RHS.getNumWords())
     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
-  else if (RHS.isSingleWord()) {
+  else if (RHS.isInline()) {
     delete [] pVal;
-    VAL = RHS.VAL;
+    VAL[0] = RHS.VAL[0];
+    if (RHS.isTwoWords())
+      VAL[1] = RHS.VAL[1];
   } else {
     delete [] pVal;
     pVal = getMemory(RHS.getNumWords());
@@ -154,9 +167,11 @@
 }
 
 APInt& APInt::operator=(uint64_t RHS) {
-  if (isSingleWord())
-    VAL = RHS;
-  else {
+  if (isInline()) {
+    VAL[0] = RHS;
+    if (isTwoWords())
+      VAL[1] = 0;
+  } else {
     pVal[0] = RHS;
     memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
   }
@@ -168,7 +183,13 @@
   ID.AddInteger(BitWidth);
 
   if (isSingleWord()) {
-    ID.AddInteger(VAL);
+    ID.AddInteger(VAL[0]);
+    return;
+  }
+
+  if (isTwoWords()) {
+    ID.AddInteger(VAL[0]);
+    ID.AddInteger(VAL[1]);
     return;
   }
 
@@ -197,7 +218,9 @@
 /// @brief Prefix increment operator. Increments the APInt by one.
 APInt& APInt::operator++() {
   if (isSingleWord())
-    ++VAL;
+    ++VAL[0];
+  else if (isTwoWords())
+    add_1(VAL, VAL, /*len=*/2, 1);
   else
     add_1(pVal, pVal, getNumWords(), 1);
   return clearUnusedBits();
@@ -226,7 +249,9 @@
 /// @brief Prefix decrement operator. Decrements the APInt by one.
 APInt& APInt::operator--() {
   if (isSingleWord())
-    --VAL;
+    --VAL[0];
+  else if (isTwoWords())
+    sub_1(VAL, /*len=*/2, 1);
   else
     sub_1(pVal, getNumWords(), 1);
   return clearUnusedBits();
@@ -252,9 +277,11 @@
 /// @brief Addition assignment operator.
 APInt& APInt::operator+=(const APInt& RHS) {
   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
-  if (isSingleWord())
-    VAL += RHS.VAL;
-  else {
+  if (isSingleWord()){
+    VAL[0] += RHS.VAL[0];
+  } else if (isTwoWords()) {
+    add(VAL, VAL, RHS.VAL, /*len=*/2);
+  } else {
     add(pVal, pVal, RHS.pVal, getNumWords());
   }
   return clearUnusedBits();
@@ -262,7 +289,9 @@
 
 APInt& APInt::operator+=(uint64_t RHS) {
   if (isSingleWord())
-    VAL += RHS;
+    VAL[0] += RHS;
+  else if (isTwoWords())
+    add_1(VAL, VAL, /*len=*/2, RHS);
   else
     add_1(pVal, pVal, getNumWords(), RHS);
   return clearUnusedBits();
@@ -288,7 +317,9 @@
 APInt& APInt::operator-=(const APInt& RHS) {
   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
   if (isSingleWord())
-    VAL -= RHS.VAL;
+    VAL[0] -= RHS.VAL[0];
+  else if (isTwoWords())
+    sub(VAL, VAL, RHS.VAL, /*len=*/2);
   else
     sub(pVal, pVal, RHS.pVal, getNumWords());
   return clearUnusedBits();
@@ -296,7 +327,9 @@
 
 APInt& APInt::operator-=(uint64_t RHS) {
   if (isSingleWord())
-    VAL -= RHS;
+    VAL[0] -= RHS;
+  else if (isTwoWords())
+    sub_1(VAL, /*len=*/2, RHS);
   else
     sub_1(pVal, getNumWords(), RHS);
   return clearUnusedBits();
@@ -373,10 +406,48 @@
 APInt& APInt::operator*=(const APInt& RHS) {
   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
   if (isSingleWord()) {
-    VAL *= RHS.VAL;
+    VAL[0] *= RHS.VAL[0];
     clearUnusedBits();
     return *this;
   }
+  if (isTwoWords()) {
+#ifdef __SIZEOF_INT128__
+    __uint128_t LHSVal = (((__uint128_t)VAL[1]) << 64) | VAL[0];
+    __uint128_t RHSVal = (((__uint128_t)RHS.VAL[1]) << 64) | RHS.VAL[0];
+    __uint128_t Result = LHSVal * RHSVal;
+    VAL[0] = Result;
+    VAL[1] = Result >> 64;
+    clearUnusedBits();
+    return *this;
+#else
+    // https://chromium.googlesource.com/chromium/src/net/+/master/base/int128.h
+    uint64_t hi = VAL[1];
+    uint64_t lo = VAL[0];
+    uint64_t rhs_hi = RHS.VAL[1];
+    uint64_t rhs_lo = RHS.VAL[0];
+    uint64_t a96 = hi >> 32;
+    uint64_t a64 = hi & 0xffffffffu;
+    uint64_t a32 = lo >> 32;
+    uint64_t a00 = lo & 0xffffffffu;
+    uint64_t b96 = rhs_hi >> 32;
+    uint64_t b64 = rhs_hi & 0xffffffffu;
+    uint64_t b32 = rhs_lo >> 32;
+    uint64_t b00 = rhs_lo & 0xffffffffu;
+    // multiply [a96 .. a00] x [b96 .. b00]
+    // terms higher than c96 disappear off the high side
+    // terms c96 and c64 are safe to ignore carry bit
+    uint64_t c96 = a96 * b00 + a64 * b32 + a32 * b64 + a00 * b96;
+    uint64_t c64 = a64 * b00 + a32 * b32 + a00 * b64;
+    VAL[1] = (c96 << 32) + c64;
+    VAL[0] = 0;
+    clearUnusedBits();
+    // add terms after this one at a time to capture carry
+    *this += APInt(BitWidth, a32 * b00) << 32;
+    *this += APInt(BitWidth, a00 * b32) << 32;
+    *this += a00 * b00;
+    return *this;
+#endif
+  }
 
   // Get some bit facts about LHS and check for zero
   unsigned lhsBits = getActiveBits();
@@ -415,7 +486,12 @@
 APInt& APInt::operator&=(const APInt& RHS) {
   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
   if (isSingleWord()) {
-    VAL &= RHS.VAL;
+    VAL[0] &= RHS.VAL[0];
+    return *this;
+  }
+  if (isTwoWords()) {
+    VAL[0] &= RHS.VAL[0];
+    VAL[1] &= RHS.VAL[1];
     return *this;
   }
   unsigned numWords = getNumWords();
@@ -427,7 +503,12 @@
 APInt& APInt::operator|=(const APInt& RHS) {
   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
   if (isSingleWord()) {
-    VAL |= RHS.VAL;
+    VAL[0] |= RHS.VAL[0];
+    return *this;
+  }
+  if (isTwoWords()) {
+    VAL[0] |= RHS.VAL[0];
+    VAL[1] |= RHS.VAL[1];
     return *this;
   }
   unsigned numWords = getNumWords();
@@ -439,7 +520,13 @@
 APInt& APInt::operator^=(const APInt& RHS) {
   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
   if (isSingleWord()) {
-    VAL ^= RHS.VAL;
+    VAL[0] ^= RHS.VAL[0];
+    this->clearUnusedBits();
+    return *this;
+  }
+  if (isTwoWords()) {
+    VAL[0] ^= RHS.VAL[0];
+    VAL[1] ^= RHS.VAL[1];
     this->clearUnusedBits();
     return *this;
   }
@@ -480,7 +567,7 @@
 APInt APInt::operator*(const APInt& RHS) const {
   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
   if (isSingleWord())
-    return APInt(BitWidth, VAL * RHS.VAL);
+    return APInt(BitWidth, VAL[0] * RHS.VAL[0]);
   APInt Result(*this);
   Result *= RHS;
   return Result;
@@ -501,7 +588,9 @@
 bool APInt::ult(const APInt& RHS) const {
   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
   if (isSingleWord())
-    return VAL < RHS.VAL;
+    return VAL[0] < RHS.VAL[0];
+  if (isTwoWords())
+    return VAL[1] == RHS.VAL[1] ? VAL[0] < RHS.VAL[0] : VAL[1] < RHS.VAL[1];
 
   // Get active bit length of both operands
   unsigned n1 = getActiveBits();
@@ -533,8 +622,16 @@
 bool APInt::slt(const APInt& RHS) const {
   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
   if (isSingleWord()) {
-    int64_t lhsSext = SignExtend64(VAL, BitWidth);
-    int64_t rhsSext = SignExtend64(RHS.VAL, BitWidth);
+    int64_t lhsSext = SignExtend64(VAL[0], BitWidth);
+    int64_t rhsSext = SignExtend64(RHS.VAL[0], BitWidth);
+    return lhsSext < rhsSext;
+  }
+  if (isTwoWords()) {
+    if (VAL[1] == RHS.VAL[1])
+      return VAL[0] < RHS.VAL[0];
+    unsigned HighBits = BitWidth - APINT_BITS_PER_WORD;
+    int64_t lhsSext = SignExtend64(VAL[1], HighBits);
+    int64_t rhsSext = SignExtend64(RHS.VAL[1], HighBits);
     return lhsSext < rhsSext;
   }
 
@@ -552,7 +649,9 @@
 
 void APInt::setBit(unsigned bitPosition) {
   if (isSingleWord())
-    VAL |= maskBit(bitPosition);
+    VAL[0] |= maskBit(bitPosition);
+  else if (isTwoWords())
+    VAL[whichWord(bitPosition)] |= maskBit(bitPosition);
   else
     pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
 }
@@ -561,7 +660,9 @@
 /// @brief Set a given bit to 0.
 void APInt::clearBit(unsigned bitPosition) {
   if (isSingleWord())
-    VAL &= ~maskBit(bitPosition);
+    VAL[0] &= ~maskBit(bitPosition);
+  else if (isTwoWords())
+    VAL[whichWord(bitPosition)] &= ~maskBit(bitPosition);
   else
     pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
 }
@@ -632,7 +733,9 @@
 
 hash_code llvm::hash_value(const APInt &Arg) {
   if (Arg.isSingleWord())
-    return hash_combine(Arg.VAL);
+    return hash_combine(Arg.VAL[0]);
+  if (Arg.isTwoWords())
+    return hash_combine(Arg.VAL[0], Arg.VAL[1]);
 
   return hash_combine_range(Arg.pVal, Arg.pVal + Arg.getNumWords());
 }
@@ -675,7 +778,14 @@
 
 unsigned APInt::countLeadingOnes() const {
   if (isSingleWord())
-    return llvm::countLeadingOnes(VAL << (APINT_BITS_PER_WORD - BitWidth));
+    return llvm::countLeadingOnes(VAL[0] << (APINT_BITS_PER_WORD - BitWidth));
+  if (isTwoWords()) {
+    unsigned Count =
+        llvm::countLeadingOnes(VAL[1] << (APINT_BITS_PER_WORD * 2 - BitWidth));
+    if (Count == BitWidth - APINT_BITS_PER_WORD)
+      Count += llvm::countLeadingOnes(VAL[0]);
+    return Count;
+  }
 
   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
   unsigned shift;
@@ -702,7 +812,14 @@
 
 unsigned APInt::countTrailingZeros() const {
   if (isSingleWord())
-    return std::min(unsigned(llvm::countTrailingZeros(VAL)), BitWidth);
+    return std::min(unsigned(llvm::countTrailingZeros(VAL[0])), BitWidth);
+  if (isTwoWords()) {
+    size_t Count = llvm::countTrailingZeros(VAL[0]);
+    if (Count == APINT_BITS_PER_WORD)
+      Count += std::min(unsigned(llvm::countTrailingZeros(VAL[1])),
+                        BitWidth - APINT_BITS_PER_WORD);
+    return Count;
+  }
   unsigned Count = 0;
   unsigned i = 0;
   for (; i < getNumWords() && pVal[i] == 0; ++i)
@@ -731,7 +848,7 @@
 
 /// Perform a logical right-shift from Src to Dst, which must be equal or
 /// non-overlapping, of Words words, by Shift, which must be less than 64.
-static void lshrNear(uint64_t *Dst, uint64_t *Src, unsigned Words,
+static void lshrNear(uint64_t *Dst, const uint64_t *Src, unsigned Words,
                      unsigned Shift) {
   uint64_t Carry = 0;
   for (int I = Words - 1; I >= 0; --I) {
@@ -744,24 +861,24 @@
 APInt APInt::byteSwap() const {
   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
   if (BitWidth == 16)
-    return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
+    return APInt(BitWidth, ByteSwap_16(uint16_t(VAL[0])));
   if (BitWidth == 32)
-    return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
+    return APInt(BitWidth, ByteSwap_32(unsigned(VAL[0])));
   if (BitWidth == 48) {
-    unsigned Tmp1 = unsigned(VAL >> 16);
+    unsigned Tmp1 = unsigned(VAL[0] >> 16);
     Tmp1 = ByteSwap_32(Tmp1);
-    uint16_t Tmp2 = uint16_t(VAL);
+    uint16_t Tmp2 = uint16_t(VAL[0]);
     Tmp2 = ByteSwap_16(Tmp2);
     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
   }
   if (BitWidth == 64)
-    return APInt(BitWidth, ByteSwap_64(VAL));
+    return APInt(BitWidth, ByteSwap_64(VAL[0]));
 
   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
-    Result.pVal[I] = ByteSwap_64(pVal[N - I - 1]);
+    Result.getMutableRawData()[I] = ByteSwap_64(getRawData()[N - I - 1]);
   if (Result.BitWidth != BitWidth) {
-    lshrNear(Result.pVal, Result.pVal, getNumWords(),
+    lshrNear(Result.getMutableRawData(), Result.getRawData(), getNumWords(),
              Result.BitWidth - BitWidth);
     Result.BitWidth = BitWidth;
   }
@@ -771,13 +888,13 @@
 APInt APInt::reverseBits() const {
   switch (BitWidth) {
   case 64:
-    return APInt(BitWidth, llvm::reverseBits<uint64_t>(VAL));
+    return APInt(BitWidth, llvm::reverseBits<uint64_t>(VAL[0]));
   case 32:
-    return APInt(BitWidth, llvm::reverseBits<uint32_t>(VAL));
+    return APInt(BitWidth, llvm::reverseBits<uint32_t>(VAL[0]));
   case 16:
-    return APInt(BitWidth, llvm::reverseBits<uint16_t>(VAL));
+    return APInt(BitWidth, llvm::reverseBits<uint16_t>(VAL[0]));
   case 8:
-    return APInt(BitWidth, llvm::reverseBits<uint8_t>(VAL));
+    return APInt(BitWidth, llvm::reverseBits<uint8_t>(VAL[0]));
   default:
     break;
   }
@@ -892,13 +1009,15 @@
   uint64_t mantissa;
   unsigned hiWord = whichWord(n-1);
   if (hiWord == 0) {
-    mantissa = Tmp.pVal[0];
+    mantissa = Tmp.getRawData()[0];
     if (n > 52)
       mantissa >>= n - 52; // shift down, we want the top 52 bits.
   } else {
     assert(hiWord > 0 && "huh?");
-    uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
-    uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
+    uint64_t hibits = Tmp.getRawData()[hiWord]
+                      << (52 - n % APINT_BITS_PER_WORD);
+    uint64_t lobits =
+        Tmp.getRawData()[hiWord - 1] >> (11 + n % APINT_BITS_PER_WORD);
     mantissa = hibits | lobits;
   }
 
@@ -919,6 +1038,11 @@
 
   if (width <= APINT_BITS_PER_WORD)
     return APInt(width, getRawData()[0]);
+  if (width <= APINT_BITS_PER_WORD * 2) {
+    uint64_t Lo = getRawData()[0];
+    uint64_t Hi = getRawData()[1];
+    return APInt(width, {Lo, Hi});
+  }
 
   APInt Result(getMemory(getNumWords(width)), width);
 
@@ -940,10 +1064,22 @@
   assert(width > BitWidth && "Invalid APInt SignExtend request");
 
   if (width <= APINT_BITS_PER_WORD) {
-    uint64_t val = VAL << (APINT_BITS_PER_WORD - BitWidth);
+    uint64_t val = VAL[0] << (APINT_BITS_PER_WORD - BitWidth);
     val = (int64_t)val >> (width - BitWidth);
     return APInt(width, val >> (APINT_BITS_PER_WORD - width));
   }
+  if (width <= APINT_BITS_PER_WORD * 2) {
+    uint64_t Lo;
+    uint64_t Hi;
+    if (BitWidth > APINT_BITS_PER_WORD) {
+      Lo = VAL[0];
+      Hi = SignExtend64(VAL[1], BitWidth - APINT_BITS_PER_WORD);
+    } else {
+      Lo = SignExtend64(VAL[0], BitWidth);
+      Hi = static_cast<int64_t>(Lo) < 0 ? -1ULL : 0;
+    }
+    return APInt(width, {Lo, Hi});
+  }
 
   APInt Result(getMemory(getNumWords(width)), width);
 
@@ -981,7 +1117,12 @@
   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
 
   if (width <= APINT_BITS_PER_WORD)
-    return APInt(width, VAL);
+    return APInt(width, VAL[0]);
+  if (width <= APINT_BITS_PER_WORD * 2) {
+    uint64_t Lo = VAL[0];
+    uint64_t Hi = BitWidth > APINT_BITS_PER_WORD ? VAL[1] : 0;
+    return APInt(width, {Lo, Hi});
+  }
 
   APInt Result(getMemory(getNumWords(width)), width);
 
@@ -1038,13 +1179,6 @@
   if (shiftAmt == 0)
     return *this;
 
-  // Handle single word shifts with built-in ashr
-  if (isSingleWord()) {
-    if (shiftAmt == BitWidth)
-      return APInt(BitWidth, 0); // undefined
-    return APInt(BitWidth, SignExtend64(VAL, BitWidth) >> shiftAmt);
-  }
-
   // If all the bits were shifted out, the result is, technically, undefined.
   // We return -1 if it was negative, 0 otherwise. We check this early to avoid
   // issues in the algorithm below.
@@ -1055,6 +1189,31 @@
       return APInt(BitWidth, 0);
   }
 
+  // Handle single- and two-word shifts with the built-in shift operators.
+  if (isSingleWord())
+    return APInt(BitWidth, SignExtend64(VAL[0], BitWidth) >> shiftAmt);
+  if (isTwoWords()) {
+    uint64_t Hi;
+    uint64_t Lo;
+    unsigned HighBits = BitWidth - APINT_BITS_PER_WORD;
+    int64_t HighSext = SignExtend64(VAL[1], HighBits);
+#ifdef __SIZEOF_INT128__
+    __int128_t Val = (((__uint128_t)HighSext) << 64) | VAL[0];
+    __int128_t Result = Val >> shiftAmt;
+    Lo = Result;
+    Hi = Result >> 64;
+#else // No 128-bit integer type: shift the two halves by hand.
+    if (shiftAmt < 64) {
+      Lo = (VAL[0] >> shiftAmt) | (uint64_t(HighSext) << (64 - shiftAmt));
+      Hi = HighSext >> shiftAmt;
+    } else {
+      Lo = HighSext >> (shiftAmt - 64);
+      Hi = HighSext < 0 ? -1ULL : 0ULL;
+    }
+#endif
+    return APInt(BitWidth, {Lo, Hi});
+  }
+
   // Create some space for the result.
   uint64_t * val = new uint64_t[getNumWords()];
 
@@ -1120,19 +1279,36 @@
 /// Logical right-shift this APInt by shiftAmt.
 /// @brief Logical right-shift function.
 APInt APInt::lshr(unsigned shiftAmt) const {
-  if (isSingleWord()) {
-    if (shiftAmt >= BitWidth)
-      return APInt(BitWidth, 0);
-    else
-      return APInt(BitWidth, this->VAL >> shiftAmt);
-  }
-
   // If all the bits were shifted out, the result is 0. This avoids issues
   // with shifting by the size of the integer type, which produces undefined
   // results. We define these "undefined results" to always be 0.
   if (shiftAmt >= BitWidth)
     return APInt(BitWidth, 0);
 
+  if (isSingleWord())
+    return APInt(BitWidth, this->VAL[0] >> shiftAmt);
+  if (isTwoWords()) {
+    if (shiftAmt == 0)
+      return *this;
+    uint64_t Hi;
+    uint64_t Lo;
+#ifdef __SIZEOF_INT128__
+    __uint128_t Val = (((__uint128_t)VAL[1]) << 64) | VAL[0];
+    __uint128_t Result = Val >> shiftAmt;
+    Lo = Result;
+    Hi = Result >> 64;
+#else
+    if (shiftAmt < 64) {
+      Lo = (VAL[0] >> shiftAmt) | (VAL[1] << (64 - shiftAmt));
+      Hi = VAL[1] >> shiftAmt;
+    } else {
+      Lo = VAL[1] >> (shiftAmt - 64);
+      Hi = 0;
+    }
+#endif
+    return APInt(BitWidth, {Lo, Hi});
+  }
+
   // If none of the bits are shifted out, the result is *this. This avoids
   // issues with shifting by the size of the integer type, which produces
   // undefined results in the code below. This is also an optimization.
@@ -1189,11 +1365,7 @@
 }
 
 APInt APInt::shlSlowCase(unsigned shiftAmt) const {
-  // If all the bits were shifted out, the result is 0. This avoids issues
-  // with shifting by the size of the integer type, which produces undefined
-  // results. We define these "undefined results" to always be 0.
-  if (shiftAmt == BitWidth)
-    return APInt(BitWidth, 0);
+  assert(shiftAmt <= BitWidth && "Invalid shift amount");
 
   // If none of the bits are shifted out, the result is *this. This avoids a
   // lshr by the words size in the loop below which can produce incorrect
@@ -1290,7 +1462,7 @@
       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       /*    31 */ 6
     };
-    return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
+    return APInt(BitWidth, results[getRawData()[0]]);
   }
 
   // If the magnitude of the value fits in less than 52 bits (the precision of
@@ -1298,8 +1470,7 @@
   // libc sqrt function which will probably use a hardware sqrt computation.
   // This should be faster than the algorithm below.
   if (magnitude < 52) {
-    return APInt(BitWidth,
-                 uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
+    return APInt(BitWidth, uint64_t(::round(::sqrt(double(getRawData()[0])))));
   }
 
   // Okay, all the short cuts are exhausted. We must compute it. The following
@@ -1677,7 +1848,7 @@
   // Initialize the dividend
   memset(U, 0, (m+n+1)*sizeof(unsigned));
   for (unsigned i = 0; i < lhsWords; ++i) {
-    uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
+    uint64_t tmp = LHS.getRawData()[i];
     U[i * 2] = (unsigned)(tmp & mask);
     U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
   }
@@ -1686,7 +1857,7 @@
   // Initialize the divisor
   memset(V, 0, (n)*sizeof(unsigned));
   for (unsigned i = 0; i < rhsWords; ++i) {
-    uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
+    uint64_t tmp = RHS.getRawData()[i];
     V[i * 2] = (unsigned)(tmp & mask);
     V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
   }
@@ -1745,12 +1916,16 @@
   if (Quotient) {
     // Set up the Quotient value's memory.
     if (Quotient->BitWidth != LHS.BitWidth) {
-      if (Quotient->isSingleWord())
-        Quotient->VAL = 0;
-      else
+      // Free heap storage owned under the old width; inline words need none.
+      if (!Quotient->isInline())
         delete [] Quotient->pVal;
       Quotient->BitWidth = LHS.BitWidth;
-      if (!Quotient->isSingleWord())
+      // Zero storage for the NEW width so no stale inline word survives.
+      if (Quotient->isInline()) {
+        Quotient->VAL[0] = 0;
+        Quotient->VAL[1] = 0;
+      } else {
         Quotient->pVal = getClearedMemory(Quotient->getNumWords());
+      }
     } else
       Quotient->clearAllBits();
@@ -1762,15 +1937,12 @@
     if (lhsWords == 1) {
       uint64_t tmp =
         uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
-      if (Quotient->isSingleWord())
-        Quotient->VAL = tmp;
-      else
-        Quotient->pVal[0] = tmp;
+      Quotient->getMutableRawData()[0] = tmp;
     } else {
-      assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
       for (unsigned i = 0; i < lhsWords; ++i)
-        Quotient->pVal[i] =
-          uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
+        Quotient->getMutableRawData()[i] =
+            uint64_t(Q[i * 2]) |
+            (uint64_t(Q[i * 2 + 1]) << (APINT_BITS_PER_WORD / 2));
     }
   }
 
@@ -1778,12 +1950,16 @@
   if (Remainder) {
     // Set up the Remainder value's memory.
     if (Remainder->BitWidth != RHS.BitWidth) {
-      if (Remainder->isSingleWord())
-        Remainder->VAL = 0;
-      else
+      // Free heap storage owned under the old width; inline words need none.
+      if (!Remainder->isInline())
         delete [] Remainder->pVal;
       Remainder->BitWidth = RHS.BitWidth;
-      if (!Remainder->isSingleWord())
+      // Zero storage for the NEW width so no stale inline word survives.
+      if (Remainder->isInline()) {
+        Remainder->VAL[0] = 0;
+        Remainder->VAL[1] = 0;
+      } else {
         Remainder->pVal = getClearedMemory(Remainder->getNumWords());
+      }
     } else
       Remainder->clearAllBits();
@@ -1793,15 +1969,12 @@
     if (rhsWords == 1) {
       uint64_t tmp =
         uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
-      if (Remainder->isSingleWord())
-        Remainder->VAL = tmp;
-      else
-        Remainder->pVal[0] = tmp;
+      Remainder->getMutableRawData()[0] = tmp;
     } else {
-      assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
       for (unsigned i = 0; i < rhsWords; ++i)
-        Remainder->pVal[i] =
-          uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
+        Remainder->getMutableRawData()[i] =
+            uint64_t(R[i * 2]) |
+            (uint64_t(R[i * 2 + 1]) << (APINT_BITS_PER_WORD / 2));
     }
   }
 
@@ -1819,9 +1992,19 @@
 
   // First, deal with the easy case
   if (isSingleWord()) {
-    assert(RHS.VAL != 0 && "Divide by zero?");
-    return APInt(BitWidth, VAL / RHS.VAL);
+    assert(RHS.VAL[0] != 0 && "Divide by zero?");
+    return APInt(BitWidth, VAL[0] / RHS.VAL[0]);
+  }
+#ifdef __SIZEOF_INT128__
+  if (isTwoWords()) {
+    __uint128_t LHSVal = (((__uint128_t)VAL[1]) << 64) | VAL[0];
+    __uint128_t RHSVal = (((__uint128_t)RHS.VAL[1]) << 64) | RHS.VAL[0];
+    // As in the single-word path, dividing by zero is a caller bug.
+    assert(RHSVal != 0 && "Divide by zero?");
+    __uint128_t Result = LHSVal / RHSVal;
+    return APInt(BitWidth, {uint64_t(Result), uint64_t(Result >> 64)});
   }
+#endif
 
   // Get some facts about the LHS and RHS number of bits and words
   unsigned rhsBits = RHS.getActiveBits();
@@ -1865,9 +2048,19 @@
 APInt APInt::urem(const APInt& RHS) const {
   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
   if (isSingleWord()) {
-    assert(RHS.VAL != 0 && "Remainder by zero?");
-    return APInt(BitWidth, VAL % RHS.VAL);
+    assert(RHS.VAL[0] != 0 && "Remainder by zero?");
+    return APInt(BitWidth, VAL[0] % RHS.VAL[0]);
   }
+#ifdef __SIZEOF_INT128__
+  if (isTwoWords()) {
+    __uint128_t LHSVal = (((__uint128_t)VAL[1]) << 64) | VAL[0];
+    __uint128_t RHSVal = (((__uint128_t)RHS.VAL[1]) << 64) | RHS.VAL[0];
+    // As in the single-word path, a zero divisor is a caller bug.
+    assert(RHSVal != 0 && "Remainder by zero?");
+    __uint128_t Result = LHSVal % RHSVal;
+    return APInt(BitWidth, {uint64_t(Result), uint64_t(Result >> 64)});
+  }
+#endif
 
   // Get some facts about the LHS
   unsigned lhsBits = getActiveBits();
@@ -1916,13 +2109,28 @@
 
   // First, deal with the easy case
   if (LHS.isSingleWord()) {
-    assert(RHS.VAL != 0 && "Divide by zero?");
-    uint64_t QuotVal = LHS.VAL / RHS.VAL;
-    uint64_t RemVal = LHS.VAL % RHS.VAL;
+    assert(RHS.VAL[0] != 0 && "Divide by zero?");
+    uint64_t QuotVal = LHS.VAL[0] / RHS.VAL[0];
+    uint64_t RemVal = LHS.VAL[0] % RHS.VAL[0];
     Quotient = APInt(LHS.BitWidth, QuotVal);
     Remainder = APInt(LHS.BitWidth, RemVal);
     return;
   }
+#ifdef __SIZEOF_INT128__
+  if (LHS.isTwoWords()) {
+    __uint128_t LHSVal = (((__uint128_t)LHS.VAL[1]) << 64) | LHS.VAL[0];
+    __uint128_t RHSVal = (((__uint128_t)RHS.VAL[1]) << 64) | RHS.VAL[0];
+    assert(RHSVal != 0 && "Divide by zero?");
+    __uint128_t QuotVal = LHSVal / RHSVal;
+    __uint128_t RemVal = LHSVal % RHSVal;
+    // Rebuild the results so any previous width/storage is replaced safely.
+    Quotient =
+        APInt(LHS.BitWidth, {uint64_t(QuotVal), uint64_t(QuotVal >> 64)});
+    Remainder =
+        APInt(LHS.BitWidth, {uint64_t(RemVal), uint64_t(RemVal >> 64)});
+    return;
+  }
+#endif
 
   // Get some size facts about the dividend and divisor
   unsigned lhsBits  = LHS.getActiveBits();
@@ -1951,8 +2159,8 @@
 
   if (lhsWords == 1 && rhsWords == 1) {
     // There is only one word to consider so use the native versions.
-    uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
-    uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
+    uint64_t lhsValue = LHS.getRawData()[0];
+    uint64_t rhsValue = RHS.getRawData()[0];
     Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
     Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
     return;
@@ -2080,7 +2288,7 @@
          "Insufficient bit width");
 
   // Allocate memory
-  if (!isSingleWord())
+  if (!isInline())
     pVal = getClearedMemory(getNumWords());
 
   // Figure out if we can shift instead of multiply
@@ -2105,10 +2313,7 @@
     }
 
     // Add in the digit we just interpreted
-    if (apdigit.isSingleWord())
-      apdigit.VAL = digit;
-    else
-      apdigit.pVal[0] = digit;
+    apdigit.getMutableRawData()[0] = digit;
     *this += apdigit;
   }
   // If its negative, put it in two's complement form
Index: unittests/ADT/APIntTest.cpp
===================================================================
--- unittests/ADT/APIntTest.cpp
+++ unittests/ADT/APIntTest.cpp
@@ -164,7 +164,7 @@
   EXPECT_EQ(zero, one.shl(1));
   EXPECT_EQ(one, one.shl(0));
   EXPECT_EQ(zero, one.lshr(1));
-  EXPECT_EQ(zero, one.ashr(1));
+  EXPECT_EQ(neg_one, one.ashr(1));
 
   // Rotates.
   EXPECT_EQ(one, one.rotl(0));
Index: unittests/ADT/APSIntTest.cpp
===================================================================
--- unittests/ADT/APSIntTest.cpp
+++ unittests/ADT/APSIntTest.cpp
@@ -34,7 +34,7 @@
   A = APSInt(64, true);
   EXPECT_TRUE(A.isUnsigned());
 
-  Wide = APInt(128, 1);
+  Wide = APInt(129, 1);
   Bits = Wide.getRawData();
   A = std::move(Wide);
   EXPECT_TRUE(A.isUnsigned());