Index: bindings/go/llvm/DIBuilderBindings.cpp =================================================================== --- bindings/go/llvm/DIBuilderBindings.cpp +++ bindings/go/llvm/DIBuilderBindings.cpp @@ -19,8 +19,6 @@ using namespace llvm; -DEFINE_SIMPLE_CONVERSION_FUNCTIONS(DIBuilder, LLVMDIBuilderRef) - LLVMDIBuilderRef LLVMNewDIBuilder(LLVMModuleRef mref) { Module *m = unwrap(mref); return wrap(new DIBuilder(*m)); Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -12290,6 +12290,7 @@ assumed. This argument must be one of the following strings: :: + "round.dynamic" "round.tonearest" "round.downward" @@ -12321,6 +12322,7 @@ strings: :: + "fpexcept.ignore" "fpexcept.maytrap" "fpexcept.strict" Index: include/llvm-c/DebugInfo.h =================================================================== --- /dev/null +++ include/llvm-c/DebugInfo.h @@ -0,0 +1,121 @@ +//===------------ DebugInfo.h - LLVM C API Debug Info API -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the C API endpoints for generating DWARF Debug Info +// +// Note: This interface is experimental. It is *NOT* stable, and may be +// changed without warning. +// +//===----------------------------------------------------------------------===// + +#include "llvm-c/Core.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/// Debug info flags. +typedef enum { +#define HANDLE_DI_FLAG(ID, NAME) LLVMDIFlag##NAME = ID, + #include "llvm/IR/DebugInfoFlags.def" + LLVMDIFlagAccessibility = LLVMDIFlagPrivate + | LLVMDIFlagProtected + | LLVMDIFlagPublic +} LLVMDIFlags; + +/// Source languages known by DWARF. +typedef enum { +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) LLVMDWARFSourceLanguage##NAME = ID, + #include "llvm/Support/Dwarf.def" +} LLVMDWARFSourceLanguage; + +/// The amount of debug information to emit. +typedef enum { + LLVMDWARFEmissionNone = 0, + LLVMDWARFEmissionFull, + LLVMDWARFEmissionLineTablesOnly +} LLVMDWARFEmissionKind; + +/// The current debug metadata version number. +uint32_t LLVMDebugMetadataVersion(); + +/// The version of debug metadata that's present in the provided \c Module. +unsigned LLVMGetModuleDebugMetadataVersion(LLVMModuleRef Module); + +/// \brief Strip debug info in the module if it exists. +/// +/// To do this, we remove all calls to the debugger intrinsics and any named +/// metadata for debugging. We also remove debug locations for instructions. +/// Return true if module is modified. +uint8_t LLVMStripModuleDebugInfo(LLVMModuleRef Module); + +/// Construct a builder for a module. +/// +/// If \c AllowUnresolved, collect unresolved nodes attached to the module +/// in order to resolve cycles during a call to \c LLVMDIBuilderFinalize. +LLVMDIBuilderRef LLVMDIBuilderCreate(LLVMModuleRef M, uint8_t AllowUnresolved); + +/// Deallocates the DIBuilder and everything it owns. +void LLVMDIBuilderDispose(LLVMDIBuilderRef Builder); + +/// Construct any deferred debug info descriptors. +void LLVMDIBuilderFinalize(LLVMDIBuilderRef Builder); + +/// A CompileUnit provides an anchor for all debugging +/// information generated during this instance of compilation. +/// \param Lang Source programming language, eg. +/// \c LLVMDWARFSourceLanguageC99 +/// \param File File info. 
+/// \param Producer Identify the producer of debugging information +/// and code. Usually this is a compiler +/// version string. +/// \param isOptimized A boolean flag which indicates whether optimization +/// is enabled or not. +/// \param Flags This string lists command line options. This +/// string is directly embedded in debug info +/// output which may be used by a tool +/// analyzing generated debugging information. +/// \param RuntimeVer This indicates runtime version for languages like +/// Objective-C. +/// \param SplitName The name of the file that we'll split debug info +/// out into. +/// \param Kind The kind of debug information to generate. +/// \param DWOId The DWOId if this is a split skeleton compile unit. +/// \param SplitDebugInlining Whether to emit inline debug info. +/// \param DebugInfoForProfiling Whether to emit extra debug info for +/// profile collection. +LLVMMetadataRef LLVMDIBuilderCreateCompileUnit( + LLVMDIBuilderRef Builder, LLVMDWARFSourceLanguage Lang, + LLVMMetadataRef FileRef, const char *Producer, uint8_t isOptimized, + const char *Flags, unsigned RuntimeVer, const char *SplitName, + LLVMDWARFEmissionKind Kind, uint64_t DWOId, uint8_t SplitDebugInlining, + uint8_t DebugInfoForProfiling); + +/// Create a file descriptor to hold debugging information for a file. +/// \param Builder The DIBuilder. +/// \param Filename File name. +/// \param Directory Directory. +LLVMMetadataRef +LLVMDIBuilderCreateFile(LLVMDIBuilderRef Builder, const char *Filename, + const char *Directory); + +/// Creates a new DebugLocation that describes a source location. +/// \param Line The line in the source file. +/// \param Column The column in the source file. +/// \param Scope The scope in which the location resides. +/// \param InlinedAt The scope where this location was inlined, if at all. +/// (optional). +LLVMMetadataRef +LLVMDIBuilderCreateDebugLocation(LLVMContextRef Ctx, unsigned Line, + unsigned Column, LLVMMetadataRef Scope, + LLVMMetadataRef InlinedAt); + +#ifdef __cplusplus +} // end extern "C" +#endif Index: include/llvm-c/Types.h =================================================================== --- include/llvm-c/Types.h +++ include/llvm-c/Types.h @@ -97,6 +97,13 @@ typedef struct LLVMOpaqueBuilder *LLVMBuilderRef; /** + * Represents an LLVM debug info builder. + * + * This models llvm::DIBuilder. + */ +typedef struct LLVMOpaqueDIBuilder *LLVMDIBuilderRef; + +/** * Interface used to provide a module to JIT or interpreter. * This is now just a synonym for llvm::Module, but we have to keep using the * different type to keep binary compatibility. Index: include/llvm/ADT/APFloat.h =================================================================== --- include/llvm/ADT/APFloat.h +++ include/llvm/ADT/APFloat.h @@ -397,6 +397,12 @@ /// consider inserting before falling back to scientific /// notation. 0 means to always use scientific notation. /// + /// \param TruncateZero Indicate whether to remove the trailing zero in + /// fraction part or not. Also setting this parameter to false forcing + /// producing of output more similar to default printf behavior. + /// Specifically the lower e is used as exponent delimiter and exponent + /// always contains no less than two digits. 
+ /// /// Number Precision MaxPadding Result /// ------ --------- ---------- ------ /// 1.01E+4 5 2 10100 @@ -406,7 +412,7 @@ /// 1.01E-2 4 2 0.0101 /// 1.01E-2 4 1 1.01E-2 void toString(SmallVectorImpl &Str, unsigned FormatPrecision = 0, - unsigned FormatMaxPadding = 3) const; + unsigned FormatMaxPadding = 3, bool TruncateZero = true) const; /// If this value has an exact multiplicative inverse, store it in inv and /// return true. @@ -649,7 +655,7 @@ bool isInteger() const; void toString(SmallVectorImpl &Str, unsigned FormatPrecision, - unsigned FormatMaxPadding) const; + unsigned FormatMaxPadding, bool TruncateZero = true) const; bool getExactInverse(APFloat *inv) const; @@ -1144,9 +1150,9 @@ APFloat &operator=(APFloat &&RHS) = default; void toString(SmallVectorImpl &Str, unsigned FormatPrecision = 0, - unsigned FormatMaxPadding = 3) const { + unsigned FormatMaxPadding = 3, bool TruncateZero = true) const { APFLOAT_DISPATCH_ON_SEMANTICS( - toString(Str, FormatPrecision, FormatMaxPadding)); + toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero)); } void print(raw_ostream &) const; Index: include/llvm/ADT/APInt.h =================================================================== --- include/llvm/ADT/APInt.h +++ include/llvm/ADT/APInt.h @@ -212,6 +212,9 @@ /// out-of-line slow case for intersects. bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY; + /// out-of-line slow case for isSubsetOf. + bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY; + /// out-of-line slow case for setBits. void setBitsSlowCase(unsigned loBit, unsigned hiBit); @@ -227,6 +230,14 @@ /// out-of-line slow case for operator^=. void XorAssignSlowCase(const APInt& RHS); + /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal + /// to, or greater than RHS. + int compare(const APInt &RHS) const LLVM_READONLY; + + /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal + /// to, or greater than RHS. + int compareSigned(const APInt &RHS) const LLVM_READONLY; + public: /// \name Constructors /// @{ @@ -413,10 +424,10 @@ return countPopulationSlowCase() == 1; } - /// \brief Check if the APInt's value is returned by getSignBit. + /// \brief Check if the APInt's value is returned by getSignMask. /// - /// \returns true if this is the value returned by getSignBit. - bool isSignBit() const { return isMinSignedValue(); } + /// \returns true if this is the value returned by getSignMask. + bool isSignMask() const { return isMinSignedValue(); } /// \brief Convert APInt to a boolean value. /// @@ -494,11 +505,11 @@ return API; } - /// \brief Get the SignBit for a specific bit width. + /// \brief Get the SignMask for a specific bit width. /// /// This is just a wrapper function of getSignedMinValue(), and it helps code - /// readability when we want to get a SignBit. - static APInt getSignBit(unsigned BitWidth) { + /// readability when we want to get a SignMask. + static APInt getSignMask(unsigned BitWidth) { return getSignedMinValue(BitWidth); } @@ -1076,7 +1087,7 @@ /// the validity of the less-than relationship. /// /// \returns true if *this < RHS when both are considered unsigned. - bool ult(const APInt &RHS) const LLVM_READONLY; + bool ult(const APInt &RHS) const { return compare(RHS) < 0; } /// \brief Unsigned less than comparison /// @@ -1095,7 +1106,7 @@ /// validity of the less-than relationship. /// /// \returns true if *this < RHS when both are considered signed. 
- bool slt(const APInt &RHS) const LLVM_READONLY; + bool slt(const APInt &RHS) const { return compareSigned(RHS) < 0; } /// \brief Signed less than comparison /// @@ -1114,7 +1125,7 @@ /// validity of the less-or-equal relationship. /// /// \returns true if *this <= RHS when both are considered unsigned. - bool ule(const APInt &RHS) const { return ult(RHS) || eq(RHS); } + bool ule(const APInt &RHS) const { return compare(RHS) <= 0; } /// \brief Unsigned less or equal comparison /// @@ -1130,7 +1141,7 @@ /// validity of the less-or-equal relationship. /// /// \returns true if *this <= RHS when both are considered signed. - bool sle(const APInt &RHS) const { return slt(RHS) || eq(RHS); } + bool sle(const APInt &RHS) const { return compareSigned(RHS) <= 0; } /// \brief Signed less or equal comparison /// @@ -1146,7 +1157,7 @@ /// the validity of the greater-than relationship. /// /// \returns true if *this > RHS when both are considered unsigned. - bool ugt(const APInt &RHS) const { return !ult(RHS) && !eq(RHS); } + bool ugt(const APInt &RHS) const { return !ule(RHS); } /// \brief Unsigned greater than comparison /// @@ -1165,7 +1176,7 @@ /// validity of the greater-than relationship. /// /// \returns true if *this > RHS when both are considered signed. - bool sgt(const APInt &RHS) const { return !slt(RHS) && !eq(RHS); } + bool sgt(const APInt &RHS) const { return !sle(RHS); } /// \brief Signed greater than comparison /// @@ -1219,6 +1230,14 @@ return intersectsSlowCase(RHS); } + /// This operation checks that all bits set in this APInt are also set in RHS. + bool isSubsetOf(const APInt &RHS) const { + assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); + if (isSingleWord()) + return (VAL & ~RHS.VAL) == 0; + return isSubsetOfSlowCase(RHS); + } + /// @} /// \name Resizing Operators /// @{ Index: include/llvm/ADT/BitVector.h =================================================================== --- include/llvm/ADT/BitVector.h +++ include/llvm/ADT/BitVector.h @@ -14,6 +14,7 @@ #ifndef LLVM_ADT_BITVECTOR_H #define LLVM_ADT_BITVECTOR_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/Support/MathExtras.h" #include #include @@ -161,6 +162,22 @@ return -1; } + /// find_last - Returns the index of the last set bit, -1 if none of the bits + /// are set. + int find_last() const { + if (Size == 0) + return -1; + + unsigned N = NumBitWords(size()); + assert(N > 0); + + unsigned i = N - 1; + while (i > 0 && Bits[i] == BitWord(0)) + --i; + + return int((i + 1) * BITWORD_SIZE - countLeadingZeros(Bits[i])) - 1; + } + /// find_first_unset - Returns the index of the first unset bit, -1 if all /// of the bits are set. int find_first_unset() const { @@ -172,6 +189,30 @@ return -1; } + /// find_last_unset - Returns the index of the last unset bit, -1 if all of + /// the bits are set. + int find_last_unset() const { + if (Size == 0) + return -1; + + const unsigned N = NumBitWords(size()); + assert(N > 0); + + unsigned i = N - 1; + BitWord W = Bits[i]; + + // The last word in the BitVector has some unused bits, so we need to set + // them all to 1 first. Set them all to 1 so they don't get treated as + // valid unset bits. + unsigned UnusedCount = BITWORD_SIZE - size() % BITWORD_SIZE; + W |= maskLeadingOnes(UnusedCount); + + while (W == ~BitWord(0) && --i > 0) + W = Bits[i]; + + return int((i + 1) * BITWORD_SIZE - countLeadingOnes(W)) - 1; + } + /// find_next - Returns the index of the next set bit following the /// "Prev" bit. Returns -1 if the next set bit is not found. 
int find_next(unsigned Prev) const { @@ -455,6 +496,105 @@ return *this; } + BitVector &operator>>=(unsigned N) { + assert(N <= Size); + if (LLVM_UNLIKELY(empty() || N == 0)) + return *this; + + unsigned NumWords = NumBitWords(Size); + assert(NumWords >= 1); + + wordShr(N / BITWORD_SIZE); + + unsigned BitDistance = N % BITWORD_SIZE; + if (BitDistance == 0) + return *this; + + // When the shift size is not a multiple of the word size, then we have + // a tricky situation where each word in succession needs to extract some + // of the bits from the next word and or them into this word while + // shifting this word to make room for the new bits. This has to be done + // for every word in the array. + + // Since we're shifting each word right, some bits will fall off the end + // of each word to the right, and empty space will be created on the left. + // The final word in the array will lose bits permanently, so starting at + // the beginning, work forwards shifting each word to the right, and + // OR'ing in the bits from the end of the next word to the beginning of + // the current word. + + // Example: + // Starting with {0xAABBCCDD, 0xEEFF0011, 0x22334455} and shifting right + // by 4 bits. + // Step 1: Word[0] >>= 4 ; 0x0ABBCCDD + // Step 2: Word[0] |= 0x10000000 ; 0x1ABBCCDD + // Step 3: Word[1] >>= 4 ; 0x0EEFF001 + // Step 4: Word[1] |= 0x50000000 ; 0x5EEFF001 + // Step 5: Word[2] >>= 4 ; 0x02334455 + // Result: { 0x1ABBCCDD, 0x5EEFF001, 0x02334455 } + const BitWord Mask = maskTrailingOnes(BitDistance); + const unsigned LSH = BITWORD_SIZE - BitDistance; + + for (unsigned I = 0; I < NumWords - 1; ++I) { + Bits[I] >>= BitDistance; + Bits[I] |= (Bits[I + 1] & Mask) << LSH; + } + + Bits[NumWords - 1] >>= BitDistance; + + return *this; + } + + BitVector &operator<<=(unsigned N) { + assert(N <= Size); + if (LLVM_UNLIKELY(empty() || N == 0)) + return *this; + + unsigned NumWords = NumBitWords(Size); + assert(NumWords >= 1); + + wordShl(N / BITWORD_SIZE); + + unsigned BitDistance = N % BITWORD_SIZE; + if (BitDistance == 0) + return *this; + + // When the shift size is not a multiple of the word size, then we have + // a tricky situation where each word in succession needs to extract some + // of the bits from the previous word and or them into this word while + // shifting this word to make room for the new bits. This has to be done + // for every word in the array. This is similar to the algorithm outlined + // in operator>>=, but backwards. + + // Since we're shifting each word left, some bits will fall off the end + // of each word to the left, and empty space will be created on the right. + // The first word in the array will lose bits permanently, so starting at + // the end, work backwards shifting each word to the left, and OR'ing + // in the bits from the end of the next word to the beginning of the + // current word. + + // Example: + // Starting with {0xAABBCCDD, 0xEEFF0011, 0x22334455} and shifting left + // by 4 bits. 
+ // Step 1: Word[2] <<= 4 ; 0x23344550 + // Step 2: Word[2] |= 0x0000000E ; 0x2334455E + // Step 3: Word[1] <<= 4 ; 0xEFF00110 + // Step 4: Word[1] |= 0x0000000A ; 0xEFF0011A + // Step 5: Word[0] <<= 4 ; 0xABBCCDD0 + // Result: { 0xABBCCDD0, 0xEFF0011A, 0x2334455E } + const BitWord Mask = maskLeadingOnes(BitDistance); + const unsigned RSH = BITWORD_SIZE - BitDistance; + + for (int I = NumWords - 1; I > 0; --I) { + Bits[I] <<= BitDistance; + Bits[I] |= (Bits[I - 1] & Mask) >> RSH; + } + Bits[0] <<= BitDistance; + clear_unused_bits(); + + return *this; + } + // Assignment operator. const BitVector &operator=(const BitVector &RHS) { if (this == &RHS) return *this; @@ -538,6 +678,54 @@ } private: + /// \brief Perform a logical left shift of \p Count words by moving everything + /// \p Count words to the right in memory. + /// + /// While confusing, words are stored from least significant at Bits[0] to + /// most significant at Bits[NumWords-1]. A logical shift left, however, + /// moves the current least significant bit to a higher logical index, and + /// fills the previous least significant bits with 0. Thus, we actually + /// need to move the bytes of the memory to the right, not to the left. + /// Example: + /// Words = [0xBBBBAAAA, 0xDDDDFFFF, 0x00000000, 0xDDDD0000] + /// represents a BitVector where 0xBBBBAAAA contain the least significant + /// bits. So if we want to shift the BitVector left by 2 words, we need to + /// turn this into 0x00000000 0x00000000 0xBBBBAAAA 0xDDDDFFFF by using a + /// memmove which moves right, not left. + void wordShl(uint32_t Count) { + if (Count == 0) + return; + + uint32_t NumWords = NumBitWords(Size); + + auto Src = ArrayRef(Bits, NumWords).drop_back(Count); + auto Dest = MutableArrayRef(Bits, NumWords).drop_front(Count); + + // Since we always move Word-sized chunks of data with src and dest both + // aligned to a word-boundary, we don't need to worry about endianness + // here. + std::memmove(Dest.begin(), Src.begin(), Dest.size() * sizeof(BitWord)); + std::memset(Bits, 0, Count * sizeof(BitWord)); + clear_unused_bits(); + } + + /// \brief Perform a logical right shift of \p Count words by moving those + /// words to the left in memory. See wordShl for more information. + /// + void wordShr(uint32_t Count) { + if (Count == 0) + return; + + uint32_t NumWords = NumBitWords(Size); + + auto Src = ArrayRef(Bits, NumWords).drop_front(Count); + auto Dest = MutableArrayRef(Bits, NumWords).drop_back(Count); + assert(Dest.size() == Src.size()); + + std::memmove(Dest.begin(), Src.begin(), Dest.size() * sizeof(BitWord)); + std::memset(Dest.end(), 0, Count * sizeof(BitWord)); + } + int next_unset_in_word(int WordIndex, BitWord Word) const { unsigned Result = WordIndex * BITWORD_SIZE + countTrailingOnes(Word); return Result < size() ? Result : -1; Index: include/llvm/ADT/SmallBitVector.h =================================================================== --- include/llvm/ADT/SmallBitVector.h +++ include/llvm/ADT/SmallBitVector.h @@ -117,9 +117,7 @@ } // Return the size. 
- size_t getSmallSize() const { - return getSmallRawBits() >> SmallNumDataBits; - } + size_t getSmallSize() const { return getSmallRawBits() >> SmallNumDataBits; } void setSmallSize(size_t Size) { setSmallRawBits(getSmallBits() | (Size << SmallNumDataBits)); @@ -216,6 +214,16 @@ return getPointer()->find_first(); } + int find_last() const { + if (isSmall()) { + uintptr_t Bits = getSmallBits(); + if (Bits == 0) + return -1; + return NumBaseBits - countLeadingZeros(Bits); + } + return getPointer()->find_last(); + } + /// Returns the index of the first unset bit, -1 if all of the bits are set. int find_first_unset() const { if (isSmall()) { @@ -228,6 +236,17 @@ return getPointer()->find_first_unset(); } + int find_last_unset() const { + if (isSmall()) { + if (count() == getSmallSize()) + return -1; + + uintptr_t Bits = getSmallBits(); + return NumBaseBits - countLeadingOnes(Bits); + } + return getPointer()->find_last_unset(); + } + /// Returns the index of the next set bit following the "Prev" bit. /// Returns -1 if the next set bit is not found. int find_next(unsigned Prev) const { @@ -508,6 +527,22 @@ return *this; } + SmallBitVector &operator<<=(unsigned N) { + if (isSmall()) + setSmallBits(getSmallBits() << N); + else + getPointer()->operator<<=(N); + return *this; + } + + SmallBitVector &operator>>=(unsigned N) { + if (isSmall()) + setSmallBits(getSmallBits() >> N); + else + getPointer()->operator>>=(N); + return *this; + } + // Assignment operator. const SmallBitVector &operator=(const SmallBitVector &RHS) { if (isSmall()) { Index: include/llvm/CodeGen/GlobalISel/InstructionSelector.h =================================================================== --- include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -18,20 +18,50 @@ #include "llvm/ADT/Optional.h" #include +#include namespace llvm { class MachineInstr; +class MachineFunction; class MachineOperand; class MachineRegisterInfo; class RegisterBankInfo; class TargetInstrInfo; class TargetRegisterInfo; +/// Container class for CodeGen predicate results. +/// This is convenient because std::bitset does not have a constructor +/// with an initializer list of set bits. +/// +/// Each InstructionSelector subclass should define a PredicateBitset class with: +/// const unsigned MAX_SUBTARGET_PREDICATES = 192; +/// using PredicateBitset = PredicateBitsetImpl; +/// and updating the constant to suit the target. Tablegen provides a suitable +/// definition for the predicates in use in GenGlobalISel.inc when +/// GET_GLOBALISEL_PREDICATE_BITSET is defined. +template +class PredicateBitsetImpl : public std::bitset { +public: + // Cannot inherit constructors because it's not supported by VC++.. + PredicateBitsetImpl() = default; + + PredicateBitsetImpl(const std::bitset &B) + : std::bitset(B) {} + + PredicateBitsetImpl(std::initializer_list Init) { + for (auto I : Init) + std::bitset::set(I); + } +}; + /// Provides the logic to select generic machine instructions. class InstructionSelector { public: virtual ~InstructionSelector() {} + /// This is executed before selecting a function. + virtual void beginFunction(const MachineFunction &MF) {} + /// Select the (possibly generic) instruction \p I to only use target-specific /// opcodes. It is OK to insert multiple instructions, but they cannot be /// generic pre-isel instructions. 
Index: include/llvm/CodeGen/MachineValueType.h =================================================================== --- include/llvm/CodeGen/MachineValueType.h +++ include/llvm/CodeGen/MachineValueType.h @@ -28,155 +28,246 @@ /// type can be represented by an MVT. class MVT { public: - enum SimpleValueType : int8_t { - // Simple value types less than zero are considered extended value types. - INVALID_SIMPLE_VALUE_TYPE = -1, + enum SimpleValueType : uint8_t { + // Simple value types that aren't explicitly part of this enumeration + // are considered extended value types. + INVALID_SIMPLE_VALUE_TYPE = 0, // If you change this numbering, you must change the values in // ValueTypes.td as well! - Other = 0, // This is a non-standard value - i1 = 1, // This is a 1 bit integer value - i8 = 2, // This is an 8 bit integer value - i16 = 3, // This is a 16 bit integer value - i32 = 4, // This is a 32 bit integer value - i64 = 5, // This is a 64 bit integer value - i128 = 6, // This is a 128 bit integer value + Other = 1, // This is a non-standard value + i1 = 2, // This is a 1 bit integer value + i8 = 3, // This is an 8 bit integer value + i16 = 4, // This is a 16 bit integer value + i32 = 5, // This is a 32 bit integer value + i64 = 6, // This is a 64 bit integer value + i128 = 7, // This is a 128 bit integer value FIRST_INTEGER_VALUETYPE = i1, LAST_INTEGER_VALUETYPE = i128, - f16 = 7, // This is a 16 bit floating point value - f32 = 8, // This is a 32 bit floating point value - f64 = 9, // This is a 64 bit floating point value - f80 = 10, // This is a 80 bit floating point value - f128 = 11, // This is a 128 bit floating point value - ppcf128 = 12, // This is a PPC 128-bit floating point value + f16 = 8, // This is a 16 bit floating point value + f32 = 9, // This is a 32 bit floating point value + f64 = 10, // This is a 64 bit floating point value + f80 = 11, // This is a 80 bit floating point value + f128 = 12, // This is a 128 bit floating point value + ppcf128 = 13, // This is a PPC 128-bit floating point value FIRST_FP_VALUETYPE = f16, LAST_FP_VALUETYPE = ppcf128, - v2i1 = 13, // 2 x i1 - v4i1 = 14, // 4 x i1 - v8i1 = 15, // 8 x i1 - v16i1 = 16, // 16 x i1 - v32i1 = 17, // 32 x i1 - v64i1 = 18, // 64 x i1 - v512i1 = 19, // 512 x i1 - v1024i1 = 20, // 1024 x i1 - - v1i8 = 21, // 1 x i8 - v2i8 = 22, // 2 x i8 - v4i8 = 23, // 4 x i8 - v8i8 = 24, // 8 x i8 - v16i8 = 25, // 16 x i8 - v32i8 = 26, // 32 x i8 - v64i8 = 27, // 64 x i8 - v128i8 = 28, //128 x i8 - v256i8 = 29, //256 x i8 - - v1i16 = 30, // 1 x i16 - v2i16 = 31, // 2 x i16 - v4i16 = 32, // 4 x i16 - v8i16 = 33, // 8 x i16 - v16i16 = 34, // 16 x i16 - v32i16 = 35, // 32 x i16 - v64i16 = 36, // 64 x i16 - v128i16 = 37, //128 x i16 - - v1i32 = 38, // 1 x i32 - v2i32 = 39, // 2 x i32 - v4i32 = 40, // 4 x i32 - v8i32 = 41, // 8 x i32 - v16i32 = 42, // 16 x i32 - v32i32 = 43, // 32 x i32 - v64i32 = 44, // 64 x i32 - - v1i64 = 45, // 1 x i64 - v2i64 = 46, // 2 x i64 - v4i64 = 47, // 4 x i64 - v8i64 = 48, // 8 x i64 - v16i64 = 49, // 16 x i64 - v32i64 = 50, // 32 x i64 - - v1i128 = 51, // 1 x i128 + v2i1 = 14, // 2 x i1 + v4i1 = 15, // 4 x i1 + v8i1 = 16, // 8 x i1 + v16i1 = 17, // 16 x i1 + v32i1 = 18, // 32 x i1 + v64i1 = 19, // 64 x i1 + v512i1 = 20, // 512 x i1 + v1024i1 = 21, // 1024 x i1 + + v1i8 = 22, // 1 x i8 + v2i8 = 23, // 2 x i8 + v4i8 = 24, // 4 x i8 + v8i8 = 25, // 8 x i8 + v16i8 = 26, // 16 x i8 + v32i8 = 27, // 32 x i8 + v64i8 = 28, // 64 x i8 + v128i8 = 29, //128 x i8 + v256i8 = 30, //256 x i8 + + v1i16 = 31, // 1 x i16 + v2i16 = 
32, // 2 x i16 + v4i16 = 33, // 4 x i16 + v8i16 = 34, // 8 x i16 + v16i16 = 35, // 16 x i16 + v32i16 = 36, // 32 x i16 + v64i16 = 37, // 64 x i16 + v128i16 = 38, //128 x i16 + + v1i32 = 39, // 1 x i32 + v2i32 = 40, // 2 x i32 + v4i32 = 41, // 4 x i32 + v8i32 = 42, // 8 x i32 + v16i32 = 43, // 16 x i32 + v32i32 = 44, // 32 x i32 + v64i32 = 45, // 64 x i32 + + v1i64 = 46, // 1 x i64 + v2i64 = 47, // 2 x i64 + v4i64 = 48, // 4 x i64 + v8i64 = 49, // 8 x i64 + v16i64 = 50, // 16 x i64 + v32i64 = 51, // 32 x i64 + + v1i128 = 52, // 1 x i128 + + // Scalable integer types + nxv2i1 = 53, // n x 2 x i1 + nxv4i1 = 54, // n x 4 x i1 + nxv8i1 = 55, // n x 8 x i1 + nxv16i1 = 56, // n x 16 x i1 + nxv32i1 = 57, // n x 32 x i1 + + nxv1i8 = 58, // n x 1 x i8 + nxv2i8 = 59, // n x 2 x i8 + nxv4i8 = 60, // n x 4 x i8 + nxv8i8 = 61, // n x 8 x i8 + nxv16i8 = 62, // n x 16 x i8 + nxv32i8 = 63, // n x 32 x i8 + + nxv1i16 = 64, // n x 1 x i16 + nxv2i16 = 65, // n x 2 x i16 + nxv4i16 = 66, // n x 4 x i16 + nxv8i16 = 67, // n x 8 x i16 + nxv16i16 = 68, // n x 16 x i16 + nxv32i16 = 69, // n x 32 x i16 + + nxv1i32 = 70, // n x 1 x i32 + nxv2i32 = 71, // n x 2 x i32 + nxv4i32 = 72, // n x 4 x i32 + nxv8i32 = 73, // n x 8 x i32 + nxv16i32 = 74, // n x 16 x i32 + nxv32i32 = 75, // n x 32 x i32 + + nxv1i64 = 76, // n x 1 x i64 + nxv2i64 = 77, // n x 2 x i64 + nxv4i64 = 78, // n x 4 x i64 + nxv8i64 = 79, // n x 8 x i64 + nxv16i64 = 80, // n x 16 x i64 + nxv32i64 = 81, // n x 32 x i64 FIRST_INTEGER_VECTOR_VALUETYPE = v2i1, - LAST_INTEGER_VECTOR_VALUETYPE = v1i128, - - v2f16 = 52, // 2 x f16 - v4f16 = 53, // 4 x f16 - v8f16 = 54, // 8 x f16 - v1f32 = 55, // 1 x f32 - v2f32 = 56, // 2 x f32 - v4f32 = 57, // 4 x f32 - v8f32 = 58, // 8 x f32 - v16f32 = 59, // 16 x f32 - v1f64 = 60, // 1 x f64 - v2f64 = 61, // 2 x f64 - v4f64 = 62, // 4 x f64 - v8f64 = 63, // 8 x f64 + LAST_INTEGER_VECTOR_VALUETYPE = nxv32i64, + + FIRST_INTEGER_SCALABLE_VALUETYPE = nxv2i1, + LAST_INTEGER_SCALABLE_VALUETYPE = nxv32i64, + + v2f16 = 82, // 2 x f16 + v4f16 = 83, // 4 x f16 + v8f16 = 84, // 8 x f16 + v1f32 = 85, // 1 x f32 + v2f32 = 86, // 2 x f32 + v4f32 = 87, // 4 x f32 + v8f32 = 88, // 8 x f32 + v16f32 = 89, // 16 x f32 + v1f64 = 90, // 1 x f64 + v2f64 = 91, // 2 x f64 + v4f64 = 92, // 4 x f64 + v8f64 = 93, // 8 x f64 + + nxv2f16 = 94, // n x 2 x f16 + nxv4f16 = 95, // n x 4 x f16 + nxv8f16 = 96, // n x 8 x f16 + nxv1f32 = 97, // n x 1 x f32 + nxv2f32 = 98, // n x 2 x f32 + nxv4f32 = 99, // n x 4 x f32 + nxv8f32 = 100, // n x 8 x f32 + nxv16f32 = 101, // n x 16 x f32 + nxv1f64 = 102, // n x 1 x f64 + nxv2f64 = 103, // n x 2 x f64 + nxv4f64 = 104, // n x 4 x f64 + nxv8f64 = 105, // n x 8 x f64 FIRST_FP_VECTOR_VALUETYPE = v2f16, - LAST_FP_VECTOR_VALUETYPE = v8f64, + LAST_FP_VECTOR_VALUETYPE = nxv8f64, + + FIRST_FP_SCALABLE_VALUETYPE = nxv2f16, + LAST_FP_SCALABLE_VALUETYPE = nxv8f64, FIRST_VECTOR_VALUETYPE = v2i1, - LAST_VECTOR_VALUETYPE = v8f64, + LAST_VECTOR_VALUETYPE = nxv8f64, - x86mmx = 64, // This is an X86 MMX value + x86mmx = 106, // This is an X86 MMX value - Glue = 65, // This glues nodes together during pre-RA sched + Glue = 107, // This glues nodes together during pre-RA sched - isVoid = 66, // This has no value + isVoid = 108, // This has no value - Untyped = 67, // This value takes a register, but has - // unspecified type. The register class - // will be determined by the opcode. + Untyped = 109, // This value takes a register, but has + // unspecified type. The register class + // will be determined by the opcode. 
- FIRST_VALUETYPE = 0, // This is always the beginning of the list. - LAST_VALUETYPE = 68, // This always remains at the end of the list. + FIRST_VALUETYPE = 1, // This is always the beginning of the list. + LAST_VALUETYPE = 110, // This always remains at the end of the list. // This is the current maximum for LAST_VALUETYPE. // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors // This value must be a multiple of 32. - MAX_ALLOWED_VALUETYPE = 96, + MAX_ALLOWED_VALUETYPE = 128, // A value of type llvm::TokenTy - token = 120, + token = 248, // This is MDNode or MDString. - Metadata = 121, + Metadata = 249, // An int value the size of the pointer of the current // target to any address space. This must only be used internal to // tblgen. Other than for overloading, we treat iPTRAny the same as iPTR. - iPTRAny = 122, + iPTRAny = 250, // A vector with any length and element size. This is used // for intrinsics that have overloadings based on vector types. // This is only for tblgen's consumption! - vAny = 123, + vAny = 251, // Any floating-point or vector floating-point value. This is used // for intrinsics that have overloadings based on floating-point types. // This is only for tblgen's consumption! - fAny = 124, + fAny = 252, // An integer or vector integer value of any bit width. This is // used for intrinsics that have overloadings based on integer bit widths. // This is only for tblgen's consumption! - iAny = 125, + iAny = 253, // An int value the size of the pointer of the current // target. This should only be used internal to tblgen! - iPTR = 126, + iPTR = 254, // Any type. This is used for intrinsics that have overloadings. // This is only for tblgen's consumption! - Any = 127 + Any = 255 }; SimpleValueType SimpleTy; + + // A class to represent the number of elements in a vector + // + // For fixed-length vectors, the total number of elements is equal to 'Min' + // For scalable vectors, the total number of elements is a multiple of 'Min' + class ElementCount { + public: + unsigned Min; + bool Scalable; + + ElementCount(unsigned Min, bool Scalable) + : Min(Min), Scalable(Scalable) {} + + ElementCount operator*(unsigned RHS) { + return { Min * RHS, Scalable }; + } + + ElementCount& operator*=(unsigned RHS) { + Min *= RHS; + return *this; + } + + ElementCount operator/(unsigned RHS) { + return { Min / RHS, Scalable }; + } + + ElementCount& operator/=(unsigned RHS) { + Min /= RHS; + return *this; + } + + bool operator==(const ElementCount& RHS) { + return Min == RHS.Min && Scalable == RHS.Scalable; + } + }; + constexpr MVT() : SimpleTy(INVALID_SIMPLE_VALUE_TYPE) {} constexpr MVT(SimpleValueType SVT) : SimpleTy(SVT) {} @@ -221,6 +312,15 @@ SimpleTy <= MVT::LAST_VECTOR_VALUETYPE); } + /// Return true if this is a vector value type where the + /// runtime length is machine dependent + bool isScalableVector() const { + return ((SimpleTy >= MVT::FIRST_INTEGER_SCALABLE_VALUETYPE && + SimpleTy <= MVT::LAST_INTEGER_SCALABLE_VALUETYPE) || + (SimpleTy >= MVT::FIRST_FP_SCALABLE_VALUETYPE && + SimpleTy <= MVT::LAST_FP_SCALABLE_VALUETYPE)); + } + /// Return true if this is a 16-bit vector type. 
bool is16BitVector() const { return (SimpleTy == MVT::v2i8 || SimpleTy == MVT::v1i16 || @@ -318,7 +418,12 @@ case v32i1: case v64i1: case v512i1: - case v1024i1: return i1; + case v1024i1: + case nxv2i1: + case nxv4i1: + case nxv8i1: + case nxv16i1: + case nxv32i1: return i1; case v1i8: case v2i8: case v4i8: @@ -327,7 +432,13 @@ case v32i8: case v64i8: case v128i8: - case v256i8: return i8; + case v256i8: + case nxv1i8: + case nxv2i8: + case nxv4i8: + case nxv8i8: + case nxv16i8: + case nxv32i8: return i8; case v1i16: case v2i16: case v4i16: @@ -335,33 +446,63 @@ case v16i16: case v32i16: case v64i16: - case v128i16: return i16; + case v128i16: + case nxv1i16: + case nxv2i16: + case nxv4i16: + case nxv8i16: + case nxv16i16: + case nxv32i16: return i16; case v1i32: case v2i32: case v4i32: case v8i32: case v16i32: case v32i32: - case v64i32: return i32; + case v64i32: + case nxv1i32: + case nxv2i32: + case nxv4i32: + case nxv8i32: + case nxv16i32: + case nxv32i32: return i32; case v1i64: case v2i64: case v4i64: case v8i64: case v16i64: - case v32i64: return i64; + case v32i64: + case nxv1i64: + case nxv2i64: + case nxv4i64: + case nxv8i64: + case nxv16i64: + case nxv32i64: return i64; case v1i128: return i128; case v2f16: case v4f16: - case v8f16: return f16; + case v8f16: + case nxv2f16: + case nxv4f16: + case nxv8f16: return f16; case v1f32: case v2f32: case v4f32: case v8f32: - case v16f32: return f32; + case v16f32: + case nxv1f32: + case nxv2f32: + case nxv4f32: + case nxv8f32: + case nxv16f32: return f32; case v1f64: case v2f64: case v4f64: - case v8f64: return f64; + case v8f64: + case nxv1f64: + case nxv2f64: + case nxv4f64: + case nxv8f64: return f64; } } @@ -382,13 +523,24 @@ case v32i8: case v32i16: case v32i32: - case v32i64: return 32; + case v32i64: + case nxv32i1: + case nxv32i8: + case nxv32i16: + case nxv32i32: + case nxv32i64: return 32; case v16i1: case v16i8: case v16i16: case v16i32: case v16i64: - case v16f32: return 16; + case v16f32: + case nxv16i1: + case nxv16i8: + case nxv16i16: + case nxv16i32: + case nxv16i64: + case nxv16f32: return 16; case v8i1: case v8i8: case v8i16: @@ -396,7 +548,15 @@ case v8i64: case v8f16: case v8f32: - case v8f64: return 8; + case v8f64: + case nxv8i1: + case nxv8i8: + case nxv8i16: + case nxv8i32: + case nxv8i64: + case nxv8f16: + case nxv8f32: + case nxv8f64: return 8; case v4i1: case v4i8: case v4i16: @@ -404,7 +564,15 @@ case v4i64: case v4f16: case v4f32: - case v4f64: return 4; + case v4f64: + case nxv4i1: + case nxv4i8: + case nxv4i16: + case nxv4i32: + case nxv4i64: + case nxv4f16: + case nxv4f32: + case nxv4f64: return 4; case v2i1: case v2i8: case v2i16: @@ -412,17 +580,35 @@ case v2i64: case v2f16: case v2f32: - case v2f64: return 2; + case v2f64: + case nxv2i1: + case nxv2i8: + case nxv2i16: + case nxv2i32: + case nxv2i64: + case nxv2f16: + case nxv2f32: + case nxv2f64: return 2; case v1i8: case v1i16: case v1i32: case v1i64: case v1i128: case v1f32: - case v1f64: return 1; + case v1f64: + case nxv1i8: + case nxv1i16: + case nxv1i32: + case nxv1i64: + case nxv1f32: + case nxv1f64: return 1; } } + MVT::ElementCount getVectorElementCount() const { + return { getVectorNumElements(), isScalableVector() }; + } + unsigned getSizeInBits() const { switch (SimpleTy) { default: @@ -443,16 +629,23 @@ case Metadata: llvm_unreachable("Value type is metadata."); case i1 : return 1; - case v2i1: return 2; - case v4i1: return 4; + case v2i1: + case nxv2i1: return 2; + case v4i1: + case nxv4i1: return 4; case i8 : case v1i8: - case v8i1: 
return 8; + case v8i1: + case nxv1i8: + case nxv8i1: return 8; case i16 : case f16: case v16i1: case v2i8: - case v1i16: return 16; + case v1i16: + case nxv16i1: + case nxv2i8: + case nxv1i16: return 16; case f32 : case i32 : case v32i1: @@ -460,7 +653,13 @@ case v2i16: case v2f16: case v1f32: - case v1i32: return 32; + case v1i32: + case nxv32i1: + case nxv4i8: + case nxv2i16: + case nxv1i32: + case nxv2f16: + case nxv1f32: return 32; case x86mmx: case f64 : case i64 : @@ -471,7 +670,14 @@ case v1i64: case v4f16: case v2f32: - case v1f64: return 64; + case v1f64: + case nxv8i8: + case nxv4i16: + case nxv2i32: + case nxv1i64: + case nxv4f16: + case nxv2f32: + case nxv1f64: return 64; case f80 : return 80; case f128: case ppcf128: @@ -483,29 +689,50 @@ case v1i128: case v8f16: case v4f32: - case v2f64: return 128; + case v2f64: + case nxv16i8: + case nxv8i16: + case nxv4i32: + case nxv2i64: + case nxv8f16: + case nxv4f32: + case nxv2f64: return 128; case v32i8: case v16i16: case v8i32: case v4i64: case v8f32: - case v4f64: return 256; + case v4f64: + case nxv32i8: + case nxv16i16: + case nxv8i32: + case nxv4i64: + case nxv8f32: + case nxv4f64: return 256; case v512i1: case v64i8: case v32i16: case v16i32: case v8i64: case v16f32: - case v8f64: return 512; + case v8f64: + case nxv32i16: + case nxv16i32: + case nxv8i64: + case nxv16f32: + case nxv8f64: return 512; case v1024i1: case v128i8: case v64i16: case v32i32: - case v16i64: return 1024; + case v16i64: + case nxv32i32: + case nxv16i64: return 1024; case v256i8: case v128i16: case v64i32: - case v32i64: return 2048; + case v32i64: + case nxv32i64: return 2048; } } @@ -659,6 +886,83 @@ return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); } + static MVT getScalableVectorVT(MVT VT, unsigned NumElements) { + switch(VT.SimpleTy) { + default: + break; + case MVT::i1: + if (NumElements == 2) return MVT::nxv2i1; + if (NumElements == 4) return MVT::nxv4i1; + if (NumElements == 8) return MVT::nxv8i1; + if (NumElements == 16) return MVT::nxv16i1; + if (NumElements == 32) return MVT::nxv32i1; + break; + case MVT::i8: + if (NumElements == 1) return MVT::nxv1i8; + if (NumElements == 2) return MVT::nxv2i8; + if (NumElements == 4) return MVT::nxv4i8; + if (NumElements == 8) return MVT::nxv8i8; + if (NumElements == 16) return MVT::nxv16i8; + if (NumElements == 32) return MVT::nxv32i8; + break; + case MVT::i16: + if (NumElements == 1) return MVT::nxv1i16; + if (NumElements == 2) return MVT::nxv2i16; + if (NumElements == 4) return MVT::nxv4i16; + if (NumElements == 8) return MVT::nxv8i16; + if (NumElements == 16) return MVT::nxv16i16; + if (NumElements == 32) return MVT::nxv32i16; + break; + case MVT::i32: + if (NumElements == 1) return MVT::nxv1i32; + if (NumElements == 2) return MVT::nxv2i32; + if (NumElements == 4) return MVT::nxv4i32; + if (NumElements == 8) return MVT::nxv8i32; + if (NumElements == 16) return MVT::nxv16i32; + if (NumElements == 32) return MVT::nxv32i32; + break; + case MVT::i64: + if (NumElements == 1) return MVT::nxv1i64; + if (NumElements == 2) return MVT::nxv2i64; + if (NumElements == 4) return MVT::nxv4i64; + if (NumElements == 8) return MVT::nxv8i64; + if (NumElements == 16) return MVT::nxv16i64; + if (NumElements == 32) return MVT::nxv32i64; + break; + case MVT::f16: + if (NumElements == 2) return MVT::nxv2f16; + if (NumElements == 4) return MVT::nxv4f16; + if (NumElements == 8) return MVT::nxv8f16; + break; + case MVT::f32: + if (NumElements == 1) return MVT::nxv1f32; + if (NumElements == 2) return MVT::nxv2f32; + if 
(NumElements == 4) return MVT::nxv4f32; + if (NumElements == 8) return MVT::nxv8f32; + if (NumElements == 16) return MVT::nxv16f32; + break; + case MVT::f64: + if (NumElements == 1) return MVT::nxv1f64; + if (NumElements == 2) return MVT::nxv2f64; + if (NumElements == 4) return MVT::nxv4f64; + if (NumElements == 8) return MVT::nxv8f64; + break; + } + return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); + } + + static MVT getVectorVT(MVT VT, unsigned NumElements, bool IsScalable) { + if (IsScalable) + return getScalableVectorVT(VT, NumElements); + return getVectorVT(VT, NumElements); + } + + static MVT getVectorVT(MVT VT, MVT::ElementCount EC) { + if (EC.Scalable) + return getScalableVectorVT(VT, EC.Min); + return getVectorVT(VT, EC.Min); + } + /// Return the value type corresponding to the specified type. This returns /// all pointers as iPTR. If HandleUnknown is true, unknown types are /// returned as Other, otherwise they are invalid. @@ -709,6 +1013,14 @@ MVT::FIRST_FP_VECTOR_VALUETYPE, (MVT::SimpleValueType)(MVT::LAST_FP_VECTOR_VALUETYPE + 1)); } + static mvt_range integer_scalable_vector_valuetypes() { + return mvt_range(MVT::FIRST_INTEGER_SCALABLE_VALUETYPE, + (MVT::SimpleValueType)(MVT::LAST_INTEGER_SCALABLE_VALUETYPE + 1)); + } + static mvt_range fp_scalable_vector_valuetypes() { + return mvt_range(MVT::FIRST_FP_SCALABLE_VALUETYPE, + (MVT::SimpleValueType)(MVT::LAST_FP_SCALABLE_VALUETYPE + 1)); + } /// @} }; Index: include/llvm/CodeGen/ValueTypes.h =================================================================== --- include/llvm/CodeGen/ValueTypes.h +++ include/llvm/CodeGen/ValueTypes.h @@ -44,7 +44,7 @@ bool operator!=(EVT VT) const { if (V.SimpleTy != VT.V.SimpleTy) return true; - if (V.SimpleTy < 0) + if (V.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE) return LLVMTy != VT.LLVMTy; return false; } @@ -60,31 +60,48 @@ /// bits. static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth) { MVT M = MVT::getIntegerVT(BitWidth); - if (M.SimpleTy >= 0) + if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE) return M; return getExtendedIntegerVT(Context, BitWidth); } /// Returns the EVT that represents a vector NumElements in length, where /// each element is of type VT. - static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements) { - MVT M = MVT::getVectorVT(VT.V, NumElements); - if (M.SimpleTy >= 0) + static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, + bool IsScalable = false) { + MVT M = MVT::getVectorVT(VT.V, NumElements, IsScalable); + if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE) return M; + + assert(!IsScalable && "We don't support extended scalable types yet"); return getExtendedVectorVT(Context, VT, NumElements); } + /// Returns the EVT that represents a vector EC.Min elements in length, + /// where each element is of type VT. + static EVT getVectorVT(LLVMContext &Context, EVT VT, MVT::ElementCount EC) { + MVT M = MVT::getVectorVT(VT.V, EC); + if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE) + return M; + assert (!EC.Scalable && "We don't support extended scalable types yet"); + return getExtendedVectorVT(Context, VT, EC.Min); + } + /// Return a vector with the same number of elements as this vector, but /// with the element type converted to an integer type with the same /// bitwidth. 
EVT changeVectorElementTypeToInteger() const { - if (!isSimple()) + if (!isSimple()) { + assert (!isScalableVector() && + "We don't support extended scalable types yet"); return changeExtendedVectorElementTypeToInteger(); + } MVT EltTy = getSimpleVT().getVectorElementType(); unsigned BitWidth = EltTy.getSizeInBits(); MVT IntTy = MVT::getIntegerVT(BitWidth); - MVT VecTy = MVT::getVectorVT(IntTy, getVectorNumElements()); - assert(VecTy.SimpleTy >= 0 && + MVT VecTy = MVT::getVectorVT(IntTy, getVectorNumElements(), + isScalableVector()); + assert(VecTy.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE && "Simple vector VT not representable by simple integer vector VT!"); return VecTy; } @@ -104,7 +121,7 @@ /// Test if the given EVT is simple (as opposed to being extended). bool isSimple() const { - return V.SimpleTy >= 0; + return V.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE; } /// Test if the given EVT is extended (as opposed to being simple). @@ -132,6 +149,17 @@ return isSimple() ? V.isVector() : isExtendedVector(); } + /// Return true if this is a vector type where the runtime + /// length is machine dependent + bool isScalableVector() const { + // FIXME: We don't support extended scalable types yet, because the + // matching IR type doesn't exist. Once it has been added, this can + // be changed to call isExtendedScalableVector. + if (!isSimple()) + return false; + return V.isScalableVector(); + } + /// Return true if this is a 16-bit vector type. bool is16BitVector() const { return isSimple() ? V.is16BitVector() : isExtended16BitVector(); @@ -247,6 +275,17 @@ return getExtendedVectorNumElements(); } + // Given a (possibly scalable) vector type, return the ElementCount + MVT::ElementCount getVectorElementCount() const { + assert((isVector()) && "Invalid vector type!"); + if (isSimple()) + return V.getVectorElementCount(); + + assert(!isScalableVector() && + "We don't support extended scalable types yet"); + return {getExtendedVectorNumElements(), false}; + } + /// Return the size of the specified value type in bits. unsigned getSizeInBits() const { if (isSimple()) @@ -301,7 +340,7 @@ EVT widenIntegerVectorElementType(LLVMContext &Context) const { EVT EltVT = getVectorElementType(); EltVT = EVT::getIntegerVT(Context, 2 * EltVT.getSizeInBits()); - return EVT::getVectorVT(Context, EltVT, getVectorNumElements()); + return EVT::getVectorVT(Context, EltVT, getVectorElementCount()); } // Return a VT for a vector type with the same element type but @@ -309,9 +348,8 @@ // extended type. 
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const { EVT EltVT = getVectorElementType(); - auto EltCnt = getVectorNumElements(); - assert(!(getVectorNumElements() & 1) && - "Splitting vector, but not in half!"); + auto EltCnt = getVectorElementCount(); + assert(!(EltCnt.Min & 1) && "Splitting vector, but not in half!"); return EVT::getVectorVT(Context, EltVT, EltCnt / 2); } @@ -327,7 +365,8 @@ if (!isPow2VectorType()) { unsigned NElts = getVectorNumElements(); unsigned Pow2NElts = 1 << Log2_32_Ceil(NElts); - return EVT::getVectorVT(Context, getVectorElementType(), Pow2NElts); + return EVT::getVectorVT(Context, getVectorElementType(), Pow2NElts, + isScalableVector()); } else { return *this; Index: include/llvm/CodeGen/ValueTypes.td =================================================================== --- include/llvm/CodeGen/ValueTypes.td +++ include/llvm/CodeGen/ValueTypes.td @@ -19,101 +19,147 @@ int Value = value; } -def OtherVT: ValueType<0 , 0>; // "Other" value -def i1 : ValueType<1 , 1>; // One bit boolean value -def i8 : ValueType<8 , 2>; // 8-bit integer value -def i16 : ValueType<16 , 3>; // 16-bit integer value -def i32 : ValueType<32 , 4>; // 32-bit integer value -def i64 : ValueType<64 , 5>; // 64-bit integer value -def i128 : ValueType<128, 6>; // 128-bit integer value -def f16 : ValueType<16 , 7>; // 16-bit floating point value -def f32 : ValueType<32 , 8>; // 32-bit floating point value -def f64 : ValueType<64 , 9>; // 64-bit floating point value -def f80 : ValueType<80 , 10>; // 80-bit floating point value -def f128 : ValueType<128, 11>; // 128-bit floating point value -def ppcf128: ValueType<128, 12>; // PPC 128-bit floating point value - -def v2i1 : ValueType<2 , 13>; // 2 x i1 vector value -def v4i1 : ValueType<4 , 14>; // 4 x i1 vector value -def v8i1 : ValueType<8 , 15>; // 8 x i1 vector value -def v16i1 : ValueType<16, 16>; // 16 x i1 vector value -def v32i1 : ValueType<32 , 17>; // 32 x i1 vector value -def v64i1 : ValueType<64 , 18>; // 64 x i1 vector value -def v512i1 : ValueType<512, 19>; // 512 x i1 vector value -def v1024i1: ValueType<1024,20>; //1024 x i1 vector value - -def v1i8 : ValueType<16, 21>; // 1 x i8 vector value -def v2i8 : ValueType<16 , 22>; // 2 x i8 vector value -def v4i8 : ValueType<32 , 23>; // 4 x i8 vector value -def v8i8 : ValueType<64 , 24>; // 8 x i8 vector value -def v16i8 : ValueType<128, 25>; // 16 x i8 vector value -def v32i8 : ValueType<256, 26>; // 32 x i8 vector value -def v64i8 : ValueType<512, 27>; // 64 x i8 vector value -def v128i8 : ValueType<1024,28>; //128 x i8 vector value -def v256i8 : ValueType<2048,29>; //256 x i8 vector value - -def v1i16 : ValueType<16 , 30>; // 1 x i16 vector value -def v2i16 : ValueType<32 , 31>; // 2 x i16 vector value -def v4i16 : ValueType<64 , 32>; // 4 x i16 vector value -def v8i16 : ValueType<128, 33>; // 8 x i16 vector value -def v16i16 : ValueType<256, 34>; // 16 x i16 vector value -def v32i16 : ValueType<512, 35>; // 32 x i16 vector value -def v64i16 : ValueType<1024,36>; // 64 x i16 vector value -def v128i16: ValueType<2048,37>; //128 x i16 vector value - -def v1i32 : ValueType<32 , 38>; // 1 x i32 vector value -def v2i32 : ValueType<64 , 39>; // 2 x i32 vector value -def v4i32 : ValueType<128, 40>; // 4 x i32 vector value -def v8i32 : ValueType<256, 41>; // 8 x i32 vector value -def v16i32 : ValueType<512, 42>; // 16 x i32 vector value -def v32i32 : ValueType<1024,43>; // 32 x i32 vector value -def v64i32 : ValueType<2048,44>; // 32 x i32 vector value - -def v1i64 : ValueType<64 , 
45>; // 1 x i64 vector value -def v2i64 : ValueType<128, 46>; // 2 x i64 vector value -def v4i64 : ValueType<256, 47>; // 4 x i64 vector value -def v8i64 : ValueType<512, 48>; // 8 x i64 vector value -def v16i64 : ValueType<1024,49>; // 16 x i64 vector value -def v32i64 : ValueType<2048,50>; // 32 x i64 vector value - -def v1i128 : ValueType<128, 51>; // 1 x i128 vector value - -def v2f16 : ValueType<32 , 52>; // 2 x f16 vector value -def v4f16 : ValueType<64 , 53>; // 4 x f16 vector value -def v8f16 : ValueType<128, 54>; // 8 x f16 vector value -def v1f32 : ValueType<32 , 55>; // 1 x f32 vector value -def v2f32 : ValueType<64 , 56>; // 2 x f32 vector value -def v4f32 : ValueType<128, 57>; // 4 x f32 vector value -def v8f32 : ValueType<256, 58>; // 8 x f32 vector value -def v16f32 : ValueType<512, 59>; // 16 x f32 vector value -def v1f64 : ValueType<64, 60>; // 1 x f64 vector value -def v2f64 : ValueType<128, 61>; // 2 x f64 vector value -def v4f64 : ValueType<256, 62>; // 4 x f64 vector value -def v8f64 : ValueType<512, 63>; // 8 x f64 vector value - - -def x86mmx : ValueType<64 , 64>; // X86 MMX value -def FlagVT : ValueType<0 , 65>; // Pre-RA sched glue -def isVoid : ValueType<0 , 66>; // Produces no value -def untyped: ValueType<8 , 67>; // Produces an untyped value -def token : ValueType<0 , 120>; // TokenTy -def MetadataVT: ValueType<0, 121>; // Metadata +def OtherVT: ValueType<0 , 1>; // "Other" value +def i1 : ValueType<1 , 2>; // One bit boolean value +def i8 : ValueType<8 , 3>; // 8-bit integer value +def i16 : ValueType<16 , 4>; // 16-bit integer value +def i32 : ValueType<32 , 5>; // 32-bit integer value +def i64 : ValueType<64 , 6>; // 64-bit integer value +def i128 : ValueType<128, 7>; // 128-bit integer value +def f16 : ValueType<16 , 8>; // 16-bit floating point value +def f32 : ValueType<32 , 9>; // 32-bit floating point value +def f64 : ValueType<64 , 10>; // 64-bit floating point value +def f80 : ValueType<80 , 11>; // 80-bit floating point value +def f128 : ValueType<128, 12>; // 128-bit floating point value +def ppcf128: ValueType<128, 13>; // PPC 128-bit floating point value + +def v2i1 : ValueType<2 , 14>; // 2 x i1 vector value +def v4i1 : ValueType<4 , 15>; // 4 x i1 vector value +def v8i1 : ValueType<8 , 16>; // 8 x i1 vector value +def v16i1 : ValueType<16, 17>; // 16 x i1 vector value +def v32i1 : ValueType<32 , 18>; // 32 x i1 vector value +def v64i1 : ValueType<64 , 19>; // 64 x i1 vector value +def v512i1 : ValueType<512, 20>; // 512 x i1 vector value +def v1024i1: ValueType<1024,21>; //1024 x i1 vector value + +def v1i8 : ValueType<16, 22>; // 1 x i8 vector value +def v2i8 : ValueType<16 , 23>; // 2 x i8 vector value +def v4i8 : ValueType<32 , 24>; // 4 x i8 vector value +def v8i8 : ValueType<64 , 25>; // 8 x i8 vector value +def v16i8 : ValueType<128, 26>; // 16 x i8 vector value +def v32i8 : ValueType<256, 27>; // 32 x i8 vector value +def v64i8 : ValueType<512, 28>; // 64 x i8 vector value +def v128i8 : ValueType<1024,29>; //128 x i8 vector value +def v256i8 : ValueType<2048,30>; //256 x i8 vector value + +def v1i16 : ValueType<16 , 31>; // 1 x i16 vector value +def v2i16 : ValueType<32 , 32>; // 2 x i16 vector value +def v4i16 : ValueType<64 , 33>; // 4 x i16 vector value +def v8i16 : ValueType<128, 34>; // 8 x i16 vector value +def v16i16 : ValueType<256, 35>; // 16 x i16 vector value +def v32i16 : ValueType<512, 36>; // 32 x i16 vector value +def v64i16 : ValueType<1024,37>; // 64 x i16 vector value +def v128i16: ValueType<2048,38>; //128 x i16 vector 
value + +def v1i32 : ValueType<32 , 39>; // 1 x i32 vector value +def v2i32 : ValueType<64 , 40>; // 2 x i32 vector value +def v4i32 : ValueType<128, 41>; // 4 x i32 vector value +def v8i32 : ValueType<256, 42>; // 8 x i32 vector value +def v16i32 : ValueType<512, 43>; // 16 x i32 vector value +def v32i32 : ValueType<1024,44>; // 32 x i32 vector value +def v64i32 : ValueType<2048,45>; // 32 x i32 vector value + +def v1i64 : ValueType<64 , 46>; // 1 x i64 vector value +def v2i64 : ValueType<128, 47>; // 2 x i64 vector value +def v4i64 : ValueType<256, 48>; // 4 x i64 vector value +def v8i64 : ValueType<512, 49>; // 8 x i64 vector value +def v16i64 : ValueType<1024,50>; // 16 x i64 vector value +def v32i64 : ValueType<2048,51>; // 32 x i64 vector value + +def v1i128 : ValueType<128, 52>; // 1 x i128 vector value + +def nxv2i1 : ValueType<2, 53>; // n x 2 x i1 vector value +def nxv4i1 : ValueType<4, 54>; // n x 4 x i1 vector value +def nxv8i1 : ValueType<8, 55>; // n x 8 x i1 vector value +def nxv16i1 : ValueType<16, 56>; // n x 16 x i1 vector value +def nxv32i1 : ValueType<32, 57>; // n x 32 x i1 vector value + +def nxv1i8 : ValueType<8, 58>; // n x 1 x i8 vector value +def nxv2i8 : ValueType<16, 59>; // n x 2 x i8 vector value +def nxv4i8 : ValueType<32, 60>; // n x 4 x i8 vector value +def nxv8i8 : ValueType<64, 61>; // n x 8 x i8 vector value +def nxv16i8 : ValueType<128, 62>; // n x 16 x i8 vector value +def nxv32i8 : ValueType<256, 63>; // n x 32 x i8 vector value + +def nxv1i16 : ValueType<16, 64>; // n x 1 x i16 vector value +def nxv2i16 : ValueType<32, 65>; // n x 2 x i16 vector value +def nxv4i16 : ValueType<64, 66>; // n x 4 x i16 vector value +def nxv8i16 : ValueType<128, 67>; // n x 8 x i16 vector value +def nxv16i16: ValueType<256, 68>; // n x 16 x i16 vector value +def nxv32i16: ValueType<512, 69>; // n x 32 x i16 vector value + +def nxv1i32 : ValueType<32, 70>; // n x 1 x i32 vector value +def nxv2i32 : ValueType<64, 71>; // n x 2 x i32 vector value +def nxv4i32 : ValueType<128, 72>; // n x 4 x i32 vector value +def nxv8i32 : ValueType<256, 73>; // n x 8 x i32 vector value +def nxv16i32: ValueType<512, 74>; // n x 16 x i32 vector value +def nxv32i32: ValueType<1024,75>; // n x 32 x i32 vector value + +def nxv1i64 : ValueType<64, 76>; // n x 1 x i64 vector value +def nxv2i64 : ValueType<128, 77>; // n x 2 x i64 vector value +def nxv4i64 : ValueType<256, 78>; // n x 4 x i64 vector value +def nxv8i64 : ValueType<512, 79>; // n x 8 x i64 vector value +def nxv16i64: ValueType<1024,80>; // n x 16 x i64 vector value +def nxv32i64: ValueType<2048,81>; // n x 32 x i64 vector value + +def v2f16 : ValueType<32 , 82>; // 2 x f16 vector value +def v4f16 : ValueType<64 , 83>; // 4 x f16 vector value +def v8f16 : ValueType<128, 84>; // 8 x f16 vector value +def v1f32 : ValueType<32 , 85>; // 1 x f32 vector value +def v2f32 : ValueType<64 , 86>; // 2 x f32 vector value +def v4f32 : ValueType<128, 87>; // 4 x f32 vector value +def v8f32 : ValueType<256, 88>; // 8 x f32 vector value +def v16f32 : ValueType<512, 89>; // 16 x f32 vector value +def v1f64 : ValueType<64, 90>; // 1 x f64 vector value +def v2f64 : ValueType<128, 91>; // 2 x f64 vector value +def v4f64 : ValueType<256, 92>; // 4 x f64 vector value +def v8f64 : ValueType<512, 93>; // 8 x f64 vector value + +def nxv2f16 : ValueType<32 , 94>; // n x 2 x f16 vector value +def nxv4f16 : ValueType<64 , 95>; // n x 4 x f16 vector value +def nxv8f16 : ValueType<128, 96>; // n x 8 x f16 vector value +def nxv1f32 : ValueType<32 , 97>; // n x 1 
x f32 vector value +def nxv2f32 : ValueType<64 , 98>; // n x 2 x f32 vector value +def nxv4f32 : ValueType<128, 99>; // n x 4 x f32 vector value +def nxv8f32 : ValueType<256, 100>; // n x 8 x f32 vector value +def nxv16f32 : ValueType<512, 101>; // n x 16 x f32 vector value +def nxv1f64 : ValueType<64, 102>; // n x 1 x f64 vector value +def nxv2f64 : ValueType<128, 103>; // n x 2 x f64 vector value +def nxv4f64 : ValueType<256, 104>; // n x 4 x f64 vector value +def nxv8f64 : ValueType<512, 105>; // n x 8 x f64 vector value + +def x86mmx : ValueType<64 , 106>; // X86 MMX value +def FlagVT : ValueType<0 , 107>; // Pre-RA sched glue +def isVoid : ValueType<0 , 108>; // Produces no value +def untyped: ValueType<8 , 109>; // Produces an untyped value +def token : ValueType<0 , 248>; // TokenTy +def MetadataVT: ValueType<0, 249>; // Metadata // Pseudo valuetype mapped to the current pointer size to any address space. // Should only be used in TableGen. -def iPTRAny : ValueType<0, 122>; +def iPTRAny : ValueType<0, 250>; // Pseudo valuetype to represent "vector of any size" -def vAny : ValueType<0 , 123>; +def vAny : ValueType<0 , 251>; // Pseudo valuetype to represent "float of any format" -def fAny : ValueType<0 , 124>; +def fAny : ValueType<0 , 252>; // Pseudo valuetype to represent "integer of any bit width" -def iAny : ValueType<0 , 125>; +def iAny : ValueType<0 , 253>; // Pseudo valuetype mapped to the current pointer size. -def iPTR : ValueType<0 , 126>; +def iPTR : ValueType<0 , 254>; // Pseudo valuetype to represent "any type of any size". -def Any : ValueType<0 , 127>; +def Any : ValueType<0 , 255>; Index: include/llvm/DebugInfo/DWARF/DWARFContext.h =================================================================== --- include/llvm/DebugInfo/DWARF/DWARFContext.h +++ include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -50,6 +50,11 @@ // entire size of the debug info sections. typedef DenseMap> RelocAddrMap; +/// Reads a value from data extractor and applies a relocation to the result if +/// one exists for the given offset. +uint64_t getRelocatedValue(const DataExtractor &Data, uint32_t Size, + uint32_t *Off, const RelocAddrMap *Relocs); + /// DWARFContext /// This data structure is the top level entity that deals with dwarf debug /// information parsing. The actual data is supplied through pure virtual Index: include/llvm/IR/ConstantRange.h =================================================================== --- include/llvm/IR/ConstantRange.h +++ include/llvm/IR/ConstantRange.h @@ -93,7 +93,7 @@ /// /// NB! The returned set does *not* contain **all** possible values of X for /// which "X BinOpC Y" does not wrap -- some viable values of X may be - /// missing, so you cannot use this to contrain X's range. E.g. in the last + /// missing, so you cannot use this to constrain X's range. E.g. in the last /// example, "(-2) + 1" is both nsw and nuw (so the "X" could be -2), but (-2) /// is not in the set returned. /// Index: include/llvm/IR/DIBuilder.h =================================================================== --- include/llvm/IR/DIBuilder.h +++ include/llvm/IR/DIBuilder.h @@ -778,6 +778,9 @@ } }; + // Create wrappers for C Binding types (see CBindingWrapping.h). 
+ DEFINE_ISA_CONVERSION_FUNCTIONS(DIBuilder, LLVMDIBuilderRef) + } // end namespace llvm #endif // LLVM_IR_DIBUILDER_H Index: include/llvm/IR/PatternMatch.h =================================================================== --- include/llvm/IR/PatternMatch.h +++ include/llvm/IR/PatternMatch.h @@ -267,15 +267,15 @@ } inline api_pred_ty m_AllOnes(const APInt *&V) { return V; } -struct is_sign_bit { - bool isValue(const APInt &C) { return C.isSignBit(); } +struct is_sign_mask { + bool isValue(const APInt &C) { return C.isSignMask(); } }; /// \brief Match an integer or vector with only the sign bit(s) set. -inline cst_pred_ty m_SignBit() { - return cst_pred_ty(); +inline cst_pred_ty m_SignMask() { + return cst_pred_ty(); } -inline api_pred_ty m_SignBit(const APInt *&V) { return V; } +inline api_pred_ty m_SignMask(const APInt *&V) { return V; } struct is_power2 { bool isValue(const APInt &C) { return C.isPowerOf2(); } Index: include/llvm/ObjectYAML/DWARFYAML.h =================================================================== --- include/llvm/ObjectYAML/DWARFYAML.h +++ include/llvm/ObjectYAML/DWARFYAML.h @@ -236,7 +236,7 @@ static void mapping(IO &IO, DWARFYAML::InitialLength &DWARF); }; -#define HANDLE_DW_TAG(unused, name) \ +#define HANDLE_DW_TAG(unused, name, unused2, unused3) \ io.enumCase(value, "DW_TAG_" #name, dwarf::DW_TAG_##name); template <> struct ScalarEnumerationTraits { @@ -266,7 +266,7 @@ } }; -#define HANDLE_DW_AT(unused, name) \ +#define HANDLE_DW_AT(unused, name, unused2, unused3) \ io.enumCase(value, "DW_AT_" #name, dwarf::DW_AT_##name); template <> struct ScalarEnumerationTraits { @@ -276,7 +276,7 @@ } }; -#define HANDLE_DW_FORM(unused, name) \ +#define HANDLE_DW_FORM(unused, name, unused2, unused3) \ io.enumCase(value, "DW_FORM_" #name, dwarf::DW_FORM_##name); template <> struct ScalarEnumerationTraits { Index: include/llvm/PassSupport.h =================================================================== --- include/llvm/PassSupport.h +++ include/llvm/PassSupport.h @@ -93,11 +93,7 @@ /// static RegisterPass tmp("passopt", "My Pass Name"); /// /// This statement will cause your pass to be created by calling the default -/// constructor exposed by the pass. If you have a different constructor that -/// must be called, create a global constructor function (which takes the -/// arguments you need and returns a Pass*) and register your pass like this: -/// -/// static RegisterPass tmp("passopt", "My Name"); +/// constructor exposed by the pass. /// template struct RegisterPass : public PassInfo { // Register Pass using default constructor... 
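[Editor's note] For reference, a minimal sketch of how the renamed matcher from the PatternMatch.h hunk above is meant to be used. It assumes the updated PatternMatch API (m_SignMask replacing m_SignBit); the helper name matchSignMaskAdd and its surrounding code are illustrative only and not part of this patch. The same fold appears later in the lib/Analysis/InstructionSimplify.cpp hunk.

    // Illustrative sketch only; assumes the updated PatternMatch API above.
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/PatternMatch.h"

    using namespace llvm;
    using namespace llvm::PatternMatch;

    // Returns Y when Add is "add nuw/nsw (xor Y, SignMask), SignMask", i.e. the
    // xor only cleared the sign bit that the no-wrap add is guaranteed to set.
    static Value *matchSignMaskAdd(BinaryOperator *Add) {
      Value *Y;
      if ((Add->hasNoSignedWrap() || Add->hasNoUnsignedWrap()) &&
          match(Add->getOperand(1), m_SignMask()) &&
          match(Add->getOperand(0), m_Xor(m_Value(Y), m_SignMask())))
        return Y;
      return nullptr;
    }
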
Index: include/llvm/Support/ArrayRecycler.h =================================================================== --- include/llvm/Support/ArrayRecycler.h +++ include/llvm/Support/ArrayRecycler.h @@ -47,7 +47,9 @@ FreeList *Entry = Bucket[Idx]; if (!Entry) return nullptr; + __asan_unpoison_memory_region(Entry, Capacity::get(Idx).getSize()); Bucket[Idx] = Entry->Next; + __msan_allocated_memory(Entry, Capacity::get(Idx).getSize()); return reinterpret_cast(Entry); } @@ -59,6 +61,7 @@ Bucket.resize(size_t(Idx) + 1); Entry->Next = Bucket[Idx]; Bucket[Idx] = Entry; + __asan_poison_memory_region(Ptr, Capacity::get(Idx).getSize()); } public: Index: include/llvm/Support/BinaryStreamArray.h =================================================================== --- include/llvm/Support/BinaryStreamArray.h +++ include/llvm/Support/BinaryStreamArray.h @@ -162,6 +162,11 @@ return ThisValue; } + ValueType &operator*() { + assert(Array && !HasError); + return ThisValue; + } + IterType &operator+=(unsigned N) { for (unsigned I = 0; I < N; ++I) { // We are done with the current record, discard it so that we are Index: include/llvm/Support/BranchProbability.h =================================================================== --- include/llvm/Support/BranchProbability.h +++ include/llvm/Support/BranchProbability.h @@ -112,6 +112,13 @@ return *this; } + BranchProbability &operator*=(uint32_t RHS) { + assert(N != UnknownN && + "Unknown probability cannot participate in arithmetics."); + N = (uint64_t(N) * RHS > D) ? D : N * RHS; + return *this; + } + BranchProbability &operator/=(uint32_t RHS) { assert(N != UnknownN && "Unknown probability cannot participate in arithmetics."); @@ -135,6 +142,11 @@ return Prob *= RHS; } + BranchProbability operator*(uint32_t RHS) const { + BranchProbability Prob(*this); + return Prob *= RHS; + } + BranchProbability operator/(uint32_t RHS) const { BranchProbability Prob(*this); return Prob /= RHS; Index: include/llvm/Support/Dwarf.h =================================================================== --- include/llvm/Support/Dwarf.h +++ include/llvm/Support/Dwarf.h @@ -46,7 +46,15 @@ DWARF_VERSION = 4, // Default dwarf version we output. DW_PUBTYPES_VERSION = 2, // Section version number for .debug_pubtypes. DW_PUBNAMES_VERSION = 2, // Section version number for .debug_pubnames. - DW_ARANGES_VERSION = 2 // Section version number for .debug_aranges. + DW_ARANGES_VERSION = 2, // Section version number for .debug_aranges. + // Identifiers we use to distinguish vendor extensions. + DWARF_VENDOR_DWARF = 0, // Defined in v2 or later of the DWARF standard. + DWARF_VENDOR_APPLE = 1, + DWARF_VENDOR_BORLAND = 2, + DWARF_VENDOR_GNU = 3, + DWARF_VENDOR_GOOGLE = 4, + DWARF_VENDOR_LLVM = 5, + DWARF_VENDOR_MIPS = 6 }; // Special ID values that distinguish a CIE from a FDE in DWARF CFI. @@ -55,7 +63,7 @@ const uint64_t DW64_CIE_ID = UINT64_MAX; enum Tag : uint16_t { -#define HANDLE_DW_TAG(ID, NAME) DW_TAG_##NAME = ID, +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) DW_TAG_##NAME = ID, #include "llvm/Support/Dwarf.def" DW_TAG_lo_user = 0x4080, DW_TAG_hi_user = 0xffff, @@ -92,20 +100,20 @@ /// Attributes. 
enum Attribute : uint16_t { -#define HANDLE_DW_AT(ID, NAME) DW_AT_##NAME = ID, +#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) DW_AT_##NAME = ID, #include "llvm/Support/Dwarf.def" DW_AT_lo_user = 0x2000, DW_AT_hi_user = 0x3fff, }; enum Form : uint16_t { -#define HANDLE_DW_FORM(ID, NAME) DW_FORM_##NAME = ID, +#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) DW_FORM_##NAME = ID, #include "llvm/Support/Dwarf.def" DW_FORM_lo_user = 0x1f00, ///< Not specified by DWARF. }; enum LocationAtom { -#define HANDLE_DW_OP(ID, NAME) DW_OP_##NAME = ID, +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) DW_OP_##NAME = ID, #include "llvm/Support/Dwarf.def" DW_OP_lo_user = 0xe0, DW_OP_hi_user = 0xff, @@ -113,7 +121,7 @@ }; enum TypeKind { -#define HANDLE_DW_ATE(ID, NAME) DW_ATE_##NAME = ID, +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) DW_ATE_##NAME = ID, #include "llvm/Support/Dwarf.def" DW_ATE_lo_user = 0x80, DW_ATE_hi_user = 0xff @@ -164,7 +172,7 @@ }; enum SourceLanguage { -#define HANDLE_DW_LANG(ID, NAME) DW_LANG_##NAME = ID, +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) DW_LANG_##NAME = ID, #include "llvm/Support/Dwarf.def" DW_LANG_lo_user = 0x8000, DW_LANG_hi_user = 0xffff @@ -220,8 +228,8 @@ DW_LNE_hi_user = 0xff }; -enum LinerNumberEntryFormat { -#define HANDLE_DW_LNCT(ID, NAME) DW_DEFAULTED_##NAME = ID, +enum LineNumberEntryFormat { +#define HANDLE_DW_LNCT(ID, NAME) DW_LNCT_##NAME = ID, #include "llvm/Support/Dwarf.def" DW_LNCT_lo_user = 0x2000, DW_LNCT_hi_user = 0x3fff, @@ -406,6 +414,40 @@ unsigned getMacinfo(StringRef MacinfoString); /// @} +/// \defgroup DwarfConstantsVersioning Dwarf version for constants +/// +/// For constants defined by DWARF, returns the DWARF version when the constant +/// was first defined. For vendor extensions, if there is a version-related +/// policy for when to emit it, returns a version number for that policy. +/// Otherwise returns 0. +/// +/// @{ +unsigned TagVersion(Tag T); +unsigned AttributeVersion(Attribute A); +unsigned FormVersion(Form F); +unsigned OperationVersion(LocationAtom O); +unsigned AttributeEncodingVersion(TypeKind E); +unsigned LanguageVersion(SourceLanguage L); +/// @} + +/// \defgroup DwarfConstantsVendor Dwarf "vendor" for constants +/// +/// These functions return an identifier describing "who" defined the constant, +/// either the DWARF standard itself or the vendor who defined the extension. +/// +/// @{ +unsigned TagVendor(Tag T); +unsigned AttributeVendor(Attribute A); +unsigned FormVendor(Form F); +unsigned OperationVendor(LocationAtom O); +unsigned AttributeEncodingVendor(TypeKind E); +unsigned LanguageVendor(SourceLanguage L); +/// @} + +/// Tells whether the specified form is defined in the specified version, +/// or is an extension if extensions are allowed. +bool isValidFormForVersion(Form F, unsigned Version, bool ExtensionsOk = true); + /// \brief Returns the symbolic string representing Val when used as a value /// for attribute Attr. 
StringRef AttributeValueString(uint16_t Attr, unsigned Val); Index: include/llvm/Support/Dwarf.def =================================================================== --- include/llvm/Support/Dwarf.def +++ include/llvm/Support/Dwarf.def @@ -25,27 +25,27 @@ #endif #ifndef HANDLE_DW_TAG -#define HANDLE_DW_TAG(ID, NAME) +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) #endif #ifndef HANDLE_DW_AT -#define HANDLE_DW_AT(ID, NAME) +#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) #endif #ifndef HANDLE_DW_FORM -#define HANDLE_DW_FORM(ID, NAME) +#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) #endif #ifndef HANDLE_DW_OP -#define HANDLE_DW_OP(ID, NAME) +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) #endif #ifndef HANDLE_DW_LANG -#define HANDLE_DW_LANG(ID, NAME) +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) #endif #ifndef HANDLE_DW_ATE -#define HANDLE_DW_ATE(ID, NAME) +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) #endif #ifndef HANDLE_DW_VIRTUALITY @@ -92,591 +92,591 @@ #define HANDLE_DW_UT(ID, NAME) #endif -HANDLE_DW_TAG(0x0000, null) -HANDLE_DW_TAG(0x0001, array_type) -HANDLE_DW_TAG(0x0002, class_type) -HANDLE_DW_TAG(0x0003, entry_point) -HANDLE_DW_TAG(0x0004, enumeration_type) -HANDLE_DW_TAG(0x0005, formal_parameter) -HANDLE_DW_TAG(0x0008, imported_declaration) -HANDLE_DW_TAG(0x000a, label) -HANDLE_DW_TAG(0x000b, lexical_block) -HANDLE_DW_TAG(0x000d, member) -HANDLE_DW_TAG(0x000f, pointer_type) -HANDLE_DW_TAG(0x0010, reference_type) -HANDLE_DW_TAG(0x0011, compile_unit) -HANDLE_DW_TAG(0x0012, string_type) -HANDLE_DW_TAG(0x0013, structure_type) -HANDLE_DW_TAG(0x0015, subroutine_type) -HANDLE_DW_TAG(0x0016, typedef) -HANDLE_DW_TAG(0x0017, union_type) -HANDLE_DW_TAG(0x0018, unspecified_parameters) -HANDLE_DW_TAG(0x0019, variant) -HANDLE_DW_TAG(0x001a, common_block) -HANDLE_DW_TAG(0x001b, common_inclusion) -HANDLE_DW_TAG(0x001c, inheritance) -HANDLE_DW_TAG(0x001d, inlined_subroutine) -HANDLE_DW_TAG(0x001e, module) -HANDLE_DW_TAG(0x001f, ptr_to_member_type) -HANDLE_DW_TAG(0x0020, set_type) -HANDLE_DW_TAG(0x0021, subrange_type) -HANDLE_DW_TAG(0x0022, with_stmt) -HANDLE_DW_TAG(0x0023, access_declaration) -HANDLE_DW_TAG(0x0024, base_type) -HANDLE_DW_TAG(0x0025, catch_block) -HANDLE_DW_TAG(0x0026, const_type) -HANDLE_DW_TAG(0x0027, constant) -HANDLE_DW_TAG(0x0028, enumerator) -HANDLE_DW_TAG(0x0029, file_type) -HANDLE_DW_TAG(0x002a, friend) -HANDLE_DW_TAG(0x002b, namelist) -HANDLE_DW_TAG(0x002c, namelist_item) -HANDLE_DW_TAG(0x002d, packed_type) -HANDLE_DW_TAG(0x002e, subprogram) -HANDLE_DW_TAG(0x002f, template_type_parameter) -HANDLE_DW_TAG(0x0030, template_value_parameter) -HANDLE_DW_TAG(0x0031, thrown_type) -HANDLE_DW_TAG(0x0032, try_block) -HANDLE_DW_TAG(0x0033, variant_part) -HANDLE_DW_TAG(0x0034, variable) -HANDLE_DW_TAG(0x0035, volatile_type) +HANDLE_DW_TAG(0x0000, null, 2, DWARF) +HANDLE_DW_TAG(0x0001, array_type, 2, DWARF) +HANDLE_DW_TAG(0x0002, class_type, 2, DWARF) +HANDLE_DW_TAG(0x0003, entry_point, 2, DWARF) +HANDLE_DW_TAG(0x0004, enumeration_type, 2, DWARF) +HANDLE_DW_TAG(0x0005, formal_parameter, 2, DWARF) +HANDLE_DW_TAG(0x0008, imported_declaration, 2, DWARF) +HANDLE_DW_TAG(0x000a, label, 2, DWARF) +HANDLE_DW_TAG(0x000b, lexical_block, 2, DWARF) +HANDLE_DW_TAG(0x000d, member, 2, DWARF) +HANDLE_DW_TAG(0x000f, pointer_type, 2, DWARF) +HANDLE_DW_TAG(0x0010, reference_type, 2, DWARF) +HANDLE_DW_TAG(0x0011, compile_unit, 2, DWARF) +HANDLE_DW_TAG(0x0012, string_type, 2, DWARF) +HANDLE_DW_TAG(0x0013, structure_type, 2, DWARF) +HANDLE_DW_TAG(0x0015, subroutine_type, 2, DWARF) 
+HANDLE_DW_TAG(0x0016, typedef, 2, DWARF) +HANDLE_DW_TAG(0x0017, union_type, 2, DWARF) +HANDLE_DW_TAG(0x0018, unspecified_parameters, 2, DWARF) +HANDLE_DW_TAG(0x0019, variant, 2, DWARF) +HANDLE_DW_TAG(0x001a, common_block, 2, DWARF) +HANDLE_DW_TAG(0x001b, common_inclusion, 2, DWARF) +HANDLE_DW_TAG(0x001c, inheritance, 2, DWARF) +HANDLE_DW_TAG(0x001d, inlined_subroutine, 2, DWARF) +HANDLE_DW_TAG(0x001e, module, 2, DWARF) +HANDLE_DW_TAG(0x001f, ptr_to_member_type, 2, DWARF) +HANDLE_DW_TAG(0x0020, set_type, 2, DWARF) +HANDLE_DW_TAG(0x0021, subrange_type, 2, DWARF) +HANDLE_DW_TAG(0x0022, with_stmt, 2, DWARF) +HANDLE_DW_TAG(0x0023, access_declaration, 2, DWARF) +HANDLE_DW_TAG(0x0024, base_type, 2, DWARF) +HANDLE_DW_TAG(0x0025, catch_block, 2, DWARF) +HANDLE_DW_TAG(0x0026, const_type, 2, DWARF) +HANDLE_DW_TAG(0x0027, constant, 2, DWARF) +HANDLE_DW_TAG(0x0028, enumerator, 2, DWARF) +HANDLE_DW_TAG(0x0029, file_type, 2, DWARF) +HANDLE_DW_TAG(0x002a, friend, 2, DWARF) +HANDLE_DW_TAG(0x002b, namelist, 2, DWARF) +HANDLE_DW_TAG(0x002c, namelist_item, 2, DWARF) +HANDLE_DW_TAG(0x002d, packed_type, 2, DWARF) +HANDLE_DW_TAG(0x002e, subprogram, 2, DWARF) +HANDLE_DW_TAG(0x002f, template_type_parameter, 2, DWARF) +HANDLE_DW_TAG(0x0030, template_value_parameter, 2, DWARF) +HANDLE_DW_TAG(0x0031, thrown_type, 2, DWARF) +HANDLE_DW_TAG(0x0032, try_block, 2, DWARF) +HANDLE_DW_TAG(0x0033, variant_part, 2, DWARF) +HANDLE_DW_TAG(0x0034, variable, 2, DWARF) +HANDLE_DW_TAG(0x0035, volatile_type, 2, DWARF) // New in DWARF v3: -HANDLE_DW_TAG(0x0036, dwarf_procedure) -HANDLE_DW_TAG(0x0037, restrict_type) -HANDLE_DW_TAG(0x0038, interface_type) -HANDLE_DW_TAG(0x0039, namespace) -HANDLE_DW_TAG(0x003a, imported_module) -HANDLE_DW_TAG(0x003b, unspecified_type) -HANDLE_DW_TAG(0x003c, partial_unit) -HANDLE_DW_TAG(0x003d, imported_unit) -HANDLE_DW_TAG(0x003f, condition) -HANDLE_DW_TAG(0x0040, shared_type) +HANDLE_DW_TAG(0x0036, dwarf_procedure, 3, DWARF) +HANDLE_DW_TAG(0x0037, restrict_type, 3, DWARF) +HANDLE_DW_TAG(0x0038, interface_type, 3, DWARF) +HANDLE_DW_TAG(0x0039, namespace, 3, DWARF) +HANDLE_DW_TAG(0x003a, imported_module, 3, DWARF) +HANDLE_DW_TAG(0x003b, unspecified_type, 3, DWARF) +HANDLE_DW_TAG(0x003c, partial_unit, 3, DWARF) +HANDLE_DW_TAG(0x003d, imported_unit, 3, DWARF) +HANDLE_DW_TAG(0x003f, condition, 3, DWARF) +HANDLE_DW_TAG(0x0040, shared_type, 3, DWARF) // New in DWARF v4: -HANDLE_DW_TAG(0x0041, type_unit) -HANDLE_DW_TAG(0x0042, rvalue_reference_type) -HANDLE_DW_TAG(0x0043, template_alias) +HANDLE_DW_TAG(0x0041, type_unit, 4, DWARF) +HANDLE_DW_TAG(0x0042, rvalue_reference_type, 4, DWARF) +HANDLE_DW_TAG(0x0043, template_alias, 4, DWARF) // New in DWARF v5: -HANDLE_DW_TAG(0x0044, coarray_type) -HANDLE_DW_TAG(0x0045, generic_subrange) -HANDLE_DW_TAG(0x0046, dynamic_type) -HANDLE_DW_TAG(0x0047, atomic_type) -HANDLE_DW_TAG(0x0048, call_site) -HANDLE_DW_TAG(0x0049, call_site_parameter) -HANDLE_DW_TAG(0x004a, skeleton_unit) -HANDLE_DW_TAG(0x004b, immutable_type) +HANDLE_DW_TAG(0x0044, coarray_type, 5, DWARF) +HANDLE_DW_TAG(0x0045, generic_subrange, 5, DWARF) +HANDLE_DW_TAG(0x0046, dynamic_type, 5, DWARF) +HANDLE_DW_TAG(0x0047, atomic_type, 5, DWARF) +HANDLE_DW_TAG(0x0048, call_site, 5, DWARF) +HANDLE_DW_TAG(0x0049, call_site_parameter, 5, DWARF) +HANDLE_DW_TAG(0x004a, skeleton_unit, 5, DWARF) +HANDLE_DW_TAG(0x004b, immutable_type, 5, DWARF) // Vendor extensions: -HANDLE_DW_TAG(0x4081, MIPS_loop) -HANDLE_DW_TAG(0x4101, format_label) -HANDLE_DW_TAG(0x4102, function_template) -HANDLE_DW_TAG(0x4103, class_template) 
-HANDLE_DW_TAG(0x4106, GNU_template_template_param) -HANDLE_DW_TAG(0x4107, GNU_template_parameter_pack) -HANDLE_DW_TAG(0x4108, GNU_formal_parameter_pack) -HANDLE_DW_TAG(0x4200, APPLE_property) -HANDLE_DW_TAG(0xb000, BORLAND_property) -HANDLE_DW_TAG(0xb001, BORLAND_Delphi_string) -HANDLE_DW_TAG(0xb002, BORLAND_Delphi_dynamic_array) -HANDLE_DW_TAG(0xb003, BORLAND_Delphi_set) -HANDLE_DW_TAG(0xb004, BORLAND_Delphi_variant) +HANDLE_DW_TAG(0x4081, MIPS_loop, 0, MIPS) +HANDLE_DW_TAG(0x4101, format_label, 0, GNU) +HANDLE_DW_TAG(0x4102, function_template, 0, GNU) +HANDLE_DW_TAG(0x4103, class_template, 0, GNU) +HANDLE_DW_TAG(0x4106, GNU_template_template_param, 0, GNU) +HANDLE_DW_TAG(0x4107, GNU_template_parameter_pack, 0, GNU) +HANDLE_DW_TAG(0x4108, GNU_formal_parameter_pack, 0, GNU) +HANDLE_DW_TAG(0x4200, APPLE_property, 0, APPLE) +HANDLE_DW_TAG(0xb000, BORLAND_property, 0, BORLAND) +HANDLE_DW_TAG(0xb001, BORLAND_Delphi_string, 0, BORLAND) +HANDLE_DW_TAG(0xb002, BORLAND_Delphi_dynamic_array, 0, BORLAND) +HANDLE_DW_TAG(0xb003, BORLAND_Delphi_set, 0, BORLAND) +HANDLE_DW_TAG(0xb004, BORLAND_Delphi_variant, 0, BORLAND) // Attributes. -HANDLE_DW_AT(0x01, sibling) -HANDLE_DW_AT(0x02, location) -HANDLE_DW_AT(0x03, name) -HANDLE_DW_AT(0x09, ordering) -HANDLE_DW_AT(0x0b, byte_size) -HANDLE_DW_AT(0x0c, bit_offset) -HANDLE_DW_AT(0x0d, bit_size) -HANDLE_DW_AT(0x10, stmt_list) -HANDLE_DW_AT(0x11, low_pc) -HANDLE_DW_AT(0x12, high_pc) -HANDLE_DW_AT(0x13, language) -HANDLE_DW_AT(0x15, discr) -HANDLE_DW_AT(0x16, discr_value) -HANDLE_DW_AT(0x17, visibility) -HANDLE_DW_AT(0x18, import) -HANDLE_DW_AT(0x19, string_length) -HANDLE_DW_AT(0x1a, common_reference) -HANDLE_DW_AT(0x1b, comp_dir) -HANDLE_DW_AT(0x1c, const_value) -HANDLE_DW_AT(0x1d, containing_type) -HANDLE_DW_AT(0x1e, default_value) -HANDLE_DW_AT(0x20, inline) -HANDLE_DW_AT(0x21, is_optional) -HANDLE_DW_AT(0x22, lower_bound) -HANDLE_DW_AT(0x25, producer) -HANDLE_DW_AT(0x27, prototyped) -HANDLE_DW_AT(0x2a, return_addr) -HANDLE_DW_AT(0x2c, start_scope) -HANDLE_DW_AT(0x2e, bit_stride) -HANDLE_DW_AT(0x2f, upper_bound) -HANDLE_DW_AT(0x31, abstract_origin) -HANDLE_DW_AT(0x32, accessibility) -HANDLE_DW_AT(0x33, address_class) -HANDLE_DW_AT(0x34, artificial) -HANDLE_DW_AT(0x35, base_types) -HANDLE_DW_AT(0x36, calling_convention) -HANDLE_DW_AT(0x37, count) -HANDLE_DW_AT(0x38, data_member_location) -HANDLE_DW_AT(0x39, decl_column) -HANDLE_DW_AT(0x3a, decl_file) -HANDLE_DW_AT(0x3b, decl_line) -HANDLE_DW_AT(0x3c, declaration) -HANDLE_DW_AT(0x3d, discr_list) -HANDLE_DW_AT(0x3e, encoding) -HANDLE_DW_AT(0x3f, external) -HANDLE_DW_AT(0x40, frame_base) -HANDLE_DW_AT(0x41, friend) -HANDLE_DW_AT(0x42, identifier_case) -HANDLE_DW_AT(0x43, macro_info) -HANDLE_DW_AT(0x44, namelist_item) -HANDLE_DW_AT(0x45, priority) -HANDLE_DW_AT(0x46, segment) -HANDLE_DW_AT(0x47, specification) -HANDLE_DW_AT(0x48, static_link) -HANDLE_DW_AT(0x49, type) -HANDLE_DW_AT(0x4a, use_location) -HANDLE_DW_AT(0x4b, variable_parameter) -HANDLE_DW_AT(0x4c, virtuality) -HANDLE_DW_AT(0x4d, vtable_elem_location) +HANDLE_DW_AT(0x01, sibling, 2, DWARF) +HANDLE_DW_AT(0x02, location, 2, DWARF) +HANDLE_DW_AT(0x03, name, 2, DWARF) +HANDLE_DW_AT(0x09, ordering, 2, DWARF) +HANDLE_DW_AT(0x0b, byte_size, 2, DWARF) +HANDLE_DW_AT(0x0c, bit_offset, 2, DWARF) +HANDLE_DW_AT(0x0d, bit_size, 2, DWARF) +HANDLE_DW_AT(0x10, stmt_list, 2, DWARF) +HANDLE_DW_AT(0x11, low_pc, 2, DWARF) +HANDLE_DW_AT(0x12, high_pc, 2, DWARF) +HANDLE_DW_AT(0x13, language, 2, DWARF) +HANDLE_DW_AT(0x15, discr, 2, DWARF) +HANDLE_DW_AT(0x16, discr_value, 2, 
DWARF) +HANDLE_DW_AT(0x17, visibility, 2, DWARF) +HANDLE_DW_AT(0x18, import, 2, DWARF) +HANDLE_DW_AT(0x19, string_length, 2, DWARF) +HANDLE_DW_AT(0x1a, common_reference, 2, DWARF) +HANDLE_DW_AT(0x1b, comp_dir, 2, DWARF) +HANDLE_DW_AT(0x1c, const_value, 2, DWARF) +HANDLE_DW_AT(0x1d, containing_type, 2, DWARF) +HANDLE_DW_AT(0x1e, default_value, 2, DWARF) +HANDLE_DW_AT(0x20, inline, 2, DWARF) +HANDLE_DW_AT(0x21, is_optional, 2, DWARF) +HANDLE_DW_AT(0x22, lower_bound, 2, DWARF) +HANDLE_DW_AT(0x25, producer, 2, DWARF) +HANDLE_DW_AT(0x27, prototyped, 2, DWARF) +HANDLE_DW_AT(0x2a, return_addr, 2, DWARF) +HANDLE_DW_AT(0x2c, start_scope, 2, DWARF) +HANDLE_DW_AT(0x2e, bit_stride, 2, DWARF) +HANDLE_DW_AT(0x2f, upper_bound, 2, DWARF) +HANDLE_DW_AT(0x31, abstract_origin, 2, DWARF) +HANDLE_DW_AT(0x32, accessibility, 2, DWARF) +HANDLE_DW_AT(0x33, address_class, 2, DWARF) +HANDLE_DW_AT(0x34, artificial, 2, DWARF) +HANDLE_DW_AT(0x35, base_types, 2, DWARF) +HANDLE_DW_AT(0x36, calling_convention, 2, DWARF) +HANDLE_DW_AT(0x37, count, 2, DWARF) +HANDLE_DW_AT(0x38, data_member_location, 2, DWARF) +HANDLE_DW_AT(0x39, decl_column, 2, DWARF) +HANDLE_DW_AT(0x3a, decl_file, 2, DWARF) +HANDLE_DW_AT(0x3b, decl_line, 2, DWARF) +HANDLE_DW_AT(0x3c, declaration, 2, DWARF) +HANDLE_DW_AT(0x3d, discr_list, 2, DWARF) +HANDLE_DW_AT(0x3e, encoding, 2, DWARF) +HANDLE_DW_AT(0x3f, external, 2, DWARF) +HANDLE_DW_AT(0x40, frame_base, 2, DWARF) +HANDLE_DW_AT(0x41, friend, 2, DWARF) +HANDLE_DW_AT(0x42, identifier_case, 2, DWARF) +HANDLE_DW_AT(0x43, macro_info, 2, DWARF) +HANDLE_DW_AT(0x44, namelist_item, 2, DWARF) +HANDLE_DW_AT(0x45, priority, 2, DWARF) +HANDLE_DW_AT(0x46, segment, 2, DWARF) +HANDLE_DW_AT(0x47, specification, 2, DWARF) +HANDLE_DW_AT(0x48, static_link, 2, DWARF) +HANDLE_DW_AT(0x49, type, 2, DWARF) +HANDLE_DW_AT(0x4a, use_location, 2, DWARF) +HANDLE_DW_AT(0x4b, variable_parameter, 2, DWARF) +HANDLE_DW_AT(0x4c, virtuality, 2, DWARF) +HANDLE_DW_AT(0x4d, vtable_elem_location, 2, DWARF) // New in DWARF v3: -HANDLE_DW_AT(0x4e, allocated) -HANDLE_DW_AT(0x4f, associated) -HANDLE_DW_AT(0x50, data_location) -HANDLE_DW_AT(0x51, byte_stride) -HANDLE_DW_AT(0x52, entry_pc) -HANDLE_DW_AT(0x53, use_UTF8) -HANDLE_DW_AT(0x54, extension) -HANDLE_DW_AT(0x55, ranges) -HANDLE_DW_AT(0x56, trampoline) -HANDLE_DW_AT(0x57, call_column) -HANDLE_DW_AT(0x58, call_file) -HANDLE_DW_AT(0x59, call_line) -HANDLE_DW_AT(0x5a, description) -HANDLE_DW_AT(0x5b, binary_scale) -HANDLE_DW_AT(0x5c, decimal_scale) -HANDLE_DW_AT(0x5d, small) -HANDLE_DW_AT(0x5e, decimal_sign) -HANDLE_DW_AT(0x5f, digit_count) -HANDLE_DW_AT(0x60, picture_string) -HANDLE_DW_AT(0x61, mutable) -HANDLE_DW_AT(0x62, threads_scaled) -HANDLE_DW_AT(0x63, explicit) -HANDLE_DW_AT(0x64, object_pointer) -HANDLE_DW_AT(0x65, endianity) -HANDLE_DW_AT(0x66, elemental) -HANDLE_DW_AT(0x67, pure) -HANDLE_DW_AT(0x68, recursive) +HANDLE_DW_AT(0x4e, allocated, 3, DWARF) +HANDLE_DW_AT(0x4f, associated, 3, DWARF) +HANDLE_DW_AT(0x50, data_location, 3, DWARF) +HANDLE_DW_AT(0x51, byte_stride, 3, DWARF) +HANDLE_DW_AT(0x52, entry_pc, 3, DWARF) +HANDLE_DW_AT(0x53, use_UTF8, 3, DWARF) +HANDLE_DW_AT(0x54, extension, 3, DWARF) +HANDLE_DW_AT(0x55, ranges, 3, DWARF) +HANDLE_DW_AT(0x56, trampoline, 3, DWARF) +HANDLE_DW_AT(0x57, call_column, 3, DWARF) +HANDLE_DW_AT(0x58, call_file, 3, DWARF) +HANDLE_DW_AT(0x59, call_line, 3, DWARF) +HANDLE_DW_AT(0x5a, description, 3, DWARF) +HANDLE_DW_AT(0x5b, binary_scale, 3, DWARF) +HANDLE_DW_AT(0x5c, decimal_scale, 3, DWARF) +HANDLE_DW_AT(0x5d, small, 3, DWARF) +HANDLE_DW_AT(0x5e, 
decimal_sign, 3, DWARF) +HANDLE_DW_AT(0x5f, digit_count, 3, DWARF) +HANDLE_DW_AT(0x60, picture_string, 3, DWARF) +HANDLE_DW_AT(0x61, mutable, 3, DWARF) +HANDLE_DW_AT(0x62, threads_scaled, 3, DWARF) +HANDLE_DW_AT(0x63, explicit, 3, DWARF) +HANDLE_DW_AT(0x64, object_pointer, 3, DWARF) +HANDLE_DW_AT(0x65, endianity, 3, DWARF) +HANDLE_DW_AT(0x66, elemental, 3, DWARF) +HANDLE_DW_AT(0x67, pure, 3, DWARF) +HANDLE_DW_AT(0x68, recursive, 3, DWARF) // New in DWARF v4: -HANDLE_DW_AT(0x69, signature) -HANDLE_DW_AT(0x6a, main_subprogram) -HANDLE_DW_AT(0x6b, data_bit_offset) -HANDLE_DW_AT(0x6c, const_expr) -HANDLE_DW_AT(0x6d, enum_class) -HANDLE_DW_AT(0x6e, linkage_name) +HANDLE_DW_AT(0x69, signature, 4, DWARF) +HANDLE_DW_AT(0x6a, main_subprogram, 4, DWARF) +HANDLE_DW_AT(0x6b, data_bit_offset, 4, DWARF) +HANDLE_DW_AT(0x6c, const_expr, 4, DWARF) +HANDLE_DW_AT(0x6d, enum_class, 4, DWARF) +HANDLE_DW_AT(0x6e, linkage_name, 4, DWARF) // New in DWARF v5: -HANDLE_DW_AT(0x6f, string_length_bit_size) -HANDLE_DW_AT(0x70, string_length_byte_size) -HANDLE_DW_AT(0x71, rank) -HANDLE_DW_AT(0x72, str_offsets_base) -HANDLE_DW_AT(0x73, addr_base) -HANDLE_DW_AT(0x74, rnglists_base) -HANDLE_DW_AT(0x75, dwo_id) ///< Retracted from DWARF 5. -HANDLE_DW_AT(0x76, dwo_name) -HANDLE_DW_AT(0x77, reference) -HANDLE_DW_AT(0x78, rvalue_reference) -HANDLE_DW_AT(0x79, macros) -HANDLE_DW_AT(0x7a, call_all_calls) -HANDLE_DW_AT(0x7b, call_all_source_calls) -HANDLE_DW_AT(0x7c, call_all_tail_calls) -HANDLE_DW_AT(0x7d, call_return_pc) -HANDLE_DW_AT(0x7e, call_value) -HANDLE_DW_AT(0x7f, call_origin) -HANDLE_DW_AT(0x80, call_parameter) -HANDLE_DW_AT(0x81, call_pc) -HANDLE_DW_AT(0x82, call_tail_call) -HANDLE_DW_AT(0x83, call_target) -HANDLE_DW_AT(0x84, call_target_clobbered) -HANDLE_DW_AT(0x85, call_data_location) -HANDLE_DW_AT(0x86, call_data_value) -HANDLE_DW_AT(0x87, noreturn) -HANDLE_DW_AT(0x88, alignment) -HANDLE_DW_AT(0x89, export_symbols) -HANDLE_DW_AT(0x8a, deleted) -HANDLE_DW_AT(0x8b, defaulted) -HANDLE_DW_AT(0x8c, loclists_base) +HANDLE_DW_AT(0x6f, string_length_bit_size, 5, DWARF) +HANDLE_DW_AT(0x70, string_length_byte_size, 5, DWARF) +HANDLE_DW_AT(0x71, rank, 5, DWARF) +HANDLE_DW_AT(0x72, str_offsets_base, 5, DWARF) +HANDLE_DW_AT(0x73, addr_base, 5, DWARF) +HANDLE_DW_AT(0x74, rnglists_base, 5, DWARF) +HANDLE_DW_AT(0x75, dwo_id, 0, DWARF) ///< Retracted from DWARF v5. 
+HANDLE_DW_AT(0x76, dwo_name, 5, DWARF) +HANDLE_DW_AT(0x77, reference, 5, DWARF) +HANDLE_DW_AT(0x78, rvalue_reference, 5, DWARF) +HANDLE_DW_AT(0x79, macros, 5, DWARF) +HANDLE_DW_AT(0x7a, call_all_calls, 5, DWARF) +HANDLE_DW_AT(0x7b, call_all_source_calls, 5, DWARF) +HANDLE_DW_AT(0x7c, call_all_tail_calls, 5, DWARF) +HANDLE_DW_AT(0x7d, call_return_pc, 5, DWARF) +HANDLE_DW_AT(0x7e, call_value, 5, DWARF) +HANDLE_DW_AT(0x7f, call_origin, 5, DWARF) +HANDLE_DW_AT(0x80, call_parameter, 5, DWARF) +HANDLE_DW_AT(0x81, call_pc, 5, DWARF) +HANDLE_DW_AT(0x82, call_tail_call, 5, DWARF) +HANDLE_DW_AT(0x83, call_target, 5, DWARF) +HANDLE_DW_AT(0x84, call_target_clobbered, 5, DWARF) +HANDLE_DW_AT(0x85, call_data_location, 5, DWARF) +HANDLE_DW_AT(0x86, call_data_value, 5, DWARF) +HANDLE_DW_AT(0x87, noreturn, 5, DWARF) +HANDLE_DW_AT(0x88, alignment, 5, DWARF) +HANDLE_DW_AT(0x89, export_symbols, 5, DWARF) +HANDLE_DW_AT(0x8a, deleted, 5, DWARF) +HANDLE_DW_AT(0x8b, defaulted, 5, DWARF) +HANDLE_DW_AT(0x8c, loclists_base, 5, DWARF) // Vendor extensions: -HANDLE_DW_AT(0x2002, MIPS_loop_begin) -HANDLE_DW_AT(0x2003, MIPS_tail_loop_begin) -HANDLE_DW_AT(0x2004, MIPS_epilog_begin) -HANDLE_DW_AT(0x2005, MIPS_loop_unroll_factor) -HANDLE_DW_AT(0x2006, MIPS_software_pipeline_depth) -HANDLE_DW_AT(0x2007, MIPS_linkage_name) -HANDLE_DW_AT(0x2008, MIPS_stride) -HANDLE_DW_AT(0x2009, MIPS_abstract_name) -HANDLE_DW_AT(0x200a, MIPS_clone_origin) -HANDLE_DW_AT(0x200b, MIPS_has_inlines) -HANDLE_DW_AT(0x200c, MIPS_stride_byte) -HANDLE_DW_AT(0x200d, MIPS_stride_elem) -HANDLE_DW_AT(0x200e, MIPS_ptr_dopetype) -HANDLE_DW_AT(0x200f, MIPS_allocatable_dopetype) -HANDLE_DW_AT(0x2010, MIPS_assumed_shape_dopetype) +HANDLE_DW_AT(0x2002, MIPS_loop_begin, 0, MIPS) +HANDLE_DW_AT(0x2003, MIPS_tail_loop_begin, 0, MIPS) +HANDLE_DW_AT(0x2004, MIPS_epilog_begin, 0, MIPS) +HANDLE_DW_AT(0x2005, MIPS_loop_unroll_factor, 0, MIPS) +HANDLE_DW_AT(0x2006, MIPS_software_pipeline_depth, 0, MIPS) +HANDLE_DW_AT(0x2007, MIPS_linkage_name, 0, MIPS) +HANDLE_DW_AT(0x2008, MIPS_stride, 0, MIPS) +HANDLE_DW_AT(0x2009, MIPS_abstract_name, 0, MIPS) +HANDLE_DW_AT(0x200a, MIPS_clone_origin, 0, MIPS) +HANDLE_DW_AT(0x200b, MIPS_has_inlines, 0, MIPS) +HANDLE_DW_AT(0x200c, MIPS_stride_byte, 0, MIPS) +HANDLE_DW_AT(0x200d, MIPS_stride_elem, 0, MIPS) +HANDLE_DW_AT(0x200e, MIPS_ptr_dopetype, 0, MIPS) +HANDLE_DW_AT(0x200f, MIPS_allocatable_dopetype, 0, MIPS) +HANDLE_DW_AT(0x2010, MIPS_assumed_shape_dopetype, 0, MIPS) // This one appears to have only been implemented by Open64 for // fortran and may conflict with other extensions. -HANDLE_DW_AT(0x2011, MIPS_assumed_size) +HANDLE_DW_AT(0x2011, MIPS_assumed_size, 0, MIPS) // GNU extensions -HANDLE_DW_AT(0x2101, sf_names) -HANDLE_DW_AT(0x2102, src_info) -HANDLE_DW_AT(0x2103, mac_info) -HANDLE_DW_AT(0x2104, src_coords) -HANDLE_DW_AT(0x2105, body_begin) -HANDLE_DW_AT(0x2106, body_end) -HANDLE_DW_AT(0x2107, GNU_vector) -HANDLE_DW_AT(0x2110, GNU_template_name) -HANDLE_DW_AT(0x210f, GNU_odr_signature) -HANDLE_DW_AT(0x2119, GNU_macros) +HANDLE_DW_AT(0x2101, sf_names, 0, GNU) +HANDLE_DW_AT(0x2102, src_info, 0, GNU) +HANDLE_DW_AT(0x2103, mac_info, 0, GNU) +HANDLE_DW_AT(0x2104, src_coords, 0, GNU) +HANDLE_DW_AT(0x2105, body_begin, 0, GNU) +HANDLE_DW_AT(0x2106, body_end, 0, GNU) +HANDLE_DW_AT(0x2107, GNU_vector, 0, GNU) +HANDLE_DW_AT(0x2110, GNU_template_name, 0, GNU) +HANDLE_DW_AT(0x210f, GNU_odr_signature, 0, GNU) +HANDLE_DW_AT(0x2119, GNU_macros, 0, GNU) // Extensions for Fission proposal. 
-HANDLE_DW_AT(0x2130, GNU_dwo_name) -HANDLE_DW_AT(0x2131, GNU_dwo_id) -HANDLE_DW_AT(0x2132, GNU_ranges_base) -HANDLE_DW_AT(0x2133, GNU_addr_base) -HANDLE_DW_AT(0x2134, GNU_pubnames) -HANDLE_DW_AT(0x2135, GNU_pubtypes) -HANDLE_DW_AT(0x2136, GNU_discriminator) +HANDLE_DW_AT(0x2130, GNU_dwo_name, 0, GNU) +HANDLE_DW_AT(0x2131, GNU_dwo_id, 0, GNU) +HANDLE_DW_AT(0x2132, GNU_ranges_base, 0, GNU) +HANDLE_DW_AT(0x2133, GNU_addr_base, 0, GNU) +HANDLE_DW_AT(0x2134, GNU_pubnames, 0, GNU) +HANDLE_DW_AT(0x2135, GNU_pubtypes, 0, GNU) +HANDLE_DW_AT(0x2136, GNU_discriminator, 0, GNU) // Borland extensions. -HANDLE_DW_AT(0x3b11, BORLAND_property_read) -HANDLE_DW_AT(0x3b12, BORLAND_property_write) -HANDLE_DW_AT(0x3b13, BORLAND_property_implements) -HANDLE_DW_AT(0x3b14, BORLAND_property_index) -HANDLE_DW_AT(0x3b15, BORLAND_property_default) -HANDLE_DW_AT(0x3b20, BORLAND_Delphi_unit) -HANDLE_DW_AT(0x3b21, BORLAND_Delphi_class) -HANDLE_DW_AT(0x3b22, BORLAND_Delphi_record) -HANDLE_DW_AT(0x3b23, BORLAND_Delphi_metaclass) -HANDLE_DW_AT(0x3b24, BORLAND_Delphi_constructor) -HANDLE_DW_AT(0x3b25, BORLAND_Delphi_destructor) -HANDLE_DW_AT(0x3b26, BORLAND_Delphi_anonymous_method) -HANDLE_DW_AT(0x3b27, BORLAND_Delphi_interface) -HANDLE_DW_AT(0x3b28, BORLAND_Delphi_ABI) -HANDLE_DW_AT(0x3b29, BORLAND_Delphi_return) -HANDLE_DW_AT(0x3b30, BORLAND_Delphi_frameptr) -HANDLE_DW_AT(0x3b31, BORLAND_closure) +HANDLE_DW_AT(0x3b11, BORLAND_property_read, 0, BORLAND) +HANDLE_DW_AT(0x3b12, BORLAND_property_write, 0, BORLAND) +HANDLE_DW_AT(0x3b13, BORLAND_property_implements, 0, BORLAND) +HANDLE_DW_AT(0x3b14, BORLAND_property_index, 0, BORLAND) +HANDLE_DW_AT(0x3b15, BORLAND_property_default, 0, BORLAND) +HANDLE_DW_AT(0x3b20, BORLAND_Delphi_unit, 0, BORLAND) +HANDLE_DW_AT(0x3b21, BORLAND_Delphi_class, 0, BORLAND) +HANDLE_DW_AT(0x3b22, BORLAND_Delphi_record, 0, BORLAND) +HANDLE_DW_AT(0x3b23, BORLAND_Delphi_metaclass, 0, BORLAND) +HANDLE_DW_AT(0x3b24, BORLAND_Delphi_constructor, 0, BORLAND) +HANDLE_DW_AT(0x3b25, BORLAND_Delphi_destructor, 0, BORLAND) +HANDLE_DW_AT(0x3b26, BORLAND_Delphi_anonymous_method, 0, BORLAND) +HANDLE_DW_AT(0x3b27, BORLAND_Delphi_interface, 0, BORLAND) +HANDLE_DW_AT(0x3b28, BORLAND_Delphi_ABI, 0, BORLAND) +HANDLE_DW_AT(0x3b29, BORLAND_Delphi_return, 0, BORLAND) +HANDLE_DW_AT(0x3b30, BORLAND_Delphi_frameptr, 0, BORLAND) +HANDLE_DW_AT(0x3b31, BORLAND_closure, 0, BORLAND) // LLVM project extensions. -HANDLE_DW_AT(0x3e00, LLVM_include_path) -HANDLE_DW_AT(0x3e01, LLVM_config_macros) -HANDLE_DW_AT(0x3e02, LLVM_isysroot) +HANDLE_DW_AT(0x3e00, LLVM_include_path, 0, LLVM) +HANDLE_DW_AT(0x3e01, LLVM_config_macros, 0, LLVM) +HANDLE_DW_AT(0x3e02, LLVM_isysroot, 0, LLVM) // Apple extensions. 
-HANDLE_DW_AT(0x3fe1, APPLE_optimized) -HANDLE_DW_AT(0x3fe2, APPLE_flags) -HANDLE_DW_AT(0x3fe3, APPLE_isa) -HANDLE_DW_AT(0x3fe4, APPLE_block) -HANDLE_DW_AT(0x3fe5, APPLE_major_runtime_vers) -HANDLE_DW_AT(0x3fe6, APPLE_runtime_class) -HANDLE_DW_AT(0x3fe7, APPLE_omit_frame_ptr) -HANDLE_DW_AT(0x3fe8, APPLE_property_name) -HANDLE_DW_AT(0x3fe9, APPLE_property_getter) -HANDLE_DW_AT(0x3fea, APPLE_property_setter) -HANDLE_DW_AT(0x3feb, APPLE_property_attribute) -HANDLE_DW_AT(0x3fec, APPLE_objc_complete_type) -HANDLE_DW_AT(0x3fed, APPLE_property) +HANDLE_DW_AT(0x3fe1, APPLE_optimized, 0, APPLE) +HANDLE_DW_AT(0x3fe2, APPLE_flags, 0, APPLE) +HANDLE_DW_AT(0x3fe3, APPLE_isa, 0, APPLE) +HANDLE_DW_AT(0x3fe4, APPLE_block, 0, APPLE) +HANDLE_DW_AT(0x3fe5, APPLE_major_runtime_vers, 0, APPLE) +HANDLE_DW_AT(0x3fe6, APPLE_runtime_class, 0, APPLE) +HANDLE_DW_AT(0x3fe7, APPLE_omit_frame_ptr, 0, APPLE) +HANDLE_DW_AT(0x3fe8, APPLE_property_name, 0, APPLE) +HANDLE_DW_AT(0x3fe9, APPLE_property_getter, 0, APPLE) +HANDLE_DW_AT(0x3fea, APPLE_property_setter, 0, APPLE) +HANDLE_DW_AT(0x3feb, APPLE_property_attribute, 0, APPLE) +HANDLE_DW_AT(0x3fec, APPLE_objc_complete_type, 0, APPLE) +HANDLE_DW_AT(0x3fed, APPLE_property, 0, APPLE) // Attribute form encodings. -HANDLE_DW_FORM(0x01, addr) -HANDLE_DW_FORM(0x03, block2) -HANDLE_DW_FORM(0x04, block4) -HANDLE_DW_FORM(0x05, data2) -HANDLE_DW_FORM(0x06, data4) -HANDLE_DW_FORM(0x07, data8) -HANDLE_DW_FORM(0x08, string) -HANDLE_DW_FORM(0x09, block) -HANDLE_DW_FORM(0x0a, block1) -HANDLE_DW_FORM(0x0b, data1) -HANDLE_DW_FORM(0x0c, flag) -HANDLE_DW_FORM(0x0d, sdata) -HANDLE_DW_FORM(0x0e, strp) -HANDLE_DW_FORM(0x0f, udata) -HANDLE_DW_FORM(0x10, ref_addr) -HANDLE_DW_FORM(0x11, ref1) -HANDLE_DW_FORM(0x12, ref2) -HANDLE_DW_FORM(0x13, ref4) -HANDLE_DW_FORM(0x14, ref8) -HANDLE_DW_FORM(0x15, ref_udata) -HANDLE_DW_FORM(0x16, indirect) +HANDLE_DW_FORM(0x01, addr, 2, DWARF) +HANDLE_DW_FORM(0x03, block2, 2, DWARF) +HANDLE_DW_FORM(0x04, block4, 2, DWARF) +HANDLE_DW_FORM(0x05, data2, 2, DWARF) +HANDLE_DW_FORM(0x06, data4, 2, DWARF) +HANDLE_DW_FORM(0x07, data8, 2, DWARF) +HANDLE_DW_FORM(0x08, string, 2, DWARF) +HANDLE_DW_FORM(0x09, block, 2, DWARF) +HANDLE_DW_FORM(0x0a, block1, 2, DWARF) +HANDLE_DW_FORM(0x0b, data1, 2, DWARF) +HANDLE_DW_FORM(0x0c, flag, 2, DWARF) +HANDLE_DW_FORM(0x0d, sdata, 2, DWARF) +HANDLE_DW_FORM(0x0e, strp, 2, DWARF) +HANDLE_DW_FORM(0x0f, udata, 2, DWARF) +HANDLE_DW_FORM(0x10, ref_addr, 2, DWARF) +HANDLE_DW_FORM(0x11, ref1, 2, DWARF) +HANDLE_DW_FORM(0x12, ref2, 2, DWARF) +HANDLE_DW_FORM(0x13, ref4, 2, DWARF) +HANDLE_DW_FORM(0x14, ref8, 2, DWARF) +HANDLE_DW_FORM(0x15, ref_udata, 2, DWARF) +HANDLE_DW_FORM(0x16, indirect, 2, DWARF) // New in DWARF v4: -HANDLE_DW_FORM(0x17, sec_offset) -HANDLE_DW_FORM(0x18, exprloc) -HANDLE_DW_FORM(0x19, flag_present) +HANDLE_DW_FORM(0x17, sec_offset, 4, DWARF) +HANDLE_DW_FORM(0x18, exprloc, 4, DWARF) +HANDLE_DW_FORM(0x19, flag_present, 4, DWARF) // This was defined out of sequence. 
-HANDLE_DW_FORM(0x20, ref_sig8) +HANDLE_DW_FORM(0x20, ref_sig8, 4, DWARF) // New in DWARF v5: -HANDLE_DW_FORM(0x1a, strx) -HANDLE_DW_FORM(0x1b, addrx) -HANDLE_DW_FORM(0x1c, ref_sup4) -HANDLE_DW_FORM(0x1d, strp_sup) -HANDLE_DW_FORM(0x1e, data16) -HANDLE_DW_FORM(0x1f, line_strp) -HANDLE_DW_FORM(0x21, implicit_const) -HANDLE_DW_FORM(0x22, loclistx) -HANDLE_DW_FORM(0x23, rnglistx) -HANDLE_DW_FORM(0x24, ref_sup8) -HANDLE_DW_FORM(0x25, strx1) -HANDLE_DW_FORM(0x26, strx2) -HANDLE_DW_FORM(0x27, strx3) -HANDLE_DW_FORM(0x28, strx4) -HANDLE_DW_FORM(0x29, addrx1) -HANDLE_DW_FORM(0x2a, addrx2) -HANDLE_DW_FORM(0x2b, addrx3) -HANDLE_DW_FORM(0x2c, addrx4) +HANDLE_DW_FORM(0x1a, strx, 5, DWARF) +HANDLE_DW_FORM(0x1b, addrx, 5, DWARF) +HANDLE_DW_FORM(0x1c, ref_sup4, 5, DWARF) +HANDLE_DW_FORM(0x1d, strp_sup, 5, DWARF) +HANDLE_DW_FORM(0x1e, data16, 5, DWARF) +HANDLE_DW_FORM(0x1f, line_strp, 5, DWARF) +HANDLE_DW_FORM(0x21, implicit_const, 5, DWARF) +HANDLE_DW_FORM(0x22, loclistx, 5, DWARF) +HANDLE_DW_FORM(0x23, rnglistx, 5, DWARF) +HANDLE_DW_FORM(0x24, ref_sup8, 5, DWARF) +HANDLE_DW_FORM(0x25, strx1, 5, DWARF) +HANDLE_DW_FORM(0x26, strx2, 5, DWARF) +HANDLE_DW_FORM(0x27, strx3, 5, DWARF) +HANDLE_DW_FORM(0x28, strx4, 5, DWARF) +HANDLE_DW_FORM(0x29, addrx1, 5, DWARF) +HANDLE_DW_FORM(0x2a, addrx2, 5, DWARF) +HANDLE_DW_FORM(0x2b, addrx3, 5, DWARF) +HANDLE_DW_FORM(0x2c, addrx4, 5, DWARF) // Extensions for Fission proposal -HANDLE_DW_FORM(0x1f01, GNU_addr_index) -HANDLE_DW_FORM(0x1f02, GNU_str_index) +HANDLE_DW_FORM(0x1f01, GNU_addr_index, 0, GNU) +HANDLE_DW_FORM(0x1f02, GNU_str_index, 0, GNU) // Alternate debug sections proposal (output of "dwz" tool). -HANDLE_DW_FORM(0x1f20, GNU_ref_alt) -HANDLE_DW_FORM(0x1f21, GNU_strp_alt) +HANDLE_DW_FORM(0x1f20, GNU_ref_alt, 0, GNU) +HANDLE_DW_FORM(0x1f21, GNU_strp_alt, 0, GNU) // DWARF Expression operators. 
-HANDLE_DW_OP(0x03, addr) -HANDLE_DW_OP(0x06, deref) -HANDLE_DW_OP(0x08, const1u) -HANDLE_DW_OP(0x09, const1s) -HANDLE_DW_OP(0x0a, const2u) -HANDLE_DW_OP(0x0b, const2s) -HANDLE_DW_OP(0x0c, const4u) -HANDLE_DW_OP(0x0d, const4s) -HANDLE_DW_OP(0x0e, const8u) -HANDLE_DW_OP(0x0f, const8s) -HANDLE_DW_OP(0x10, constu) -HANDLE_DW_OP(0x11, consts) -HANDLE_DW_OP(0x12, dup) -HANDLE_DW_OP(0x13, drop) -HANDLE_DW_OP(0x14, over) -HANDLE_DW_OP(0x15, pick) -HANDLE_DW_OP(0x16, swap) -HANDLE_DW_OP(0x17, rot) -HANDLE_DW_OP(0x18, xderef) -HANDLE_DW_OP(0x19, abs) -HANDLE_DW_OP(0x1a, and) -HANDLE_DW_OP(0x1b, div) -HANDLE_DW_OP(0x1c, minus) -HANDLE_DW_OP(0x1d, mod) -HANDLE_DW_OP(0x1e, mul) -HANDLE_DW_OP(0x1f, neg) -HANDLE_DW_OP(0x20, not) -HANDLE_DW_OP(0x21, or) -HANDLE_DW_OP(0x22, plus) -HANDLE_DW_OP(0x23, plus_uconst) -HANDLE_DW_OP(0x24, shl) -HANDLE_DW_OP(0x25, shr) -HANDLE_DW_OP(0x26, shra) -HANDLE_DW_OP(0x27, xor) -HANDLE_DW_OP(0x28, bra) -HANDLE_DW_OP(0x29, eq) -HANDLE_DW_OP(0x2a, ge) -HANDLE_DW_OP(0x2b, gt) -HANDLE_DW_OP(0x2c, le) -HANDLE_DW_OP(0x2d, lt) -HANDLE_DW_OP(0x2e, ne) -HANDLE_DW_OP(0x2f, skip) -HANDLE_DW_OP(0x30, lit0) -HANDLE_DW_OP(0x31, lit1) -HANDLE_DW_OP(0x32, lit2) -HANDLE_DW_OP(0x33, lit3) -HANDLE_DW_OP(0x34, lit4) -HANDLE_DW_OP(0x35, lit5) -HANDLE_DW_OP(0x36, lit6) -HANDLE_DW_OP(0x37, lit7) -HANDLE_DW_OP(0x38, lit8) -HANDLE_DW_OP(0x39, lit9) -HANDLE_DW_OP(0x3a, lit10) -HANDLE_DW_OP(0x3b, lit11) -HANDLE_DW_OP(0x3c, lit12) -HANDLE_DW_OP(0x3d, lit13) -HANDLE_DW_OP(0x3e, lit14) -HANDLE_DW_OP(0x3f, lit15) -HANDLE_DW_OP(0x40, lit16) -HANDLE_DW_OP(0x41, lit17) -HANDLE_DW_OP(0x42, lit18) -HANDLE_DW_OP(0x43, lit19) -HANDLE_DW_OP(0x44, lit20) -HANDLE_DW_OP(0x45, lit21) -HANDLE_DW_OP(0x46, lit22) -HANDLE_DW_OP(0x47, lit23) -HANDLE_DW_OP(0x48, lit24) -HANDLE_DW_OP(0x49, lit25) -HANDLE_DW_OP(0x4a, lit26) -HANDLE_DW_OP(0x4b, lit27) -HANDLE_DW_OP(0x4c, lit28) -HANDLE_DW_OP(0x4d, lit29) -HANDLE_DW_OP(0x4e, lit30) -HANDLE_DW_OP(0x4f, lit31) -HANDLE_DW_OP(0x50, reg0) -HANDLE_DW_OP(0x51, reg1) -HANDLE_DW_OP(0x52, reg2) -HANDLE_DW_OP(0x53, reg3) -HANDLE_DW_OP(0x54, reg4) -HANDLE_DW_OP(0x55, reg5) -HANDLE_DW_OP(0x56, reg6) -HANDLE_DW_OP(0x57, reg7) -HANDLE_DW_OP(0x58, reg8) -HANDLE_DW_OP(0x59, reg9) -HANDLE_DW_OP(0x5a, reg10) -HANDLE_DW_OP(0x5b, reg11) -HANDLE_DW_OP(0x5c, reg12) -HANDLE_DW_OP(0x5d, reg13) -HANDLE_DW_OP(0x5e, reg14) -HANDLE_DW_OP(0x5f, reg15) -HANDLE_DW_OP(0x60, reg16) -HANDLE_DW_OP(0x61, reg17) -HANDLE_DW_OP(0x62, reg18) -HANDLE_DW_OP(0x63, reg19) -HANDLE_DW_OP(0x64, reg20) -HANDLE_DW_OP(0x65, reg21) -HANDLE_DW_OP(0x66, reg22) -HANDLE_DW_OP(0x67, reg23) -HANDLE_DW_OP(0x68, reg24) -HANDLE_DW_OP(0x69, reg25) -HANDLE_DW_OP(0x6a, reg26) -HANDLE_DW_OP(0x6b, reg27) -HANDLE_DW_OP(0x6c, reg28) -HANDLE_DW_OP(0x6d, reg29) -HANDLE_DW_OP(0x6e, reg30) -HANDLE_DW_OP(0x6f, reg31) -HANDLE_DW_OP(0x70, breg0) -HANDLE_DW_OP(0x71, breg1) -HANDLE_DW_OP(0x72, breg2) -HANDLE_DW_OP(0x73, breg3) -HANDLE_DW_OP(0x74, breg4) -HANDLE_DW_OP(0x75, breg5) -HANDLE_DW_OP(0x76, breg6) -HANDLE_DW_OP(0x77, breg7) -HANDLE_DW_OP(0x78, breg8) -HANDLE_DW_OP(0x79, breg9) -HANDLE_DW_OP(0x7a, breg10) -HANDLE_DW_OP(0x7b, breg11) -HANDLE_DW_OP(0x7c, breg12) -HANDLE_DW_OP(0x7d, breg13) -HANDLE_DW_OP(0x7e, breg14) -HANDLE_DW_OP(0x7f, breg15) -HANDLE_DW_OP(0x80, breg16) -HANDLE_DW_OP(0x81, breg17) -HANDLE_DW_OP(0x82, breg18) -HANDLE_DW_OP(0x83, breg19) -HANDLE_DW_OP(0x84, breg20) -HANDLE_DW_OP(0x85, breg21) -HANDLE_DW_OP(0x86, breg22) -HANDLE_DW_OP(0x87, breg23) -HANDLE_DW_OP(0x88, breg24) -HANDLE_DW_OP(0x89, breg25) -HANDLE_DW_OP(0x8a, 
breg26) -HANDLE_DW_OP(0x8b, breg27) -HANDLE_DW_OP(0x8c, breg28) -HANDLE_DW_OP(0x8d, breg29) -HANDLE_DW_OP(0x8e, breg30) -HANDLE_DW_OP(0x8f, breg31) -HANDLE_DW_OP(0x90, regx) -HANDLE_DW_OP(0x91, fbreg) -HANDLE_DW_OP(0x92, bregx) -HANDLE_DW_OP(0x93, piece) -HANDLE_DW_OP(0x94, deref_size) -HANDLE_DW_OP(0x95, xderef_size) -HANDLE_DW_OP(0x96, nop) +HANDLE_DW_OP(0x03, addr, 2, DWARF) +HANDLE_DW_OP(0x06, deref, 2, DWARF) +HANDLE_DW_OP(0x08, const1u, 2, DWARF) +HANDLE_DW_OP(0x09, const1s, 2, DWARF) +HANDLE_DW_OP(0x0a, const2u, 2, DWARF) +HANDLE_DW_OP(0x0b, const2s, 2, DWARF) +HANDLE_DW_OP(0x0c, const4u, 2, DWARF) +HANDLE_DW_OP(0x0d, const4s, 2, DWARF) +HANDLE_DW_OP(0x0e, const8u, 2, DWARF) +HANDLE_DW_OP(0x0f, const8s, 2, DWARF) +HANDLE_DW_OP(0x10, constu, 2, DWARF) +HANDLE_DW_OP(0x11, consts, 2, DWARF) +HANDLE_DW_OP(0x12, dup, 2, DWARF) +HANDLE_DW_OP(0x13, drop, 2, DWARF) +HANDLE_DW_OP(0x14, over, 2, DWARF) +HANDLE_DW_OP(0x15, pick, 2, DWARF) +HANDLE_DW_OP(0x16, swap, 2, DWARF) +HANDLE_DW_OP(0x17, rot, 2, DWARF) +HANDLE_DW_OP(0x18, xderef, 2, DWARF) +HANDLE_DW_OP(0x19, abs, 2, DWARF) +HANDLE_DW_OP(0x1a, and, 2, DWARF) +HANDLE_DW_OP(0x1b, div, 2, DWARF) +HANDLE_DW_OP(0x1c, minus, 2, DWARF) +HANDLE_DW_OP(0x1d, mod, 2, DWARF) +HANDLE_DW_OP(0x1e, mul, 2, DWARF) +HANDLE_DW_OP(0x1f, neg, 2, DWARF) +HANDLE_DW_OP(0x20, not, 2, DWARF) +HANDLE_DW_OP(0x21, or, 2, DWARF) +HANDLE_DW_OP(0x22, plus, 2, DWARF) +HANDLE_DW_OP(0x23, plus_uconst, 2, DWARF) +HANDLE_DW_OP(0x24, shl, 2, DWARF) +HANDLE_DW_OP(0x25, shr, 2, DWARF) +HANDLE_DW_OP(0x26, shra, 2, DWARF) +HANDLE_DW_OP(0x27, xor, 2, DWARF) +HANDLE_DW_OP(0x28, bra, 2, DWARF) +HANDLE_DW_OP(0x29, eq, 2, DWARF) +HANDLE_DW_OP(0x2a, ge, 2, DWARF) +HANDLE_DW_OP(0x2b, gt, 2, DWARF) +HANDLE_DW_OP(0x2c, le, 2, DWARF) +HANDLE_DW_OP(0x2d, lt, 2, DWARF) +HANDLE_DW_OP(0x2e, ne, 2, DWARF) +HANDLE_DW_OP(0x2f, skip, 2, DWARF) +HANDLE_DW_OP(0x30, lit0, 2, DWARF) +HANDLE_DW_OP(0x31, lit1, 2, DWARF) +HANDLE_DW_OP(0x32, lit2, 2, DWARF) +HANDLE_DW_OP(0x33, lit3, 2, DWARF) +HANDLE_DW_OP(0x34, lit4, 2, DWARF) +HANDLE_DW_OP(0x35, lit5, 2, DWARF) +HANDLE_DW_OP(0x36, lit6, 2, DWARF) +HANDLE_DW_OP(0x37, lit7, 2, DWARF) +HANDLE_DW_OP(0x38, lit8, 2, DWARF) +HANDLE_DW_OP(0x39, lit9, 2, DWARF) +HANDLE_DW_OP(0x3a, lit10, 2, DWARF) +HANDLE_DW_OP(0x3b, lit11, 2, DWARF) +HANDLE_DW_OP(0x3c, lit12, 2, DWARF) +HANDLE_DW_OP(0x3d, lit13, 2, DWARF) +HANDLE_DW_OP(0x3e, lit14, 2, DWARF) +HANDLE_DW_OP(0x3f, lit15, 2, DWARF) +HANDLE_DW_OP(0x40, lit16, 2, DWARF) +HANDLE_DW_OP(0x41, lit17, 2, DWARF) +HANDLE_DW_OP(0x42, lit18, 2, DWARF) +HANDLE_DW_OP(0x43, lit19, 2, DWARF) +HANDLE_DW_OP(0x44, lit20, 2, DWARF) +HANDLE_DW_OP(0x45, lit21, 2, DWARF) +HANDLE_DW_OP(0x46, lit22, 2, DWARF) +HANDLE_DW_OP(0x47, lit23, 2, DWARF) +HANDLE_DW_OP(0x48, lit24, 2, DWARF) +HANDLE_DW_OP(0x49, lit25, 2, DWARF) +HANDLE_DW_OP(0x4a, lit26, 2, DWARF) +HANDLE_DW_OP(0x4b, lit27, 2, DWARF) +HANDLE_DW_OP(0x4c, lit28, 2, DWARF) +HANDLE_DW_OP(0x4d, lit29, 2, DWARF) +HANDLE_DW_OP(0x4e, lit30, 2, DWARF) +HANDLE_DW_OP(0x4f, lit31, 2, DWARF) +HANDLE_DW_OP(0x50, reg0, 2, DWARF) +HANDLE_DW_OP(0x51, reg1, 2, DWARF) +HANDLE_DW_OP(0x52, reg2, 2, DWARF) +HANDLE_DW_OP(0x53, reg3, 2, DWARF) +HANDLE_DW_OP(0x54, reg4, 2, DWARF) +HANDLE_DW_OP(0x55, reg5, 2, DWARF) +HANDLE_DW_OP(0x56, reg6, 2, DWARF) +HANDLE_DW_OP(0x57, reg7, 2, DWARF) +HANDLE_DW_OP(0x58, reg8, 2, DWARF) +HANDLE_DW_OP(0x59, reg9, 2, DWARF) +HANDLE_DW_OP(0x5a, reg10, 2, DWARF) +HANDLE_DW_OP(0x5b, reg11, 2, DWARF) +HANDLE_DW_OP(0x5c, reg12, 2, DWARF) +HANDLE_DW_OP(0x5d, reg13, 2, DWARF) 
+HANDLE_DW_OP(0x5e, reg14, 2, DWARF) +HANDLE_DW_OP(0x5f, reg15, 2, DWARF) +HANDLE_DW_OP(0x60, reg16, 2, DWARF) +HANDLE_DW_OP(0x61, reg17, 2, DWARF) +HANDLE_DW_OP(0x62, reg18, 2, DWARF) +HANDLE_DW_OP(0x63, reg19, 2, DWARF) +HANDLE_DW_OP(0x64, reg20, 2, DWARF) +HANDLE_DW_OP(0x65, reg21, 2, DWARF) +HANDLE_DW_OP(0x66, reg22, 2, DWARF) +HANDLE_DW_OP(0x67, reg23, 2, DWARF) +HANDLE_DW_OP(0x68, reg24, 2, DWARF) +HANDLE_DW_OP(0x69, reg25, 2, DWARF) +HANDLE_DW_OP(0x6a, reg26, 2, DWARF) +HANDLE_DW_OP(0x6b, reg27, 2, DWARF) +HANDLE_DW_OP(0x6c, reg28, 2, DWARF) +HANDLE_DW_OP(0x6d, reg29, 2, DWARF) +HANDLE_DW_OP(0x6e, reg30, 2, DWARF) +HANDLE_DW_OP(0x6f, reg31, 2, DWARF) +HANDLE_DW_OP(0x70, breg0, 2, DWARF) +HANDLE_DW_OP(0x71, breg1, 2, DWARF) +HANDLE_DW_OP(0x72, breg2, 2, DWARF) +HANDLE_DW_OP(0x73, breg3, 2, DWARF) +HANDLE_DW_OP(0x74, breg4, 2, DWARF) +HANDLE_DW_OP(0x75, breg5, 2, DWARF) +HANDLE_DW_OP(0x76, breg6, 2, DWARF) +HANDLE_DW_OP(0x77, breg7, 2, DWARF) +HANDLE_DW_OP(0x78, breg8, 2, DWARF) +HANDLE_DW_OP(0x79, breg9, 2, DWARF) +HANDLE_DW_OP(0x7a, breg10, 2, DWARF) +HANDLE_DW_OP(0x7b, breg11, 2, DWARF) +HANDLE_DW_OP(0x7c, breg12, 2, DWARF) +HANDLE_DW_OP(0x7d, breg13, 2, DWARF) +HANDLE_DW_OP(0x7e, breg14, 2, DWARF) +HANDLE_DW_OP(0x7f, breg15, 2, DWARF) +HANDLE_DW_OP(0x80, breg16, 2, DWARF) +HANDLE_DW_OP(0x81, breg17, 2, DWARF) +HANDLE_DW_OP(0x82, breg18, 2, DWARF) +HANDLE_DW_OP(0x83, breg19, 2, DWARF) +HANDLE_DW_OP(0x84, breg20, 2, DWARF) +HANDLE_DW_OP(0x85, breg21, 2, DWARF) +HANDLE_DW_OP(0x86, breg22, 2, DWARF) +HANDLE_DW_OP(0x87, breg23, 2, DWARF) +HANDLE_DW_OP(0x88, breg24, 2, DWARF) +HANDLE_DW_OP(0x89, breg25, 2, DWARF) +HANDLE_DW_OP(0x8a, breg26, 2, DWARF) +HANDLE_DW_OP(0x8b, breg27, 2, DWARF) +HANDLE_DW_OP(0x8c, breg28, 2, DWARF) +HANDLE_DW_OP(0x8d, breg29, 2, DWARF) +HANDLE_DW_OP(0x8e, breg30, 2, DWARF) +HANDLE_DW_OP(0x8f, breg31, 2, DWARF) +HANDLE_DW_OP(0x90, regx, 2, DWARF) +HANDLE_DW_OP(0x91, fbreg, 2, DWARF) +HANDLE_DW_OP(0x92, bregx, 2, DWARF) +HANDLE_DW_OP(0x93, piece, 2, DWARF) +HANDLE_DW_OP(0x94, deref_size, 2, DWARF) +HANDLE_DW_OP(0x95, xderef_size, 2, DWARF) +HANDLE_DW_OP(0x96, nop, 2, DWARF) // New in DWARF v3: -HANDLE_DW_OP(0x97, push_object_address) -HANDLE_DW_OP(0x98, call2) -HANDLE_DW_OP(0x99, call4) -HANDLE_DW_OP(0x9a, call_ref) -HANDLE_DW_OP(0x9b, form_tls_address) -HANDLE_DW_OP(0x9c, call_frame_cfa) -HANDLE_DW_OP(0x9d, bit_piece) +HANDLE_DW_OP(0x97, push_object_address, 3, DWARF) +HANDLE_DW_OP(0x98, call2, 3, DWARF) +HANDLE_DW_OP(0x99, call4, 3, DWARF) +HANDLE_DW_OP(0x9a, call_ref, 3, DWARF) +HANDLE_DW_OP(0x9b, form_tls_address, 3, DWARF) +HANDLE_DW_OP(0x9c, call_frame_cfa, 3, DWARF) +HANDLE_DW_OP(0x9d, bit_piece, 3, DWARF) // New in DWARF v4: -HANDLE_DW_OP(0x9e, implicit_value) -HANDLE_DW_OP(0x9f, stack_value) +HANDLE_DW_OP(0x9e, implicit_value, 4, DWARF) +HANDLE_DW_OP(0x9f, stack_value, 4, DWARF) // New in DWARF v5: -HANDLE_DW_OP(0xa0, implicit_pointer) -HANDLE_DW_OP(0xa1, addrx) -HANDLE_DW_OP(0xa2, constx) -HANDLE_DW_OP(0xa3, entry_value) -HANDLE_DW_OP(0xa4, const_type) -HANDLE_DW_OP(0xa5, regval_type) -HANDLE_DW_OP(0xa6, deref_type) -HANDLE_DW_OP(0xa7, xderef_type) -HANDLE_DW_OP(0xa8, convert) -HANDLE_DW_OP(0xa9, reinterpret) +HANDLE_DW_OP(0xa0, implicit_pointer, 5, DWARF) +HANDLE_DW_OP(0xa1, addrx, 5, DWARF) +HANDLE_DW_OP(0xa2, constx, 5, DWARF) +HANDLE_DW_OP(0xa3, entry_value, 5, DWARF) +HANDLE_DW_OP(0xa4, const_type, 5, DWARF) +HANDLE_DW_OP(0xa5, regval_type, 5, DWARF) +HANDLE_DW_OP(0xa6, deref_type, 5, DWARF) +HANDLE_DW_OP(0xa7, xderef_type, 5, DWARF) 
+HANDLE_DW_OP(0xa8, convert, 5, DWARF) +HANDLE_DW_OP(0xa9, reinterpret, 5, DWARF) // Vendor extensions: // Extensions for GNU-style thread-local storage. -HANDLE_DW_OP(0xe0, GNU_push_tls_address) +HANDLE_DW_OP(0xe0, GNU_push_tls_address, 0, GNU) // Extensions for Fission proposal. -HANDLE_DW_OP(0xfb, GNU_addr_index) -HANDLE_DW_OP(0xfc, GNU_const_index) +HANDLE_DW_OP(0xfb, GNU_addr_index, 0, GNU) +HANDLE_DW_OP(0xfc, GNU_const_index, 0, GNU) // DWARF languages. -HANDLE_DW_LANG(0x0001, C89) -HANDLE_DW_LANG(0x0002, C) -HANDLE_DW_LANG(0x0003, Ada83) -HANDLE_DW_LANG(0x0004, C_plus_plus) -HANDLE_DW_LANG(0x0005, Cobol74) -HANDLE_DW_LANG(0x0006, Cobol85) -HANDLE_DW_LANG(0x0007, Fortran77) -HANDLE_DW_LANG(0x0008, Fortran90) -HANDLE_DW_LANG(0x0009, Pascal83) -HANDLE_DW_LANG(0x000a, Modula2) +HANDLE_DW_LANG(0x0001, C89, 2, DWARF) +HANDLE_DW_LANG(0x0002, C, 2, DWARF) +HANDLE_DW_LANG(0x0003, Ada83, 2, DWARF) +HANDLE_DW_LANG(0x0004, C_plus_plus, 2, DWARF) +HANDLE_DW_LANG(0x0005, Cobol74, 2, DWARF) +HANDLE_DW_LANG(0x0006, Cobol85, 2, DWARF) +HANDLE_DW_LANG(0x0007, Fortran77, 2, DWARF) +HANDLE_DW_LANG(0x0008, Fortran90, 2, DWARF) +HANDLE_DW_LANG(0x0009, Pascal83, 2, DWARF) +HANDLE_DW_LANG(0x000a, Modula2, 2, DWARF) // New in DWARF v3: -HANDLE_DW_LANG(0x000b, Java) -HANDLE_DW_LANG(0x000c, C99) -HANDLE_DW_LANG(0x000d, Ada95) -HANDLE_DW_LANG(0x000e, Fortran95) -HANDLE_DW_LANG(0x000f, PLI) -HANDLE_DW_LANG(0x0010, ObjC) -HANDLE_DW_LANG(0x0011, ObjC_plus_plus) -HANDLE_DW_LANG(0x0012, UPC) -HANDLE_DW_LANG(0x0013, D) +HANDLE_DW_LANG(0x000b, Java, 3, DWARF) +HANDLE_DW_LANG(0x000c, C99, 3, DWARF) +HANDLE_DW_LANG(0x000d, Ada95, 3, DWARF) +HANDLE_DW_LANG(0x000e, Fortran95, 3, DWARF) +HANDLE_DW_LANG(0x000f, PLI, 3, DWARF) +HANDLE_DW_LANG(0x0010, ObjC, 3, DWARF) +HANDLE_DW_LANG(0x0011, ObjC_plus_plus, 3, DWARF) +HANDLE_DW_LANG(0x0012, UPC, 3, DWARF) +HANDLE_DW_LANG(0x0013, D, 3, DWARF) // New in DWARF v4: -HANDLE_DW_LANG(0x0014, Python) +HANDLE_DW_LANG(0x0014, Python, 4, DWARF) // New in DWARF v5: -HANDLE_DW_LANG(0x0015, OpenCL) -HANDLE_DW_LANG(0x0016, Go) -HANDLE_DW_LANG(0x0017, Modula3) -HANDLE_DW_LANG(0x0018, Haskell) -HANDLE_DW_LANG(0x0019, C_plus_plus_03) -HANDLE_DW_LANG(0x001a, C_plus_plus_11) -HANDLE_DW_LANG(0x001b, OCaml) -HANDLE_DW_LANG(0x001c, Rust) -HANDLE_DW_LANG(0x001d, C11) -HANDLE_DW_LANG(0x001e, Swift) -HANDLE_DW_LANG(0x001f, Julia) -HANDLE_DW_LANG(0x0020, Dylan) -HANDLE_DW_LANG(0x0021, C_plus_plus_14) -HANDLE_DW_LANG(0x0022, Fortran03) -HANDLE_DW_LANG(0x0023, Fortran08) -HANDLE_DW_LANG(0x0024, RenderScript) -HANDLE_DW_LANG(0x0025, BLISS) +HANDLE_DW_LANG(0x0015, OpenCL, 5, DWARF) +HANDLE_DW_LANG(0x0016, Go, 5, DWARF) +HANDLE_DW_LANG(0x0017, Modula3, 5, DWARF) +HANDLE_DW_LANG(0x0018, Haskell, 5, DWARF) +HANDLE_DW_LANG(0x0019, C_plus_plus_03, 5, DWARF) +HANDLE_DW_LANG(0x001a, C_plus_plus_11, 5, DWARF) +HANDLE_DW_LANG(0x001b, OCaml, 5, DWARF) +HANDLE_DW_LANG(0x001c, Rust, 5, DWARF) +HANDLE_DW_LANG(0x001d, C11, 5, DWARF) +HANDLE_DW_LANG(0x001e, Swift, 5, DWARF) +HANDLE_DW_LANG(0x001f, Julia, 5, DWARF) +HANDLE_DW_LANG(0x0020, Dylan, 5, DWARF) +HANDLE_DW_LANG(0x0021, C_plus_plus_14, 5, DWARF) +HANDLE_DW_LANG(0x0022, Fortran03, 5, DWARF) +HANDLE_DW_LANG(0x0023, Fortran08, 5, DWARF) +HANDLE_DW_LANG(0x0024, RenderScript, 5, DWARF) +HANDLE_DW_LANG(0x0025, BLISS, 5, DWARF) // Vendor extensions: -HANDLE_DW_LANG(0x8001, Mips_Assembler) -HANDLE_DW_LANG(0x8e57, GOOGLE_RenderScript) -HANDLE_DW_LANG(0xb000, BORLAND_Delphi) +HANDLE_DW_LANG(0x8001, Mips_Assembler, 0, MIPS) +HANDLE_DW_LANG(0x8e57, GOOGLE_RenderScript, 0, 
GOOGLE) +HANDLE_DW_LANG(0xb000, BORLAND_Delphi, 0, BORLAND) // DWARF attribute type encodings. -HANDLE_DW_ATE(0x01, address) -HANDLE_DW_ATE(0x02, boolean) -HANDLE_DW_ATE(0x03, complex_float) -HANDLE_DW_ATE(0x04, float) -HANDLE_DW_ATE(0x05, signed) -HANDLE_DW_ATE(0x06, signed_char) -HANDLE_DW_ATE(0x07, unsigned) -HANDLE_DW_ATE(0x08, unsigned_char) +HANDLE_DW_ATE(0x01, address, 2, DWARF) +HANDLE_DW_ATE(0x02, boolean, 2, DWARF) +HANDLE_DW_ATE(0x03, complex_float, 2, DWARF) +HANDLE_DW_ATE(0x04, float, 2, DWARF) +HANDLE_DW_ATE(0x05, signed, 2, DWARF) +HANDLE_DW_ATE(0x06, signed_char, 2, DWARF) +HANDLE_DW_ATE(0x07, unsigned, 2, DWARF) +HANDLE_DW_ATE(0x08, unsigned_char, 2, DWARF) // New in DWARF v3: -HANDLE_DW_ATE(0x09, imaginary_float) -HANDLE_DW_ATE(0x0a, packed_decimal) -HANDLE_DW_ATE(0x0b, numeric_string) -HANDLE_DW_ATE(0x0c, edited) -HANDLE_DW_ATE(0x0d, signed_fixed) -HANDLE_DW_ATE(0x0e, unsigned_fixed) -HANDLE_DW_ATE(0x0f, decimal_float) +HANDLE_DW_ATE(0x09, imaginary_float, 3, DWARF) +HANDLE_DW_ATE(0x0a, packed_decimal, 3, DWARF) +HANDLE_DW_ATE(0x0b, numeric_string, 3, DWARF) +HANDLE_DW_ATE(0x0c, edited, 3, DWARF) +HANDLE_DW_ATE(0x0d, signed_fixed, 3, DWARF) +HANDLE_DW_ATE(0x0e, unsigned_fixed, 3, DWARF) +HANDLE_DW_ATE(0x0f, decimal_float, 3, DWARF) // New in DWARF v4: -HANDLE_DW_ATE(0x10, UTF) +HANDLE_DW_ATE(0x10, UTF, 4, DWARF) // New in DWARF v5: -HANDLE_DW_ATE(0x11, UCS) -HANDLE_DW_ATE(0x12, ASCII) +HANDLE_DW_ATE(0x11, UCS, 5, DWARF) +HANDLE_DW_ATE(0x12, ASCII, 5, DWARF) // DWARF virtuality codes. HANDLE_DW_VIRTUALITY(0x00, none) Index: include/llvm/Support/Recycler.h =================================================================== --- include/llvm/Support/Recycler.h +++ include/llvm/Support/Recycler.h @@ -42,13 +42,16 @@ FreeNode *pop_val() { auto *Val = FreeList; + __asan_unpoison_memory_region(Val, Size); FreeList = FreeList->Next; + __msan_allocated_memory(Val, Size); return Val; } void push(FreeNode *N) { N->Next = FreeList; FreeList = N; + __asan_poison_memory_region(N, Size); } public: Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -230,6 +230,12 @@ return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); } + /// Return the type for frame index, which is determined by + /// the alloca address space specified through the data layout. + MVT getFrameIndexTy(const DataLayout &DL) const { + return getPointerTy(DL, DL.getAllocaAddrSpace()); + } + /// EVT is not used in-tree, but is used by out-of-tree target. /// A documentation for this function would be nice... virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const; @@ -2382,30 +2388,39 @@ New = N; return true; } - - /// Check to see if the specified operand of the specified instruction is a - /// constant integer. If so, check to see if there are any bits set in the - /// constant that are not demanded. If so, shrink the constant and return - /// true. - bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded); - - /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. This - /// uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be - /// generalized for targets with other types of implicit widening casts. - bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded, - const SDLoc &dl); - - /// Helper for SimplifyDemandedBits that can simplify an operation with - /// multiple uses. 
This function uses TLI.SimplifyDemandedBits to - /// simplify Operand \p OpIdx of \p User and then updated \p User with - /// the simplified version. No other uses of \p OpIdx are updated. - /// If \p User is the only user of \p OpIdx, this function behaves exactly - /// like TLI.SimplifyDemandedBits except that it also updates the DAG by - /// calling DCI.CommitTargetLoweringOpt. - bool SimplifyDemandedBits(SDNode *User, unsigned OpIdx, - const APInt &Demanded, DAGCombinerInfo &DCI); }; + /// Check to see if the specified operand of the specified instruction is a + /// constant integer. If so, check to see if there are any bits set in the + /// constant that are not demanded. If so, shrink the constant and return + /// true. + bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const; + + // Target hook to do target-specific const optimization, which is called by + // ShrinkDemandedConstant. This function should return true if the target + // doesn't want ShrinkDemandedConstant to further optimize the constant. + virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const { + return false; + } + + /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. This + /// uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be + /// generalized for targets with other types of implicit widening casts. + bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded, + TargetLoweringOpt &TLO) const; + + /// Helper for SimplifyDemandedBits that can simplify an operation with + /// multiple uses. This function simplifies operand \p OpIdx of \p User and + /// then updates \p User with the simplified version. No other uses of + /// \p OpIdx are updated. If \p User is the only user of \p OpIdx, this + /// function behaves exactly like function SimplifyDemandedBits declared + /// below except that it also updates the DAG by calling + /// DCI.CommitTargetLoweringOpt. + bool SimplifyDemandedBits(SDNode *User, unsigned OpIdx, const APInt &Demanded, + DAGCombinerInfo &DCI, TargetLoweringOpt &TLO) const; + /// Look at Op. At this point, we know that only the DemandedMask bits of the /// result of Op are ever used downstream. If we can use this information to /// simplify Op, create a new simplified DAG node and return true, returning Index: include/llvm/Transforms/Scalar/ConstantHoisting.h =================================================================== --- include/llvm/Transforms/Scalar/ConstantHoisting.h +++ include/llvm/Transforms/Scalar/ConstantHoisting.h @@ -36,6 +36,7 @@ #ifndef LLVM_TRANSFORMS_SCALAR_CONSTANTHOISTING_H #define LLVM_TRANSFORMS_SCALAR_CONSTANTHOISTING_H +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/PassManager.h" @@ -98,7 +99,7 @@ // Glue for old PM. bool runImpl(Function &F, TargetTransformInfo &TTI, DominatorTree &DT, - BasicBlock &Entry); + BlockFrequencyInfo *BFI, BasicBlock &Entry); void releaseMemory() { ConstantVec.clear(); @@ -112,6 +113,7 @@ const TargetTransformInfo *TTI; DominatorTree *DT; + BlockFrequencyInfo *BFI; BasicBlock *Entry; /// Keeps track of constant candidates found in the function. 
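A note on the TargetLowering.h hunks above: ShrinkDemandedConstant, ShrinkDemandedOp and the multi-use SimplifyDemandedBits helper move off TargetLoweringOpt onto TargetLowering itself, and the new targetShrinkDemandedConstant hook lets a backend stop the generic constant shrinking when the existing immediate is already the cheap one. A minimal sketch of an override under assumed names (MyTargetLowering and isCheapAndImmediate are invented for illustration and are not part of this patch):

// Illustrative only: a hypothetical backend overriding the new hook.
bool MyTargetLowering::targetShrinkDemandedConstant(
    SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const {
  // Only AND masks are interesting for this sketch.
  if (Op.getOpcode() != ISD::AND)
    return false;
  auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!C)
    return false;
  // If the existing mask already encodes as a cheap immediate, returning
  // true without forming a replacement tells the generic
  // ShrinkDemandedConstant to leave the constant as it is.
  return isCheapAndImmediate(C->getAPIntValue());
}
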
@@ -124,8 +126,8 @@ SmallVector ConstantVec; Instruction *findMatInsertPt(Instruction *Inst, unsigned Idx = ~0U) const; - Instruction *findConstantInsertionPoint( - const consthoist::ConstantInfo &ConstInfo) const; + SmallPtrSet + findConstantInsertionPoint(const consthoist::ConstantInfo &ConstInfo) const; void collectConstantCandidates(ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx, ConstantInt *ConstInt); Index: include/llvm/Transforms/Utils/CodeExtractor.h =================================================================== --- include/llvm/Transforms/Utils/CodeExtractor.h +++ include/llvm/Transforms/Utils/CodeExtractor.h @@ -65,14 +65,6 @@ /// Blocks containing EHPads, allocas, invokes, or vastarts are not valid. static bool isBlockValidForExtraction(const BasicBlock &BB); - /// \brief Create a code extractor for a single basic block. - /// - /// In this formation, we don't require a dominator tree. The given basic - /// block is set up for extraction. - CodeExtractor(BasicBlock *BB, bool AggregateArgs = false, - BlockFrequencyInfo *BFI = nullptr, - BranchProbabilityInfo *BPI = nullptr); - /// \brief Create a code extractor for a sequence of blocks. /// /// Given a sequence of basic blocks where the first block in the sequence @@ -91,14 +83,6 @@ BlockFrequencyInfo *BFI = nullptr, BranchProbabilityInfo *BPI = nullptr); - /// \brief Create a code extractor for a region node. - /// - /// Behaves just like the generic code sequence constructor, but uses the - /// block sequence of the region node passed in. - CodeExtractor(DominatorTree &DT, const RegionNode &RN, - bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr, - BranchProbabilityInfo *BPI = nullptr); - /// \brief Perform the extraction, returning the new function. /// /// Returns zero when called on a CodeExtractor instance where isEligible Index: lib/Analysis/InstructionSimplify.cpp =================================================================== --- lib/Analysis/InstructionSimplify.cpp +++ lib/Analysis/InstructionSimplify.cpp @@ -568,11 +568,11 @@ match(Op1, m_Not(m_Specific(Op0)))) return Constant::getAllOnesValue(Ty); - // add nsw/nuw (xor Y, signbit), signbit --> Y + // add nsw/nuw (xor Y, signmask), signmask --> Y // The no-wrapping add guarantees that the top bit will be set by the add. // Therefore, the xor must be clearing the already set sign bit of Y. - if ((isNSW || isNUW) && match(Op1, m_SignBit()) && - match(Op0, m_Xor(m_Value(Y), m_SignBit()))) + if ((isNSW || isNUW) && match(Op1, m_SignMask()) && + match(Op0, m_Xor(m_Value(Y), m_SignMask()))) return Y; /// i1 add -> xor. @@ -2819,7 +2819,7 @@ return ConstantInt::getTrue(RHS->getContext()); } } - if (CIVal->isSignBit() && *CI2Val == 1) { + if (CIVal->isSignMask() && *CI2Val == 1) { if (Pred == ICmpInst::ICMP_UGT) return ConstantInt::getFalse(RHS->getContext()); if (Pred == ICmpInst::ICMP_ULE) Index: lib/Analysis/MemorySSAUpdater.cpp =================================================================== --- lib/Analysis/MemorySSAUpdater.cpp +++ lib/Analysis/MemorySSAUpdater.cpp @@ -29,7 +29,7 @@ #define DEBUG_TYPE "memoryssa" using namespace llvm; -namespace llvm { + // This is the marker algorithm from "Simple and Efficient Construction of // Static Single Assignment Form" // The simple, non-marker algorithm places phi nodes at any join @@ -211,8 +211,8 @@ } // Set every incoming edge {BB, MP->getBlock()} of MemoryPhi MP to NewDef. 
-void setMemoryPhiValueForBlock(MemoryPhi *MP, const BasicBlock *BB, - MemoryAccess *NewDef) { +static void setMemoryPhiValueForBlock(MemoryPhi *MP, const BasicBlock *BB, + MemoryAccess *NewDef) { // Replace any operand with us an incoming block with the new defining // access. int i = MP->getBasicBlockIndex(BB); @@ -415,6 +415,7 @@ } return MA; } + void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) { assert(!MSSA->isLiveOnEntryDef(MA) && "Trying to remove the live on entry def"); @@ -490,5 +491,3 @@ ++InsertPt->getIterator()); return NewAccess; } - -} // namespace llvm Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -4007,9 +4007,9 @@ case Instruction::Xor: if (auto *RHSC = dyn_cast(Op->getOperand(1))) - // If the RHS of the xor is a signbit, then this is just an add. - // Instcombine turns add of signbit into xor as a strength reduction step. - if (RHSC->getValue().isSignBit()) + // If the RHS of the xor is a signmask, then this is just an add. + // Instcombine turns add of signmask into xor as a strength reduction step. + if (RHSC->getValue().isSignMask()) return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1)); return BinaryOp(Op); @@ -5328,12 +5328,28 @@ break; case Instruction::Or: - // Use ValueTracking to check whether this is actually an add. - if (haveNoCommonBitsSet(BO->LHS, BO->RHS, getDataLayout(), &AC, - nullptr, &DT)) { - // There aren't any common bits set, so the add can't wrap. - auto Flags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNSW); - return getAddExpr(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags); + // If the RHS of the Or is a constant, we may have something like: + // X*4+1 which got turned into X*4|1. Handle this as an Add so loop + // optimizations will transparently handle this case. + // + // In order for this transformation to be safe, the LHS must be of the + // form X*(2^n) and the Or constant must be less than 2^n. + if (ConstantInt *CI = dyn_cast(BO->RHS)) { + const SCEV *LHS = getSCEV(BO->LHS); + const APInt &CIVal = CI->getValue(); + if (GetMinTrailingZeros(LHS) >= + (CIVal.getBitWidth() - CIVal.countLeadingZeros())) { + // Build a plain add SCEV. + const SCEV *S = getAddExpr(LHS, getSCEV(CI)); + // If the LHS of the add was an addrec and it has no-wrap flags, + // transfer the no-wrap flags, since an or won't introduce a wrap. + if (const SCEVAddRecExpr *NewAR = dyn_cast(S)) { + const SCEVAddRecExpr *OldAR = cast(LHS); + const_cast(NewAR)->setNoWrapFlags( + OldAR->getNoWrapFlags()); + } + return S; + } } break; @@ -5369,7 +5385,7 @@ // using an add, which is equivalent, and re-apply the zext. APInt Trunc = CI->getValue().trunc(Z0TySize); if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() && - Trunc.isSignBit()) + Trunc.isSignMask()) return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)), UTy); } Index: lib/Analysis/ValueTracking.cpp =================================================================== --- lib/Analysis/ValueTracking.cpp +++ lib/Analysis/ValueTracking.cpp @@ -778,7 +778,7 @@ // so this isn't a real bug. On the other hand, the program may have undefined // behavior, or we might have a bug in the compiler. We can't assert/crash, so // clear out the known bits, try to warn the user, and hope for the best. 
- if ((KnownZero & KnownOne) != 0) { + if (KnownZero.intersects(KnownOne)) { KnownZero.clearAllBits(); KnownOne.clearAllBits(); @@ -860,7 +860,8 @@ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); - KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + KnownZero.setAllBits(); + KnownOne.setAllBits(); for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) { // Combine the shifted known input bits only for those shift amounts // compatible with its known constraints. @@ -888,7 +889,7 @@ // return anything we'd like, but we need to make sure the sets of known bits // stay disjoint (it should be better for some other code to actually // propagate the undef than to pick a value here using known bits). - if ((KnownZero & KnownOne) != 0) { + if (KnownZero.intersects(KnownOne)) { KnownZero.clearAllBits(); KnownOne.clearAllBits(); } @@ -1640,9 +1641,9 @@ if (match(V, m_Shl(m_One(), m_Value()))) return true; - // (signbit) >>l X is clearly a power of two if the one is not shifted off the - // bottom. If it is shifted off the bottom then the result is undefined. - if (match(V, m_LShr(m_SignBit(), m_Value()))) + // (signmask) >>l X is clearly a power of two if the one is not shifted off + // the bottom. If it is shifted off the bottom then the result is undefined. + if (match(V, m_LShr(m_SignMask(), m_Value()))) return true; // The remaining tests are all recursive, so bail out if we hit the limit. Index: lib/CodeGen/AsmPrinter/DIE.cpp =================================================================== --- lib/CodeGen/AsmPrinter/DIE.cpp +++ lib/CodeGen/AsmPrinter/DIE.cpp @@ -31,6 +31,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "dwarfdebug" + //===----------------------------------------------------------------------===// // DIEAbbrevData Implementation //===----------------------------------------------------------------------===// @@ -79,15 +81,22 @@ dwarf::AttributeString(AttrData.getAttribute()).data()); // Emit form type. +#ifndef NDEBUG + // Could be an assertion, but this way we can see the failing form code + // easily, which helps track down where it came from. + if (!dwarf::isValidFormForVersion(AttrData.getForm(), + AP->getDwarfVersion())) { + DEBUG(dbgs() << "Invalid form " << format("0x%x", AttrData.getForm()) + << " for DWARF version " << AP->getDwarfVersion() << "\n"); + llvm_unreachable("Invalid form for specified DWARF version"); + } +#endif AP->EmitULEB128(AttrData.getForm(), dwarf::FormEncodingString(AttrData.getForm()).data()); // Emit value for DW_FORM_implicit_const. - if (AttrData.getForm() == dwarf::DW_FORM_implicit_const) { - assert(AP->getDwarfVersion() >= 5 && - "DW_FORM_implicit_const is supported starting from DWARFv5"); + if (AttrData.getForm() == dwarf::DW_FORM_implicit_const) AP->EmitSLEB128(AttrData.getValue()); - } } // Mark end of abbreviation. Index: lib/CodeGen/AsmPrinter/DwarfExpression.h =================================================================== --- lib/CodeGen/AsmPrinter/DwarfExpression.h +++ lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -72,6 +72,8 @@ } /// Determine whether there are any operations left in this expression. operator bool() const { return Start != End; } + DIExpression::expr_op_iterator begin() const { return Start; } + DIExpression::expr_op_iterator end() const { return End; } /// Retrieve the fragment information, if any. 
Optional getFragmentInfo() const { Index: lib/CodeGen/AsmPrinter/DwarfExpression.cpp =================================================================== --- lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -226,8 +226,15 @@ return true; } - // FIXME: // Don't emit locations that cannot be expressed without DW_OP_stack_value. + if (DwarfVersion < 4) + if (std::any_of(ExprCursor.begin(), ExprCursor.end(), + [](DIExpression::ExprOperand Op) -> bool { + return Op.getOp() == dwarf::DW_OP_stack_value; + })) { + DwarfRegs.clear(); + return false; + } assert(DwarfRegs.size() == 1); auto Reg = DwarfRegs[0]; Index: lib/CodeGen/GlobalISel/Legalizer.cpp =================================================================== --- lib/CodeGen/GlobalISel/Legalizer.cpp +++ lib/CodeGen/GlobalISel/Legalizer.cpp @@ -24,6 +24,8 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include + #define DEBUG_TYPE "legalizer" using namespace llvm; @@ -161,7 +163,7 @@ // convergence for performance reasons. bool Changed = false; MachineBasicBlock::iterator NextMI; - for (auto &MBB : MF) + for (auto &MBB : MF) { for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) { // Get the next Instruction before we try to legalize, because there's a // good chance MI will be deleted. @@ -171,18 +173,21 @@ // and are assumed to be legal. if (!isPreISelGenericOpcode(MI->getOpcode())) continue; + unsigned NumNewInsns = 0; SmallVector WorkList; - Helper.MIRBuilder.recordInsertions( - [&](MachineInstr *MI) { WorkList.push_back(MI); }); + Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) { + ++NumNewInsns; + WorkList.push_back(MI); + }); WorkList.push_back(&*MI); + bool Changed = false; LegalizerHelper::LegalizeResult Res; unsigned Idx = 0; do { Res = Helper.legalizeInstrStep(*WorkList[Idx]); // Error out if we couldn't legalize this instruction. We may want to - // fall - // back to DAG ISel instead in the future. + // fall back to DAG ISel instead in the future. if (Res == LegalizerHelper::UnableToLegalize) { Helper.MIRBuilder.stopRecordingInsertions(); if (Res == LegalizerHelper::UnableToLegalize) { @@ -194,10 +199,21 @@ } Changed |= Res == LegalizerHelper::Legalized; ++Idx; + +#ifndef NDEBUG + if (NumNewInsns) { + DEBUG(dbgs() << ".. .. Emitted " << NumNewInsns << " insns\n"); + for (auto I = WorkList.end() - NumNewInsns, E = WorkList.end(); + I != E; ++I) + DEBUG(dbgs() << ".. .. New MI: "; (*I)->print(dbgs())); + NumNewInsns = 0; + } +#endif } while (Idx < WorkList.size()); Helper.MIRBuilder.stopRecordingInsertions(); } + } MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); @@ -207,7 +223,11 @@ // good chance MI will be deleted. NextMI = std::next(MI); - Changed |= combineExtracts(*MI, MRI, TII); + // combineExtracts erases MI. 
+ if (combineExtracts(*MI, MRI, TII)) { + Changed = true; + continue; + } Changed |= combineMerges(*MI, MRI, TII); } } Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -24,7 +24,7 @@ #include -#define DEBUG_TYPE "legalize-mir" +#define DEBUG_TYPE "legalizer" using namespace llvm; @@ -35,24 +35,34 @@ LegalizerHelper::LegalizeResult LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { + DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs())); + auto Action = LI.getAction(MI, MRI); switch (std::get<0>(Action)) { case LegalizerInfo::Legal: + DEBUG(dbgs() << ".. Already legal\n"); return AlreadyLegal; case LegalizerInfo::Libcall: + DEBUG(dbgs() << ".. Convert to libcall\n"); return libcall(MI); case LegalizerInfo::NarrowScalar: + DEBUG(dbgs() << ".. Narrow scalar\n"); return narrowScalar(MI, std::get<1>(Action), std::get<2>(Action)); case LegalizerInfo::WidenScalar: + DEBUG(dbgs() << ".. Widen scalar\n"); return widenScalar(MI, std::get<1>(Action), std::get<2>(Action)); case LegalizerInfo::Lower: + DEBUG(dbgs() << ".. Lower\n"); return lower(MI, std::get<1>(Action), std::get<2>(Action)); case LegalizerInfo::FewerElements: + DEBUG(dbgs() << ".. Reduce number of elements\n"); return fewerElementsVector(MI, std::get<1>(Action), std::get<2>(Action)); case LegalizerInfo::Custom: + DEBUG(dbgs() << ".. Custom legalization\n"); return LI.legalizeCustom(MI, MRI, MIRBuilder) ? Legalized : UnableToLegalize; default: + DEBUG(dbgs() << ".. Unable to legalize\n"); return UnableToLegalize; } } Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2146,7 +2146,7 @@ if (N->getFlags()->hasNoUnsignedWrap()) return N0; - if (DAG.MaskedValueIsZero(N1, ~APInt::getSignBit(BitWidth))) { + if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) { // N1 is either 0 or the minimum signed value. If the sub is NSW, then // N1 must be 0 because negating the minimum signed value is undefined. if (N->getFlags()->hasNoSignedWrap()) @@ -8298,11 +8298,11 @@ switch (N0.getOpcode()) { case ISD::AND: FPOpcode = ISD::FABS; - SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits()); + SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits()); break; case ISD::XOR: FPOpcode = ISD::FNEG; - SignMask = APInt::getSignBit(SourceVT.getSizeInBits()); + SignMask = APInt::getSignMask(SourceVT.getSizeInBits()); break; // TODO: ISD::OR --> ISD::FNABS? 
default: @@ -8413,7 +8413,7 @@ if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { assert(VT.getSizeInBits() == 128); SDValue SignBit = DAG.getConstant( - APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64); + APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64); SDValue FlipBit; if (N0.getOpcode() == ISD::FNEG) { FlipBit = SignBit; @@ -8433,7 +8433,7 @@ AddToWorklist(FlipBits.getNode()); return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits); } - APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); + APInt SignBit = APInt::getSignMask(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) return DAG.getNode(ISD::XOR, DL, VT, NewConv, DAG.getConstant(SignBit, DL, VT)); @@ -8481,7 +8481,7 @@ } if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { - APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2); + APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2); SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0)); AddToWorklist(Cst.getNode()); SDValue X = DAG.getBitcast(VT, N0.getOperand(1)); @@ -8502,7 +8502,7 @@ AddToWorklist(FlipBits.getNode()); return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits); } - APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); + APInt SignBit = APInt::getSignMask(VT.getSizeInBits()); X = DAG.getNode(ISD::AND, SDLoc(X), VT, X, DAG.getConstant(SignBit, SDLoc(X), VT)); AddToWorklist(X.getNode()); @@ -10313,11 +10313,11 @@ if (N0.getValueType().isVector()) { // For a vector, get a mask such as 0x80... per scalar element // and splat it. - SignMask = APInt::getSignBit(N0.getScalarValueSizeInBits()); + SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits()); SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); } else { // For a scalar, just generate 0x80... - SignMask = APInt::getSignBit(IntVT.getSizeInBits()); + SignMask = APInt::getSignMask(IntVT.getSizeInBits()); } SDLoc DL0(N0); Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int, @@ -10418,11 +10418,11 @@ if (N0.getValueType().isVector()) { // For a vector, get a mask such as 0x7f... per scalar element // and splat it. - SignMask = ~APInt::getSignBit(N0.getScalarValueSizeInBits()); + SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits()); SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); } else { // For a scalar, just generate 0x7f... - SignMask = ~APInt::getSignBit(IntVT.getSizeInBits()); + SignMask = ~APInt::getSignMask(IntVT.getSizeInBits()); } SDLoc DL(N0); Int = DAG.getNode(ISD::AND, DL, IntVT, Int, @@ -16036,7 +16036,7 @@ /// Return true if base is a frame index, which is known not to alias with /// anything but itself. Provides base object and offset as results. -static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, +static bool findBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, const GlobalValue *&GV, const void *&CV) { // Assume it is a primitive operation. Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr; @@ -16089,53 +16089,56 @@ return false; // Gather base node and offset information. 
- SDValue Base1, Base2; - int64_t Offset1, Offset2; - const GlobalValue *GV1, *GV2; - const void *CV1, *CV2; - bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(), + SDValue Base0, Base1; + int64_t Offset0, Offset1; + const GlobalValue *GV0, *GV1; + const void *CV0, *CV1; + bool IsFrameIndex0 = findBaseOffset(Op0->getBasePtr(), + Base0, Offset0, GV0, CV0); + bool IsFrameIndex1 = findBaseOffset(Op1->getBasePtr(), Base1, Offset1, GV1, CV1); - bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(), - Base2, Offset2, GV2, CV2); - // If they have a same base address then check to see if they overlap. - if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2))) - return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || - (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); + // If they have the same base address, then check to see if they overlap. + unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3; + unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3; + if (Base0 == Base1 || (GV0 && (GV0 == GV1)) || (CV0 && (CV0 == CV1))) + return !((Offset0 + NumBytes0) <= Offset1 || + (Offset1 + NumBytes1) <= Offset0); // It is possible for different frame indices to alias each other, mostly // when tail call optimization reuses return address slots for arguments. // To catch this case, look up the actual index of frame indices to compute // the real alias relationship. - if (isFrameIndex1 && isFrameIndex2) { + if (IsFrameIndex0 && IsFrameIndex1) { MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + Offset0 += MFI.getObjectOffset(cast(Base0)->getIndex()); Offset1 += MFI.getObjectOffset(cast(Base1)->getIndex()); - Offset2 += MFI.getObjectOffset(cast(Base2)->getIndex()); - return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || - (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); + return !((Offset0 + NumBytes0) <= Offset1 || + (Offset1 + NumBytes1) <= Offset0); } // Otherwise, if we know what the bases are, and they aren't identical, then // we know they cannot alias. - if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2)) + if ((IsFrameIndex0 || CV0 || GV0) && (IsFrameIndex1 || CV1 || GV1)) return false; // If we know required SrcValue1 and SrcValue2 have relatively large alignment // compared to the size and offset of the access, we may be able to prove they - // do not alias. This check is conservative for now to catch cases created by + // do not alias. This check is conservative for now to catch cases created by // splitting vector types. 
- if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) && - (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) && - (Op0->getMemoryVT().getSizeInBits() >> 3 == - Op1->getMemoryVT().getSizeInBits() >> 3) && - (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) { - int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment(); - int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment(); + int64_t SrcValOffset0 = Op0->getSrcValueOffset(); + int64_t SrcValOffset1 = Op1->getSrcValueOffset(); + unsigned OrigAlignment0 = Op0->getOriginalAlignment(); + unsigned OrigAlignment1 = Op1->getOriginalAlignment(); + if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 && + NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) { + int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0; + int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1; // There is no overlap between these relatively aligned accesses of similar - // size, return no alias. - if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 || - (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1) + // size. Return no alias. + if ((OffAlign0 + NumBytes0) <= OffAlign1 || + (OffAlign1 + NumBytes1) <= OffAlign0) return false; } @@ -16147,19 +16150,17 @@ CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) UseAA = false; #endif + if (UseAA && Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) { // Use alias analysis information. - int64_t MinOffset = std::min(Op0->getSrcValueOffset(), - Op1->getSrcValueOffset()); - int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) + - Op0->getSrcValueOffset() - MinOffset; - int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) + - Op1->getSrcValueOffset() - MinOffset; + int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); + int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset; + int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset; AliasResult AAResult = - AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1, + AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0, UseTBAA ? Op0->getAAInfo() : AAMDNodes()), - MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2, + MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1, UseTBAA ? Op1->getAAInfo() : AAMDNodes())); if (AAResult == NoAlias) return false; Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1343,7 +1343,7 @@ // Convert to an integer of the same size. 
if (TLI.isTypeLegal(IVT)) { State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value); - State.SignMask = APInt::getSignBit(NumBits); + State.SignMask = APInt::getSignMask(NumBits); State.SignBit = NumBits - 1; return; } @@ -2984,7 +2984,7 @@ EVT NVT = Node->getValueType(0); APFloat apf(DAG.EVTToAPFloatSemantics(VT), APInt::getNullValue(VT.getSizeInBits())); - APInt x = APInt::getSignBit(NVT.getSizeInBits()); + APInt x = APInt::getSignMask(NVT.getSizeInBits()); (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); Tmp1 = DAG.getConstantFP(apf, dl, VT); Tmp2 = DAG.getSetCC(dl, getSetCCResultType(VT), Index: lib/CodeGen/SelectionDAG/LegalizeTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -925,9 +925,9 @@ assert(Op.getValueType().isVector() && "Only applies to vectors!"); unsigned EltWidth = Op.getScalarValueSizeInBits(); EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); - unsigned NumElts = Op.getValueType().getVectorNumElements(); + auto EltCnt = Op.getValueType().getVectorElementCount(); return DAG.getNode(ISD::BITCAST, SDLoc(Op), - EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op); + EVT::getVectorVT(*DAG.getContext(), EltNVT, EltCnt), Op); } SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -639,12 +639,15 @@ // If we have operands, deallocate them. removeOperands(N); + NodeAllocator.Deallocate(AllNodes.remove(N)); + // Set the opcode to DELETED_NODE to help catch bugs when node // memory is reallocated. + // FIXME: There are places in SDag that have grown a dependency on the opcode + // value in the released node. + __asan_unpoison_memory_region(&N->NodeType, sizeof(N->NodeType)); N->NodeType = ISD::DELETED_NODE; - NodeAllocator.Deallocate(AllNodes.remove(N)); - // If any of the SDDbgValue nodes refer to this SDNode, invalidate // them and forget about that node. DbgInfo->erase(N); @@ -1826,7 +1829,7 @@ std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign); int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false); - return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); + return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); } SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { @@ -1839,7 +1842,7 @@ MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); int FrameIdx = MFI.CreateStackObject(Bytes, Align, false); - return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); + return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); } SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, @@ -1955,7 +1958,7 @@ /// use this predicate to simplify operations downstream. bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { unsigned BitWidth = Op.getScalarValueSizeInBits(); - return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth); + return MaskedValueIsZero(Op, APInt::getSignMask(BitWidth), Depth); } /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use @@ -2344,11 +2347,11 @@ KnownOne.lshrInPlace(*ShAmt); // If we know the value of the sign bit, then we know it is copied across // the high bits by the shift amount. 
- APInt SignBit = APInt::getSignBit(BitWidth); - SignBit.lshrInPlace(*ShAmt); // Adjust to where it is now in the mask. - if (KnownZero.intersects(SignBit)) { + APInt SignMask = APInt::getSignMask(BitWidth); + SignMask.lshrInPlace(*ShAmt); // Adjust to where it is now in the mask. + if (KnownZero.intersects(SignMask)) { KnownZero.setHighBits(ShAmt->getZExtValue());// New bits are known zero. - } else if (KnownOne.intersects(SignBit)) { + } else if (KnownOne.intersects(SignMask)) { KnownOne.setHighBits(ShAmt->getZExtValue()); // New bits are known one. } } @@ -2361,14 +2364,14 @@ // present in the input. APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits); - APInt InSignBit = APInt::getSignBit(EBits); + APInt InSignMask = APInt::getSignMask(EBits); APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits); // If the sign extended bits are demanded, we know that the sign // bit is demanded. - InSignBit = InSignBit.zext(BitWidth); + InSignMask = InSignMask.zext(BitWidth); if (NewBits.getBoolValue()) - InputDemandedBits |= InSignBit; + InputDemandedBits |= InSignMask; computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, Depth + 1); @@ -2377,10 +2380,10 @@ // If the sign bit of the input is known set or clear, then we know the // top bits of the result. - if (KnownZero.intersects(InSignBit)) { // Input sign bit known clear + if (KnownZero.intersects(InSignMask)) { // Input sign bit known clear KnownZero |= NewBits; KnownOne &= ~NewBits; - } else if (KnownOne.intersects(InSignBit)) { // Input sign bit known set + } else if (KnownOne.intersects(InSignMask)) { // Input sign bit known set KnownOne |= NewBits; KnownZero &= ~NewBits; } else { // Input sign bit unknown @@ -2745,7 +2748,7 @@ // a set bit that isn't the sign bit (otherwise it could be INT_MIN). KnownOne2.clearBit(BitWidth - 1); if (KnownOne2.getBoolValue()) { - KnownZero = APInt::getSignBit(BitWidth); + KnownZero = APInt::getSignMask(BitWidth); break; } break; @@ -2874,7 +2877,7 @@ // one bit set. if (Val.getOpcode() == ISD::SRL) { auto *C = dyn_cast(Val.getOperand(0)); - if (C && C->getAPIntValue().isSignBit()) + if (C && C->getAPIntValue().isSignMask()) return true; } Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1151,7 +1151,7 @@ FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) return DAG.getFrameIndex(SI->second, - TLI.getPointerTy(DAG.getDataLayout())); + TLI.getFrameIndexTy(DAG.getDataLayout())); } // If this is an instruction which fast-isel has deferred, select it now. @@ -5617,7 +5617,7 @@ SDValue Ops[2]; Ops[0] = getRoot(); Ops[1] = - DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()), true); + DAG.getFrameIndex(FI, TLI.getFrameIndexTy(DAG.getDataLayout()), true); unsigned Opcode = (IsStart ? 
ISD::LIFETIME_START : ISD::LIFETIME_END); Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); @@ -6630,7 +6630,7 @@ unsigned Align = DL.getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy(DL)); + SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL)); Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot, MachinePointerInfo::getFixedStack(MF, SSFI)); OpInfo.CallOperand = StackSlot; @@ -7393,7 +7393,7 @@ } else if (FrameIndexSDNode *FI = dyn_cast(OpVal)) { const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); Ops.push_back(Builder.DAG.getTargetFrameIndex( - FI->getIndex(), TLI.getPointerTy(Builder.DAG.getDataLayout()))); + FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout()))); } else Ops.push_back(OpVal); } @@ -7661,7 +7661,7 @@ DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false); Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); - DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL)); + DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL)); ArgListEntry Entry; Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -342,11 +342,16 @@ /// If the specified instruction has a constant integer operand and there are /// bits set in that constant that are not demanded, then clear those bits and /// return true. -bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant( - SDValue Op, const APInt &Demanded) { +bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const { + SelectionDAG &DAG = TLO.DAG; SDLoc DL(Op); unsigned Opcode = Op.getOpcode(); + // Do target-specific constant optimization. + if (targetShrinkDemandedConstant(Op, Demanded, TLO)) + return TLO.New.getNode(); + // FIXME: ISD::SELECT, ISD::SELECT_CC switch (Opcode) { default: @@ -367,7 +372,7 @@ EVT VT = Op.getValueType(); SDValue NewC = DAG.getConstant(Demanded & C, DL, VT); SDValue NewOp = DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC); - return CombineTo(Op, NewOp); + return TLO.CombineTo(Op, NewOp); } break; @@ -380,15 +385,17 @@ /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. /// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be /// generalized for targets with other types of implicit widening casts. -bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, - unsigned BitWidth, - const APInt &Demanded, - const SDLoc &dl) { +bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, + const APInt &Demanded, + TargetLoweringOpt &TLO) const { assert(Op.getNumOperands() == 2 && "ShrinkDemandedOp only supports binary operators!"); assert(Op.getNode()->getNumValues() == 1 && "ShrinkDemandedOp only supports nodes with one result!"); + SelectionDAG &DAG = TLO.DAG; + SDLoc dl(Op); + // Early return, as this function cannot handle vector types. if (Op.getValueType().isVector()) return false; @@ -418,23 +425,22 @@ bool NeedZext = DemandedSize > SmallVTBits; SDValue Z = DAG.getNode(NeedZext ? 
ISD::ZERO_EXTEND : ISD::ANY_EXTEND, dl, Op.getValueType(), X); - return CombineTo(Op, Z); + return TLO.CombineTo(Op, Z); } } return false; } bool -TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User, - unsigned OpIdx, - const APInt &Demanded, - DAGCombinerInfo &DCI) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); +TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx, + const APInt &Demanded, + DAGCombinerInfo &DCI, + TargetLoweringOpt &TLO) const { SDValue Op = User->getOperand(OpIdx); APInt KnownZero, KnownOne; - if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, - *this, 0, true)) + if (!SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, + TLO, 0, true)) return false; @@ -446,9 +452,9 @@ // with the value 'x', which will give us: // Old = i32 and x, 0xffffff // New = x - if (Old.hasOneUse()) { + if (TLO.Old.hasOneUse()) { // For the one use case, we just commit the change. - DCI.CommitTargetLoweringOpt(*this); + DCI.CommitTargetLoweringOpt(TLO); return true; } @@ -456,17 +462,17 @@ // AssumeSingleUse flag is not propogated to recursive calls of // SimplifyDemanded bits, so the only node with multiple use that // it will attempt to combine will be opt. - assert(Old == Op); + assert(TLO.Old == Op); SmallVector NewOps; for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { if (i == OpIdx) { - NewOps.push_back(New); + NewOps.push_back(TLO.New); continue; } NewOps.push_back(User->getOperand(i)); } - DAG.UpdateNodeOperands(User, NewOps); + TLO.DAG.UpdateNodeOperands(User, NewOps); // Op has less users now, so we may be able to perform additional combines // with it. DCI.AddToWorklist(Op.getNode()); @@ -585,7 +591,7 @@ // If any of the set bits in the RHS are known zero on the LHS, shrink // the constant. - if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask)) + if (ShrinkDemandedConstant(Op, ~LHSZero & NewMask, TLO)) return true; // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its @@ -620,10 +626,10 @@ if ((NewMask & (KnownZero|KnownZero2)) == NewMask) return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType())); // If the RHS is a constant, see if we can simplify it. - if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) + if (ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask, TLO)) return true; // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) return true; // Output known-1 bits are only known if set in both the LHS & RHS. @@ -654,10 +660,10 @@ if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) return TLO.CombineTo(Op, Op.getOperand(1)); // If the RHS is a constant, see if we can simplify it. - if (TLO.ShrinkDemandedConstant(Op, NewMask)) + if (ShrinkDemandedConstant(Op, NewMask, TLO)) return true; // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) return true; // Output known-0 bits are only known if clear in both the LHS & RHS. @@ -682,7 +688,7 @@ if ((KnownZero2 & NewMask) == NewMask) return TLO.CombineTo(Op, Op.getOperand(1)); // If the operation can be done in a smaller type, do so. 
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) return true; // If all of the unknown bits are known to be zero on one side or the other @@ -727,7 +733,7 @@ } // If it already has all the bits set, nothing to change // but don't shrink either! - } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) { + } else if (ShrinkDemandedConstant(Op, NewMask, TLO)) { return true; } } @@ -746,7 +752,7 @@ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. - if (TLO.ShrinkDemandedConstant(Op, NewMask)) + if (ShrinkDemandedConstant(Op, NewMask, TLO)) return true; // Only known if known in both the LHS and RHS. @@ -764,7 +770,7 @@ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. - if (TLO.ShrinkDemandedConstant(Op, NewMask)) + if (ShrinkDemandedConstant(Op, NewMask, TLO)) return true; // Only known if known in both the LHS and RHS. @@ -778,7 +784,7 @@ // If (1) we only need the sign-bit, (2) the setcc operands are the same // width as the setcc result, and (3) the result of a setcc conforms to 0 or // -1, we may be able to bypass the setcc. - if (NewMask.isSignBit() && Op0.getScalarValueSizeInBits() == BitWidth && + if (NewMask.isSignMask() && Op0.getScalarValueSizeInBits() == BitWidth && getBooleanContents(Op.getValueType()) == BooleanContent::ZeroOrNegativeOneBooleanContent) { // If we're testing X < 0, then this compare isn't needed - just use X! @@ -964,7 +970,7 @@ // demand the input sign bit. APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); if (HighBits.intersects(NewMask)) - InDemandedMask |= APInt::getSignBit(VT.getScalarSizeInBits()); + InDemandedMask |= APInt::getSignMask(VT.getScalarSizeInBits()); if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, TLO, Depth+1)) @@ -974,11 +980,11 @@ KnownOne.lshrInPlace(ShAmt); // Handle the sign bit, adjusted to where it is now in the mask. - APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt); + APInt SignMask = APInt::getSignMask(BitWidth).lshr(ShAmt); // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. - if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) { + if (KnownZero.intersects(SignMask) || (HighBits & ~NewMask) == HighBits) { SDNodeFlags Flags; Flags.setExact(cast(Op)->Flags.hasExact()); return TLO.CombineTo(Op, @@ -996,7 +1002,7 @@ Op.getOperand(0), NewSA)); } - if (KnownOne.intersects(SignBit)) + if (KnownOne.intersects(SignMask)) // New bits are known one. 
KnownOne |= HighBits; } @@ -1040,7 +1046,7 @@ return TLO.CombineTo(Op, Op.getOperand(0)); APInt InSignBit = - APInt::getSignBit(ExVT.getScalarSizeInBits()).zext(BitWidth); + APInt::getSignMask(ExVT.getScalarSizeInBits()).zext(BitWidth); APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, ExVT.getScalarSizeInBits()) & @@ -1250,7 +1256,7 @@ if (!TLO.LegalOperations() && !Op.getValueType().isVector() && !Op.getOperand(0).getValueType().isVector() && - NewMask == APInt::getSignBit(Op.getValueSizeInBits()) && + NewMask == APInt::getSignMask(Op.getValueSizeInBits()) && Op.getOperand(0).getValueType().isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); @@ -1284,7 +1290,7 @@ SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2, KnownOne2, TLO, Depth+1) || // See if the operation should be performed at a smaller bit width. - TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) { + ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) { const SDNodeFlags *Flags = Op.getNode()->getFlags(); if (Flags->hasNoSignedWrap() || Flags->hasNoUnsignedWrap()) { // Disable the nsw and nuw flags. We can no longer guarantee that we @@ -3356,7 +3362,7 @@ SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT); SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT); SDValue Bias = DAG.getConstant(127, dl, IntVT); - SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), dl, + SDValue SignMask = DAG.getConstant(APInt::getSignMask(VT.getSizeInBits()), dl, IntVT); SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, dl, IntVT); SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT); Index: lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp =================================================================== --- lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp +++ lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp @@ -9,6 +9,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Dwarf.h" @@ -112,10 +113,8 @@ continue; } while (AccelSection.isValidOffsetForDataOfSize(DataOffset, 4)) { - unsigned StringOffset = AccelSection.getU32(&DataOffset); - RelocAddrMap::const_iterator Reloc = Relocs.find(DataOffset-4); - if (Reloc != Relocs.end()) - StringOffset += Reloc->second.second; + unsigned StringOffset = + getRelocatedValue(AccelSection, 4, &DataOffset, &Relocs); if (!StringOffset) break; OS << format(" Name: %08x \"%s\"\n", StringOffset, Index: lib/DebugInfo/DWARF/DWARFContext.cpp =================================================================== --- lib/DebugInfo/DWARF/DWARFContext.cpp +++ lib/DebugInfo/DWARF/DWARFContext.cpp @@ -56,6 +56,16 @@ typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; typedef DILineInfoSpecifier::FunctionNameKind FunctionNameKind; +uint64_t llvm::getRelocatedValue(const DataExtractor &Data, uint32_t Size, + uint32_t *Off, const RelocAddrMap *Relocs) { + if (!Relocs) + return Data.getUnsigned(Off, Size); + RelocAddrMap::const_iterator AI = Relocs->find(*Off); + if (AI == Relocs->end()) + return Data.getUnsigned(Off, Size); + return Data.getUnsigned(Off, Size) + AI->second.second; +} + static void dumpAccelSection(raw_ostream &OS, StringRef Name, const DWARFSection& Section, StringRef StringSection, bool LittleEndian) { Index: 
lib/DebugInfo/DWARF/DWARFDebugLine.cpp =================================================================== --- lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallString.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Dwarf.h" @@ -302,16 +303,9 @@ // relocatable address. All of the other statement program opcodes // that affect the address register add a delta to it. This instruction // stores a relocatable value into it instead. - { - // If this address is in our relocation map, apply the relocation. - RelocAddrMap::const_iterator AI = RMap->find(*offset_ptr); - if (AI != RMap->end()) { - const std::pair &R = AI->second; - State.Row.Address = - debug_line_data.getAddress(offset_ptr) + R.second; - } else - State.Row.Address = debug_line_data.getAddress(offset_ptr); - } + State.Row.Address = + getRelocatedValue(debug_line_data, debug_line_data.getAddressSize(), + offset_ptr, RMap); break; case DW_LNE_define_file: Index: lib/DebugInfo/DWARF/DWARFDebugLoc.cpp =================================================================== --- lib/DebugInfo/DWARF/DWARFDebugLoc.cpp +++ lib/DebugInfo/DWARF/DWARFDebugLoc.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Dwarf.h" @@ -48,18 +49,10 @@ // 2.6.2 Location Lists // A location list entry consists of: while (true) { + // A beginning and ending address offsets. Entry E; - RelocAddrMap::const_iterator AI = RelocMap.find(Offset); - // 1. A beginning address offset. ... - E.Begin = data.getUnsigned(&Offset, AddressSize); - if (AI != RelocMap.end()) - E.Begin += AI->second.second; - - AI = RelocMap.find(Offset); - // 2. An ending address offset. ... - E.End = data.getUnsigned(&Offset, AddressSize); - if (AI != RelocMap.end()) - E.End += AI->second.second; + E.Begin = getRelocatedValue(data, AddressSize, &Offset, &RelocMap); + E.End = getRelocatedValue(data, AddressSize, &Offset, &RelocMap); // The end of any given location list is marked by an end of list entry, // which consists of a 0 for the beginning address offset and a 0 for the Index: lib/DebugInfo/DWARF/DWARFFormValue.cpp =================================================================== --- lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -334,11 +334,8 @@ (Form == DW_FORM_addr) ? U->getAddressByteSize() : U->getRefAddrByteSize(); - RelocAddrMap::const_iterator AI = U->getRelocMap()->find(*offset_ptr); - if (AI != U->getRelocMap()->end()) { - Value.uval = data.getUnsigned(offset_ptr, AddrSize) + AI->second.second; - } else - Value.uval = data.getUnsigned(offset_ptr, AddrSize); + Value.uval = + getRelocatedValue(data, AddrSize, offset_ptr, U->getRelocMap()); break; } case DW_FORM_exprloc: @@ -376,12 +373,8 @@ case DW_FORM_ref_sup4: case DW_FORM_strx4: case DW_FORM_addrx4: { - Value.uval = data.getU32(offset_ptr); - if (!U) - break; - RelocAddrMap::const_iterator AI = U->getRelocMap()->find(*offset_ptr-4); - if (AI != U->getRelocMap()->end()) - Value.uval += AI->second.second; + const RelocAddrMap* RelocMap = U ? 
U->getRelocMap() : nullptr; + Value.uval = getRelocatedValue(data, 4, offset_ptr, RelocMap); break; } case DW_FORM_data8: @@ -411,11 +404,8 @@ case DW_FORM_strp_sup: { if (!U) return false; - RelocAddrMap::const_iterator AI = U->getRelocMap()->find(*offset_ptr); - uint8_t Size = U->getDwarfOffsetByteSize(); - Value.uval = data.getUnsigned(offset_ptr, Size); - if (AI != U->getRelocMap()->end()) - Value.uval += AI->second.second; + Value.uval = getRelocatedValue(data, U->getDwarfOffsetByteSize(), + offset_ptr, U->getRelocMap()); break; } case DW_FORM_flag_present: Index: lib/Fuzzer/CMakeLists.txt =================================================================== --- lib/Fuzzer/CMakeLists.txt +++ lib/Fuzzer/CMakeLists.txt @@ -1,3 +1,18 @@ +include(CheckCXXSourceCompiles) + +if( APPLE ) + CHECK_CXX_SOURCE_COMPILES(" + static thread_local int blah; + int main() { + return 0; + } + " HAS_THREAD_LOCAL) + + if( NOT HAS_THREAD_LOCAL ) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Dthread_local=__thread") + endif() +endif() + set(LIBFUZZER_FLAGS_BASE "${CMAKE_CXX_FLAGS}") # Disable the coverage and sanitizer instrumentation for the fuzzer itself. set(CMAKE_CXX_FLAGS "${LIBFUZZER_FLAGS_BASE} -fno-sanitize-coverage=trace-pc-guard,edge,trace-cmp,indirect-calls,8bit-counters -Werror") Index: lib/Fuzzer/FuzzerDefs.h =================================================================== --- lib/Fuzzer/FuzzerDefs.h +++ lib/Fuzzer/FuzzerDefs.h @@ -36,12 +36,20 @@ #error "Support for your platform has not been implemented" #endif +#ifndef __has_attribute +# define __has_attribute(x) 0 +#endif + #define LIBFUZZER_POSIX LIBFUZZER_APPLE || LIBFUZZER_LINUX #ifdef __x86_64 -#define ATTRIBUTE_TARGET_POPCNT __attribute__((target("popcnt"))) +# if __has_attribute(target) +# define ATTRIBUTE_TARGET_POPCNT __attribute__((target("popcnt"))) +# else +# define ATTRIBUTE_TARGET_POPCNT +# endif #else -#define ATTRIBUTE_TARGET_POPCNT +# define ATTRIBUTE_TARGET_POPCNT #endif Index: lib/IR/AsmWriter.cpp =================================================================== --- lib/IR/AsmWriter.cpp +++ lib/IR/AsmWriter.cpp @@ -1103,35 +1103,34 @@ } if (const ConstantFP *CFP = dyn_cast(CV)) { - if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle() || - &CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble()) { + const APFloat &APF = CFP->getValueAPF(); + if (&APF.getSemantics() == &APFloat::IEEEsingle() || + &APF.getSemantics() == &APFloat::IEEEdouble()) { // We would like to output the FP constant value in exponential notation, // but we cannot do this if doing so will lose precision. Check here to // make sure that we only output it in exponential format if we can parse // the value back and get the same value. // bool ignored; - bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble(); - bool isInf = CFP->getValueAPF().isInfinity(); - bool isNaN = CFP->getValueAPF().isNaN(); + bool isDouble = &APF.getSemantics() == &APFloat::IEEEdouble(); + bool isInf = APF.isInfinity(); + bool isNaN = APF.isNaN(); if (!isInf && !isNaN) { - double Val = isDouble ? CFP->getValueAPF().convertToDouble() : - CFP->getValueAPF().convertToFloat(); + double Val = isDouble ? APF.convertToDouble() : APF.convertToFloat(); SmallString<128> StrVal; - raw_svector_ostream(StrVal) << Val; - + APF.toString(StrVal, 6, 0, false); // Check to make sure that the stringized number is not some string like // "Inf" or NaN, that atof will accept, but the lexer will not. Check // that the string matches the "[-+]?[0-9]" regex. 
// - if ((StrVal[0] >= '0' && StrVal[0] <= '9') || - ((StrVal[0] == '-' || StrVal[0] == '+') && - (StrVal[1] >= '0' && StrVal[1] <= '9'))) { - // Reparse stringized version! - if (APFloat(APFloat::IEEEdouble(), StrVal).convertToDouble() == Val) { - Out << StrVal; - return; - } + assert(((StrVal[0] >= '0' && StrVal[0] <= '9') || + ((StrVal[0] == '-' || StrVal[0] == '+') && + (StrVal[1] >= '0' && StrVal[1] <= '9'))) && + "[-+]?[0-9] regex does not match!"); + // Reparse stringized version! + if (APFloat(APFloat::IEEEdouble(), StrVal).convertToDouble() == Val) { + Out << StrVal; + return; } } // Otherwise we could not reparse it to exactly the same value, so we must @@ -1140,7 +1139,7 @@ // x86, so we must not use these types. static_assert(sizeof(double) == sizeof(uint64_t), "assuming that double is 64 bits!"); - APFloat apf = CFP->getValueAPF(); + APFloat apf = APF; // Floats are represented in ASCII IR as double, convert. if (!isDouble) apf.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, @@ -1153,27 +1152,27 @@ // These appear as a magic letter identifying the type, then a // fixed number of hex digits. Out << "0x"; - APInt API = CFP->getValueAPF().bitcastToAPInt(); - if (&CFP->getValueAPF().getSemantics() == &APFloat::x87DoubleExtended()) { + APInt API = APF.bitcastToAPInt(); + if (&APF.getSemantics() == &APFloat::x87DoubleExtended()) { Out << 'K'; Out << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4, /*Upper=*/true); Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, /*Upper=*/true); return; - } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad()) { + } else if (&APF.getSemantics() == &APFloat::IEEEquad()) { Out << 'L'; Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, /*Upper=*/true); Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16, /*Upper=*/true); - } else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble()) { + } else if (&APF.getSemantics() == &APFloat::PPCDoubleDouble()) { Out << 'M'; Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, /*Upper=*/true); Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16, /*Upper=*/true); - } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEhalf()) { + } else if (&APF.getSemantics() == &APFloat::IEEEhalf()) { Out << 'H'; Out << format_hex_no_prefix(API.getZExtValue(), 4, /*Upper=*/true); Index: lib/IR/Attributes.cpp =================================================================== --- lib/IR/Attributes.cpp +++ lib/IR/Attributes.cpp @@ -1043,46 +1043,7 @@ AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index, AttributeList Attrs) const { - if (!pImpl) - return AttributeList(); - if (!Attrs.pImpl) return *this; - - // FIXME it is not obvious how this should work for alignment. - // For now, say we can't pass in alignment, which no current use does. - assert(!Attrs.hasAttribute(Index, Attribute::Alignment) && - "Attempt to change alignment!"); - - // Add the attribute slots before the one we're trying to add. - SmallVector AttrSet; - uint64_t NumAttrs = pImpl->getNumSlots(); - AttributeList AL; - uint64_t LastIndex = 0; - for (unsigned I = 0, E = NumAttrs; I != E; ++I) { - if (getSlotIndex(I) >= Index) { - if (getSlotIndex(I) == Index) AL = getSlotAttributes(LastIndex++); - break; - } - LastIndex = I + 1; - AttrSet.push_back(getSlotAttributes(I)); - } - - // Now remove the attribute from the correct slot. There may already be an - // AttributeList there. 
- AttrBuilder B(AL, Index); - - for (unsigned I = 0, E = Attrs.pImpl->getNumSlots(); I != E; ++I) - if (Attrs.getSlotIndex(I) == Index) { - B.removeAttributes(Attrs.pImpl->getSlotAttributes(I), Index); - break; - } - - AttrSet.push_back(AttributeList::get(C, Index, B)); - - // Add the remaining attribute slots. - for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I) - AttrSet.push_back(getSlotAttributes(I)); - - return get(C, AttrSet); + return removeAttributes(C, Index, AttrBuilder(Attrs.getAttributes(Index))); } AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index, @@ -1095,31 +1056,30 @@ assert(!Attrs.hasAlignmentAttr() && "Attempt to change alignment!"); // Add the attribute slots before the one we're trying to add. - SmallVector AttrSet; + SmallVector AttrSets; uint64_t NumAttrs = pImpl->getNumSlots(); - AttributeList AL; + AttrBuilder B; uint64_t LastIndex = 0; for (unsigned I = 0, E = NumAttrs; I != E; ++I) { if (getSlotIndex(I) >= Index) { - if (getSlotIndex(I) == Index) AL = getSlotAttributes(LastIndex++); + if (getSlotIndex(I) == Index) + B = AttrBuilder(pImpl->getSlotNode(LastIndex++)); break; } LastIndex = I + 1; - AttrSet.push_back(getSlotAttributes(I)); + AttrSets.push_back({getSlotIndex(I), pImpl->getSlotNode(I)}); } - // Now remove the attribute from the correct slot. There may already be an - // AttributeList there. - AttrBuilder B(AL, Index); + // Remove the attributes from the existing set and add them. B.remove(Attrs); - - AttrSet.push_back(AttributeList::get(C, Index, B)); + if (B.hasAttributes()) + AttrSets.push_back({Index, AttributeSet::get(C, B)}); // Add the remaining attribute slots. for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I) - AttrSet.push_back(getSlotAttributes(I)); + AttrSets.push_back({getSlotIndex(I), pImpl->getSlotNode(I)}); - return get(C, AttrSet); + return get(C, AttrSets); } AttributeList AttributeList::removeAttributes(LLVMContext &C, @@ -1403,18 +1363,7 @@ } AttrBuilder &AttrBuilder::removeAttributes(AttributeList A, uint64_t Index) { - unsigned Slot = ~0U; - for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I) - if (A.getSlotIndex(I) == Index) { - Slot = I; - break; - } - - assert(Slot != ~0U && "Couldn't find index in AttributeList!"); - - for (AttributeList::iterator I = A.begin(Slot), E = A.end(Slot); I != E; - ++I) { - Attribute Attr = *I; + for (Attribute Attr : A.getAttributes(Index)) { if (Attr.isEnumAttribute() || Attr.isIntAttribute()) { removeAttribute(Attr.getKindAsEnum()); } else { Index: lib/IR/DebugInfo.cpp =================================================================== --- lib/IR/DebugInfo.cpp +++ lib/IR/DebugInfo.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm-c/DebugInfo.h" #include "llvm/IR/DebugInfo.h" #include "LLVMContextImpl.h" #include "llvm/ADT/STLExtras.h" @@ -659,3 +660,60 @@ return Val->getZExtValue(); return 0; } + +//===----------------------------------------------------------------------===// +// LLVM C API implementations. +//===----------------------------------------------------------------------===// + +template DIT *unwrapDI(LLVMMetadataRef Ref) { + return (DIT *)(Ref ? 
unwrap(Ref) : nullptr); +} + +uint32_t LLVMDebugMetadataVersion() { + return DEBUG_METADATA_VERSION; +} + +unsigned LLVMGetModuleDebugMetadataVersion(LLVMModuleRef M) { + return getDebugMetadataVersionFromModule(*unwrap(M)); +} + +uint8_t LLVMStripModuleDebugInfo(LLVMModuleRef M) { + return StripDebugInfo(*unwrap(M)); +} + +void LLVMDIBuilderDispose(LLVMDIBuilderRef Builder) { + delete unwrap(Builder); +} + +void LLVMDIBuilderFinalize(LLVMDIBuilderRef Builder) { + unwrap(Builder)->finalize(); +} + +LLVMMetadataRef LLVMDIBuilderCreateCompileUnit( + LLVMDIBuilderRef Builder, LLVMDWARFSourceLanguage Lang, + LLVMMetadataRef FileRef, const char *Producer, uint8_t isOptimized, + const char *Flags, unsigned RuntimeVer, const char *SplitName, + LLVMDWARFEmissionKind Kind, uint64_t DWOId, uint8_t SplitDebugInlining, + uint8_t DebugInfoForProfiling) { + auto *File = unwrapDI(FileRef); + + return wrap(unwrap(Builder)->createCompileUnit( + static_cast(Lang), File, Producer, + isOptimized, Flags, RuntimeVer, SplitName, + static_cast(Kind), DWOId, + SplitDebugInlining, DebugInfoForProfiling)); +} + +LLVMMetadataRef +LLVMDIBuilderCreateFile(LLVMDIBuilderRef Builder, const char *Filename, + const char *Directory) { + return wrap(unwrap(Builder)->createFile(Filename, Directory)); +} + +LLVMMetadataRef +LLVMDIBuilderCreateDebugLocation(LLVMContextRef Ctx, unsigned Line, + unsigned Column, LLVMMetadataRef Scope, + LLVMMetadataRef InlinedAt) { + return wrap(DILocation::get(*unwrap(Ctx), Line, Column, unwrap(Scope), + unwrap(InlinedAt))); +} Index: lib/Support/APFloat.cpp =================================================================== --- lib/Support/APFloat.cpp +++ lib/Support/APFloat.cpp @@ -3393,7 +3393,7 @@ } void IEEEFloat::toString(SmallVectorImpl &Str, unsigned FormatPrecision, - unsigned FormatMaxPadding) const { + unsigned FormatMaxPadding, bool TruncateZero) const { switch (category) { case fcInfinity: if (isNegative()) @@ -3407,9 +3407,16 @@ if (isNegative()) Str.push_back('-'); - if (!FormatMaxPadding) - append(Str, "0.0E+0"); - else + if (!FormatMaxPadding) { + if (TruncateZero) + append(Str, "0.0E+0"); + else { + append(Str, "0.0"); + if (FormatPrecision > 1) + Str.append(FormatPrecision - 1, '0'); + append(Str, "e+00"); + } + } else Str.push_back('0'); return; @@ -3543,12 +3550,16 @@ Str.push_back(buffer[NDigits-1]); Str.push_back('.'); - if (NDigits == 1) + if (NDigits == 1 && TruncateZero) Str.push_back('0'); else for (unsigned I = 1; I != NDigits; ++I) Str.push_back(buffer[NDigits-1-I]); - Str.push_back('E'); + // Fill with zeros up to FormatPrecision. + if (!TruncateZero && FormatPrecision > NDigits - 1) + Str.append(FormatPrecision - NDigits + 1, '0'); + // For !TruncateZero we use lower 'e'. + Str.push_back(TruncateZero ? 'E' : 'e'); Str.push_back(exp >= 0 ? '+' : '-'); if (exp < 0) exp = -exp; @@ -3557,6 +3568,9 @@ expbuf.push_back((char) ('0' + (exp % 10))); exp /= 10; } while (exp); + // Exponent always at least two digits if we do not truncate zeros. 
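As a rough illustration of the TruncateZero behavior implemented above (the exact strings are an assumption based on this patch's comments, not copied from a test):

    // With FormatPrecision = 6, FormatMaxPadding = 0:
    //   APFloat(1.5).toString(S, 6, 0, /*TruncateZero=*/true)  -> "1.5E+0"        (legacy form)
    //   APFloat(1.5).toString(S, 6, 0, /*TruncateZero=*/false) -> "1.500000e+00"  (printf-style %e)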
+ if (!TruncateZero && expbuf.size() < 2) + expbuf.push_back('0'); for (unsigned I = 0, E = expbuf.size(); I != E; ++I) Str.push_back(expbuf[E-1-I]); return; @@ -4362,10 +4376,11 @@ void DoubleAPFloat::toString(SmallVectorImpl &Str, unsigned FormatPrecision, - unsigned FormatMaxPadding) const { + unsigned FormatMaxPadding, + bool TruncateZero) const { assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) - .toString(Str, FormatPrecision, FormatMaxPadding); + .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero); } bool DoubleAPFloat::getExactInverse(APFloat *inv) const { Index: lib/Support/APInt.cpp =================================================================== --- lib/Support/APInt.cpp +++ lib/Support/APInt.cpp @@ -364,44 +364,20 @@ return std::equal(pVal, pVal + getNumWords(), RHS.pVal); } -bool APInt::ult(const APInt& RHS) const { +int APInt::compare(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); if (isSingleWord()) - return VAL < RHS.VAL; + return VAL < RHS.VAL ? -1 : VAL > RHS.VAL; - // Get active bit length of both operands - unsigned n1 = getActiveBits(); - unsigned n2 = RHS.getActiveBits(); - - // If magnitude of LHS is less than RHS, return true. - if (n1 < n2) - return true; - - // If magnitude of RHS is greater than LHS, return false. - if (n2 < n1) - return false; - - // If they both fit in a word, just compare the low order word - if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD) - return pVal[0] < RHS.pVal[0]; - - // Otherwise, compare all words - unsigned topWord = whichWord(std::max(n1,n2)-1); - for (int i = topWord; i >= 0; --i) { - if (pVal[i] > RHS.pVal[i]) - return false; - if (pVal[i] < RHS.pVal[i]) - return true; - } - return false; + return tcCompare(pVal, RHS.pVal, getNumWords()); } -bool APInt::slt(const APInt& RHS) const { +int APInt::compareSigned(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); if (isSingleWord()) { int64_t lhsSext = SignExtend64(VAL, BitWidth); int64_t rhsSext = SignExtend64(RHS.VAL, BitWidth); - return lhsSext < rhsSext; + return lhsSext < rhsSext ? -1 : lhsSext > rhsSext; } bool lhsNeg = isNegative(); @@ -409,11 +385,11 @@ // If the sign bits don't match, then (LHS < RHS) if LHS is negative if (lhsNeg != rhsNeg) - return lhsNeg; + return lhsNeg ? -1 : 1; // Otherwise we can just use an unsigned comparison, because even negative // numbers compare correctly this way if both have the same signed-ness. - return ult(RHS); + return tcCompare(pVal, RHS.pVal, getNumWords()); } void APInt::setBit(unsigned bitPosition) { @@ -730,6 +706,14 @@ return false; } +bool APInt::isSubsetOfSlowCase(const APInt &RHS) const { + for (unsigned i = 0, e = getNumWords(); i != e; ++i) + if ((pVal[i] & ~RHS.pVal[i]) != 0) + return false; + + return true; +} + APInt APInt::byteSwap() const { assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!"); if (BitWidth == 16) @@ -2676,10 +2660,8 @@ unsigned parts) { while (parts) { parts--; - if (lhs[parts] == rhs[parts]) - continue; - - return (lhs[parts] > rhs[parts]) ? 1 : -1; + if (lhs[parts] != rhs[parts]) + return (lhs[parts] > rhs[parts]) ? 
1 : -1; } return 0; Index: lib/Support/Dwarf.cpp =================================================================== --- lib/Support/Dwarf.cpp +++ lib/Support/Dwarf.cpp @@ -22,7 +22,7 @@ switch (Tag) { default: return StringRef(); -#define HANDLE_DW_TAG(ID, NAME) \ +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ case DW_TAG_##NAME: \ return "DW_TAG_" #NAME; #include "llvm/Support/Dwarf.def" @@ -31,11 +31,34 @@ unsigned llvm::dwarf::getTag(StringRef TagString) { return StringSwitch(TagString) -#define HANDLE_DW_TAG(ID, NAME) .Case("DW_TAG_" #NAME, DW_TAG_##NAME) +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ + .Case("DW_TAG_" #NAME, DW_TAG_##NAME) #include "llvm/Support/Dwarf.def" .Default(DW_TAG_invalid); } +unsigned llvm::dwarf::TagVersion(dwarf::Tag Tag) { + switch (Tag) { + default: + return 0; +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ + case DW_TAG_##NAME: \ + return VERSION; +#include "llvm/Support/Dwarf.def" + } +} + +unsigned llvm::dwarf::TagVendor(dwarf::Tag Tag) { + switch (Tag) { + default: + return 0; +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ + case DW_TAG_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/Support/Dwarf.def" + } +} + StringRef llvm::dwarf::ChildrenString(unsigned Children) { switch (Children) { case DW_CHILDREN_no: return "DW_CHILDREN_no"; @@ -48,29 +71,73 @@ switch (Attribute) { default: return StringRef(); -#define HANDLE_DW_AT(ID, NAME) \ - case DW_AT_##NAME: \ +#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) \ + case DW_AT_##NAME: \ return "DW_AT_" #NAME; #include "llvm/Support/Dwarf.def" } } +unsigned llvm::dwarf::AttributeVersion(dwarf::Attribute Attribute) { + switch (Attribute) { + default: + return 0; +#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) \ + case DW_AT_##NAME: \ + return VERSION; +#include "llvm/Support/Dwarf.def" + } +} + +unsigned llvm::dwarf::AttributeVendor(dwarf::Attribute Attribute) { + switch (Attribute) { + default: + return 0; +#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) \ + case DW_AT_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/Support/Dwarf.def" + } +} + StringRef llvm::dwarf::FormEncodingString(unsigned Encoding) { switch (Encoding) { default: return StringRef(); -#define HANDLE_DW_FORM(ID, NAME) \ - case DW_FORM_##NAME: \ +#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) \ + case DW_FORM_##NAME: \ return "DW_FORM_" #NAME; #include "llvm/Support/Dwarf.def" } } +unsigned llvm::dwarf::FormVersion(dwarf::Form Form) { + switch (Form) { + default: + return 0; +#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) \ + case DW_FORM_##NAME: \ + return VERSION; +#include "llvm/Support/Dwarf.def" + } +} + +unsigned llvm::dwarf::FormVendor(dwarf::Form Form) { + switch (Form) { + default: + return 0; +#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) \ + case DW_FORM_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/Support/Dwarf.def" + } +} + StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) { switch (Encoding) { default: return StringRef(); -#define HANDLE_DW_OP(ID, NAME) \ +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ case DW_OP_##NAME: \ return "DW_OP_" #NAME; #include "llvm/Support/Dwarf.def" @@ -81,17 +148,40 @@ unsigned llvm::dwarf::getOperationEncoding(StringRef OperationEncodingString) { return StringSwitch(OperationEncodingString) -#define HANDLE_DW_OP(ID, NAME) .Case("DW_OP_" #NAME, DW_OP_##NAME) +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ + .Case("DW_OP_" #NAME, DW_OP_##NAME) #include "llvm/Support/Dwarf.def" .Case("DW_OP_LLVM_fragment", 
DW_OP_LLVM_fragment) .Default(0); } +unsigned llvm::dwarf::OperationVersion(dwarf::LocationAtom Op) { + switch (Op) { + default: + return 0; +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ + case DW_OP_##NAME: \ + return VERSION; +#include "llvm/Support/Dwarf.def" + } +} + +unsigned llvm::dwarf::OperationVendor(dwarf::LocationAtom Op) { + switch (Op) { + default: + return 0; +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ + case DW_OP_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/Support/Dwarf.def" + } +} + StringRef llvm::dwarf::AttributeEncodingString(unsigned Encoding) { switch (Encoding) { default: return StringRef(); -#define HANDLE_DW_ATE(ID, NAME) \ +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ case DW_ATE_##NAME: \ return "DW_ATE_" #NAME; #include "llvm/Support/Dwarf.def" @@ -100,11 +190,34 @@ unsigned llvm::dwarf::getAttributeEncoding(StringRef EncodingString) { return StringSwitch(EncodingString) -#define HANDLE_DW_ATE(ID, NAME) .Case("DW_ATE_" #NAME, DW_ATE_##NAME) +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ + .Case("DW_ATE_" #NAME, DW_ATE_##NAME) #include "llvm/Support/Dwarf.def" .Default(0); } +unsigned llvm::dwarf::AttributeEncodingVersion(dwarf::TypeKind ATE) { + switch (ATE) { + default: + return 0; +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ + case DW_ATE_##NAME: \ + return VERSION; +#include "llvm/Support/Dwarf.def" + } +} + +unsigned llvm::dwarf::AttributeEncodingVendor(dwarf::TypeKind ATE) { + switch (ATE) { + default: + return 0; +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ + case DW_ATE_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/Support/Dwarf.def" + } +} + StringRef llvm::dwarf::DecimalSignString(unsigned Sign) { switch (Sign) { case DW_DS_unsigned: return "DW_DS_unsigned"; @@ -169,7 +282,7 @@ switch (Language) { default: return StringRef(); -#define HANDLE_DW_LANG(ID, NAME) \ +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ case DW_LANG_##NAME: \ return "DW_LANG_" #NAME; #include "llvm/Support/Dwarf.def" @@ -178,11 +291,34 @@ unsigned llvm::dwarf::getLanguage(StringRef LanguageString) { return StringSwitch(LanguageString) -#define HANDLE_DW_LANG(ID, NAME) .Case("DW_LANG_" #NAME, DW_LANG_##NAME) +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ + .Case("DW_LANG_" #NAME, DW_LANG_##NAME) #include "llvm/Support/Dwarf.def" .Default(0); } +unsigned llvm::dwarf::LanguageVersion(dwarf::SourceLanguage Lang) { + switch (Lang) { + default: + return 0; +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ + case DW_LANG_##NAME: \ + return VERSION; +#include "llvm/Support/Dwarf.def" + } +} + +unsigned llvm::dwarf::LanguageVendor(dwarf::SourceLanguage Lang) { + switch (Lang) { + default: + return 0; +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ + case DW_LANG_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/Support/Dwarf.def" + } +} + StringRef llvm::dwarf::CaseString(unsigned Case) { switch (Case) { case DW_ID_case_sensitive: return "DW_ID_case_sensitive"; @@ -394,3 +530,12 @@ return StringRef(); } + +bool llvm::dwarf::isValidFormForVersion(Form F, unsigned Version, + bool ExtensionsOk) { + if (FormVendor(F) == DWARF_VENDOR_DWARF) { + unsigned FV = FormVersion(F); + return FV > 0 && FV <= Version; + } + return ExtensionsOk; +} Index: lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp =================================================================== --- lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -942,6 +942,7 @@ AArch64::XZR, NextMBBI); 
case AArch64::CMP_SWAP_128: return expandCMP_SWAP_128(MBB, MBBI, NextMBBI); + } return false; } Index: lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.h +++ lib/Target/AArch64/AArch64ISelLowering.h @@ -255,6 +255,9 @@ const SelectionDAG &DAG, unsigned Depth = 0) const override; + bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const override; + MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override; /// Returns true if the target allows unaligned memory accesses of the @@ -508,6 +511,18 @@ const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override; + SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + template SDValue getGOT(NodeTy *N, SelectionDAG &DAG) const; + template + SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG) const; + template SDValue getAddr(NodeTy *N, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -91,6 +91,7 @@ STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumShiftInserts, "Number of vector shift inserts"); +STATISTIC(NumOptimizedImms, "Number of times immediates were optimized"); static cl::opt EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden, @@ -105,6 +106,12 @@ cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false)); +static cl::opt +EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden, + cl::desc("Enable AArch64 logical imm instruction " + "optimization"), + cl::init(true)); + /// Value type used for condition codes. static const MVT MVT_CC = MVT::i32; @@ -787,6 +794,140 @@ return VT.changeVectorElementTypeToInteger(); } +static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm, + const APInt &Demanded, + TargetLowering::TargetLoweringOpt &TLO, + unsigned NewOpc) { + uint64_t OldImm = Imm, NewImm, Enc; + uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask; + + // Return if the immediate is already all zeros, all ones, a bimm32 or a + // bimm64. + if (Imm == 0 || Imm == Mask || + AArch64_AM::isLogicalImmediate(Imm & Mask, Size)) + return false; + + unsigned EltSize = Size; + uint64_t DemandedBits = Demanded.getZExtValue(); + + // Clear bits that are not demanded. + Imm &= DemandedBits; + + while (true) { + // The goal here is to set the non-demanded bits in a way that minimizes + // the number of switching between 0 and 1. In order to achieve this goal, + // we set the non-demanded bits to the value of the preceding demanded bits. + // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a + // non-demanded bit), we copy bit0 (1) to the least significant 'x', + // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'. 
+ // The final result is 0b11000011. + uint64_t NonDemandedBits = ~DemandedBits; + uint64_t InvertedImm = ~Imm & DemandedBits; + uint64_t RotatedImm = + ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) & + NonDemandedBits; + uint64_t Sum = RotatedImm + NonDemandedBits; + bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1)); + uint64_t Ones = (Sum + Carry) & NonDemandedBits; + NewImm = (Imm | Ones) & Mask; + + // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate + // or all-ones or all-zeros, in which case we can stop searching. Otherwise, + // we halve the element size and continue the search. + if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask))) + break; + + // We cannot shrink the element size any further if it is 2-bits. + if (EltSize == 2) + return false; + + EltSize /= 2; + Mask >>= EltSize; + uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize; + + // Return if there is mismatch in any of the demanded bits of Imm and Hi. + if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0) + return false; + + // Merge the upper and lower halves of Imm and DemandedBits. + Imm |= Hi; + DemandedBits |= DemandedBitsHi; + } + + ++NumOptimizedImms; + + // Replicate the element across the register width. + while (EltSize < Size) { + NewImm |= NewImm << EltSize; + EltSize *= 2; + } + + (void)OldImm; + assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 && + "demanded bits should never be altered"); + assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm"); + + // Create the new constant immediate node. + EVT VT = Op.getValueType(); + SDLoc DL(Op); + + // If the new constant immediate is all-zeros or all-ones, let the target + // independent DAG combine optimize this node. + if (NewImm == 0 || NewImm == OrigMask) + return TLO.CombineTo(Op.getOperand(1), TLO.DAG.getConstant(NewImm, DL, VT)); + + // Otherwise, create a machine node so that target independent DAG combine + // doesn't undo this optimization. + Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size); + SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT); + SDValue New( + TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0); + + return TLO.CombineTo(Op, New); +} + +bool AArch64TargetLowering::targetShrinkDemandedConstant( + SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const { + // Delay this optimization to as late as possible. + if (!TLO.LegalOps) + return false; + + if (!EnableOptimizeLogicalImm) + return false; + + EVT VT = Op.getValueType(); + if (VT.isVector()) + return false; + + unsigned Size = VT.getSizeInBits(); + assert((Size == 32 || Size == 64) && + "i32 or i64 is expected after legalization."); + + // Exit early if we demand all bits. + if (Demanded.countPopulation() == Size) + return false; + + unsigned NewOpc; + switch (Op.getOpcode()) { + default: + return false; + case ISD::AND: + NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri; + break; + case ISD::OR: + NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri; + break; + case ISD::XOR: + NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri; + break; + } + ConstantSDNode *C = dyn_cast(Op.getOperand(1)); + if (!C) + return false; + uint64_t Imm = C->getZExtValue(); + return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc); +} + /// computeKnownBitsForTargetNode - Determine which of the bits specified in /// Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. 
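To make the bit-filling step above easier to follow, here is a rough bit-by-bit reference version of the same idea for a single 8-bit element (illustrative only; the patch itself uses the branch-free rotate/add formulation shown above):

    #include <cstdint>

    // Give every non-demanded bit the value of the nearest demanded bit below
    // it (wrapping around at the top), which is what the rotate/add trick
    // computes without looping over bits.
    static uint8_t fillUndemandedBits(uint8_t Imm, uint8_t Demanded) {
      uint8_t Result = Imm & Demanded;
      bool Prev = false;
      for (int Pass = 0; Pass < 2; ++Pass)   // second pass handles wrap-around
        for (int Bit = 0; Bit < 8; ++Bit) {
          if ((Demanded >> Bit) & 1)
            Prev = (Result >> Bit) & 1;
          else if (Prev)
            Result |= uint8_t(1u << Bit);
        }
      return Result;
    }

    // Example from the comment above: Imm = 0bx10xx0x1 with Demanded = 0b01100101.
    // fillUndemandedBits(0b01000001, 0b01100101) == 0b11000011, and its complement
    // 0b00111100 is a shifted mask, so the value is encodable as a logical immediate.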
@@ -3418,11 +3559,75 @@ // Other Lowering Code //===----------------------------------------------------------------------===// +SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag); +} + +SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); +} + +SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(), + N->getOffset(), Flag); +} + +SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag); +} + +// (loadGOT sym) +template +SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG) const { + DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n"); + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT); + // FIXME: Once remat is capable of dealing with instructions with register + // operands, expand this into two nodes instead of using a wrapper node. + return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr); +} + +// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym)) +template +SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG) + const { + DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n"); + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + const unsigned char MO_NC = AArch64II::MO_NC; + return DAG.getNode( + AArch64ISD::WrapperLarge, DL, Ty, + getTargetNode(N, Ty, DAG, AArch64II::MO_G3), + getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC), + getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC), + getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC)); +} + +// (addlow (adrp %hi(sym)) %lo(sym)) +template +SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG) const { + DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n"); + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE); + SDValue Lo = getTargetNode(N, Ty, DAG, + AArch64II::MO_PAGEOFF | AArch64II::MO_NC); + SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi); + return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo); +} + SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc DL(Op); - const GlobalAddressSDNode *GN = cast(Op); + GlobalAddressSDNode *GN = cast(Op); const GlobalValue *GV = GN->getGlobal(); unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine()); @@ -3430,32 +3635,15 @@ assert(cast(Op)->getOffset() == 0 && "unexpected offset in global node"); - // This also catched the large code model case for Darwin. + // This also catches the large code model case for Darwin. if ((OpFlags & AArch64II::MO_GOT) != 0) { - SDValue GotAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); - // FIXME: Once remat is capable of dealing with instructions with register - // operands, expand this into two nodes instead of using a wrapper node. 
- return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr); + return getGOT(GN, DAG); } if (getTargetMachine().getCodeModel() == CodeModel::Large) { - const unsigned char MO_NC = AArch64II::MO_NC; - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G3), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G2 | MO_NC), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G1 | MO_NC), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G0 | MO_NC)); + return getAddrLarge(GN, DAG); } else { - // Use ADRP/ADD or ADRP/LDR for everything else: the small model on ELF and - // the only correct model on Darwin. - SDValue Hi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, - OpFlags | AArch64II::MO_PAGE); - unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC; - SDValue Lo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, LoFlags); - - SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + return getAddr(GN, DAG); } } @@ -4232,90 +4420,37 @@ // Jump table entries as PC relative offsets. No additional tweaking // is necessary here. Just get the address of the jump table. JumpTableSDNode *JT = cast(Op); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc DL(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large && !Subtarget->isTargetMachO()) { - const unsigned char MO_NC = AArch64II::MO_NC; - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G3), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G2 | MO_NC), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G1 | MO_NC), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, - AArch64II::MO_G0 | MO_NC)); + return getAddrLarge(JT, DAG); } - - SDValue Hi = - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_PAGE); - SDValue Lo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, - AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + return getAddr(JT, DAG); } SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast(Op); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc DL(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large) { // Use the GOT for the large code model on iOS. if (Subtarget->isTargetMachO()) { - SDValue GotAddr = DAG.getTargetConstantPool( - CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), - AArch64II::MO_GOT); - return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr); + return getGOT(CP, DAG); } - - const unsigned char MO_NC = AArch64II::MO_NC; - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_G3), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_G2 | MO_NC), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_G1 | MO_NC), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_G0 | MO_NC)); + return getAddrLarge(CP, DAG); } else { - // Use ADRP/ADD or ADRP/LDR for everything else: the small memory model on - // ELF, the only valid one on Darwin. 
- SDValue Hi = - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_PAGE); - SDValue Lo = DAG.getTargetConstantPool( - CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), - AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - - SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + return getAddr(CP, DAG); } } SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { - const BlockAddress *BA = cast(Op)->getBlockAddress(); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc DL(Op); + BlockAddressSDNode *BA = cast(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large && !Subtarget->isTargetMachO()) { - const unsigned char MO_NC = AArch64II::MO_NC; - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G3), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G2 | MO_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G1 | MO_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G0 | MO_NC)); + return getAddrLarge(BA, DAG); } else { - SDValue Hi = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGE); - SDValue Lo = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGEOFF | - AArch64II::MO_NC); - SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + return getAddr(BA, DAG); } } Index: lib/Target/AArch64/AArch64InstrAtomics.td =================================================================== --- lib/Target/AArch64/AArch64InstrAtomics.td +++ lib/Target/AArch64/AArch64InstrAtomics.td @@ -14,6 +14,9 @@ //===---------------------------------- // Atomic fences //===---------------------------------- +let AddedComplexity = 15, Size = 0 in +def CompilerBarrier : Pseudo<(outs), (ins i32imm:$ordering), + [(atomic_fence imm:$ordering, 0)]>, Sched<[]>; def : Pat<(atomic_fence (i64 4), (imm)), (DMB (i32 0x9))>; def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>; Index: lib/Target/AArch64/AArch64InstructionSelector.cpp =================================================================== --- lib/Target/AArch64/AArch64InstructionSelector.cpp +++ lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -41,12 +41,17 @@ namespace { +#define GET_GLOBALISEL_PREDICATE_BITSET +#include "AArch64GenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATE_BITSET + class AArch64InstructionSelector : public InstructionSelector { public: AArch64InstructionSelector(const AArch64TargetMachine &TM, const AArch64Subtarget &STI, const AArch64RegisterBankInfo &RBI); + void beginFunction(const MachineFunction &MF) override; bool select(MachineInstr &I) const override; private: @@ -70,6 +75,12 @@ const AArch64InstrInfo &TII; const AArch64RegisterInfo &TRI; const AArch64RegisterBankInfo &RBI; + bool ForCodeSize; + + PredicateBitset AvailableFeatures; + PredicateBitset + computeAvailableFeatures(const MachineFunction *MF, + const AArch64Subtarget *Subtarget) const; // We declare the temporaries used by selectImpl() in the class to minimize the // cost of constructing placeholder values. 
@@ -88,7 +99,7 @@ const AArch64TargetMachine &TM, const AArch64Subtarget &STI, const AArch64RegisterBankInfo &RBI) : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI) + TRI(*STI.getRegisterInfo()), RBI(RBI), ForCodeSize(), AvailableFeatures() #define GET_GLOBALISEL_TEMPORARIES_INIT #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_TEMPORARIES_INIT @@ -567,6 +578,12 @@ return true; } +void AArch64InstructionSelector::beginFunction( + const MachineFunction &MF) { + ForCodeSize = MF.getFunction()->optForSize(); + AvailableFeatures = computeAvailableFeatures(&MF, &STI); +} + bool AArch64InstructionSelector::select(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); Index: lib/Target/AArch64/AArch64SchedFalkor.td =================================================================== --- lib/Target/AArch64/AArch64SchedFalkor.td +++ lib/Target/AArch64/AArch64SchedFalkor.td @@ -79,14 +79,14 @@ def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 3; } -def : WriteRes - { let Latency = 3; let NumMicroOps = 3; } +def : WriteRes + { let Latency = 0; let NumMicroOps = 2; } def : WriteRes { let Latency = 0; let NumMicroOps = 2; } -def : WriteRes { let Latency = 5; } +def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 5; } -def : WriteRes - { let Latency = 4; let NumMicroOps = 3; } +def : WriteRes + { let Latency = 0; let NumMicroOps = 2; } def : WriteRes { let Latency = 3; let NumMicroOps = 2; } def : WriteRes { let Latency = 2; } Index: lib/Target/AArch64/AArch64SchedFalkorDetails.td =================================================================== --- lib/Target/AArch64/AArch64SchedFalkorDetails.td +++ lib/Target/AArch64/AArch64SchedFalkorDetails.td @@ -265,6 +265,12 @@ // Arithmetic and Logical Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_ADD], (instregex "^ADD(S)?(W|X)r(s|x)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^AND(S)?(W|X)r(i|r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^BIC(S)?(W|X)r(r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EON(W|X)r(r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EOR(W|X)r(i|r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORN(W|X)r(r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORR(W|X)r(i|r|s)$")>; def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^SUB(S)?(W|X)r(s|x)$")>; // SIMD Miscellaneous Instructions @@ -320,6 +326,10 @@ // SIMD Store Instructions // ----------------------------------------------------------------------------- +def : InstRW<[WriteVST], (instregex "^STP(D|S)(i)$")>; +def : InstRW<[WriteVST, WriteAdr], (instregex "^STP(D|S)(post|pre)$")>; +def : InstRW<[FalkorWr_2XYZ_2ST_2VSD_0cyc], (instregex "^STRQro(W|X)$")>; + def : InstRW<[WriteVST], (instregex "^ST1(One(v8b|v4h|v2s|v1d)(_POST)?|(i8|i16|i32|i64)(_POST)?|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>; def : InstRW<[WriteVST], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))$")>; def : InstRW<[WriteVST, WriteAdr], (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>; @@ -415,6 +425,7 @@ def : InstRW<[FalkorWr_1VXVY_5cyc, FalkorReadFMA],(instregex "^F(N)?M(ADD|SUB)(H|S)rrr$")>; def : InstRW<[FalkorWr_1VXVY_6cyc, FalkorReadFMA],(instregex "^F(N)?M(ADD|SUB)Drrr$")>; + // FP Miscellaneous 
Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(HW|HX|SW|DX|DXHigh)r$")>; @@ -427,7 +438,6 @@ def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v8i16|v2f64|v4f32|v8f16)(_shift)?")>; - // Load Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFMui, PRFMl)>; @@ -455,6 +465,7 @@ def : InstRW<[WriteLD, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>; def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi],(instrs LDPSWi)>; def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi, WriteAdr],(instregex "^LDPSW(post|pre)$")>; + // Miscellaneous Data-Processing Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(S|U)?BFM(W|X)ri$")>; @@ -496,28 +507,30 @@ def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>; def : InstRW<[WriteVST], (instrs STNPDi, STNPSi)>; -def : InstRW<[WriteSTP], (instrs STNPWi, STNPXi)>; +def : InstRW<[WriteSTP], (instrs STNPWi, STNPXi)>; def : InstRW<[FalkorWr_2LD_1Z_3cyc], (instrs ERET)>; -def : InstRW<[WriteST], (instregex "^LDC.*$")>; -def : InstRW<[WriteST], (instregex "^STLR(B|H|W|X)$")>; -def : InstRW<[WriteST], (instregex "^STXP(W|X)$")>; -def : InstRW<[WriteST], (instregex "^STXR(B|H|W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_3cyc], (instregex "^LDC.*$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STLR(B|H|W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STXP(W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STXR(B|H|W|X)$")>; -def : InstRW<[WriteSTX], (instregex "^STLXP(W|X)$")>; -def : InstRW<[WriteSTX], (instregex "^STLXR(B|H|W|X)$")>; +def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXP(W|X)$")>; +def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXR(B|H|W|X)$")>; def : InstRW<[WriteVST, WriteVST], (instrs STNPQi)>; // Store Instructions // ----------------------------------------------------------------------------- -def : InstRW<[WriteVST], (instregex "^STP(D|S)(i|post|pre)$")>; -def : InstRW<[WriteST], (instregex "^STP(W|X)(i|post|pre)$")>; +def : InstRW<[WriteST], (instregex "^STP(W|X)i$")>; +def : InstRW<[WriteST, WriteAdr], (instregex "^STP(W|X)(post|pre)$")>; def : InstRW<[WriteST], (instregex "^STR(Q|D|S|BB|HH)ui$")>; def : InstRW<[WriteST], (instregex "^STUR(Q|D|S|BB|HH)i$")>; -def : InstRW<[WriteST], (instregex "^STR(B|H|W|X)(post|pre|ui)$")>; +def : InstRW<[WriteST], (instregex "^STR(B|H|W|X)ui$")>; +def : InstRW<[WriteST, WriteAdr], (instregex "^STR(B|H|W|X)(post|pre)$")>; def : InstRW<[WriteST], (instregex "^STTR(B|H|W|X)i$")>; def : InstRW<[WriteST], (instregex "^STUR(B|H|W|X)i$")>; def : InstRW<[WriteST, WriteAdr], (instregex "^STR(B|H|W|X)ro(W|X)$")>; -def : InstRW<[WriteVST, WriteVST], (instregex "^STPQ(i|post|pre)$")>; +def : InstRW<[WriteVST, WriteVST], (instregex "^STPQi$")>; +def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^STPQ(post|pre)$")>; Index: lib/Target/AArch64/AArch64SchedFalkorWriteRes.td =================================================================== --- lib/Target/AArch64/AArch64SchedFalkorWriteRes.td +++ lib/Target/AArch64/AArch64SchedFalkorWriteRes.td @@ -28,7 +28,6 @@ //===----------------------------------------------------------------------===// // Define 1 micro-op types - def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 2; } def FalkorWr_1X_4cyc : 
SchedWriteRes<[FalkorUnitX]> { let Latency = 4; } def FalkorWr_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; } @@ -175,18 +174,33 @@ //===----------------------------------------------------------------------===// // Define 3 micro-op types +def FalkorWr_1ST_1SD_1LD_0cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, + FalkorUnitLD]> { + let Latency = 0; + let NumMicroOps = 3; +} + +def FalkorWr_1ST_1SD_1LD_3cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, + FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 3; +} + def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 3; let NumMicroOps = 3; } + def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 4; let NumMicroOps = 3; } + def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 5; let NumMicroOps = 3; } + def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 6; let NumMicroOps = 3; @@ -196,10 +210,12 @@ let Latency = 4; let NumMicroOps = 3; } + def FalkorWr_2LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { let Latency = 3; let NumMicroOps = 3; } + def FalkorWr_3LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, FalkorUnitLD]> { let Latency = 3; @@ -259,6 +275,12 @@ let NumMicroOps = 4; } +def FalkorWr_2LD_1ST_1SD_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitST, + FalkorUnitSD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 4; +} + //===----------------------------------------------------------------------===// // Define 5 micro-op types @@ -289,6 +311,13 @@ let NumMicroOps = 6; } +def FalkorWr_2XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST, + FalkorUnitVSD, FalkorUnitXYZ, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 6; +} + //===----------------------------------------------------------------------===// // Define 8 micro-op types Index: lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp =================================================================== --- lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCRegisterInfo.h" @@ -275,6 +276,12 @@ } } + if (Opcode == AArch64::CompilerBarrier) { + O << '\t' << MAI.getCommentString() << " COMPILER BARRIER"; + printAnnotation(O, Annot); + return; + } + if (!printAliasInstr(MI, STI, O)) printInstruction(MI, STI, O); Index: lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp =================================================================== --- lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -565,6 +565,9 @@ MCFixupKind Fixup = MCFixupKind(AArch64::fixup_aarch64_tlsdesc_call); Fixups.push_back(MCFixup::create(0, MI.getOperand(0).getExpr(), Fixup)); return; + } else if (MI.getOpcode() == AArch64::CompilerBarrier) { + // This just prevents the compiler from reordering accesses, no actual code. 
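For context on the CompilerBarrier pseudo handled here and declared in AArch64InstrAtomics.td above: it targets a single-thread fence, which must stop compiler reordering but needs no DMB. A hedged C++ illustration (not part of the patch):

    #include <atomic>

    void publish(int *Slot, int Val) {
      *Slot = Val;
      // A signal fence orders only compiler-visible accesses; with this change it
      // should select to the zero-size CompilerBarrier pseudo, which prints as an
      // assembly comment and emits no bytes.
      std::atomic_signal_fence(std::memory_order_seq_cst);
    }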
+ return; } uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI); Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2315,12 +2315,13 @@ SelectionDAG &DAG = DCI.DAG; SDValue Op = Node24->getOperand(OpIdx); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = Op.getValueType(); APInt Demanded = APInt::getLowBitsSet(VT.getSizeInBits(), 24); APInt KnownZero, KnownOne; TargetLowering::TargetLoweringOpt TLO(DAG, true, true); - if (TLO.SimplifyDemandedBits(Node24, OpIdx, Demanded, DCI)) + if (TLI.SimplifyDemandedBits(Node24, OpIdx, Demanded, DCI, TLO)) return true; return false; @@ -3361,7 +3362,7 @@ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) || + if (TLI.ShrinkDemandedConstant(BitsFrom, Demanded, TLO) || TLI.SimplifyDemandedBits(BitsFrom, Demanded, KnownZero, KnownOne, TLO)) { DCI.CommitTargetLoweringOpt(TLO); Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -4696,7 +4696,7 @@ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(Src, Demanded) || + if (TLI.ShrinkDemandedConstant(Src, Demanded, TLO) || TLI.SimplifyDemandedBits(Src, Demanded, KnownZero, KnownOne, TLO)) { DCI.CommitTargetLoweringOpt(TLO); } Index: lib/Target/ARM/ARMCallLowering.cpp =================================================================== --- lib/Target/ARM/ARMCallLowering.cpp +++ lib/Target/ARM/ARMCallLowering.cpp @@ -35,7 +35,8 @@ static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI, Type *T) { EVT VT = TLI.getValueType(DL, T, true); - if (!VT.isSimple() || VT.isVector()) + if (!VT.isSimple() || VT.isVector() || + !(VT.isInteger() || VT.isFloatingPoint())) return false; unsigned VTSize = VT.getSimpleVT().getSizeInBits(); Index: lib/Target/ARM/ARMConstantIslandPass.cpp =================================================================== --- lib/Target/ARM/ARMConstantIslandPass.cpp +++ lib/Target/ARM/ARMConstantIslandPass.cpp @@ -1741,10 +1741,9 @@ .add(MI->getOperand(1)); MI->eraseFromParent(); MadeChange = true; - } - if (MI->getOpcode() == ARM::tPUSH && - MI->getOperand(2).getReg() == ARM::LR && - MI->getNumExplicitOperands() == 3) { + } else if (MI->getOpcode() == ARM::tPUSH && + MI->getOperand(2).getReg() == ARM::LR && + MI->getNumExplicitOperands() == 3) { // Just remove the push. MI->eraseFromParent(); MadeChange = true; @@ -2158,6 +2157,15 @@ // If we're in PIC mode, there should be another ADD following. 
auto *TRI = STI->getRegisterInfo(); + + // %base cannot be redefined after the load as it will appear before + // TBB/TBH like: + // %base = + // %base = + // tBB %base, %idx + if (registerDefinedBetween(BaseReg, Load->getNextNode(), MBB->end(), TRI)) + continue; + if (isPositionIndependentOrROPI) { MachineInstr *Add = Load->getNextNode(); if (Add->getOpcode() != ARM::tADDrr || Index: lib/Target/ARM/ARMISelDAGToDAG.cpp =================================================================== --- lib/Target/ARM/ARMISelDAGToDAG.cpp +++ lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -539,11 +539,11 @@ SDValue NewMulConst; if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) { HandleSDNode Handle(N); + SDLoc Loc(N); replaceDAGValue(N.getOperand(1), NewMulConst); BaseReg = Handle.getValue(); - Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl, - PowerOfTwo), - SDLoc(N), MVT::i32); + Opc = CurDAG->getTargetConstant( + ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32); return true; } } @@ -1854,6 +1854,14 @@ return Opc; // If not one we handle, return it unchanged. } +/// Returns true if the given increment is a Constant known to be equal to the +/// access size performed by a NEON load/store. This means the "[rN]!" form can +/// be used. +static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) { + auto C = dyn_cast(Inc); + return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs; +} + void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, const uint16_t *DOpcodes, const uint16_t *QOpcodes0, @@ -1921,13 +1929,13 @@ SDValue Inc = N->getOperand(AddrOpIdx + 1); // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0 // case entirely when the rest are updated to that form, too. - if ((NumVecs <= 2) && !isa(Inc.getNode())) + bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); + if ((NumVecs <= 2) && !IsImmUpdate) Opc = getVLDSTRegisterUpdateOpcode(Opc); // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if ((NumVecs > 2 && !isVLDfixed(Opc)) || - !isa(Inc.getNode())) - Ops.push_back(isa(Inc.getNode()) ? Reg0 : Inc); + if ((NumVecs > 2 && !isVLDfixed(Opc)) || !IsImmUpdate) + Ops.push_back(IsImmUpdate ? Reg0 : Inc); } Ops.push_back(Pred); Ops.push_back(Reg0); @@ -2075,11 +2083,12 @@ SDValue Inc = N->getOperand(AddrOpIdx + 1); // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0 // case entirely when the rest are updated to that form, too. - if (NumVecs <= 2 && !isa(Inc.getNode())) + bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); + if (NumVecs <= 2 && !IsImmUpdate) Opc = getVLDSTRegisterUpdateOpcode(Opc); // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if (!isa(Inc.getNode())) + if (!IsImmUpdate) Ops.push_back(Inc); else if (NumVecs > 2 && !isVSTfixed(Opc)) Ops.push_back(Reg0); @@ -2209,7 +2218,9 @@ Ops.push_back(Align); if (isUpdating) { SDValue Inc = N->getOperand(AddrOpIdx + 1); - Ops.push_back(isa(Inc.getNode()) ? Reg0 : Inc); + bool IsImmUpdate = + isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); + Ops.push_back(IsImmUpdate ? Reg0 : Inc); } SDValue SuperReg; @@ -2313,9 +2324,11 @@ // fixed-stride update instructions don't have an explicit writeback // operand. It's implicit in the opcode itself. 
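As a worked instance of the isPerfectIncrement check introduced above (the concrete instruction and register choices are illustrative, not taken from a test in this patch):

    // VLD2 of two v4i32 vectors: VecTy.getSizeInBits() / 8 * NumVecs
    //                             = 128 / 8 * 2 = 32 bytes accessed.
    // Only a constant increment of exactly 32 can use the post-increment form
    //   vld2.32 {d0, d1, d2, d3}, [r0]!
    // while any other increment keeps the register-update form
    //   vld2.32 {d0, d1, d2, d3}, [r0], r1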
SDValue Inc = N->getOperand(2); - if (NumVecs <= 2 && !isa(Inc.getNode())) + bool IsImmUpdate = + isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); + if (NumVecs <= 2 && !IsImmUpdate) Opc = getVLDSTRegisterUpdateOpcode(Opc); - if (!isa(Inc.getNode())) + if (!IsImmUpdate) Ops.push_back(Inc); // FIXME: VLD3 and VLD4 haven't been updated to that form yet. else if (NumVecs > 2) Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -3358,8 +3358,12 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { - // FIXME: handle "fence singlethread" more efficiently. SDLoc dl(Op); + ConstantSDNode *ScopeN = cast(Op.getOperand(2)); + auto Scope = static_cast(ScopeN->getZExtValue()); + if (Scope == SynchronizationScope::SingleThread) + return Op; + if (!Subtarget->hasDataBarrier()) { // Some ARMv6 cpus can support data barriers with an mcr instruction. // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get @@ -9476,8 +9480,11 @@ return SDValue(); } - // Don't generate vpaddl+vmovn; we'll match it to vpadd later. - if (Vec.getValueType().getVectorElementType() == VT.getVectorElementType()) + // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also don't try + // to handle an i8 -> i32 situation (or similar). vpaddl can only double the + // size. + if (2 * Vec.getValueType().getVectorElementType().getSizeInBits() != + VT.getVectorElementType().getSizeInBits()) return SDValue(); // Create VPADDL node. @@ -10873,11 +10880,8 @@ // If the increment is a constant, it must match the memory ref size. SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); - if (ConstantSDNode *CInc = dyn_cast(Inc.getNode())) { - uint64_t IncVal = CInc->getZExtValue(); - if (IncVal != NumBytes) - continue; - } else if (NumBytes >= 3 * 16) { + ConstantSDNode *CInc = dyn_cast(Inc.getNode()); + if (NumBytes >= 3 * 16 && (!CInc || CInc->getZExtValue() != NumBytes)) { // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two // separate instructions that make it harder to use a non-constant update. continue; Index: lib/Target/ARM/ARMInstrInfo.td =================================================================== --- lib/Target/ARM/ARMInstrInfo.td +++ lib/Target/ARM/ARMInstrInfo.td @@ -5975,3 +5975,10 @@ (ins GPR:$addr, GPRPair:$desired, GPRPair:$new), NoItinerary, []>, Sched<[]>; } + +def CompilerBarrier : PseudoInst<(outs), (ins i32imm:$ordering), NoItinerary, + [(atomic_fence imm:$ordering, 0)]> { + let hasSideEffects = 1; + let Size = 0; + let AsmString = "@ COMPILER BARRIER"; +} Index: lib/Target/ARM/ARMInstrThumb.td =================================================================== --- lib/Target/ARM/ARMInstrThumb.td +++ lib/Target/ARM/ARMInstrThumb.td @@ -953,7 +953,7 @@ /// These opcodes will be converted to the real non-S opcodes by /// AdjustInstrPostInstrSelection after giving then an optional CPSR operand. let hasPostISelHook = 1, Defs = [CPSR] in { - let isCommutable = 1 in + let isCommutable = 1, Uses = [CPSR] in def tADCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), 2, IIC_iALUr, [(set tGPR:$Rdn, CPSR, (ARMadde tGPR:$Rn, tGPR:$Rm, @@ -1292,6 +1292,7 @@ /// These opcodes will be converted to the real non-S opcodes by /// AdjustInstrPostInstrSelection after giving then an optional CPSR operand. 
let hasPostISelHook = 1, Defs = [CPSR] in { + let Uses = [CPSR] in def tSBCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), 2, IIC_iALUr, [(set tGPR:$Rdn, CPSR, (ARMsube tGPR:$Rn, tGPR:$Rm, Index: lib/Target/ARM/ARMInstructionSelector.cpp =================================================================== --- lib/Target/ARM/ARMInstructionSelector.cpp +++ lib/Target/ARM/ARMInstructionSelector.cpp @@ -47,12 +47,9 @@ unsigned SrcReg = I.getOperand(1).getReg(); const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); (void)SrcSize; - assert((DstSize == SrcSize || - // Copies are a means to setup initial types, the number of - // bits may not exactly match. - (TargetRegisterInfo::isPhysicalRegister(SrcReg) && - DstSize <= SrcSize)) && - "Copy with different width?!"); + // We use copies for trunc, so it's ok for the size of the destination to be + // smaller (the higher bits will just be undefined). + assert(DstSize <= SrcSize && "Copy with different width?!"); assert((RegBank->getID() == ARM::GPRRegBankID || RegBank->getID() == ARM::FPRRegBankID) && @@ -294,6 +291,28 @@ } break; } + case G_TRUNC: { + // The high bits are undefined, so there's nothing special to do, just + // treat it as a copy. + auto SrcReg = I.getOperand(1).getReg(); + auto DstReg = I.getOperand(0).getReg(); + + const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); + const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); + + if (SrcRegBank.getID() != DstRegBank.getID()) { + DEBUG(dbgs() << "G_TRUNC operands on different register banks\n"); + return false; + } + + if (SrcRegBank.getID() != ARM::GPRRegBankID) { + DEBUG(dbgs() << "G_TRUNC on non-GPR not supported yet\n"); + return false; + } + + I.setDesc(TII.get(COPY)); + return selectCopy(I, TII, MRI, TRI, RBI); + } case G_ADD: case G_GEP: I.setDesc(TII.get(ARM::ADDrr)); Index: lib/Target/ARM/ARMRegisterBankInfo.cpp =================================================================== --- lib/Target/ARM/ARMRegisterBankInfo.cpp +++ lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -223,6 +223,7 @@ case G_MUL: case G_SEXT: case G_ZEXT: + case G_TRUNC: case G_GEP: // FIXME: We're abusing the fact that everything lives in a GPR for now; in // the real world we would use different mappings. Index: lib/Target/Mips/Relocation.txt =================================================================== --- /dev/null +++ lib/Target/Mips/Relocation.txt @@ -0,0 +1,125 @@ +MIPS Relocation Principles + +In LLVM, there are several elements of the llvm::ISD::NodeType enum +that deal with addresses and/or relocations. These are defined in +include/llvm/Target/TargetSelectionDAG.td, namely: + GlobalAddress, GlobalTLSAddress, JumpTable, ConstantPool, + ExternalSymbol, BlockAddress +The MIPS backend uses several principles to handle these. + +1. Code for lowering addresses references to machine dependent code is +factored into common code for generating different address forms and +is called by the relocation model specific lowering function, using +templated functions. For example: + + // lib/Target/Mips/MipsISelLowering.cpp + SDValue MipsTargetLowering:: + lowerJumpTable(SDValue Op, SelectionDAG &DAG) const + +calls + + template // lib/Target/Mips/MipsISelLowering.h + SDValue getAddrLocal(NodeTy *N, const SDLoc &DL, EVT Ty, + SelectionDAG &DAG, bool IsN32OrN64) const + +which calls the overloaded function: + + // lib/Target/Mips/MipsISelLowering.h + SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + +2. 
Generic address nodes are lowered to some combination of target +independent and machine specific SDNodes (for example: +MipsISD::{Highest, Higher, Hi, Lo}) depending upon relocation model, +ABI, and compilation options. + +The choice of specific instructions that are to be used is delegated +to ISel which in turn relies on TableGen patterns to choose subtarget +specific instructions. For example, in getAddrLocal, the pseudo-code +generated is: + + (add (load (wrapper $gp, %got(sym)), %lo(sym)) + +where "%lo" represents an instance of an SDNode with opcode +"MipsISD::Lo", "wrapper" indicates one with opcode "MipsISD::Wrapper", +and "%got" the global table pointer "getGlobalReg(...)". The "add" is +"ISD::ADD", not a target dependent one. + +3. A TableGen multiclass pattern "MipsHiLoRelocs" is used to define a +template pattern parameterized over the load upper immediate +instruction, add operation, the zero register, and register class. +Here the instantiation of MipsHiLoRelocs in MipsInstrInfo.td is used +to MIPS32 to compute addresses for the static relocation model. + + // lib/Target/Mips/MipsInstrInfo.td + multiclass MipsHiLoRelocs { + def : MipsPat<(MipsHi tglobaladdr:$in), (Lui tglobaladdr:$in)>; + ... + def : MipsPat<(MipsLo tglobaladdr:$in), (Addiu ZeroReg, tglobaladdr:$in)>; + ... + def : MipsPat<(add GPROpnd:$hi, (MipsLo tglobaladdr:$lo)), + (Addiu GPROpnd:$hi, tglobaladdr:$lo)>; + ... + } + defm : MipsHiLoRelocs; + + // lib/Target/Mips/Mips64InstrInfo.td + defm : MipsHiLoRelocs, SYM_32; + +The instantiation in Mips64InstrInfo.td is used for MIPS64 in ILP32 +mode, as guarded by the predicate "SYM_32" and also for a submode of +LP64 where symbols are assumed to be 32 bits wide. A similar +multiclass for MIPS64 in LP64 mode is also defined: + + // lib/Target/Mips/Mips64InstrInfo.td + multiclass MipsHighestHigherHiLoRelocs { + ... + def : MipsPat<(MipsHighest (i64 tglobaladdr:$in)), + (Lui tglobaladdr:$in)>; + ... + def : MipsPat<(MipsHigher (i64 tglobaladdr:$in)), + (Daddiu ZERO_64, tglobaladdr:$in)>; + ... + def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tglobaladdr:$lo))), + (Daddiu GPR64:$hi, tglobaladdr:$lo)>; + ... + def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tglobaladdr:$lo))), + (Daddiu GPR64:$hi, tglobaladdr:$lo)>; + ... + def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tglobaladdr:$lo))), + (Daddiu GPR64:$hi, tglobaladdr:$lo)>; + } + +and it is instantiated twice: + + // lib/Target/Mips/Mips64InstrInfo.td + defm : MipsHighestHigherHiLoRelocs, SYM_64; + // lib/Target/Mips/MicroMips64r6InstrInfo.td + defm : MipsHighestHigherHiLoRelocs, SYM_64, + ISA_MICROMIPS64R6; + +These patterns are used during instruction selection to match +MipsISD::{Highest, Higher, Hi, Lo} to a specific machine instruction +and operands. + +More details on how multiclasses in TableGen work can be found in the +section "Multiclass definitions and instances" in the document +"TableGen Language Introduction" + +4. Instruction definitions are multiply defined to cover the different +register classes. In some cases, such as LW/LW64, this also accounts +for the difference in the results of instruction execution. On MIPS32, +"lw" loads a 32 bit value from memory. On MIPS64, "lw" loads a 32 bit +value from memory and sign extends the value to 64 bits. 
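+
+As an illustration only (a simplified, hypothetical sketch -- the real
+definitions also supply instruction formats, scheduling information and
+predicates), the two loads are declared over different register classes
+roughly like this:
+
+  def LW   : Load<"lw", GPR32Opnd>;    // MIPS32: 32-bit result
+  def LW64 : Load<"lw", GPR64Opnd>;    // MIPS64: result sign extended to 64 bits
+
+The LUi/LUi64 pair that follows is the form actually used in the tree: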
+ + // lib/Target/Mips/MipsInstrInfo.td + def LUi : MMRel, LoadUpper<"lui", GPR32Opnd, uimm16_relaxed>, LUI_FM; + // lib/Target/Mips/Mips64InstrInfo.td + def LUi64 : LoadUpper<"lui", GPR64Opnd, uimm16_64_relaxed>, LUI_FM; + +defines two names "LUi" and "LUi64" with two different register +classes, but with the same encoding---"LUI_FM". These instructions load a +16-bit immediate into bits 31-16 and clear the lower 15 bits. On MIPS64, +the result is sign-extended to 64 bits. Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp =================================================================== --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -2977,10 +2977,10 @@ SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) { SDValue Chain = LD->getChain(); SDValue Ops[] = { Base, Offset, Chain }; - SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX, - N->getValueType(0), Ops); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = LD->getMemOperand(); + SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX, + N->getValueType(0), Ops); cast(NewN)->setMemRefs(MemOp, MemOp + 1); return; } Index: lib/Target/WebAssembly/known_gcc_test_failures.txt =================================================================== --- lib/Target/WebAssembly/known_gcc_test_failures.txt +++ lib/Target/WebAssembly/known_gcc_test_failures.txt @@ -1,5 +1,15 @@ # Tests which are known to fail from the GCC torture test suite. +# Syntax: Each line has a single test to be marked as a 'known failure' (or +# 'exclusion'. Known failures are expected to fail, and will cause an error if +# they pass. (Known failures that do not run at all will not cause an +# error). The format is +# # comment +# +# The attributes in this case represent the different arguments used to +# compiler: 'wasm-s' is for compiling to .s files, and 'wasm-o' for compiling +# to wasm object files (.o). + # Computed gotos are not supported (Cannot select BlockAddress/BRIND) 20040302-1.c 20071210-1.c @@ -66,3 +76,21 @@ 920728-1.c pr28865.c widechar-2.c + +# crash: Running pass 'WebAssembly Explicit Locals' on function +20020107-1.c wasm-o +20030222-1.c wasm-o +20071220-1.c wasm-o +20071220-2.c wasm-o +990130-1.c wasm-o +pr38533.c wasm-o +pr41239.c wasm-o +pr43385.c wasm-o +pr43560.c wasm-o +pr45695.c wasm-o +pr49279.c wasm-o +pr49390.c wasm-o +pr52286.c wasm-o + +# fatal error: error in backend: data symbols must have a size set with .size +921110-1.c wasm-o Index: lib/Target/X86/X86.h =================================================================== --- lib/Target/X86/X86.h +++ lib/Target/X86/X86.h @@ -95,7 +95,8 @@ /// encoding when possible in order to reduce code size. FunctionPass *createX86EvexToVexInsts(); -InstructionSelector *createX86InstructionSelector(X86Subtarget &, +InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM, + X86Subtarget &, X86RegisterBankInfo &); void initializeEvexToVexInstPassPass(PassRegistry &); Index: lib/Target/X86/X86.td =================================================================== --- lib/Target/X86/X86.td +++ lib/Target/X86/X86.td @@ -273,6 +273,16 @@ "fast-shld-rotate", "HasFastSHLDRotate", "true", "SHLD can be used as a faster rotate">; +// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka +// "string operations"). See "REP String Enhancement" in the Intel Software +// Development Manual. 
This feature essentially means that REP MOVSB will copy +// using the largest available size instead of copying bytes one by one, making +// it at least as fast as REPMOVS{W,D,Q}. +def FeatureERMSB + : SubtargetFeature< + "ermsb", "HasERMSB", "true", + "REP MOVS/STOS are fast">; + //===----------------------------------------------------------------------===// // X86 processors supported. //===----------------------------------------------------------------------===// @@ -498,6 +508,7 @@ FeatureAVX2, FeatureBMI, FeatureBMI2, + FeatureERMSB, FeatureFMA, FeatureLZCNT, FeatureMOVBE, Index: lib/Target/X86/X86FrameLowering.h =================================================================== --- lib/Target/X86/X86FrameLowering.h +++ lib/Target/X86/X86FrameLowering.h @@ -100,6 +100,8 @@ int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; + int getFrameIndexReferenceSP(const MachineFunction &MF, + int FI, unsigned &SPReg, int Adjustment) const; int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, unsigned &FrameReg, bool IgnoreSPUpdates) const override; Index: lib/Target/X86/X86FrameLowering.cpp =================================================================== --- lib/Target/X86/X86FrameLowering.cpp +++ lib/Target/X86/X86FrameLowering.cpp @@ -1783,6 +1783,14 @@ return Offset + FPDelta; } +int X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, + int FI, unsigned &FrameReg, + int Adjustment) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + FrameReg = TRI->getStackRegister(); + return MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + Adjustment; +} + int X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, unsigned &FrameReg, @@ -1839,9 +1847,6 @@ assert(MF.getInfo()->getTCReturnAddrDelta() >= 0 && "we don't handle this case!"); - // Fill in FrameReg output argument. - FrameReg = TRI->getStackRegister(); - // This is how the math works out: // // %rsp grows (i.e. gets lower) left to right. Each box below is @@ -1866,12 +1871,8 @@ // (C - E) == (C - A) - (B - A) + (B - E) // { Using [1], [2] and [3] above } // == getObjectOffset - LocalAreaOffset + StackSize - // - - // Get the Offset from the StackPointer - int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea(); - return Offset + StackSize; + return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize); } bool X86FrameLowering::assignCalleeSavedSpillSlots( Index: lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- lib/Target/X86/X86ISelDAGToDAG.cpp +++ lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1311,8 +1311,9 @@ ++Cost; // If the base is a register with multiple uses, this // transformation may save a mov. - if ((AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base_Reg.getNode() && + // FIXME: Don't rely on DELETED_NODEs. + if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() && + AM.Base_Reg->getOpcode() != ISD::DELETED_NODE && !AM.Base_Reg.getNode()->hasOneUse()) || AM.BaseType == X86ISelAddressMode::FrameIndexBase) --Cost; Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -16069,7 +16069,7 @@ unsigned EltBits = EltVT.getSizeInBits(); // For FABS, mask is 0x7f...; for FNEG, mask is 0x80... APInt MaskElt = - IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignBit(EltBits); + IsFABS ? 
APInt::getSignedMaxValue(EltBits) : APInt::getSignMask(EltBits); const fltSemantics &Sem = EltVT == MVT::f64 ? APFloat::IEEEdouble() : (IsF128 ? APFloat::IEEEquad() : APFloat::IEEEsingle()); @@ -16132,9 +16132,9 @@ // The mask constants are automatically splatted for vector types. unsigned EltSizeInBits = VT.getScalarSizeInBits(); SDValue SignMask = DAG.getConstantFP( - APFloat(Sem, APInt::getSignBit(EltSizeInBits)), dl, LogicVT); + APFloat(Sem, APInt::getSignMask(EltSizeInBits)), dl, LogicVT); SDValue MagMask = DAG.getConstantFP( - APFloat(Sem, ~APInt::getSignBit(EltSizeInBits)), dl, LogicVT); + APFloat(Sem, ~APInt::getSignMask(EltSizeInBits)), dl, LogicVT); // First, clear all bits but the sign bit from the second operand (sign). if (IsFakeVector) @@ -17344,10 +17344,10 @@ // bits of the inputs before performing those operations. if (FlipSigns) { MVT EltVT = VT.getVectorElementType(); - SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), dl, + SDValue SM = DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()), dl, VT); - Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB); - Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SB); + Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SM); + Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SM); } SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); @@ -22111,11 +22111,11 @@ } // i64 vector arithmetic shift can be emulated with the transform: - // M = lshr(SIGN_BIT, Amt) + // M = lshr(SIGN_MASK, Amt) // ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M) if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget.hasInt256())) && Op.getOpcode() == ISD::SRA) { - SDValue S = DAG.getConstant(APInt::getSignBit(64), dl, VT); + SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT); SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt); R = DAG.getNode(ISD::SRL, dl, VT, R, Amt); R = DAG.getNode(ISD::XOR, dl, VT, R, M); @@ -30152,7 +30152,7 @@ // x s< 0 ? x^C : 0 --> subus x, C if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR && ISD::isBuildVectorAllZeros(CondRHS.getNode()) && - OpRHSConst->getAPIntValue().isSignBit()) + OpRHSConst->getAPIntValue().isSignMask()) // Note that we have to rebuild the RHS constant here to ensure we // don't rely on particular values of undef lanes. return DAG.getNode( @@ -30203,11 +30203,11 @@ return SDValue(); assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); - APInt DemandedMask(APInt::getSignBit(BitWidth)); + APInt DemandedMask(APInt::getSignMask(BitWidth)); APInt KnownZero, KnownOne; TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(), DCI.isBeforeLegalizeOps()); - if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) || + if (TLI.ShrinkDemandedConstant(Cond, DemandedMask, TLO) || TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne, TLO)) { // If we changed the computation somewhere in the DAG, this change will @@ -33428,8 +33428,8 @@ SDValue Op0 = peekThroughBitcasts(Op.getOperand(0)); unsigned EltBits = Op1.getScalarValueSizeInBits(); - auto isSignBitValue = [&](const ConstantFP *C) { - return C->getValueAPF().bitcastToAPInt() == APInt::getSignBit(EltBits); + auto isSignMask = [&](const ConstantFP *C) { + return C->getValueAPF().bitcastToAPInt() == APInt::getSignMask(EltBits); }; // There is more than one way to represent the same constant on @@ -33440,21 +33440,21 @@ // We check all variants here. 
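+  // For instance, a v4f32 sign mask may reach this point as
+  //   (X86ISD::VBROADCAST (f32 constant-pool load of -0.0)),
+  //   a BUILD_VECTOR whose lanes splat ConstantFP -0.0, or
+  //   a constant-pool vector (splat) or scalar ConstantFP;
+  // the checks below cover each of these forms.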
if (Op1.getOpcode() == X86ISD::VBROADCAST) { if (auto *C = getTargetConstantFromNode(Op1.getOperand(0))) - if (isSignBitValue(cast(C))) + if (isSignMask(cast(C))) return Op0; } else if (BuildVectorSDNode *BV = dyn_cast(Op1)) { if (ConstantFPSDNode *CN = BV->getConstantFPSplatNode()) - if (isSignBitValue(CN->getConstantFPValue())) + if (isSignMask(CN->getConstantFPValue())) return Op0; } else if (auto *C = getTargetConstantFromNode(Op1)) { if (C->getType()->isVectorTy()) { if (auto *SplatV = C->getSplatValue()) - if (isSignBitValue(cast(SplatV))) + if (isSignMask(cast(SplatV))) return Op0; } else if (auto *FPConst = dyn_cast(C)) - if (isSignBitValue(FPConst)) + if (isSignMask(FPConst)) return Op0; } return SDValue(); @@ -33777,7 +33777,7 @@ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) || + if (TLI.ShrinkDemandedConstant(Op1, DemandedMask, TLO) || TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO)) DCI.CommitTargetLoweringOpt(TLO); } Index: lib/Target/X86/X86InstrInfo.td =================================================================== --- lib/Target/X86/X86InstrInfo.td +++ lib/Target/X86/X86InstrInfo.td @@ -897,6 +897,7 @@ def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">; def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">; def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">; +def HasERMSB : Predicate<"Subtarget->hasERMSB()">; def HasMFence : Predicate<"Subtarget->hasMFence()">; //===----------------------------------------------------------------------===// Index: lib/Target/X86/X86InstructionSelector.cpp =================================================================== --- lib/Target/X86/X86InstructionSelector.cpp +++ lib/Target/X86/X86InstructionSelector.cpp @@ -39,11 +39,16 @@ namespace { +#define GET_GLOBALISEL_PREDICATE_BITSET +#include "X86GenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATE_BITSET + class X86InstructionSelector : public InstructionSelector { public: - X86InstructionSelector(const X86Subtarget &STI, + X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI, const X86RegisterBankInfo &RBI); + void beginFunction(const MachineFunction &MF) override; bool select(MachineInstr &I) const override; private: @@ -70,10 +75,17 @@ bool selectTrunc(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; + const X86TargetMachine &TM; const X86Subtarget &STI; const X86InstrInfo &TII; const X86RegisterInfo &TRI; const X86RegisterBankInfo &RBI; + bool OptForSize; + bool OptForMinSize; + + PredicateBitset AvailableFeatures; + PredicateBitset computeAvailableFeatures(const MachineFunction *MF, + const X86Subtarget *Subtarget) const; #define GET_GLOBALISEL_TEMPORARIES_DECL #include "X86GenGlobalISel.inc" @@ -86,10 +98,12 @@ #include "X86GenGlobalISel.inc" #undef GET_GLOBALISEL_IMPL -X86InstructionSelector::X86InstructionSelector(const X86Subtarget &STI, +X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM, + const X86Subtarget &STI, const X86RegisterBankInfo &RBI) - : InstructionSelector(), STI(STI), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI) + : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), + TRI(*STI.getRegisterInfo()), RBI(RBI), OptForSize(false), + OptForMinSize(false), AvailableFeatures() #define GET_GLOBALISEL_TEMPORARIES_INIT #include "X86GenGlobalISel.inc" 
#undef GET_GLOBALISEL_TEMPORARIES_INIT @@ -181,6 +195,12 @@ return true; } +void X86InstructionSelector::beginFunction(const MachineFunction &MF) { + OptForSize = MF.getFunction()->optForSize(); + OptForMinSize = MF.getFunction()->optForMinSize(); + AvailableFeatures = computeAvailableFeatures(&MF, &STI); +} + bool X86InstructionSelector::select(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -571,7 +591,8 @@ } InstructionSelector * -llvm::createX86InstructionSelector(X86Subtarget &Subtarget, +llvm::createX86InstructionSelector(const X86TargetMachine &TM, + X86Subtarget &Subtarget, X86RegisterBankInfo &RBI) { - return new X86InstructionSelector(Subtarget, RBI); + return new X86InstructionSelector(TM, Subtarget, RBI); } Index: lib/Target/X86/X86RegisterInfo.cpp =================================================================== --- lib/Target/X86/X86RegisterInfo.cpp +++ lib/Target/X86/X86RegisterInfo.cpp @@ -669,32 +669,27 @@ MachineFunction &MF = *MI.getParent()->getParent(); const X86FrameLowering *TFI = getFrameLowering(MF); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); - unsigned BasePtr; - unsigned Opc = MI.getOpcode(); - bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm || - Opc == X86::TCRETURNmi || Opc == X86::TCRETURNmi64; - - if (hasBasePointer(MF)) - BasePtr = (FrameIndex < 0 ? FramePtr : getBaseRegister()); - else if (needsStackRealignment(MF)) - BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr); - else if (AfterFPPop) - BasePtr = StackPtr; - else - BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr); + // Determine base register and offset. + int FIOffset; + unsigned BasePtr; + if (MI.isReturn()) { + assert(MF.getFrameInfo().isFixedObjectIndex(FrameIndex) && + "Should only reference fixed stack objects here"); + FIOffset = TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0); + } else { + FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr); + } // LOCAL_ESCAPE uses a single offset, with no register. It only works in the // simple FP case, and doesn't work with stack realignment. On 32-bit, the // offset is from the traditional base pointer location. On 64-bit, the // offset is from the SP at the end of the prologue, not the FP location. This // matches the behavior of llvm.frameaddress. - unsigned IgnoredFrameReg; + unsigned Opc = MI.getOpcode(); if (Opc == TargetOpcode::LOCAL_ESCAPE) { MachineOperand &FI = MI.getOperand(FIOperandNum); - int Offset; - Offset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg); - FI.ChangeToImmediate(Offset); + FI.ChangeToImmediate(FIOffset); return; } @@ -710,15 +705,6 @@ // FrameIndex with base register. Add an offset to the offset. MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false); - // Now add the frame object offset to the offset from EBP. - int FIOffset; - if (AfterFPPop) { - // Tail call jmp happens after FP is popped. 
- const MachineFrameInfo &MFI = MF.getFrameInfo(); - FIOffset = MFI.getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea(); - } else - FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg); - if (BasePtr == StackPtr) FIOffset += SPAdj; Index: lib/Target/X86/X86SelectionDAGInfo.cpp =================================================================== --- lib/Target/X86/X86SelectionDAGInfo.cpp +++ lib/Target/X86/X86SelectionDAGInfo.cpp @@ -106,7 +106,6 @@ SDValue Count; ConstantSDNode *ValC = dyn_cast(Src); unsigned BytesLeft = 0; - bool TwoRepStos = false; if (ValC) { unsigned ValReg; uint64_t Val = ValC->getZExtValue() & 255; @@ -163,20 +162,7 @@ SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); - if (TwoRepStos) { - InFlag = Chain.getValue(1); - Count = Size; - EVT CVT = Count.getValueType(); - SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, - DAG.getConstant((AVT == MVT::i64) ? 7 : 3, dl, - CVT)); - Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : X86::ECX, - Left, InFlag); - InFlag = Chain.getValue(1); - Tys = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; - Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); - } else if (BytesLeft) { + if (BytesLeft) { // Handle the last 1 - 7 bytes. unsigned Offset = SizeVal - BytesLeft; EVT AddrVT = Dst.getValueType(); @@ -195,6 +181,24 @@ return Chain; } +namespace { + +// Represents a cover of a buffer of SizeVal bytes with blocks of size +// AVT, as well as how many bytes remain (BytesLeft is always smaller than +// the block size). +struct RepMovsRepeats { + RepMovsRepeats(const uint64_t SizeVal, const MVT& AVT) { + const unsigned UBytes = AVT.getSizeInBits() / 8; + Count = SizeVal / UBytes; + BytesLeft = SizeVal % UBytes; + } + + unsigned Count; + unsigned BytesLeft; +}; + +} // namespace + SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, @@ -229,7 +233,12 @@ return SDValue(); MVT AVT; - if (Align & 1) + if (Subtarget.hasERMSB()) + // If the target has enhanced REPMOVSB, then it's at least as fast to use + // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle + // BytesLeft. + AVT = MVT::i8; + else if (Align & 1) AVT = MVT::i8; else if (Align & 2) AVT = MVT::i16; @@ -240,14 +249,18 @@ // QWORD aligned AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; - unsigned UBytes = AVT.getSizeInBits() / 8; - unsigned CountVal = SizeVal / UBytes; - SDValue Count = DAG.getIntPtrConstant(CountVal, dl); - unsigned BytesLeft = SizeVal % UBytes; + RepMovsRepeats Repeats(SizeVal, AVT); + if (Repeats.BytesLeft > 0 && + DAG.getMachineFunction().getFunction()->optForMinSize()) { + // When agressively optimizing for size, avoid generating the code to handle + // BytesLeft. + AVT = MVT::i8; + Repeats = RepMovsRepeats(SizeVal, AVT); + } SDValue InFlag; Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX, - Count, InFlag); + DAG.getIntPtrConstant(Repeats.Count, dl), InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI, Dst, InFlag); @@ -262,9 +275,9 @@ SmallVector Results; Results.push_back(RepMovs); - if (BytesLeft) { + if (Repeats.BytesLeft) { // Handle the last 1 - 7 bytes. 
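+    // e.g. a 23-byte copy with AVT = i32 yields Repeats.Count = 5 and
+    // Repeats.BytesLeft = 3; the three trailing bytes are copied by the
+    // extra memcpy emitted below.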
- unsigned Offset = SizeVal - BytesLeft; + unsigned Offset = SizeVal - Repeats.BytesLeft; EVT DstVT = Dst.getValueType(); EVT SrcVT = Src.getValueType(); EVT SizeVT = Size.getValueType(); @@ -275,7 +288,8 @@ DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)), - DAG.getConstant(BytesLeft, dl, SizeVT), + DAG.getConstant(Repeats.BytesLeft, dl, + SizeVT), Align, isVolatile, AlwaysInline, false, DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset))); Index: lib/Target/X86/X86Subtarget.h =================================================================== --- lib/Target/X86/X86Subtarget.h +++ lib/Target/X86/X86Subtarget.h @@ -232,6 +232,9 @@ /// True if SHLD based rotate is fast. bool HasFastSHLDRotate; + /// True if the processor has enhanced REP MOVSB/STOSB. + bool HasERMSB; + /// True if the short functions should be padded to prevent /// a stall when returning too early. bool PadShortFunctions; @@ -472,6 +475,7 @@ bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; } bool hasFastLZCNT() const { return HasFastLZCNT; } bool hasFastSHLDRotate() const { return HasFastSHLDRotate; } + bool hasERMSB() const { return HasERMSB; } bool hasSlowDivide32() const { return HasSlowDivide32; } bool hasSlowDivide64() const { return HasSlowDivide64; } bool padShortFunctions() const { return PadShortFunctions; } Index: lib/Target/X86/X86Subtarget.cpp =================================================================== --- lib/Target/X86/X86Subtarget.cpp +++ lib/Target/X86/X86Subtarget.cpp @@ -303,6 +303,7 @@ HasFastVectorFSQRT = false; HasFastLZCNT = false; HasFastSHLDRotate = false; + HasERMSB = false; HasSlowDivide32 = false; HasSlowDivide64 = false; PadShortFunctions = false; Index: lib/Target/X86/X86TargetMachine.cpp =================================================================== --- lib/Target/X86/X86TargetMachine.cpp +++ lib/Target/X86/X86TargetMachine.cpp @@ -286,7 +286,7 @@ auto *RBI = new X86RegisterBankInfo(*I->getRegisterInfo()); GISel->RegBankInfo.reset(RBI); - GISel->InstSelector.reset(createX86InstructionSelector(*I, *RBI)); + GISel->InstSelector.reset(createX86InstructionSelector(*this, *I, *RBI)); #endif I->setGISelAccessor(*GISel); } Index: lib/Target/XCore/XCoreISelLowering.cpp =================================================================== --- lib/Target/XCore/XCoreISelLowering.cpp +++ lib/Target/XCore/XCoreISelLowering.cpp @@ -1605,7 +1605,7 @@ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(OutVal, DemandedMask) || + if (TLI.ShrinkDemandedConstant(OutVal, DemandedMask, TLO) || TLI.SimplifyDemandedBits(OutVal, DemandedMask, KnownZero, KnownOne, TLO)) DCI.CommitTargetLoweringOpt(TLO); @@ -1622,7 +1622,7 @@ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(Time, DemandedMask) || + if (TLI.ShrinkDemandedConstant(Time, DemandedMask, TLO) || TLI.SimplifyDemandedBits(Time, DemandedMask, KnownZero, KnownOne, TLO)) DCI.CommitTargetLoweringOpt(TLO); Index: lib/Transforms/InstCombine/InstCombineAddSub.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1044,14 +1044,14 @@ const APInt *RHSC; if (match(RHS, m_APInt(RHSC))) { - if (RHSC->isSignBit()) { + 
if (RHSC->isSignMask()) { // If wrapping is not allowed, then the addition must set the sign bit: - // X + (signbit) --> X | signbit + // X + (signmask) --> X | signmask if (I.hasNoSignedWrap() || I.hasNoUnsignedWrap()) return BinaryOperator::CreateOr(LHS, RHS); // If wrapping is allowed, then the addition flips the sign bit of LHS: - // X + (signbit) --> X ^ signbit + // X + (signmask) --> X ^ signmask return BinaryOperator::CreateXor(LHS, RHS); } @@ -1120,9 +1120,9 @@ return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), XorLHS); } - // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C, - // transform them into (X + (signbit ^ C)) - if (XorRHS->getValue().isSignBit()) + // (X + signmask) + C could have gotten canonicalized to (X^signmask) + C, + // transform them into (X + (signmask ^ C)) + if (XorRHS->getValue().isSignMask()) return BinaryOperator::CreateAdd(XorLHS, ConstantExpr::getXor(XorRHS, CI)); } @@ -1385,39 +1385,55 @@ // integer add followed by a promotion. if (SIToFPInst *LHSConv = dyn_cast(LHS)) { Value *LHSIntVal = LHSConv->getOperand(0); + Type *FPType = LHSConv->getType(); + + // TODO: This check is overly conservative. In many cases known bits + // analysis can tell us that the result of the addition has less significant + // bits than the integer type can hold. + auto IsValidPromotion = [](Type *FTy, Type *ITy) { + // Do we have enough bits in the significand to represent the result of + // the integer addition? + unsigned MaxRepresentableBits = + APFloat::semanticsPrecision(FTy->getFltSemantics()); + return ITy->getIntegerBitWidth() <= MaxRepresentableBits; + }; // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst)) // ... if the constant fits in the integer value. This is useful for things // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer // requires a constant pool load, and generally allows the add to be better // instcombined. - if (ConstantFP *CFP = dyn_cast(RHS)) { - Constant *CI = - ConstantExpr::getFPToSI(CFP, LHSIntVal->getType()); - if (LHSConv->hasOneUse() && - ConstantExpr::getSIToFP(CI, I.getType()) == CFP && - WillNotOverflowSignedAdd(LHSIntVal, CI, I)) { - // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, - CI, "addconv"); - return new SIToFPInst(NewAdd, I.getType()); + if (ConstantFP *CFP = dyn_cast(RHS)) + if (IsValidPromotion(FPType, LHSIntVal->getType())) { + Constant *CI = + ConstantExpr::getFPToSI(CFP, LHSIntVal->getType()); + if (LHSConv->hasOneUse() && + ConstantExpr::getSIToFP(CI, I.getType()) == CFP && + WillNotOverflowSignedAdd(LHSIntVal, CI, I)) { + // Insert the new integer add. + Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, + CI, "addconv"); + return new SIToFPInst(NewAdd, I.getType()); + } } - } // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) if (SIToFPInst *RHSConv = dyn_cast(RHS)) { Value *RHSIntVal = RHSConv->getOperand(0); - - // Only do this if x/y have the same type, if at least one of them has a - // single use (so we don't increase the number of int->fp conversions), - // and if the integer add will not overflow. - if (LHSIntVal->getType() == RHSIntVal->getType() && - (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && - WillNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) { - // Insert the new integer add. 
- Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, - RHSIntVal, "addconv"); - return new SIToFPInst(NewAdd, I.getType()); + // It's enough to check LHS types only because we require int types to + // be the same for this transform. + if (IsValidPromotion(FPType, LHSIntVal->getType())) { + // Only do this if x/y have the same type, if at least one of them has a + // single use (so we don't increase the number of int->fp conversions), + // and if the integer add will not overflow. + if (LHSIntVal->getType() == RHSIntVal->getType() && + (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && + WillNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) { + // Insert the new integer add. + Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, + RHSIntVal, "addconv"); + return new SIToFPInst(NewAdd, I.getType()); + } } } } Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2480,8 +2480,8 @@ Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI); return BinaryOperator::CreateSub(SubOne(NegOp0CI), Op0I->getOperand(0)); - } else if (RHSC->getValue().isSignBit()) { - // (X + C) ^ signbit -> (X + C + signbit) + } else if (RHSC->getValue().isSignMask()) { + // (X + C) ^ signmask -> (X + C + signmask) Constant *C = Builder->getInt(RHSC->getValue() + Op0CI->getValue()); return BinaryOperator::CreateAdd(Op0I->getOperand(0), C); Index: lib/Transforms/InstCombine/InstCombineCompares.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCompares.cpp +++ lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -140,7 +140,7 @@ case ICmpInst::ICMP_UGE: // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) TrueIfSigned = true; - return RHS.isSignBit(); + return RHS.isSignMask(); default: return false; } @@ -1532,14 +1532,14 @@ } if (Xor->hasOneUse()) { - // (icmp u/s (xor X SignBit), C) -> (icmp s/u X, (xor C SignBit)) - if (!Cmp.isEquality() && XorC->isSignBit()) { + // (icmp u/s (xor X SignMask), C) -> (icmp s/u X, (xor C SignMask)) + if (!Cmp.isEquality() && XorC->isSignMask()) { Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate() : Cmp.getSignedPredicate(); return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), *C ^ *XorC)); } - // (icmp u/s (xor X ~SignBit), C) -> (icmp s/u X, (xor C ~SignBit)) + // (icmp u/s (xor X ~SignMask), C) -> (icmp s/u X, (xor C ~SignMask)) if (!Cmp.isEquality() && XorC->isMaxSignedValue()) { Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate() : Cmp.getSignedPredicate(); @@ -2402,9 +2402,9 @@ const APInt &Upper = CR.getUpper(); const APInt &Lower = CR.getLower(); if (Cmp.isSigned()) { - if (Lower.isSignBit()) + if (Lower.isSignMask()) return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantInt::get(Ty, Upper)); - if (Upper.isSignBit()) + if (Upper.isSignMask()) return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, Lower)); } else { if (Lower.isMinValue()) @@ -2604,7 +2604,7 @@ break; // Replace (and X, (1 << size(X)-1) != 0) with x s< 0 - if (BOC->isSignBit()) { + if (BOC->isSignMask()) { Constant *Zero = Constant::getNullValue(BOp0->getType()); auto NewPred = isICMP_NE ? 
ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; return new ICmpInst(NewPred, BOp0, Zero); @@ -3032,9 +3032,9 @@ if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b return new ICmpInst(I.getPredicate(), BO0->getOperand(0), BO1->getOperand(0)); - // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b + // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b if (ConstantInt *CI = dyn_cast(BO0->getOperand(1))) { - if (CI->getValue().isSignBit()) { + if (CI->getValue().isSignMask()) { ICmpInst::Predicate Pred = I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate(); return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); @@ -3797,7 +3797,7 @@ static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth, bool isSignCheck) { if (isSignCheck) - return APInt::getSignBit(BitWidth); + return APInt::getSignMask(BitWidth); ConstantInt *CI = dyn_cast(I.getOperand(1)); if (!CI) return APInt::getAllOnesValue(BitWidth); Index: lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- lib/Transforms/InstCombine/InstCombineInternal.h +++ lib/Transforms/InstCombine/InstCombineInternal.h @@ -551,9 +551,10 @@ unsigned Depth, Instruction *CxtI); /// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded /// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence. - Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl, - const APInt &DemandedMask, APInt &KnownZero, - APInt &KnownOne); + Value *simplifyShrShlDemandedBits( + Instruction *Shr, const APInt &ShrOp1, Instruction *Shl, + const APInt &ShlOp1, const APInt &DemandedMask, APInt &KnownZero, + APInt &KnownOne); /// \brief Tries to simplify operands to an integer instruction based on its /// demanded bits. Index: lib/Transforms/InstCombine/InstCombineMulDivRem.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -1237,7 +1237,7 @@ // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a udiv. - APInt Mask(APInt::getSignBit(I.getType()->getScalarSizeInBits())); + APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits())); if (MaskedValueIsZero(Op0, Mask, 0, &I)) { if (MaskedValueIsZero(Op1, Mask, 0, &I)) { // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set @@ -1543,7 +1543,7 @@ // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a urem. 
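+  // e.g. given %a = and i32 %x, 255 and %b = and i32 %y, 15, both sign bits
+  // are known zero, so %r = srem i32 %a, %b can be rewritten as urem i32 %a, %b.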
- APInt Mask(APInt::getSignBit(I.getType()->getScalarSizeInBits())); + APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits())); if (MaskedValueIsZero(Op1, Mask, 0, &I) && MaskedValueIsZero(Op0, Mask, 0, &I)) { // X srem Y -> X urem Y, iff X and Y don't have sign bit set Index: lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineSelect.cpp +++ lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -618,7 +618,7 @@ { unsigned BitWidth = DL.getTypeSizeInBits(TrueVal->getType()->getScalarType()); - APInt MinSignedValue = APInt::getSignBit(BitWidth); + APInt MinSignedValue = APInt::getSignedMinValue(BitWidth); Value *X; const APInt *Y, *C; bool TrueWhenUnset; Index: lib/Transforms/InstCombine/InstCombineShifts.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineShifts.cpp +++ lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -760,7 +760,7 @@ } // See if we can turn a signed shr into an unsigned shr. - if (MaskedValueIsZero(Op0, APInt::getSignBit(BitWidth), 0, &I)) + if (MaskedValueIsZero(Op0, APInt::getSignMask(BitWidth), 0, &I)) return BinaryOperator::CreateLShr(Op0, Op1); return nullptr; Index: lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -26,7 +26,7 @@ /// constant integer. If so, check to see if there are any bits set in the /// constant that are not demanded. If so, shrink the constant and return true. static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, - APInt Demanded) { + const APInt &Demanded) { assert(I && "No instruction?"); assert(OpNo < I->getNumOperands() && "Operand index too large"); @@ -37,13 +37,11 @@ return false; // If there are no bits set that aren't demanded, nothing to do. - Demanded = Demanded.zextOrTrunc(C->getBitWidth()); - if ((~Demanded & *C) == 0) + if (C->isSubsetOf(Demanded)) return false; // This instruction is producing bits that are not demanded. Shrink the RHS. - Demanded &= *C; - I->setOperand(OpNo, ConstantInt::get(Op->getType(), Demanded)); + I->setOperand(OpNo, ConstantInt::get(Op->getType(), *C & Demanded)); return true; } @@ -117,27 +115,16 @@ KnownOne.getBitWidth() == BitWidth && "Value *V, DemandedMask, KnownZero and KnownOne " "must have same BitWidth"); - const APInt *C; - if (match(V, m_APInt(C))) { - // We know all of the bits for a scalar constant or a splat vector constant! - KnownOne = *C; - KnownZero = ~KnownOne; - return nullptr; - } - if (isa(V)) { - // We know all of the bits for a constant! - KnownOne.clearAllBits(); - KnownZero.setAllBits(); + + if (isa(V)) { + computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI); return nullptr; } KnownZero.clearAllBits(); KnownOne.clearAllBits(); - if (DemandedMask == 0) { // Not demanding any bits from V. - if (isa(V)) - return nullptr; + if (DemandedMask == 0) // Not demanding any bits from V. return UndefValue::get(VTy); - } if (Depth == 6) // Limit search depth. return nullptr; @@ -187,16 +174,14 @@ // If the client is only demanding bits that we know, return the known // constant. 
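+    // DemandedMask.isSubsetOf(IKnownZero|IKnownOne) is
+    // (DemandedMask & ~(IKnownZero|IKnownOne)) == 0, i.e. every demanded bit
+    // is already known to be 0 or 1, so the value can be replaced by the
+    // constant built from IKnownOne.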
- if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(VTy, IKnownOne); // If all of the demanded bits are known 1 on one side, return the other. // These bits cannot contribute to the result of the 'and'. - if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == - (DemandedMask & ~LHSKnownZero)) + if (DemandedMask.isSubsetOf(LHSKnownZero | RHSKnownOne)) return I->getOperand(0); - if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == - (DemandedMask & ~RHSKnownZero)) + if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownOne)) return I->getOperand(1); // If the RHS is a constant, see if we can simplify it. @@ -224,25 +209,14 @@ // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(VTy, IKnownOne); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. - if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == - (DemandedMask & ~LHSKnownOne)) + if (DemandedMask.isSubsetOf(LHSKnownOne | RHSKnownZero)) return I->getOperand(0); - if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == - (DemandedMask & ~RHSKnownOne)) - return I->getOperand(1); - - // If all of the potentially set bits on one side are known to be set on - // the other side, just use the 'other' side. - if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == - (DemandedMask & (~RHSKnownZero))) - return I->getOperand(0); - if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == - (DemandedMask & (~LHSKnownZero))) + if (DemandedMask.isSubsetOf(RHSKnownOne | LHSKnownZero)) return I->getOperand(1); // If the RHS is a constant, see if we can simplify it. @@ -271,20 +245,20 @@ // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(VTy, IKnownOne); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. - if ((DemandedMask & RHSKnownZero) == DemandedMask) + if (DemandedMask.isSubsetOf(RHSKnownZero)) return I->getOperand(0); - if ((DemandedMask & LHSKnownZero) == DemandedMask) + if (DemandedMask.isSubsetOf(LHSKnownZero)) return I->getOperand(1); // If all of the demanded bits are known to be zero on one side or the // other, turn this into an *inclusive* or. // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 - if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) { + if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownZero)) { Instruction *Or = BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), I->getName()); @@ -295,20 +269,28 @@ // bits on that side are also known to be set on the other side, turn this // into an AND, as we know the bits will be cleared. // e.g. 
(X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 - if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { - // all known - if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { - Constant *AndC = Constant::getIntegerValue(VTy, - ~RHSKnownOne & DemandedMask); - Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC); - return InsertNewInstWith(And, *I); - } + if (DemandedMask.isSubsetOf(RHSKnownZero|RHSKnownOne) && + RHSKnownOne.isSubsetOf(LHSKnownOne)) { + Constant *AndC = Constant::getIntegerValue(VTy, + ~RHSKnownOne & DemandedMask); + Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC); + return InsertNewInstWith(And, *I); } - // If the RHS is a constant, see if we can simplify it. - // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1. - if (ShrinkDemandedConstant(I, 1, DemandedMask)) - return I; + // If the RHS is a constant, see if we can change it. Don't alter a -1 + // constant because that's a canonical 'not' op, and that is better for + // combining, SCEV, and codegen. + const APInt *C; + if (match(I->getOperand(1), m_APInt(C)) && !C->isAllOnesValue()) { + if ((*C | ~DemandedMask).isAllOnesValue()) { + // Force bits to 1 to create a 'not' op. + I->setOperand(1, ConstantInt::getAllOnesValue(VTy)); + return I; + } + // If we can't turn this into a 'not', try to shrink the constant. + if (ShrinkDemandedConstant(I, 1, DemandedMask)) + return I; + } // If our LHS is an 'and' and if it has one use, and if any of the bits we // are flipping are known to be set, then the xor is just resetting those @@ -495,17 +477,15 @@ computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI); break; } - case Instruction::Shl: - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - { - Value *VarX; ConstantInt *C1; - if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) { - Instruction *Shr = cast(I->getOperand(0)); - Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask, - KnownZero, KnownOne); - if (R) - return R; - } + case Instruction::Shl: { + const APInt *SA; + if (match(I->getOperand(1), m_APInt(SA))) { + const APInt *ShrAmt; + if (match(I->getOperand(0), m_Shr(m_Value(), m_APInt(ShrAmt)))) { + Instruction *Shr = cast(I->getOperand(0)); + if (Value *R = simplifyShrShlDemandedBits( + Shr, *ShrAmt, I, *SA, DemandedMask, KnownZero, KnownOne)) + return R; } uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); @@ -529,9 +509,10 @@ KnownZero.setLowBits(ShiftAmt); } break; - case Instruction::LShr: - // For a logical shift right - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { + } + case Instruction::LShr: { + const APInt *SA; + if (match(I->getOperand(1), m_APInt(SA))) { uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Unsigned shift right. @@ -552,7 +533,8 @@ KnownZero.setHighBits(ShiftAmt); // high bits known zero. } break; - case Instruction::AShr: + } + case Instruction::AShr: { // If this is an arithmetic shift right and only the low-bit is set, we can // always convert this into a logical shr, even if the shift amount is // variable. The low bit of the shift cannot be an input sign bit unless @@ -566,15 +548,16 @@ // If the sign bit is the only bit demanded by this ashr, then there is no // need to do it, the shift doesn't change the high bit. 
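+  // e.g. if only bit 31 of (ashr i32 %x, 7) is demanded: an arithmetic shift
+  // replicates the sign bit, so bit 31 of the result equals bit 31 of %x and
+  // %x can be returned unchanged.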
- if (DemandedMask.isSignBit()) + if (DemandedMask.isSignMask()) return I->getOperand(0); - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { + const APInt *SA; + if (match(I->getOperand(1), m_APInt(SA))) { uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Signed shift right. APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); - // If any of the "high bits" are demanded, we should set the sign bit as + // If any of the high bits are demanded, we should set the sign bit as // demanded. if (DemandedMask.countLeadingZeros() <= ShiftAmt) DemandedMaskIn.setSignBit(); @@ -587,6 +570,7 @@ if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne, Depth + 1)) return I; + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); // Compute the new bits that are at the top now. APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); @@ -594,24 +578,24 @@ KnownOne.lshrInPlace(ShiftAmt); // Handle the sign bits. - APInt SignBit(APInt::getSignBit(BitWidth)); + APInt SignMask(APInt::getSignMask(BitWidth)); // Adjust to where it is now in the mask. - SignBit.lshrInPlace(ShiftAmt); + SignMask.lshrInPlace(ShiftAmt); // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] || - (HighBits & ~DemandedMask) == HighBits) { - // Perform the logical shift right. - BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0), - SA, I->getName()); - NewVal->setIsExact(cast(I)->isExact()); - return InsertNewInstWith(NewVal, *I); - } else if ((KnownOne & SignBit) != 0) { // New bits are known one. + !DemandedMask.intersects(HighBits)) { + BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0), + I->getOperand(1)); + LShr->setIsExact(cast(I)->isExact()); + return InsertNewInstWith(LShr, *I); + } else if (KnownOne.intersects(SignMask)) { // New bits are known one. KnownOne |= HighBits; } } break; + } case Instruction::SRem: if (ConstantInt *Rem = dyn_cast(I->getOperand(1))) { // X % -1 demands all the bits because we don't want to introduce @@ -624,7 +608,7 @@ return I->getOperand(0); APInt LowBits = RA - 1; - APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); + APInt Mask2 = LowBits | APInt::getSignMask(BitWidth); if (SimplifyDemandedBits(I, 0, Mask2, LHSKnownZero, LHSKnownOne, Depth + 1)) return I; @@ -635,12 +619,12 @@ // If LHS is non-negative or has all low bits zero, then the upper bits // are all zero. - if (LHSKnownZero.isSignBitSet() || ((LHSKnownZero & LowBits) == LowBits)) + if (LHSKnownZero.isSignBitSet() || LowBits.isSubsetOf(LHSKnownZero)) KnownZero |= ~LowBits; // If LHS is negative and not all low bits are zero, then the upper bits // are all one. - if (LHSKnownOne.isSignBitSet() && ((LHSKnownOne & LowBits) != 0)) + if (LHSKnownOne.isSignBitSet() && LowBits.intersects(LHSKnownOne)) KnownOne |= ~LowBits; assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); @@ -744,7 +728,7 @@ // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(KnownZero|KnownOne)) return Constant::getIntegerValue(VTy, KnownOne); return nullptr; } @@ -783,17 +767,15 @@ // If the client is only demanding bits that we know, return the known // constant. 
- if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(ITy, IKnownOne); // If all of the demanded bits are known 1 on one side, return the other. // These bits cannot contribute to the result of the 'and' in this // context. - if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == - (DemandedMask & ~LHSKnownZero)) + if (DemandedMask.isSubsetOf(LHSKnownZero | RHSKnownOne)) return I->getOperand(0); - if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == - (DemandedMask & ~RHSKnownZero)) + if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownOne)) return I->getOperand(1); KnownZero = std::move(IKnownZero); @@ -817,26 +799,15 @@ // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(ITy, IKnownOne); // If all of the demanded bits are known zero on one side, return the // other. These bits cannot contribute to the result of the 'or' in this // context. - if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == - (DemandedMask & ~LHSKnownOne)) - return I->getOperand(0); - if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == - (DemandedMask & ~RHSKnownOne)) - return I->getOperand(1); - - // If all of the potentially set bits on one side are known to be set on - // the other side, just use the 'other' side. - if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == - (DemandedMask & (~RHSKnownZero))) + if (DemandedMask.isSubsetOf(LHSKnownOne | RHSKnownZero)) return I->getOperand(0); - if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == - (DemandedMask & (~LHSKnownZero))) + if (DemandedMask.isSubsetOf(RHSKnownOne | LHSKnownZero)) return I->getOperand(1); KnownZero = std::move(IKnownZero); @@ -861,14 +832,14 @@ // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(ITy, IKnownOne); // If all of the demanded bits are known zero on one side, return the // other. - if ((DemandedMask & RHSKnownZero) == DemandedMask) + if (DemandedMask.isSubsetOf(RHSKnownZero)) return I->getOperand(0); - if ((DemandedMask & LHSKnownZero) == DemandedMask) + if (DemandedMask.isSubsetOf(LHSKnownZero)) return I->getOperand(1); // Output known-0 bits are known if clear or set in both the LHS & RHS. @@ -883,7 +854,7 @@ // If this user is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(KnownZero|KnownOne)) return Constant::getIntegerValue(ITy, KnownOne); break; @@ -910,20 +881,17 @@ /// /// As with SimplifyDemandedUseBits, it returns NULL if the simplification was /// not successful. -Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr, - Instruction *Shl, - const APInt &DemandedMask, - APInt &KnownZero, - APInt &KnownOne) { - - const APInt &ShlOp1 = cast(Shl->getOperand(1))->getValue(); - const APInt &ShrOp1 = cast(Shr->getOperand(1))->getValue(); +Value * +InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1, + Instruction *Shl, const APInt &ShlOp1, + const APInt &DemandedMask, + APInt &KnownZero, APInt &KnownOne) { if (!ShlOp1 || !ShrOp1) - return nullptr; // Noop. + return nullptr; // No-op. 
Value *VarX = Shr->getOperand(0); Type *Ty = VarX->getType(); - unsigned BitWidth = Ty->getIntegerBitWidth(); + unsigned BitWidth = Ty->getScalarSizeInBits(); if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth)) return nullptr; // Undef. Index: lib/Transforms/Scalar/ConstantHoisting.cpp =================================================================== --- lib/Transforms/Scalar/ConstantHoisting.cpp +++ lib/Transforms/Scalar/ConstantHoisting.cpp @@ -53,6 +53,12 @@ STATISTIC(NumConstantsHoisted, "Number of constants hoisted"); STATISTIC(NumConstantsRebased, "Number of constants rebased"); +static cl::opt ConstHoistWithBlockFrequency( + "consthoist-with-block-frequency", cl::init(false), cl::Hidden, + cl::desc("Enable the use of the block frequency analysis to reduce the " + "chance to execute const materialization more frequently than " + "without hoisting.")); + namespace { /// \brief The constant hoisting pass. class ConstantHoistingLegacyPass : public FunctionPass { @@ -68,6 +74,8 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + if (ConstHoistWithBlockFrequency) + AU.addRequired(); AU.addRequired(); AU.addRequired(); } @@ -82,6 +90,7 @@ char ConstantHoistingLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(ConstantHoistingLegacyPass, "consthoist", "Constant Hoisting", false, false) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(ConstantHoistingLegacyPass, "consthoist", @@ -99,9 +108,13 @@ DEBUG(dbgs() << "********** Begin Constant Hoisting **********\n"); DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n'); - bool MadeChange = Impl.runImpl( - Fn, getAnalysis().getTTI(Fn), - getAnalysis().getDomTree(), Fn.getEntryBlock()); + bool MadeChange = + Impl.runImpl(Fn, getAnalysis().getTTI(Fn), + getAnalysis().getDomTree(), + ConstHoistWithBlockFrequency + ? &getAnalysis().getBFI() + : nullptr, + Fn.getEntryBlock()); if (MadeChange) { DEBUG(dbgs() << "********** Function after Constant Hoisting: " @@ -148,33 +161,142 @@ return IDom->getBlock()->getTerminator(); } +/// \brief Given \p BBs as input, find another set of BBs which collectively +/// dominates \p BBs and have the minimal sum of frequencies. Return the BB +/// set found in \p BBs. +void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, + BasicBlock *Entry, + SmallPtrSet &BBs) { + assert(!BBs.count(Entry) && "Assume Entry is not in BBs"); + // Nodes on the current path to the root. + SmallPtrSet Path; + // Candidates includes any block 'BB' in set 'BBs' that is not strictly + // dominated by any other blocks in set 'BBs', and all nodes in the path + // in the dominator tree from Entry to 'BB'. + SmallPtrSet Candidates; + for (auto BB : BBs) { + Path.clear(); + // Walk up the dominator tree until Entry or another BB in BBs + // is reached. Insert the nodes on the way to the Path. + BasicBlock *Node = BB; + // The "Path" is a candidate path to be added into Candidates set. + bool isCandidate = false; + do { + Path.insert(Node); + if (Node == Entry || Candidates.count(Node)) { + isCandidate = true; + break; + } + assert(DT.getNode(Node)->getIDom() && + "Entry doens't dominate current Node"); + Node = DT.getNode(Node)->getIDom()->getBlock(); + } while (!BBs.count(Node)); + + // If isCandidate is false, Node is another Block in BBs dominating + // current 'BB'. Drop the nodes on the Path. 
+ if (!isCandidate) + continue; + + // Add nodes on the Path into Candidates. + Candidates.insert(Path.begin(), Path.end()); + } + + // Sort the nodes in Candidates in top-down order and save the nodes + // in Orders. + unsigned Idx = 0; + SmallVector Orders; + Orders.push_back(Entry); + while (Idx != Orders.size()) { + BasicBlock *Node = Orders[Idx++]; + for (auto ChildDomNode : DT.getNode(Node)->getChildren()) { + if (Candidates.count(ChildDomNode->getBlock())) + Orders.push_back(ChildDomNode->getBlock()); + } + } + + // Visit Orders in bottom-up order. + typedef std::pair, BlockFrequency> + InsertPtsCostPair; + // InsertPtsMap is a map from a BB to the best insertion points for the + // subtree of BB (subtree not including the BB itself). + DenseMap InsertPtsMap; + InsertPtsMap.reserve(Orders.size() + 1); + for (auto RIt = Orders.rbegin(); RIt != Orders.rend(); RIt++) { + BasicBlock *Node = *RIt; + bool NodeInBBs = BBs.count(Node); + SmallPtrSet &InsertPts = InsertPtsMap[Node].first; + BlockFrequency &InsertPtsFreq = InsertPtsMap[Node].second; + + // Return the optimal insert points in BBs. + if (Node == Entry) { + BBs.clear(); + if (InsertPtsFreq > BFI.getBlockFreq(Node)) + BBs.insert(Entry); + else + BBs.insert(InsertPts.begin(), InsertPts.end()); + break; + } + + BasicBlock *Parent = DT.getNode(Node)->getIDom()->getBlock(); + // Initially, ParentInsertPts is empty and ParentPtsFreq is 0. Every child + // will update its parent's ParentInsertPts and ParentPtsFreq. + SmallPtrSet &ParentInsertPts = InsertPtsMap[Parent].first; + BlockFrequency &ParentPtsFreq = InsertPtsMap[Parent].second; + // Choose to insert in Node or in subtree of Node. + if (InsertPtsFreq > BFI.getBlockFreq(Node) || NodeInBBs) { + ParentInsertPts.insert(Node); + ParentPtsFreq += BFI.getBlockFreq(Node); + } else { + ParentInsertPts.insert(InsertPts.begin(), InsertPts.end()); + ParentPtsFreq += InsertPtsFreq; + } + } +} + /// \brief Find an insertion point that dominates all uses. -Instruction *ConstantHoistingPass::findConstantInsertionPoint( +SmallPtrSet ConstantHoistingPass::findConstantInsertionPoint( const ConstantInfo &ConstInfo) const { assert(!ConstInfo.RebasedConstants.empty() && "Invalid constant info entry."); // Collect all basic blocks. SmallPtrSet BBs; + SmallPtrSet InsertPts; for (auto const &RCI : ConstInfo.RebasedConstants) for (auto const &U : RCI.Uses) BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent()); - if (BBs.count(Entry)) - return &Entry->front(); + if (BBs.count(Entry)) { + InsertPts.insert(&Entry->front()); + return InsertPts; + } + + if (BFI) { + findBestInsertionSet(*DT, *BFI, Entry, BBs); + for (auto BB : BBs) { + BasicBlock::iterator InsertPt = BB->begin(); + for (; isa(InsertPt) || InsertPt->isEHPad(); ++InsertPt) + ; + InsertPts.insert(&*InsertPt); + } + return InsertPts; + } while (BBs.size() >= 2) { BasicBlock *BB, *BB1, *BB2; BB1 = *BBs.begin(); BB2 = *std::next(BBs.begin()); BB = DT->findNearestCommonDominator(BB1, BB2); - if (BB == Entry) - return &Entry->front(); + if (BB == Entry) { + InsertPts.insert(&Entry->front()); + return InsertPts; + } BBs.erase(BB1); BBs.erase(BB2); BBs.insert(BB); } assert((BBs.size() == 1) && "Expected only one element."); Instruction &FirstInst = (*BBs.begin())->front(); - return findMatInsertPt(&FirstInst); + InsertPts.insert(findMatInsertPt(&FirstInst)); + return InsertPts; } @@ -557,29 +679,54 @@ bool MadeChange = false; for (auto const &ConstInfo : ConstantVec) { // Hoist and hide the base constant behind a bitcast. 
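+    // With block-frequency-guided hoisting, findConstantInsertionPoint may
+    // return several blocks that together dominate all uses; a separate
+    // bitcast of the base constant is materialized at each of them, and every
+    // use is rebased against the copy that dominates it.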
- Instruction *IP = findConstantInsertionPoint(ConstInfo); - IntegerType *Ty = ConstInfo.BaseConstant->getType(); - Instruction *Base = - new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP); - DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant << ") to BB " - << IP->getParent()->getName() << '\n' << *Base << '\n'); - NumConstantsHoisted++; + SmallPtrSet IPSet = findConstantInsertionPoint(ConstInfo); + assert(!IPSet.empty() && "IPSet is empty"); + + unsigned UsesNum = 0; + unsigned ReBasesNum = 0; + for (Instruction *IP : IPSet) { + IntegerType *Ty = ConstInfo.BaseConstant->getType(); + Instruction *Base = + new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP); + DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant + << ") to BB " << IP->getParent()->getName() << '\n' + << *Base << '\n'); + + // Emit materialization code for all rebased constants. + unsigned Uses = 0; + for (auto const &RCI : ConstInfo.RebasedConstants) { + for (auto const &U : RCI.Uses) { + Uses++; + BasicBlock *OrigMatInsertBB = + findMatInsertPt(U.Inst, U.OpndIdx)->getParent(); + // If Base constant is to be inserted in multiple places, + // generate rebase for U using the Base dominating U. + if (IPSet.size() == 1 || + DT->dominates(Base->getParent(), OrigMatInsertBB)) { + emitBaseConstants(Base, RCI.Offset, U); + ReBasesNum++; + } + } + } + UsesNum = Uses; - // Emit materialization code for all rebased constants. - for (auto const &RCI : ConstInfo.RebasedConstants) { - NumConstantsRebased++; - for (auto const &U : RCI.Uses) - emitBaseConstants(Base, RCI.Offset, U); + // Use the same debug location as the last user of the constant. + assert(!Base->use_empty() && "The use list is empty!?"); + assert(isa(Base->user_back()) && + "All uses should be instructions."); + Base->setDebugLoc(cast(Base->user_back())->getDebugLoc()); } + (void)UsesNum; + (void)ReBasesNum; + // Expect all uses are rebased after rebase is done. + assert(UsesNum == ReBasesNum && "Not all uses are rebased"); + + NumConstantsHoisted++; - // Use the same debug location as the last user of the constant. - assert(!Base->use_empty() && "The use list is empty!?"); - assert(isa(Base->user_back()) && - "All uses should be instructions."); - Base->setDebugLoc(cast(Base->user_back())->getDebugLoc()); + // Base constant is also included in ConstInfo.RebasedConstants, so + // deduct 1 from ConstInfo.RebasedConstants.size(). + NumConstantsRebased = ConstInfo.RebasedConstants.size() - 1; - // Correct for base constant, which we counted above too. - NumConstantsRebased--; MadeChange = true; } return MadeChange; @@ -595,9 +742,11 @@ /// \brief Optimize expensive integer constants in the given function. bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI, - DominatorTree &DT, BasicBlock &Entry) { + DominatorTree &DT, BlockFrequencyInfo *BFI, + BasicBlock &Entry) { this->TTI = &TTI; this->DT = &DT; + this->BFI = BFI; this->Entry = &Entry; // Collect all constant candidates. collectConstantCandidates(Fn); @@ -628,7 +777,10 @@ FunctionAnalysisManager &AM) { auto &DT = AM.getResult(F); auto &TTI = AM.getResult(F); - if (!runImpl(F, TTI, DT, F.getEntryBlock())) + auto BFI = ConstHoistWithBlockFrequency + ? 
&AM.getResult(F) + : nullptr; + if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock())) return PreservedAnalyses::all(); PreservedAnalyses PA; Index: lib/Transforms/Utils/CmpInstAnalysis.cpp =================================================================== --- lib/Transforms/Utils/CmpInstAnalysis.cpp +++ lib/Transforms/Utils/CmpInstAnalysis.cpp @@ -73,17 +73,17 @@ default: return false; case ICmpInst::ICMP_SLT: - // X < 0 is equivalent to (X & SignBit) != 0. + // X < 0 is equivalent to (X & SignMask) != 0. if (!C->isZero()) return false; - Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth())); + Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth())); Pred = ICmpInst::ICMP_NE; break; case ICmpInst::ICMP_SGT: - // X > -1 is equivalent to (X & SignBit) == 0. + // X > -1 is equivalent to (X & SignMask) == 0. if (!C->isAllOnesValue()) return false; - Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth())); + Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth())); Pred = ICmpInst::ICMP_EQ; break; case ICmpInst::ICMP_ULT: Index: lib/Transforms/Utils/CodeExtractor.cpp =================================================================== --- lib/Transforms/Utils/CodeExtractor.cpp +++ lib/Transforms/Utils/CodeExtractor.cpp @@ -73,24 +73,26 @@ } /// \brief Build a set of blocks to extract if the input blocks are viable. -template -static SetVector buildExtractionBlockSet(IteratorT BBBegin, - IteratorT BBEnd) { +static SetVector +buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT) { + assert(!BBs.empty() && "The set of blocks to extract must be non-empty"); SetVector Result; - assert(BBBegin != BBEnd); - // Loop over the blocks, adding them to our set-vector, and aborting with an // empty set if we encounter invalid blocks. - do { - if (!Result.insert(*BBBegin)) - llvm_unreachable("Repeated basic blocks in extraction input"); + for (BasicBlock *BB : BBs) { - if (!CodeExtractor::isBlockValidForExtraction(**BBBegin)) { + // If this block is dead, don't process it. + if (DT && !DT->isReachableFromEntry(BB)) + continue; + + if (!Result.insert(BB)) + llvm_unreachable("Repeated basic blocks in extraction input"); + if (!CodeExtractor::isBlockValidForExtraction(*BB)) { Result.clear(); return Result; } - } while (++BBBegin != BBEnd); + } #ifndef NDEBUG for (SetVector::iterator I = std::next(Result.begin()), @@ -106,49 +108,19 @@ return Result; } -/// \brief Helper to call buildExtractionBlockSet with an ArrayRef. -static SetVector -buildExtractionBlockSet(ArrayRef BBs) { - return buildExtractionBlockSet(BBs.begin(), BBs.end()); -} - -/// \brief Helper to call buildExtractionBlockSet with a RegionNode. -static SetVector -buildExtractionBlockSet(const RegionNode &RN) { - if (!RN.isSubRegion()) - // Just a single BasicBlock. 
- return buildExtractionBlockSet(RN.getNodeAs()); - - const Region &R = *RN.getNodeAs(); - - return buildExtractionBlockSet(R.block_begin(), R.block_end()); -} - -CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs, - BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI) - : DT(nullptr), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(BB)), NumExitBlocks(~0U) {} - CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(BBs)), NumExitBlocks(~0U) {} + BPI(BPI), Blocks(buildExtractionBlockSet(BBs, DT)), NumExitBlocks(~0U) {} CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI) : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks())), + BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT)), NumExitBlocks(~0U) {} -CodeExtractor::CodeExtractor(DominatorTree &DT, const RegionNode &RN, - bool AggregateArgs, BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI) - : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(RN)), NumExitBlocks(~0U) {} - /// definedInRegion - Return true if the specified value is defined in the /// extracted region. static bool definedInRegion(const SetVector &Blocks, Value *V) { @@ -218,9 +190,7 @@ // containing PHI nodes merging values from outside of the region, and a // second that contains all of the code for the block and merges back any // incoming values from inside of the region. - BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI()->getIterator(); - BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs, - Header->getName()+".ce"); + BasicBlock *NewBB = llvm::SplitBlock(Header, Header->getFirstNonPHI(), DT); // We only want to code extract the second block now, and it becomes the new // header of the region. @@ -229,11 +199,6 @@ Blocks.insert(NewBB); Header = NewBB; - // Okay, update dominator sets. The blocks that dominate the new one are the - // blocks that dominate TIBB plus the new block itself. - if (DT) - DT->splitBlock(NewBB); - // Okay, now we need to adjust the PHI nodes and any branches from within the // region to go to the new header block instead of the old header block. if (NumPredsFromRegion) { @@ -248,6 +213,7 @@ // Okay, everything within the region is now branching to the right block, we // just have to update the PHI nodes now, inserting PHI nodes into NewBB. + BasicBlock::iterator AfterPHIs; for (AfterPHIs = OldPred->begin(); isa(AfterPHIs); ++AfterPHIs) { PHINode *PN = cast(AfterPHIs); // Create a new PHI node in the new region, which has an incoming value Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -3055,6 +3055,15 @@ BasicBlock *QFB = QBI->getSuccessor(1); BasicBlock *PostBB = QFB->getSingleSuccessor(); + // Make sure we have a good guess for PostBB. If QTB's only successor is + // QFB, then QFB is a better PostBB. + if (QTB->getSingleSuccessor() == QFB) + PostBB = QFB; + + // If we couldn't find a good PostBB, stop. 
+ if (!PostBB) + return false; + bool InvertPCond = false, InvertQCond = false; // Canonicalize fallthroughs to the true branches. if (PFB == QBI->getParent()) { @@ -3079,8 +3088,7 @@ auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) { return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S; }; - if (!PostBB || - !HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) || + if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) || !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB)) return false; if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) || Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7324,8 +7324,16 @@ return TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector, VectorTy, VF - 1, VectorTy); - // TODO: IF-converted IFs become selects. - return 0; + // Phi nodes in non-header blocks (not inductions, reductions, etc.) are + // converted into select instructions. We require N - 1 selects per phi + // node, where N is the number of incoming values. + if (VF > 1 && Phi->getParent() != TheLoop->getHeader()) + return (Phi->getNumIncomingValues() - 1) * + TTI.getCmpSelInstrCost( + Instruction::Select, ToVectorTy(Phi->getType(), VF), + ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF)); + + return TTI.getCFInstrCost(Instruction::PHI); } case Instruction::UDiv: case Instruction::SDiv: Index: test/Analysis/ScalarEvolution/or-as-add.ll =================================================================== --- test/Analysis/ScalarEvolution/or-as-add.ll +++ /dev/null @@ -1,38 +0,0 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s - -declare void @z(i32) -declare void @z2(i64) - -define void @fun(i1 %bool, i32 %x) { -entry: - br label %body -body: - %i = phi i32 [ 0, %entry ], [ %i.next, %body ] - %bottom_zero = mul i32 %i, 2 - %a = or i32 %bottom_zero, 1 - call void @z(i32 %a) - %bool_ext = zext i1 %bool to i32 - %b = or i32 %bool_ext, %bottom_zero - call void @z(i32 %b) - %shifted = lshr i32 %x, 31 - %c = or i32 %shifted, %bottom_zero - call void @z(i32 %c) - %i_ext = zext i32 %i to i64 - %d = or i64 %i_ext, 4294967296 - call void @z2(i64 %d) - %i.next = add i32 %i, 1 - %cond = icmp eq i32 %i.next, 10 - br i1 %cond, label %exit, label %body -exit: - ret void -} - -; CHECK: %a = or i32 %bottom_zero, 1 -; CHECK-NEXT: --> {1,+,2}<%body> -; CHECK: %b = or i32 %bool_ext, %bottom_zero -; CHECK-NEXT: --> {(zext i1 %bool to i32),+,2} -; CHECK: %c = or i32 %shifted, %bottom_zero -; CHECK-NEXT: --> {(%x /u -2147483648),+,2}<%body> -; CHECK: %d = or i64 %i_ext, 4294967296 -; CHECK-NEXT: --> {4294967296,+,1}<%body> - Index: test/CodeGen/AArch64/optimize-imm.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/optimize-imm.ll @@ -0,0 +1,64 @@ +; RUN: llc -o - %s -mtriple=aarch64-- | FileCheck %s + +; CHECK-LABEL: and1: +; CHECK: and {{w[0-9]+}}, w0, #0xfffffffd + +define void @and1(i32 %a, i8* nocapture %p) { +entry: + %and = and i32 %a, 253 + %conv = trunc i32 %and to i8 + store i8 %conv, i8* %p, align 1 + ret void +} + +; (a & 0x3dfd) | 0xffffc000 +; +; CHECK-LABEL: and2: +; CHECK: and {{w[0-9]+}}, w0, #0xfdfdfdfd + +define i32 @and2(i32 %a) { +entry: + %and = and i32 %a, 15869 + %or = or i32 %and, -16384 + ret i32 %or +} + +; (a & 0x19) | 0xffffffc0 +; +; CHECK-LABEL: and3: +; 
CHECK: and {{w[0-9]+}}, w0, #0x99999999 + +define i32 @and3(i32 %a) { +entry: + %and = and i32 %a, 25 + %or = or i32 %and, -64 + ret i32 %or +} + +; (a & 0xc5600) | 0xfff1f1ff +; +; CHECK-LABEL: and4: +; CHECK: and {{w[0-9]+}}, w0, #0xfffc07ff + +define i32 @and4(i32 %a) { +entry: + %and = and i32 %a, 787968 + %or = or i32 %and, -921089 + ret i32 %or +} + +; Make sure we don't shrink or optimize an XOR's immediate operand if the +; immediate is -1. Instruction selection turns (and ((xor $mask, -1), $v0)) into +; a BIC. + +; CHECK-LABEL: xor1: +; CHECK: orr [[R0:w[0-9]+]], wzr, #0x38 +; CHECK: bic {{w[0-9]+}}, [[R0]], w0, lsl #3 + +define i32 @xor1(i32 %a) { +entry: + %shl = shl i32 %a, 3 + %xor = and i32 %shl, 56 + %and = xor i32 %xor, 56 + ret i32 %and +} Index: test/CodeGen/AMDGPU/frame-index-amdgiz.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/frame-index-amdgiz.ll @@ -0,0 +1,55 @@ +; RUN: llc -verify-machineinstrs < %s | FileCheck %s +; +; The original OpenCL kernel: +; kernel void f(global int *a, int i, int j) { +; int x[100]; +; x[i] = 7; +; a[0] = x[j]; +; } +; clang -cc1 -triple amdgcn---amdgizcl -emit-llvm -o - + +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" +target triple = "amdgcn---amdgiz" + +define amdgpu_kernel void @f(i32 addrspace(1)* nocapture %a, i32 %i, i32 %j) local_unnamed_addr #0 { +entry: +; CHECK: s_load_dword s2, s[0:1], 0xb +; CHECK: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; CHECK: s_load_dword s0, s[0:1], 0xc +; CHECK: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 +; CHECK: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 +; CHECK: s_mov_b32 s10, -1 +; CHECK: s_waitcnt lgkmcnt(0) +; CHECK: s_lshl_b32 s1, s2, 2 +; CHECK: v_mov_b32_e32 v0, 4 +; CHECK: s_mov_b32 s11, 0xe8f000 +; CHECK: v_add_i32_e32 v1, vcc, s1, v0 +; CHECK: v_mov_b32_e32 v2, 7 +; CHECK: s_lshl_b32 s0, s0, 2 +; CHECK: buffer_store_dword v2, v1, s[8:11], s3 offen +; CHECK: v_add_i32_e32 v0, vcc, s0, v0 +; CHECK: buffer_load_dword v0, v0, s[8:11], s3 offen +; CHECK: s_mov_b32 s7, 0xf000 +; CHECK: s_mov_b32 s6, -1 +; CHECK: s_waitcnt vmcnt(0) +; CHECK: buffer_store_dword v0, off, s[4:7], 0 +; CHECK: s_endpgm + + %x = alloca [100 x i32], align 4, addrspace(5) + %0 = bitcast [100 x i32] addrspace(5)* %x to i8 addrspace(5)* + call void @llvm.lifetime.start.p5i8(i64 400, i8 addrspace(5)* nonnull %0) #0 + %arrayidx = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %x, i32 0, i32 %i + store i32 7, i32 addrspace(5)* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %x, i32 0, i32 %j + %1 = load i32, i32 addrspace(5)* %arrayidx2, align 4 + store i32 %1, i32 addrspace(1)* %a, align 4 + call void @llvm.lifetime.end.p5i8(i64 400, i8 addrspace(5)* nonnull %0) #0 + ret void +} + +declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #1 + +declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #1 + +attributes #0 = { nounwind } +attributes #1 = { argmemonly nounwind } Index: test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir =================================================================== --- test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir +++ test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir @@ -5,6 +5,8 @@ define void @test_sext_s8() { ret void } define void @test_zext_s16() { ret void } + define void @test_trunc_s32_16() { ret void } + 
define void @test_add_s8() { ret void } define void @test_add_s16() { ret void } define void @test_add_s32() { ret void } @@ -142,6 +144,34 @@ ; CHECK: BX_RET 14, _, implicit %r0 ... --- +name: test_trunc_s32_16 +# CHECK-LABEL: name: test_trunc_s32_16 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } +# CHECK-DAG: id: 0, class: gpr +# CHECK-DAG: id: 1, class: gpr +body: | + bb.0: + liveins: %r0 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s16) = G_TRUNC %0(s32) + ; CHECK: [[VREGTRUNC:%[0-9]+]] = COPY [[VREGX]] + + %r0 = COPY %1(s16) + ; CHECK: %r0 = COPY [[VREGTRUNC]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- name: test_add_s8 # CHECK-LABEL: name: test_add_s8 legalized: true Index: test/CodeGen/ARM/GlobalISel/arm-isel.ll =================================================================== --- test/CodeGen/ARM/GlobalISel/arm-isel.ll +++ test/CodeGen/ARM/GlobalISel/arm-isel.ll @@ -40,6 +40,30 @@ ret i16 %x } +define void @test_trunc_i32_i16(i32 %v, i16 *%p) { +; CHECK-LABEL: test_trunc_i32_i16: +; The trunc doesn't result in any instructions, but we +; expect the store to be explicitly 16-bit. +; CHECK: strh r0, [r1] +; CHECK: bx lr +entry: + %v16 = trunc i32 %v to i16 + store i16 %v16, i16 *%p + ret void +} + +define void @test_trunc_i32_i8(i32 %v, i8 *%p) { +; CHECK-LABEL: test_trunc_i32_i8: +; The trunc doesn't result in any instructions, but we +; expect the store to be explicitly 8-bit. +; CHECK: strb r0, [r1] +; CHECK: bx lr +entry: + %v8 = trunc i32 %v to i8 + store i8 %v8, i8 *%p + ret void +} + define i8 @test_add_i8(i8 %x, i8 %y) { ; CHECK-LABEL: test_add_i8: ; CHECK: add r0, r0, r1 Index: test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir =================================================================== --- test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir +++ test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir @@ -22,6 +22,8 @@ define void @test_constants() { ret void } + define void @test_trunc_s32_16() { ret void } + define void @test_fadd_s32() #0 { ret void } define void @test_fadd_s64() #0 { ret void } @@ -442,6 +444,27 @@ BX_RET 14, _, implicit %r0 ... --- +name: test_trunc_s32_16 +# CHECK-LABEL: name: test_trunc_s32_16 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb } +# CHECK: - { id: 1, class: gprb } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + liveins: %r0 + + %0(s32) = COPY %r0 + %1(s16) = G_TRUNC %0(s32) + %r0 = COPY %1(s16) + BX_RET 14, _, implicit %r0 +... +--- name: test_fadd_s32 # CHECK-LABEL: name: test_fadd_s32 legalized: true Index: test/CodeGen/ARM/GlobalISel/arm-unsupported.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/GlobalISel/arm-unsupported.ll @@ -0,0 +1,80 @@ +; RUN: llc -mtriple arm-unknown -verify-machineinstrs -global-isel -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o - 2>&1 | FileCheck %s + +; This file checks that we use the fallback path for things that are known to +; be unsupported on the ARM target. It should progressively shrink in size. 
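+;
+; With -global-isel-abort=2 the selector does not abort on an unsupported
+; construct: it emits a remark and falls back to SelectionDAG, which is what
+; the remark and fallback-warning CHECK lines below verify.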
+ +define <4 x i32> @test_int_vectors(<4 x i32> %a, <4 x i32> %b) { +; CHECK: remark: {{.*}} unable to lower arguments: <4 x i32> (<4 x i32>, <4 x i32>)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_int_vectors + %res = add <4 x i32> %a, %b + ret <4 x i32> %res +} + +define <4 x float> @test_float_vectors(<4 x float> %a, <4 x float> %b) { +; CHECK: remark: {{.*}} unable to lower arguments: <4 x float> (<4 x float>, <4 x float>)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_float_vectors + %res = fadd <4 x float> %a, %b + ret <4 x float> %res +} + +define i64 @test_i64(i64 %a, i64 %b) { +; CHECK: remark: {{.*}} unable to lower arguments: i64 (i64, i64)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_i64 + %res = add i64 %a, %b + ret i64 %res +} + +define i128 @test_i128(i128 %a, i128 %b) { +; CHECK: remark: {{.*}} unable to lower arguments: i128 (i128, i128)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_i128 + %res = add i128 %a, %b + ret i128 %res +} + +define i17 @test_funny_ints(i17 %a, i17 %b) { +; CHECK: remark: {{.*}} unable to lower arguments: i17 (i17, i17)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_funny_ints + %res = add i17 %a, %b + ret i17 %res +} + +define half @test_half(half %a, half %b) { +; CHECK: remark: {{.*}} unable to lower arguments: half (half, half)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_half + %res = fadd half %a, %b + ret half %res +} + +; On ARM, clang lowers structs to arrays. +define void @test_arrays([2 x i32] %this.could.come.from.a.struct) { +; CHECK: remark: {{.*}} unable to lower arguments: void ([2 x i32])* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_arrays + ret void +} + +define void @test_structs({i32, i32} %struct) { +; CHECK: remark: {{.*}} unable to lower arguments: void ({ i32, i32 })* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_structs + ret void +} + +define void @test_vararg_definition(i32 %a, ...) { +; CHECK: remark: {{.*}} unable to lower arguments: void (i32, ...)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_vararg_definition + ret void +} + +define void @test_vararg_call(i32 %a) { +; CHECK: remark: {{.*}} unable to translate instruction: call +; CHECK-LABEL: warning: Instruction selection used fallback path for test_vararg_call + call void(i32, ...) @test_vararg_definition(i32 %a, i32 %a, i32 %a) + ret void +} + +define i32 @test_thumb(i32 %a) #0 { +; CHECK: remark: {{.*}} unable to lower arguments: i32 (i32)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_thumb + ret i32 %a +} + +attributes #0 = { "target-features"="+thumb-mode" } Index: test/CodeGen/ARM/alloc-no-stack-realign.ll =================================================================== --- test/CodeGen/ARM/alloc-no-stack-realign.ll +++ test/CodeGen/ARM/alloc-no-stack-realign.ll @@ -7,31 +7,32 @@ define void @test1(<16 x float>* noalias sret %agg.result) nounwind ssp "no-realign-stack" { entry: -; CHECK-LABEL: test1 -; CHECK: ldr r[[R1:[0-9]+]], [pc, r1] -; CHECK: add r[[R2:[0-9]+]], r1, #48 -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: mov r[[R2:[0-9]+]], r[[R1]] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! 
-; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32 -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: mov r[[R1:[0-9]+]], sp -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: add r[[R2:[0-9]+]], r[[R1]], #32 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]! -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: add r[[R1:[0-9]+]], r0, #48 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: add r[[R1:[0-9]+]], r0, #32 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]! -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128] +; CHECK-LABEL: test1: +; CHECK: ldr r[[R1:[0-9]+]], [pc, r[[R1]]] +; CHECK: mov r[[R2:[0-9]+]], r[[R1]] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48 +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32 +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: mov r[[R1:[0-9]+]], #32 +; CHECK: mov r[[R2:[0-9]+]], sp +; CHECK: mov r[[R3:[0-9]+]], r[[R2]] +; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128], r[[R1]] +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #48 +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #32 +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128] %retval = alloca <16 x float>, align 16 %0 = load <16 x float>, <16 x float>* @T3_retval, align 16 store <16 x float> %0, <16 x float>* %retval @@ -42,30 +43,32 @@ define void @test2(<16 x float>* noalias sret %agg.result) nounwind ssp { entry: -; CHECK: ldr r[[R1:[0-9]+]], [pc, r1] -; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48 -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: mov r[[R2:[0-9]+]], r[[R1]] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32 -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: mov r[[R1:[0-9]+]], sp -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: orr r[[R2:[0-9]+]], r[[R1]], #32 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]! -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! 
-; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: add r[[R1:[0-9]+]], r0, #48 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: add r[[R1:[0-9]+]], r0, #32 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]! -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128] +; CHECK-LABEL: test2: +; CHECK: ldr r[[R1:[0-9]+]], [pc, r[[R1]]] +; CHECK: mov r[[R2:[0-9]+]], r[[R1]] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48 +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32 +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: mov r[[R1:[0-9]+]], #32 +; CHECK: mov r[[R2:[0-9]+]], sp +; CHECK: mov r[[R3:[0-9]+]], r[[R2]] +; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128], r[[R1]] +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #48 +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #32 +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128] %retval = alloca <16 x float>, align 16 Index: test/CodeGen/ARM/fence-singlethread.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/fence-singlethread.ll @@ -0,0 +1,16 @@ +; RUN: llc -mtriple=thumbv7-linux-gnueabihf %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv7-apple-ios %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv7-linux-gnueabihf %s -filetype=obj -o %t +; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=OBJ + +; OBJ-NOT: dmb + +define void @fence_singlethread() { +; CHECK-LABEL: fence_singlethread: +; CHECK-NOT: dmb +; CHECK: @ COMPILER BARRIER +; CHECK-NOT: dmb + + fence singlethread seq_cst + ret void +} Index: test/CodeGen/ARM/memcpy-inline.ll =================================================================== --- test/CodeGen/ARM/memcpy-inline.ll +++ test/CodeGen/ARM/memcpy-inline.ll @@ -30,10 +30,9 @@ define void @t1(i8* nocapture %C) nounwind { entry: ; CHECK-LABEL: t1: -; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] -; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] -; CHECK: adds r0, #15 -; CHECK: adds r1, #15 +; CHECK: movs [[INC:r[0-9]+]], #15 +; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1], [[INC]] +; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0], [[INC]] ; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false) @@ -43,13 +42,15 @@ define void @t2(i8* nocapture %C) nounwind { entry: ; CHECK-LABEL: t2: +; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]! 
+; CHECK: movs [[INC:r[0-9]+]], #32 +; CHECK: add.w r3, r0, #16 +; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0], [[INC]] ; CHECK: movw [[REG2:r[0-9]+]], #16716 ; CHECK: movt [[REG2:r[0-9]+]], #72 -; CHECK: str [[REG2]], [r0, #32] -; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]! -; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]! +; CHECK: str [[REG2]], [r0] ; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] -; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] +; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r3] tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false) ret void } Index: test/CodeGen/ARM/memset-inline.ll =================================================================== --- test/CodeGen/ARM/memset-inline.ll +++ test/CodeGen/ARM/memset-inline.ll @@ -13,10 +13,10 @@ define void @t2() nounwind ssp { entry: ; CHECK-LABEL: t2: -; CHECK: add.w r1, r0, #10 ; CHECK: vmov.i32 {{q[0-9]+}}, #0x0 -; CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] -; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] +; CHECK: movs r1, #10 +; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r2], r1 +; CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r2] %buf = alloca [26 x i8], align 1 %0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0 call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false) Index: test/CodeGen/ARM/vector-load.ll =================================================================== --- test/CodeGen/ARM/vector-load.ll +++ test/CodeGen/ARM/vector-load.ll @@ -253,11 +253,22 @@ } ; CHECK-LABEL: test_silly_load: -; CHECK: ldr {{r[0-9]+}}, [r0, #24] -; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0:128]! -; CHECK: vldr d{{[0-9]+}}, [r0] +; CHECK: vldr d{{[0-9]+}}, [r0, #16] +; CHECK: movs r1, #24 +; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0:128], r1 +; CHECK: ldr {{r[0-9]+}}, [r0] define void @test_silly_load(<28 x i8>* %addr) { load volatile <28 x i8>, <28 x i8>* %addr ret void } + +define <4 x i32>* @test_vld1_immoffset(<4 x i32>* %ptr.in, <4 x i32>* %ptr.out) { +; CHECK-LABEL: test_vld1_immoffset: +; CHECK: movs [[INC:r[0-9]+]], #32 +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0], [[INC]] + %val = load <4 x i32>, <4 x i32>* %ptr.in + store <4 x i32> %val, <4 x i32>* %ptr.out + %next = getelementptr <4 x i32>, <4 x i32>* %ptr.in, i32 2 + ret <4 x i32>* %next +} Index: test/CodeGen/ARM/vector-store.ll =================================================================== --- test/CodeGen/ARM/vector-store.ll +++ test/CodeGen/ARM/vector-store.ll @@ -256,3 +256,13 @@ store <4 x i8>* %inc, <4 x i8>** %ptr ret void } + +define <4 x i32>* @test_vst1_1reg(<4 x i32>* %ptr.in, <4 x i32>* %ptr.out) { +; CHECK-LABEL: test_vst1_1reg: +; CHECK: movs [[INC:r[0-9]+]], #32 +; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r1], [[INC]] + %val = load <4 x i32>, <4 x i32>* %ptr.in + store <4 x i32> %val, <4 x i32>* %ptr.out + %next = getelementptr <4 x i32>, <4 x i32>* %ptr.out, i32 2 + ret <4 x i32>* %next +} Index: test/CodeGen/ARM/vlddup.ll =================================================================== --- test/CodeGen/ARM/vlddup.ll +++ test/CodeGen/ARM/vlddup.ll @@ -310,6 +310,23 @@ ret <4 x i16> %tmp5 } +define <4 x i16> @vld2dupi16_odd_update(i16** %ptr) nounwind { +;CHECK-LABEL: vld2dupi16_odd_update: +;CHECK: mov [[INC:r[0-9]+]], #6 +;CHECK: vld2.16 {d16[], d17[]}, [r1], [[INC]] + %A = load i16*, i16** %ptr + %A2 = bitcast i16* %A to i8* + %tmp0 = tail call %struct.__neon_int4x16x2_t 
@llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) + %tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0 + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1 + %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp5 = add <4 x i16> %tmp2, %tmp4 + %tmp6 = getelementptr i16, i16* %A, i32 3 + store i16* %tmp6, i16** %ptr + ret <4 x i16> %tmp5 +} + define <2 x i32> @vld2dupi32(i8* %A) nounwind { ;CHECK-LABEL: vld2dupi32: ;Check the alignment value. Max for this instruction is 64 bits: Index: test/CodeGen/ARM/vldlane.ll =================================================================== --- test/CodeGen/ARM/vldlane.ll +++ test/CodeGen/ARM/vldlane.ll @@ -150,6 +150,22 @@ ret <2 x i32> %tmp5 } +define <2 x i32> @vld2lanei32_odd_update(i32** %ptr, <2 x i32>* %B) nounwind { +;CHECK-LABEL: vld2lanei32_odd_update: +;CHECK: mov [[INC:r[0-9]+]], #12 +;CHECK: vld2.32 {d16[1], d17[1]}, [{{r[0-9]+}}], [[INC]] + %A = load i32*, i32** %ptr + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>, <2 x i32>* %B + %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1 + %tmp5 = add <2 x i32> %tmp3, %tmp4 + %tmp6 = getelementptr i32, i32* %A, i32 3 + store i32* %tmp6, i32** %ptr + ret <2 x i32> %tmp5 +} + define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vld2lanef: ;CHECK: vld2.32 Index: test/CodeGen/ARM/vpadd.ll =================================================================== --- test/CodeGen/ARM/vpadd.ll +++ test/CodeGen/ARM/vpadd.ll @@ -485,6 +485,17 @@ ret <2 x i16> %x } +; And <2 x i8> to <2 x i32> +define <2 x i8> @fromExtendingExtractVectorElt_2i8(<8 x i8> %in) { +; CHECK-LABEL: fromExtendingExtractVectorElt_2i8: +; CHECK: vadd.i32 + %tmp1 = shufflevector <8 x i8> %in, <8 x i8> undef, <2 x i32> + %tmp2 = shufflevector <8 x i8> %in, <8 x i8> undef, <2 x i32> + %x = add <2 x i8> %tmp2, %tmp1 + ret <2 x i8> %x +} + + declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) nounwind readnone declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) nounwind readnone Index: test/CodeGen/Thumb/long.ll =================================================================== --- test/CodeGen/Thumb/long.ll +++ test/CodeGen/Thumb/long.ll @@ -206,3 +206,34 @@ ; CHECK: adds r0, r0, r2 ; CHECK: sbcs r1, r3 } + +declare void @f13(i64 %x) + +define void @f14(i1 %x, i64 %y) #0 { +; CHECK-LABEL: f14: +entry: + %a = add i64 %y, 47 + call void @f13(i64 %a) +; CHECK: bl + br i1 %x, label %if.end, label %if.then + +if.then: + call void @f13(i64 %y) +; CHECK: bl + br label %if.end + +if.end: + %b = add i64 %y, 45 + call void @f13(i64 %b) +; CHECK: adds +; CHECK: adcs +; CHECK: bl + %c = add i64 %y, 47 + call void @f13(i64 %c) +; CHECK: adds +; CHECK-NEXT: adcs +; CHECK: bl + ret void +} + +attributes #0 = { optsize } Index: test/CodeGen/X86/constant-hoisting-bfi.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/constant-hoisting-bfi.ll @@ -0,0 +1,115 @@ +; RUN: opt -consthoist -mtriple=x86_64-unknown-linux-gnu -consthoist-with-block-frequency=true -S < %s | FileCheck %s + +target 
datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Check when BFI is enabled for constant hoisting, constant 214748364701 +; will not be hoisted to the func entry. +; CHECK-LABEL: @foo( +; CHECK: entry: +; CHECK-NOT: bitcast i64 214748364701 to i64 +; CHECK: if.then: + +; Function Attrs: norecurse nounwind uwtable +define i64 @foo(i64* nocapture %a) { +entry: + %arrayidx = getelementptr inbounds i64, i64* %a, i64 9 + %t0 = load i64, i64* %arrayidx, align 8 + %cmp = icmp slt i64 %t0, 564 + br i1 %cmp, label %if.then, label %if.else5 + +if.then: ; preds = %entry + %arrayidx1 = getelementptr inbounds i64, i64* %a, i64 5 + %t1 = load i64, i64* %arrayidx1, align 8 + %cmp2 = icmp slt i64 %t1, 1009 + br i1 %cmp2, label %if.then3, label %return + +if.then3: ; preds = %if.then + %arrayidx4 = getelementptr inbounds i64, i64* %a, i64 6 + %t2 = load i64, i64* %arrayidx4, align 8 + %inc = add nsw i64 %t2, 1 + store i64 %inc, i64* %arrayidx4, align 8 + br label %return + +if.else5: ; preds = %entry + %arrayidx6 = getelementptr inbounds i64, i64* %a, i64 6 + %t3 = load i64, i64* %arrayidx6, align 8 + %cmp7 = icmp slt i64 %t3, 3512 + br i1 %cmp7, label %if.then8, label %return + +if.then8: ; preds = %if.else5 + %arrayidx9 = getelementptr inbounds i64, i64* %a, i64 7 + %t4 = load i64, i64* %arrayidx9, align 8 + %inc10 = add nsw i64 %t4, 1 + store i64 %inc10, i64* %arrayidx9, align 8 + br label %return + +return: ; preds = %if.else5, %if.then, %if.then8, %if.then3 + %retval.0 = phi i64 [ 214748364701, %if.then3 ], [ 214748364701, %if.then8 ], [ 250148364702, %if.then ], [ 256148364704, %if.else5 ] + ret i64 %retval.0 +} + +; Check when BFI is enabled for constant hoisting, constant 214748364701 +; in while.body will be hoisted to while.body.preheader. 214748364701 in +; if.then16 and if.else10 will be merged and hoisted to the beginning of +; if.else10 because if.else10 dominates if.then16. 
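+; This split is profitable because the summed block frequency of
+; while.body.preheader and if.else10 is below that of the entry block, and
+; the preheader copy executes once per loop entry rather than once per
+; iteration of while.body.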
+; CHECK-LABEL: @goo( +; CHECK: entry: +; CHECK-NOT: bitcast i64 214748364701 to i64 +; CHECK: while.body.preheader: +; CHECK-NEXT: bitcast i64 214748364701 to i64 +; CHECK-NOT: bitcast i64 214748364701 to i64 +; CHECK: if.else10: +; CHECK-NEXT: bitcast i64 214748364701 to i64 +; CHECK-NOT: bitcast i64 214748364701 to i64 +define i64 @goo(i64* nocapture %a) { +entry: + %arrayidx = getelementptr inbounds i64, i64* %a, i64 9 + %t0 = load i64, i64* %arrayidx, align 8 + %cmp = icmp ult i64 %t0, 56 + br i1 %cmp, label %if.then, label %if.else10, !prof !0 + +if.then: ; preds = %entry + %arrayidx1 = getelementptr inbounds i64, i64* %a, i64 5 + %t1 = load i64, i64* %arrayidx1, align 8 + %cmp2 = icmp ult i64 %t1, 10 + br i1 %cmp2, label %while.cond.preheader, label %return, !prof !0 + +while.cond.preheader: ; preds = %if.then + %arrayidx7 = getelementptr inbounds i64, i64* %a, i64 6 + %t2 = load i64, i64* %arrayidx7, align 8 + %cmp823 = icmp ugt i64 %t2, 10000 + br i1 %cmp823, label %while.body.preheader, label %return + +while.body.preheader: ; preds = %while.cond.preheader + br label %while.body + +while.body: ; preds = %while.body.preheader, %while.body + %t3 = phi i64 [ %add, %while.body ], [ %t2, %while.body.preheader ] + %add = add i64 %t3, 214748364701 + %cmp8 = icmp ugt i64 %add, 10000 + br i1 %cmp8, label %while.body, label %while.cond.return.loopexit_crit_edge + +if.else10: ; preds = %entry + %arrayidx11 = getelementptr inbounds i64, i64* %a, i64 6 + %t4 = load i64, i64* %arrayidx11, align 8 + %add2 = add i64 %t4, 214748364701 + %cmp12 = icmp ult i64 %add2, 35 + br i1 %cmp12, label %if.then16, label %return, !prof !0 + +if.then16: ; preds = %if.else10 + %arrayidx17 = getelementptr inbounds i64, i64* %a, i64 7 + %t5 = load i64, i64* %arrayidx17, align 8 + %inc = add i64 %t5, 1 + store i64 %inc, i64* %arrayidx17, align 8 + br label %return + +while.cond.return.loopexit_crit_edge: ; preds = %while.body + store i64 %add, i64* %arrayidx7, align 8 + br label %return + +return: ; preds = %while.cond.preheader, %while.cond.return.loopexit_crit_edge, %if.else10, %if.then, %if.then16 + %retval.0 = phi i64 [ 214748364701, %if.then16 ], [ 0, %if.then ], [ 0, %if.else10 ], [ 0, %while.cond.return.loopexit_crit_edge ], [ 0, %while.cond.preheader ] + ret i64 %retval.0 +} + +!0 = !{!"branch_weights", i32 1, i32 2000} Index: test/CodeGen/X86/fold-tied-op.ll =================================================================== --- test/CodeGen/X86/fold-tied-op.ll +++ test/CodeGen/X86/fold-tied-op.ll @@ -7,7 +7,6 @@ ; CHECK-LABEL: fn1 ; CHECK: addl {{.*#+}} 4-byte Folded Reload -; CHECK: addl {{.*#+}} 4-byte Folded Reload ; CHECK: imull {{.*#+}} 4-byte Folded Reload ; CHECK: orl {{.*#+}} 4-byte Folded Reload ; CHECK: retl Index: test/CodeGen/X86/memcpy-struct-by-value.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/memcpy-struct-by-value.ll @@ -0,0 +1,48 @@ +; RUN: llc -mtriple=x86_64-linux-gnu -mattr=-ermsb < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOFAST +; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+ermsb < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=FAST +; RUN: llc -mtriple=i686-linux-gnu -mattr=-ermsb < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOFAST32 +; RUN: llc -mtriple=i686-linux-gnu -mattr=+ermsb < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=FAST +; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=generic < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOFAST +; RUN: llc 
-mtriple=x86_64-linux-gnu -mcpu=haswell < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=FAST +; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skylake < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=FAST +; FIXME: The documentation states that ivybridge has ermsb, but this is not +; enabled right now since I could not confirm by testing. +; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=ivybridge < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOFAST + +%struct.large = type { [4096 x i8] } + +declare void @foo(%struct.large* align 8 byval) nounwind + +define void @test1(%struct.large* nocapture %x) nounwind { + call void @foo(%struct.large* align 8 byval %x) + ret void + +; ALL-LABEL: test1: +; NOFAST: rep;movsq +; NOFAST32: rep;movsl +; FAST: rep;movsb +} + +define void @test2(%struct.large* nocapture %x) nounwind minsize { + call void @foo(%struct.large* align 8 byval %x) + ret void + +; ALL-LABEL: test2: +; NOFAST: rep;movsq +; NOFAST32: rep;movsl +; FAST: rep;movsb +} + +%struct.large_oddsize = type { [4095 x i8] } + +declare void @foo_oddsize(%struct.large_oddsize* align 8 byval) nounwind + +define void @test3(%struct.large_oddsize* nocapture %x) nounwind minsize { + call void @foo_oddsize(%struct.large_oddsize* align 8 byval %x) + ret void + +; ALL-LABEL: test3: +; NOFAST: rep;movsb +; NOFAST32: rep;movsb +; FAST: rep;movsb +} Index: test/DebugInfo/AMDGPU/pointer-address-space-dwarf-v1.ll =================================================================== --- test/DebugInfo/AMDGPU/pointer-address-space-dwarf-v1.ll +++ /dev/null @@ -1,70 +0,0 @@ -; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s - -; LLVM IR generated with the following command and OpenCL source: -; -; $clang -cl-std=CL2.0 -g -O0 -target amdgcn-amd-amdhsa -S -emit-llvm -; -; kernel void kernel1() { -; global int *FuncVar0 = 0; -; constant int *FuncVar1 = 0; -; local int *FuncVar2 = 0; -; private int *FuncVar3 = 0; -; int *FuncVar4 = 0; -; } - -; DW_AT_address_class is available since Dwarf Version 2. 
-; CHECK-NOT: DW_AT_address_class - -declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 - -define amdgpu_kernel void @kernel1() #0 !dbg !7 { -entry: - %FuncVar0 = alloca i32 addrspace(1)*, align 4 - %FuncVar1 = alloca i32 addrspace(2)*, align 4 - %FuncVar2 = alloca i32 addrspace(3)*, align 4 - %FuncVar3 = alloca i32*, align 4 - %FuncVar4 = alloca i32 addrspace(4)*, align 4 - call void @llvm.dbg.declare(metadata i32 addrspace(1)** %FuncVar0, metadata !10, metadata !13), !dbg !14 - store i32 addrspace(1)* null, i32 addrspace(1)** %FuncVar0, align 4, !dbg !14 - call void @llvm.dbg.declare(metadata i32 addrspace(2)** %FuncVar1, metadata !15, metadata !13), !dbg !16 - store i32 addrspace(2)* null, i32 addrspace(2)** %FuncVar1, align 4, !dbg !16 - call void @llvm.dbg.declare(metadata i32 addrspace(3)** %FuncVar2, metadata !17, metadata !13), !dbg !19 - store i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*), i32 addrspace(3)** %FuncVar2, align 4, !dbg !19 - call void @llvm.dbg.declare(metadata i32** %FuncVar3, metadata !20, metadata !13), !dbg !22 - store i32* addrspacecast (i32 addrspace(4)* null to i32*), i32** %FuncVar3, align 4, !dbg !22 - call void @llvm.dbg.declare(metadata i32 addrspace(4)** %FuncVar4, metadata !23, metadata !13), !dbg !24 - store i32 addrspace(4)* null, i32 addrspace(4)** %FuncVar4, align 4, !dbg !24 - ret void, !dbg !25 -} - -!llvm.dbg.cu = !{!0} -!opencl.ocl.version = !{!3} -!llvm.module.flags = !{!4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) -!1 = !DIFile(filename: "pointer-address-space-dwarf-v1.cl", directory: "/some/random/directory") -!2 = !{} -!3 = !{i32 2, i32 0} -!4 = !{i32 2, !"Dwarf Version", i32 1} -!5 = !{i32 2, !"Debug Info Version", i32 3} -!6 = !{!""} -!7 = distinct !DISubprogram(name: "kernel1", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, unit: !0, variables: !2) -!8 = !DISubroutineType(types: !9) -!9 = !{null} -!10 = !DILocalVariable(name: "FuncVar0", scope: !7, file: !1, line: 2, type: !11) -!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64) -!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!13 = !DIExpression() -!14 = !DILocation(line: 2, column: 15, scope: !7) -!15 = !DILocalVariable(name: "FuncVar1", scope: !7, file: !1, line: 3, type: !11) -!16 = !DILocation(line: 3, column: 17, scope: !7) -!17 = !DILocalVariable(name: "FuncVar2", scope: !7, file: !1, line: 4, type: !18) -!18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 32, dwarfAddressSpace: 2) -!19 = !DILocation(line: 4, column: 14, scope: !7) -!20 = !DILocalVariable(name: "FuncVar3", scope: !7, file: !1, line: 5, type: !21) -!21 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 32, dwarfAddressSpace: 1) -!22 = !DILocation(line: 5, column: 16, scope: !7) -!23 = !DILocalVariable(name: "FuncVar4", scope: !7, file: !1, line: 6, type: !11) -!24 = !DILocation(line: 6, column: 8, scope: !7) -!25 = !DILocation(line: 7, column: 1, scope: !7) Index: test/DebugInfo/AMDGPU/variable-locations-dwarf-v1.ll =================================================================== --- test/DebugInfo/AMDGPU/variable-locations-dwarf-v1.ll +++ /dev/null @@ -1,92 +0,0 @@ -; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | 
FileCheck %s - -; LLVM IR generated with the following command and OpenCL source: -; -; $clang -cl-std=CL2.0 -g -O0 -target amdgcn-amd-amdhsa -S -emit-llvm -; -; global int GlobA; -; global int GlobB; -; -; kernel void kernel1(unsigned int ArgN, global int *ArgA, global int *ArgB) { -; ArgA[ArgN] += ArgB[ArgN]; -; } - -declare void @llvm.dbg.declare(metadata, metadata, metadata) - -; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x05> 03 00 00 00 00 ) -@GlobA = common addrspace(1) global i32 0, align 4, !dbg !0 -; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x05> 03 00 00 00 00 ) -@GlobB = common addrspace(1) global i32 0, align 4, !dbg !6 - -define amdgpu_kernel void @kernel1( -; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x06> 91 04 10 01 16 18 ) - i32 %ArgN, -; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x06> 91 08 10 01 16 18 ) - i32 addrspace(1)* %ArgA, -; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x06> 91 10 10 01 16 18 ) - i32 addrspace(1)* %ArgB) !dbg !13 { -entry: - %ArgN.addr = alloca i32, align 4 - %ArgA.addr = alloca i32 addrspace(1)*, align 4 - %ArgB.addr = alloca i32 addrspace(1)*, align 4 - store i32 %ArgN, i32* %ArgN.addr, align 4 - call void @llvm.dbg.declare(metadata i32* %ArgN.addr, metadata !22, metadata !23), !dbg !24 - store i32 addrspace(1)* %ArgA, i32 addrspace(1)** %ArgA.addr, align 4 - call void @llvm.dbg.declare(metadata i32 addrspace(1)** %ArgA.addr, metadata !25, metadata !23), !dbg !26 - store i32 addrspace(1)* %ArgB, i32 addrspace(1)** %ArgB.addr, align 4 - call void @llvm.dbg.declare(metadata i32 addrspace(1)** %ArgB.addr, metadata !27, metadata !23), !dbg !28 - %0 = load i32 addrspace(1)*, i32 addrspace(1)** %ArgB.addr, align 4, !dbg !29 - %1 = load i32, i32* %ArgN.addr, align 4, !dbg !30 - %idxprom = zext i32 %1 to i64, !dbg !29 - %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 %idxprom, !dbg !29 - %2 = load i32, i32 addrspace(1)* %arrayidx, align 4, !dbg !29 - %3 = load i32 addrspace(1)*, i32 addrspace(1)** %ArgA.addr, align 4, !dbg !31 - %4 = load i32, i32* %ArgN.addr, align 4, !dbg !32 - %idxprom1 = zext i32 %4 to i64, !dbg !31 - %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %3, i64 %idxprom1, !dbg !31 - %5 = load i32, i32 addrspace(1)* %arrayidx2, align 4, !dbg !33 - %add = add nsw i32 %5, %2, !dbg !33 - store i32 %add, i32 addrspace(1)* %arrayidx2, align 4, !dbg !33 - ret void, !dbg !34 -} - -!llvm.dbg.cu = !{!2} -!opencl.ocl.version = !{!9} -!llvm.module.flags = !{!10, !11} -!llvm.ident = !{!12} - -!0 = !DIGlobalVariableExpression(var: !1) -!1 = distinct !DIGlobalVariable(name: "GlobA", scope: !2, file: !3, line: 1, type: !8, isLocal: false, isDefinition: true) -!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 5.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5) -!3 = !DIFile(filename: "variable-locations-dwarf-v1.cl", directory: "/some/random/directory") -!4 = !{} -!5 = !{!0, !6} -!6 = !DIGlobalVariableExpression(var: !7) -!7 = distinct !DIGlobalVariable(name: "GlobB", scope: !2, file: !3, line: 2, type: !8, isLocal: false, isDefinition: true) -!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!9 = !{i32 2, i32 0} -!10 = !{i32 2, !"Dwarf Version", i32 1} -!11 = !{i32 2, !"Debug Info Version", i32 3} -!12 = !{!"clang version 5.0.0"} -!13 = distinct !DISubprogram(name: "kernel1", scope: !3, file: !3, line: 4, type: !14, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, 
unit: !2, variables: !4) -!14 = !DISubroutineType(types: !15) -!15 = !{null, !16, !17, !17} -!16 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) -!17 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64) -!18 = !{i32 0, i32 1, i32 1} -!19 = !{!"none", !"none", !"none"} -!20 = !{!"uint", !"int*", !"int*"} -!21 = !{!"", !"", !""} -!22 = !DILocalVariable(name: "ArgN", arg: 1, scope: !13, file: !3, line: 4, type: !16) -!23 = !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef) -!24 = !DILocation(line: 4, column: 34, scope: !13) -!25 = !DILocalVariable(name: "ArgA", arg: 2, scope: !13, file: !3, line: 4, type: !17) -!26 = !DILocation(line: 4, column: 52, scope: !13) -!27 = !DILocalVariable(name: "ArgB", arg: 3, scope: !13, file: !3, line: 4, type: !17) -!28 = !DILocation(line: 4, column: 70, scope: !13) -!29 = !DILocation(line: 5, column: 17, scope: !13) -!30 = !DILocation(line: 5, column: 22, scope: !13) -!31 = !DILocation(line: 5, column: 3, scope: !13) -!32 = !DILocation(line: 5, column: 8, scope: !13) -!33 = !DILocation(line: 5, column: 14, scope: !13) -!34 = !DILocation(line: 6, column: 1, scope: !13) Index: test/DebugInfo/X86/dw_op_minus_direct.ll =================================================================== --- test/DebugInfo/X86/dw_op_minus_direct.ll +++ test/DebugInfo/X86/dw_op_minus_direct.ll @@ -1,11 +1,20 @@ ; Test dwarf codegen of DW_OP_minus. ; RUN: llc -filetype=obj < %s | llvm-dwarfdump - | FileCheck %s +; RUN: llc -dwarf-version=2 -filetype=obj < %s | llvm-dwarfdump - \ +; RUN: | FileCheck %s --check-prefix=DWARF2 +; RUN: llc -dwarf-version=3 -filetype=obj < %s | llvm-dwarfdump - \ +; RUN: | FileCheck %s --check-prefix=DWARF2 ; This was derived manually from: ; int inc(int i) { ; return i+1; ; } +; DWARF2: .debug_info +; DWARF2: DW_TAG_formal_parameter +; DWARF2-NEXT: DW_AT_name {{.*}}"i" +; DWARF2-NOT: DW_AT_location + ; CHECK: Beginning address offset: 0x0000000000000000 ; CHECK: Ending address offset: 0x0000000000000004 ; CHECK: Location description: 70 00 10 ff ff ff ff 0f 1a 10 01 1c 9f Index: test/TableGen/GlobalISelEmitter.td =================================================================== --- test/TableGen/GlobalISelEmitter.td +++ test/TableGen/GlobalISelEmitter.td @@ -29,7 +29,31 @@ def Z : OperandWithDefaultOps ; def m1Z : OperandWithDefaultOps ; -//===- Test the function definition boilerplate. --------------------------===// +def HasA : Predicate<"Subtarget->hasA()">; +def HasB : Predicate<"Subtarget->hasB()">; + +//===- Test the function boilerplate. 
-------------------------------------===// + +// CHECK-LABEL: enum SubtargetFeatureBits : uint8_t { +// CHECK-NEXT: Feature_HasABit = 0, +// CHECK-NEXT: Feature_HasBBit = 1, +// CHECK-NEXT: }; + +// CHECK-LABEL: static const char *SubtargetFeatureNames[] = { +// CHECK-NEXT: "Feature_HasA", +// CHECK-NEXT: "Feature_HasB", +// CHECK-NEXT: nullptr +// CHECK-NEXT: }; + +// CHECK-LABEL: PredicateBitset MyTargetInstructionSelector:: +// CHECK-NEXT: computeAvailableFeatures(const MachineFunction *MF, const MyTargetSubtarget *Subtarget) const { +// CHECK-NEXT: PredicateBitset Features; +// CHECK-NEXT: if (Subtarget->hasA()) +// CHECK-NEXT: Features[Feature_HasABit] = 1; +// CHECK-NEXT: if (Subtarget->hasB()) +// CHECK-NEXT: Features[Feature_HasBBit] = 1; +// CHECK-NEXT: return Features; +// CHECK-NEXT: } // CHECK: bool MyTargetInstructionSelector::selectImpl(MachineInstr &I) const { // CHECK: MachineFunction &MF = *I.getParent()->getParent(); @@ -103,6 +127,9 @@ //===- Test a nested instruction match. -----------------------------------===// // CHECK-LABEL: if ([&]() { +// CHECK-NEXT: PredicateBitset ExpectedFeatures = {Feature_HasABit}; +// CHECK-NEXT: if ((AvailableFeatures & ExpectedFeatures) != ExpectedFeatures) +// CHECK-NEXT: return false; // CHECK-NEXT: MachineInstr &MI0 = I; // CHECK-NEXT: if (MI0.getNumOperands() < 3) // CHECK-NEXT: return false; @@ -142,6 +169,9 @@ // We also get a second rule by commutativity. // CHECK-LABEL: if ([&]() { +// CHECK-NEXT: PredicateBitset ExpectedFeatures = {Feature_HasABit}; +// CHECK-NEXT: if ((AvailableFeatures & ExpectedFeatures) != ExpectedFeatures) +// CHECK-NEXT: return false; // CHECK-NEXT: MachineInstr &MI0 = I; // CHECK-NEXT: if (MI0.getNumOperands() < 3) // CHECK-NEXT: return false; @@ -181,11 +211,15 @@ def MULADD : I<(outs GPR32:$dst), (ins GPR32:$src1, GPR32:$src2, GPR32:$src3), [(set GPR32:$dst, - (mul (add GPR32:$src1, GPR32:$src2), GPR32:$src3))]>; + (mul (add GPR32:$src1, GPR32:$src2), GPR32:$src3))]>, + Requires<[HasA]>; //===- Test another simple pattern with regclass operands. ----------------===// // CHECK-LABEL: if ([&]() { +// CHECK-NEXT: PredicateBitset ExpectedFeatures = {Feature_HasABit, Feature_HasBBit}; +// CHECK-NEXT: if ((AvailableFeatures & ExpectedFeatures) != ExpectedFeatures) +// CHECK-NEXT: return false; // CHECK-NEXT: MachineInstr &MI0 = I; // CHECK-NEXT: if (MI0.getNumOperands() < 3) // CHECK-NEXT: return false; @@ -213,7 +247,8 @@ // CHECK-NEXT: }()) { return true; } def MUL : I<(outs GPR32:$dst), (ins GPR32:$src2, GPR32:$src1), - [(set GPR32:$dst, (mul GPR32:$src1, GPR32:$src2))]>; + [(set GPR32:$dst, (mul GPR32:$src1, GPR32:$src2))]>, + Requires<[HasA, HasB]>; //===- Test a pattern with ComplexPattern operands. 
-----------------------===// // Index: test/TableGen/intrinsic-long-name.td =================================================================== --- test/TableGen/intrinsic-long-name.td +++ test/TableGen/intrinsic-long-name.td @@ -22,7 +22,7 @@ list IntrProperties = []; } -def iAny : ValueType<0, 125>; +def iAny : ValueType<0, 253>; def llvm_anyint_ty : LLVMType; // Make sure we generate the long name without crashing Index: test/TableGen/intrinsic-varargs.td =================================================================== --- test/TableGen/intrinsic-varargs.td +++ test/TableGen/intrinsic-varargs.td @@ -23,7 +23,7 @@ } // isVoid needs to match the definition in ValueTypes.td -def isVoid : ValueType<0, 66>; // Produces no value +def isVoid : ValueType<0, 108>; // Produces no value def llvm_vararg_ty : LLVMType; // this means vararg here // CHECK: /* 0 */ 0, 29, 0, Index: test/Transforms/CodeExtractor/unreachable-block.ll =================================================================== --- /dev/null +++ test/Transforms/CodeExtractor/unreachable-block.ll @@ -0,0 +1,38 @@ +; RUN: opt -S -partial-inliner %s | FileCheck %s + +; CHECK-LABEL: define void @dipsy( +; CHECK-NEXT: call void @tinkywinky.1_ontrue() +; CHECK-NEXT: call void @patatuccio() +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; CHECK-LABEL: define internal void @tinkywinky.1_ontrue() { +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label %ontrue +; CHECK: .exitStub: +; CHECK-NEXT: ret void +; CHECK: ontrue: +; CHECK-NEXT: call void @patatino() +; CHECK-NEXT: br label %onfalse +; CHECK: onfalse: +; CHECK-NEXT: br label %.exitStub +; CHECK-NEXT: } + +declare void @patatino() +declare void @patatuccio() + +define fastcc void @tinkywinky() { + br i1 true, label %ontrue, label %onfalse +ontrue: + call void @patatino() + br label %onfalse +onfalse: + call void @patatuccio() + ret void +cantreachme: + ret void +} +define void @dipsy() { + call fastcc void @tinkywinky() + ret void +} Index: test/Transforms/ConstantHoisting/X86/ehpad.ll =================================================================== --- test/Transforms/ConstantHoisting/X86/ehpad.ll +++ test/Transforms/ConstantHoisting/X86/ehpad.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -consthoist < %s | FileCheck %s +; RUN: opt -S -consthoist -consthoist-with-block-frequency=true < %s | FileCheck --check-prefix=BFIHOIST %s ; FIXME: The catchpad doesn't even use the constant, so a better fix would be to ; insert the bitcast in the catchpad block. 
@@ -11,6 +12,16 @@ ; CHECK-NEXT: bitcast i64 9209618997431186100 to i64 ; CHECK-NEXT: br i1 %tobool +; BFIHOIST-LABEL: define i32 @main +; BFIHOIST: then: +; BFIHOIST: %[[CONST1:.*]] = bitcast i64 9209618997431186100 to i64 +; BFIHOIST: %add = add i64 %call4, %[[CONST1]] +; BFIHOIST: br label %endif +; BFIHOIST: else: +; BFIHOIST: %[[CONST2:.*]] = bitcast i64 9209618997431186100 to i64 +; BFIHOIST: %add6 = add i64 %call5, %[[CONST2]] +; BFIHOIST: br label %endif + ; Function Attrs: norecurse define i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { %call = tail call i64 @fn(i64 0) Index: test/Transforms/InstCombine/add-sitofp.ll =================================================================== --- test/Transforms/InstCombine/add-sitofp.ll +++ test/Transforms/InstCombine/add-sitofp.ll @@ -15,3 +15,88 @@ %p = fadd double %o, 1.0 ret double %p } + +define double @test(i32 %a) { +; CHECK-LABEL: @test( +; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823 +; CHECK-NEXT: [[ADDCONV:%.*]] = add nuw nsw i32 [[A_AND]], 1 +; CHECK-NEXT: [[RES:%.*]] = sitofp i32 [[ADDCONV]] to double +; CHECK-NEXT: ret double [[RES]] +; + ; Drop two highest bits to guarantee that %a + 1 doesn't overflow + %a_and = and i32 %a, 1073741823 + %a_and_fp = sitofp i32 %a_and to double + %res = fadd double %a_and_fp, 1.0 + ret double %res +} + +define float @test_neg(i32 %a) { +; CHECK-LABEL: @test_neg( +; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823 +; CHECK-NEXT: [[A_AND_FP:%.*]] = sitofp i32 [[A_AND]] to float +; CHECK-NEXT: [[RES:%.*]] = fadd float [[A_AND_FP]], 1.000000e+00 +; CHECK-NEXT: ret float [[RES]] +; + ; Drop two highest bits to guarantee that %a + 1 doesn't overflow + %a_and = and i32 %a, 1073741823 + %a_and_fp = sitofp i32 %a_and to float + %res = fadd float %a_and_fp, 1.0 + ret float %res +} + +define double @test_2(i32 %a, i32 %b) { +; CHECK-LABEL: @test_2( +; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823 +; CHECK-NEXT: [[B_AND:%.*]] = and i32 [[B:%.*]], 1073741823 +; CHECK-NEXT: [[ADDCONV:%.*]] = add nuw nsw i32 [[A_AND]], [[B_AND]] +; CHECK-NEXT: [[RES:%.*]] = sitofp i32 [[ADDCONV]] to double +; CHECK-NEXT: ret double [[RES]] +; + ; Drop two highest bits to guarantee that %a + %b doesn't overflow + %a_and = and i32 %a, 1073741823 + %b_and = and i32 %b, 1073741823 + + %a_and_fp = sitofp i32 %a_and to double + %b_and_fp = sitofp i32 %b_and to double + + %res = fadd double %a_and_fp, %b_and_fp + ret double %res +} + +define float @test_2_neg(i32 %a, i32 %b) { +; CHECK-LABEL: @test_2_neg( +; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823 +; CHECK-NEXT: [[B_AND:%.*]] = and i32 [[B:%.*]], 1073741823 +; CHECK-NEXT: [[A_AND_FP:%.*]] = sitofp i32 [[A_AND]] to float +; CHECK-NEXT: [[B_AND_FP:%.*]] = sitofp i32 [[B_AND]] to float +; CHECK-NEXT: [[RES:%.*]] = fadd float [[A_AND_FP]], [[B_AND_FP]] +; CHECK-NEXT: ret float [[RES]] +; + ; Drop two highest bits to guarantee that %a + %b doesn't overflow + %a_and = and i32 %a, 1073741823 + %b_and = and i32 %b, 1073741823 + + %a_and_fp = sitofp i32 %a_and to float + %b_and_fp = sitofp i32 %b_and to float + + %res = fadd float %a_and_fp, %b_and_fp + ret float %res +} + +; This test demonstrates overly conservative legality check. The float addition +; can be replaced with the integer addition because the result of the operation +; can be represented in float, but we don't do that now. 
+define float @test_3(i32 %a, i32 %b) { +; CHECK-LABEL: @test_3( +; CHECK-NEXT: [[M:%.*]] = lshr i32 [[A:%.*]], 24 +; CHECK-NEXT: [[N:%.*]] = and i32 [[M]], [[B:%.*]] +; CHECK-NEXT: [[O:%.*]] = sitofp i32 [[N]] to float +; CHECK-NEXT: [[P:%.*]] = fadd float [[O]], 1.000000e+00 +; CHECK-NEXT: ret float [[P]] +; + %m = lshr i32 %a, 24 + %n = and i32 %m, %b + %o = sitofp i32 %n to float + %p = fadd float %o, 1.0 + ret float %p +} Index: test/Transforms/InstCombine/apint-shift.ll =================================================================== --- test/Transforms/InstCombine/apint-shift.ll +++ test/Transforms/InstCombine/apint-shift.ll @@ -287,13 +287,10 @@ ret i47 %sh2 } -; FIXME: Same as above with vectors. - define <2 x i47> @test12_splat_vec(<2 x i47> %X) { ; CHECK-LABEL: @test12_splat_vec( -; CHECK-NEXT: [[SH1:%.*]] = ashr <2 x i47> %X, -; CHECK-NEXT: [[SH2:%.*]] = shl nsw <2 x i47> [[SH1]], -; CHECK-NEXT: ret <2 x i47> [[SH2]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i47> %X, +; CHECK-NEXT: ret <2 x i47> [[TMP1]] ; %sh1 = ashr <2 x i47> %X, %sh2 = shl <2 x i47> %sh1, Index: test/Transforms/InstCombine/pr17827.ll =================================================================== --- test/Transforms/InstCombine/pr17827.ll +++ test/Transforms/InstCombine/pr17827.ll @@ -52,9 +52,7 @@ define <2 x i1> @test_shift_and_cmp_changed1_vec(<2 x i8> %p, <2 x i8> %q) { ; CHECK-LABEL: @test_shift_and_cmp_changed1_vec( ; CHECK-NEXT: [[ANDP:%.*]] = and <2 x i8> %p, -; CHECK-NEXT: [[ANDQ:%.*]] = and <2 x i8> %q, -; CHECK-NEXT: [[OR:%.*]] = or <2 x i8> [[ANDQ]], [[ANDP]] -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> [[OR]], +; CHECK-NEXT: [[SHL:%.*]] = shl nuw <2 x i8> [[ANDP]], ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[SHL]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; Index: test/Transforms/InstCombine/shift.ll =================================================================== --- test/Transforms/InstCombine/shift.ll +++ test/Transforms/InstCombine/shift.ll @@ -1049,12 +1049,11 @@ } ; (X << C1) >>u C2 --> X << (C1 - C2) & (-1 >> C2) -; FIXME: Demanded bits should change the mask constant as it does for the scalar case. 
define <2 x i8> @test53_no_nuw_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @test53_no_nuw_splat_vec( ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> %x, -; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[TMP1]], ; CHECK-NEXT: ret <2 x i8> [[B]] ; %A = shl <2 x i8> %x, @@ -1074,6 +1073,17 @@ ret i32 %and } +define <2 x i32> @test54_splat_vec(<2 x i32> %x) { +; CHECK-LABEL: @test54_splat_vec( +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> %x, +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: ret <2 x i32> [[AND]] +; + %shr2 = lshr <2 x i32> %x, + %shl = shl <2 x i32> %shr2, + %and = and <2 x i32> %shl, + ret <2 x i32> %and +} define i32 @test55(i32 %x) { ; CHECK-LABEL: @test55( @@ -1100,7 +1110,6 @@ ret i32 %or } - define i32 @test57(i32 %x) { ; CHECK-LABEL: @test57( ; CHECK-NEXT: [[SHR1:%.*]] = lshr i32 %x, 1 @@ -1114,7 +1123,6 @@ ret i32 %or } - define i32 @test58(i32 %x) { ; CHECK-LABEL: @test58( ; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 %x, 3 @@ -1127,6 +1135,17 @@ ret i32 %or } +define <2 x i32> @test58_splat_vec(<2 x i32> %x) { +; CHECK-LABEL: @test58_splat_vec( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> %x, +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[TMP1]], +; CHECK-NEXT: ret <2 x i32> [[OR]] +; + %shr = ashr <2 x i32> %x, + %shl = shl <2 x i32> %shr, + %or = or <2 x i32> %shl, + ret <2 x i32> %or +} define i32 @test59(i32 %x) { ; CHECK-LABEL: @test59( @@ -1257,8 +1276,7 @@ define <2 x i64> @test_64_splat_vec(<2 x i32> %t) { ; CHECK-LABEL: @test_64_splat_vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> %t, -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw <2 x i32> [[AND]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> %t, ; CHECK-NEXT: [[SHL:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[SHL]] ; @@ -1268,3 +1286,23 @@ ret <2 x i64> %shl } +define <2 x i8> @ashr_demanded_bits_splat(<2 x i8> %x) { +; CHECK-LABEL: @ashr_demanded_bits_splat( +; CHECK-NEXT: [[SHR:%.*]] = ashr <2 x i8> %x, +; CHECK-NEXT: ret <2 x i8> [[SHR]] +; + %and = and <2 x i8> %x, + %shr = ashr <2 x i8> %and, + ret <2 x i8> %shr +} + +define <2 x i8> @lshr_demanded_bits_splat(<2 x i8> %x) { +; CHECK-LABEL: @lshr_demanded_bits_splat( +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i8> %x, +; CHECK-NEXT: ret <2 x i8> [[SHR]] +; + %and = and <2 x i8> %x, + %shr = lshr <2 x i8> %and, + ret <2 x i8> %shr +} + Index: test/Transforms/InstCombine/vector-casts.ll =================================================================== --- test/Transforms/InstCombine/vector-casts.ll +++ test/Transforms/InstCombine/vector-casts.ll @@ -15,9 +15,9 @@ ; The ashr turns into an lshr. define <2 x i64> @test2(<2 x i64> %a) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[B:%.*]] = and <2 x i64> %a, -; CHECK-NEXT: [[T:%.*]] = lshr <2 x i64> [[B]], -; CHECK-NEXT: ret <2 x i64> [[T]] +; CHECK-NEXT: [[B:%.*]] = and <2 x i64> %a, +; CHECK-NEXT: [[TMP1:%.*]] = lshr exact <2 x i64> [[B]], +; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %b = and <2 x i64> %a, %t = ashr <2 x i64> %b, Index: test/Transforms/InstCombine/xor.ll =================================================================== --- test/Transforms/InstCombine/xor.ll +++ test/Transforms/InstCombine/xor.ll @@ -536,3 +536,20 @@ %xor = xor i32 %and, %B ret i32 %xor } + +; PR32706 - https://bugs.llvm.org/show_bug.cgi?id=32706 +; Pin an xor constant operand to -1 if possible because 'not' is better for SCEV and codegen. 
+ +define i32 @not_is_canonical(i32 %x, i32 %y) { +; CHECK-LABEL: @not_is_canonical( +; CHECK-NEXT: [[SUB:%.*]] = xor i32 %x, -1 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], %y +; CHECK-NEXT: [[MUL:%.*]] = shl i32 [[ADD]], 2 +; CHECK-NEXT: ret i32 [[MUL]] +; + %sub = xor i32 %x, 1073741823 + %add = add i32 %sub, %y + %mul = shl i32 %add, 2 + ret i32 %mul +} + Index: test/Transforms/InstCombine/zext.ll =================================================================== --- test/Transforms/InstCombine/zext.ll +++ test/Transforms/InstCombine/zext.ll @@ -35,7 +35,7 @@ define <2 x i64> @test4(<2 x i64> %A) { ; CHECK-LABEL: @test4( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i64> %A, +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i64> %A, ; CHECK-NEXT: [[XOR:%.*]] = and <2 x i64> [[TMP1]], ; CHECK-NEXT: ret <2 x i64> [[XOR]] ; Index: test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll =================================================================== --- test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll +++ test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll @@ -198,7 +198,7 @@ ; @testNeon is an important example of the nead for ivchains. ; -; Currently we have three extra add.w's that keep the store address +; Currently we have two extra add.w's that keep the store address ; live past the next increment because ISEL is unfortunately undoing ; the store chain. ISEL also fails to convert all but one of the stores to ; post-increment addressing. However, the loads should use @@ -207,12 +207,10 @@ ; ; A9: testNeon: ; A9: %.lr.ph -; A9-NOT: lsl.w -; A9-NOT: {{ldr|str|adds|add r}} -; A9: vst1.8 {{.*}} [r{{[0-9]+}}]! -; A9-NOT: {{ldr|str|adds|add r}} ; A9: add.w r +; A9-NOT: lsl.w ; A9-NOT: {{ldr|str|adds|add r}} +; A9: vst1.8 {{.*}} [r{{[0-9]+}}], r{{[0-9]+}} ; A9: add.w r ; A9-NOT: {{ldr|str|adds|add r}} ; A9-NOT: add.w r Index: test/Transforms/LoopVectorize/phi-cost.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVectorize/phi-cost.ll @@ -0,0 +1,86 @@ +; REQUIRES: asserts +; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -instcombine -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" + +; CHECK-LABEL: phi_two_incoming_values +; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %i = phi i64 [ %i.next, %if.end ], [ 0, %entry ] +; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %tmp5 = phi i32 [ %tmp1, %for.body ], [ %tmp4, %if.then ] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] +; CHECK: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* {{.*}} +; CHECK: [[TMP5:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[TMP6]], <2 x i32> [[WIDE_LOAD]] +; CHECK: store <2 x i32> [[PREDPHI]], <2 x i32>* {{.*}} +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 +; +define void @phi_two_incoming_values(i32* %a, i32* %b, i64 %n) { +entry: + br label %for.body + +for.body: + %i = phi i64 [ %i.next, %if.end ], [ 0, %entry ] + %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i + %tmp1 = load i32, i32* %tmp0, align 4 + %tmp2 = getelementptr inbounds i32, i32* %b, i64 %i + %tmp3 = icmp sgt i32 %tmp1, 0 + br i1 %tmp3, label %if.then, label %if.end + +if.then: + %tmp4 = add i32 %tmp1, 1 + br label 
%if.end + +if.end: + %tmp5 = phi i32 [ %tmp1, %for.body ], [ %tmp4, %if.then ] + store i32 %tmp5, i32* %tmp2, align 4 + %i.next = add i64 %i, 1 + %cond = icmp eq i64 %i, %n + br i1 %cond, label %for.end, label %for.body + +for.end: + ret void +} + +; CHECK-LABEL: phi_three_incoming_values +; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %i = phi i64 [ %i.next, %if.end ], [ 0, %entry ] +; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %tmp8 = phi i32 [ 9, %for.body ], [ 3, %if.then ], [ %tmp7, %if.else ] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] +; CHECK: [[PREDPHI:%.*]] = select <2 x i1> {{.*}}, <2 x i32> , <2 x i32> +; CHECK: [[PREDPHI7:%.*]] = select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> [[PREDPHI]] +; CHECK: store <2 x i32> [[PREDPHI7]], <2 x i32>* {{.*}} +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 +; +define void @phi_three_incoming_values(i32* %a, i32* %b, i64 %n) { +entry: + br label %for.body + +for.body: + %i = phi i64 [ %i.next, %if.end ], [ 0, %entry ] + %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i + %tmp1 = load i32, i32* %tmp0, align 4 + %tmp2 = getelementptr inbounds i32, i32* %b, i64 %i + %tmp3 = load i32, i32* %tmp2, align 4 + %tmp4 = icmp sgt i32 %tmp1, %tmp3 + br i1 %tmp4, label %if.then, label %if.end + +if.then: + %tmp5 = icmp sgt i32 %tmp1, 19 + br i1 %tmp5, label %if.end, label %if.else + +if.else: + %tmp6 = icmp slt i32 %tmp3, 4 + %tmp7 = select i1 %tmp6, i32 4, i32 5 + br label %if.end + +if.end: + %tmp8 = phi i32 [ 9, %for.body ], [ 3, %if.then ], [ %tmp7, %if.else ] + store i32 %tmp8, i32* %tmp0, align 4 + %i.next = add i64 %i, 1 + %cond = icmp eq i64 %i, %n + br i1 %cond, label %for.end, label %for.body + +for.end: + ret void +} Index: test/Transforms/SimplifyCFG/merge-cond-stores.ll =================================================================== --- test/Transforms/SimplifyCFG/merge-cond-stores.ll +++ test/Transforms/SimplifyCFG/merge-cond-stores.ll @@ -36,6 +36,39 @@ ret void } +; This is the same as test_simple, but the branch target order has been swapped +define void @test_simple_commuted(i32* %p, i32 %a, i32 %b) { +; CHECK-LABEL: @test_simple_commuted( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X1:%.*]] = icmp eq i32 [[A:%.*]], 0 +; CHECK-NEXT: [[X2:%.*]] = icmp eq i32 [[B:%.*]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = or i1 [[X1]], [[X2]] +; CHECK-NEXT: br i1 [[TMP0]], label [[TMP1:%.*]], label [[TMP2:%.*]] +; CHECK: [[DOT:%.*]] = zext i1 [[X2]] to i32 +; CHECK-NEXT: store i32 [[DOT]], i32* [[P:%.*]], align 4 +; CHECK-NEXT: br label [[TMP2]] +; CHECK: ret void +; +entry: + %x1 = icmp eq i32 %a, 0 + br i1 %x1, label %yes1, label %fallthrough + +yes1: + store i32 0, i32* %p + br label %fallthrough + +fallthrough: + %x2 = icmp eq i32 %b, 0 + br i1 %x2, label %yes2, label %end + +yes2: + store i32 1, i32* %p + br label %end + +end: + ret void +} + ; This test should entirely fold away, leaving one large basic block. 
define void @test_recursive(i32* %p, i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-LABEL: @test_recursive( Index: test/tools/llvm-cvtres/basic.test =================================================================== --- /dev/null +++ test/tools/llvm-cvtres/basic.test @@ -0,0 +1,4 @@ +; RUN: llvm-cvtres /h > %t +; RUN: FileCheck -input-file=%t %s -check-prefix=HELP_TEST + +; HELP_TEST: OVERVIEW: Resource Converter Index: tools/LLVMBuild.txt =================================================================== --- tools/LLVMBuild.txt +++ tools/LLVMBuild.txt @@ -26,6 +26,7 @@ llvm-bcanalyzer llvm-cat llvm-cov + llvm-cvtres llvm-diff llvm-dis llvm-dwarfdump Index: tools/dsymutil/DwarfLinker.cpp =================================================================== --- tools/dsymutil/DwarfLinker.cpp +++ tools/dsymutil/DwarfLinker.cpp @@ -522,7 +522,8 @@ /// \brief Emit the abbreviation table \p Abbrevs to the /// debug_abbrev section. - void emitAbbrevs(const std::vector> &Abbrevs); + void emitAbbrevs(const std::vector> &Abbrevs, + unsigned DwarfVersion); /// \brief Emit the string table described by \p Pool. void emitStrings(const NonRelocatableStringpool &Pool); @@ -690,8 +691,10 @@ /// \brief Emit the \p Abbrevs array as the shared abbreviation table /// for the linked Dwarf file. void DwarfStreamer::emitAbbrevs( - const std::vector> &Abbrevs) { + const std::vector> &Abbrevs, + unsigned DwarfVersion) { MS->SwitchSection(MOFI->getDwarfAbbrevSection()); + MC->setDwarfVersion(DwarfVersion); Asm->emitDwarfAbbrevs(Abbrevs); } @@ -1129,6 +1132,12 @@ /// \brief Called at the end of a debug object link. void endDebugObject(); + /// Remembers the newest DWARF version we've seen in a unit. + void maybeUpdateMaxDwarfVersion(unsigned Version) { + if (MaxDwarfVersion < Version) + MaxDwarfVersion = Version; + } + /// Keeps track of relocations. class RelocationManager { struct ValidReloc { @@ -1430,6 +1439,7 @@ std::unique_ptr Streamer; uint64_t OutputDebugInfoSize; unsigned UnitID; ///< A unique ID that identifies each compile unit. + unsigned MaxDwarfVersion = 0; /// The units of the current debug map object. std::vector> Units; @@ -3435,9 +3445,11 @@ CUDie.dump(outs(), 0); } - if (!registerModuleReference(CUDie, *CU, ModuleMap)) + if (!registerModuleReference(CUDie, *CU, ModuleMap)) { Units.push_back(llvm::make_unique(*CU, UnitID++, !Options.NoODR, "")); + maybeUpdateMaxDwarfVersion(CU->getVersion()); + } } // Now build the DIE parent links that we will use during the next phase. @@ -3471,7 +3483,7 @@ // Emit everything that's global. 
if (!Options.NoOutput) { - Streamer->emitAbbrevs(Abbreviations); + Streamer->emitAbbrevs(Abbreviations, MaxDwarfVersion); Streamer->emitStrings(StringPool); } Index: tools/llvm-cvtres/CMakeLists.txt =================================================================== --- /dev/null +++ tools/llvm-cvtres/CMakeLists.txt @@ -0,0 +1,13 @@ +set(LLVM_LINK_COMPONENTS + Option + Support + ) + +set(LLVM_TARGET_DEFINITIONS Opts.td) + +tablegen(LLVM Opts.inc -gen-opt-parser-defs) +add_public_tablegen_target(CvtResTableGen) + +add_llvm_tool(llvm-cvtres + llvm-cvtres.cpp + ) Index: tools/llvm-cvtres/LLVMBuild.txt =================================================================== --- tools/llvm-cvtres/LLVMBuild.txt +++ tools/llvm-cvtres/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./tools/LLVMBuild.txt ------------------------------------*- Conf -*--===; +;===- ./tools/llvm-cvtres/LLVMBuild.txt ------------------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -15,39 +15,8 @@ ; ;===------------------------------------------------------------------------===; -[common] -subdirectories = - bugpoint - dsymutil - llc - lli - llvm-ar - llvm-as - llvm-bcanalyzer - llvm-cat - llvm-cov - llvm-diff - llvm-dis - llvm-dwarfdump - llvm-dwp - llvm-extract - llvm-jitlistener - llvm-link - llvm-lto - llvm-mc - llvm-mcmarkup - llvm-modextract - llvm-nm - llvm-objdump - llvm-pdbdump - llvm-profdata - llvm-rtdyld - llvm-size - llvm-split - opt - verify-uselistorder - [component_0] -type = Group -name = Tools -parent = $ROOT +type = Tool +name = llvm-cvtres +parent = Tools +required_libraries = Option Support Index: tools/llvm-cvtres/Opts.td =================================================================== --- /dev/null +++ tools/llvm-cvtres/Opts.td @@ -0,0 +1,11 @@ +include "llvm/Option/OptParser.td" + +def DEFINE : Joined<["/"], "DEFINE:">, HelpText<"">, MetaVarName<"symbol">; +def FOLDDUPS : Flag<["/"], "FOLDDUPS:">, HelpText<"">; +def MACHINE : Joined<["/"], "MACHINE:">, HelpText<"">, MetaVarName<"{ARM|EBC|IA64|X64|X86}">; +def NOLOGO : Flag<["/"], "NOLOGO">, HelpText<"">; +def OUT : Joined<["/"], "OUT:">, HelpText<"">, MetaVarName<"filename">; +def READONLY : Flag<["/"], "READONLY">, HelpText<"">; +def VERBOSE : Flag<["/"], "VERBOSE">, HelpText<"">; +def HELP : Flag<["/"], "HELP">; +def H : Flag<["/"], "H">, Alias; Index: tools/llvm-cvtres/llvm-cvtres.h =================================================================== --- /dev/null +++ tools/llvm-cvtres/llvm-cvtres.h @@ -0,0 +1,13 @@ +//===- llvm-cvtres.h ------------------------------------------ *- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVMCVTRES_LLVMCVTRES_H +#define LLVM_TOOLS_LLVMCVTRES_LLVMCVTRES_H + +#endif Index: tools/llvm-cvtres/llvm-cvtres.cpp =================================================================== --- /dev/null +++ tools/llvm-cvtres/llvm-cvtres.cpp @@ -0,0 +1,86 @@ +//===- llvm-cvtres.cpp - Serialize .res files into .obj ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Serialize .res files into .obj files. 
This is intended to be a +// platform-independent port of Microsoft's cvtres.exe. +// +//===----------------------------------------------------------------------===// + +#include "llvm-cvtres.h" + +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + +enum ID { + OPT_INVALID = 0, // This is not an option ID. +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR) \ + OPT_##ID, +#include "Opts.inc" +#undef OPTION +}; + +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "Opts.inc" +#undef PREFIX + +static const opt::OptTable::Info InfoTable[] = { +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR) \ + { \ + PREFIX, NAME, HELPTEXT, \ + METAVAR, OPT_##ID, opt::Option::KIND##Class, \ + PARAM, FLAGS, OPT_##GROUP, \ + OPT_##ALIAS, ALIASARGS}, +#include "Opts.inc" +#undef OPTION +}; + +class CvtResOptTable : public opt::OptTable { +public: + CvtResOptTable() : OptTable(InfoTable, true) {} +}; + +static ExitOnError ExitOnErr; +} + +int main(int argc_, const char *argv_[]) { + sys::PrintStackTraceOnErrorSignal(argv_[0]); + PrettyStackTraceProgram X(argc_, argv_); + + ExitOnErr.setBanner("llvm-cvtres: "); + + SmallVector<const char *, 256> argv; + SpecificBumpPtrAllocator<char> ArgAllocator; + ExitOnErr(errorCodeToError(sys::Process::GetArgumentVector( + argv, makeArrayRef(argv_, argc_), ArgAllocator))); + + llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
+ + CvtResOptTable T; + unsigned MAI, MAC; + ArrayRef ArgsArr = makeArrayRef(argv_, argc_); + opt::InputArgList InputArgs = T.ParseArgs(ArgsArr, MAI, MAC); + + if (InputArgs.hasArg(OPT_HELP)) + T.PrintHelp(outs(), "cvtres", "Resource Converter", false); + + return 0; +} Index: unittests/ADT/APFloatTest.cpp =================================================================== --- unittests/ADT/APFloatTest.cpp +++ unittests/ADT/APFloatTest.cpp @@ -27,10 +27,11 @@ return F.convertToDouble(); } -static std::string convertToString(double d, unsigned Prec, unsigned Pad) { +static std::string convertToString(double d, unsigned Prec, unsigned Pad, + bool Tr = true) { llvm::SmallVector Buffer; llvm::APFloat F(d); - F.toString(Buffer, Prec, Pad); + F.toString(Buffer, Prec, Pad, Tr); return std::string(Buffer.data(), Buffer.size()); } @@ -949,6 +950,22 @@ ASSERT_EQ("873.18340000000001", convertToString(873.1834, 0, 1)); ASSERT_EQ("8.7318340000000001E+2", convertToString(873.1834, 0, 0)); ASSERT_EQ("1.7976931348623157E+308", convertToString(1.7976931348623157E+308, 0, 0)); + ASSERT_EQ("10", convertToString(10.0, 6, 3, false)); + ASSERT_EQ("1.000000e+01", convertToString(10.0, 6, 0, false)); + ASSERT_EQ("10100", convertToString(1.01E+4, 5, 2, false)); + ASSERT_EQ("1.0100e+04", convertToString(1.01E+4, 4, 2, false)); + ASSERT_EQ("1.01000e+04", convertToString(1.01E+4, 5, 1, false)); + ASSERT_EQ("0.0101", convertToString(1.01E-2, 5, 2, false)); + ASSERT_EQ("0.0101", convertToString(1.01E-2, 4, 2, false)); + ASSERT_EQ("1.01000e-02", convertToString(1.01E-2, 5, 1, false)); + ASSERT_EQ("0.78539816339744828", + convertToString(0.78539816339744830961, 0, 3, false)); + ASSERT_EQ("4.94065645841246540e-324", + convertToString(4.9406564584124654e-324, 0, 3, false)); + ASSERT_EQ("873.18340000000001", convertToString(873.1834, 0, 1, false)); + ASSERT_EQ("8.73183400000000010e+02", convertToString(873.1834, 0, 0, false)); + ASSERT_EQ("1.79769313486231570e+308", + convertToString(1.7976931348623157E+308, 0, 0, false)); } TEST(APFloatTest, toInteger) { Index: unittests/ADT/APIntTest.cpp =================================================================== --- unittests/ADT/APIntTest.cpp +++ unittests/ADT/APIntTest.cpp @@ -2057,4 +2057,33 @@ EXPECT_EQ(0, neg_one.shl(128)); } +TEST(APIntTest, isSubsetOf) { + APInt i32_1(32, 1); + APInt i32_2(32, 2); + APInt i32_3(32, 3); + EXPECT_FALSE(i32_3.isSubsetOf(i32_1)); + EXPECT_TRUE(i32_1.isSubsetOf(i32_3)); + EXPECT_FALSE(i32_2.isSubsetOf(i32_1)); + EXPECT_FALSE(i32_1.isSubsetOf(i32_2)); + EXPECT_TRUE(i32_3.isSubsetOf(i32_3)); + + APInt i128_1(128, 1); + APInt i128_2(128, 2); + APInt i128_3(128, 3); + EXPECT_FALSE(i128_3.isSubsetOf(i128_1)); + EXPECT_TRUE(i128_1.isSubsetOf(i128_3)); + EXPECT_FALSE(i128_2.isSubsetOf(i128_1)); + EXPECT_FALSE(i128_1.isSubsetOf(i128_2)); + EXPECT_TRUE(i128_3.isSubsetOf(i128_3)); + + i128_1 <<= 64; + i128_2 <<= 64; + i128_3 <<= 64; + EXPECT_FALSE(i128_3.isSubsetOf(i128_1)); + EXPECT_TRUE(i128_1.isSubsetOf(i128_3)); + EXPECT_FALSE(i128_2.isSubsetOf(i128_1)); + EXPECT_FALSE(i128_1.isSubsetOf(i128_2)); + EXPECT_TRUE(i128_3.isSubsetOf(i128_3)); +} + } // end anonymous namespace Index: unittests/ADT/BitVectorTest.cpp =================================================================== --- unittests/ADT/BitVectorTest.cpp +++ unittests/ADT/BitVectorTest.cpp @@ -186,7 +186,9 @@ // Test finding in an empty BitVector. 
TypeParam A; EXPECT_EQ(-1, A.find_first()); + EXPECT_EQ(-1, A.find_last()); EXPECT_EQ(-1, A.find_first_unset()); + EXPECT_EQ(-1, A.find_last_unset()); EXPECT_EQ(-1, A.find_next(0)); EXPECT_EQ(-1, A.find_next_unset(0)); @@ -196,12 +198,14 @@ A.set(13); A.set(75); + EXPECT_EQ(75, A.find_last()); EXPECT_EQ(12, A.find_first()); EXPECT_EQ(13, A.find_next(12)); EXPECT_EQ(75, A.find_next(13)); EXPECT_EQ(-1, A.find_next(75)); EXPECT_EQ(0, A.find_first_unset()); + EXPECT_EQ(99, A.find_last_unset()); EXPECT_EQ(14, A.find_next_unset(11)); EXPECT_EQ(14, A.find_next_unset(12)); EXPECT_EQ(14, A.find_next_unset(13)); @@ -213,12 +217,16 @@ A.set(0, 100); EXPECT_EQ(100U, A.count()); EXPECT_EQ(0, A.find_first()); + EXPECT_EQ(99, A.find_last()); EXPECT_EQ(-1, A.find_first_unset()); + EXPECT_EQ(-1, A.find_last_unset()); A.reset(0, 100); EXPECT_EQ(0U, A.count()); EXPECT_EQ(-1, A.find_first()); + EXPECT_EQ(-1, A.find_last()); EXPECT_EQ(0, A.find_first_unset()); + EXPECT_EQ(99, A.find_last_unset()); } TYPED_TEST(BitVectorTest, CompoundAssignment) { @@ -345,6 +353,128 @@ EXPECT_FALSE(B.anyCommon(A)); } +typedef std::vector> RangeList; + +template +static inline VecType createBitVector(uint32_t Size, + const RangeList &setRanges) { + VecType V; + V.resize(Size); + for (auto &R : setRanges) + V.set(R.first, R.second); + return V; +} + +TYPED_TEST(BitVectorTest, ShiftOpsSingleWord) { + // Test that shift ops work when the desired shift amount is less + // than one word. + + // 1. Case where the number of bits in the BitVector also fit into a single + // word. + TypeParam A = createBitVector(12, {{2, 4}, {8, 10}}); + TypeParam B = A; + + EXPECT_EQ(4U, A.count()); + EXPECT_TRUE(A.test(2)); + EXPECT_TRUE(A.test(3)); + EXPECT_TRUE(A.test(8)); + EXPECT_TRUE(A.test(9)); + + A >>= 1; + EXPECT_EQ(createBitVector(12, {{1, 3}, {7, 9}}), A); + + A <<= 1; + EXPECT_EQ(B, A); + + A >>= 10; + EXPECT_EQ(createBitVector(12, {}), A); + + A = B; + A <<= 10; + EXPECT_EQ(createBitVector(12, {}), A); + + // 2. Case where the number of bits in the BitVector do not fit into a single + // word. + + // 31----------------------------------------------------------------------0 + // XXXXXXXX XXXXXXXX XXXXXXXX 00000111 | 11111110 00000000 00001111 11111111 + A = createBitVector(40, {{0, 12}, {25, 35}}); + EXPECT_EQ(40U, A.size()); + EXPECT_EQ(22U, A.count()); + + // 2a. Make sure that left shifting some 1 bits out of the vector works. + // 31----------------------------------------------------------------------0 + // Before: + // XXXXXXXX XXXXXXXX XXXXXXXX 00000111 | 11111110 00000000 00001111 11111111 + // After: + // XXXXXXXX XXXXXXXX XXXXXXXX 11111100 | 00000000 00011111 11111110 00000000 + A <<= 9; + EXPECT_EQ(createBitVector(40, {{9, 21}, {34, 40}}), A); + + // 2b. Make sure that keeping the number of one bits unchanged works. + // 31----------------------------------------------------------------------0 + // Before: + // XXXXXXXX XXXXXXXX XXXXXXXX 11111100 | 00000000 00011111 11111110 00000000 + // After: + // XXXXXXXX XXXXXXXX XXXXXXXX 00000011 | 11110000 00000000 01111111 11111000 + A >>= 6; + EXPECT_EQ(createBitVector(40, {{3, 15}, {28, 34}}), A); + + // 2c. Make sure that right shifting some 1 bits out of the vector works. 
+ // 31----------------------------------------------------------------------0 + // Before: + // XXXXXXXX XXXXXXXX XXXXXXXX 00000011 | 11110000 00000000 01111111 11111000 + // After: + // XXXXXXXX XXXXXXXX XXXXXXXX 00000000 | 00000000 11111100 00000000 00011111 + A >>= 10; + EXPECT_EQ(createBitVector(40, {{0, 5}, {18, 24}}), A); + + // 3. Big test. + A = createBitVector(300, {{1, 30}, {60, 95}, {200, 275}}); + A <<= 29; + EXPECT_EQ(createBitVector( + 300, {{1 + 29, 30 + 29}, {60 + 29, 95 + 29}, {200 + 29, 300}}), + A); +} + +TYPED_TEST(BitVectorTest, ShiftOpsMultiWord) { + // Test that shift ops work when the desired shift amount is greater than or + // equal to the size of a single word. + auto A = createBitVector(300, {{1, 30}, {60, 95}, {200, 275}}); + + // Make a copy so we can re-use it later. + auto B = A; + + // 1. Shift left by an exact multiple of the word size. This should invoke + // only a memmove and no per-word bit operations. + A <<= 64; + auto Expected = createBitVector( + 300, {{1 + 64, 30 + 64}, {60 + 64, 95 + 64}, {200 + 64, 300}}); + EXPECT_EQ(Expected, A); + + // 2. Shift left by a non multiple of the word size. This should invoke both + // a memmove and per-word bit operations. + A = B; + A <<= 93; + EXPECT_EQ(createBitVector( + 300, {{1 + 93, 30 + 93}, {60 + 93, 95 + 93}, {200 + 93, 300}}), + A); + + // 1. Shift right by an exact multiple of the word size. This should invoke + // only a memmove and no per-word bit operations. + A = B; + A >>= 64; + EXPECT_EQ( + createBitVector(300, {{0, 95 - 64}, {200 - 64, 275 - 64}}), A); + + // 2. Shift left by a non multiple of the word size. This should invoke both + // a memmove and per-word bit operations. + A = B; + A >>= 93; + EXPECT_EQ( + createBitVector(300, {{0, 95 - 93}, {200 - 93, 275 - 93}}), A); +} + TYPED_TEST(BitVectorTest, RangeOps) { TypeParam A; A.resize(256); Index: unittests/CodeGen/CMakeLists.txt =================================================================== --- unittests/CodeGen/CMakeLists.txt +++ unittests/CodeGen/CMakeLists.txt @@ -9,6 +9,7 @@ DIEHashTest.cpp LowLevelTypeTest.cpp MachineInstrBundleIteratorTest.cpp + ScalableVectorMVTsTest.cpp ) add_llvm_unittest(CodeGenTests Index: unittests/CodeGen/ScalableVectorMVTsTest.cpp =================================================================== --- /dev/null +++ unittests/CodeGen/ScalableVectorMVTsTest.cpp @@ -0,0 +1,88 @@ +//===-------- llvm/unittest/CodeGen/ScalableVectorMVTsTest.cpp ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/LLVMContext.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +TEST(ScalableVectorMVTsTest, IntegerMVTs) { + for (auto VecTy : MVT::integer_scalable_vector_valuetypes()) { + ASSERT_TRUE(VecTy.isValid()); + ASSERT_TRUE(VecTy.isInteger()); + ASSERT_TRUE(VecTy.isVector()); + ASSERT_TRUE(VecTy.isScalableVector()); + ASSERT_TRUE(VecTy.getScalarType().isValid()); + + ASSERT_FALSE(VecTy.isFloatingPoint()); + } +} + +TEST(ScalableVectorMVTsTest, FloatMVTs) { + for (auto VecTy : MVT::fp_scalable_vector_valuetypes()) { + ASSERT_TRUE(VecTy.isValid()); + ASSERT_TRUE(VecTy.isFloatingPoint()); + ASSERT_TRUE(VecTy.isVector()); + ASSERT_TRUE(VecTy.isScalableVector()); + ASSERT_TRUE(VecTy.getScalarType().isValid()); + + ASSERT_FALSE(VecTy.isInteger()); + } +} + +TEST(ScalableVectorMVTsTest, HelperFuncs) { + LLVMContext Ctx; + + // Create with scalable flag + EVT Vnx4i32 = EVT::getVectorVT(Ctx, MVT::i32, 4, /*Scalable=*/true); + ASSERT_TRUE(Vnx4i32.isScalableVector()); + + // Create with separate MVT::ElementCount + auto EltCnt = MVT::ElementCount(2, true); + EVT Vnx2i32 = EVT::getVectorVT(Ctx, MVT::i32, EltCnt); + ASSERT_TRUE(Vnx2i32.isScalableVector()); + + // Create with inline MVT::ElementCount + EVT Vnx2i64 = EVT::getVectorVT(Ctx, MVT::i64, {2, true}); + ASSERT_TRUE(Vnx2i64.isScalableVector()); + + // Check that changing scalar types/element count works + EXPECT_EQ(Vnx2i32.widenIntegerVectorElementType(Ctx), Vnx2i64); + EXPECT_EQ(Vnx4i32.getHalfNumVectorElementsVT(Ctx), Vnx2i32); + + // Check that overloaded '*' and '/' operators work + EXPECT_EQ(EVT::getVectorVT(Ctx, MVT::i64, EltCnt * 2), MVT::nxv4i64); + EXPECT_EQ(EVT::getVectorVT(Ctx, MVT::i64, EltCnt / 2), MVT::nxv1i64); + + // Check that float->int conversion works + EVT Vnx2f64 = EVT::getVectorVT(Ctx, MVT::f64, {2, true}); + EXPECT_EQ(Vnx2f64.changeTypeToInteger(), Vnx2i64); + + // Check fields inside MVT::ElementCount + EltCnt = Vnx4i32.getVectorElementCount(); + EXPECT_EQ(EltCnt.Min, 4U); + ASSERT_TRUE(EltCnt.Scalable); + + // Check that fixed-length vector types aren't scalable. + EVT V8i32 = EVT::getVectorVT(Ctx, MVT::i32, 8); + ASSERT_FALSE(V8i32.isScalableVector()); + EVT V4f64 = EVT::getVectorVT(Ctx, MVT::f64, {4, false}); + ASSERT_FALSE(V4f64.isScalableVector()); + + // Check that MVT::ElementCount works for fixed-length types. 
+ EltCnt = V8i32.getVectorElementCount(); + EXPECT_EQ(EltCnt.Min, 8U); + ASSERT_FALSE(EltCnt.Scalable); +} + +} Index: unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp =================================================================== --- unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp +++ unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp @@ -170,7 +170,8 @@ CUDie.addAttribute(Attr_DW_FORM_ref8, DW_FORM_ref8, Data8); const auto Attr_DW_FORM_ref_sig8 = static_cast(Attr++); - CUDie.addAttribute(Attr_DW_FORM_ref_sig8, DW_FORM_ref_sig8, Data8_2); + if (Version >= 4) + CUDie.addAttribute(Attr_DW_FORM_ref_sig8, DW_FORM_ref_sig8, Data8_2); const auto Attr_DW_FORM_ref_udata = static_cast(Attr++); CUDie.addAttribute(Attr_DW_FORM_ref_udata, DW_FORM_ref_udata, UData[0]); @@ -185,7 +186,8 @@ CUDie.addAttribute(Attr_DW_FORM_flag_false, DW_FORM_flag, false); const auto Attr_DW_FORM_flag_present = static_cast(Attr++); - CUDie.addAttribute(Attr_DW_FORM_flag_present, DW_FORM_flag_present); + if (Version >= 4) + CUDie.addAttribute(Attr_DW_FORM_flag_present, DW_FORM_flag_present); //---------------------------------------------------------------------- // Test SLEB128 based forms @@ -213,8 +215,9 @@ Dwarf32Values[0]); const auto Attr_DW_FORM_sec_offset = static_cast(Attr++); - CUDie.addAttribute(Attr_DW_FORM_sec_offset, DW_FORM_sec_offset, - Dwarf32Values[1]); + if (Version >= 4) + CUDie.addAttribute(Attr_DW_FORM_sec_offset, DW_FORM_sec_offset, + Dwarf32Values[1]); //---------------------------------------------------------------------- // Add an address at the end to make sure we can decode this value @@ -307,7 +310,8 @@ EXPECT_EQ(Data2, toReference(DieDG.find(Attr_DW_FORM_ref2), 0)); EXPECT_EQ(Data4, toReference(DieDG.find(Attr_DW_FORM_ref4), 0)); EXPECT_EQ(Data8, toReference(DieDG.find(Attr_DW_FORM_ref8), 0)); - EXPECT_EQ(Data8_2, toReference(DieDG.find(Attr_DW_FORM_ref_sig8), 0)); + if (Version >= 4) + EXPECT_EQ(Data8_2, toReference(DieDG.find(Attr_DW_FORM_ref_sig8), 0)); EXPECT_EQ(UData[0], toReference(DieDG.find(Attr_DW_FORM_ref_udata), 0)); //---------------------------------------------------------------------- @@ -315,7 +319,8 @@ //---------------------------------------------------------------------- EXPECT_EQ(1ULL, toUnsigned(DieDG.find(Attr_DW_FORM_flag_true), 0)); EXPECT_EQ(0ULL, toUnsigned(DieDG.find(Attr_DW_FORM_flag_false), 1)); - EXPECT_EQ(1ULL, toUnsigned(DieDG.find(Attr_DW_FORM_flag_present), 0)); + if (Version >= 4) + EXPECT_EQ(1ULL, toUnsigned(DieDG.find(Attr_DW_FORM_flag_present), 0)); //---------------------------------------------------------------------- // Test SLEB128 based forms @@ -334,8 +339,9 @@ //---------------------------------------------------------------------- EXPECT_EQ(Dwarf32Values[0], toReference(DieDG.find(Attr_DW_FORM_GNU_ref_alt), 0)); - EXPECT_EQ(Dwarf32Values[1], - toSectionOffset(DieDG.find(Attr_DW_FORM_sec_offset), 0)); + if (Version >= 4) + EXPECT_EQ(Dwarf32Values[1], + toSectionOffset(DieDG.find(Attr_DW_FORM_sec_offset), 0)); //---------------------------------------------------------------------- // Add an address at the end to make sure we can decode this value Index: unittests/Support/BranchProbabilityTest.cpp =================================================================== --- unittests/Support/BranchProbabilityTest.cpp +++ unittests/Support/BranchProbabilityTest.cpp @@ -115,6 +115,54 @@ EXPECT_FALSE(BigZero >= BigOne); } +TEST(BranchProbabilityTest, ArithmeticOperators) { + BP Z(0, 1); + BP O(1, 1); + BP H(1, 2); + BP Q(1, 4); + BP Q3(3, 4); + + 
EXPECT_EQ(Z + O, O); + EXPECT_EQ(H + Z, H); + EXPECT_EQ(H + H, O); + EXPECT_EQ(Q + H, Q3); + EXPECT_EQ(Q + Q3, O); + EXPECT_EQ(H + Q3, O); + EXPECT_EQ(Q3 + Q3, O); + + EXPECT_EQ(Z - O, Z); + EXPECT_EQ(O - Z, O); + EXPECT_EQ(O - H, H); + EXPECT_EQ(O - Q, Q3); + EXPECT_EQ(Q3 - H, Q); + EXPECT_EQ(Q - H, Z); + EXPECT_EQ(Q - Q3, Z); + + EXPECT_EQ(Z * O, Z); + EXPECT_EQ(H * H, Q); + EXPECT_EQ(Q * O, Q); + EXPECT_EQ(O * O, O); + EXPECT_EQ(Z * Z, Z); + + EXPECT_EQ(Z * 3, Z); + EXPECT_EQ(Q * 3, Q3); + EXPECT_EQ(H * 3, O); + EXPECT_EQ(Q3 * 2, O); + EXPECT_EQ(O * UINT32_MAX, O); + + EXPECT_EQ(Z / 4, Z); + EXPECT_EQ(O / 4, Q); + EXPECT_EQ(Q3 / 3, Q); + EXPECT_EQ(H / 2, Q); + EXPECT_EQ(O / 2, H); + EXPECT_EQ(H / UINT32_MAX, Z); + + BP Min(1, 1u << 31); + + EXPECT_EQ(O / UINT32_MAX, Z); + EXPECT_EQ(Min * UINT32_MAX, O); +} + TEST(BranchProbabilityTest, getCompl) { EXPECT_EQ(BP(5, 7), BP(2, 7).getCompl()); EXPECT_EQ(BP(2, 7), BP(5, 7).getCompl()); Index: utils/TableGen/AsmMatcherEmitter.cpp =================================================================== --- utils/TableGen/AsmMatcherEmitter.cpp +++ utils/TableGen/AsmMatcherEmitter.cpp @@ -2861,7 +2861,7 @@ emitValidateOperandClass(Info, OS); // Emit the available features compute function. - SubtargetFeatureInfo::emitComputeAvailableFeatures( + SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures( Info.Target.getName(), ClassName, "ComputeAvailableFeatures", Info.SubtargetFeatures, OS); Index: utils/TableGen/CodeEmitterGen.cpp =================================================================== --- utils/TableGen/CodeEmitterGen.cpp +++ utils/TableGen/CodeEmitterGen.cpp @@ -336,7 +336,7 @@ o << "#endif // NDEBUG\n"; // Emit the available features compute function. - SubtargetFeatureInfo::emitComputeAvailableFeatures( + SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures( Target.getName(), "MCCodeEmitter", "computeAvailableFeatures", SubtargetFeatures, o); Index: utils/TableGen/CodeGenTarget.cpp =================================================================== --- utils/TableGen/CodeGenTarget.cpp +++ utils/TableGen/CodeGenTarget.cpp @@ -126,6 +126,45 @@ case MVT::v2f64: return "MVT::v2f64"; case MVT::v4f64: return "MVT::v4f64"; case MVT::v8f64: return "MVT::v8f64"; + case MVT::nxv2i1: return "MVT::nxv2i1"; + case MVT::nxv4i1: return "MVT::nxv4i1"; + case MVT::nxv8i1: return "MVT::nxv8i1"; + case MVT::nxv16i1: return "MVT::nxv16i1"; + case MVT::nxv32i1: return "MVT::nxv32i1"; + case MVT::nxv1i8: return "MVT::nxv1i8"; + case MVT::nxv2i8: return "MVT::nxv2i8"; + case MVT::nxv4i8: return "MVT::nxv4i8"; + case MVT::nxv8i8: return "MVT::nxv8i8"; + case MVT::nxv16i8: return "MVT::nxv16i8"; + case MVT::nxv32i8: return "MVT::nxv32i8"; + case MVT::nxv1i16: return "MVT::nxv1i16"; + case MVT::nxv2i16: return "MVT::nxv2i16"; + case MVT::nxv4i16: return "MVT::nxv4i16"; + case MVT::nxv8i16: return "MVT::nxv8i16"; + case MVT::nxv16i16: return "MVT::nxv16i16"; + case MVT::nxv32i16: return "MVT::nxv32i16"; + case MVT::nxv1i32: return "MVT::nxv1i32"; + case MVT::nxv2i32: return "MVT::nxv2i32"; + case MVT::nxv4i32: return "MVT::nxv4i32"; + case MVT::nxv8i32: return "MVT::nxv8i32"; + case MVT::nxv16i32: return "MVT::nxv16i32"; + case MVT::nxv1i64: return "MVT::nxv1i64"; + case MVT::nxv2i64: return "MVT::nxv2i64"; + case MVT::nxv4i64: return "MVT::nxv4i64"; + case MVT::nxv8i64: return "MVT::nxv8i64"; + case MVT::nxv16i64: return "MVT::nxv16i64"; + case MVT::nxv2f16: return "MVT::nxv2f16"; + case MVT::nxv4f16: return "MVT::nxv4f16"; + case MVT::nxv8f16: return 
"MVT::nxv8f16"; + case MVT::nxv1f32: return "MVT::nxv1f32"; + case MVT::nxv2f32: return "MVT::nxv2f32"; + case MVT::nxv4f32: return "MVT::nxv4f32"; + case MVT::nxv8f32: return "MVT::nxv8f32"; + case MVT::nxv16f32: return "MVT::nxv16f32"; + case MVT::nxv1f64: return "MVT::nxv1f64"; + case MVT::nxv2f64: return "MVT::nxv2f64"; + case MVT::nxv4f64: return "MVT::nxv4f64"; + case MVT::nxv8f64: return "MVT::nxv8f64"; case MVT::token: return "MVT::token"; case MVT::Metadata: return "MVT::Metadata"; case MVT::iPTR: return "MVT::iPTR"; Index: utils/TableGen/GlobalISelEmitter.cpp =================================================================== --- utils/TableGen/GlobalISelEmitter.cpp +++ utils/TableGen/GlobalISelEmitter.cpp @@ -31,6 +31,7 @@ //===----------------------------------------------------------------------===// #include "CodeGenDAGPatterns.h" +#include "SubtargetFeatureInfo.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -161,20 +162,6 @@ return Explanation; } -static std::string explainRulePredicates(const ArrayRef Predicates) { - std::string Explanation = ""; - StringRef Separator = ""; - for (const auto *P : Predicates) { - Explanation += Separator; - - if (const DefInit *PDef = dyn_cast(P)) { - Explanation += PDef->getDef()->getName(); - } else - Explanation += ""; - } - return Explanation; -} - std::string explainOperator(Record *Operator) { if (Operator->isSubClassOf("SDNode")) return " (" + Operator->getValueAsString("Opcode") + ")"; @@ -238,6 +225,8 @@ /// ID for the next instruction variable defined with defineInsnVar() unsigned NextInsnVarID; + std::vector RequiredFeatures; + public: RuleMatcher() : Matchers(), Actions(), InsnVariableNames(), NextInsnVarID(0) {} @@ -245,6 +234,7 @@ RuleMatcher &operator=(RuleMatcher &&Other) = default; InstructionMatcher &addInstructionMatcher(); + void addRequiredFeature(Record *Feature); template Kind &addAction(Args &&... args); @@ -255,7 +245,9 @@ void emitCxxCapturedInsnList(raw_ostream &OS); void emitCxxCaptureStmts(raw_ostream &OS, StringRef Expr); - void emit(raw_ostream &OS); + void emit(raw_ostream &OS, + std::map + SubtargetFeatures); /// Compare the priority of this object and B. /// @@ -1092,6 +1084,10 @@ return *Matchers.back(); } +void RuleMatcher::addRequiredFeature(Record *Feature) { + RequiredFeatures.push_back(Feature); +} + template Kind &RuleMatcher::addAction(Args &&... args) { Actions.emplace_back(llvm::make_unique(std::forward(args)...)); @@ -1135,7 +1131,9 @@ Matchers.front()->emitCxxCaptureStmts(OS, *this, InsnVarName); } -void RuleMatcher::emit(raw_ostream &OS) { +void RuleMatcher::emit(raw_ostream &OS, + std::map + SubtargetFeatures) { if (Matchers.empty()) llvm_unreachable("Unexpected empty matcher!"); @@ -1149,7 +1147,22 @@ // %elt0(s32), %elt1(s32) = TGT_LOAD_PAIR %ptr // on some targets but we don't need to make use of that yet. 
assert(Matchers.size() == 1 && "Cannot handle multi-root matchers yet"); - OS << "if ([&]() {\n"; + + OS << "if ("; + OS << "[&]() {\n"; + if (!RequiredFeatures.empty()) { + OS << " PredicateBitset ExpectedFeatures = {"; + StringRef Separator = ""; + for (const auto &Predicate : RequiredFeatures) { + const auto &I = SubtargetFeatures.find(Predicate); + assert(I != SubtargetFeatures.end() && "Didn't import predicate?"); + OS << Separator << I->second.getEnumBitName(); + Separator = ", "; + } + OS << "};\n"; + OS << "if ((AvailableFeatures & ExpectedFeatures) != ExpectedFeatures)\n" + << " return false;\n"; + } emitCxxCaptureStmts(OS, "I"); @@ -1264,10 +1277,13 @@ /// GIComplexPatternEquiv. DenseMap ComplexPatternEquivs; + // Map of predicates to their subtarget features. + std::map SubtargetFeatures; + void gatherNodeEquivs(); const CodeGenInstruction *findNodeEquiv(Record *N) const; - Error importRulePredicates(RuleMatcher &M, ArrayRef Predicates) const; + Error importRulePredicates(RuleMatcher &M, ArrayRef Predicates); Expected createAndImportSelDAGMatcher(InstructionMatcher &InsnMatcher, const TreePatternNode *Src) const; @@ -1287,6 +1303,8 @@ /// Analyze pattern \p P, returning a matcher for it if possible. /// Otherwise, return an Error explaining why we don't support it. Expected runOnPattern(const PatternToMatch &P); + + void declareSubtargetFeature(Record *Predicate); }; void GlobalISelEmitter::gatherNodeEquivs() { @@ -1315,10 +1333,13 @@ Error GlobalISelEmitter::importRulePredicates(RuleMatcher &M, - ArrayRef Predicates) const { - if (!Predicates.empty()) - return failedImport("Pattern has a rule predicate (" + - explainRulePredicates(Predicates) + ")"); + ArrayRef Predicates) { + for (const Init *Predicate : Predicates) { + const DefInit *PredicateDef = static_cast(Predicate); + declareSubtargetFeature(PredicateDef->getDef()); + M.addRequiredFeature(PredicateDef->getDef()); + } + return Error::success(); } @@ -1725,6 +1746,13 @@ for (const auto &Rule : Rules) MaxTemporaries = std::max(MaxTemporaries, Rule.countTemporaryOperands()); + OS << "#ifdef GET_GLOBALISEL_PREDICATE_BITSET\n" + << "const unsigned MAX_SUBTARGET_PREDICATES = " << SubtargetFeatures.size() + << ";\n" + << "using PredicateBitset = " + "llvm::PredicateBitsetImpl;\n" + << "#endif // ifdef GET_GLOBALISEL_PREDICATE_BITSET\n\n"; + OS << "#ifdef GET_GLOBALISEL_TEMPORARIES_DECL\n"; for (unsigned I = 0; I < MaxTemporaries; ++I) OS << " mutable MachineOperand TempOp" << I << ";\n"; @@ -1735,14 +1763,21 @@ OS << ", TempOp" << I << "(MachineOperand::CreatePlaceholder())\n"; OS << "#endif // ifdef GET_GLOBALISEL_TEMPORARIES_INIT\n\n"; - OS << "#ifdef GET_GLOBALISEL_IMPL\n" - << "bool " << Target.getName() + OS << "#ifdef GET_GLOBALISEL_IMPL\n"; + SubtargetFeatureInfo::emitSubtargetFeatureBitEnumeration(SubtargetFeatures, + OS); + SubtargetFeatureInfo::emitNameTable(SubtargetFeatures, OS); + SubtargetFeatureInfo::emitComputeAvailableFeatures( + Target.getName(), "InstructionSelector", "computeAvailableFeatures", + SubtargetFeatures, OS); + + OS << "bool " << Target.getName() << "InstructionSelector::selectImpl(MachineInstr &I) const {\n" << " MachineFunction &MF = *I.getParent()->getParent();\n" << " const MachineRegisterInfo &MRI = MF.getRegInfo();\n"; for (auto &Rule : Rules) { - Rule.emit(OS); + Rule.emit(OS, SubtargetFeatures); ++NumPatternEmitted; } @@ -1751,6 +1786,12 @@ << "#endif // ifdef GET_GLOBALISEL_IMPL\n"; } +void GlobalISelEmitter::declareSubtargetFeature(Record *Predicate) { + if 
(SubtargetFeatures.count(Predicate) == 0) + SubtargetFeatures.emplace( + Predicate, SubtargetFeatureInfo(Predicate, SubtargetFeatures.size())); +} + } // end anonymous namespace //===----------------------------------------------------------------------===// Index: utils/TableGen/SubtargetFeatureInfo.h =================================================================== --- utils/TableGen/SubtargetFeatureInfo.h +++ utils/TableGen/SubtargetFeatureInfo.h @@ -37,16 +37,34 @@ return "Feature_" + TheDef->getName().str(); } + /// \brief The name of the enumerated constant identifying the bitnumber for + /// this feature. + std::string getEnumBitName() const { + return "Feature_" + TheDef->getName().str() + "Bit"; + } + void dump() const; static std::vector> getAll(const RecordKeeper &Records); /// Emit the subtarget feature flag definitions. + /// + /// This version emits the bit value for the feature and is therefore limited + /// to 64 feature bits. static void emitSubtargetFeatureFlagEnumeration( std::map &SubtargetFeatures, raw_ostream &OS); + /// Emit the subtarget feature flag definitions. + /// + /// This version emits the bit index for the feature and can therefore support + /// more than 64 feature bits. + static void emitSubtargetFeatureBitEnumeration( + std::map + &SubtargetFeatures, + raw_ostream &OS); + static void emitNameTable(std::map &SubtargetFeatures, raw_ostream &OS); @@ -54,6 +72,9 @@ /// Emit the function to compute the list of available features given a /// subtarget. /// + /// This version is used for subtarget features defined using Predicate<> + /// and supports more than 64 feature bits. + /// /// \param TargetName The name of the target as used in class prefixes (e.g. /// Subtarget) /// \param ClassName The name of the class (without the prefix) @@ -66,6 +87,25 @@ std::map &SubtargetFeatures, raw_ostream &OS); + + /// Emit the function to compute the list of available features given a + /// subtarget. + /// + /// This version is used for subtarget features defined using + /// AssemblerPredicate<> and supports up to 64 feature bits. + /// + /// \param TargetName The name of the target as used in class prefixes (e.g. + /// Subtarget) + /// \param ClassName The name of the class (without the prefix) + /// that will contain the generated functions. + /// \param FuncName The name of the function to emit. + /// \param SubtargetFeatures A map of TableGen records to the + /// SubtargetFeatureInfo equivalent. 
+ static void emitComputeAssemblerAvailableFeatures( + StringRef TargetName, StringRef ClassName, StringRef FuncName, + std::map + &SubtargetFeatures, + raw_ostream &OS); }; } // end namespace llvm Index: utils/TableGen/SubtargetFeatureInfo.cpp =================================================================== --- utils/TableGen/SubtargetFeatureInfo.cpp +++ utils/TableGen/SubtargetFeatureInfo.cpp @@ -59,6 +59,20 @@ OS << "};\n\n"; } +void SubtargetFeatureInfo::emitSubtargetFeatureBitEnumeration( + std::map &SubtargetFeatures, + raw_ostream &OS) { + OS << "// Bits for subtarget features that participate in " + << "instruction matching.\n"; + OS << "enum SubtargetFeatureBits : " + << getMinimalTypeForRange(SubtargetFeatures.size()) << " {\n"; + for (const auto &SF : SubtargetFeatures) { + const SubtargetFeatureInfo &SFI = SF.second; + OS << " " << SFI.getEnumBitName() << " = " << SFI.Index << ",\n"; + } + OS << "};\n\n"; +} + void SubtargetFeatureInfo::emitNameTable( std::map &SubtargetFeatures, raw_ostream &OS) { @@ -90,6 +104,24 @@ StringRef TargetName, StringRef ClassName, StringRef FuncName, std::map &SubtargetFeatures, raw_ostream &OS) { + OS << "PredicateBitset " << TargetName << ClassName << "::\n" + << FuncName << "(const MachineFunction *MF, const " << TargetName + << "Subtarget *Subtarget) const {\n"; + OS << " PredicateBitset Features;\n"; + for (const auto &SF : SubtargetFeatures) { + const SubtargetFeatureInfo &SFI = SF.second; + + OS << " if (" << SFI.TheDef->getValueAsString("CondString") << ")\n"; + OS << " Features[" << SFI.getEnumBitName() << "] = 1;\n"; + } + OS << " return Features;\n"; + OS << "}\n\n"; +} + +void SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures( + StringRef TargetName, StringRef ClassName, StringRef FuncName, + std::map &SubtargetFeatures, + raw_ostream &OS) { OS << "uint64_t " << TargetName << ClassName << "::\n" << FuncName << "(const FeatureBitset& FB) const {\n"; OS << " uint64_t Features = 0;\n"; Index: utils/TableGen/Types.cpp =================================================================== --- utils/TableGen/Types.cpp +++ utils/TableGen/Types.cpp @@ -40,5 +40,6 @@ uint64_t MaxIndex = Size; if (MaxIndex > 0) MaxIndex--; + assert(MaxIndex <= 64 && "Too many bits"); return getMinimalTypeForRange(1ULL << MaxIndex); }
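For reference, below is a minimal sketch of the selector code the updated GlobalISelEmitter is expected to produce for a rule guarded by Requires<[HasA, HasB]>. It is assembled from the emitter changes and the CHECK lines in test/TableGen/GlobalISelEmitter.td above; the MyTarget names are the test's placeholder target rather than a real backend, and the surrounding GET_GLOBALISEL_* boilerplate is abridged.

// One bit per Predicate<> record that participates in instruction selection
// (emitted by emitSubtargetFeatureBitEnumeration).
enum SubtargetFeatureBits : uint8_t {
  Feature_HasABit = 0,
  Feature_HasBBit = 1,
};

// Computed once from the subtarget (emitted by emitComputeAvailableFeatures).
PredicateBitset MyTargetInstructionSelector::computeAvailableFeatures(
    const MachineFunction *MF, const MyTargetSubtarget *Subtarget) const {
  PredicateBitset Features;
  if (Subtarget->hasA())
    Features[Feature_HasABit] = 1;
  if (Subtarget->hasB())
    Features[Feature_HasBBit] = 1;
  return Features;
}

// Inside selectImpl(), each imported rule now begins with a feature check
// (emitted by RuleMatcher::emit) before the usual opcode and operand tests:
if ([&]() {
      PredicateBitset ExpectedFeatures = {Feature_HasABit, Feature_HasBBit};
      if ((AvailableFeatures & ExpectedFeatures) != ExpectedFeatures)
        return false;
      // ... operand, opcode and register-class checks for the MUL pattern ...
      return true;
    }()) { return true; }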