Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -581,13 +581,17 @@
   struct MemCmpExpansionOptions {
     // The list of available load sizes (in bytes), sorted in decreasing order.
     SmallVector<unsigned, 8> LoadSizes;
+    // Set to true to allow overlapping loads. For example, 7-byte compares can
+    // be done with two 4-byte compares instead of 4+2+1-byte compares. This
+    // requires all loads in LoadSizes to be doable in an unaligned way.
+    bool AllowOverlappingLoads = false;
   };
   const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;
 
   /// Enable matching of interleaved access groups.
   bool enableInterleavedAccessVectorization() const;
 
-  /// Enable matching of interleaved access groups that contain predicated 
+  /// Enable matching of interleaved access groups that contain predicated
   /// accesses or gaps and therefore vectorized using masked
   /// vector loads/stores.
   bool enableMaskedInterleavedAccessVectorization() const;
@@ -772,7 +776,7 @@
   /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
   /// The index and subtype parameters are used by the subvector insertion and
   /// extraction shuffle kinds to show the insert/extract point and the type of
-  /// the subvector being inserted/extracted. 
+  /// the subvector being inserted/extracted.
   /// NOTE: For subvector extractions Tp represents the source type.
   int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
                      Type *SubTp = nullptr) const;
Index: include/llvm/CodeGen/TargetLowering.h
===================================================================
--- include/llvm/CodeGen/TargetLowering.h
+++ include/llvm/CodeGen/TargetLowering.h
@@ -1746,6 +1746,16 @@
     return false;
   }
 
+  /// Return true if it is more correct/profitable to use strict FP_TO_INT
+  /// conversion operations - canonicalizing the FP source value first and
+  /// converting once, instead of converting all cases and then selecting
+  /// based on value. This may be true if the target throws exceptions for
+  /// out of bounds conversions or has fast FP CMOV. Defaults to false.
+  virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
+                                        bool IsSigned) const {
+    return false;
+  }
+
   //===--------------------------------------------------------------------===//
   // TargetLowering Configuration Methods - These methods should be invoked by
   // the derived class constructor to configure this object for the target.
Index: lib/CodeGen/ExpandMemCmp.cpp
===================================================================
--- lib/CodeGen/ExpandMemCmp.cpp
+++ lib/CodeGen/ExpandMemCmp.cpp
@@ -66,23 +66,18 @@
   // Represents the decomposition in blocks of the expansion. For example,
   // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
-  // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {32, 1}.
+  // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}].
-  // TODO(courbet): Involve the target more in this computation. On X86, 7
-  // bytes can be done more efficiently with two overlaping 4-byte loads than
-  // covering the interval with [{4, 0},{2, 4},{1, 6}}.
   struct LoadEntry {
     LoadEntry(unsigned LoadSize, uint64_t Offset)
         : LoadSize(LoadSize), Offset(Offset) {
-      assert(Offset % LoadSize == 0 && "invalid load entry");
     }
 
-    uint64_t getGEPIndex() const { return Offset / LoadSize; }
-
     // The size of the load for this block, in bytes.
-    const unsigned LoadSize;
-    // The offset of this load WRT the base pointer, in bytes.
-    const uint64_t Offset;
+    unsigned LoadSize;
+    // The offset of this load from the base pointer, in bytes.
+    uint64_t Offset;
   };
-  SmallVector<LoadEntry, 8> LoadSequence;
+  using LoadEntryVector = SmallVector<LoadEntry, 8>;
+  LoadEntryVector LoadSequence;
 
   void createLoadCmpBlocks();
   void createResultBlock();
@@ -92,13 +87,23 @@
   void emitLoadCompareBlock(unsigned BlockIndex);
   void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
                                          unsigned &LoadIndex);
-  void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned GEPIndex);
+  void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);
   void emitMemCmpResultBlock();
   Value *getMemCmpExpansionZeroCase();
   Value *getMemCmpEqZeroOneBlock();
   Value *getMemCmpOneBlock();
+  Value *getPtrToElementAtOffset(Value *Source, Type *LoadSizeType,
+                                 uint64_t OffsetBytes);
+
+  static LoadEntryVector
+  computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
+                            unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);
+  static LoadEntryVector
+  computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,
+                                 unsigned MaxNumLoads,
+                                 unsigned &NumLoadsNonOneByte);
 
- public:
+public:
   MemCmpExpansion(CallInst *CI, uint64_t Size,
                   const TargetTransformInfo::MemCmpExpansionOptions &Options,
                   unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
@@ -110,6 +115,75 @@
   Value *getMemCmpExpansion();
 };
 
+// Greedily covers `Size` bytes, largest size first; returns empty if more than
+// `MaxNumLoads` loads are needed. Counts distinct sizes > 1 in the out-param.
+MemCmpExpansion::LoadEntryVector MemCmpExpansion::computeGreedyLoadSequence(
+    uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
+    const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) {
+  NumLoadsNonOneByte = 0;
+  LoadEntryVector LoadSequence;
+  uint64_t Offset = 0;
+  while (Size && !LoadSizes.empty()) {
+    const unsigned LoadSize = LoadSizes.front();
+    assert(LoadSize > 0 && "zero load size");
+    const uint64_t NumLoadsForThisSize = Size / LoadSize;
+    if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
+      // Too many loads for the target. Exit before completing the expansion so
+      // that large sizes do not use a ton of memory to store it.
+      return {};
+    }
+    if (NumLoadsForThisSize > 0) {
+      for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
+        LoadSequence.push_back({LoadSize, Offset});
+        Offset += LoadSize;
+      }
+      if (LoadSize > 1)
+        ++NumLoadsNonOneByte;
+      Size = Size % LoadSize;
+    }
+    LoadSizes = LoadSizes.drop_front();
+  }
+  return LoadSequence;
+}
+
+// Covers `Size` bytes with `MaxLoadSize`-byte loads only; the last load may
+// overlap the previous one. Returns empty if not applicable or over the limit.
+MemCmpExpansion::LoadEntryVector
+MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
+                                                const unsigned MaxLoadSize,
+                                                const unsigned MaxNumLoads,
+                                                unsigned &NumLoadsNonOneByte) {
+  NumLoadsNonOneByte = 0;
+  // These are already handled by the greedy approach.
+  if (Size < 2 || MaxLoadSize < 2)
+    return {};
+  // Do as many non-overlapping loads as possible starting from the beginning.
+  const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize;
+  assert(NumNonOverlappingLoads && "there must be at least one load");
+  // There remain 0 to (MaxLoadSize - 1) bytes to load; this will be done with
+  // an overlapping load.
+  Size = Size - NumNonOverlappingLoads * MaxLoadSize;
+  // Bail if the number of loads (non-overlapping + potential overlapping one)
+  // is larger than the max allowed.
+  if (NumNonOverlappingLoads + !!(Size > 0) > MaxNumLoads)
+    return {};
+  // Add non-overlapping loads.
+  LoadEntryVector LoadSequence;
+  uint64_t Offset = 0;
+  for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) {
+    LoadSequence.push_back({MaxLoadSize, Offset});
+    Offset += MaxLoadSize;
+  }
+  // Add the last overlapping load.
+  if (Size > 0) {
+    assert(Size < MaxLoadSize && "broken invariant");
+    LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)});
+  }
+  // Every load uses the single size MaxLoadSize, which is at least 2 here.
+  NumLoadsNonOneByte = 1;
+  return LoadSequence;
+}
+
 // Initialize the basic block structure required for expansion of memcmp call
 // with given maximum load size and memcmp size parameter.
 // This structure includes:
@@ -133,38 +207,31 @@
       Builder(CI) {
   assert(Size > 0 && "zero blocks");
   // Scale the max size down if the target can load more bytes than we need.
-  size_t LoadSizeIndex = 0;
-  while (LoadSizeIndex < Options.LoadSizes.size() &&
-         Options.LoadSizes[LoadSizeIndex] > Size) {
-    ++LoadSizeIndex;
+  llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
+  while (!LoadSizes.empty() && LoadSizes.front() > Size) {
+    LoadSizes = LoadSizes.drop_front();
   }
-  this->MaxLoadSize = Options.LoadSizes[LoadSizeIndex];
+  assert(!LoadSizes.empty() && "cannot load Size bytes");
+  MaxLoadSize = LoadSizes.front();
   // Compute the decomposition.
-  uint64_t CurSize = Size;
-  uint64_t Offset = 0;
-  while (CurSize && LoadSizeIndex < Options.LoadSizes.size()) {
-    const unsigned LoadSize = Options.LoadSizes[LoadSizeIndex];
-    assert(LoadSize > 0 && "zero load size");
-    const uint64_t NumLoadsForThisSize = CurSize / LoadSize;
-    if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
-      // Do not expand if the total number of loads is larger than what the
-      // target allows. Note that it's important that we exit before completing
-      // the expansion to avoid using a ton of memory to store the expansion for
-      // large sizes.
-      LoadSequence.clear();
-      return;
-    }
-    if (NumLoadsForThisSize > 0) {
-      for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
-        LoadSequence.push_back({LoadSize, Offset});
-        Offset += LoadSize;
-      }
-      if (LoadSize > 1) {
-        ++NumLoadsNonOneByte;
-      }
-      CurSize = CurSize % LoadSize;
+  unsigned GreedyNumLoadsNonOneByte = 0;
+  LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, MaxNumLoads,
+                                           GreedyNumLoadsNonOneByte);
+  NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;
+  assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
+  // If we allow overlapping loads and the load sequence is not already optimal,
+  // use overlapping loads.
+  if (Options.AllowOverlappingLoads &&
+      (LoadSequence.empty() || LoadSequence.size() > 2)) {
+    unsigned OverlappingNumLoadsNonOneByte = 0;
+    auto OverlappingLoads = computeOverlappingLoadSequence(
+        Size, MaxLoadSize, MaxNumLoads, OverlappingNumLoadsNonOneByte);
+    if (!OverlappingLoads.empty() &&
+        (LoadSequence.empty() ||
+         OverlappingLoads.size() < LoadSequence.size())) {
+      LoadSequence = OverlappingLoads;
+      NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;
     }
-    ++LoadSizeIndex;
   }
   assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
 }
@@ -189,30 +256,32 @@
                                    EndBlock->getParent(), EndBlock);
 }
 
+/// Return a pointer to an element of type `LoadSizeType` at offset
+/// `OffsetBytes`. The GEP index is i64: an i8 index would wrap past 127.
+Value *MemCmpExpansion::getPtrToElementAtOffset(Value *Source,
+                                                Type *LoadSizeType,
+                                                uint64_t OffsetBytes) {
+  if (OffsetBytes > 0) {
+    auto *ByteType = Type::getInt8Ty(CI->getContext());
+    Source = Builder.CreateGEP(
+        ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()),
+        Builder.getInt64(OffsetBytes));
+  }
+  return Builder.CreateBitCast(Source, LoadSizeType->getPointerTo());
+}
+
 // This function creates the IR instructions for loading and comparing 1 byte.
 // It loads 1 byte from each source of the memcmp parameters with the given
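-// GEPIndex. It then subtracts the two loaded values and adds this result to the
-// final phi node for selecting the memcmp result.
+// OffsetBytes. It then subtracts the two loaded values and adds this result to
+// the final phi node for selecting the memcmp result.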
 void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
-                                               unsigned GEPIndex) {
-  Value *Source1 = CI->getArgOperand(0);
-  Value *Source2 = CI->getArgOperand(1);
-
+                                               unsigned OffsetBytes) {
   Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
   Type *LoadSizeType = Type::getInt8Ty(CI->getContext());
-  // Cast source to LoadSizeType*.
-  if (Source1->getType() != LoadSizeType)
-    Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
-  if (Source2->getType() != LoadSizeType)
-    Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
-
-  // Get the base address using the GEPIndex.
-  if (GEPIndex != 0) {
-    Source1 = Builder.CreateGEP(LoadSizeType, Source1,
-                                ConstantInt::get(LoadSizeType, GEPIndex));
-    Source2 = Builder.CreateGEP(LoadSizeType, Source2,
-                                ConstantInt::get(LoadSizeType, GEPIndex));
-  }
+  Value *Source1 =
+      getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, OffsetBytes);
+  Value *Source2 =
+      getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, OffsetBytes);
 
   Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
   Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
@@ -270,24 +339,10 @@
     IntegerType *LoadSizeType =
         IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
 
-    Value *Source1 = CI->getArgOperand(0);
-    Value *Source2 = CI->getArgOperand(1);
-
-    // Cast source to LoadSizeType*.
-    if (Source1->getType() != LoadSizeType)
-      Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
-    if (Source2->getType() != LoadSizeType)
-      Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
-
-    // Get the base address using a GEP.
-    if (CurLoadEntry.Offset != 0) {
-      Source1 = Builder.CreateGEP(
-          LoadSizeType, Source1,
-          ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
-      Source2 = Builder.CreateGEP(
-          LoadSizeType, Source2,
-          ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
-    }
+    Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType,
+                                             CurLoadEntry.Offset);
+    Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType,
+                                             CurLoadEntry.Offset);
 
     // Get a constant or load a value for each source address.
     Value *LoadSrc1 = nullptr;
@@ -378,8 +433,7 @@
   const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex];
 
   if (CurLoadEntry.LoadSize == 1) {
-    MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex,
-                                              CurLoadEntry.getGEPIndex());
+    MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset);
     return;
   }
 
@@ -388,25 +442,12 @@
   Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
   assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");
 
-  Value *Source1 = CI->getArgOperand(0);
-  Value *Source2 = CI->getArgOperand(1);
-
   Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
-  // Cast source to LoadSizeType*.
-  if (Source1->getType() != LoadSizeType)
-    Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
-  if (Source2->getType() != LoadSizeType)
-    Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
 
-  // Get the base address using a GEP.
-  if (CurLoadEntry.Offset != 0) {
-    Source1 = Builder.CreateGEP(
-        LoadSizeType, Source1,
-        ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
-    Source2 = Builder.CreateGEP(
-        LoadSizeType, Source2,
-        ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex()));
-  }
+  Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType,
+                                           CurLoadEntry.Offset);
+  Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType,
+                                           CurLoadEntry.Offset);
 
   // Load LoadSizeType from the base address.
   Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
@@ -694,7 +735,6 @@
   if (SizeVal == 0) {
     return false;
   }
-
   // TTI call to check if target would like to expand memcmp. Also, get the
   // available load sizes.
   const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9461,6 +9461,9 @@
   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
     return Res;
 
+  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+    return SDValue(N, 0);
+
   return SDValue();
 }
 
@@ -9474,6 +9477,9 @@
   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
     return Res;
 
+  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+    return SDValue(N, 0);
+
   return SDValue();
 }
 
Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1736,6 +1736,20 @@
     }
     break;
   }
+  case ISD::ANY_EXTEND_VECTOR_INREG:
+  case ISD::SIGN_EXTEND_VECTOR_INREG:
+  case ISD::ZERO_EXTEND_VECTOR_INREG: {
+    APInt SrcUndef, SrcZero;
+    SDValue Src = Op.getOperand(0);
+    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+    APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
+    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef,
+                                   SrcZero, TLO, Depth + 1))
+      return true;
+    KnownZero = SrcZero.zextOrTrunc(NumElts);
+    KnownUndef = SrcUndef.zextOrTrunc(NumElts);
+    break;
+  }
   case ISD::ADD:
   case ISD::SUB:
   case ISD::FADD:
@@ -1755,6 +1769,9 @@
     break;
   }
   case ISD::TRUNCATE:
+  case ISD::ANY_EXTEND:
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
                                    KnownZero, TLO, Depth + 1))
       return true;
@@ -4183,20 +4200,39 @@
     return true;
   }
 
-  // Expand based on maximum range of FP_TO_SINT:
-  // True = fp_to_sint(Src)
-  // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
-  // Result = select (Src < 0x8000000000000000), True, False
   SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
   SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
 
-  SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
-  // TODO: Should any fast-math-flags be set for the FSUB?
-  SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
-                              DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
-  False = DAG.getNode(ISD::XOR, dl, DstVT, False,
-                      DAG.getConstant(SignMask, dl, DstVT));
-  Result = DAG.getSelect(dl, DstVT, Sel, True, False);
+  bool Strict = shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
+  if (Strict) {
+    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
+    // signmask then offset (the result of which should be fully representable).
+    // Sel = Src < 0x8000000000000000
+    // Val = select Sel, Src, Src - 0x8000000000000000
+    // Ofs = select Sel, 0, 0x8000000000000000
+    // Result = fp_to_sint(Val) ^ Ofs
+
+    // TODO: Should any fast-math-flags be set for the FSUB?
+    SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src,
+                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
+    SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT),
+                                DAG.getConstant(SignMask, dl, DstVT));
+    Result = DAG.getNode(ISD::XOR, dl, DstVT,
+                         DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val), Ofs);
+  } else {
+    // Expand based on maximum range of FP_TO_SINT:
+    // True = fp_to_sint(Src)
+    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
+    // Result = select (Src < 0x8000000000000000), True, False
+
+    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
+    // TODO: Should any fast-math-flags be set for the FSUB?
+    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
+                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
+    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
+                        DAG.getConstant(SignMask, dl, DstVT));
+    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
+  }
   return true;
 }
 
Index: lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
===================================================================
--- lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -396,7 +396,8 @@
       uint8_t SegmentDescriptorSize = Version < 4 ? 0 : Data.getU8(&Offset);
       uint64_t CodeAlignmentFactor = Data.getULEB128(&Offset);
       int64_t DataAlignmentFactor = Data.getSLEB128(&Offset);
-      uint64_t ReturnAddressRegister = Data.getULEB128(&Offset);
+      uint64_t ReturnAddressRegister =
+          Version == 1 ? Data.getU8(&Offset) : Data.getULEB128(&Offset);
 
       // Parse the augmentation data for EH CIEs
       StringRef AugmentationData("");
Index: lib/Target/AArch64/AArch64.td
===================================================================
--- lib/Target/AArch64/AArch64.td
+++ lib/Target/AArch64/AArch64.td
@@ -65,18 +65,6 @@
 def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true",
   "Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions">;
 
-def FeaturePAN : SubtargetFeature<
-    "pan", "HasPAN", "true",
-    "Enables ARM v8.1 Privileged Access-Never extension">;
-
-def FeatureLOR : SubtargetFeature<
-    "lor", "HasLOR", "true",
-    "Enables ARM v8.1 Limited Ordering Regions extension">;
-
-def FeatureVH : SubtargetFeature<
-    "vh", "HasVH", "true",
-    "Enables ARM v8.1 Virtual Host extension">;
-
 def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
   "Enable ARMv8 PMUv3 Performance Monitors extension">;
 
@@ -89,18 +77,6 @@
 def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true",
   "Enable Statistical Profiling extension">;
 
-def FeaturePAN_RWV : SubtargetFeature<
-    "pan-rwv", "HasPAN_RWV", "true",
-    "Enable v8.2 PAN s1e1R and s1e1W Variants",
-    [FeaturePAN]>;
-
-// UAO PState
-def FeaturePsUAO : SubtargetFeature< "uaops", "HasPsUAO", "true",
-    "Enable v8.2 UAO PState">;
-
-def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP",
-    "true", "Enable v8.2 data Cache Clean to Point of Persistence" >;
-
 def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true",
   "Enable Scalable Vector Extension (SVE) instructions">;
 
@@ -219,66 +195,6 @@
     "dotprod", "HasDotProd", "true",
     "Enable dot product support">;
 
-def FeaturePA : SubtargetFeature<
-    "pa", "HasPA", "true",
-    "Enable v8.3-A Pointer Authentication enchancement">;
-
-def FeatureJS : SubtargetFeature<
-    "jsconv", "HasJS", "true",
-    "Enable v8.3-A JavaScript FP conversion enchancement",
-    [FeatureFPARMv8]>;
-
-def FeatureCCIDX : SubtargetFeature<
-    "ccidx", "HasCCIDX", "true",
-    "Enable v8.3-A Extend of the CCSIDR number of sets">;
-
-def FeatureComplxNum : SubtargetFeature<
-    "complxnum", "HasComplxNum", "true",
-    "Enable v8.3-A Floating-point complex number support",
-    [FeatureNEON]>;
-
-def FeatureNV : SubtargetFeature<
-    "nv", "HasNV", "true",
-    "Enable v8.4-A Nested Virtualization Enchancement">;
-
-def FeatureRASv8_4 : SubtargetFeature<
-    "rasv8_4", "HasRASv8_4", "true",
-    "Enable v8.4-A Reliability, Availability and Serviceability extension",
-    [FeatureRAS]>;
-
-def FeatureMPAM : SubtargetFeature<
-    "mpam", "HasMPAM", "true",
-    "Enable v8.4-A Memory system Partitioning and Monitoring extension">;
-
-def FeatureDIT : SubtargetFeature<
-    "dit", "HasDIT", "true",
-    "Enable v8.4-A Data Independent Timing instructions">;
-
-def FeatureTRACEV8_4 : SubtargetFeature<
-    "tracev8.4", "HasTRACEV8_4", "true",
-    "Enable v8.4-A Trace extension">;
-
-def FeatureAM : SubtargetFeature<
-    "am", "HasAM", "true",
-    "Enable v8.4-A Activity Monitors extension">;
-
-def FeatureSEL2 : SubtargetFeature<
-    "sel2", "HasSEL2", "true",
-    "Enable v8.4-A Secure Exception Level 2 extension">;
-
-def FeatureTLB_RMI : SubtargetFeature<
-    "tlb-rmi", "HasTLB_RMI", "true",
-    "Enable v8.4-A TLB Range and Maintenance Instructions">;
-
-def FeatureFMI : SubtargetFeature<
-    "fmi", "HasFMI", "true",
-    "Enable v8.4-A Flag Manipulation Instructions">;
-
-// 8.4 RCPC enchancements: LDAPR & STLR instructions with Immediate Offset
-def FeatureRCPC_IMMO : SubtargetFeature<"rcpc-immo", "HasRCPC_IMMO", "true",
-    "Enable v8.4-A RCPC instructions with Immediate Offsets",
-    [FeatureRCPC]>;
-
 def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates",
                                         "NegativeImmediates", "false",
                                         "Convert immediates and instructions "
@@ -316,7 +232,7 @@
   "Enable execution and data prediction invalidation instructions" >;
 
 def FeatureCacheDeepPersist : SubtargetFeature<"ccdp", "HasCCDP",
-    "true", "Enable v8.5 Cache Clean to Point of Deep Persistence" >;
+    "true", "Enable Cache Clean to Point of Deep Persistence" >;
 
 def FeatureBranchTargetId : SubtargetFeature<"bti", "HasBTI",
     "true", "Enable Branch Target Identification" >;
@@ -332,22 +248,16 @@
 //
 
 def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
-  "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE, FeatureRDM,
-  FeaturePAN, FeatureLOR, FeatureVH]>;
+  "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE, FeatureRDM]>;
 
 def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
-  "Support ARM v8.2a instructions", [HasV8_1aOps, FeaturePsUAO, 
-  FeaturePAN_RWV, FeatureRAS, FeatureCCPP]>;
+  "Support ARM v8.2a instructions", [HasV8_1aOps, FeatureRAS]>;
 
 def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true",
-  "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC, FeaturePA,
-  FeatureJS, FeatureCCIDX, FeatureComplxNum]>;
+  "Support ARM v8.3a instructions", [HasV8_2aOps, FeatureRCPC]>;
 
 def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true",
-  "Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd,
-  FeatureNV, FeatureRASv8_4, FeatureMPAM, FeatureDIT,
-  FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeatureTLB_RMI,
-  FeatureFMI, FeatureRCPC_IMMO]>;
+  "Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd]>;
 
 def HasV8_5aOps : SubtargetFeature<
   "v8.5a", "HasV8_5aOps", "true", "Support ARM v8.5a instructions",
Index: lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- lib/Target/AArch64/AArch64InstrFormats.td
+++ lib/Target/AArch64/AArch64InstrFormats.td
@@ -9989,10 +9989,9 @@
   let Inst{4-0}   = Rd;
 }
 
-//8.3 CompNum - Floating-point complex number support
 multiclass SIMDThreeSameVectorComplexHSD<bit U, bits<3> opcode, Operand rottype,
                                           string asm, SDPatternOperator OpNode>{
-  let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
+  let Predicates = [HasV8_3a, HasNEON, HasFullFP16] in {
   def v4f16 : BaseSIMDThreeSameVectorComplex<0, U, 0b01, opcode, V64, rottype,
               asm, ".4h",
               [(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd),
@@ -10008,7 +10007,7 @@
                                                (rottype i32:$rot)))]>;
   }
 
-  let Predicates = [HasComplxNum, HasNEON] in {
+  let Predicates = [HasV8_3a, HasNEON] in {
   def v2f32 : BaseSIMDThreeSameVectorComplex<0, U, 0b10, opcode, V64, rottype,
               asm, ".2s",
               [(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd),
@@ -10064,7 +10063,7 @@
 multiclass SIMDThreeSameVectorTiedComplexHSD<bit U, bits<3> opcode,
                                              Operand rottype, string asm,
                                              SDPatternOperator OpNode> {
-  let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
+  let Predicates = [HasV8_3a, HasNEON, HasFullFP16] in {
   def v4f16 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b01, opcode, V64,
               rottype, asm, ".4h",
               [(set (v4f16 V64:$dst), (OpNode (v4f16 V64:$Rd),
@@ -10080,7 +10079,7 @@
                                                (rottype i32:$rot)))]>;
   }
 
-  let Predicates = [HasComplxNum, HasNEON] in {
+  let Predicates = [HasV8_3a, HasNEON] in {
   def v2f32 : BaseSIMDThreeSameVectorTiedComplex<0, U, 0b10, opcode, V64,
               rottype, asm, ".2s",
               [(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd),
@@ -10146,7 +10145,7 @@
 // classes.
 multiclass SIMDIndexedTiedComplexHSD<bit U, bit opc1, bit opc2, Operand rottype,
                                      string asm, SDPatternOperator OpNode> {
-  let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
+  let Predicates = [HasV8_3a,HasNEON,HasFullFP16] in {
   def v4f16_indexed : BaseSIMDIndexedTiedComplex<0, 1, 0, 0b01, opc1, opc2, V64,
                       V64, V128, VectorIndexD, rottype, asm, ".4h", ".4h",
                       ".4h", ".h", []> {
@@ -10162,9 +10161,9 @@
     let Inst{11} = idx{1};
     let Inst{21} = idx{0};
   }
-  } // Predicates = HasComplxNum, HasNEON, HasFullFP16]
+  } // Predicates = [HasV8_3a,HasNEON,HasFullFP16]
 
-  let Predicates = [HasComplxNum, HasNEON] in {
+  let Predicates = [HasV8_3a,HasNEON] in {
   def v4f32_indexed : BaseSIMDIndexedTiedComplex<1, 1, 0, 0b10, opc1, opc2,
                       V128, V128, V128, VectorIndexD, rottype, asm, ".4s",
                       ".4s", ".4s", ".s", []> {
@@ -10172,7 +10171,7 @@
     let Inst{11} = idx{0};
     let Inst{21} = 0;
   }
-  } // Predicates = [HasComplxNum, HasNEON]
+  } // Predicates = [HasV8_3a,HasNEON]
 }
 
 //----------------------------------------------------------------------------
Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2382,9 +2382,8 @@
   if (BaseOp1.getType() != BaseOp2.getType())
     return false;
 
-  assert(BaseOp1.isReg() ||
-         BaseOp1.isFI() &&
-             "Only base registers and frame indices are supported.");
+  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
+         "Only base registers and frame indices are supported.");
 
   // Check for both base regs and base FI.
   if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -24,54 +24,6 @@
                                  AssemblerPredicate<"HasV8_4aOps", "armv8.4a">;
 def HasV8_5a         : Predicate<"Subtarget->hasV8_5aOps()">,
                                  AssemblerPredicate<"HasV8_5aOps", "armv8.5a">;
-def HasVH            : Predicate<"Subtarget->hasVH()">,
-                       AssemblerPredicate<"FeatureVH", "vh">;
-
-def HasLOR           : Predicate<"Subtarget->hasLOR()">,
-                       AssemblerPredicate<"FeatureLOR", "lor">;
-
-def HasPA            : Predicate<"Subtarget->hasPA()">,
-                       AssemblerPredicate<"FeaturePA", "pa">;
-
-def HasJS            : Predicate<"Subtarget->hasJS()">,
-                       AssemblerPredicate<"FeatureJS", "jsconv">;
-
-def HasCCIDX         : Predicate<"Subtarget->hasCCIDX()">,
-                       AssemblerPredicate<"FeatureCCIDX", "ccidx">;
-
-def HasComplxNum      : Predicate<"Subtarget->hasComplxNum()">,
-                       AssemblerPredicate<"FeatureComplxNum", "complxnum">;
-
-def HasNV            : Predicate<"Subtarget->hasNV()">,
-                       AssemblerPredicate<"FeatureNV", "nv">;
-
-def HasRASv8_4       : Predicate<"Subtarget->hasRASv8_4()">,
-                       AssemblerPredicate<"FeatureRASv8_4", "rasv8_4">;
-
-def HasMPAM          : Predicate<"Subtarget->hasMPAM()">,
-                       AssemblerPredicate<"FeatureMPAM", "mpam">;
-
-def HasDIT           : Predicate<"Subtarget->hasDIT()">,
-                       AssemblerPredicate<"FeatureDIT", "dit">;
-
-def HasTRACEV8_4         : Predicate<"Subtarget->hasTRACEV8_4()">,
-                       AssemblerPredicate<"FeatureTRACEV8_4", "tracev8.4">;
-
-def HasAM            : Predicate<"Subtarget->hasAM()">,
-                       AssemblerPredicate<"FeatureAM", "am">;
-
-def HasSEL2          : Predicate<"Subtarget->hasSEL2()">,
-                       AssemblerPredicate<"FeatureSEL2", "sel2">;
-
-def HasTLB_RMI          : Predicate<"Subtarget->hasTLB_RMI()">,
-                       AssemblerPredicate<"FeatureTLB_RMI", "tlb-rmi">;
-
-def HasFMI           : Predicate<"Subtarget->hasFMI()">,
-                       AssemblerPredicate<"FeatureFMI", "fmi">;
-
-def HasRCPC_IMMO      : Predicate<"Subtarget->hasRCPCImm()">,
-                       AssemblerPredicate<"FeatureRCPC_IMMO", "rcpc-immo">;
-
 def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8()">,
                                AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
 def HasNEON          : Predicate<"Subtarget->hasNEON()">,
@@ -558,7 +510,7 @@
 def TSB   : CRmSystemI<barrier_op, 0b010, "tsb", []> {
   let CRm        = 0b0010;
   let Inst{12}   = 0;
-  let Predicates = [HasTRACEV8_4];
+  let Predicates = [HasV8_4a];
 }
 }
 
@@ -650,7 +602,7 @@
 }
 
 // These pointer authentication isntructions require armv8.3a
-let Predicates = [HasPA] in {
+let Predicates = [HasV8_3a] in {
   multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm> {
     def IA   : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia")>;
     def IB   : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib")>;
@@ -690,17 +642,17 @@
   defm LDRAA  : AuthLoad<0, "ldraa", simm10Scaled>;
   defm LDRAB  : AuthLoad<1, "ldrab", simm10Scaled>;
 
-}
+  // v8.3a floating point conversion for javascript
+  let Predicates = [HasV8_3a, HasFPARMv8] in
+  def FJCVTZS  : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
+                                        "fjcvtzs", []> {
+    let Inst{31} = 0;
+  }
 
-// v8.3a floating point conversion for javascript
-let Predicates = [HasJS, HasFPARMv8] in
-def FJCVTZS  : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
-                                      "fjcvtzs", []> {
-  let Inst{31} = 0;
-} // HasJS, HasFPARMv8
+} // HasV8_3a
 
 // v8.4 Flag manipulation instructions
-let Predicates = [HasFMI] in {
+let Predicates = [HasV8_4a] in {
 def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> {
   let Inst{20-5} = 0b0000001000000000;
 }
@@ -708,7 +660,7 @@
 def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">;
 def RMIF   : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif",
                         "{\t$Rn, $imm, $mask}">;
-} // HasFMI
+} // HasV8_4a
 
 // v8.5 flag manipulation instructions
 let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in {
@@ -2677,9 +2629,8 @@
                          [(truncstorei8 GPR32z:$Rt,
                                   (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
 
-// Armv8.4 Weaker Release Consistency enhancements
-//         LDAPR & STLR with Immediate Offset instructions
-let Predicates = [HasRCPC_IMMO] in {
+// Armv8.4 LDAPR & STLR with Immediate Offset instruction
+let Predicates = [HasV8_4a] in {
 defm STLURB     : BaseStoreUnscaleV84<"stlurb",  0b00, 0b00, GPR32>;
 defm STLURH     : BaseStoreUnscaleV84<"stlurh",  0b01, 0b00, GPR32>;
 defm STLURW     : BaseStoreUnscaleV84<"stlur",   0b10, 0b00, GPR32>;
@@ -2964,7 +2915,7 @@
 def STXPW  : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
 def STXPX  : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
 
-let Predicates = [HasLOR] in {
+let Predicates = [HasV8_1a] in {
   // v8.1a "Limited Order Region" extension load-acquire instructions
   def LDLARW  : LoadAcquire   <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
   def LDLARX  : LoadAcquire   <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
Index: lib/Target/AArch64/AArch64Subtarget.h
===================================================================
--- lib/Target/AArch64/AArch64Subtarget.h
+++ lib/Target/AArch64/AArch64Subtarget.h
@@ -83,33 +83,6 @@
   bool HasFP16FML = false;
   bool HasSPE = false;
 
-  // ARMv8.1 extensions
-  bool HasVH = false;
-  bool HasPAN = false;
-  bool HasLOR = false;
-
-  // ARMv8.2 extensions
-  bool HasPsUAO = false;
-  bool HasPAN_RWV = false;
-  bool HasCCPP = false;
-
-  // ARMv8.3 extensions
-  bool HasPA = false;
-  bool HasJS = false;
-  bool HasCCIDX = false;
-  bool HasComplxNum = false;
-
-  // ARMv8.4 extensions
-  bool HasNV = false;
-  bool HasRASv8_4 = false;
-  bool HasMPAM = false;
-  bool HasDIT = false;
-  bool HasTRACEV8_4 = false;
-  bool HasAM = false;
-  bool HasSEL2 = false;
-  bool HasTLB_RMI = false;
-  bool HasFMI = false;
-  bool HasRCPC_IMMO = false;
   // ARMv8.4 Crypto extensions
   bool HasSM4 = true;
   bool HasSHA3 = true;
@@ -378,30 +351,6 @@
 
   bool useAA() const override { return UseAA; }
 
-  bool hasVH() const { return HasVH; }
-  bool hasPAN() const { return HasPAN; }
-  bool hasLOR() const { return HasLOR; }
-
-  bool hasPsUAO() const { return HasPsUAO; }
-  bool hasPAN_RWV() const { return HasPAN_RWV; }
-  bool hasCCPP() const { return HasCCPP; }
-
-  bool hasPA() const { return HasPA; }
-  bool hasJS() const { return HasJS; }
-  bool hasCCIDX() const { return HasCCIDX; }
-  bool hasComplxNum() const { return HasComplxNum; }
-
-  bool hasNV() const { return HasNV; }
-  bool hasRASv8_4() const { return HasRASv8_4; }
-  bool hasMPAM() const { return HasMPAM; }
-  bool hasDIT() const { return HasDIT; }
-  bool hasTRACEV8_4() const { return HasTRACEV8_4; }
-  bool hasAM() const { return HasAM; }
-  bool hasSEL2() const { return HasSEL2; }
-  bool hasTLB_RMI() const { return HasTLB_RMI; }
-  bool hasFMI() const { return HasFMI; }
-  bool hasRCPC_IMMO() const { return HasRCPC_IMMO; }
-
   bool useSmallAddressing() const {
     switch (TLInfo.getTargetMachine().getCodeModel()) {
       case CodeModel::Kernel:
Index: lib/Target/AArch64/AArch64SystemOperands.td
===================================================================
--- lib/Target/AArch64/AArch64SystemOperands.td
+++ lib/Target/AArch64/AArch64SystemOperands.td
@@ -14,25 +14,6 @@
 
 include "llvm/TableGen/SearchableTable.td"
 
-//===----------------------------------------------------------------------===//
-// Features that, for the compiler, only enable system operands and PStates
-//===----------------------------------------------------------------------===//
-
-def HasCCPP    : Predicate<"Subtarget->hasCCPP()">,
-                 AssemblerPredicate<"FeatureCCPP", "ccpp">;
-
-def HasPAN     : Predicate<"Subtarget->hasPAN()">,
-                 AssemblerPredicate<"FeaturePAN",
-                 "ARM v8.1  Privileged Access-Never extension">;
-
-def HasPsUAO   : Predicate<"Subtarget->hasPsUAO()">,
-                 AssemblerPredicate<"FeaturePsUAO",
-                 "ARM v8.2 UAO PState extension (psuao)">;
-
-def HasPAN_RWV : Predicate<"Subtarget->hasPAN_RWV()">,
-                 AssemblerPredicate<"FeaturePAN_RWV",
-                 "ARM v8.2 PAN AT S1E1R and AT S1E1W Variation">;
-
 //===----------------------------------------------------------------------===//
 // AT (address translate) instruction options.
 //===----------------------------------------------------------------------===//
@@ -64,7 +45,7 @@
 def : AT<"S12E0R", 0b100, 0b0111, 0b1000, 0b110>;
 def : AT<"S12E0W", 0b100, 0b0111, 0b1000, 0b111>;
 
-let Requires = [{ {AArch64::FeaturePAN_RWV} }] in {
+let Requires = [{ {AArch64::HasV8_2aOps} }] in {
 def : AT<"S1E1RP", 0b000, 0b0111, 0b1001, 0b000>;
 def : AT<"S1E1WP", 0b000, 0b0111, 0b1001, 0b001>;
 }
@@ -121,7 +102,7 @@
 def : DC<"CIVAC", 0b011, 0b0111, 0b1110, 0b001>;
 def : DC<"CISW",  0b000, 0b0111, 0b1110, 0b010>;
 
-let Requires = [{ {AArch64::FeatureCCPP} }] in
+let Requires = [{ {AArch64::HasV8_2aOps} }] in
 def : DC<"CVAP",  0b011, 0b0111, 0b1100, 0b001>;
 
 let Requires = [{ {AArch64::FeatureCacheDeepPersist} }] in
@@ -197,7 +178,7 @@
   bits<4> Encoding;
   let Encoding = encoding;
 
-  code Requires = [{ {AArch64::FeatureTRACEV8_4} }];
+  code Requires = [{ {AArch64::HasV8_4aOps} }];
 }
 
 def : TSB<"csync", 0>;
@@ -333,14 +314,13 @@
 def : PState<"DAIFSet", 0b11110>;
 def : PState<"DAIFClr", 0b11111>;
 // v8.1a "Privileged Access Never" extension-specific PStates
-let Requires = [{ {AArch64::FeaturePAN} }] in
+let Requires = [{ {AArch64::HasV8_1aOps} }] in
 def : PState<"PAN",     0b00100>;
-
 // v8.2a "User Access Override" extension-specific PStates
-let Requires = [{ {AArch64::FeaturePsUAO} }] in
+let Requires = [{ {AArch64::HasV8_2aOps} }] in
 def : PState<"UAO",     0b00011>;
 // v8.4a timining insensitivity of data processing instructions
-let Requires = [{ {AArch64::FeatureDIT} }] in
+let Requires = [{ {AArch64::HasV8_4aOps} }] in
 def : PState<"DIT",     0b11010>;
 // v8.5a Spectre Mitigation
 let Requires = [{ {AArch64::FeatureSSBS} }] in
@@ -433,9 +413,8 @@
 def : TLBI<"VMALLS12E1",   0b100, 0b1000, 0b0111, 0b110, 0>;
 def : TLBI<"VAALE1",       0b000, 0b1000, 0b0111, 0b111>;
 
-// Armv8.4-A Translation Lookaside Buffer Instructions (TLBI)
-let Requires = [{ {AArch64::FeatureTLB_RMI} }] in {
 // Armv8.4-A Outer Sharable TLB Maintenance instructions:
+let Requires = [{ {AArch64::HasV8_4aOps} }] in {
 //                         op1    CRn     CRm     op2
 def : TLBI<"VMALLE1OS",    0b000, 0b1000, 0b0001, 0b000, 0>;
 def : TLBI<"VAE1OS",       0b000, 0b1000, 0b0001, 0b001>;
@@ -486,7 +465,7 @@
 def : TLBI<"RVALE3IS",     0b110, 0b1000, 0b0010, 0b101>;
 def : TLBI<"RVAE3OS",      0b110, 0b1000, 0b0101, 0b001>;
 def : TLBI<"RVALE3OS",     0b110, 0b1000, 0b0101, 0b101>;
-} //FeatureTLB_RMI
+}
 
 // Armv8.5-A Prediction Restriction by Context instruction options:
 class PRCTX<string name, bits<4> crm> : SearchableTable {
@@ -561,10 +540,8 @@
 def : ROSysReg<"PMCEID1_EL0",        0b11, 0b011, 0b1001, 0b1100, 0b111>;
 def : ROSysReg<"MIDR_EL1",           0b11, 0b000, 0b0000, 0b0000, 0b000>;
 def : ROSysReg<"CCSIDR_EL1",         0b11, 0b001, 0b0000, 0b0000, 0b000>;
-
-//v8.3 CCIDX - extending the CCsIDr number of sets
 def : ROSysReg<"CCSIDR2_EL1",        0b11, 0b001, 0b0000, 0b0000, 0b010> {
-  let Requires = [{ {AArch64::FeatureCCIDX} }];
+  let Requires = [{ {AArch64::HasV8_3aOps} }];
 }
 def : ROSysReg<"CLIDR_EL1",          0b11, 0b001, 0b0000, 0b0000, 0b001>;
 def : ROSysReg<"CTR_EL0",            0b11, 0b011, 0b0000, 0b0000, 0b001>;
@@ -602,7 +579,9 @@
 def : ROSysReg<"ID_AA64ISAR1_EL1",    0b11, 0b000, 0b0000, 0b0110, 0b001>;
 def : ROSysReg<"ID_AA64MMFR0_EL1",    0b11, 0b000, 0b0000, 0b0111, 0b000>;
 def : ROSysReg<"ID_AA64MMFR1_EL1",    0b11, 0b000, 0b0000, 0b0111, 0b001>;
-def : ROSysReg<"ID_AA64MMFR2_EL1",    0b11, 0b000, 0b0000, 0b0111, 0b010>;
+def : ROSysReg<"ID_AA64MMFR2_EL1",    0b11, 0b000, 0b0000, 0b0111, 0b010> {
+  let Requires = [{ {AArch64::HasV8_2aOps} }];
+}
 def : ROSysReg<"MVFR0_EL1",          0b11, 0b000, 0b0000, 0b0011, 0b000>;
 def : ROSysReg<"MVFR1_EL1",          0b11, 0b000, 0b0000, 0b0011, 0b001>;
 def : ROSysReg<"MVFR2_EL1",          0b11, 0b000, 0b0000, 0b0011, 0b010>;
@@ -672,7 +651,7 @@
 
 // v8.1a "Limited Ordering Regions" extension-specific system register
 //                         Op0    Op1     CRn     CRm    Op2
-let Requires = [{ {AArch64::FeatureLOR} }] in
+let Requires = [{ {AArch64::HasV8_1aOps} }] in
 def : ROSysReg<"LORID_EL1",  0b11, 0b000, 0b1010, 0b0100, 0b111>;
 
 // v8.2a "RAS extension" registers
@@ -1206,21 +1185,21 @@
 def : RWSysReg<"ICH_LR15_EL2",       0b11, 0b100, 0b1100, 0b1101, 0b111>;
 
 // v8.1a "Privileged Access Never" extension-specific system registers
-let Requires = [{ {AArch64::FeaturePAN} }] in
+let Requires = [{ {AArch64::HasV8_1aOps} }] in
 def : RWSysReg<"PAN", 0b11, 0b000, 0b0100, 0b0010, 0b011>;
 
 // v8.1a "Limited Ordering Regions" extension-specific system registers
 //                         Op0    Op1     CRn     CRm    Op2
-let Requires = [{ {AArch64::FeatureLOR} }] in {
+let Requires = [{ {AArch64::HasV8_1aOps} }] in {
 def : RWSysReg<"LORSA_EL1",  0b11, 0b000, 0b1010, 0b0100, 0b000>;
 def : RWSysReg<"LOREA_EL1",  0b11, 0b000, 0b1010, 0b0100, 0b001>;
 def : RWSysReg<"LORN_EL1",   0b11, 0b000, 0b1010, 0b0100, 0b010>;
 def : RWSysReg<"LORC_EL1",   0b11, 0b000, 0b1010, 0b0100, 0b011>;
 }
 
-// v8.1a "Virtualization Host extensions" system registers
+// v8.1a "Virtualization Host extensions" system registers
 //                              Op0    Op1     CRn     CRm    Op2
-let Requires = [{ {AArch64::FeatureVH} }] in {
+let Requires = [{ {AArch64::HasV8_1aOps} }] in {
 def : RWSysReg<"TTBR1_EL2",       0b11, 0b100, 0b0010, 0b0000, 0b001>;
 def : RWSysReg<"CONTEXTIDR_EL2",  0b11, 0b100, 0b1101, 0b0000, 0b001>;
 def : RWSysReg<"CNTHV_TVAL_EL2",  0b11, 0b100, 0b1110, 0b0011, 0b000>;
@@ -1251,7 +1230,7 @@
 }
 // v8.2a registers
 //                  Op0    Op1     CRn     CRm    Op2
-let Requires = [{ {AArch64::FeaturePsUAO} }] in
+let Requires = [{ {AArch64::HasV8_2aOps} }] in
 def : RWSysReg<"UAO", 0b11, 0b000, 0b0100, 0b0010, 0b100>;
 
 // v8.2a "Statistical Profiling extension" registers
@@ -1288,7 +1267,7 @@
 
 // v8.3a "Pointer authentication extension" registers
 //                              Op0    Op1     CRn     CRm    Op2
-let Requires = [{ {AArch64::FeaturePA} }] in {
+let Requires = [{ {AArch64::HasV8_3aOps} }] in {
 def : RWSysReg<"APIAKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b000>;
 def : RWSysReg<"APIAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b001>;
 def : RWSysReg<"APIBKeyLo_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b010>;
@@ -1301,8 +1280,8 @@
 def : RWSysReg<"APGAKeyHi_EL1", 0b11, 0b000, 0b0010, 0b0011, 0b001>;
 }
 
-// v8.4 "Secure Exception Level 2 extension"
-let Requires = [{ {AArch64::FeatureSEL2} }] in {
+let Requires = [{ {AArch64::HasV8_4aOps} }] in {
+
 // v8.4a "Virtualization secure second stage translation" registers
 //                           Op0   Op1    CRn     CRm     Op2
 def : RWSysReg<"VSTCR_EL2" , 0b11, 0b100, 0b0010, 0b0110, 0b010>;
@@ -1320,22 +1299,18 @@
 // v8.4a "Virtualization debug state" registers
 //                           Op0   Op1    CRn     CRm     Op2
 def : RWSysReg<"SDER32_EL2", 0b11, 0b100, 0b0001, 0b0011, 0b001>;
-} // FeatureSEL2
 
 // v8.4a RAS registers
-//                              Op0   Op1    CRn     CRm     Op2
-let Requires = [{ {AArch64::FeatureRASv8_4} }] in {
+//                              Op0   Op1    CRn     CRm    Op2
 def : RWSysReg<"ERXPFGCTL_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b101>;
 def : RWSysReg<"ERXPFGCDN_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b110>;
 def : RWSysReg<"ERXTS_EL1",     0b11, 0b000, 0b0101, 0b0101, 0b111>;
 def : RWSysReg<"ERXMISC2_EL1",  0b11, 0b000, 0b0101, 0b0101, 0b010>;
 def : RWSysReg<"ERXMISC3_EL1",  0b11, 0b000, 0b0101, 0b0101, 0b011>;
 def : ROSysReg<"ERXPFGF_EL1",   0b11, 0b000, 0b0101, 0b0100, 0b100>;
-} // FeatureRASv8_4
 
 // v8.4a MPAM registers
 //                             Op0   Op1    CRn     CRm     Op2
-let Requires = [{ {AArch64::FeatureMPAM} }] in {
 def : RWSysReg<"MPAM0_EL1",    0b11, 0b000, 0b1010, 0b0101, 0b001>;
 def : RWSysReg<"MPAM1_EL1",    0b11, 0b000, 0b1010, 0b0101, 0b000>;
 def : RWSysReg<"MPAM2_EL2",    0b11, 0b100, 0b1010, 0b0101, 0b000>;
@@ -1352,11 +1327,9 @@
 def : RWSysReg<"MPAMVPM6_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b110>;
 def : RWSysReg<"MPAMVPM7_EL2", 0b11, 0b100, 0b1010, 0b0110, 0b111>;
 def : ROSysReg<"MPAMIDR_EL1",  0b11, 0b000, 0b1010, 0b0100, 0b100>;
-} //FeatureMPAM
 
-// v8.4a Activitiy Monitor registers
+// v8.4a Activity monitor registers
 //                                 Op0   Op1    CRn     CRm     Op2
-let Requires = [{ {AArch64::FeatureAM} }] in {
 def : RWSysReg<"AMCR_EL0",         0b11, 0b011, 0b1101, 0b0010, 0b000>;
 def : ROSysReg<"AMCFGR_EL0",       0b11, 0b011, 0b1101, 0b0010, 0b001>;
 def : ROSysReg<"AMCGCR_EL0",       0b11, 0b011, 0b1101, 0b0010, 0b010>;
@@ -1405,7 +1378,6 @@
 def : RWSysReg<"AMEVTYPER113_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b101>;
 def : RWSysReg<"AMEVTYPER114_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b110>;
 def : RWSysReg<"AMEVTYPER115_EL0", 0b11, 0b011, 0b1101, 0b1111, 0b111>;
-} //FeatureAM
 
 // v8.4a Trace Extension registers
 //
@@ -1414,24 +1386,19 @@
 // but they are already defined above.
 //
 //                                 Op0   Op1    CRn     CRm     Op2
-let Requires = [{ {AArch64::FeatureTRACEV8_4} }] in {
 def : RWSysReg<"TRFCR_EL1",        0b11, 0b000, 0b0001, 0b0010, 0b001>;
 def : RWSysReg<"TRFCR_EL2",        0b11, 0b100, 0b0001, 0b0010, 0b001>;
 def : RWSysReg<"TRFCR_EL12",       0b11, 0b101, 0b0001, 0b0010, 0b001>;
-} //FeatureTRACEV8_4
 
 // v8.4a Timining insensitivity of data processing instructions
-// DIT: Data Independent Timing instructions
 //                                 Op0   Op1    CRn     CRm     Op2
-let Requires = [{ {AArch64::FeatureDIT} }] in {
 def : RWSysReg<"DIT",              0b11, 0b011, 0b0100, 0b0010, 0b101>;
-} //FeatureDIT
 
 // v8.4a Enhanced Support for Nested Virtualization
 //                                 Op0   Op1    CRn     CRm     Op2
-let Requires = [{ {AArch64::FeatureNV} }] in {
 def : RWSysReg<"VNCR_EL2",         0b11, 0b100, 0b0010, 0b0010, 0b000>;
-} //FeatureNV
+
+} // HasV8_4aOps
 
 // SVE control registers
 //                                 Op0   Op1    CRn     CRm     Op2
Index: lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
===================================================================
--- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -2813,29 +2813,28 @@
   const char *Name;
   const FeatureBitset Features;
 } ExtensionMap[] = {
-    {"crc", {AArch64::FeatureCRC}},
-    {"sm4", {AArch64::FeatureSM4}},
-    {"sha3", {AArch64::FeatureSHA3}},
-    {"sha2", {AArch64::FeatureSHA2}},
-    {"aes", {AArch64::FeatureAES}},
-    {"crypto", {AArch64::FeatureCrypto}},
-    {"fp", {AArch64::FeatureFPARMv8}},
-    {"simd", {AArch64::FeatureNEON}},
-    {"ras", {AArch64::FeatureRAS}},
-    {"lse", {AArch64::FeatureLSE}},
-    {"predctrl", {AArch64::FeaturePredCtrl}},
-    {"ccdp", {AArch64::FeatureCacheDeepPersist}},
-    {"mte", {AArch64::FeatureMTE}},
-    {"tlb-rmi", {AArch64::FeatureTLB_RMI}},
-    {"pan-rwv", {AArch64::FeaturePAN_RWV}},
-    {"ccpp", {AArch64::FeatureCCPP}},
-    // FIXME: Unsupported extensions
-    {"pan", {}},
-    {"lor", {}},
-    {"rdma", {}},
-    {"profile", {}},
+  { "crc",  {AArch64::FeatureCRC} },
+  { "sm4",  {AArch64::FeatureSM4} },
+  { "sha3", {AArch64::FeatureSHA3} },
+  { "sha2", {AArch64::FeatureSHA2} },
+  { "aes",  {AArch64::FeatureAES} },
+  { "crypto", {AArch64::FeatureCrypto} },
+  { "fp", {AArch64::FeatureFPARMv8} },
+  { "simd", {AArch64::FeatureNEON} },
+  { "ras", {AArch64::FeatureRAS} },
+  { "lse", {AArch64::FeatureLSE} },
+  { "predctrl", {AArch64::FeaturePredCtrl} },
+  { "ccdp", {AArch64::FeatureCacheDeepPersist} },
+  { "mte", {AArch64::FeatureMTE} },
+
+  // FIXME: Unsupported extensions
+  { "pan", {} },
+  { "lor", {} },
+  { "rdma", {} },
+  { "profile", {} },
 };
 
+
 static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
   if (FBS[AArch64::HasV8_1aOps])
     Str += "ARMv8.1a";
Index: lib/Target/Hexagon/HexagonDepDecoders.h
===================================================================
--- lib/Target/Hexagon/HexagonDepDecoders.h
+++ lib/Target/Hexagon/HexagonDepDecoders.h
@@ -11,8 +11,10 @@
 
 // clang-format off
 
+#if defined(__clang__)
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wunused-function"
+#endif
 
 static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp,
     uint64_t, const void *Decoder) {
@@ -69,5 +71,9 @@
   signedDecoder<6>(MI, tmp, Decoder);
   return MCDisassembler::Success;
 }
+
+#if defined(__clang__)
 #pragma clang diagnostic pop
+#endif
+
 // clang-format on
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -1047,6 +1047,9 @@
 
     bool decomposeMulByConstant(EVT VT, SDValue C) const override;
 
+    bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
+                                  bool IsSigned) const override;
+
     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
     /// with this index.
     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -4812,6 +4812,12 @@
          (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
 }
 
+bool X86TargetLowering::shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
+                                                 bool IsSigned) const {
+  // f80 FP_TO_UINT is more efficient using Strict code if FCMOV is available.
+  return !IsSigned && FpVT == MVT::f80 && Subtarget.hasCMov();
+}
+
 bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                                 unsigned Index) const {
   if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
@@ -24086,8 +24092,6 @@
   unsigned X86OpcI = getTargetVShiftUniformOpcode(Opcode, false);
   unsigned X86OpcV = getTargetVShiftUniformOpcode(Opcode, true);
 
-  Amt = peekThroughEXTRACT_SUBVECTORs(Amt);
-
   if (SDValue BaseShAmt = GetSplatValue(Amt, dl, DAG)) {
     if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
       MVT EltVT = VT.getVectorElementType();
Index: lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.cpp
+++ lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1886,7 +1886,7 @@
     { ISD::FSQRT,      MVT::v4f32,  56 }, // Pentium III from http://www.agner.org/
   };
   static const CostTblEntry X64CostTbl[] = { // 64-bit targets
-    { ISD::BITREVERSE, MVT::i64,    14 } 
+    { ISD::BITREVERSE, MVT::i64,    14 }
   };
   static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets
     { ISD::BITREVERSE, MVT::i32,    14 },
@@ -2899,6 +2899,8 @@
     Options.LoadSizes.push_back(4);
     Options.LoadSizes.push_back(2);
     Options.LoadSizes.push_back(1);
+    // All GPR loads can be unaligned, and vector loads too starting from SSE2.
+    Options.AllowOverlappingLoads = true;
     return Options;
   }();
   return IsZeroCmp ? &EqZeroOptions : &ThreeWayOptions;
Index: test/CodeGen/X86/fp-cvt.ll
===================================================================
--- test/CodeGen/X86/fp-cvt.ll
+++ test/CodeGen/X86/fp-cvt.ll
@@ -483,29 +483,20 @@
 ; X64-X87-NEXT:    flds {{.*}}(%rip)
 ; X64-X87-NEXT:    fld %st(1)
 ; X64-X87-NEXT:    fsub %st(1)
+; X64-X87-NEXT:    xorl %eax, %eax
+; X64-X87-NEXT:    fxch %st(1)
+; X64-X87-NEXT:    fucompi %st(2)
+; X64-X87-NEXT:    fcmovnbe %st(1), %st(0)
+; X64-X87-NEXT:    fstp %st(1)
 ; X64-X87-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
-; X64-X87-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
-; X64-X87-NEXT:    movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
-; X64-X87-NEXT:    fldcw -{{[0-9]+}}(%rsp)
-; X64-X87-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
-; X64-X87-NEXT:    fistpll -{{[0-9]+}}(%rsp)
-; X64-X87-NEXT:    fldcw -{{[0-9]+}}(%rsp)
-; X64-X87-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
-; X64-X87-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
+; X64-X87-NEXT:    movzwl -{{[0-9]+}}(%rsp), %ecx
 ; X64-X87-NEXT:    movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
 ; X64-X87-NEXT:    fldcw -{{[0-9]+}}(%rsp)
-; X64-X87-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
-; X64-X87-NEXT:    fld %st(1)
+; X64-X87-NEXT:    movw %cx, -{{[0-9]+}}(%rsp)
 ; X64-X87-NEXT:    fistpll -{{[0-9]+}}(%rsp)
 ; X64-X87-NEXT:    fldcw -{{[0-9]+}}(%rsp)
-; X64-X87-NEXT:    fucompi %st(1)
-; X64-X87-NEXT:    fstp %st(0)
-; X64-X87-NEXT:    jbe .LBB10_1
-; X64-X87-NEXT:  # %bb.2:
-; X64-X87-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
-; X64-X87-NEXT:    retq
-; X64-X87-NEXT:  .LBB10_1:
-; X64-X87-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; X64-X87-NEXT:    setbe %al
+; X64-X87-NEXT:    shlq $63, %rax
 ; X64-X87-NEXT:    xorq -{{[0-9]+}}(%rsp), %rax
 ; X64-X87-NEXT:    retq
 ;
@@ -515,17 +506,14 @@
 ; X64-SSSE3-NEXT:    flds {{.*}}(%rip)
 ; X64-SSSE3-NEXT:    fld %st(1)
 ; X64-SSSE3-NEXT:    fsub %st(1)
+; X64-SSSE3-NEXT:    xorl %eax, %eax
+; X64-SSSE3-NEXT:    fxch %st(1)
+; X64-SSSE3-NEXT:    fucompi %st(2)
+; X64-SSSE3-NEXT:    fcmovnbe %st(1), %st(0)
+; X64-SSSE3-NEXT:    fstp %st(1)
 ; X64-SSSE3-NEXT:    fisttpll -{{[0-9]+}}(%rsp)
-; X64-SSSE3-NEXT:    fld %st(1)
-; X64-SSSE3-NEXT:    fisttpll -{{[0-9]+}}(%rsp)
-; X64-SSSE3-NEXT:    fucompi %st(1)
-; X64-SSSE3-NEXT:    fstp %st(0)
-; X64-SSSE3-NEXT:    jbe .LBB10_1
-; X64-SSSE3-NEXT:  # %bb.2:
-; X64-SSSE3-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
-; X64-SSSE3-NEXT:    retq
-; X64-SSSE3-NEXT:  .LBB10_1:
-; X64-SSSE3-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; X64-SSSE3-NEXT:    setbe %al
+; X64-SSSE3-NEXT:    shlq $63, %rax
 ; X64-SSSE3-NEXT:    xorq -{{[0-9]+}}(%rsp), %rax
 ; X64-SSSE3-NEXT:    retq
   %1 = fptoui x86_fp80 %a0 to i64
@@ -577,29 +565,20 @@
 ; X64-X87-NEXT:    flds {{.*}}(%rip)
 ; X64-X87-NEXT:    fld %st(1)
 ; X64-X87-NEXT:    fsub %st(1)
+; X64-X87-NEXT:    xorl %eax, %eax
+; X64-X87-NEXT:    fxch %st(1)
+; X64-X87-NEXT:    fucompi %st(2)
+; X64-X87-NEXT:    fcmovnbe %st(1), %st(0)
+; X64-X87-NEXT:    fstp %st(1)
 ; X64-X87-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
-; X64-X87-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
-; X64-X87-NEXT:    movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
-; X64-X87-NEXT:    fldcw -{{[0-9]+}}(%rsp)
-; X64-X87-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
-; X64-X87-NEXT:    fistpll -{{[0-9]+}}(%rsp)
-; X64-X87-NEXT:    fldcw -{{[0-9]+}}(%rsp)
-; X64-X87-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
-; X64-X87-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
+; X64-X87-NEXT:    movzwl -{{[0-9]+}}(%rsp), %ecx
 ; X64-X87-NEXT:    movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
 ; X64-X87-NEXT:    fldcw -{{[0-9]+}}(%rsp)
-; X64-X87-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
-; X64-X87-NEXT:    fld %st(1)
+; X64-X87-NEXT:    movw %cx, -{{[0-9]+}}(%rsp)
 ; X64-X87-NEXT:    fistpll -{{[0-9]+}}(%rsp)
 ; X64-X87-NEXT:    fldcw -{{[0-9]+}}(%rsp)
-; X64-X87-NEXT:    fucompi %st(1)
-; X64-X87-NEXT:    fstp %st(0)
-; X64-X87-NEXT:    jbe .LBB11_1
-; X64-X87-NEXT:  # %bb.2:
-; X64-X87-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
-; X64-X87-NEXT:    retq
-; X64-X87-NEXT:  .LBB11_1:
-; X64-X87-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; X64-X87-NEXT:    setbe %al
+; X64-X87-NEXT:    shlq $63, %rax
 ; X64-X87-NEXT:    xorq -{{[0-9]+}}(%rsp), %rax
 ; X64-X87-NEXT:    retq
 ;
@@ -609,17 +588,14 @@
 ; X64-SSSE3-NEXT:    flds {{.*}}(%rip)
 ; X64-SSSE3-NEXT:    fld %st(1)
 ; X64-SSSE3-NEXT:    fsub %st(1)
+; X64-SSSE3-NEXT:    xorl %eax, %eax
+; X64-SSSE3-NEXT:    fxch %st(1)
+; X64-SSSE3-NEXT:    fucompi %st(2)
+; X64-SSSE3-NEXT:    fcmovnbe %st(1), %st(0)
+; X64-SSSE3-NEXT:    fstp %st(1)
 ; X64-SSSE3-NEXT:    fisttpll -{{[0-9]+}}(%rsp)
-; X64-SSSE3-NEXT:    fld %st(1)
-; X64-SSSE3-NEXT:    fisttpll -{{[0-9]+}}(%rsp)
-; X64-SSSE3-NEXT:    fucompi %st(1)
-; X64-SSSE3-NEXT:    fstp %st(0)
-; X64-SSSE3-NEXT:    jbe .LBB11_1
-; X64-SSSE3-NEXT:  # %bb.2:
-; X64-SSSE3-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
-; X64-SSSE3-NEXT:    retq
-; X64-SSSE3-NEXT:  .LBB11_1:
-; X64-SSSE3-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; X64-SSSE3-NEXT:    setbe %al
+; X64-SSSE3-NEXT:    shlq $63, %rax
 ; X64-SSSE3-NEXT:    xorq -{{[0-9]+}}(%rsp), %rax
 ; X64-SSSE3-NEXT:    retq
   %1 = load x86_fp80, x86_fp80 *%a0
Index: test/CodeGen/X86/memcmp-optsize.ll
===================================================================
--- test/CodeGen/X86/memcmp-optsize.ll
+++ test/CodeGen/X86/memcmp-optsize.ll
@@ -639,17 +639,33 @@
 }
 
 define i1 @length24_eq(i8* %x, i8* %y) nounwind optsize {
-; X86-LABEL: length24_eq:
-; X86:       # %bb.0:
-; X86-NEXT:    pushl $0
-; X86-NEXT:    pushl $24
-; X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-NEXT:    calll memcmp
-; X86-NEXT:    addl $16, %esp
-; X86-NEXT:    testl %eax, %eax
-; X86-NEXT:    sete %al
-; X86-NEXT:    retl
+; X86-NOSSE-LABEL: length24_eq:
+; X86-NOSSE:       # %bb.0:
+; X86-NOSSE-NEXT:    pushl $0
+; X86-NOSSE-NEXT:    pushl $24
+; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    calll memcmp
+; X86-NOSSE-NEXT:    addl $16, %esp
+; X86-NOSSE-NEXT:    testl %eax, %eax
+; X86-NOSSE-NEXT:    sete %al
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE2-LABEL: length24_eq:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movdqu (%ecx), %xmm0
+; X86-SSE2-NEXT:    movdqu 8(%ecx), %xmm1
+; X86-SSE2-NEXT:    movdqu (%eax), %xmm2
+; X86-SSE2-NEXT:    pcmpeqb %xmm0, %xmm2
+; X86-SSE2-NEXT:    movdqu 8(%eax), %xmm0
+; X86-SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
+; X86-SSE2-NEXT:    pand %xmm2, %xmm0
+; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
+; X86-SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
+; X86-SSE2-NEXT:    sete %al
+; X86-SSE2-NEXT:    retl
 ;
 ; X64-SSE2-LABEL: length24_eq:
 ; X64-SSE2:       # %bb.0:
@@ -683,17 +699,30 @@
 }
 
 define i1 @length24_eq_const(i8* %X) nounwind optsize {
-; X86-LABEL: length24_eq_const:
-; X86:       # %bb.0:
-; X86-NEXT:    pushl $0
-; X86-NEXT:    pushl $24
-; X86-NEXT:    pushl $.L.str
-; X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-NEXT:    calll memcmp
-; X86-NEXT:    addl $16, %esp
-; X86-NEXT:    testl %eax, %eax
-; X86-NEXT:    setne %al
-; X86-NEXT:    retl
+; X86-NOSSE-LABEL: length24_eq_const:
+; X86-NOSSE:       # %bb.0:
+; X86-NOSSE-NEXT:    pushl $0
+; X86-NOSSE-NEXT:    pushl $24
+; X86-NOSSE-NEXT:    pushl $.L.str
+; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    calll memcmp
+; X86-NOSSE-NEXT:    addl $16, %esp
+; X86-NOSSE-NEXT:    testl %eax, %eax
+; X86-NOSSE-NEXT:    setne %al
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE2-LABEL: length24_eq_const:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movdqu (%eax), %xmm0
+; X86-SSE2-NEXT:    movdqu 8(%eax), %xmm1
+; X86-SSE2-NEXT:    pcmpeqb {{\.LCPI.*}}, %xmm1
+; X86-SSE2-NEXT:    pcmpeqb {{\.LCPI.*}}, %xmm0
+; X86-SSE2-NEXT:    pand %xmm1, %xmm0
+; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
+; X86-SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
+; X86-SSE2-NEXT:    setne %al
+; X86-SSE2-NEXT:    retl
 ;
 ; X64-SSE2-LABEL: length24_eq_const:
 ; X64-SSE2:       # %bb.0:
Index: test/CodeGen/X86/memcmp.ll
===================================================================
--- test/CodeGen/X86/memcmp.ll
+++ test/CodeGen/X86/memcmp.ll
@@ -359,6 +359,33 @@
   ret i1 %c
 }
 
+define i1 @length7_eq(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length7_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl (%ecx), %edx
+; X86-NEXT:    movl 3(%ecx), %ecx
+; X86-NEXT:    xorl (%eax), %edx
+; X86-NEXT:    xorl 3(%eax), %ecx
+; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    setne %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length7_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    movl 3(%rdi), %ecx
+; X64-NEXT:    xorl (%rsi), %eax
+; X64-NEXT:    xorl 3(%rsi), %ecx
+; X64-NEXT:    orl %eax, %ecx
+; X64-NEXT:    setne %al
+; X64-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 7) nounwind
+  %c = icmp ne i32 %m, 0
+  ret i1 %c
+}
+
 define i32 @length8(i8* %X, i8* %Y) nounwind {
 ; X86-LABEL: length8:
 ; X86:       # %bb.0:
@@ -370,7 +397,7 @@
 ; X86-NEXT:    bswapl %ecx
 ; X86-NEXT:    bswapl %edx
 ; X86-NEXT:    cmpl %edx, %ecx
-; X86-NEXT:    jne .LBB13_2
+; X86-NEXT:    jne .LBB14_2
 ; X86-NEXT:  # %bb.1: # %loadbb1
 ; X86-NEXT:    movl 4(%esi), %ecx
 ; X86-NEXT:    movl 4(%eax), %edx
@@ -378,13 +405,13 @@
 ; X86-NEXT:    bswapl %edx
 ; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:    cmpl %edx, %ecx
-; X86-NEXT:    je .LBB13_3
-; X86-NEXT:  .LBB13_2: # %res_block
+; X86-NEXT:    je .LBB14_3
+; X86-NEXT:  .LBB14_2: # %res_block
 ; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:    cmpl %edx, %ecx
 ; X86-NEXT:    setae %al
 ; X86-NEXT:    leal -1(%eax,%eax), %eax
-; X86-NEXT:  .LBB13_3: # %endblock
+; X86-NEXT:  .LBB14_3: # %endblock
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
@@ -450,6 +477,89 @@
   ret i1 %c
 }
 
+define i1 @length9_eq(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length9_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $9
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length9_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    xorq (%rsi), %rax
+; X64-NEXT:    movb 8(%rdi), %cl
+; X64-NEXT:    xorb 8(%rsi), %cl
+; X64-NEXT:    movzbl %cl, %ecx
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 9) nounwind
+  %c = icmp eq i32 %m, 0
+  ret i1 %c
+}
+
+define i1 @length10_eq(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length10_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $10
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length10_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    xorq (%rsi), %rax
+; X64-NEXT:    movzwl 8(%rdi), %ecx
+; X64-NEXT:    xorw 8(%rsi), %cx
+; X64-NEXT:    movzwl %cx, %ecx
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 10) nounwind
+  %c = icmp eq i32 %m, 0
+  ret i1 %c
+}
+
+define i1 @length11_eq(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length11_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $11
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length11_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    movq 3(%rdi), %rcx
+; X64-NEXT:    xorq (%rsi), %rax
+; X64-NEXT:    xorq 3(%rsi), %rcx
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 11) nounwind
+  %c = icmp eq i32 %m, 0
+  ret i1 %c
+}
+
 define i1 @length12_eq(i8* %X, i8* %Y) nounwind {
 ; X86-LABEL: length12_eq:
 ; X86:       # %bb.0:
@@ -495,7 +605,7 @@
 ; X64-NEXT:    bswapq %rcx
 ; X64-NEXT:    bswapq %rdx
 ; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    jne .LBB17_2
+; X64-NEXT:    jne .LBB21_2
 ; X64-NEXT:  # %bb.1: # %loadbb1
 ; X64-NEXT:    movl 8(%rdi), %ecx
 ; X64-NEXT:    movl 8(%rsi), %edx
@@ -503,18 +613,99 @@
 ; X64-NEXT:    bswapl %edx
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    je .LBB17_3
-; X64-NEXT:  .LBB17_2: # %res_block
+; X64-NEXT:    je .LBB21_3
+; X64-NEXT:  .LBB21_2: # %res_block
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpq %rdx, %rcx
 ; X64-NEXT:    setae %al
 ; X64-NEXT:    leal -1(%rax,%rax), %eax
-; X64-NEXT:  .LBB17_3: # %endblock
+; X64-NEXT:  .LBB21_3: # %endblock
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
   ret i32 %m
 }
 
+define i1 @length13_eq(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length13_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $13
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length13_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    movq 5(%rdi), %rcx
+; X64-NEXT:    xorq (%rsi), %rax
+; X64-NEXT:    xorq 5(%rsi), %rcx
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 13) nounwind
+  %c = icmp eq i32 %m, 0
+  ret i1 %c
+}
+
+define i1 @length14_eq(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length14_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $14
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length14_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    movq 6(%rdi), %rcx
+; X64-NEXT:    xorq (%rsi), %rax
+; X64-NEXT:    xorq 6(%rsi), %rcx
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 14) nounwind
+  %c = icmp eq i32 %m, 0
+  ret i1 %c
+}
+
+define i1 @length15_eq(i8* %X, i8* %Y) nounwind {
+; X86-LABEL: length15_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $15
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll memcmp
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: length15_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    movq 7(%rdi), %rcx
+; X64-NEXT:    xorq (%rsi), %rax
+; X64-NEXT:    xorq 7(%rsi), %rcx
+; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 15) nounwind
+  %c = icmp eq i32 %m, 0
+  ret i1 %c
+}
+
 ; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
 
 define i32 @length16(i8* %X, i8* %Y) nounwind {
@@ -535,7 +726,7 @@
 ; X64-NEXT:    bswapq %rcx
 ; X64-NEXT:    bswapq %rdx
 ; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    jne .LBB18_2
+; X64-NEXT:    jne .LBB25_2
 ; X64-NEXT:  # %bb.1: # %loadbb1
 ; X64-NEXT:    movq 8(%rdi), %rcx
 ; X64-NEXT:    movq 8(%rsi), %rdx
@@ -543,13 +734,13 @@
 ; X64-NEXT:    bswapq %rdx
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    je .LBB18_3
-; X64-NEXT:  .LBB18_2: # %res_block
+; X64-NEXT:    je .LBB25_3
+; X64-NEXT:  .LBB25_2: # %res_block
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpq %rdx, %rcx
 ; X64-NEXT:    setae %al
 ; X64-NEXT:    leal -1(%rax,%rax), %eax
-; X64-NEXT:  .LBB18_3: # %endblock
+; X64-NEXT:  .LBB25_3: # %endblock
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind
   ret i32 %m
@@ -694,17 +885,45 @@
 }
 
 define i1 @length24_eq(i8* %x, i8* %y) nounwind {
-; X86-LABEL: length24_eq:
-; X86:       # %bb.0:
-; X86-NEXT:    pushl $0
-; X86-NEXT:    pushl $24
-; X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-NEXT:    calll memcmp
-; X86-NEXT:    addl $16, %esp
-; X86-NEXT:    testl %eax, %eax
-; X86-NEXT:    sete %al
-; X86-NEXT:    retl
+; X86-NOSSE-LABEL: length24_eq:
+; X86-NOSSE:       # %bb.0:
+; X86-NOSSE-NEXT:    pushl $0
+; X86-NOSSE-NEXT:    pushl $24
+; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    calll memcmp
+; X86-NOSSE-NEXT:    addl $16, %esp
+; X86-NOSSE-NEXT:    testl %eax, %eax
+; X86-NOSSE-NEXT:    sete %al
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE1-LABEL: length24_eq:
+; X86-SSE1:       # %bb.0:
+; X86-SSE1-NEXT:    pushl $0
+; X86-SSE1-NEXT:    pushl $24
+; X86-SSE1-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    calll memcmp
+; X86-SSE1-NEXT:    addl $16, %esp
+; X86-SSE1-NEXT:    testl %eax, %eax
+; X86-SSE1-NEXT:    sete %al
+; X86-SSE1-NEXT:    retl
+;
+; X86-SSE2-LABEL: length24_eq:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE2-NEXT:    movdqu (%ecx), %xmm0
+; X86-SSE2-NEXT:    movdqu 8(%ecx), %xmm1
+; X86-SSE2-NEXT:    movdqu (%eax), %xmm2
+; X86-SSE2-NEXT:    pcmpeqb %xmm0, %xmm2
+; X86-SSE2-NEXT:    movdqu 8(%eax), %xmm0
+; X86-SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
+; X86-SSE2-NEXT:    pand %xmm2, %xmm0
+; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
+; X86-SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
+; X86-SSE2-NEXT:    sete %al
+; X86-SSE2-NEXT:    retl
 ;
 ; X64-SSE2-LABEL: length24_eq:
 ; X64-SSE2:       # %bb.0:
@@ -738,17 +957,42 @@
 }
 
 define i1 @length24_eq_const(i8* %X) nounwind {
-; X86-LABEL: length24_eq_const:
-; X86:       # %bb.0:
-; X86-NEXT:    pushl $0
-; X86-NEXT:    pushl $24
-; X86-NEXT:    pushl $.L.str
-; X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-NEXT:    calll memcmp
-; X86-NEXT:    addl $16, %esp
-; X86-NEXT:    testl %eax, %eax
-; X86-NEXT:    setne %al
-; X86-NEXT:    retl
+; X86-NOSSE-LABEL: length24_eq_const:
+; X86-NOSSE:       # %bb.0:
+; X86-NOSSE-NEXT:    pushl $0
+; X86-NOSSE-NEXT:    pushl $24
+; X86-NOSSE-NEXT:    pushl $.L.str
+; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    calll memcmp
+; X86-NOSSE-NEXT:    addl $16, %esp
+; X86-NOSSE-NEXT:    testl %eax, %eax
+; X86-NOSSE-NEXT:    setne %al
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE1-LABEL: length24_eq_const:
+; X86-SSE1:       # %bb.0:
+; X86-SSE1-NEXT:    pushl $0
+; X86-SSE1-NEXT:    pushl $24
+; X86-SSE1-NEXT:    pushl $.L.str
+; X86-SSE1-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    calll memcmp
+; X86-SSE1-NEXT:    addl $16, %esp
+; X86-SSE1-NEXT:    testl %eax, %eax
+; X86-SSE1-NEXT:    setne %al
+; X86-SSE1-NEXT:    retl
+;
+; X86-SSE2-LABEL: length24_eq_const:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movdqu (%eax), %xmm0
+; X86-SSE2-NEXT:    movdqu 8(%eax), %xmm1
+; X86-SSE2-NEXT:    pcmpeqb {{\.LCPI.*}}, %xmm1
+; X86-SSE2-NEXT:    pcmpeqb {{\.LCPI.*}}, %xmm0
+; X86-SSE2-NEXT:    pand %xmm1, %xmm0
+; X86-SSE2-NEXT:    pmovmskb %xmm0, %eax
+; X86-SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
+; X86-SSE2-NEXT:    setne %al
+; X86-SSE2-NEXT:    retl
 ;
 ; X64-SSE2-LABEL: length24_eq_const:
 ; X64-SSE2:       # %bb.0:
@@ -1100,5 +1344,3 @@
   %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 9223372036854775807) nounwind
   ret i32 %m
 }
-
-
Index: test/CodeGen/X86/scalar-fp-to-i64.ll
===================================================================
--- test/CodeGen/X86/scalar-fp-to-i64.ll
+++ test/CodeGen/X86/scalar-fp-to-i64.ll
@@ -1147,25 +1147,21 @@
 ;
 ; SSE3_64_WIN-LABEL: x_to_u64:
 ; SSE3_64_WIN:       # %bb.0:
-; SSE3_64_WIN-NEXT:    subq $16, %rsp
+; SSE3_64_WIN-NEXT:    pushq %rax
 ; SSE3_64_WIN-NEXT:    fldt (%rcx)
 ; SSE3_64_WIN-NEXT:    flds __real@{{.*}}(%rip)
 ; SSE3_64_WIN-NEXT:    fld %st(1)
 ; SSE3_64_WIN-NEXT:    fsub %st(1)
-; SSE3_64_WIN-NEXT:    fisttpll {{[0-9]+}}(%rsp)
-; SSE3_64_WIN-NEXT:    fld %st(1)
+; SSE3_64_WIN-NEXT:    xorl %eax, %eax
+; SSE3_64_WIN-NEXT:    fxch %st(1)
+; SSE3_64_WIN-NEXT:    fucompi %st(2)
+; SSE3_64_WIN-NEXT:    fcmovnbe %st(1), %st(0)
+; SSE3_64_WIN-NEXT:    fstp %st(1)
 ; SSE3_64_WIN-NEXT:    fisttpll (%rsp)
-; SSE3_64_WIN-NEXT:    fucompi %st(1)
-; SSE3_64_WIN-NEXT:    fstp %st(0)
-; SSE3_64_WIN-NEXT:    jbe .LBB4_1
-; SSE3_64_WIN-NEXT:  # %bb.2:
-; SSE3_64_WIN-NEXT:    movq (%rsp), %rax
-; SSE3_64_WIN-NEXT:    addq $16, %rsp
-; SSE3_64_WIN-NEXT:    retq
-; SSE3_64_WIN-NEXT:  .LBB4_1:
-; SSE3_64_WIN-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; SSE3_64_WIN-NEXT:    xorq {{[0-9]+}}(%rsp), %rax
-; SSE3_64_WIN-NEXT:    addq $16, %rsp
+; SSE3_64_WIN-NEXT:    setbe %al
+; SSE3_64_WIN-NEXT:    shlq $63, %rax
+; SSE3_64_WIN-NEXT:    xorq (%rsp), %rax
+; SSE3_64_WIN-NEXT:    popq %rcx
 ; SSE3_64_WIN-NEXT:    retq
 ;
 ; SSE3_64_LIN-LABEL: x_to_u64:
@@ -1174,17 +1170,14 @@
 ; SSE3_64_LIN-NEXT:    flds {{.*}}(%rip)
 ; SSE3_64_LIN-NEXT:    fld %st(1)
 ; SSE3_64_LIN-NEXT:    fsub %st(1)
+; SSE3_64_LIN-NEXT:    xorl %eax, %eax
+; SSE3_64_LIN-NEXT:    fxch %st(1)
+; SSE3_64_LIN-NEXT:    fucompi %st(2)
+; SSE3_64_LIN-NEXT:    fcmovnbe %st(1), %st(0)
+; SSE3_64_LIN-NEXT:    fstp %st(1)
 ; SSE3_64_LIN-NEXT:    fisttpll -{{[0-9]+}}(%rsp)
-; SSE3_64_LIN-NEXT:    fld %st(1)
-; SSE3_64_LIN-NEXT:    fisttpll -{{[0-9]+}}(%rsp)
-; SSE3_64_LIN-NEXT:    fucompi %st(1)
-; SSE3_64_LIN-NEXT:    fstp %st(0)
-; SSE3_64_LIN-NEXT:    jbe .LBB4_1
-; SSE3_64_LIN-NEXT:  # %bb.2:
-; SSE3_64_LIN-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
-; SSE3_64_LIN-NEXT:    retq
-; SSE3_64_LIN-NEXT:  .LBB4_1:
-; SSE3_64_LIN-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; SSE3_64_LIN-NEXT:    setbe %al
+; SSE3_64_LIN-NEXT:    shlq $63, %rax
 ; SSE3_64_LIN-NEXT:    xorq -{{[0-9]+}}(%rsp), %rax
 ; SSE3_64_LIN-NEXT:    retq
 ;
@@ -1246,37 +1239,27 @@
 ;
 ; SSE2_64_WIN-LABEL: x_to_u64:
 ; SSE2_64_WIN:       # %bb.0:
-; SSE2_64_WIN-NEXT:    subq $24, %rsp
+; SSE2_64_WIN-NEXT:    subq $16, %rsp
 ; SSE2_64_WIN-NEXT:    fldt (%rcx)
 ; SSE2_64_WIN-NEXT:    flds __real@{{.*}}(%rip)
 ; SSE2_64_WIN-NEXT:    fld %st(1)
 ; SSE2_64_WIN-NEXT:    fsub %st(1)
+; SSE2_64_WIN-NEXT:    xorl %eax, %eax
+; SSE2_64_WIN-NEXT:    fxch %st(1)
+; SSE2_64_WIN-NEXT:    fucompi %st(2)
+; SSE2_64_WIN-NEXT:    fcmovnbe %st(1), %st(0)
+; SSE2_64_WIN-NEXT:    fstp %st(1)
 ; SSE2_64_WIN-NEXT:    fnstcw {{[0-9]+}}(%rsp)
-; SSE2_64_WIN-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax
+; SSE2_64_WIN-NEXT:    movzwl {{[0-9]+}}(%rsp), %ecx
 ; SSE2_64_WIN-NEXT:    movw $3199, {{[0-9]+}}(%rsp) # imm = 0xC7F
 ; SSE2_64_WIN-NEXT:    fldcw {{[0-9]+}}(%rsp)
-; SSE2_64_WIN-NEXT:    movw %ax, {{[0-9]+}}(%rsp)
+; SSE2_64_WIN-NEXT:    movw %cx, {{[0-9]+}}(%rsp)
 ; SSE2_64_WIN-NEXT:    fistpll {{[0-9]+}}(%rsp)
 ; SSE2_64_WIN-NEXT:    fldcw {{[0-9]+}}(%rsp)
-; SSE2_64_WIN-NEXT:    fnstcw {{[0-9]+}}(%rsp)
-; SSE2_64_WIN-NEXT:    movzwl {{[0-9]+}}(%rsp), %eax
-; SSE2_64_WIN-NEXT:    movw $3199, {{[0-9]+}}(%rsp) # imm = 0xC7F
-; SSE2_64_WIN-NEXT:    fldcw {{[0-9]+}}(%rsp)
-; SSE2_64_WIN-NEXT:    movw %ax, {{[0-9]+}}(%rsp)
-; SSE2_64_WIN-NEXT:    fld %st(1)
-; SSE2_64_WIN-NEXT:    fistpll {{[0-9]+}}(%rsp)
-; SSE2_64_WIN-NEXT:    fldcw {{[0-9]+}}(%rsp)
-; SSE2_64_WIN-NEXT:    fucompi %st(1)
-; SSE2_64_WIN-NEXT:    fstp %st(0)
-; SSE2_64_WIN-NEXT:    jbe .LBB4_1
-; SSE2_64_WIN-NEXT:  # %bb.2:
-; SSE2_64_WIN-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; SSE2_64_WIN-NEXT:    addq $24, %rsp
-; SSE2_64_WIN-NEXT:    retq
-; SSE2_64_WIN-NEXT:  .LBB4_1:
-; SSE2_64_WIN-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; SSE2_64_WIN-NEXT:    setbe %al
+; SSE2_64_WIN-NEXT:    shlq $63, %rax
 ; SSE2_64_WIN-NEXT:    xorq {{[0-9]+}}(%rsp), %rax
-; SSE2_64_WIN-NEXT:    addq $24, %rsp
+; SSE2_64_WIN-NEXT:    addq $16, %rsp
 ; SSE2_64_WIN-NEXT:    retq
 ;
 ; SSE2_64_LIN-LABEL: x_to_u64:
@@ -1285,29 +1268,20 @@
 ; SSE2_64_LIN-NEXT:    flds {{.*}}(%rip)
 ; SSE2_64_LIN-NEXT:    fld %st(1)
 ; SSE2_64_LIN-NEXT:    fsub %st(1)
+; SSE2_64_LIN-NEXT:    xorl %eax, %eax
+; SSE2_64_LIN-NEXT:    fxch %st(1)
+; SSE2_64_LIN-NEXT:    fucompi %st(2)
+; SSE2_64_LIN-NEXT:    fcmovnbe %st(1), %st(0)
+; SSE2_64_LIN-NEXT:    fstp %st(1)
 ; SSE2_64_LIN-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
-; SSE2_64_LIN-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
+; SSE2_64_LIN-NEXT:    movzwl -{{[0-9]+}}(%rsp), %ecx
 ; SSE2_64_LIN-NEXT:    movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
 ; SSE2_64_LIN-NEXT:    fldcw -{{[0-9]+}}(%rsp)
-; SSE2_64_LIN-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
+; SSE2_64_LIN-NEXT:    movw %cx, -{{[0-9]+}}(%rsp)
 ; SSE2_64_LIN-NEXT:    fistpll -{{[0-9]+}}(%rsp)
 ; SSE2_64_LIN-NEXT:    fldcw -{{[0-9]+}}(%rsp)
-; SSE2_64_LIN-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
-; SSE2_64_LIN-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
-; SSE2_64_LIN-NEXT:    movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
-; SSE2_64_LIN-NEXT:    fldcw -{{[0-9]+}}(%rsp)
-; SSE2_64_LIN-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
-; SSE2_64_LIN-NEXT:    fld %st(1)
-; SSE2_64_LIN-NEXT:    fistpll -{{[0-9]+}}(%rsp)
-; SSE2_64_LIN-NEXT:    fldcw -{{[0-9]+}}(%rsp)
-; SSE2_64_LIN-NEXT:    fucompi %st(1)
-; SSE2_64_LIN-NEXT:    fstp %st(0)
-; SSE2_64_LIN-NEXT:    jbe .LBB4_1
-; SSE2_64_LIN-NEXT:  # %bb.2:
-; SSE2_64_LIN-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
-; SSE2_64_LIN-NEXT:    retq
-; SSE2_64_LIN-NEXT:  .LBB4_1:
-; SSE2_64_LIN-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; SSE2_64_LIN-NEXT:    setbe %al
+; SSE2_64_LIN-NEXT:    shlq $63, %rax
 ; SSE2_64_LIN-NEXT:    xorq -{{[0-9]+}}(%rsp), %rax
 ; SSE2_64_LIN-NEXT:    retq
 ;
Index: test/CodeGen/X86/trunc-subvector.ll
===================================================================
--- test/CodeGen/X86/trunc-subvector.ll
+++ test/CodeGen/X86/trunc-subvector.ll
@@ -108,6 +108,7 @@
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm1
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
 ; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7],ymm1[24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23]
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,2,3]
@@ -227,6 +228,7 @@
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7],ymm1[24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23]
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,2,3]
Index: test/CodeGen/X86/vector-rotate-128.ll
===================================================================
--- test/CodeGen/X86/vector-rotate-128.ll
+++ test/CodeGen/X86/vector-rotate-128.ll
@@ -761,7 +761,6 @@
 ; SSE41-LABEL: splatvar_rotate_v4i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
-; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
 ; SSE41-NEXT:    movdqa %xmm0, %xmm3
 ; SSE41-NEXT:    pslld %xmm2, %xmm3
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [32,32,32,32]
@@ -774,7 +773,6 @@
 ; AVX1-LABEL: splatvar_rotate_v4i32:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
 ; AVX1-NEXT:    vpslld %xmm2, %xmm0, %xmm2
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32]
 ; AVX1-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
@@ -786,7 +784,6 @@
 ; AVX2-LABEL: splatvar_rotate_v4i32:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
-; AVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
 ; AVX2-NEXT:    vpslld %xmm2, %xmm0, %xmm2
 ; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
 ; AVX2-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
@@ -876,8 +873,6 @@
 ; SSE41-LABEL: splatvar_rotate_v8i16:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; SSE41-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
 ; SSE41-NEXT:    movdqa %xmm0, %xmm3
 ; SSE41-NEXT:    psllw %xmm2, %xmm3
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
@@ -887,35 +882,20 @@
 ; SSE41-NEXT:    por %xmm3, %xmm0
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: splatvar_rotate_v8i16:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7]
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; AVX1-NEXT:    vpsllw %xmm2, %xmm0, %xmm2
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX1-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
-; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpor %xmm0, %xmm2, %xmm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: splatvar_rotate_v8i16:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm1
-; AVX2-NEXT:    vpsllw %xmm2, %xmm0, %xmm2
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX2-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
-; AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX2-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
-; AVX2-NEXT:    retq
+; AVX-LABEL: splatvar_rotate_v8i16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX-NEXT:    vpsllw %xmm2, %xmm0, %xmm2
+; AVX-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
+; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpor %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: splatvar_rotate_v8i16:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT:    vpbroadcastw %xmm1, %xmm1
 ; AVX512-NEXT:    vpsllw %xmm2, %xmm0, %xmm2
 ; AVX512-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
 ; AVX512-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
Index: test/CodeGen/X86/vector-rotate-256.ll
===================================================================
--- test/CodeGen/X86/vector-rotate-256.ll
+++ test/CodeGen/X86/vector-rotate-256.ll
@@ -602,14 +602,13 @@
 ;
 ; AVX2-LABEL: splatvar_rotate_v8i32:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastd %xmm1, %ymm2
-; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX2-NEXT:    vpslld %xmm1, %ymm0, %ymm1
+; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
+; AVX2-NEXT:    vpslld %xmm2, %ymm0, %ymm2
 ; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
-; AVX2-NEXT:    vpsubd %xmm2, %xmm3, %xmm2
-; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX2-NEXT:    vpsrld %xmm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
+; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-NEXT:    vpsrld %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpor %ymm0, %ymm2, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: splatvar_rotate_v8i32:
@@ -687,26 +686,24 @@
 ;
 ; AVX2-LABEL: splatvar_rotate_v16i16:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastw %xmm1, %ymm2
-; AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX2-NEXT:    vpsllw %xmm1, %ymm0, %ymm1
+; AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX2-NEXT:    vpsllw %xmm2, %ymm0, %ymm2
 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX2-NEXT:    vpsubw %xmm2, %xmm3, %xmm2
-; AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; AVX2-NEXT:    vpsrlw %xmm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
+; AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX2-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpor %ymm0, %ymm2, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: splatvar_rotate_v16i16:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpbroadcastw %xmm1, %ymm2
-; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT:    vpsllw %xmm1, %ymm0, %ymm1
+; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT:    vpsllw %xmm2, %ymm0, %ymm2
 ; AVX512-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512-NEXT:    vpsubw %xmm2, %xmm3, %xmm2
-; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; AVX512-NEXT:    vpsrlw %xmm2, %ymm0, %ymm0
-; AVX512-NEXT:    vpor %ymm0, %ymm1, %ymm0
+; AVX512-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
+; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpor %ymm0, %ymm2, %ymm0
 ; AVX512-NEXT:    retq
 ;
 ; XOPAVX1-LABEL: splatvar_rotate_v16i16:
Index: test/CodeGen/X86/vector-rotate-512.ll
===================================================================
--- test/CodeGen/X86/vector-rotate-512.ll
+++ test/CodeGen/X86/vector-rotate-512.ll
@@ -343,26 +343,24 @@
 ;
 ; AVX512BW-LABEL: splatvar_rotate_v32i16:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpbroadcastw %xmm1, %zmm2
-; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT:    vpsllw %xmm1, %zmm0, %zmm1
+; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT:    vpsllw %xmm2, %zmm0, %zmm2
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512BW-NEXT:    vpsubw %xmm2, %xmm3, %xmm2
-; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; AVX512BW-NEXT:    vpsrlw %xmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vporq %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
+; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vporq %zmm0, %zmm2, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VLBW-LABEL: splatvar_rotate_v32i16:
 ; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    vpbroadcastw %xmm1, %zmm2
-; AVX512VLBW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT:    vpsllw %xmm1, %zmm0, %zmm1
+; AVX512VLBW-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT:    vpsllw %xmm2, %zmm0, %zmm2
 ; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VLBW-NEXT:    vpsubw %xmm2, %xmm3, %xmm2
-; AVX512VLBW-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; AVX512VLBW-NEXT:    vpsrlw %xmm2, %zmm0, %zmm0
-; AVX512VLBW-NEXT:    vporq %zmm0, %zmm1, %zmm0
+; AVX512VLBW-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
+; AVX512VLBW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512VLBW-NEXT:    vporq %zmm0, %zmm2, %zmm0
 ; AVX512VLBW-NEXT:    retq
   %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
   %splat16 = sub <32 x i16> <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>, %splat
Index: test/MC/AArch64/armv8.2a-at.s
===================================================================
--- test/MC/AArch64/armv8.2a-at.s
+++ test/MC/AArch64/armv8.2a-at.s
@@ -1,11 +1,9 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a %s -o - | FileCheck %s
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.2a,+pan-rwv %s -o - | FileCheck %s
-// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.2a %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR
-// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a,-pan-rwv %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a < %s | FileCheck %s
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.2a < %s 2>&1 | FileCheck %s --check-prefix=ERROR
 
   at s1e1rp, x1
   at s1e1wp, x2
 // CHECK: at      s1e1rp, x1              // encoding: [0x01,0x79,0x08,0xd5]
 // CHECK: at      s1e1wp, x2              // encoding: [0x22,0x79,0x08,0xd5]
-// ERROR: error: AT S1E1RP requires pan-rwv
-// ERROR: error: AT S1E1WP requires pan-rwv
+// ERROR: error: AT S1E1RP requires ARMv8.2a
+// ERROR: error: AT S1E1WP requires ARMv8.2a
Index: test/MC/AArch64/armv8.2a-mmfr2.s
===================================================================
--- /dev/null
+++ test/MC/AArch64/armv8.2a-mmfr2.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a < %s | FileCheck %s
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.2a < %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+  mrs x3, id_aa64mmfr2_el1
+// CHECK: mrs x3, ID_AA64MMFR2_EL1       // encoding: [0x43,0x07,0x38,0xd5]
+// ERROR: error: expected readable system register
Index: test/MC/AArch64/armv8.2a-persistent-memory.s
===================================================================
--- test/MC/AArch64/armv8.2a-persistent-memory.s
+++ test/MC/AArch64/armv8.2a-persistent-memory.s
@@ -1,7 +1,6 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a -o - %s | FileCheck %s
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+ccpp -o - %s | FileCheck %s
-// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.2a -o - %s 2>&1 | FileCheck %s --check-prefix=ERROR
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a < %s | FileCheck %s
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.2a < %s 2>&1 | FileCheck %s --check-prefix=ERROR
 
   dc cvap, x7
 // CHECK: dc cvap, x7   // encoding: [0x27,0x7c,0x0b,0xd5]
-// ERROR: error: DC CVAP requires ccpp
+// ERROR: error: DC CVAP requires ARMv8.2a
Index: test/MC/AArch64/armv8.3a-complex.s
===================================================================
--- test/MC/AArch64/armv8.3a-complex.s
+++ test/MC/AArch64/armv8.3a-complex.s
@@ -1,44 +1,43 @@
-// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.3a,-fullfp16 -o - %s 2>%t | \
-// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=NO-FP16
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.3a,-fullfp16 < %s 2>%t | FileCheck %s --check-prefix=CHECK --check-prefix=NO-FP16
 // RUN: FileCheck --check-prefix=STDERR --check-prefix=STDERR-NO-FP16 %s < %t
-// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.3a,+fullfp16 -o - %s 2>%t | \
-// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=FP16
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.3a,+fullfp16 < %s 2>%t | FileCheck %s --check-prefix=CHECK --check-prefix=FP16
 // RUN: FileCheck --check-prefix=STDERR --check-prefix=STDERR-FP16 %s < %t
-// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a,-v8.3a,+fullfp16,+complxnum -o - %s 2>&1 | \
-// RUN: FileCheck %s --check-prefix=FP16
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a,-v8.3a,+fullfp16 < %s 2>&1 | FileCheck %s --check-prefix=NO-V83A
+
+
 // ==== FCMLA vector ====
 // Types
   fcmla v0.4h, v1.4h, v2.4h, #0
 // FP16: fcmla   v0.4h, v1.4h, v2.4h, #0 // encoding: [0x20,0xc4,0x42,0x2e]
 // STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16
-// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
   fcmla v0.8h, v1.8h, v2.8h, #0
 // FP16: fcmla   v0.8h, v1.8h, v2.8h, #0 // encoding: [0x20,0xc4,0x42,0x6e]
 // STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16
-// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
   fcmla v0.2s, v1.2s, v2.2s, #0
 // CHECK: fcmla   v0.2s, v1.2s, v2.2s, #0 // encoding: [0x20,0xc4,0x82,0x2e]
-// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
   fcmla v0.4s, v1.4s, v2.4s, #0
 // CHECK: fcmla   v0.4s, v1.4s, v2.4s, #0 // encoding: [0x20,0xc4,0x82,0x6e]
-// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
   fcmla v0.2d, v1.2d, v2.2d, #0
 // CHECK: fcmla   v0.2d, v1.2d, v2.2d, #0 // encoding: [0x20,0xc4,0xc2,0x6e]
-// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
 
 // Rotations
   fcmla v0.2s, v1.2s, v2.2s, #0
 // CHECK: fcmla   v0.2s, v1.2s, v2.2s, #0 // encoding: [0x20,0xc4,0x82,0x2e]
-// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
   fcmla v0.2s, v1.2s, v2.2s, #90
 // CHECK: fcmla   v0.2s, v1.2s, v2.2s, #90 // encoding: [0x20,0xcc,0x82,0x2e]
-// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
   fcmla v0.2s, v1.2s, v2.2s, #180
 // CHECK: fcmla   v0.2s, v1.2s, v2.2s, #180 // encoding: [0x20,0xd4,0x82,0x2e]
-// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
   fcmla v0.2s, v1.2s, v2.2s, #270
 // CHECK: fcmla   v0.2s, v1.2s, v2.2s, #270 // encoding: [0x20,0xdc,0x82,0x2e]
-// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
 
 // Invalid rotations
   fcmla v0.2s, v1.2s, v2.2s, #1
@@ -53,28 +52,28 @@
   fcadd v0.4h, v1.4h, v2.4h, #90
 // FP16: fcadd   v0.4h, v1.4h, v2.4h, #90 // encoding: [0x20,0xe4,0x42,0x2e]
 // STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16
-// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
   fcadd v0.8h, v1.8h, v2.8h, #90
 // FP16: fcadd   v0.8h, v1.8h, v2.8h, #90 // encoding: [0x20,0xe4,0x42,0x6e]
 // STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16
-// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
   fcadd v0.2s, v1.2s, v2.2s, #90
 // CHECK: fcadd   v0.2s, v1.2s, v2.2s, #90 // encoding: [0x20,0xe4,0x82,0x2e]
-// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
   fcadd v0.4s, v1.4s, v2.4s, #90
 // CHECK: fcadd   v0.4s, v1.4s, v2.4s, #90 // encoding: [0x20,0xe4,0x82,0x6e]
-// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
   fcadd v0.2d, v1.2d, v2.2d, #90
 // CHECK: fcadd   v0.2d, v1.2d, v2.2d, #90 // encoding: [0x20,0xe4,0xc2,0x6e]
-// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
 
 // Rotations
   fcadd v0.2s, v1.2s, v2.2s, #90
 // CHECK: fcadd   v0.2s, v1.2s, v2.2s, #90 // encoding: [0x20,0xe4,0x82,0x2e]
-// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
   fcadd v0.2s, v1.2s, v2.2s, #270
 // CHECK: fcadd   v0.2s, v1.2s, v2.2s, #270 // encoding: [0x20,0xf4,0x82,0x2e]
-// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
 
 // Invalid rotations
   fcadd v0.2s, v1.2s, v2.2s, #1
@@ -93,17 +92,17 @@
   fcmla v0.4h, v1.4h, v2.h[0], #0
 // FP16: fcmla   v0.4h, v1.4h, v2.h[0], #0 // encoding: [0x20,0x10,0x42,0x2f]
 // STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16
-// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
   fcmla v0.8h, v1.8h, v2.h[0], #0
 // FP16: fcmla   v0.8h, v1.8h, v2.h[0], #0 // encoding: [0x20,0x10,0x42,0x6f]
 // STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16
-// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
   fcmla v0.2s, v1.2s, v2.s[0], #0
 // STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: invalid operand for instruction
 // NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: invalid operand for instruction
   fcmla v0.4s, v1.4s, v2.s[0], #0
 // CHECK: fcmla   v0.4s, v1.4s, v2.s[0], #0 // encoding: [0x20,0x10,0x82,0x6f]
-// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
   fcmla v0.2d, v1.2d, v2.d[0], #0
 // STDERR: :[[@LINE-1]]:{{[0-9]*}}: error: invalid operand for instruction
 // NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: invalid operand for instruction
@@ -111,26 +110,26 @@
 // Rotations
   fcmla v0.4s, v1.4s, v2.s[0], #90
 // CHECK: fcmla   v0.4s, v1.4s, v2.s[0], #90 // encoding: [0x20,0x30,0x82,0x6f]
-// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
   fcmla v0.4s, v1.4s, v2.s[0], #180
 // CHECK: fcmla   v0.4s, v1.4s, v2.s[0], #180 // encoding: [0x20,0x50,0x82,0x6f]
-// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
   fcmla v0.4s, v1.4s, v2.s[0], #270
 // CHECK: fcmla   v0.4s, v1.4s, v2.s[0], #270 // encoding: [0x20,0x70,0x82,0x6f]
-// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
 
 // Valid indices
   fcmla v0.4h, v1.4h, v2.h[1], #0
 // FP16: fcmla   v0.4h, v1.4h, v2.h[1], #0 // encoding: [0x20,0x10,0x62,0x2f]
 // STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16
-// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
   fcmla v0.8h, v1.8h, v2.h[3], #0
 // FP16: fcmla   v0.8h, v1.8h, v2.h[3], #0 // encoding: [0x20,0x18,0x62,0x6f]
 // STDERR-NO-FP16: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: fullfp16
-// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-3]]:{{[0-9]*}}: error: instruction requires: armv8.3a
   fcmla v0.4s, v1.4s, v2.s[1], #0
 // CHECK: fcmla   v0.4s, v1.4s, v2.s[1], #0 // encoding: [0x20,0x18,0x82,0x6f]
-// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: complxnum
+// NO-V83A: :[[@LINE-2]]:{{[0-9]*}}: error: instruction requires: armv8.3a
 
 // Invalid indices
   fcmla v0.4h, v1.4h, v2.h[2], #0
Index: test/MC/AArch64/armv8.3a-js.s
===================================================================
--- test/MC/AArch64/armv8.3a-js.s
+++ test/MC/AArch64/armv8.3a-js.s
@@ -1,20 +1,10 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.3a -o - %s 2>&1 | \
-// RUN: FileCheck %s
-
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+jsconv -o - %s 2>&1 | \
-// RUN: FileCheck %s
-
-// RUN: not llvm-mc -triple aarch64-none-linux-gnu %s 2>&1 | \
-// RUN: FileCheck --check-prefix=CHECK-JS %s
-
-// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+jsconv,-fp-armv8 -o - %s 2>&1 |\
-// RUN: FileCheck --check-prefix=CHECK-REQ %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.3a < %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-REQ < %t %s
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.3a,-fp-armv8 < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NOFP < %t %s
 
   fjcvtzs w0, d0
 // CHECK: fjcvtzs w0, d0    // encoding: [0x00,0x00,0x7e,0x1e]
-
-// CHECK-JS: error: instruction requires: jsconv
-
-// NOJS: error: instruction requires: jsconv
-
-// CHECK-REQ: error: instruction requires: fp-armv8 jsconv
+// CHECK-REQ: error: instruction requires: armv8.3a
+// CHECK-NOFP: error: instruction requires: fp-armv8
Index: test/MC/AArch64/armv8.3a-signed-pointer.s
===================================================================
--- test/MC/AArch64/armv8.3a-signed-pointer.s
+++ test/MC/AArch64/armv8.3a-signed-pointer.s
@@ -1,11 +1,7 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.3a -o - %s 2>&1 | \
-// RUN: FileCheck --check-prefixes=CHECK,ALL %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.3a < %s 2> %t | FileCheck %s
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-REQ %s < %t
 
-// RUN: not llvm-mc -triple aarch64-none-linux-gnu %s -o - > %t.1 2>%t.2
-// RUN: FileCheck --check-prefixes=ALL,NOENC %s < %t.1
-// RUN: FileCheck --check-prefix=CHECK-REQ %s < %t.2
-
-// ALL: .text
   mrs x0, apiakeylo_el1
   mrs x0, apiakeyhi_el1
   mrs x0, apibkeylo_el1
@@ -16,39 +12,28 @@
   mrs x0, apdbkeyhi_el1
   mrs x0, apgakeylo_el1
   mrs x0, apgakeyhi_el1
-// ALL-EMPTY:
-// ALL-EMPTY:
-// CHECK-NEXT: mrs x0, APIAKeyLo_EL1     // encoding: [0x00,0x21,0x38,0xd5]
-// CHECK-NEXT: mrs x0, APIAKeyHi_EL1     // encoding: [0x20,0x21,0x38,0xd5]
-// CHECK-NEXT: mrs x0, APIBKeyLo_EL1     // encoding: [0x40,0x21,0x38,0xd5]
-// CHECK-NEXT: mrs x0, APIBKeyHi_EL1     // encoding: [0x60,0x21,0x38,0xd5]
-// CHECK-NEXT: mrs x0, APDAKeyLo_EL1     // encoding: [0x00,0x22,0x38,0xd5]
-// CHECK-NEXT: mrs x0, APDAKeyHi_EL1     // encoding: [0x20,0x22,0x38,0xd5]
-// CHECK-NEXT: mrs x0, APDBKeyLo_EL1     // encoding: [0x40,0x22,0x38,0xd5]
-// CHECK-NEXT: mrs x0, APDBKeyHi_EL1     // encoding: [0x60,0x22,0x38,0xd5]
-// CHECK-NEXT: mrs x0, APGAKeyLo_EL1     // encoding: [0x00,0x23,0x38,0xd5]
-// CHECK-NEXT: mrs x0, APGAKeyHi_EL1     // encoding: [0x20,0x23,0x38,0xd5]
+
+// CHECK: mrs x0, APIAKeyLo_EL1     // encoding: [0x00,0x21,0x38,0xd5]
+// CHECK: mrs x0, APIAKeyHi_EL1     // encoding: [0x20,0x21,0x38,0xd5]
+// CHECK: mrs x0, APIBKeyLo_EL1     // encoding: [0x40,0x21,0x38,0xd5]
+// CHECK: mrs x0, APIBKeyHi_EL1     // encoding: [0x60,0x21,0x38,0xd5]
+// CHECK: mrs x0, APDAKeyLo_EL1     // encoding: [0x00,0x22,0x38,0xd5]
+// CHECK: mrs x0, APDAKeyHi_EL1     // encoding: [0x20,0x22,0x38,0xd5]
+// CHECK: mrs x0, APDBKeyLo_EL1     // encoding: [0x40,0x22,0x38,0xd5]
+// CHECK: mrs x0, APDBKeyHi_EL1     // encoding: [0x60,0x22,0x38,0xd5]
+// CHECK: mrs x0, APGAKeyLo_EL1     // encoding: [0x00,0x23,0x38,0xd5]
+// CHECK: mrs x0, APGAKeyHi_EL1     // encoding: [0x20,0x23,0x38,0xd5]
 
 // CHECK-REQ: error: expected readable system register
-// CHECK-REQ-NEXT: mrs x0, apiakeylo_el1
 // CHECK-REQ: error: expected readable system register
-// CHECK-REQ-NEXT: mrs x0, apiakeyhi_el1
 // CHECK-REQ: error: expected readable system register
-// CHECK-REQ-NEXT: mrs x0, apibkeylo_el1
 // CHECK-REQ: error: expected readable system register
-// CHECK-REQ-NEXT: mrs x0, apibkeyhi_el1
 // CHECK-REQ: error: expected readable system register
-// CHECK-REQ-NEXT: mrs x0, apdakeylo_el1
 // CHECK-REQ: error: expected readable system register
-// CHECK-REQ-NEXT: mrs x0, apdakeyhi_el1
 // CHECK-REQ: error: expected readable system register
-// CHECK-REQ-NEXT: mrs x0, apdbkeylo_el1
 // CHECK-REQ: error: expected readable system register
-// CHECK-REQ-NEXT: mrs x0, apdbkeyhi_el1
 // CHECK-REQ: error: expected readable system register
-// CHECK-REQ-NEXT: mrs x0, apgakeylo_el1
 // CHECK-REQ: error: expected readable system register
-// CHECK-REQ-NEXT: mrs x0, apgakeyhi_el1
 
   msr apiakeylo_el1, x0
   msr apiakeyhi_el1, x0
@@ -60,249 +45,191 @@
   msr apdbkeyhi_el1, x0
   msr apgakeylo_el1, x0
   msr apgakeyhi_el1, x0
-// ALL-EMPTY:
-// ALL-EMPTY:
-// CHECK-NEXT: msr APIAKeyLo_EL1, x0     // encoding: [0x00,0x21,0x18,0xd5]
-// CHECK-NEXT: msr APIAKeyHi_EL1, x0     // encoding: [0x20,0x21,0x18,0xd5]
-// CHECK-NEXT: msr APIBKeyLo_EL1, x0     // encoding: [0x40,0x21,0x18,0xd5]
-// CHECK-NEXT: msr APIBKeyHi_EL1, x0     // encoding: [0x60,0x21,0x18,0xd5]
-// CHECK-NEXT: msr APDAKeyLo_EL1, x0     // encoding: [0x00,0x22,0x18,0xd5]
-// CHECK-NEXT: msr APDAKeyHi_EL1, x0     // encoding: [0x20,0x22,0x18,0xd5]
-// CHECK-NEXT: msr APDBKeyLo_EL1, x0     // encoding: [0x40,0x22,0x18,0xd5]
-// CHECK-NEXT: msr APDBKeyHi_EL1, x0     // encoding: [0x60,0x22,0x18,0xd5]
-// CHECK-NEXT: msr APGAKeyLo_EL1, x0     // encoding: [0x00,0x23,0x18,0xd5]
-// CHECK-NEXT: msr APGAKeyHi_EL1, x0     // encoding: [0x20,0x23,0x18,0xd5]
+
+// CHECK: msr APIAKeyLo_EL1, x0     // encoding: [0x00,0x21,0x18,0xd5]
+// CHECK: msr APIAKeyHi_EL1, x0     // encoding: [0x20,0x21,0x18,0xd5]
+// CHECK: msr APIBKeyLo_EL1, x0     // encoding: [0x40,0x21,0x18,0xd5]
+// CHECK: msr APIBKeyHi_EL1, x0     // encoding: [0x60,0x21,0x18,0xd5]
+// CHECK: msr APDAKeyLo_EL1, x0     // encoding: [0x00,0x22,0x18,0xd5]
+// CHECK: msr APDAKeyHi_EL1, x0     // encoding: [0x20,0x22,0x18,0xd5]
+// CHECK: msr APDBKeyLo_EL1, x0     // encoding: [0x40,0x22,0x18,0xd5]
+// CHECK: msr APDBKeyHi_EL1, x0     // encoding: [0x60,0x22,0x18,0xd5]
+// CHECK: msr APGAKeyLo_EL1, x0     // encoding: [0x00,0x23,0x18,0xd5]
+// CHECK: msr APGAKeyHi_EL1, x0     // encoding: [0x20,0x23,0x18,0xd5]
 
 // CHECK-REQ: error: expected writable system register or pstate
-// CHECK-REQ-NEXT:  msr apiakeylo_el1, x0
 // CHECK-REQ: error: expected writable system register or pstate
-// CHECK-REQ-NEXT:  msr apiakeyhi_el1, x0
 // CHECK-REQ: error: expected writable system register or pstate
-// CHECK-REQ-NEXT:  msr apibkeylo_el1, x0
 // CHECK-REQ: error: expected writable system register or pstate
-// CHECK-REQ-NEXT:  msr apibkeyhi_el1, x0
 // CHECK-REQ: error: expected writable system register or pstate
-// CHECK-REQ-NEXT:  msr apdakeylo_el1, x0
 // CHECK-REQ: error: expected writable system register or pstate
-// CHECK-REQ-NEXT:  msr apdakeyhi_el1, x0
 // CHECK-REQ: error: expected writable system register or pstate
-// CHECK-REQ-NEXT:  msr apdbkeylo_el1, x0
 // CHECK-REQ: error: expected writable system register or pstate
-// CHECK-REQ-NEXT:  msr apdbkeyhi_el1, x0
 // CHECK-REQ: error: expected writable system register or pstate
-// CHECK-REQ-NEXT:  msr apgakeylo_el1, x0
 // CHECK-REQ: error: expected writable system register or pstate
-// CHECK-REQ-NEXT:  msr apgakeyhi_el1, x0
 
-// ALL-EMPTY:
-// ALL-EMPTY:
   paciasp
-// CHECK-NEXT: paciasp        // encoding: [0x3f,0x23,0x03,0xd5]
-// NOENC-NEXT: paciasp
+// CHECK: paciasp        // encoding: [0x3f,0x23,0x03,0xd5]
+// CHECK-REQ-NOT: error: instruction requires: armv8.3a
   autiasp
-// CHECK-NEXT: autiasp        // encoding: [0xbf,0x23,0x03,0xd5]
-// NOENC-NEXT: autiasp
+// CHECK: autiasp        // encoding: [0xbf,0x23,0x03,0xd5]
+// CHECK-REQ-NOT: error: instruction requires: armv8.3a
   paciaz
-// CHECK-NEXT: paciaz         // encoding: [0x1f,0x23,0x03,0xd5]
-// NOENC-NEXT: paciaz
+// CHECK: paciaz         // encoding: [0x1f,0x23,0x03,0xd5]
+// CHECK-REQ-NOT: error: instruction requires: armv8.3a
   autiaz
-// CHECK-NEXT: autiaz         // encoding: [0x9f,0x23,0x03,0xd5]
-// NOENC-NEXT: autiaz
+// CHECK: autiaz         // encoding: [0x9f,0x23,0x03,0xd5]
+// CHECK-REQ-NOT: error: instruction requires: armv8.3a
   pacia1716
-// CHECK-NEXT: pacia1716      // encoding: [0x1f,0x21,0x03,0xd5]
-// NOENC-NEXT: pacia1716
+// CHECK: pacia1716      // encoding: [0x1f,0x21,0x03,0xd5]
+// CHECK-REQ-NOT: error: instruction requires: armv8.3a
   autia1716
-// CHECK-NEXT: autia1716      // encoding: [0x9f,0x21,0x03,0xd5]
-// NOENC-NEXT: autia1716
+// CHECK: autia1716      // encoding: [0x9f,0x21,0x03,0xd5]
+// CHECK-REQ-NOT: error: instruction requires: armv8.3a
   pacibsp
-// CHECK-NEXT: pacibsp        // encoding: [0x7f,0x23,0x03,0xd5]
-// NOENC-NEXT: pacibsp
+// CHECK: pacibsp        // encoding: [0x7f,0x23,0x03,0xd5]
+// CHECK-REQ-NOT: error: instruction requires: armv8.3a
   autibsp
-// CHECK-NEXT: autibsp        // encoding: [0xff,0x23,0x03,0xd5]
-// NOENC-NEXT: autibsp
+// CHECK: autibsp        // encoding: [0xff,0x23,0x03,0xd5]
+// CHECK-REQ-NOT: error: instruction requires: armv8.3a
   pacibz
-// CHECK-NEXT: pacibz         // encoding: [0x5f,0x23,0x03,0xd5]
-// NOENC-NEXT: pacibz
+// CHECK: pacibz         // encoding: [0x5f,0x23,0x03,0xd5]
+// CHECK-REQ-NOT: error: instruction requires: armv8.3a
   autibz
-// CHECK-NEXT: autibz         // encoding: [0xdf,0x23,0x03,0xd5]
-// NOENC-NEXT: autibz
+// CHECK: autibz         // encoding: [0xdf,0x23,0x03,0xd5]
+// CHECK-REQ-NOT: error: instruction requires: armv8.3a
   pacib1716
-// CHECK-NEXT: pacib1716      // encoding: [0x5f,0x21,0x03,0xd5]
-// NOENC-NEXT: pacib1716
+// CHECK: pacib1716      // encoding: [0x5f,0x21,0x03,0xd5]
+// CHECK-REQ-NOT: error: instruction requires: armv8.3a
   autib1716
-// CHECK-NEXT: autib1716      // encoding: [0xdf,0x21,0x03,0xd5]
-// NOENC-NEXT: autib1716
+// CHECK: autib1716      // encoding: [0xdf,0x21,0x03,0xd5]
+// CHECK-REQ-NOT: error: instruction requires: armv8.3a
   xpaclri
-// CHECK-NEXT: xpaclri           // encoding: [0xff,0x20,0x03,0xd5]
-// NOENC-NEXT: xpaclri
+// CHECK: xpaclri           // encoding: [0xff,0x20,0x03,0xd5]
+// CHECK-REQ-NOT: error: instruction requires: armv8.3a
 
-// ALL-EMPTY:
   pacia x0, x1
-// CHECK-NEXT: pacia x0, x1     // encoding: [0x20,0x00,0xc1,0xda]
-// CHECK-REQ-NEXT:      ^
-// CHECK-REQ-NEXT: error: instruction requires: pa
-// CHECK-REQ-NEXT: pacia x0, x1
+// CHECK: pacia x0, x1     // encoding: [0x20,0x00,0xc1,0xda]
+// CHECK-REQ: error: instruction requires: armv8.3a
   autia x0, x1
-// CHECK-NEXT: autia x0, x1     // encoding: [0x20,0x10,0xc1,0xda]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT: autia x0, x1
+// CHECK: autia x0, x1     // encoding: [0x20,0x10,0xc1,0xda]
+// CHECK-REQ: error: instruction requires: armv8.3a
   pacda x0, x1
-// CHECK-NEXT: pacda x0, x1     // encoding: [0x20,0x08,0xc1,0xda]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  pacda x0, x1
+// CHECK: pacda x0, x1     // encoding: [0x20,0x08,0xc1,0xda]
+// CHECK-REQ: error: instruction requires: armv8.3a
   autda x0, x1
-// CHECK-NEXT: autda x0, x1     // encoding: [0x20,0x18,0xc1,0xda]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  autda x0, x1
+// CHECK: autda x0, x1     // encoding: [0x20,0x18,0xc1,0xda]
+// CHECK-REQ: error: instruction requires: armv8.3a
   pacib x0, x1
-// CHECK-NEXT: pacib x0, x1     // encoding: [0x20,0x04,0xc1,0xda]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  pacib x0, x1
+// CHECK: pacib x0, x1     // encoding: [0x20,0x04,0xc1,0xda]
+// CHECK-REQ: error: instruction requires: armv8.3a
   autib x0, x1
-// CHECK-NEXT: autib x0, x1     // encoding: [0x20,0x14,0xc1,0xda]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  autib x0, x1
+// CHECK: autib x0, x1     // encoding: [0x20,0x14,0xc1,0xda]
+// CHECK-REQ: error: instruction requires: armv8.3a
   pacdb x0, x1
-// CHECK-NEXT: pacdb x0, x1     // encoding: [0x20,0x0c,0xc1,0xda]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  pacdb x0, x1
+// CHECK: pacdb x0, x1     // encoding: [0x20,0x0c,0xc1,0xda]
+// CHECK-REQ: error: instruction requires: armv8.3a
   autdb x0, x1
-// CHECK-NEXT: autdb x0, x1     // encoding: [0x20,0x1c,0xc1,0xda]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  autdb x0, x1
+// CHECK: autdb x0, x1     // encoding: [0x20,0x1c,0xc1,0xda]
+// CHECK-REQ: error: instruction requires: armv8.3a
   pacga x0, x1, x2
-// CHECK-NEXT: pacga x0, x1, x2  // encoding: [0x20,0x30,0xc2,0x9a]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  pacga x0, x1, x2
+// CHECK: pacga x0, x1, x2  // encoding: [0x20,0x30,0xc2,0x9a]
+// CHECK-REQ: error: instruction requires: armv8.3a
   paciza x0
-// CHECK-NEXT: paciza x0         // encoding: [0xe0,0x23,0xc1,0xda]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  paciza x0
+// CHECK: paciza x0         // encoding: [0xe0,0x23,0xc1,0xda]
+// CHECK-REQ: error: instruction requires: armv8.3a
   autiza x0
-// CHECK-NEXT: autiza x0         // encoding: [0xe0,0x33,0xc1,0xda]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  autiza x0
+// CHECK: autiza x0         // encoding: [0xe0,0x33,0xc1,0xda]
+// CHECK-REQ: error: instruction requires: armv8.3a
   pacdza x0
-// CHECK-NEXT: pacdza x0         // encoding: [0xe0,0x2b,0xc1,0xda]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  pacdza x0
+// CHECK: pacdza x0         // encoding: [0xe0,0x2b,0xc1,0xda]
+// CHECK-REQ: error: instruction requires: armv8.3a
   autdza x0
-// CHECK-NEXT: autdza x0         // encoding: [0xe0,0x3b,0xc1,0xda]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  autdza x0
+// CHECK: autdza x0         // encoding: [0xe0,0x3b,0xc1,0xda]
+// CHECK-REQ: error: instruction requires: armv8.3a
   pacizb x0
-// CHECK-NEXT: pacizb x0         // encoding: [0xe0,0x27,0xc1,0xda]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  pacizb x0
+// CHECK: pacizb x0         // encoding: [0xe0,0x27,0xc1,0xda]
+// CHECK-REQ: error: instruction requires: armv8.3a
   autizb x0
-// CHECK-NEXT: autizb x0         // encoding: [0xe0,0x37,0xc1,0xda]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  autizb x0
+// CHECK: autizb x0         // encoding: [0xe0,0x37,0xc1,0xda]
+// CHECK-REQ: error: instruction requires: armv8.3a
   pacdzb x0
-// CHECK-NEXT: pacdzb x0         // encoding: [0xe0,0x2f,0xc1,0xda]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  pacdzb x0
+// CHECK: pacdzb x0         // encoding: [0xe0,0x2f,0xc1,0xda]
+// CHECK-REQ: error: instruction requires: armv8.3a
   autdzb x0
-// CHECK-NEXT: autdzb x0         // encoding: [0xe0,0x3f,0xc1,0xda]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  autdzb x0
+// CHECK: autdzb x0         // encoding: [0xe0,0x3f,0xc1,0xda]
+// CHECK-REQ: error: instruction requires: armv8.3a
   xpaci x0
-// CHECK-NEXT: xpaci x0          // encoding: [0xe0,0x43,0xc1,0xda]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  xpaci x0
+// CHECK: xpaci x0          // encoding: [0xe0,0x43,0xc1,0xda]
+// CHECK-REQ: error: instruction requires: armv8.3a
   xpacd x0
-// CHECK-NEXT: xpacd x0          // encoding: [0xe0,0x47,0xc1,0xda]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  xpacd x0
+// CHECK: xpacd x0          // encoding: [0xe0,0x47,0xc1,0xda]
+// CHECK-REQ: error: instruction requires: armv8.3a
 
   braa x0, x1
-// CHECK-EMPTY:
-// CHECK-NEXT: braa x0, x1       // encoding: [0x01,0x08,0x1f,0xd7]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  braa x0, x1
+// CHECK: braa x0, x1       // encoding: [0x01,0x08,0x1f,0xd7]
+// CHECK-REQ: error: instruction requires: armv8.3a
   brab x0, x1
-// CHECK-NEXT: brab x0, x1       // encoding: [0x01,0x0c,0x1f,0xd7]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  brab x0, x1
+// CHECK: brab x0, x1       // encoding: [0x01,0x0c,0x1f,0xd7]
+// CHECK-REQ: error: instruction requires: armv8.3a
   blraa x0, x1
-// CHECK-NEXT: blraa x0, x1      // encoding: [0x01,0x08,0x3f,0xd7]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  blraa x0, x1
+// CHECK: blraa x0, x1      // encoding: [0x01,0x08,0x3f,0xd7]
+// CHECK-REQ: error: instruction requires: armv8.3a
   blrab x0, x1
-// CHECK-NEXT: blrab x0, x1      // encoding: [0x01,0x0c,0x3f,0xd7]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  blrab x0, x1
+// CHECK: blrab x0, x1      // encoding: [0x01,0x0c,0x3f,0xd7]
+// CHECK-REQ: error: instruction requires: armv8.3a
 
   braaz x0
-// CHECK-EMPTY:
-// CHECK-NEXT: braaz x0          // encoding: [0x1f,0x08,0x1f,0xd6]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  braaz x0
+// CHECK: braaz x0          // encoding: [0x1f,0x08,0x1f,0xd6]
+// CHECK-REQ: error: instruction requires: armv8.3a
   brabz x0
-// CHECK-NEXT: brabz x0          // encoding: [0x1f,0x0c,0x1f,0xd6]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  brabz x0
+// CHECK: brabz x0          // encoding: [0x1f,0x0c,0x1f,0xd6]
+// CHECK-REQ: error: instruction requires: armv8.3a
   blraaz x0
-// CHECK-NEXT: blraaz x0         // encoding: [0x1f,0x08,0x3f,0xd6]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  blraaz x0
+// CHECK: blraaz x0         // encoding: [0x1f,0x08,0x3f,0xd6]
+// CHECK-REQ: error: instruction requires: armv8.3a
   blrabz x0
-// CHECK-NEXT: blrabz x0         // encoding: [0x1f,0x0c,0x3f,0xd6]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  blrabz x0
+// CHECK: blrabz x0         // encoding: [0x1f,0x0c,0x3f,0xd6]
+// CHECK-REQ: error: instruction requires: armv8.3a
   retaa
-// CHECK-NEXT: retaa             // encoding: [0xff,0x0b,0x5f,0xd6]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  retaa
+// CHECK: retaa             // encoding: [0xff,0x0b,0x5f,0xd6]
+// CHECK-REQ: error: instruction requires: armv8.3a
   retab
-// CHECK-NEXT: retab             // encoding: [0xff,0x0f,0x5f,0xd6]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  retab
+// CHECK: retab             // encoding: [0xff,0x0f,0x5f,0xd6]
+// CHECK-REQ: error: instruction requires: armv8.3a
   eretaa
-// CHECK-NEXT: eretaa            // encoding: [0xff,0x0b,0x9f,0xd6]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  eretaa
+// CHECK: eretaa            // encoding: [0xff,0x0b,0x9f,0xd6]
+// CHECK-REQ: error: instruction requires: armv8.3a
   eretab
-// CHECK-NEXT: eretab            // encoding: [0xff,0x0f,0x9f,0xd6]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  eretab
+// CHECK: eretab            // encoding: [0xff,0x0f,0x9f,0xd6]
+// CHECK-REQ: error: instruction requires: armv8.3a
   ldraa x0, [x1, 4088]
-// CHECK-NEXT: ldraa x0, [x1, #4088]  // encoding: [0x20,0xf4,0x3f,0xf8]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  ldraa x0, [x1, 4088]
+// CHECK: ldraa x0, [x1, #4088]  // encoding: [0x20,0xf4,0x3f,0xf8]
+// CHECK-REQ: error: instruction requires: armv8.3a
   ldraa x0, [x1, -4096]
-// CHECK-NEXT: ldraa x0, [x1, #-4096] // encoding: [0x20,0x04,0x60,0xf8]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  ldraa x0, [x1, -4096]
+// CHECK: ldraa x0, [x1, #-4096] // encoding: [0x20,0x04,0x60,0xf8]
+// CHECK-REQ: error: instruction requires: armv8.3a
   ldrab x0, [x1, 4088]
-// CHECK-NEXT: ldrab x0, [x1, #4088]  // encoding: [0x20,0xf4,0xbf,0xf8]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  ldrab x0, [x1, 4088]
+// CHECK: ldrab x0, [x1, #4088]  // encoding: [0x20,0xf4,0xbf,0xf8]
+// CHECK-REQ: error: instruction requires: armv8.3a
   ldrab x0, [x1, -4096]
-// CHECK-NEXT: ldrab x0, [x1, #-4096] // encoding: [0x20,0x04,0xe0,0xf8]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  ldrab x0, [x1, -4096]
+// CHECK: ldrab x0, [x1, #-4096] // encoding: [0x20,0x04,0xe0,0xf8]
+// CHECK-REQ: error: instruction requires: armv8.3a
   ldraa x0, [x1, 4088]!
-// CHECK-NEXT: ldraa x0, [x1, #4088]!  // encoding: [0x20,0xfc,0x3f,0xf8]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  ldraa x0, [x1, 4088]!
+// CHECK: ldraa x0, [x1, #4088]!  // encoding: [0x20,0xfc,0x3f,0xf8]
+// CHECK-REQ: error: instruction requires: armv8.3a
   ldraa x0, [x1, -4096]!
-// CHECK-NEXT: ldraa x0, [x1, #-4096]! // encoding: [0x20,0x0c,0x60,0xf8]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  ldraa x0, [x1, -4096]!
+// CHECK: ldraa x0, [x1, #-4096]! // encoding: [0x20,0x0c,0x60,0xf8]
+// CHECK-REQ: error: instruction requires: armv8.3a
   ldrab x0, [x1, 4088]!
-// CHECK-NEXT: ldrab x0, [x1, #4088]!  // encoding: [0x20,0xfc,0xbf,0xf8]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  ldrab x0, [x1, 4088]!
+// CHECK: ldrab x0, [x1, #4088]!  // encoding: [0x20,0xfc,0xbf,0xf8]
+// CHECK-REQ: error: instruction requires: armv8.3a
   ldrab x0, [x1, -4096]!
-// CHECK-NEXT: ldrab x0, [x1, #-4096]! // encoding: [0x20,0x0c,0xe0,0xf8]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  ldrab x0, [x1, -4096]!
+// CHECK: ldrab x0, [x1, #-4096]! // encoding: [0x20,0x0c,0xe0,0xf8]
+// CHECK-REQ: error: instruction requires: armv8.3a
   ldraa x0, [x1]
-// CHECK-NEXT: ldraa x0, [x1]  // encoding: [0x20,0x04,0x20,0xf8]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  ldraa x0, [x1]
+// CHECK: ldraa x0, [x1]  // encoding: [0x20,0x04,0x20,0xf8]
+// CHECK-REQ: error: instruction requires: armv8.3a
   ldrab x0, [x1]
-// CHECK-NEXT: ldrab x0, [x1]  // encoding: [0x20,0x04,0xa0,0xf8]
-// CHECK-REQ: error: instruction requires: pa
-// CHECK-REQ-NEXT:  ldrab x0, [x1]
+// CHECK: ldrab x0, [x1]  // encoding: [0x20,0x04,0xa0,0xf8]
+// CHECK-REQ: error: instruction requires: armv8.3a
Index: test/MC/AArch64/armv8.4a-flag.s
===================================================================
--- test/MC/AArch64/armv8.4a-flag.s
+++ test/MC/AArch64/armv8.4a-flag.s
@@ -1,14 +1,5 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a %s -o - | \
-// RUN: FileCheck %s
-
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+fmi %s -o - 2>&1 | \
-// RUN: FileCheck %s
-
-// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.4a %s -o - 2>&1 | \
-// RUN: FileCheck %s --check-prefix=ERROR
-
-// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a,-fmi %s -o - 2>&1 | \
-// RUN: FileCheck %s --check-prefix=ERROR
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a < %s | FileCheck %s --check-prefix=CHECK
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.4a < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
 
 //------------------------------------------------------------------------------
 // Armv8.4-A flag manipulation instructions
@@ -30,24 +21,24 @@
 //CHECK-NEXT: rmif x1, #63, #15            // encoding: [0x2f,0x84,0x1f,0xba]
 //CHECK-NEXT: rmif xzr, #63, #15           // encoding: [0xef,0x87,0x1f,0xba]
 
-//ERROR:      error: instruction requires: fmi
-//ERROR-NEXT: cfinv
-//ERROR-NEXT: ^
-//ERROR-NEXT: error: instruction requires: fmi
-//ERROR-NEXT: setf8 w1
-//ERROR-NEXT: ^
-//ERROR-NEXT: error: instruction requires: fmi
-//ERROR-NEXT: setf8 wzr
-//ERROR-NEXT: ^
-//ERROR-NEXT: error: instruction requires: fmi
-//ERROR-NEXT: setf16 w1
-//ERROR-NEXT: ^
-//ERROR-NEXT: error: instruction requires: fmi
-//ERROR-NEXT: setf16 wzr
-//ERROR-NEXT: ^
-//ERROR-NEXT: error: instruction requires: fmi
-//ERROR-NEXT: rmif x1, #63, #15
-//ERROR-NEXT: ^
-//ERROR-NEXT: error: instruction requires: fmi
-//ERROR-NEXT: rmif xzr, #63, #15
-//ERROR-NEXT: ^
+//CHECK-ERROR:      error: instruction requires: armv8.4a
+//CHECK-ERROR-NEXT: cfinv
+//CHECK-ERROR-NEXT: ^
+//CHECK-ERROR-NEXT: error: instruction requires: armv8.4a
+//CHECK-ERROR-NEXT: setf8 w1
+//CHECK-ERROR-NEXT: ^
+//CHECK-ERROR-NEXT: error: instruction requires: armv8.4a
+//CHECK-ERROR-NEXT: setf8 wzr
+//CHECK-ERROR-NEXT: ^
+//CHECK-ERROR-NEXT: error: instruction requires: armv8.4a
+//CHECK-ERROR-NEXT: setf16 w1
+//CHECK-ERROR-NEXT: ^
+//CHECK-ERROR-NEXT: error: instruction requires: armv8.4a
+//CHECK-ERROR-NEXT: setf16 wzr
+//CHECK-ERROR-NEXT: ^
+//CHECK-ERROR-NEXT: error: instruction requires: armv8.4a
+//CHECK-ERROR-NEXT: rmif x1, #63, #15
+//CHECK-ERROR-NEXT: ^
+//CHECK-ERROR-NEXT: error: instruction requires: armv8.4a
+//CHECK-ERROR-NEXT: rmif xzr, #63, #15
+//CHECK-ERROR-NEXT: ^
Index: test/MC/AArch64/armv8.4a-ldst.s
===================================================================
--- test/MC/AArch64/armv8.4a-ldst.s
+++ test/MC/AArch64/armv8.4a-ldst.s
@@ -1,8 +1,5 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a -o - %s | FileCheck %s
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.4a,+rcpc-immo -o - %s 2>&1 | FileCheck %s
-// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a,-rcpc-immo -o - %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-V84
-// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.4a -o - %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-V84
-// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -o - %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-V84
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a < %s | FileCheck %s --check-prefix=CHECK
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.4a < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-V84
 
 //------------------------------------------------------------------------------
 // Armv8.4-A LDAPR and STLR instructions with immediate offsets
@@ -143,168 +140,168 @@
 //CHECK-NEXT:  ldapur  x13, [x4, #255]         // encoding: [0x8d,0xf0,0x4f,0xd9]
 //CHECK-NEXT:  ldapur  x14, [sp, #9]           // encoding: [0xee,0x93,0x40,0xd9]
 
-//CHECK-NO-V84:      error: instruction requires: rcpc-immo
+//CHECK-NO-V84:      error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: STLURB   WZR, [X10]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: STLURB   W1, [X10]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: STLURB   W1, [X10, #-256]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: stlurb   w2, [x11, #255]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: STLURB   W3, [SP, #-3]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: ldapurb  wzr, [x12]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: ldapurb  w4, [x12]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: ldapurb  w4, [x12, #-256]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPURB  W5, [X13, #255]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPURB  W6, [SP, #-2]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPURSB W7, [X14]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPURSB W7, [X14, #-256]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: ldapursb w8, [x15, #255]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: ldapursb w9, [sp, #-1]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPURSB X0, [X16]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPURSB X0, [X16, #-256]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPURSB X1, [X17, #255]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: ldapursb x2, [sp, #0]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: ldapursb x2, [sp]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: stlurh   w10, [x18]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: stlurh   w10, [x18, #-256]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: STLURH   W11, [X19, #255]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: STLURH   W12, [SP, #1]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPURH  W13, [X20]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPURH  W13, [X20, #-256]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: ldapurh  w14, [x21, #255]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPURH  W15, [SP, #2]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPURSH W16, [X22]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPURSH W16, [X22, #-256]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPURSH W17, [X23, #255]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: ldapursh w18, [sp, #3]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: ldapursh x3, [x24]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: ldapursh x3, [x24, #-256]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPURSH X4, [X25, #255]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPURSH X5, [SP, #4]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: STLUR    W19, [X26]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: STLUR    W19, [X26, #-256]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: stlur    w20, [x27, #255]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: STLUR    W21, [SP, #5]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPUR   W22, [X28]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPUR   W22, [X28, #-256]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPUR   W23, [X29, #255]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: ldapur   w24, [sp, #6]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: ldapursw x6, [x30]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: ldapursw x6, [x30, #-256]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPURSW X7, [X0, #255]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPURSW X8, [SP, #7]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: STLUR    X9, [X1]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: STLUR    X9, [X1, #-256]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: stlur    x10, [x2, #255]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: STLUR    X11, [SP, #8]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPUR   X12, [X3]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPUR   X12, [X3, #-256]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: LDAPUR   X13, [X4, #255]
 //CHECK-NO-V84-NEXT: ^
-//CHECK-NO-V84-NEXT: error: instruction requires: rcpc-immo
+//CHECK-NO-V84-NEXT: error: instruction requires: armv8.4a
 //CHECK-NO-V84-NEXT: ldapur   x14, [sp, #9]
 //CHECK-NO-V84-NEXT: ^
Index: test/MC/AArch64/armv8.4a-tlb.s
===================================================================
--- test/MC/AArch64/armv8.4a-tlb.s
+++ test/MC/AArch64/armv8.4a-tlb.s
@@ -1,9 +1,6 @@
 // RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a < %s 2> %t | FileCheck %s --check-prefix=CHECK
 // RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
-// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+tlb-rmi < %s 2> %t | FileCheck %s --check-prefix=CHECK
-// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
 // RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.4a < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-V84
-// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a,-tlb-rmi < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-V84
 
 // Outer shareable TLB maintenance instructions:
 tlbi vmalle1os
@@ -48,55 +45,55 @@
 //CHECK-ERROR-NEXT: tlbi vae1os, sp
 //CHECK-ERROR-NEXT:              ^
 
-//CHECK-NO-V84:      error: TLBI VMALLE1OS requires tlb-rmi
+//CHECK-NO-V84:      error: TLBI VMALLE1OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi vmalle1os
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI VAE1OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI VAE1OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi vae1os, xzr
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI VAE1OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI VAE1OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi vae1os, x0
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI ASIDE1OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI ASIDE1OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi aside1os, x1
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI VAAE1OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI VAAE1OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi vaae1os, x2
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI VALE1OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI VALE1OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi vale1os, x3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI VAALE1OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI VAALE1OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi vaale1os, x4
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI IPAS2E1OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI IPAS2E1OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi ipas2e1os, x5
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI IPAS2LE1OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI IPAS2LE1OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi ipas2le1os, x6
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI VAE2OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI VAE2OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi vae2os, x7
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI VALE2OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI VALE2OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi vale2os, x8
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI VMALLS12E1OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI VMALLS12E1OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi vmalls12e1os
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI VAE3OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI VAE3OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi vae3os, x9
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI VALE3OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI VALE3OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi vale3os, x10
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI ALLE2OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI ALLE2OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi alle2os
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI ALLE1OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI ALLE1OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi alle1os
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI ALLE3OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI ALLE3OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi alle3os
 //CHECK-NO-V84-NEXT:      ^
 
@@ -171,96 +168,96 @@
 //CHECK-ERROR-NEXT: tlbi rvae1, sp
 //CHECK-ERROR-NEXT:             ^
 
-//CHECK-NO-V84:      error: TLBI RVAE1 requires tlb-rmi
+//CHECK-NO-V84:      error: TLBI RVAE1 requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvae1, x3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVAAE1 requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVAAE1 requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvaae1, x3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVALE1 requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVALE1 requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvale1, x3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVAALE1 requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVAALE1 requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvaale1, x3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT:  error: TLBI RVAE1IS requires tlb-rmi
+//CHECK-NO-V84-NEXT:  error: TLBI RVAE1IS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvae1is, x3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVAAE1IS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVAAE1IS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvaae1is, x3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVALE1IS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVALE1IS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvale1is, x3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVAALE1IS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVAALE1IS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvaale1is, x3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVAE1OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVAE1OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvae1os, x3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVAAE1OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVAAE1OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvaae1os, x3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVALE1OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVALE1OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvale1os, x3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVAALE1OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVAALE1OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvaale1os, x3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RIPAS2E1IS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RIPAS2E1IS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi ripas2e1is, x3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RIPAS2LE1IS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RIPAS2LE1IS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi ripas2le1is, x3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RIPAS2E1 requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RIPAS2E1 requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi ripas2e1, X3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RIPAS2LE1 requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RIPAS2LE1 requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi ripas2le1, X3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RIPAS2E1OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RIPAS2E1OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi ripas2e1os, X3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RIPAS2LE1OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RIPAS2LE1OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi ripas2le1os, X3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVAE2 requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVAE2 requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvae2, X3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVALE2 requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVALE2 requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvale2, X3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVAE2IS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVAE2IS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvae2is, X3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVALE2IS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVALE2IS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvale2is, X3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVAE2OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVAE2OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvae2os, X3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVALE2OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVALE2OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvale2os, X3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVAE3 requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVAE3 requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvae3, X3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVALE3 requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVALE3 requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvale3, X3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVAE3IS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVAE3IS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvae3is, X3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVALE3IS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVALE3IS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvale3is, X3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVAE3OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVAE3OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvae3os, X3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVALE3OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVALE3OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvale3os, X3
 //CHECK-NO-V84-NEXT:      ^
-//CHECK-NO-V84-NEXT: error: TLBI RVALE3OS requires tlb-rmi
+//CHECK-NO-V84-NEXT: error: TLBI RVALE3OS requires ARMv8.4a
 //CHECK-NO-V84-NEXT: tlbi rvale3os, XZR
 //CHECK-NO-V84-NEXT:      ^
Index: test/MC/AArch64/armv8.4a-trace.s
===================================================================
--- test/MC/AArch64/armv8.4a-trace.s
+++ test/MC/AArch64/armv8.4a-trace.s
@@ -1,14 +1,5 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a -o - 2>&1 %s  | \
-// RUN: FileCheck %s
-
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+tracev8.4 -o - 2>&1 %s  | \
-// RUN: FileCheck %s
-
-// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.4a -o - %s 2>&1 | \
-// RUN: FileCheck %s --check-prefix=CHECK-ERROR
-
-// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a,-tracev8.4 -o - %s 2>&1 | \
-// RUN: FileCheck %s --check-prefixes NOFEATURE,CHECK-ERROR
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.4a < %s  | FileCheck %s --check-prefix=CHECK
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.4a < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
 
 //------------------------------------------------------------------------------
 // ARMV8.4-A Debug, Trace and PMU Extensions
@@ -54,4 +45,4 @@
 //CHECK-ERROR: mrs x0, TRFCR_EL12
 //CHECK-ERROR:         ^
 
-//CHECK-ERROR: error: instruction requires: tracev8.4
+//CHECK-ERROR: error: instruction requires: armv8.4a
Index: test/Transforms/ExpandMemCmp/X86/memcmp.ll
===================================================================
--- test/Transforms/ExpandMemCmp/X86/memcmp.ll
+++ test/Transforms/ExpandMemCmp/X86/memcmp.ll
@@ -130,11 +130,11 @@
 ; ALL-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
 ; ALL-NEXT:    br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
 ; ALL:       loadbb1:
-; ALL-NEXT:    [[TMP10:%.*]] = bitcast i8* [[X]] to i16*
-; ALL-NEXT:    [[TMP11:%.*]] = bitcast i8* [[Y]] to i16*
-; ALL-NEXT:    [[TMP12:%.*]] = getelementptr i16, i16* [[TMP10]], i16 2
-; ALL-NEXT:    [[TMP13:%.*]] = getelementptr i16, i16* [[TMP11]], i16 2
-; ALL-NEXT:    [[TMP14:%.*]] = load i16, i16* [[TMP12]]
+; ALL-NEXT:    [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 4
+; ALL-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i16*
+; ALL-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 4
+; ALL-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i16*
+; ALL-NEXT:    [[TMP14:%.*]] = load i16, i16* [[TMP11]]
 ; ALL-NEXT:    [[TMP15:%.*]] = load i16, i16* [[TMP13]]
 ; ALL-NEXT:    [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
 ; ALL-NEXT:    [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
@@ -178,11 +178,11 @@
 ; X32-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
 ; X32-NEXT:    br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
 ; X32:       loadbb1:
-; X32-NEXT:    [[TMP10:%.*]] = bitcast i8* [[X]] to i32*
-; X32-NEXT:    [[TMP11:%.*]] = bitcast i8* [[Y]] to i32*
-; X32-NEXT:    [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 1
-; X32-NEXT:    [[TMP13:%.*]] = getelementptr i32, i32* [[TMP11]], i32 1
-; X32-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP12]]
+; X32-NEXT:    [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 4
+; X32-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
+; X32-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 4
+; X32-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i32*
+; X32-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP11]]
 ; X32-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP13]]
 ; X32-NEXT:    [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
 ; X32-NEXT:    [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
@@ -272,11 +272,11 @@
 ; X64-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
 ; X64-NEXT:    br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
 ; X64:       loadbb1:
-; X64-NEXT:    [[TMP10:%.*]] = bitcast i8* [[X]] to i16*
-; X64-NEXT:    [[TMP11:%.*]] = bitcast i8* [[Y]] to i16*
-; X64-NEXT:    [[TMP12:%.*]] = getelementptr i16, i16* [[TMP10]], i16 4
-; X64-NEXT:    [[TMP13:%.*]] = getelementptr i16, i16* [[TMP11]], i16 4
-; X64-NEXT:    [[TMP14:%.*]] = load i16, i16* [[TMP12]]
+; X64-NEXT:    [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 8
+; X64-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i16*
+; X64-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 8
+; X64-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i16*
+; X64-NEXT:    [[TMP14:%.*]] = load i16, i16* [[TMP11]]
 ; X64-NEXT:    [[TMP15:%.*]] = load i16, i16* [[TMP13]]
 ; X64-NEXT:    [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
 ; X64-NEXT:    [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
@@ -324,11 +324,11 @@
 ; X64-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
 ; X64-NEXT:    br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
 ; X64:       loadbb1:
-; X64-NEXT:    [[TMP10:%.*]] = bitcast i8* [[X]] to i32*
-; X64-NEXT:    [[TMP11:%.*]] = bitcast i8* [[Y]] to i32*
-; X64-NEXT:    [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 2
-; X64-NEXT:    [[TMP13:%.*]] = getelementptr i32, i32* [[TMP11]], i32 2
-; X64-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP12]]
+; X64-NEXT:    [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 8
+; X64-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
+; X64-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 8
+; X64-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i32*
+; X64-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP11]]
 ; X64-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP13]]
 ; X64-NEXT:    [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
 ; X64-NEXT:    [[TMP17:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
@@ -394,11 +394,11 @@
 ; X64-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
 ; X64-NEXT:    br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
 ; X64:       loadbb1:
-; X64-NEXT:    [[TMP10:%.*]] = bitcast i8* [[X]] to i64*
-; X64-NEXT:    [[TMP11:%.*]] = bitcast i8* [[Y]] to i64*
-; X64-NEXT:    [[TMP12:%.*]] = getelementptr i64, i64* [[TMP10]], i64 1
-; X64-NEXT:    [[TMP13:%.*]] = getelementptr i64, i64* [[TMP11]], i64 1
-; X64-NEXT:    [[TMP14:%.*]] = load i64, i64* [[TMP12]]
+; X64-NEXT:    [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 8
+; X64-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i64*
+; X64-NEXT:    [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 8
+; X64-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i64*
+; X64-NEXT:    [[TMP14:%.*]] = load i64, i64* [[TMP11]]
 ; X64-NEXT:    [[TMP15:%.*]] = load i64, i64* [[TMP13]]
 ; X64-NEXT:    [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]])
 ; X64-NEXT:    [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]])
@@ -597,11 +597,11 @@
 ; X32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
 ; X32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
 ; X32-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
-; X32-NEXT:    [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
-; X32-NEXT:    [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
-; X32-NEXT:    [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 2
-; X32-NEXT:    [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 2
-; X32-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]]
+; X32-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
+; X32-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
+; X32-NEXT:    [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 4
+; X32-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+; X32-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP7]]
 ; X32-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]]
 ; X32-NEXT:    [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
 ; X32-NEXT:    [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
@@ -625,11 +625,11 @@
 ; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
-; X64_1LD-NEXT:    [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
-; X64_1LD-NEXT:    [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
-; X64_1LD-NEXT:    [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 2
-; X64_1LD-NEXT:    [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 2
-; X64_1LD-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]]
+; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
+; X64_1LD-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
+; X64_1LD-NEXT:    [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 4
+; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+; X64_1LD-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP7]]
 ; X64_1LD-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]]
 ; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
@@ -645,11 +645,11 @@
 ; X64_2LD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
 ; X64_2LD-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
 ; X64_2LD-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT:    [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
-; X64_2LD-NEXT:    [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
-; X64_2LD-NEXT:    [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 2
-; X64_2LD-NEXT:    [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 2
-; X64_2LD-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]]
+; X64_2LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
+; X64_2LD-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
+; X64_2LD-NEXT:    [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 4
+; X64_2LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+; X64_2LD-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP7]]
 ; X64_2LD-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]]
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
@@ -668,11 +668,71 @@
 }
 
 define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
-; ALL-LABEL: @cmp_eq7(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 7)
-; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
-; ALL-NEXT:    ret i32 [[CONV]]
+; X32-LABEL: @cmp_eq7(
+; X32-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X32-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X32-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
+; X32-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 3
+; X32-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
+; X32-NEXT:    [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 3
+; X32-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
+; X32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP7]]
+; X32-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X32-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
+; X32-NEXT:    [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
+; X32-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
+; X32-NEXT:    [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
+; X32-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X32-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X32-NEXT:    ret i32 [[CONV]]
+;
+; X64_1LD-LABEL: @cmp_eq7(
+; X64_1LD-NEXT:    br label [[LOADBB:%.*]]
+; X64_1LD:       res_block:
+; X64_1LD-NEXT:    br label [[ENDBLOCK:%.*]]
+; X64_1LD:       loadbb:
+; X64_1LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X64_1LD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X64_1LD-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD:       loadbb1:
+; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 3
+; X64_1LD-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
+; X64_1LD-NEXT:    [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 3
+; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
+; X64_1LD-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP7]]
+; X64_1LD-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD:       endblock:
+; X64_1LD-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; X64_1LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64_1LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64_1LD-NEXT:    ret i32 [[CONV]]
+;
+; X64_2LD-LABEL: @cmp_eq7(
+; X64_2LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X64_2LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X64_2LD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X64_2LD-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; X64_2LD-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
+; X64_2LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 3
+; X64_2LD-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
+; X64_2LD-NEXT:    [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 3
+; X64_2LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
+; X64_2LD-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP7]]
+; X64_2LD-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]]
+; X64_2LD-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
+; X64_2LD-NEXT:    [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
+; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
+; X64_2LD-NEXT:    [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
+; X64_2LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64_2LD-NEXT:    ret i32 [[CONV]]
 ;
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 7)
   %cmp = icmp eq i32 %call, 0
@@ -687,11 +747,11 @@
 ; X32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
 ; X32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
 ; X32-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
-; X32-NEXT:    [[TMP6:%.*]] = bitcast i8* [[X]] to i32*
-; X32-NEXT:    [[TMP7:%.*]] = bitcast i8* [[Y]] to i32*
-; X32-NEXT:    [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 1
-; X32-NEXT:    [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 1
-; X32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP8]]
+; X32-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4
+; X32-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
+; X32-NEXT:    [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 4
+; X32-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
+; X32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP7]]
 ; X32-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]]
 ; X32-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
 ; X32-NEXT:    [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
@@ -794,11 +854,11 @@
 ; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
-; X64_1LD-NEXT:    [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
-; X64_1LD-NEXT:    [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
-; X64_1LD-NEXT:    [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 4
-; X64_1LD-NEXT:    [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 4
-; X64_1LD-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]]
+; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8
+; X64_1LD-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
+; X64_1LD-NEXT:    [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 8
+; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+; X64_1LD-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP7]]
 ; X64_1LD-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]]
 ; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
@@ -814,11 +874,11 @@
 ; X64_2LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
 ; X64_2LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
 ; X64_2LD-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT:    [[TMP6:%.*]] = bitcast i8* [[X]] to i16*
-; X64_2LD-NEXT:    [[TMP7:%.*]] = bitcast i8* [[Y]] to i16*
-; X64_2LD-NEXT:    [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 4
-; X64_2LD-NEXT:    [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 4
-; X64_2LD-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]]
+; X64_2LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8
+; X64_2LD-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
+; X64_2LD-NEXT:    [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 8
+; X64_2LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+; X64_2LD-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP7]]
 ; X64_2LD-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]]
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = zext i16 [[TMP10]] to i64
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = zext i16 [[TMP11]] to i64
@@ -837,11 +897,57 @@
 }
 
 define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
-; ALL-LABEL: @cmp_eq11(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 11)
-; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
-; ALL-NEXT:    ret i32 [[CONV]]
+; X32-LABEL: @cmp_eq11(
+; X32-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 11)
+; X32-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X32-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X32-NEXT:    ret i32 [[CONV]]
+;
+; X64_1LD-LABEL: @cmp_eq11(
+; X64_1LD-NEXT:    br label [[LOADBB:%.*]]
+; X64_1LD:       res_block:
+; X64_1LD-NEXT:    br label [[ENDBLOCK:%.*]]
+; X64_1LD:       loadbb:
+; X64_1LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD:       loadbb1:
+; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 3
+; X64_1LD-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
+; X64_1LD-NEXT:    [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 3
+; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
+; X64_1LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP7]]
+; X64_1LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD:       endblock:
+; X64_1LD-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; X64_1LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64_1LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64_1LD-NEXT:    ret i32 [[CONV]]
+;
+; X64_2LD-LABEL: @cmp_eq11(
+; X64_2LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64_2LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_2LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64_2LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
+; X64_2LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 3
+; X64_2LD-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
+; X64_2LD-NEXT:    [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 3
+; X64_2LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
+; X64_2LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP7]]
+; X64_2LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_2LD-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
+; X64_2LD-NEXT:    [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
+; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
+; X64_2LD-NEXT:    [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
+; X64_2LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64_2LD-NEXT:    ret i32 [[CONV]]
 ;
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 11)
   %cmp = icmp eq i32 %call, 0
@@ -868,11 +974,11 @@
 ; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
-; X64_1LD-NEXT:    [[TMP6:%.*]] = bitcast i8* [[X]] to i32*
-; X64_1LD-NEXT:    [[TMP7:%.*]] = bitcast i8* [[Y]] to i32*
-; X64_1LD-NEXT:    [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 2
-; X64_1LD-NEXT:    [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 2
-; X64_1LD-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP8]]
+; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8
+; X64_1LD-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
+; X64_1LD-NEXT:    [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 8
+; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
+; X64_1LD-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP7]]
 ; X64_1LD-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]]
 ; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
@@ -888,11 +994,11 @@
 ; X64_2LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
 ; X64_2LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
 ; X64_2LD-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
-; X64_2LD-NEXT:    [[TMP6:%.*]] = bitcast i8* [[X]] to i32*
-; X64_2LD-NEXT:    [[TMP7:%.*]] = bitcast i8* [[Y]] to i32*
-; X64_2LD-NEXT:    [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 2
-; X64_2LD-NEXT:    [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 2
-; X64_2LD-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP8]]
+; X64_2LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8
+; X64_2LD-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
+; X64_2LD-NEXT:    [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 8
+; X64_2LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
+; X64_2LD-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP7]]
 ; X64_2LD-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]]
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = zext i32 [[TMP10]] to i64
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = zext i32 [[TMP11]] to i64
@@ -911,11 +1017,57 @@
 }
 
 define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
-; ALL-LABEL: @cmp_eq13(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 13)
-; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
-; ALL-NEXT:    ret i32 [[CONV]]
+; X32-LABEL: @cmp_eq13(
+; X32-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 13)
+; X32-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X32-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X32-NEXT:    ret i32 [[CONV]]
+;
+; X64_1LD-LABEL: @cmp_eq13(
+; X64_1LD-NEXT:    br label [[LOADBB:%.*]]
+; X64_1LD:       res_block:
+; X64_1LD-NEXT:    br label [[ENDBLOCK:%.*]]
+; X64_1LD:       loadbb:
+; X64_1LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD:       loadbb1:
+; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 5
+; X64_1LD-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
+; X64_1LD-NEXT:    [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 5
+; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
+; X64_1LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP7]]
+; X64_1LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD:       endblock:
+; X64_1LD-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; X64_1LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64_1LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64_1LD-NEXT:    ret i32 [[CONV]]
+;
+; X64_2LD-LABEL: @cmp_eq13(
+; X64_2LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64_2LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_2LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64_2LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
+; X64_2LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 5
+; X64_2LD-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
+; X64_2LD-NEXT:    [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 5
+; X64_2LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
+; X64_2LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP7]]
+; X64_2LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_2LD-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
+; X64_2LD-NEXT:    [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
+; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
+; X64_2LD-NEXT:    [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
+; X64_2LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64_2LD-NEXT:    ret i32 [[CONV]]
 ;
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 13)
   %cmp = icmp eq i32 %call, 0
@@ -924,11 +1076,57 @@
 }
 
 define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
-; ALL-LABEL: @cmp_eq14(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 14)
-; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
-; ALL-NEXT:    ret i32 [[CONV]]
+; X32-LABEL: @cmp_eq14(
+; X32-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 14)
+; X32-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X32-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X32-NEXT:    ret i32 [[CONV]]
+;
+; X64_1LD-LABEL: @cmp_eq14(
+; X64_1LD-NEXT:    br label [[LOADBB:%.*]]
+; X64_1LD:       res_block:
+; X64_1LD-NEXT:    br label [[ENDBLOCK:%.*]]
+; X64_1LD:       loadbb:
+; X64_1LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD:       loadbb1:
+; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 6
+; X64_1LD-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
+; X64_1LD-NEXT:    [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 6
+; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
+; X64_1LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP7]]
+; X64_1LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD:       endblock:
+; X64_1LD-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; X64_1LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64_1LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64_1LD-NEXT:    ret i32 [[CONV]]
+;
+; X64_2LD-LABEL: @cmp_eq14(
+; X64_2LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64_2LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_2LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64_2LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
+; X64_2LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 6
+; X64_2LD-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
+; X64_2LD-NEXT:    [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 6
+; X64_2LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
+; X64_2LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP7]]
+; X64_2LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_2LD-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
+; X64_2LD-NEXT:    [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
+; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
+; X64_2LD-NEXT:    [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
+; X64_2LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64_2LD-NEXT:    ret i32 [[CONV]]
 ;
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 14)
   %cmp = icmp eq i32 %call, 0
@@ -937,11 +1135,57 @@
 }
 
 define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
-; ALL-LABEL: @cmp_eq15(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 15)
-; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
-; ALL-NEXT:    ret i32 [[CONV]]
+; X32-LABEL: @cmp_eq15(
+; X32-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 15)
+; X32-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X32-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X32-NEXT:    ret i32 [[CONV]]
+;
+; X64_1LD-LABEL: @cmp_eq15(
+; X64_1LD-NEXT:    br label [[LOADBB:%.*]]
+; X64_1LD:       res_block:
+; X64_1LD-NEXT:    br label [[ENDBLOCK:%.*]]
+; X64_1LD:       loadbb:
+; X64_1LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD:       loadbb1:
+; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 7
+; X64_1LD-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
+; X64_1LD-NEXT:    [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 7
+; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
+; X64_1LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP7]]
+; X64_1LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD:       endblock:
+; X64_1LD-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; X64_1LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64_1LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64_1LD-NEXT:    ret i32 [[CONV]]
+;
+; X64_2LD-LABEL: @cmp_eq15(
+; X64_2LD-NEXT:    [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64_2LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_2LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64_2LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64_2LD-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
+; X64_2LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 7
+; X64_2LD-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
+; X64_2LD-NEXT:    [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i8 7
+; X64_2LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
+; X64_2LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP7]]
+; X64_2LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
+; X64_2LD-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
+; X64_2LD-NEXT:    [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
+; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
+; X64_2LD-NEXT:    [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
+; X64_2LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64_2LD-NEXT:    ret i32 [[CONV]]
 ;
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 15)
   %cmp = icmp eq i32 %call, 0
Index: test/tools/llvm-dwarfdump/X86/eh-frame-return-address-reg.s
===================================================================
--- /dev/null
+++ test/tools/llvm-dwarfdump/X86/eh-frame-return-address-reg.s
@@ -0,0 +1,51 @@
+# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux -o %t.o
+# RUN: llvm-dwarfdump -v %t.o | FileCheck %s
+
+# The .eh_frame section is similar in format and
+# purpose to the .debug_frame section.
+# CIE version 1 is commonly used in .eh_frame and
+# was also used by DWARF v2. In version 1 the
+# return address register is encoded as a ubyte,
+# while later versions use ULEB128. This test
+# checks that we dump the ubyte form correctly.
+
+# CHECK:      .eh_frame contents:
+# CHECK:      00000000 00000010 ffffffff CIE
+# CHECK-NEXT:   Version:               1
+# CHECK-NEXT:   Augmentation:          "zR"
+# CHECK-NEXT:   Code alignment factor: 1
+# CHECK-NEXT:   Data alignment factor: 1
+# CHECK-NEXT:   Return address column: 240
+# CHECK-NEXT:   Augmentation data:     1A
+
+.text
+.global _start
+_start:
+ nop
+
+.section .eh_frame, "a"
+  .long 16   # Size
+  .long 0x00 # ID
+  .byte 0x01 # Version
+
+  .byte 0x7A # Augmentation string: "zR"
+  .byte 0x52
+  .byte 0x00
+
+  .byte 0x01 # Code alignment factor, ULEB128
+  .byte 0x01 # Data alignment factor, SLEB128
+  
+  .byte 0xF0 # Return address register, ubyte for version 1.
+
+  .byte 0x01 # Augmentation data length, ULEB128
+  .byte 0x1A # DW_EH_PE_pcrel | DW_EH_PE_sdata2
+
+  .byte 0x00
+  .byte 0x00
+  .byte 0x00
+
+  .long 10   # Size
+  .long 24   # CIE pointer: offset from this field back to the CIE
+fde:
+  .long _start - fde
+  .word 0