Index: include/llvm/MC/MCStreamer.h
===================================================================
--- include/llvm/MC/MCStreamer.h
+++ include/llvm/MC/MCStreamer.h
@@ -115,6 +115,7 @@
 
   virtual void switchVendor(StringRef Vendor);
   virtual void emitAttribute(unsigned Attribute, unsigned Value);
+  virtual void emitCPUAttribute(StringRef CPUName, StringRef ArchName);
   virtual void emitTextAttribute(unsigned Attribute, StringRef String);
   virtual void emitIntTextAttribute(unsigned Attribute, unsigned IntValue,
                                     StringRef StringValue = "");
Index: include/llvm/Support/TargetParser.h
===================================================================
--- include/llvm/Support/TargetParser.h
+++ include/llvm/Support/TargetParser.h
@@ -53,7 +53,8 @@
     FK_NEON_FP_ARMV8,
     FK_CRYPTO_NEON_FP_ARMV8,
     FK_SOFTVFP,
-    FK_LAST
+    FK_LAST,
+    FK_DEFAULT = FK_VFPV2, // GCC's default VFP for eabihf and -mfloat-abi=hard
   };
 
   // FPU Version
@@ -117,7 +118,8 @@
     AK_ARMV7L,
     AK_ARMV7HL,
     AK_ARMV7S,
-    AK_LAST
+    AK_LAST,
+    AK_DEFAULT = AK_ARMV4T,  // GCC's default sub-architecture
   };
 
   // Arch extension modifiers for CPUs.
@@ -193,13 +195,14 @@
   static const char * getArchExtName(unsigned ArchExtKind);
   static const char * getHWDivName(unsigned HWDivKind);
   static const char * getDefaultCPU(StringRef Arch);
+  static const char * getGenericCPU(StringRef Arch);
 
   // Parser
   static unsigned parseHWDiv(StringRef HWDiv);
   static unsigned parseFPU(StringRef FPU);
   static unsigned parseArch(StringRef Arch);
   static unsigned parseArchExt(StringRef ArchExt);
-  static unsigned parseCPUArch(StringRef CPU);
+  static unsigned parseCPUArch(StringRef CPU, StringRef Arch);
   static unsigned parseArchISA(StringRef Arch);
   static unsigned parseArchEndian(StringRef Arch);
   static unsigned parseArchProfile(StringRef Arch);
Index: lib/Support/TargetParser.cpp
===================================================================
--- lib/Support/TargetParser.cpp
+++ lib/Support/TargetParser.cpp
@@ -155,17 +155,17 @@
   ARM::FPUKind DefaultFPU;
   bool Default; // is $Name the default CPU for $ArchID ?
 } CPUNames[] = {
-  { "arm2",          ARM::AK_ARMV2,    ARM::FK_NONE,       true },
-  { "arm3",          ARM::AK_ARMV2A,   ARM::FK_NONE,       true },
-  { "arm6",          ARM::AK_ARMV3,    ARM::FK_NONE,       true },
-  { "arm7m",         ARM::AK_ARMV3M,   ARM::FK_NONE,       true },
+  { "arm2",          ARM::AK_ARMV2,    ARM::FK_NONE,       false },
+  { "arm3",          ARM::AK_ARMV2A,   ARM::FK_NONE,       false },
+  { "arm6",          ARM::AK_ARMV3,    ARM::FK_NONE,       false },
+  { "arm7m",         ARM::AK_ARMV3M,   ARM::FK_NONE,       false },
   { "arm8",          ARM::AK_ARMV4,    ARM::FK_NONE,       false },
   { "arm810",        ARM::AK_ARMV4,    ARM::FK_NONE,       false },
-  { "strongarm",     ARM::AK_ARMV4,    ARM::FK_NONE,       true },
+  { "strongarm",     ARM::AK_ARMV4,    ARM::FK_NONE,       false },
   { "strongarm110",  ARM::AK_ARMV4,    ARM::FK_NONE,       false },
   { "strongarm1100", ARM::AK_ARMV4,    ARM::FK_NONE,       false },
   { "strongarm1110", ARM::AK_ARMV4,    ARM::FK_NONE,       false },
-  { "arm7tdmi",      ARM::AK_ARMV4T,   ARM::FK_NONE,       true },
+  { "arm7tdmi",      ARM::AK_ARMV4T,   ARM::FK_NONE,       false },
   { "arm7tdmi-s",    ARM::AK_ARMV4T,   ARM::FK_NONE,       false },
   { "arm710t",       ARM::AK_ARMV4T,   ARM::FK_NONE,       false },
   { "arm720t",       ARM::AK_ARMV4T,   ARM::FK_NONE,       false },
@@ -177,7 +177,7 @@
   { "arm9312",       ARM::AK_ARMV4T,   ARM::FK_NONE,       false },
   { "arm940t",       ARM::AK_ARMV4T,   ARM::FK_NONE,       false },
   { "ep9312",        ARM::AK_ARMV4T,   ARM::FK_NONE,       false },
-  { "arm10tdmi",     ARM::AK_ARMV5T,   ARM::FK_NONE,       true },
+  { "arm10tdmi",     ARM::AK_ARMV5T,   ARM::FK_NONE,       false },
   { "arm1020t",      ARM::AK_ARMV5T,   ARM::FK_NONE,       false },
   { "arm9e",         ARM::AK_ARMV5TE,  ARM::FK_NONE,       false },
   { "arm946e-s",     ARM::AK_ARMV5TE,  ARM::FK_NONE,       false },
@@ -185,59 +185,58 @@
   { "arm968e-s",     ARM::AK_ARMV5TE,  ARM::FK_NONE,       false },
   { "arm10e",        ARM::AK_ARMV5TE,  ARM::FK_NONE,       false },
   { "arm1020e",      ARM::AK_ARMV5TE,  ARM::FK_NONE,       false },
-  { "arm1022e",      ARM::AK_ARMV5TE,  ARM::FK_NONE,       true },
+  { "arm1022e",      ARM::AK_ARMV5TE,  ARM::FK_NONE,       false },
   { "iwmmxt",        ARM::AK_ARMV5TE,  ARM::FK_NONE,       false },
   { "xscale",        ARM::AK_ARMV5TE,  ARM::FK_NONE,       false },
-  { "arm926ej-s",    ARM::AK_ARMV5TEJ, ARM::FK_NONE,       true },
-  { "arm1136jf-s",   ARM::AK_ARMV6,    ARM::FK_VFPV2,      true },
+  { "arm926ej-s",    ARM::AK_ARMV5TEJ, ARM::FK_NONE,       false },
+  { "arm1136jf-s",   ARM::AK_ARMV6,    ARM::FK_VFPV2,      false },
   { "arm1176j-s",    ARM::AK_ARMV6K,   ARM::FK_NONE,       false },
   { "arm1176jz-s",   ARM::AK_ARMV6K,   ARM::FK_NONE,       false },
   { "mpcore",        ARM::AK_ARMV6K,   ARM::FK_VFPV2,      false },
   { "mpcorenovfp",   ARM::AK_ARMV6K,   ARM::FK_NONE,       false },
-  { "arm1176jzf-s",  ARM::AK_ARMV6K,   ARM::FK_VFPV2,      true },
-  { "arm1176jzf-s",  ARM::AK_ARMV6Z,   ARM::FK_VFPV2,      true },
-  { "arm1176jzf-s",  ARM::AK_ARMV6ZK,  ARM::FK_VFPV2,      true },
-  { "arm1156t2-s",   ARM::AK_ARMV6T2,  ARM::FK_NONE,       true },
+  { "arm1176jzf-s",  ARM::AK_ARMV6K,   ARM::FK_VFPV2,      false },
+  { "arm1176jzf-s",  ARM::AK_ARMV6Z,   ARM::FK_VFPV2,      false },
+  { "arm1176jzf-s",  ARM::AK_ARMV6ZK,  ARM::FK_VFPV2,      false },
+  { "arm1156t2-s",   ARM::AK_ARMV6T2,  ARM::FK_NONE,       false },
   { "arm1156t2f-s",  ARM::AK_ARMV6T2,  ARM::FK_VFPV2,      false },
-  { "cortex-m0",     ARM::AK_ARMV6M,   ARM::FK_NONE,       true },
+  { "cortex-m0",     ARM::AK_ARMV6M,   ARM::FK_NONE,       false },
   { "cortex-m0plus", ARM::AK_ARMV6M,   ARM::FK_NONE,       false },
   { "cortex-m1",     ARM::AK_ARMV6M,   ARM::FK_NONE,       false },
   { "sc000",         ARM::AK_ARMV6M,   ARM::FK_NONE,       false },
   { "cortex-a5",     ARM::AK_ARMV7A,   ARM::FK_NEON_VFPV4, false },
   { "cortex-a7",     ARM::AK_ARMV7A,   ARM::FK_NEON_VFPV4, false },
-  { "cortex-a8",     ARM::AK_ARMV7A,   ARM::FK_NEON,       true },
+  { "cortex-a8",     ARM::AK_ARMV7A,   ARM::FK_NEON,       false },
   { "cortex-a9",     ARM::AK_ARMV7A,   ARM::FK_NEON_FP16,  false },
   { "cortex-a12",    ARM::AK_ARMV7A,   ARM::FK_NEON_VFPV4, false },
   { "cortex-a15",    ARM::AK_ARMV7A,   ARM::FK_NEON_VFPV4, false },
   { "cortex-a17",    ARM::AK_ARMV7A,   ARM::FK_NEON_VFPV4, false },
   { "krait",         ARM::AK_ARMV7A,   ARM::FK_NEON_VFPV4, false },
-  { "cortex-r4",     ARM::AK_ARMV7R,   ARM::FK_NONE,       true },
+  { "cortex-r4",     ARM::AK_ARMV7R,   ARM::FK_NONE,       false },
   { "cortex-r4f",    ARM::AK_ARMV7R,   ARM::FK_VFPV3_D16,  false },
   { "cortex-r5",     ARM::AK_ARMV7R,   ARM::FK_VFPV3_D16,      false },
   { "cortex-r7",     ARM::AK_ARMV7R,   ARM::FK_VFPV3_D16_FP16, false },
   { "sc300",         ARM::AK_ARMV7M,   ARM::FK_NONE,           false },
-  { "cortex-m3",     ARM::AK_ARMV7M,   ARM::FK_NONE,           true },
-  { "cortex-m4",     ARM::AK_ARMV7EM,  ARM::FK_FPV4_SP_D16,    true },
+  { "cortex-m3",     ARM::AK_ARMV7M,   ARM::FK_NONE,           false },
+  { "cortex-m4",     ARM::AK_ARMV7EM,  ARM::FK_FPV4_SP_D16,    false },
   { "cortex-m7",     ARM::AK_ARMV7EM,  ARM::FK_FPV5_D16,             false },
-  { "cortex-a53",    ARM::AK_ARMV8A,   ARM::FK_CRYPTO_NEON_FP_ARMV8, true },
+  { "cortex-a53",    ARM::AK_ARMV8A,   ARM::FK_CRYPTO_NEON_FP_ARMV8, false },
   { "cortex-a57",    ARM::AK_ARMV8A,   ARM::FK_CRYPTO_NEON_FP_ARMV8, false },
   { "cortex-a72",    ARM::AK_ARMV8A,   ARM::FK_CRYPTO_NEON_FP_ARMV8, false },
   { "cyclone",       ARM::AK_ARMV8A,   ARM::FK_CRYPTO_NEON_FP_ARMV8, false },
-  { "generic",       ARM::AK_ARMV8_1A, ARM::FK_NEON_FP_ARMV8,        true },
   // Non-standard Arch names.
-  { "iwmmxt",        ARM::AK_IWMMXT,   ARM::FK_NONE,       true },
-  { "xscale",        ARM::AK_XSCALE,   ARM::FK_NONE,       true },
-  { "arm10tdmi",     ARM::AK_ARMV5,    ARM::FK_NONE,       true },
-  { "arm1022e",      ARM::AK_ARMV5E,   ARM::FK_NONE,       true },
-  { "arm1136j-s",    ARM::AK_ARMV6J,   ARM::FK_NONE,       true },
+  { "iwmmxt",        ARM::AK_IWMMXT,   ARM::FK_NONE,       false },
+  { "xscale",        ARM::AK_XSCALE,   ARM::FK_NONE,       false },
+  { "arm10tdmi",     ARM::AK_ARMV5,    ARM::FK_NONE,       false },
+  { "arm1022e",      ARM::AK_ARMV5E,   ARM::FK_NONE,       false },
+  { "arm1136j-s",    ARM::AK_ARMV6J,   ARM::FK_NONE,       false },
   { "arm1136jz-s",   ARM::AK_ARMV6J,   ARM::FK_NONE,       false },
-  { "cortex-m0",     ARM::AK_ARMV6SM,  ARM::FK_NONE,       true },
-  { "arm1176jzf-s",  ARM::AK_ARMV6HL,  ARM::FK_VFPV2,      true },
-  { "cortex-a8",     ARM::AK_ARMV7,    ARM::FK_NEON,       true },
-  { "cortex-a8",     ARM::AK_ARMV7L,   ARM::FK_NEON,       true },
-  { "cortex-a8",     ARM::AK_ARMV7HL,  ARM::FK_NEON,       true },
-  { "cortex-m4",     ARM::AK_ARMV7EM,  ARM::FK_NONE,       true },
+  { "cortex-m0",     ARM::AK_ARMV6SM,  ARM::FK_NONE,       false },
+  { "arm1176jzf-s",  ARM::AK_ARMV6HL,  ARM::FK_VFPV2,      false },
+  { "cortex-a8",     ARM::AK_ARMV7,    ARM::FK_NEON,       false },
+  { "cortex-a8",     ARM::AK_ARMV7L,   ARM::FK_NEON,       false },
+  { "cortex-a8",     ARM::AK_ARMV7HL,  ARM::FK_NEON,       false },
   { "swift",         ARM::AK_ARMV7S,   ARM::FK_NEON_VFPV4, true },
+
   // Invalid CPU
   { "invalid",       ARM::AK_INVALID,  ARM::FK_INVALID,    true }
 };
@@ -423,14 +422,19 @@
 const char *ARMTargetParser::getDefaultCPU(StringRef Arch) {
   unsigned AK = parseArch(Arch);
   if (AK == ARM::AK_INVALID)
-    return nullptr;
+    return getGenericCPU(Arch);
 
   // Look for multiple AKs to find the default for pair AK+Name.
   for (const auto CPU : CPUNames) {
     if (CPU.ArchID == AK && CPU.Default)
       return CPU.Name;
   }
-  return nullptr;
+  // Like GCC, fall back to the generic CPU when the arch has no default CPU.
+  return getGenericCPU(Arch);
+}
+
+const char *ARMTargetParser::getGenericCPU(StringRef Arch) {
+    return "generic";
 }
 
 // ======================================================= //
@@ -510,7 +514,7 @@
 
   // Empty string means offset reached the end, which means it's valid.
   if (A.empty())
-    return Arch;
+    return getSubArch(ARM::AK_DEFAULT);
 
   // Only match non-marketing names
   if (offset != StringRef::npos) {
@@ -563,7 +567,10 @@
   return ARM::AEK_INVALID;
 }
 
-unsigned ARMTargetParser::parseCPUArch(StringRef CPU) {
+unsigned ARMTargetParser::parseCPUArch(StringRef CPU, StringRef Arch) {
+  if (CPU == getGenericCPU(Arch))
+    return parseArch(Arch);
+
   for (const auto C : CPUNames) {
     if (CPU == C.Name)
       return C.ArchID;
Index: lib/Support/Triple.cpp
===================================================================
--- lib/Support/Triple.cpp
+++ lib/Support/Triple.cpp
@@ -1303,12 +1303,14 @@
   if (MArch.empty())
     return nullptr;
 
-  const char *CPU = ARMTargetParser::getDefaultCPU(MArch);
-  if (CPU)
-    return CPU;
+  const char *DefaultCPU = ARMTargetParser::getDefaultCPU(MArch);
+  if (strcmp(DefaultCPU, ARMTargetParser::getGenericCPU(MArch)))
+    return DefaultCPU;
 
-  // If no specific architecture version is requested, return the minimum CPU
+  // If the requested architecture version is too low, return the minimum CPU
   // required by the OS and environment.
+  unsigned ArchVersion = ARMTargetParser::parseArchVersion(MArch);
+
   switch (getOS()) {
   case llvm::Triple::NetBSD:
     switch (getEnvironment()) {
@@ -1316,20 +1318,23 @@
     case llvm::Triple::GNUEABI:
     case llvm::Triple::EABIHF:
     case llvm::Triple::EABI:
-      return "arm926ej-s";
+      if (ArchVersion <= 6)
+        return "arm926ej-s";
+      return DefaultCPU;
     default:
-      return "strongarm";
+      if (ArchVersion <= 4)
+        return "strongarm";
+      return DefaultCPU;
     }
   case llvm::Triple::NaCl:
-    return "cortex-a8";
-  default:
-    switch (getEnvironment()) {
-    case llvm::Triple::EABIHF:
-    case llvm::Triple::GNUEABIHF:
-      return "arm1176jzf-s";
+    switch (ArchVersion) {
+    case 6:
+      return "arm1136jf-s";
     default:
-      return "arm7tdmi";
+      return "cortex-a8";
     }
+  default:
+    return DefaultCPU;
   }
 
   llvm_unreachable("invalid arch name");
Index: lib/Target/ARM/ARM.td
===================================================================
--- lib/Target/ARM/ARM.td
+++ lib/Target/ARM/ARM.td
@@ -361,6 +361,7 @@
                                                        FeatureDB, FeatureMClass]>;
 
 // V6K Processors.
+def : Processor<"arm1176j-s",       ARMV6Itineraries, [HasV6KOps]>;
 def : Processor<"arm1176jz-s",      ARMV6Itineraries, [HasV6KOps]>;
 def : Processor<"arm1176jzf-s",     ARMV6Itineraries, [HasV6KOps, FeatureVFP2,
                                                        FeatureHasSlowFPVMLx]>;
Index: lib/Target/ARM/ARMAsmPrinter.cpp
===================================================================
--- lib/Target/ARM/ARMAsmPrinter.cpp
+++ lib/Target/ARM/ARMAsmPrinter.cpp
@@ -556,6 +556,7 @@
   // FIXME: For ifunc related functions we could iterate over and look
   // for a feature string that doesn't match the default one.
   const Triple &TT = TM.getTargetTriple();
+  StringRef Arch = TT.getArchName();
   StringRef CPU = TM.getTargetCPU();
   StringRef FS = TM.getTargetFeatureString();
   std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU);
@@ -571,17 +572,15 @@
 
   std::string CPUString = STI.getCPUString();
 
-  if (CPUString.find("generic") != 0) { //CPUString doesn't start with "generic"
-    // FIXME: remove krait check when GNU tools support krait cpu
-    if (STI.isKrait()) {
-      ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9");
-      // We consider krait as a "cortex-a9" + hwdiv CPU
-      // Enable hwdiv through ".arch_extension idiv"
-      if (STI.hasDivide() || STI.hasDivideInARMMode())
-        ATS.emitArchExtension(ARM::AEK_HWDIV | ARM::AEK_HWDIVARM);
-    } else
-      ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
-  }
+  // FIXME: remove krait check when GNU tools support krait cpu
+  if (STI.isKrait()) {
+    ATS.emitCPUAttribute("cortex-a9", Arch);
+    // We consider krait as a "cortex-a9" + hwdiv CPU
+    // Enable hwdiv through ".arch_extension idiv"
+    if (STI.hasDivide() || STI.hasDivideInARMMode())
+      ATS.emitArchExtension(ARM::AEK_HWDIV | ARM::AEK_HWDIVARM);
+  } else
+    ATS.emitCPUAttribute(CPUString, Arch);
 
   ATS.emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(CPUString, &STI));
 
Index: lib/Target/ARM/ARMSubtarget.cpp
===================================================================
--- lib/Target/ARM/ARMSubtarget.cpp
+++ lib/Target/ARM/ARMSubtarget.cpp
@@ -27,6 +27,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetParser.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -148,13 +149,12 @@
 }
 
 void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
-  if (CPUString.empty()) {
-    if (isTargetDarwin() && TargetTriple.getArchName().endswith("v7s"))
-      // Default to the Swift CPU when targeting armv7s/thumbv7s.
-      CPUString = "swift";
-    else
-      CPUString = "generic";
-  }
+  StringRef ArchName = TargetTriple.getArchName();
+  if (CPUString.empty() ||
+      CPUString == ARMTargetParser::getGenericCPU(ArchName))
+    // Replace an empty or "generic" CPU with the triple's default CPU. This
+    // matters e.g. for armv7s, or when the OS/ABI forces a minimum CPU.
+    CPUString = TargetTriple.getARMCPUForArch(ArchName);
 
   // Insert the architecture feature derived from the target triple into the
   // feature string. This is important for setting features that are implied
Index: lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
===================================================================
--- lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -77,6 +77,7 @@
 
   void switchVendor(StringRef Vendor) override;
   void emitAttribute(unsigned Attribute, unsigned Value) override;
+  void emitCPUAttribute(StringRef CPUName, StringRef ArchName) override;
   void emitTextAttribute(unsigned Attribute, StringRef String) override;
   void emitIntTextAttribute(unsigned Attribute, unsigned IntValue,
                             StringRef StrinValue) override;
@@ -162,6 +163,17 @@
   }
   OS << "\n";
 }
+void ARMTargetAsmStreamer::emitCPUAttribute(StringRef CPUName,
+                                         StringRef ArchName) {
+  if (CPUName == ARMTargetParser::getGenericCPU(ArchName)) {
+    // Always spell the architecture as armvN, never thumbvN.
+    OS << "\t.arch\tarmv";
+    const char * CPUAttr = ARMTargetParser::getCPUAttr(
+        ARMTargetParser::parseCPUArch(CPUName, ArchName));
+    OS << StringRef(CPUAttr).lower() << "\n";
+  } else
+    OS << "\t.cpu\t" << CPUName.lower() << "\n";
+}
 void ARMTargetAsmStreamer::emitTextAttribute(unsigned Attribute,
                                              StringRef String) {
   switch (Attribute) {
@@ -373,6 +385,7 @@
 
   void switchVendor(StringRef Vendor) override;
   void emitAttribute(unsigned Attribute, unsigned Value) override;
+  void emitCPUAttribute(StringRef CPUName, StringRef ArchName) override;
   void emitTextAttribute(unsigned Attribute, StringRef String) override;
   void emitIntTextAttribute(unsigned Attribute, unsigned IntValue,
                             StringRef StringValue) override;
@@ -664,6 +677,15 @@
 void ARMTargetELFStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
   setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true);
 }
+void ARMTargetELFStreamer::emitCPUAttribute(StringRef CPUName,
+                                         StringRef ArchName) {
+  if (CPUName == ARMTargetParser::getGenericCPU(ArchName))
+    CPUName =
+        ARMTargetParser::getCPUAttr(ARMTargetParser::parseCPUArch(CPUName,
+                                                                  ArchName));
+  setAttributeItem(ARMBuildAttrs::CPU_name, CPUName,
+                   /* OverwriteExisting= */ true);
+}
 void ARMTargetELFStreamer::emitTextAttribute(unsigned Attribute,
                                              StringRef Value) {
   setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true);
Index: lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
===================================================================
--- lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -24,6 +24,7 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetParser.h"
 #include "llvm/Support/TargetRegistry.h"
 
 using namespace llvm;
@@ -134,100 +135,106 @@
   bool isThumb =
       TT.getArch() == Triple::thumb || TT.getArch() == Triple::thumbeb;
 
-  bool NoCPU = CPU == "generic" || CPU.empty();
+  StringRef ArchName = TT.getArchName();
+  bool NoCPU = CPU.empty() ||
+               CPU == llvm::ARMTargetParser::getGenericCPU(ArchName);
   std::string ARMArchFeature;
-  switch (TT.getSubArch()) {
-  default:
-    llvm_unreachable("invalid sub-architecture for ARM");
-  case Triple::ARMSubArch_v8:
-    if (NoCPU)
-      // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2,
-      //      FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone,
-      //      FeatureT2XtPk, FeatureCrypto, FeatureCRC
-      ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,"
-                       "+trustzone,+t2xtpk,+crypto,+crc";
-    else
-      // Use CPU to figure out the exact features
-      ARMArchFeature = "+v8";
-    break;
-  case Triple::ARMSubArch_v8_1a:
-    if (NoCPU)
-      // v8.1a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2,
-      //      FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone,
-      //      FeatureT2XtPk, FeatureCrypto, FeatureCRC, FeatureV8_1a
-      ARMArchFeature = "+v8.1a,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,"
-                       "+trustzone,+t2xtpk,+crypto,+crc";
-    else
-      // Use CPU to figure out the exact features
-      ARMArchFeature = "+v8.1a";
-    break;
-  case Triple::ARMSubArch_v7m:
-    isThumb = true;
-    if (NoCPU)
-      // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass
-      ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass";
-    else
-      // Use CPU to figure out the exact features.
-      ARMArchFeature = "+v7";
-    break;
-  case Triple::ARMSubArch_v7em:
-    if (NoCPU)
-      // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2,
-      //       FeatureT2XtPk, FeatureMClass
-      ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,+t2xtpk,+mclass";
-    else
-      // Use CPU to figure out the exact features.
-      ARMArchFeature = "+v7";
-    break;
-  case Triple::ARMSubArch_v7s:
-    if (NoCPU)
-      // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS
-      //      Swift
-      ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+ras";
-    else
-      // Use CPU to figure out the exact features.
-      ARMArchFeature = "+v7";
-    break;
-  case Triple::ARMSubArch_v7:
-    // v7 CPUs have lots of different feature sets. If no CPU is specified,
-    // then assume v7a (e.g. cortex-a8) feature set. Otherwise, return
-    // the "minimum" feature set and use CPU string to figure out the exact
-    // features.
-    if (NoCPU)
-      // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk
-      ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk";
-    else
-      // Use CPU to figure out the exact features.
-      ARMArchFeature = "+v7";
-    break;
-  case Triple::ARMSubArch_v6t2:
-    ARMArchFeature = "+v6t2";
-    break;
-  case Triple::ARMSubArch_v6k:
-    ARMArchFeature = "+v6k";
-    break;
-  case Triple::ARMSubArch_v6m:
-    isThumb = true;
-    if (NoCPU)
-      // v6m: FeatureNoARM, FeatureMClass
-      ARMArchFeature = "+v6m,+noarm,+mclass";
-    else
+  if (NoCPU) {
+    switch (TT.getSubArch()) {
+    default:
+      llvm_unreachable("invalid sub-architecture for ARM");
+    case Triple::ARMSubArch_v8:
+      ARMArchFeature = "+v8,+aclass,+db,+trustzone";
+      break;
+    case Triple::ARMSubArch_v8_1a:
+      ARMArchFeature = "+v8.1a,+aclass,+db,+trustzone";
+      break;
+    case Triple::ARMSubArch_v7m:
+      isThumb = true;
+      ARMArchFeature = "+v7,+mclass,+db,+noarm";
+      break;
+    case Triple::ARMSubArch_v7em:
+      isThumb = true;
+      ARMArchFeature = "+v7,+mclass,+db,+noarm,+t2dsp,+t2xtpk";
+      break;
+    case Triple::ARMSubArch_v7s:
+      ARMArchFeature = "+v7,+swift,+db,+neon,+t2dsp,+ras";
+      break;
+    case Triple::ARMSubArch_v7:
+      ARMArchFeature = "+v7,+db";
+      switch (ARMTargetParser::parseArch(ArchName)){
+      default:
+        break;
+      case ARM::AK_ARMV7A:
+        ARMArchFeature += ",+aclass";
+        break;
+      case ARM::AK_ARMV7R:
+        ARMArchFeature += ",+rclass";
+        break;
+      }
+      break;
+    case Triple::ARMSubArch_v6t2:
+      ARMArchFeature = "+v6t2";
+      break;
+    case Triple::ARMSubArch_v6k:
+      ARMArchFeature = "+v6k";
+      break;
+    case Triple::ARMSubArch_v6m:
+      isThumb = true;
+      ARMArchFeature = "+v6m,+mclass,+db,+noarm";
+      break;
+    case Triple::ARMSubArch_v6:
       ARMArchFeature = "+v6";
+      break;
+    case Triple::ARMSubArch_v5te:
+      ARMArchFeature = "+v5te";
+      break;
+    case Triple::ARMSubArch_v5:
+      ARMArchFeature = "+v5t";
+      break;
+    case Triple::ARMSubArch_v4t:
+      ARMArchFeature = "+v4t";
+      break;
+    case Triple::NoSubArch:
+      break;
+    }
+
+    if (TT.getVendor() == Triple::Apple)
+      switch (TT.getSubArch()) {
+      default: 
+        break;
+      case Triple::ARMSubArch_v8:
+      case Triple::ARMSubArch_v8_1a:
+        if (!ARMArchFeature.empty())
+          ARMArchFeature += ",";
+        ARMArchFeature += "+neon,+fp-armv8,+fp16";
+        break;
+      case Triple::ARMSubArch_v7:
+        if (!ARMArchFeature.empty())
+          ARMArchFeature += ",";
+        ARMArchFeature += "+neon,+vfp3";
+        break;
+      }
+  }
+
+  switch (TT.getEnvironment()) {
+  default:
     break;
-  case Triple::ARMSubArch_v6:
-    ARMArchFeature = "+v6";
-    break;
-  case Triple::ARMSubArch_v5te:
-    ARMArchFeature = "+v5te";
-    break;
-  case Triple::ARMSubArch_v5:
-    ARMArchFeature = "+v5t";
-    break;
-  case Triple::ARMSubArch_v4t:
-    ARMArchFeature = "+v4t";
-    break;
-  case Triple::NoSubArch:
-    break;
+  case llvm::Triple::GNUEABIHF:
+  case llvm::Triple::EABIHF:
+    if (!ARMArchFeature.empty())
+      ARMArchFeature += ",";
+    switch (ARMTargetParser::parseArchVersion(ArchName)) {
+    default:
+      ARMArchFeature += "+vfp2";
+      break;
+    case 7:
+      ARMArchFeature += "+vfp3";
+      break;
+    case 8:
+      ARMArchFeature += "+fp-armv8";
+      break;
+    }
   }
 
   if (isThumb) {
Index: lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
===================================================================
--- lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -57,6 +57,8 @@
 }
 void ARMTargetStreamer::switchVendor(StringRef Vendor) {}
 void ARMTargetStreamer::emitAttribute(unsigned Attribute, unsigned Value) {}
+void ARMTargetStreamer::emitCPUAttribute(StringRef CPUName,
+                                         StringRef ArchName) {}
 void ARMTargetStreamer::emitTextAttribute(unsigned Attribute,
                                           StringRef String) {}
 void ARMTargetStreamer::emitIntTextAttribute(unsigned Attribute,
Index: test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll
===================================================================
--- test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll
+++ test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -regalloc=fast
+; RUN: llc < %s -O0 -verify-machineinstrs -regalloc=fast -mattr=+neon
 ; Previously we'd crash as out of registers on this input by clobbering all of
 ; the aliases.
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
Index: test/CodeGen/ARM/2012-08-09-neon-extload.ll
===================================================================
--- test/CodeGen/ARM/2012-08-09-neon-extload.ll
+++ test/CodeGen/ARM/2012-08-09-neon-extload.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=armv7-none-linux-gnueabi < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-none-linux-gnueabi -mattr=+neon < %s | FileCheck %s
 
 @var_v2i8 = global <2 x i8> zeroinitializer
 @var_v4i8 = global <4 x i8> zeroinitializer
Index: test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
===================================================================
--- test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
+++ test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+neon| FileCheck %s
 ; Test that we correctly use registers and align elements when using va_arg
 
 %struct_t = type { double, double, double }
Index: test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll
===================================================================
--- test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll
+++ test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+neon| FileCheck %s
 
 @.str = private unnamed_addr constant [12 x i8] c"val.a = %f\0A\00"
 %struct_t = type { double, double, double }
Index: test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll
===================================================================
--- test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll
+++ test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll
@@ -9,7 +9,7 @@
 ;CHECK: 	bl	fooUseParam
 ;CHECK: 	pop	{r11, lr}
 ;CHECK: 	add	sp, sp, #16
-;CHECK: 	mov	pc, lr
+;CHECK: 	bx lr
 
 ;CHECK-LABEL: foo2:
 ;CHECK: 	sub	sp, sp, #16
@@ -22,7 +22,7 @@
 ;CHECK: 	bl	fooUseParam
 ;CHECK: 	pop	{r11, lr}
 ;CHECK: 	add	sp, sp, #16
-;CHECK: 	mov	pc, lr
+;CHECK: 	bx lr
 
 ;CHECK-LABEL: doFoo:
 ;CHECK: 	push	{r11, lr}
@@ -30,7 +30,7 @@
 ;CHECK: 	ldr	r0, [r0]
 ;CHECK: 	bl	foo
 ;CHECK: 	pop	{r11, lr}
-;CHECK: 	mov	pc, lr
+;CHECK:     bx  lr
 
 
 ;CHECK-LABEL: doFoo2:
@@ -41,7 +41,7 @@
 ;CHECK: 	mov	r2, r0
 ;CHECK: 	bl	foo2
 ;CHECK: 	pop	{r11, lr}
-;CHECK: 	mov	pc, lr
+;CHECK:     bx  lr
 
 
 %artz = type { i32 }
Index: test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll
===================================================================
--- test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll
+++ test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll
@@ -51,7 +51,7 @@
 ;  foo( 1,2,3,4,5,6,7,8,9, 43 );
 ;}
 
-;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
+;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard -mattr=+neon< %s | FileCheck %s
 ;
 ;CHECK-LABEL:     foo:
 ;CHECK-NOT:     mov r0
Index: test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll
===================================================================
--- test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll
+++ test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll
@@ -14,7 +14,7 @@
 ;please, read 5.5 Parameter Passing, Stage C, stages C.2.cp, C.4 and C.5
 ;
 ;
-;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
+;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard -mattr=+neon < %s | FileCheck %s
 
 %struct_t = type { i32, i32, i32, i32 }
 @static_val = constant %struct_t { i32 777, i32 888, i32 999, i32 1000 }
Index: test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll
===================================================================
--- test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll
+++ test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll
@@ -5,7 +5,7 @@
 ;This test is simplified IR version of
 ;test-suite/SingleSource/UnitTests/2002-05-02-ManyArguments.c
 
-;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
+;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard -mattr=+neon < %s | FileCheck %s
 
 @.str = private unnamed_addr constant [13 x i8] c"%d %d %f %i\0A\00", align 1
 
Index: test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll
===================================================================
--- test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll
+++ test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll
@@ -1,6 +1,6 @@
 ;Check AAPCS, 5.5 Parameters Passing, C4 and C5 rules.
 ;Check case when NSAA != 0, and NCRN < R4, NCRN+ParamSize < R4
-;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
+;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard -mattr=+neon < %s | FileCheck %s
 
 %st_t = type { i32, i32 }
 @static_val = constant %st_t { i32 777, i32 888}
Index: test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll
===================================================================
--- test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll
+++ test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll
@@ -1,6 +1,6 @@
 ;Check AAPCS, 5.5 Parameters Passing, C4 and C5 rules.
 ;Check case when NSAA != 0, and NCRN < R4, NCRN+ParamSize > R4
-;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
+;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard -mattr=+neon < %s | FileCheck %s
 
 %st_t = type { i32, i32, i32, i32 }
 @static_val = constant %st_t { i32 777, i32 888, i32 787, i32 878}
Index: test/CodeGen/ARM/2014-02-05-vfp-regs-after-stack.ll
===================================================================
--- test/CodeGen/ARM/2014-02-05-vfp-regs-after-stack.ll
+++ test/CodeGen/ARM/2014-02-05-vfp-regs-after-stack.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -o - -filetype=asm | FileCheck %s
+; RUN: llc < %s -o - -filetype=asm -mattr=+neon | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
 target triple = "armv8-none--eabi"
Index: test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll
===================================================================
--- test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll
+++ test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll
@@ -75,7 +75,7 @@
 ; CHECK: bl      usePtr
 ; CHECK: pop     {r11, lr}
 ; CHECK: add     sp, sp, #16
-; CHECK: mov     pc, lr
+; CHECK: bx      lr
 
   call void @usePtr(%struct8bytes8align* %b)
   ret void
@@ -94,7 +94,7 @@
 ; CHECK: bl      usePtr
 ; CHECK: pop     {r11, lr}
 ; CHECK: add     sp, sp, #16
-; CHECK: mov     pc, lr
+; CHECK: bx      lr
 
   call void @usePtr(%struct8bytes8align* %a)
   ret void
@@ -108,7 +108,7 @@
 ; CHECK: add  r0, sp, #8
 ; CHECK: bl   usePtr
 ; CHECK: pop  {r11, lr}
-; CHECK: mov  pc, lr
+; CHECK: bx   lr
 
   call void @usePtr(%struct8bytes8align* %d)
   ret void
Index: test/CodeGen/ARM/Windows/alloca.ll
===================================================================
--- test/CodeGen/ARM/Windows/alloca.ll
+++ test/CodeGen/ARM/Windows/alloca.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -mtriple thumbv7-windows-itanium -filetype asm -o - %s | FileCheck %s
+; RUN: llc -O0 -mtriple thumbv7-windows-itanium -mattr=+neon -filetype asm -o - %s | FileCheck %s
 
 declare arm_aapcs_vfpcc i32 @num_entries()
 
@@ -16,7 +16,7 @@
 ; CHECK: movs [[R1:r[0-9]+]], #7
 ; CHECK: add.w [[R0:r[0-9]+]], [[R1]], [[R0]], lsl #2
 ; CHECK: bic [[R0]], [[R0]], #7
-; CHECK: lsrs r4, [[R0]], #2
+; CHECK: lsr.w r4, [[R0]], #2
 ; CHECK: bl __chkstk
 ; CHECK: sub.w sp, sp, r4
 
Index: test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll
===================================================================
--- test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll
+++ test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll
@@ -19,9 +19,9 @@
 
 ; CHECK-LABEL: isel
 ; CHECK: push {r4, r5}
-; CHECK: movw r4, #{{\d*}}
 ; CHECK: movw r12, #0
 ; CHECK: movt r12, #0
+; CHECK: movw r4, #{{\d*}}
 ; CHECK: blx r12
 ; CHECK: sub.w sp, sp, r4
 
Index: test/CodeGen/ARM/aapcs-hfa-code.ll
===================================================================
--- test/CodeGen/ARM/aapcs-hfa-code.ll
+++ test/CodeGen/ARM/aapcs-hfa-code.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-linux-gnueabihf -o - | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-linux-gnueabihf -mattr=+neon -o - | FileCheck %s
 ; RUN: llc < %s -mtriple=thumbv7em-none-eabi -mcpu=cortex-m4 | FileCheck %s --check-prefix=CHECK-M4F
 
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
Index: test/CodeGen/ARM/aapcs-hfa.ll
===================================================================
--- test/CodeGen/ARM/aapcs-hfa.ll
+++ test/CodeGen/ARM/aapcs-hfa.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -float-abi=hard -debug-only arm-isel 2>&1 | FileCheck %s
-; RUN: llc < %s -float-abi=soft -debug-only arm-isel 2>&1 | FileCheck %s --check-prefix=SOFT
+; RUN: llc < %s -float-abi=hard -mattr=+neon -debug-only arm-isel 2>&1 | FileCheck %s
+; RUN: llc < %s -float-abi=soft -mattr=+neon -debug-only arm-isel 2>&1 | FileCheck %s --check-prefix=SOFT
 ; REQUIRES: asserts
 
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
Index: test/CodeGen/ARM/aggregate-padding.ll
===================================================================
--- test/CodeGen/ARM/aggregate-padding.ll
+++ test/CodeGen/ARM/aggregate-padding.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=armv7-linux-gnueabihf %s -o - | FileCheck %s
+; RUN: llc -mtriple=armv7-linux-gnueabihf -mattr=+neon %s -o - | FileCheck %s
 
 ; [2 x i64] should be contiguous when split (e.g. we shouldn't try to align all
 ; i32 components to 64 bits). Also makes sure i64 based types are properly
Index: test/CodeGen/ARM/arguments.ll
===================================================================
--- test/CodeGen/ARM/arguments.ll
+++ test/CodeGen/ARM/arguments.ll
@@ -28,10 +28,10 @@
 ; unused and the value stored in [sp]
 ; ELF-LABEL: f3:
 ; ELF: ldr r0, [sp]
-; ELF-NEXT: mov pc, lr
+; ELF-NEXT: bx lr
 ; DARWIN-LABEL: f3:
 ; DARWIN: mov r0, r3
-; DARWIN-NEXT: mov pc, lr
+; DARWIN-NEXT: bx lr
 define i32 @f3(i32 %i, i32 %j, i32 %k, i64 %l, ...) {
 entry:
   %0 = trunc i64 %l to i32
Index: test/CodeGen/ARM/arm-shrink-wrapping.ll
===================================================================
--- test/CodeGen/ARM/arm-shrink-wrapping.ll
+++ test/CodeGen/ARM/arm-shrink-wrapping.ll
@@ -515,7 +515,7 @@
 ;
 ; ENABLE: push
 ;
-; CHECK: bl{{x?}} _abort
+; CHECK: b{{l?}}{{x?}} _abort
 ; ENABLE-NOT: pop
 define i32 @noreturn(i8 signext %bad_thing) {
 entry:
Index: test/CodeGen/ARM/build-attributes.ll
===================================================================
--- test/CodeGen/ARM/build-attributes.ll
+++ test/CodeGen/ARM/build-attributes.ll
@@ -7,8 +7,8 @@
 ; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mattr=+strict-align -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mattr=+strict-align | FileCheck %s --check-prefix=V6M
 ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mattr=+strict-align  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V6M-FAST
-; RUN: llc < %s -mtriple=thumbv6sm-linux-gnueabi -mattr=+strict-align | FileCheck %s --check-prefix=V6M
-; RUN: llc < %s -mtriple=thumbv6sm-linux-gnueabi -mattr=+strict-align -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V6M-FAST
+; RUN: llc < %s -mtriple=thumbv6sm-linux-gnueabi -mattr=+strict-align | FileCheck %s --check-prefix=V6SM
+; RUN: llc < %s -mtriple=thumbv6sm-linux-gnueabi -mattr=+strict-align -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V6SM-FAST
 ; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s -mattr=+strict-align | FileCheck %s --check-prefix=ARM1156T2F-S
 ; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s -mattr=+strict-align  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast  | FileCheck %s --check-prefix=ARM1156T2F-S-FAST
 ; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s -mattr=+strict-align -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
@@ -16,6 +16,9 @@
 ; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V7M-FAST
 ; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=V7
+; RUN: llc < %s -mtriple=armv7a-linux-gnueabi | FileCheck %s --check-prefix=V7A
+; RUN: llc < %s -mtriple=armv7r-linux-gnueabi | FileCheck %s --check-prefix=V7R
+; RUN: llc < %s -mtriple=armv7s-linux-gnueabi | FileCheck %s --check-prefix=V7S
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V7-FAST
 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi | FileCheck %s --check-prefix=V8
@@ -23,10 +26,10 @@
 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi | FileCheck %s --check-prefix=Vt8
 ; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
-; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-neon,-crypto | FileCheck %s --check-prefix=V8-FPARMv8
-; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-fp-armv8,-crypto | FileCheck %s --check-prefix=V8-NEON
-; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-crypto | FileCheck %s --check-prefix=V8-FPARMv8-NEON
-; RUN: llc < %s -mtriple=armv8-linux-gnueabi | FileCheck %s --check-prefix=V8-FPARMv8-NEON-CRYPTO
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=+fp-armv8,-neon,-crypto | FileCheck %s --check-prefix=V8-FPARMv8
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-fp-armv8,+neon,-crypto | FileCheck %s --check-prefix=V8-NEON
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=+fp-armv8,+neon,-crypto | FileCheck %s --check-prefix=V8-FPARMv8-NEON
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=+fp-armv8,+neon,+crypto | FileCheck %s --check-prefix=V8-FPARMv8-NEON-CRYPTO
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 | FileCheck %s --check-prefix=CORTEX-A5-DEFAULT
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A5-DEFAULT-FAST
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
@@ -105,8 +108,12 @@
 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a72 | FileCheck %s --check-prefix=CORTEX-A72
 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a72  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A72-FAST
 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a72 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
-; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi | FileCheck %s --check-prefix=GENERIC-ARMV8_1-A
-; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=GENERIC-ARMV8_1-A-FAST
+; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi                                | FileCheck %s --check-prefix=V8_1A
+; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi -mattr=+fp-armv8,-neon,-crypto | FileCheck %s --check-prefix=V8_1A-FPARMv8
+; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi -mattr=-fp-armv8,+neon,-crypto | FileCheck %s --check-prefix=V8_1A-NEON
+; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi -mattr=+fp-armv8,+neon,-crypto | FileCheck %s --check-prefix=V8_1A-FPARMv8-NEON
+; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi -mattr=+fp-armv8,+neon,+crypto | FileCheck %s --check-prefix=V8_1A-FPARMv8-NEON-CRYPTO
+; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V8_1A-FAST
 ; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 | FileCheck %s  --check-prefix=CORTEX-A7-CHECK
 ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s  --check-prefix=CORTEX-A7-CHECK-FAST
@@ -165,6 +172,7 @@
 
 ; DYN-ROUNDING: .eabi_attribute 19, 1
 
+; V6:   .arch armv6
 ; V6:   .eabi_attribute 6, 6
 ; V6:   .eabi_attribute 8, 1
 ;; We assume round-to-nearest by default (matches GCC)
@@ -198,6 +206,7 @@
 
 ;; We emit 6, 12 for both v6-M and v6S-M, technically this is incorrect for
 ;; V6-M, however we don't model the OS extension so this is fine.
+; V6M:  .arch armv6-m
 ; V6M:  .eabi_attribute 6, 12
 ; V6M-NOT:  .eabi_attribute 7
 ; V6M:  .eabi_attribute 8, 0
@@ -230,6 +239,39 @@
 ; V6M-FAST-NOT:   .eabi_attribute 22
 ; V6M-FAST:   .eabi_attribute 23, 1
 
+; V6SM:  .arch armv6s-m
+; V6SM:  .eabi_attribute 6, 12
+; V6SM-NOT:  .eabi_attribute 7
+; V6SM:  .eabi_attribute 8, 0
+; V6SM:  .eabi_attribute 9, 1
+; V6SM-NOT:   .eabi_attribute 19
+;; The default choice made by llc is for a V6M CPU without an FPU.
+;; This is not an interesting detail, but for such CPUs, the default intention is to use
+;; software floating-point support. The choice is not important for targets without
+;; FPU support!
+; V6SM:  .eabi_attribute 20, 1
+; V6SM:   .eabi_attribute 21, 1
+; V6SM-NOT:   .eabi_attribute 22
+; V6SM:   .eabi_attribute 23, 3
+; V6SM:  .eabi_attribute 24, 1
+; V6SM:  .eabi_attribute 25, 1
+; V6SM-NOT:  .eabi_attribute 27
+; V6SM-NOT:  .eabi_attribute 28
+; V6SM-NOT:  .eabi_attribute 36
+; V6SM:  .eabi_attribute 38, 1
+; V6SM-NOT:  .eabi_attribute 42
+; V6SM-NOT:  .eabi_attribute 44
+; V6SM-NOT:  .eabi_attribute 68
+
+; V6SM-FAST-NOT:   .eabi_attribute 19
+;; Despite the V6M CPU having no FPU by default, we chose to flush to
+;; positive zero here. There's no hardware support doing this, but the
+;; fast maths software library might.
+; V6SM-FAST-NOT:  .eabi_attribute 20
+; V6SM-FAST-NOT:   .eabi_attribute 21
+; V6SM-FAST-NOT:   .eabi_attribute 22
+; V6SM-FAST:   .eabi_attribute 23, 1
+
 ; ARM1156T2F-S: .cpu arm1156t2f-s
 ; ARM1156T2F-S: .eabi_attribute 6, 8
 ; ARM1156T2F-S: .eabi_attribute 8, 1
@@ -260,6 +302,7 @@
 ; ARM1156T2F-S-FAST-NOT:   .eabi_attribute 22
 ; ARM1156T2F-S-FAST:   .eabi_attribute 23, 1
 
+; V7M:  .arch armv7-m
 ; V7M:  .eabi_attribute 6, 10
 ; V7M:  .eabi_attribute 7, 77
 ; V7M:  .eabi_attribute 8, 0
@@ -293,8 +336,11 @@
 ; V7M-FAST:   .eabi_attribute 23, 1
 
 ; V7:      .syntax unified
+; V7: .arch armv7
 ; V7: .eabi_attribute 6, 10
+; V7-NOT: .eabi_attribute 7
 ; V7-NOT:   .eabi_attribute 19
+; V7-NOT:   .fpu
 ;; In safe-maths mode we default to an IEEE 754 compliant choice.
 ; V7: .eabi_attribute 20, 1
 ; V7: .eabi_attribute 21, 1
@@ -310,6 +356,48 @@
 ; V7-NOT:    .eabi_attribute 44
 ; V7-NOT:    .eabi_attribute 68
 
+; V7A:      .syntax unified
+; V7A: .arch armv7-a
+; V7A: .eabi_attribute 6, 10
+; V7A: .eabi_attribute 7, 65
+; V7A-NOT:   .eabi_attribute 19
+; V7A-NOT:   .fpu
+;; In safe-maths mode we default to an IEEE 754 compliant choice.
+; V7A: .eabi_attribute 20, 1
+; V7A: .eabi_attribute 21, 1
+; V7A-NOT: .eabi_attribute 22
+; V7A: .eabi_attribute 23, 3
+; V7A: .eabi_attribute 24, 1
+; V7A: .eabi_attribute 25, 1
+; V7A-NOT: .eabi_attribute 27
+; V7A-NOT: .eabi_attribute 28
+; V7A-NOT: .eabi_attribute 36
+; V7A: .eabi_attribute 38, 1
+; V7A-NOT:    .eabi_attribute 42
+; V7A-NOT:    .eabi_attribute 44
+; V7A-NOT:    .eabi_attribute 68
+
+; V7R:      .syntax unified
+; V7R: .arch armv7-r
+; V7R: .eabi_attribute 6, 10
+; V7R: .eabi_attribute 7, 82
+; V7R-NOT:   .eabi_attribute 19
+; V7R-NOT:   .fpu
+;; In safe-maths mode we default to an IEEE 754 compliant choice.
+; V7R: .eabi_attribute 20, 1
+; V7R: .eabi_attribute 21, 1
+; V7R-NOT: .eabi_attribute 22
+; V7R: .eabi_attribute 23, 3
+; V7R: .eabi_attribute 24, 1
+; V7R: .eabi_attribute 25, 1
+; V7R-NOT: .eabi_attribute 27
+; V7R-NOT: .eabi_attribute 28
+; V7R-NOT: .eabi_attribute 36
+; V7R: .eabi_attribute 38, 1
+; V7R-NOT:    .eabi_attribute 42
+; V7R-NOT:    .eabi_attribute 44
+; V7R-NOT:    .eabi_attribute 68
+
 ; V7-FAST-NOT:   .eabi_attribute 19
 ;; The default CPU does have an FPU and it must be VFPv3 or better, so it flushes
 ;; denormals to zero preserving the sign.
@@ -318,9 +406,32 @@
 ; V7-FAST-NOT:   .eabi_attribute 22
 ; V7-FAST:   .eabi_attribute 23, 1
 
+; V7S:      .syntax unified
+; V7S: .cpu swift
+; V7S: .eabi_attribute 6, 10
+; V7S: .eabi_attribute 7, 65
+; V7S-NOT:   .eabi_attribute 19
+; V7S: .fpu neon-vfpv4
+;; In safe-maths mode we default to an IEEE 754 compliant choice.
+; V7S: .eabi_attribute 20, 1
+; V7S: .eabi_attribute 21, 1
+; V7S-NOT: .eabi_attribute 22
+; V7S: .eabi_attribute 23, 3
+; V7S: .eabi_attribute 24, 1
+; V7S: .eabi_attribute 25, 1
+; V7S-NOT: .eabi_attribute 27
+; V7S-NOT: .eabi_attribute 28
+; V7S: .eabi_attribute 36, 1
+; V7S: .eabi_attribute 38, 1
+; V7S:    .eabi_attribute 42, 1
+; V7S:    .eabi_attribute 44, 2
+; V7S:    .eabi_attribute 68, 1
+
 ; V8:      .syntax unified
 ; V8: .eabi_attribute 67, "2.09"
+; V8: .arch armv8-a
 ; V8: .eabi_attribute 6, 14
+; V8: .eabi_attribute 7, 65
 ; V8-NOT:   .eabi_attribute 19
 ; V8: .eabi_attribute 20, 1
 ; V8: .eabi_attribute 21, 1
@@ -343,21 +454,25 @@
 ; Vt8-NOT: .eabi_attribute 22
 ; Vt8: .eabi_attribute 23, 3
 
-; V8-FPARMv8:      .syntax unified
+; V8-FPARMv8: .syntax unified
+; V8-FPARMv8: .arch armv8-a
 ; V8-FPARMv8: .eabi_attribute 6, 14
 ; V8-FPARMv8: .fpu fp-armv8
 
-; V8-NEON:      .syntax unified
+; V8-NEON: .syntax unified
+; V8-NEON: .arch armv8-a
 ; V8-NEON: .eabi_attribute 6, 14
 ; V8-NEON: .fpu neon
 ; V8-NEON: .eabi_attribute 12, 3
 
-; V8-FPARMv8-NEON:      .syntax unified
+; V8-FPARMv8-NEON: .syntax unified
+; V8-FPARMv8-NEON: .arch armv8-a
 ; V8-FPARMv8-NEON: .eabi_attribute 6, 14
 ; V8-FPARMv8-NEON: .fpu neon-fp-armv8
 ; V8-FPARMv8-NEON: .eabi_attribute 12, 3
 
-; V8-FPARMv8-NEON-CRYPTO:      .syntax unified
+; V8-FPARMv8-NEON-CRYPTO: .syntax unified
+; V8-FPARMv8-NEON-CRYPTO: .arch armv8-a
 ; V8-FPARMv8-NEON-CRYPTO: .eabi_attribute 6, 14
 ; V8-FPARMv8-NEON-CRYPTO: .fpu crypto-neon-fp-armv8
 ; V8-FPARMv8-NEON-CRYPTO: .eabi_attribute 12, 3
@@ -1209,34 +1324,131 @@
 ; GENERIC-FPU-VFPV3XD-FP16: .fpu vfpv3xd-fp16
 ; GENERIC-FPU-NEON-FP16: .fpu neon-fp16
 
-; GENERIC-ARMV8_1-A:  .eabi_attribute 6, 14
-; GENERIC-ARMV8_1-A:  .eabi_attribute 7, 65
-; GENERIC-ARMV8_1-A:  .eabi_attribute 8, 1
-; GENERIC-ARMV8_1-A:  .eabi_attribute 9, 2
-; GENERIC-ARMV8_1-A:  .fpu crypto-neon-fp-armv8
-; GENERIC-ARMV8_1-A:  .eabi_attribute 12, 4
-; GENERIC-ARMV8_1-A-NOT:   .eabi_attribute 19
+; V8_1A:  .arch armv8.1-a
+; V8_1A:  .eabi_attribute 6, 14
+; V8_1A:  .eabi_attribute 7, 65
+; V8_1A:  .eabi_attribute 8, 1
+; V8_1A:  .eabi_attribute 9, 2
+; V8_1A-NOT:  .fpu
+; V8_1A-NOT:  .eabi_attribute 12
+; V8_1A-NOT:   .eabi_attribute 19
+;; We default to IEEE 754 compliance
+; V8_1A:  .eabi_attribute 20, 1
+; V8_1A:  .eabi_attribute 21, 1
+; V8_1A-NOT:  .eabi_attribute 22
+; V8_1A:  .eabi_attribute 23, 3
+; V8_1A:  .eabi_attribute 24, 1
+; V8_1A:  .eabi_attribute 25, 1
+; V8_1A-NOT:  .eabi_attribute 27
+; V8_1A-NOT:  .eabi_attribute 28
+; V8_1A-NOT:  .eabi_attribute 36, 1
+; V8_1A:  .eabi_attribute 38, 1
+; V8_1A:  .eabi_attribute 42, 1
+; V8_1A-NOT:  .eabi_attribute 44
+; V8_1A:  .eabi_attribute 68, 3
+
+; V8_1A-FPARMv8:  .arch armv8.1-a
+; V8_1A-FPARMv8:  .eabi_attribute 6, 14
+; V8_1A-FPARMv8:  .eabi_attribute 7, 65
+; V8_1A-FPARMv8:  .eabi_attribute 8, 1
+; V8_1A-FPARMv8:  .eabi_attribute 9, 2
+; V8_1A-FPARMv8:  .fpu fp-armv8
+;; Tag_Advanced_SIMD_arch
+; V8_1A-FPARMv8-NOT:  .eabi_attribute 12, 4
+; V8_1A-FPARMv8-NOT:  .eabi_attribute 19
+;; We default to IEEE 754 compliance
+; V8_1A-FPARMv8:  .eabi_attribute 20, 1
+; V8_1A-FPARMv8:  .eabi_attribute 21, 1
+; V8_1A-FPARMv8-NOT:  .eabi_attribute 22
+; V8_1A-FPARMv8:  .eabi_attribute 23, 3
+; V8_1A-FPARMv8:  .eabi_attribute 24, 1
+; V8_1A-FPARMv8:  .eabi_attribute 25, 1
+; V8_1A-FPARMv8-NOT:  .eabi_attribute 27
+; V8_1A-FPARMv8-NOT:  .eabi_attribute 28
+; V8_1A-FPARMv8:  .eabi_attribute 36, 1
+; V8_1A-FPARMv8:  .eabi_attribute 38, 1
+; V8_1A-FPARMv8:  .eabi_attribute 42, 1
+; V8_1A-FPARMv8-NOT:  .eabi_attribute 44
+; V8_1A-FPARMv8:  .eabi_attribute 68, 3
+
+; V8_1A-NEON:  .arch armv8.1-a
+; V8_1A-NEON:  .eabi_attribute 6, 14
+; V8_1A-NEON:  .eabi_attribute 7, 65
+; V8_1A-NEON:  .eabi_attribute 8, 1
+; V8_1A-NEON:  .eabi_attribute 9, 2
+; V8_1A-NEON:  .fpu neon
+;; Tag_Advanced_SIMD_arch
+; V8_1A-NEON:  .eabi_attribute 12, 4
+; V8_1A-NEON-NOT:   .eabi_attribute 19
+;; We default to IEEE 754 compliance
+; V8_1A-NEON:  .eabi_attribute 20, 1
+; V8_1A-NEON:  .eabi_attribute 21, 1
+; V8_1A-NEON-NOT:  .eabi_attribute 22
+; V8_1A-NEON:  .eabi_attribute 23, 3
+; V8_1A-NEON:  .eabi_attribute 24, 1
+; V8_1A-NEON:  .eabi_attribute 25, 1
+; V8_1A-NEON-NOT:  .eabi_attribute 27
+; V8_1A-NEON-NOT:  .eabi_attribute 28
+; V8_1A-NEON-NOT:  .eabi_attribute 36, 1
+; V8_1A-NEON:  .eabi_attribute 38, 1
+; V8_1A-NEON:  .eabi_attribute 42, 1
+; V8_1A-NEON-NOT:  .eabi_attribute 44
+; V8_1A-NEON:  .eabi_attribute 68, 3
+
+; V8_1A-FPARMv8-NEON:  .arch armv8.1-a
+; V8_1A-FPARMv8-NEON:  .eabi_attribute 6, 14
+; V8_1A-FPARMv8-NEON:  .eabi_attribute 7, 65
+; V8_1A-FPARMv8-NEON:  .eabi_attribute 8, 1
+; V8_1A-FPARMv8-NEON:  .eabi_attribute 9, 2
+; V8_1A-FPARMv8-NEON:  .fpu neon-fp-armv8
+;; Tag_Advanced_SIMD_arch
+; V8_1A-FPARMv8-NEON:  .eabi_attribute 12, 4
+; V8_1A-FPARMv8-NEON-NOT:   .eabi_attribute 19
+;; We default to IEEE 754 compliance
+; V8_1A-FPARMv8-NEON:  .eabi_attribute 20, 1
+; V8_1A-FPARMv8-NEON:  .eabi_attribute 21, 1
+; V8_1A-FPARMv8-NEON-NOT:  .eabi_attribute 22
+; V8_1A-FPARMv8-NEON:  .eabi_attribute 23, 3
+; V8_1A-FPARMv8-NEON:  .eabi_attribute 24, 1
+; V8_1A-FPARMv8-NEON:  .eabi_attribute 25, 1
+; V8_1A-FPARMv8-NEON-NOT:  .eabi_attribute 27
+; V8_1A-FPARMv8-NEON-NOT:  .eabi_attribute 28
+; V8_1A-FPARMv8-NEON:  .eabi_attribute 36, 1
+; V8_1A-FPARMv8-NEON:  .eabi_attribute 38, 1
+; V8_1A-FPARMv8-NEON:  .eabi_attribute 42, 1
+; V8_1A-FPARMv8-NEON-NOT:  .eabi_attribute 44
+; V8_1A-FPARMv8-NEON:  .eabi_attribute 68, 3
+
+; V8_1A-FPARMv8-NEON-CRYPTO:  .arch armv8.1-a
+; V8_1A-FPARMv8-NEON-CRYPTO:  .eabi_attribute 6, 14
+; V8_1A-FPARMv8-NEON-CRYPTO:  .eabi_attribute 7, 65
+; V8_1A-FPARMv8-NEON-CRYPTO:  .eabi_attribute 8, 1
+; V8_1A-FPARMv8-NEON-CRYPTO:  .eabi_attribute 9, 2
+; V8_1A-FPARMv8-NEON-CRYPTO:  .fpu crypto-neon-fp-armv8
+;; Tag_Advanced_SIMD_arch
+; V8_1A-FPARMv8-NEON-CRYPTO:  .eabi_attribute 12, 4
+; V8_1A-FPARMv8-NEON-CRYPTO-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
-; GENERIC-ARMV8_1-A:  .eabi_attribute 20, 1
-; GENERIC-ARMV8_1-A:  .eabi_attribute 21, 1
-; GENERIC-ARMV8_1-A-NOT:  .eabi_attribute 22
-; GENERIC-ARMV8_1-A:  .eabi_attribute 23, 3
-; GENERIC-ARMV8_1-A:  .eabi_attribute 24, 1
-; GENERIC-ARMV8_1-A:  .eabi_attribute 25, 1
-; GENERIC-ARMV8_1-A-NOT:  .eabi_attribute 27
-; GENERIC-ARMV8_1-A-NOT:  .eabi_attribute 28
-; GENERIC-ARMV8_1-A:  .eabi_attribute 36, 1
-; GENERIC-ARMV8_1-A:  .eabi_attribute 38, 1
-; GENERIC-ARMV8_1-A:  .eabi_attribute 42, 1
-; GENERIC-ARMV8_1-A-NOT:  .eabi_attribute 44
-; GENERIC-ARMV8_1-A:  .eabi_attribute 68, 3
-
-; GENERIC-ARMV8_1-A-FAST-NOT:   .eabi_attribute 19
-;; GENERIC-ARMV8_1-A has the ARMv8 FP unit, which always flushes preserving sign.
-; GENERIC-ARMV8_1-A-FAST:  .eabi_attribute 20, 2
-; GENERIC-ARMV8_1-A-FAST-NOT:  .eabi_attribute 21
-; GENERIC-ARMV8_1-A-FAST-NOT:  .eabi_attribute 22
-; GENERIC-ARMV8_1-A-FAST:  .eabi_attribute 23, 1
+; V8_1A-FPARMv8-NEON-CRYPTO:  .eabi_attribute 20, 1
+; V8_1A-FPARMv8-NEON-CRYPTO:  .eabi_attribute 21, 1
+; V8_1A-FPARMv8-NEON-CRYPTO-NOT:  .eabi_attribute 22
+; V8_1A-FPARMv8-NEON-CRYPTO:  .eabi_attribute 23, 3
+; V8_1A-FPARMv8-NEON-CRYPTO:  .eabi_attribute 24, 1
+; V8_1A-FPARMv8-NEON-CRYPTO:  .eabi_attribute 25, 1
+; V8_1A-FPARMv8-NEON-CRYPTO-NOT:  .eabi_attribute 27
+; V8_1A-FPARMv8-NEON-CRYPTO-NOT:  .eabi_attribute 28
+; V8_1A-FPARMv8-NEON-CRYPTO:  .eabi_attribute 36, 1
+; V8_1A-FPARMv8-NEON-CRYPTO:  .eabi_attribute 38, 1
+; V8_1A-FPARMv8-NEON-CRYPTO:  .eabi_attribute 42, 1
+; V8_1A-FPARMv8-NEON-CRYPTO-NOT:  .eabi_attribute 44
+; V8_1A-FPARMv8-NEON-CRYPTO:  .eabi_attribute 68, 3
+
+; V8_1A-FAST-NOT:   .eabi_attribute 19
+;; V8_1A has the ARMv8 FP unit, which always flushes preserving sign.
+; V8_1A-FAST:  .eabi_attribute 20, 2
+; V8_1A-FAST-NOT:  .eabi_attribute 21
+; V8_1A-FAST-NOT:  .eabi_attribute 22
+; V8_1A-FAST:  .eabi_attribute 23, 1
 
 ; RELOC-PIC:  .eabi_attribute 15, 1
 ; RELOC-PIC:  .eabi_attribute 16, 1
Index: test/CodeGen/ARM/call_nolink.ll
===================================================================
--- test/CodeGen/ARM/call_nolink.ll
+++ test/CodeGen/ARM/call_nolink.ll
@@ -58,5 +58,5 @@
 
 ; CHECK-LABEL: PR15520:
 ; CHECK: mov lr, pc
-; CHECK: mov pc, r0
+; CHECK: bx r0
 }
Index: test/CodeGen/ARM/constant-islands.ll
===================================================================
--- test/CodeGen/ARM/constant-islands.ll
+++ test/CodeGen/ARM/constant-islands.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=thumbv7-linux-gnueabihf -O0 -fast-isel=0 -o - %s | FileCheck %s
+; RUN: llc -mtriple=thumbv7-linux-gnueabihf -mattr=+neon -O0 -fast-isel=0 -o - %s | FileCheck %s
 
 define void @test_no_duplicate_branches(float %in) {
 ; CHECK-LABEL: test_no_duplicate_branches:
Index: test/CodeGen/ARM/crc32.ll
===================================================================
--- test/CodeGen/ARM/crc32.ll
+++ test/CodeGen/ARM/crc32.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=thumbv8 -o - %s | FileCheck %s
+; RUN: llc -mtriple=thumbv8 -mattr=+crc -o - %s | FileCheck %s
 
 define i32 @test_crc32b(i32 %cur, i8 %next) {
 ; CHECK-LABEL: test_crc32b:
Index: test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
===================================================================
--- test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
+++ test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
+; RUN: llc -mtriple armv7 -mattr=+neon %s -o - | FileCheck %s
 
 ; CHECK-LABEL: f:
 define float @f(<4 x i16>* nocapture %in) {
Index: test/CodeGen/ARM/dagcombine-concatvector.ll
===================================================================
--- test/CodeGen/ARM/dagcombine-concatvector.ll
+++ test/CodeGen/ARM/dagcombine-concatvector.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=thumbv7s-apple-ios3.0.0 -mcpu=generic | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LE
-; RUN: llc < %s -mtriple=thumbeb -target-abi apcs -mattr=v7,neon | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE
+; RUN: llc < %s -mtriple=thumbeb -target-abi apcs -mattr=+v7,+neon | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE
 
 ; PR15525
 ; CHECK-LABEL: test1:
Index: test/CodeGen/ARM/data-in-code-annotations.ll
===================================================================
--- test/CodeGen/ARM/data-in-code-annotations.ll
+++ test/CodeGen/ARM/data-in-code-annotations.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin -arm-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mattr=+neon -arm-atomic-cfg-tidy=0 | FileCheck %s
 
 define double @f1() nounwind {
 ; CHECK-LABEL: f1:
Index: test/CodeGen/ARM/debug-frame.ll
===================================================================
--- test/CodeGen/ARM/debug-frame.ll
+++ test/CodeGen/ARM/debug-frame.ll
@@ -22,11 +22,11 @@
 ; RUN:     -filetype=asm -o - %s \
 ; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
 
-; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi -mattr=+neon \
 ; RUN:     -disable-fp-elim -filetype=asm -o - %s \
 ; RUN:   | FileCheck %s --check-prefix=CHECK-V7-FP
 
-; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi -mattr=+neon \
 ; RUN:     -filetype=asm -o - %s \
 ; RUN:   | FileCheck %s --check-prefix=CHECK-V7-FP-ELIM
 
@@ -38,15 +38,15 @@
 ; RUN:     -filetype=asm -o - %s \
 ; RUN:   | FileCheck %s --check-prefix=CHECK-THUMB-FP-ELIM
 
-; RUN: llc -mtriple thumbv7-unknown-linux-gnueabi \
+; RUN: llc -mtriple thumbv7-unknown-linux-gnueabi -mattr=+neon \
 ; RUN:     -disable-fp-elim -filetype=asm -o - %s \
 ; RUN:   | FileCheck %s --check-prefix=CHECK-THUMB-V7-FP
 
-; RUN: llc -mtriple thumbv7-unknown-linux-gnueabi \
+; RUN: llc -mtriple thumbv7-unknown-linux-gnueabi -mattr=+neon \
 ; RUN:     -filetype=asm -o - %s \
 ; RUN:   | FileCheck %s --check-prefix=CHECK-THUMB-V7-FP-ELIM
 
-; RUN: llc -mtriple thumbv7-unknown-linux-gnueabi \
+; RUN: llc -mtriple thumbv7-unknown-linux-gnueabi -mattr=+neon \
 ; RUN:     -disable-fp-elim -no-integrated-as -filetype=asm -o - %s \
 ; RUN:   | FileCheck %s --check-prefix=CHECK-THUMB-V7-FP-NOIAS
 
@@ -335,7 +335,7 @@
 ; CHECK-FP:   mov    r11, sp
 ; CHECK-FP:   .cfi_def_cfa_register r11
 ; CHECK-FP:   pop    {r11, lr}
-; CHECK-FP:   mov    pc, lr
+; CHECK-FP:   bx     lr
 ; CHECK-FP:   .cfi_endproc
 
 ; CHECK-FP-ELIM-LABEL: test2:
@@ -345,7 +345,7 @@
 ; CHECK-FP-ELIM:   .cfi_offset lr, -4
 ; CHECK-FP-ELIM:   .cfi_offset r11, -8
 ; CHECK-FP-ELIM:   pop   {r11, lr}
-; CHECK-FP-ELIM:   mov   pc, lr
+; CHECK-FP-ELIM:   bx    lr
 ; CHECK-FP-ELIM:   .cfi_endproc
 
 ; CHECK-V7-FP-LABEL: test2:
@@ -376,7 +376,8 @@
 ; CHECK-THUMB-FP:   .cfi_offset r7, -8
 ; CHECK-THUMB-FP:   add    r7, sp, #0
 ; CHECK-THUMB-FP:   .cfi_def_cfa_register r7
-; CHECK-THUMB-FP:   pop    {r7, pc}
+; CHECK-THUMB-FP:   pop    {r7}
+; CHECK-THUMB-FP:   pop    {pc}
 ; CHECK-THUMB-FP:   .cfi_endproc
 
 ; CHECK-THUMB-FP-ELIM-LABEL: test2:
@@ -385,7 +386,8 @@
 ; CHECK-THUMB-FP-ELIM:   .cfi_def_cfa_offset 8
 ; CHECK-THUMB-FP-ELIM:   .cfi_offset lr, -4
 ; CHECK-THUMB-FP-ELIM:   .cfi_offset r7, -8
-; CHECK-THUMB-FP-ELIM:   pop   {r7, pc}
+; CHECK-THUMB-FP-ELIM:   pop   {r7}
+; CHECK-THUMB-FP-ELIM:   pop   {pc}
 ; CHECK-THUMB-FP-ELIM:   .cfi_endproc
 
 ; CHECK-THUMB-V7-FP-LABEL: test2:
@@ -441,7 +443,7 @@
 ; CHECK-FP:   add    r11, sp, #8
 ; CHECK-FP:   .cfi_def_cfa r11, 8
 ; CHECK-FP:   pop    {r4, r5, r11, lr}
-; CHECK-FP:   mov    pc, lr
+; CHECK-FP:   bx     lr
 ; CHECK-FP:   .cfi_endproc
 
 ; CHECK-FP-ELIM-LABEL: test3:
@@ -453,7 +455,7 @@
 ; CHECK-FP-ELIM:   .cfi_offset r5, -12
 ; CHECK-FP-ELIM:   .cfi_offset r4, -16
 ; CHECK-FP-ELIM:   pop   {r4, r5, r11, lr}
-; CHECK-FP-ELIM:   mov   pc, lr
+; CHECK-FP-ELIM:   bx    lr
 ; CHECK-FP-ELIM:   .cfi_endproc
 
 ; CHECK-V7-FP-LABEL: test3:
@@ -490,7 +492,8 @@
 ; CHECK-THUMB-FP:   .cfi_offset r4, -16
 ; CHECK-THUMB-FP:   add    r7, sp, #8
 ; CHECK-THUMB-FP:   .cfi_def_cfa r7, 8
-; CHECK-THUMB-FP:   pop    {r4, r5, r7, pc}
+; CHECK-THUMB-FP:   pop    {r4, r5, r7}
+; CHECK-THUMB-FP:   pop    {pc}
 ; CHECK-THUMB-FP:   .cfi_endproc
 
 ; CHECK-THUMB-FP-ELIM-LABEL: test3:
@@ -501,7 +504,8 @@
 ; CHECK-THUMB-FP-ELIM:   .cfi_offset r7, -8
 ; CHECK-THUMB-FP-ELIM:   .cfi_offset r5, -12
 ; CHECK-THUMB-FP-ELIM:   .cfi_offset r4, -16
-; CHECK-THUMB-FP-ELIM:   pop   {r4, r5, r7, pc}
+; CHECK-THUMB-FP-ELIM:   pop   {r4, r5, r7}
+; CHECK-THUMB-FP-ELIM:   pop   {pc}
 ; CHECK-THUMB-FP-ELIM:   .cfi_endproc
 
 ; CHECK-THUMB-V7-FP-LABEL: test3:
@@ -539,11 +543,11 @@
 }
 
 ; CHECK-FP-LABEL: test4:
-; CHECK-FP:   mov pc, lr
+; CHECK-FP:   bx lr
 ; CHECK-FP-NOT:   .cfi_def_cfa_offset
 
 ; CHECK-FP-ELIM-LABEL: test4:
-; CHECK-FP-ELIM:   mov pc, lr
+; CHECK-FP-ELIM:   bx lr
 ; CHECK-FP-ELIM-NOT:   .cfi_def_cfa_offset
 
 ; CHECK-V7-FP-LABEL: test4:
Index: test/CodeGen/ARM/debug-info-branch-folding.ll
===================================================================
--- test/CodeGen/ARM/debug-info-branch-folding.ll
+++ test/CodeGen/ARM/debug-info-branch-folding.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s - | FileCheck %s
+; RUN: llc -mattr=+neon < %s - | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
 target triple = "thumbv7-apple-macosx10.6.7"
 
Index: test/CodeGen/ARM/debug-info-d16-reg.ll
===================================================================
--- test/CodeGen/ARM/debug-info-d16-reg.ll
+++ test/CodeGen/ARM/debug-info-d16-reg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -mattr=+neon < %s | FileCheck %s
 ; Radar 9309221
 ; Test dwarf reg no for d16
 ;CHECK: DW_OP_regx
Index: test/CodeGen/ARM/debug-info-qreg.ll
===================================================================
--- test/CodeGen/ARM/debug-info-qreg.ll
+++ test/CodeGen/ARM/debug-info-qreg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s - | FileCheck %s
+; RUN: llc -mattr=+neon < %s - | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
 target triple = "thumbv7-apple-macosx10.6.7"
 
Index: test/CodeGen/ARM/debug-info-s16-reg.ll
===================================================================
--- test/CodeGen/ARM/debug-info-s16-reg.ll
+++ test/CodeGen/ARM/debug-info-s16-reg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s - | FileCheck %s
+; RUN: llc -mattr=+neon < %s - | FileCheck %s
 ; Radar 9309221
 ; Test dwarf reg no for s16
 ;CHECK: super-register DW_OP_regx
Index: test/CodeGen/ARM/debug-info-sreg2.ll
===================================================================
--- test/CodeGen/ARM/debug-info-sreg2.ll
+++ test/CodeGen/ARM/debug-info-sreg2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s - -filetype=obj | llvm-dwarfdump -debug-dump=loc - | FileCheck %s
+; RUN: llc -mattr=+neon < %s - -filetype=obj | llvm-dwarfdump -debug-dump=loc - | FileCheck %s
 ; Radar 9376013
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
 target triple = "thumbv7-apple-macosx10.6.7"
Index: test/CodeGen/ARM/default-float-abi.ll
===================================================================
--- test/CodeGen/ARM/default-float-abi.ll
+++ test/CodeGen/ARM/default-float-abi.ll
@@ -1,8 +1,8 @@
-; RUN: llc -mtriple=armv7-linux-gnueabihf %s -o - | FileCheck %s --check-prefix=CHECK-HARD
-; RUN: llc -mtriple=armv7-linux-eabihf %s -o - | FileCheck %s --check-prefix=CHECK-HARD
-; RUN: llc -mtriple=armv7-linux-gnueabihf -float-abi=soft %s -o - | FileCheck %s --check-prefix=CHECK-SOFT
-; RUN: llc -mtriple=armv7-linux-gnueabi %s -o - | FileCheck %s --check-prefix=CHECK-SOFT
-; RUN: llc -mtriple=armv7-linux-eabi -float-abi=hard %s -o - | FileCheck %s --check-prefix=CHECK-HARD
+; RUN: llc -mtriple=armv7-linux-gnueabihf -mattr=+neon %s -o - | FileCheck %s --check-prefix=CHECK-HARD
+; RUN: llc -mtriple=armv7-linux-eabihf    -mattr=+neon %s -o - | FileCheck %s --check-prefix=CHECK-HARD
+; RUN: llc -mtriple=armv7-linux-gnueabihf -mattr=+neon -float-abi=soft %s -o - | FileCheck %s --check-prefix=CHECK-SOFT
+; RUN: llc -mtriple=armv7-linux-gnueabi   -mattr=+neon %s -o - | FileCheck %s --check-prefix=CHECK-SOFT
+; RUN: llc -mtriple=armv7-linux-eabi      -mattr=+neon -float-abi=hard %s -o - | FileCheck %s --check-prefix=CHECK-HARD
 ; RUN: llc -mtriple=thumbv7-apple-ios6.0 %s -o - | FileCheck %s --check-prefix=CHECK-SOFT
 
 define float @test_abi(float %lhs, float %rhs) {
Index: test/CodeGen/ARM/dwarf-unwind.ll
===================================================================
--- test/CodeGen/ARM/dwarf-unwind.ll
+++ test/CodeGen/ARM/dwarf-unwind.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=thumbv7-netbsd-eabi -o - %s | FileCheck %s
+; RUN: llc -mtriple=thumbv7-netbsd-eabi -mattr=+neon -o - %s | FileCheck %s
 declare void @bar()
 
 ; ARM's frame lowering attempts to tack another callee-saved register onto the
Index: test/CodeGen/ARM/ehabi.ll
===================================================================
--- test/CodeGen/ARM/ehabi.ll
+++ test/CodeGen/ARM/ehabi.ll
@@ -26,11 +26,11 @@
 ; RUN:     -filetype=asm -o - %s \
 ; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
 
-; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi -mattr=+neon \
 ; RUN:     -disable-fp-elim -filetype=asm -o - %s \
 ; RUN:   | FileCheck %s --check-prefix=CHECK-V7-FP
 
-; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi -mattr=+neon  \
 ; RUN:     -filetype=asm -o - %s \
 ; RUN:   | FileCheck %s --check-prefix=CHECK-V7-FP-ELIM
 
@@ -42,11 +42,11 @@
 ; RUN:     -filetype=asm -o - %s \
 ; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
 
-; RUN: llc -mtriple armv7-unknown-linux-androideabi \
+; RUN: llc -mtriple armv7-unknown-linux-androideabi  -mattr=+neon \
 ; RUN:     -disable-fp-elim -filetype=asm -o - %s \
 ; RUN:   | FileCheck %s --check-prefix=CHECK-V7-FP
 
-; RUN: llc -mtriple armv7-unknown-linux-androideabi \
+; RUN: llc -mtriple armv7-unknown-linux-androideabi  -mattr=+neon \
 ; RUN:     -filetype=asm -o - %s \
 ; RUN:   | FileCheck %s --check-prefix=CHECK-V7-FP-ELIM
 
@@ -58,11 +58,11 @@
 ; RUN:     -filetype=asm -o - %s \
 ; RUN:   | FileCheck %s --check-prefix=DWARF-FP-ELIM
 
-; RUN: llc -mtriple armv7-unknown-netbsd-eabi \
+; RUN: llc -mtriple armv7-unknown-netbsd-eabi  -mattr=+neon \
 ; RUN:     -disable-fp-elim -filetype=asm -o - %s \
 ; RUN:   | FileCheck %s --check-prefix=DWARF-V7-FP
 
-; RUN: llc -mtriple armv7-unknown-netbsd-eabi \
+; RUN: llc -mtriple armv7-unknown-netbsd-eabi  -mattr=+neon \
 ; RUN:     -filetype=asm -o - %s \
 ; RUN:   | FileCheck %s --check-prefix=DWARF-V7-FP-ELIM
 
@@ -204,8 +204,7 @@
 ; DWARF-FP:    .cfi_def_cfa r11, 8
 ; DWARF-FP:    sub sp, sp, #44
 ; DWARF-FP:    sub sp, r11, #28
-; DWARF-FP:    pop {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; DWARF-FP:    mov pc, lr
+; DWARF-FP:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; DWARF-FP:    .cfi_endproc
 
 ; DWARF-FP-ELIM-LABEL: _Z4testiiiiiddddd:
@@ -226,8 +225,7 @@
 ; DWARF-FP-ELIM:    sub sp, sp, #36
 ; DWARF-FP-ELIM:    .cfi_def_cfa_offset 72
 ; DWARF-FP-ELIM:    add sp, sp, #36
-; DWARF-FP-ELIM:    pop {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; DWARF-FP-ELIM:    mov pc, lr
+; DWARF-FP-ELIM:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; DWARF-FP-ELIM:    .cfi_endproc
 
 ; DWARF-V7-FP-LABEL: _Z4testiiiiiddddd:
@@ -292,7 +290,7 @@
 ; CHECK-FP:   .setfp r11, sp
 ; CHECK-FP:   mov    r11, sp
 ; CHECK-FP:   pop    {r11, lr}
-; CHECK-FP:   mov    pc, lr
+; CHECK-FP:   bx     lr
 ; CHECK-FP:   .fnend
 
 ; CHECK-FP-ELIM-LABEL: test2:
@@ -300,7 +298,7 @@
 ; CHECK-FP-ELIM:   .save {r11, lr}
 ; CHECK-FP-ELIM:   push  {r11, lr}
 ; CHECK-FP-ELIM:   pop   {r11, lr}
-; CHECK-FP-ELIM:   mov   pc, lr
+; CHECK-FP-ELIM:   bx    lr
 ; CHECK-FP-ELIM:   .fnend
 
 ; CHECK-V7-FP-LABEL: test2:
@@ -327,8 +325,7 @@
 ; DWARF-FP:    .cfi_offset r11, -8
 ; DWARF-FP:    mov  r11, sp
 ; DWARF-FP:    .cfi_def_cfa_register r11
-; DWARF-FP:    pop  {r11, lr}
-; DWARF-FP:    mov  pc, lr
+; DWARF-FP:    pop  {r11, pc}
 ; DWARF-FP:    .cfi_endproc
 
 ; DWARF-FP-ELIM-LABEL: test2:
@@ -337,8 +334,7 @@
 ; DWARF-FP-ELIM:    .cfi_def_cfa_offset 8
 ; DWARF-FP-ELIM:    .cfi_offset lr, -4
 ; DWARF-FP-ELIM:    .cfi_offset r11, -8
-; DWARF-FP-ELIM:    pop  {r11, lr}
-; DWARF-FP-ELIM:    mov  pc, lr
+; DWARF-FP-ELIM:    pop  {r11, pc}
 ; DWARF-FP-ELIM:    .cfi_endproc
 
 ; DWARF-V7-FP-LABEL: test2:
@@ -390,7 +386,7 @@
 ; CHECK-FP:   .setfp r11, sp, #8
 ; CHECK-FP:   add    r11, sp, #8
 ; CHECK-FP:   pop    {r4, r5, r11, lr}
-; CHECK-FP:   mov    pc, lr
+; CHECK-FP:   bx     lr
 ; CHECK-FP:   .fnend
 
 ; CHECK-FP-ELIM-LABEL: test3:
@@ -398,7 +394,7 @@
 ; CHECK-FP-ELIM:   .save {r4, r5, r11, lr}
 ; CHECK-FP-ELIM:   push  {r4, r5, r11, lr}
 ; CHECK-FP-ELIM:   pop   {r4, r5, r11, lr}
-; CHECK-FP-ELIM:   mov   pc, lr
+; CHECK-FP-ELIM:   bx    lr
 ; CHECK-FP-ELIM:   .fnend
 
 ; CHECK-V7-FP-LABEL: test3:
@@ -427,8 +423,7 @@
 ; DWARF-FP:    .cfi_offset r4, -16
 ; DWARF-FP:    add  r11, sp, #8
 ; DWARF-FP:    .cfi_def_cfa r11, 8
-; DWARF-FP:    pop  {r4, r5, r11, lr}
-; DWARF-FP:    mov  pc, lr
+; DWARF-FP:    pop  {r4, r5, r11, pc}
 ; DWARF-FP:    .cfi_endproc
 
 ; DWARF-FP-ELIM-LABEL: test3:
@@ -439,8 +434,7 @@
 ; DWARF-FP-ELIM:    .cfi_offset r11, -8
 ; DWARF-FP-ELIM:    .cfi_offset r5, -12
 ; DWARF-FP-ELIM:    .cfi_offset r4, -16
-; DWARF-FP-ELIM:    pop  {r4, r5, r11, lr}
-; DWARF-FP-ELIM:    mov  pc, lr
+; DWARF-FP-ELIM:    pop  {r4, r5, r11, pc}
 ; DWARF-FP-ELIM:    .cfi_endproc
 
 ; DWARF-V7-FP-LABEL: test3:
@@ -479,13 +473,13 @@
 
 ; CHECK-FP-LABEL: test4:
 ; CHECK-FP:   .fnstart
-; CHECK-FP:   mov pc, lr
+; CHECK-FP:   bx lr
 ; CHECK-FP:   .cantunwind
 ; CHECK-FP:   .fnend
 
 ; CHECK-FP-ELIM-LABEL: test4:
 ; CHECK-FP-ELIM:   .fnstart
-; CHECK-FP-ELIM:   mov pc, lr
+; CHECK-FP-ELIM:   bx lr
 ; CHECK-FP-ELIM:   .cantunwind
 ; CHECK-FP-ELIM:   .fnend
 
@@ -503,13 +497,13 @@
 
 ; DWARF-FP-LABEL: test4:
 ; DWARF-FP-NOT: .cfi_startproc
-; DWARF-FP:    mov pc, lr
+; DWARF-FP:    bx lr
 ; DWARF-FP-NOT: .cfi_endproc
 ; DWARF-FP:    .size test4,
 
 ; DWARF-FP-ELIM-LABEL: test4:
 ; DWARF-FP-ELIM-NOT: .cfi_startproc
-; DWARF-FP-ELIM:     mov pc, lr
+; DWARF-FP-ELIM:     bx lr
 ; DWARF-FP-ELIM-NOT: .cfi_endproc
 ; DWARF-FP-ELIM:     .size test4,
 
Index: test/CodeGen/ARM/fast-isel-align.ll
===================================================================
--- test/CodeGen/ARM/fast-isel-align.ll
+++ test/CodeGen/ARM/fast-isel-align.ll
@@ -3,20 +3,20 @@
 ; RUN: llc < %s -O0 -mattr=+strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
 ; RUN: llc < %s -O0 -mattr=+strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
 
-; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
-; RUN: llc < %s -O0 -mattr=+strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
-; RUN: llc < %s -O0 -mattr=+strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
-
-; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-nacl -verify-machineinstrs | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -mattr=+strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-nacl -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
-
-; RUN: llc < %s -O0 -mattr=+strict-align -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
-; RUN: llc < %s -O0  -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -mattr=+strict-align -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
-; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
-; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -mattr=+strict-align -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
-; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -mattr=+strict-align -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
+; RUN: llc < %s -O0 -mattr=+neon -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -mattr=+neon -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -mattr=+neon,+strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
+; RUN: llc < %s -O0 -mattr=+neon,+strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
+
+; RUN: llc < %s -O0 -mattr=+neon -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-nacl -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -mattr=+neon,+strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-nacl -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
+
+; RUN: llc < %s -O0 -mattr=+neon,+strict-align -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
+; RUN: llc < %s -O0 -mattr=+neon,+strict-align -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
+; RUN: llc < %s -O0 -mattr=+neon -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -mattr=+neon -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -mattr=+neon,+strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
+; RUN: llc < %s -O0 -mattr=+neon,+strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
 
 ; Check unaligned stores
 %struct.anon = type <{ float }>
Index: test/CodeGen/ARM/fast-isel-call.ll
===================================================================
--- test/CodeGen/ARM/fast-isel-call.ll
+++ test/CodeGen/ARM/fast-isel-call.ll
@@ -1,8 +1,8 @@
 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=+neon | FileCheck %s --check-prefix=ARM
 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=+long-calls | FileCheck %s --check-prefix=ARM-LONG
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=+long-calls | FileCheck %s --check-prefix=ARM-LONG
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=+long-calls,+neon | FileCheck %s --check-prefix=ARM-LONG
 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=+long-calls | FileCheck %s --check-prefix=THUMB-LONG
 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP
 ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP
Index: test/CodeGen/ARM/fast-isel-cmp-imm.ll
===================================================================
--- test/CodeGen/ARM/fast-isel-cmp-imm.ll
+++ test/CodeGen/ARM/fast-isel-cmp-imm.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=+neon -verify-machineinstrs | FileCheck %s --check-prefix=ARM
 ; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
 
 define void @t1a(float %a) uwtable ssp {
Index: test/CodeGen/ARM/fast-isel-conversion.ll
===================================================================
--- test/CodeGen/ARM/fast-isel-conversion.ll
+++ test/CodeGen/ARM/fast-isel-conversion.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -verify-machineinstrs -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -verify-machineinstrs -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -verify-machineinstrs -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=+neon | FileCheck %s --check-prefix=ARM
 ; RUN: llc < %s -verify-machineinstrs -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 ; Test sitofp
Index: test/CodeGen/ARM/fast-isel-static.ll
===================================================================
--- test/CodeGen/ARM/fast-isel-static.ll
+++ test/CodeGen/ARM/fast-isel-static.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=static -mattr=+long-calls | FileCheck -check-prefix=CHECK-LONG %s
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=static -mattr=+long-calls | FileCheck -check-prefix=CHECK-LONG %s
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=static -mattr=+long-calls,+neon | FileCheck -check-prefix=CHECK-LONG %s
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=static | FileCheck -check-prefix=CHECK-NORM %s
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=static | FileCheck -check-prefix=CHECK-NORM %s
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=static -mattr=+neon | FileCheck -check-prefix=CHECK-NORM %s
 
 define void @myadd(float* %sum, float* %addend) nounwind {
 entry:
Index: test/CodeGen/ARM/fold-stack-adjust.ll
===================================================================
--- test/CodeGen/ARM/fold-stack-adjust.ll
+++ test/CodeGen/ARM/fold-stack-adjust.ll
@@ -1,7 +1,7 @@
-; RUN: llc -mtriple=thumbv7-apple-none-macho < %s | FileCheck %s
+; RUN: llc -mtriple=thumbv7-apple-none-macho -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=thumbv6m-apple-none-macho -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-T1
 ; RUN: llc -mtriple=thumbv7-apple-darwin-ios -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-IOS
-; RUN: llc -mtriple=thumbv7--linux-gnueabi -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-LINUX
+; RUN: llc -mtriple=thumbv7--linux-gnueabi -mattr=+neon -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-LINUX
 
 
 declare void @bar(i8*)
Index: test/CodeGen/ARM/fp16-promote.ll
===================================================================
--- test/CodeGen/ARM/fp16-promote.ll
+++ test/CodeGen/ARM/fp16-promote.ll
@@ -1,5 +1,5 @@
-; RUN: llc -asm-verbose=false < %s -mattr=+vfp3,+fp16 | FileCheck %s -check-prefix=CHECK-FP16 -check-prefix=CHECK-ALL
-; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=CHECK-LIBCALL -check-prefix=CHECK-ALL
+; RUN: llc -asm-verbose=false < %s -mattr=+neon,+vfp3,+fp16 | FileCheck %s -check-prefix=CHECK-FP16 -check-prefix=CHECK-ALL
+; RUN: llc -asm-verbose=false < %s -mattr=+neon | FileCheck %s -check-prefix=CHECK-LIBCALL -check-prefix=CHECK-ALL
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
 target triple = "armv7-eabihf"
Index: test/CodeGen/ARM/fp16.ll
===================================================================
--- test/CodeGen/ARM/fp16.ll
+++ test/CodeGen/ARM/fp16.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mattr=+vfp3,+fp16 < %s | FileCheck --check-prefix=CHECK-FP16 %s
-; RUN: llc -mtriple=armv8-eabihf < %s | FileCheck --check-prefix=CHECK-ARMV8 %s
+; RUN: llc -mtriple=armv8-eabihf -mattr=+fp16 < %s | FileCheck --check-prefix=CHECK-ARMV8 %s
 ; RUN: llc -mtriple=thumbv7m-eabi < %s | FileCheck --check-prefix=CHECK-SOFTFLOAT %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
Index: test/CodeGen/ARM/inlineasm-ldr-pseudo.ll
===================================================================
--- test/CodeGen/ARM/inlineasm-ldr-pseudo.ll
+++ test/CodeGen/ARM/inlineasm-ldr-pseudo.ll
@@ -7,7 +7,7 @@
 ; RUN: llc -mtriple=arm-apple-darwin < %s -filetype=obj | llvm-objdump -d - | FileCheck %s
 ; CHECK-LABEL: foo:
 ; CHECK: 0:       00 00 9f e5                                     ldr     r0, [pc]
-; CHECK: 4:       0e f0 a0 e1                                     mov     pc, lr
+; CHECK: 4:       1e ff 2f e1                                     bx      lr
 ; Make sure the constant pool entry comes after the return
 ; CHECK: 8:       01 00 00 00
 define i32 @foo() nounwind {
Index: test/CodeGen/ARM/integer_insertelement.ll
===================================================================
--- test/CodeGen/ARM/integer_insertelement.ll
+++ test/CodeGen/ARM/integer_insertelement.ll
@@ -8,7 +8,7 @@
 ; CHECK-NOT: vorr d
 ; CHECK: vmov.32 d
 ; CHECK-NOT: vorr d
-; CHECK: mov pc, lr
+; CHECK: bx lr
 define <4 x i32> @f(<4 x i32> %in) {
   %1 = insertelement <4 x i32> %in, i32 255, i32 3
   ret <4 x i32> %1
@@ -18,7 +18,7 @@
 ; CHECK-NOT: vorr d
 ; CHECK: vmov.16 d
 ; CHECK-NOT: vorr d
-; CHECK: mov pc, lr
+; CHECK: bx lr
 define <8 x i16> @g(<8 x i16> %in) {
   %1 = insertelement <8 x i16> %in, i16 255, i32 7
   ret <8 x i16> %1
@@ -28,7 +28,7 @@
 ; CHECK-NOT: vorr d
 ; CHECK: vmov.8 d
 ; CHECK-NOT: vorr d
-; CHECK: mov pc, lr
+; CHECK: bx lr
 define <16 x i8> @h(<16 x i8> %in) {
   %1 = insertelement <16 x i8> %in, i8 255, i32 15
   ret <16 x i8> %1
Index: test/CodeGen/ARM/isel-v8i32-crash.ll
===================================================================
--- test/CodeGen/ARM/isel-v8i32-crash.ll
+++ test/CodeGen/ARM/isel-v8i32-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-linux-gnu -mattr=+neon | FileCheck %s
 
 ; Check we don't crash when trying to combine:
 ;   (d1 = <float 8.000000e+00, float 8.000000e+00, ...>) (power of 2)
Index: test/CodeGen/ARM/neon-v8.1a.ll
===================================================================
--- test/CodeGen/ARM/neon-v8.1a.ll
+++ test/CodeGen/ARM/neon-v8.1a.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv8 -mattr=+v8.1a | FileCheck %s
+; RUN: llc < %s -mtriple=armv8 -mattr=+v8.1a,+neon | FileCheck %s
 
 ;-----------------------------------------------------------------------------
 ; RDMA Vector
Index: test/CodeGen/ARM/neon_spill.ll
===================================================================
--- test/CodeGen/ARM/neon_spill.ll
+++ test/CodeGen/ARM/neon_spill.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -verify-machineinstrs
-; RUN: llc < %s -verify-machineinstrs -O0
+; RUN: llc < %s -mattr=+neon -verify-machineinstrs
+; RUN: llc < %s -mattr=+neon -verify-machineinstrs -O0
 ; PR12177
 ;
 ; This test case spills a QQQQ register.
Index: test/CodeGen/ARM/nest-register.ll
===================================================================
--- test/CodeGen/ARM/nest-register.ll
+++ test/CodeGen/ARM/nest-register.ll
@@ -7,7 +7,7 @@
 ; CHECK-LABEL: nest_receiver:
 ; CHECK: @ BB#0:
 ; CHECK-NEXT: mov r0, r12
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: bx lr
         ret i8* %arg
 }
 
@@ -15,7 +15,7 @@
 ; CHECK-LABEL: nest_caller:
 ; CHECK: mov r12, r0
 ; CHECK-NEXT: bl nest_receiver
-; CHECK: mov pc, lr
+; CHECK: bx lr
         %result = call i8* @nest_receiver(i8* nest %arg)
         ret i8* %result
 }
Index: test/CodeGen/ARM/out-of-registers.ll
===================================================================
--- test/CodeGen/ARM/out-of-registers.ll
+++ test/CodeGen/ARM/out-of-registers.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 %s -o - | FileCheck %s
+; RUN: llc -mattr=+neon -O3 %s -o - | FileCheck %s
 ; ModuleID = 'fo.c'
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:32-n8:16:32-S64"
 target triple = "thumbv7-none-linux-gnueabi"
Index: test/CodeGen/ARM/setcc-type-mismatch.ll
===================================================================
--- test/CodeGen/ARM/setcc-type-mismatch.ll
+++ test/CodeGen/ARM/setcc-type-mismatch.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=armv7-linux-gnueabihf %s -o - | FileCheck %s
+; RUN: llc -mtriple=armv7-linux-gnueabihf -mattr=+neon %s -o - | FileCheck %s
 
 define void @test_mismatched_setcc(<4 x i22> %l, <4 x i22> %r, <4 x i1>* %addr) {
 ; CHECK-LABEL: test_mismatched_setcc:
Index: test/CodeGen/ARM/struct_byval.ll
===================================================================
--- test/CodeGen/ARM/struct_byval.ll
+++ test/CodeGen/ARM/struct_byval.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=armv7-apple-ios6.0 | FileCheck %s
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios6.0 | FileCheck %s -check-prefix=THUMB
-; RUN: llc < %s -mtriple=armv7-unknown-nacl-gnueabi | FileCheck %s -check-prefix=NACL
-; RUN: llc < %s -mtriple=armv5-none-linux-gnueabi | FileCheck %s -check-prefix=NOMOVT
+; RUN: llc < %s -mtriple=armv7-unknown-nacl-gnueabi -mattr=+neon | FileCheck %s -check-prefix=NACL
+; RUN: llc < %s -mtriple=armv5-none-linux-gnueabi -mattr=+neon | FileCheck %s -check-prefix=NOMOVT
 
 ; NOMOVT-NOT: movt
 
Index: test/CodeGen/ARM/struct_byval_arm_t1_t2.ll
===================================================================
--- test/CodeGen/ARM/struct_byval_arm_t1_t2.ll
+++ test/CodeGen/ARM/struct_byval_arm_t1_t2.ll
@@ -1,6 +1,6 @@
-;RUN: llc < %s -mtriple=armv7-none-linux-gnueabi   -mattr=+neon -verify-machineinstrs -filetype=obj | llvm-objdump -triple armv7-none-linux-gnueabi   -disassemble - | FileCheck %s --check-prefix=ARM
-;RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi -mattr=+neon -verify-machineinstrs -filetype=obj | llvm-objdump -triple thumbv7-none-linux-gnueabi -disassemble - | FileCheck %s --check-prefix=THUMB2
-;RUN: llc < %s -mtriple=armv7-none-linux-gnueabi   -mattr=-neon -verify-machineinstrs -filetype=obj | llvm-objdump -triple armv7-none-linux-gnueabi   -disassemble - | FileCheck %s --check-prefix=NO_NEON
+;RUN: llc < %s -mtriple=armv7-none-linux-gnueabi   -mattr=+neon -verify-machineinstrs -filetype=obj | llvm-objdump -triple armv7-none-linux-gnueabi   -mattr=+neon -disassemble - | FileCheck %s --check-prefix=ARM
+;RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi -mattr=+neon -verify-machineinstrs -filetype=obj | llvm-objdump -triple thumbv7-none-linux-gnueabi -mattr=+neon -disassemble - | FileCheck %s --check-prefix=THUMB2
+;RUN: llc < %s -mtriple=armv7-none-linux-gnueabi   -mattr=-neon -verify-machineinstrs -filetype=obj | llvm-objdump -triple armv7-none-linux-gnueabi   -mattr=+neon -disassemble - | FileCheck %s --check-prefix=NO_NEON
 ;We want to have both positive and negative checks for thumb1. These checks
 ;are not easy to do in a single pass so we generate the output once to a
 ;temp file and run filecheck twice with different prefixes.
Index: test/CodeGen/ARM/sub-cmp-peephole.ll
===================================================================
--- test/CodeGen/ARM/sub-cmp-peephole.ll
+++ test/CodeGen/ARM/sub-cmp-peephole.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
 ; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s --check-prefix=V7
-; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi | FileCheck %s -check-prefix=V8
+; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mattr=+fp-armv8 | FileCheck %s -check-prefix=V8
 
 
 define i32 @f(i32 %a, i32 %b) nounwind ssp {
Index: test/CodeGen/ARM/vector-extend-narrow.ll
===================================================================
--- test/CodeGen/ARM/vector-extend-narrow.ll
+++ test/CodeGen/ARM/vector-extend-narrow.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
+; RUN: llc -mtriple armv7 -mattr=+neon %s -o - | FileCheck %s
 
 ; CHECK-LABEL: f:
 define float @f(<4 x i16>* nocapture %in) {
Index: test/CodeGen/ARM/vtrn.ll
===================================================================
--- test/CodeGen/ARM/vtrn.ll
+++ test/CodeGen/ARM/vtrn.ll
@@ -8,7 +8,7 @@
 ; CHECK-NEXT:    vtrn.8 d17, d16
 ; CHECK-NEXT:    vadd.i8 d16, d17, d16
 ; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -25,7 +25,7 @@
 ; CHECK-NEXT:    vtrn.8 d16, d17
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14, i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -40,7 +40,7 @@
 ; CHECK-NEXT:    vtrn.16 d17, d16
 ; CHECK-NEXT:    vadd.i16 d16, d17, d16
 ; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -57,7 +57,7 @@
 ; CHECK-NEXT:    vtrn.16 d16, d17
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 4, i32 2, i32 6, i32 1, i32 5, i32 3, i32 7>
@@ -72,7 +72,7 @@
 ; CHECK-NEXT:    vtrn.32 d17, d16
 ; CHECK-NEXT:    vadd.i32 d16, d17, d16
 ; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
@@ -89,7 +89,7 @@
 ; CHECK-NEXT:    vtrn.32 d16, d17
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
@@ -104,7 +104,7 @@
 ; CHECK-NEXT:    vtrn.32 d17, d16
 ; CHECK-NEXT:    vadd.f32 d16, d17, d16
 ; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <2 x float>, <2 x float>* %A
 	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
@@ -121,7 +121,7 @@
 ; CHECK-NEXT:    vtrn.32 d16, d17
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <2 x float>, <2 x float>* %A
 	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
@@ -137,7 +137,7 @@
 ; CHECK-NEXT:    vadd.i8 q8, q9, q8
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
@@ -154,7 +154,7 @@
 ; CHECK-NEXT:    vtrn.8 q9, q8
 ; CHECK-NEXT:    vst1.8 {d18, d19}, [r0:128]!
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30, i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
@@ -170,7 +170,7 @@
 ; CHECK-NEXT:    vadd.i16 q8, q9, q8
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -187,7 +187,7 @@
 ; CHECK-NEXT:    vtrn.16 q9, q8
 ; CHECK-NEXT:    vst1.16 {d18, d19}, [r0:128]!
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14, i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -203,7 +203,7 @@
 ; CHECK-NEXT:    vadd.i32 q8, q9, q8
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -220,7 +220,7 @@
 ; CHECK-NEXT:    vtrn.32 q9, q8
 ; CHECK-NEXT:    vst1.32 {d18, d19}, [r0:128]!
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <8 x i32> <i32 0, i32 4, i32 2, i32 6, i32 1, i32 5, i32 3, i32 7>
@@ -236,7 +236,7 @@
 ; CHECK-NEXT:    vadd.f32 q8, q9, q8
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x float>, <4 x float>* %A
 	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -253,7 +253,7 @@
 ; CHECK-NEXT:    vtrn.32 q9, q8
 ; CHECK-NEXT:    vst1.32 {d18, d19}, [r0:128]!
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x float>, <4 x float>* %A
 	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <8 x i32> <i32 0, i32 4, i32 2, i32 6, i32 1, i32 5, i32 3, i32 7>
@@ -269,7 +269,7 @@
 ; CHECK-NEXT:    vtrn.8 d17, d16
 ; CHECK-NEXT:    vadd.i8 d16, d17, d16
 ; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14>
@@ -286,7 +286,7 @@
 ; CHECK-NEXT:    vtrn.8 d16, d17
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14, i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
@@ -302,7 +302,7 @@
 ; CHECK-NEXT:    vadd.i16 q8, q9, q8
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
@@ -319,7 +319,7 @@
 ; CHECK-NEXT:    vtrn.16 q9, q8
 ; CHECK-NEXT:    vst1.16 {d18, d19}, [r0:128]!
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14, i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
Index: test/CodeGen/ARM/vuzp.ll
===================================================================
--- test/CodeGen/ARM/vuzp.ll
+++ test/CodeGen/ARM/vuzp.ll
@@ -8,7 +8,7 @@
 ; CHECK-NEXT:    vuzp.8 d17, d16
 ; CHECK-NEXT:    vadd.i8 d16, d17, d16
 ; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -25,7 +25,7 @@
 ; CHECK-NEXT:    vuzp.8 d16, d17
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -40,7 +40,7 @@
 ; CHECK-NEXT:    vuzp.16 d17, d16
 ; CHECK-NEXT:    vadd.i16 d16, d17, d16
 ; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -57,7 +57,7 @@
 ; CHECK-NEXT:    vuzp.16 d16, d17
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
@@ -75,7 +75,7 @@
 ; CHECK-NEXT:    vadd.i8 q8, q9, q8
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
@@ -92,7 +92,7 @@
 ; CHECK-NEXT:    vuzp.8 q9, q8
 ; CHECK-NEXT:    vst1.8 {d18, d19}, [r0:128]!
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
@@ -108,7 +108,7 @@
 ; CHECK-NEXT:    vadd.i16 q8, q9, q8
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -125,7 +125,7 @@
 ; CHECK-NEXT:    vuzp.16 q9, q8
 ; CHECK-NEXT:    vst1.16 {d18, d19}, [r0:128]!
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -141,7 +141,7 @@
 ; CHECK-NEXT:    vadd.i32 q8, q9, q8
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -158,7 +158,7 @@
 ; CHECK-NEXT:    vuzp.32 q9, q8
 ; CHECK-NEXT:    vst1.32 {d18, d19}, [r0:128]!
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
@@ -174,7 +174,7 @@
 ; CHECK-NEXT:    vadd.f32 q8, q9, q8
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x float>, <4 x float>* %A
 	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -191,7 +191,7 @@
 ; CHECK-NEXT:    vuzp.32 q9, q8
 ; CHECK-NEXT:    vst1.32 {d18, d19}, [r0:128]!
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x float>, <4 x float>* %A
 	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
@@ -208,7 +208,7 @@
 ; CHECK-NEXT:    vuzp.8 d17, d16
 ; CHECK-NEXT:    vadd.i8 d16, d17, d16
 ; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14>
@@ -225,7 +225,7 @@
 ; CHECK-NEXT:    vuzp.8 d16, d17
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
@@ -241,7 +241,7 @@
 ; CHECK-NEXT:    vadd.i16 q8, q9, q8
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14>
@@ -258,7 +258,7 @@
 ; CHECK-NEXT:    vuzp.16 q9, q8
 ; CHECK-NEXT:    vst1.16 {d18, d19}, [r0:128]!
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
Index: test/CodeGen/ARM/vzip.ll
===================================================================
--- test/CodeGen/ARM/vzip.ll
+++ test/CodeGen/ARM/vzip.ll
@@ -8,7 +8,7 @@
 ; CHECK-NEXT:    vzip.8 d17, d16
 ; CHECK-NEXT:    vadd.i8 d16, d17, d16
 ; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -25,7 +25,7 @@
 ; CHECK-NEXT:    vzip.8 d16, d17
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -40,7 +40,7 @@
 ; CHECK-NEXT:    vzip.16 d17, d16
 ; CHECK-NEXT:    vadd.i16 d16, d17, d16
 ; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -57,7 +57,7 @@
 ; CHECK-NEXT:    vzip.16 d16, d17
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
@@ -75,7 +75,7 @@
 ; CHECK-NEXT:    vadd.i8 q8, q9, q8
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
@@ -92,7 +92,7 @@
 ; CHECK-NEXT:    vzip.8 q9, q8
 ; CHECK-NEXT:    vst1.8 {d18, d19}, [r0:128]!
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
@@ -108,7 +108,7 @@
 ; CHECK-NEXT:    vadd.i16 q8, q9, q8
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -125,7 +125,7 @@
 ; CHECK-NEXT:    vzip.16 q9, q8
 ; CHECK-NEXT:    vst1.16 {d18, d19}, [r0:128]!
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -141,7 +141,7 @@
 ; CHECK-NEXT:    vadd.i32 q8, q9, q8
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -158,7 +158,7 @@
 ; CHECK-NEXT:    vzip.32 q9, q8
 ; CHECK-NEXT:    vst1.32 {d18, d19}, [r0:128]!
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
@@ -174,7 +174,7 @@
 ; CHECK-NEXT:    vadd.f32 q8, q9, q8
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x float>, <4 x float>* %A
 	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -191,7 +191,7 @@
 ; CHECK-NEXT:    vzip.32 q9, q8
 ; CHECK-NEXT:    vst1.32 {d18, d19}, [r0:128]!
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <4 x float>, <4 x float>* %A
 	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
@@ -208,7 +208,7 @@
 ; CHECK-NEXT:    vzip.8 d17, d16
 ; CHECK-NEXT:    vadd.i8 d16, d17, d16
 ; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11>
@@ -225,7 +225,7 @@
 ; CHECK-NEXT:    vzip.8 d16, d17
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
@@ -241,7 +241,7 @@
 ; CHECK-NEXT:    vadd.i8 q8, q9, q8
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
@@ -258,7 +258,7 @@
 ; CHECK-NEXT:    vzip.8 q9, q8
 ; CHECK-NEXT:    vst1.8 {d18, d19}, [r0:128]!
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    bx lr
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
Index: test/CodeGen/Thumb/thumb-shrink-wrapping.ll
===================================================================
--- test/CodeGen/Thumb/thumb-shrink-wrapping.ll
+++ test/CodeGen/Thumb/thumb-shrink-wrapping.ll
@@ -1,7 +1,7 @@
 ; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumb-macho \
 ; RUN:      | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE 
-; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumb-macho \
-; RUN:      | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
+; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumb-macho \
+; RUN:      | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
 ;
 ; Note: Lots of tests use inline asm instead of regular calls.
 ; This allows to have a better control on what the allocation will do.
@@ -46,7 +46,8 @@
 ; Without shrink-wrapping, epilogue is in the exit block.
 ; Epilogue code. (What we pop does not matter.)
 ; DISABLE: add sp, #8
-; DISABLE-NEXT: pop {r7, pc}
+; DISABLE-NEXT: pop {r7}
+; DISABLE-NEXT: pop {pc}
 ;
 ; ENABLE-NEXT: bx lr
 define i32 @foo(i32 %a, i32 %b) {
@@ -101,12 +102,14 @@
 ; CHECK: lsls [[SUM]], [[SUM]], #3
 ;
 ; Duplicated epilogue.
-; DISABLE: pop {r4, pc}
+; DISABLE: pop {r4}
+; DISABLE: pop {pc}
 ;
 ; CHECK: [[ELSE_LABEL]]: @ %if.else
 ; Shift second argument by one and store into returned register.
 ; CHECK: lsls r0, r1, #1
-; DISABLE-NEXT: pop {r4, pc}
+; DISABLE-NEXT: pop {r4}
+; DISABLE-NEXT: pop {pc}
 ;
 ; ENABLE-NEXT: bx lr
 define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
@@ -222,12 +225,14 @@
 ; ENABLE-NEXT: pop {r4, lr}
 ;
 ; Duplicated epilogue.
-; DISABLE: pop {r4, pc}
+; DISABLE: pop {r4}
+; DISABLE: pop {pc}
 ;
 ; CHECK: [[ELSE_LABEL]]: @ %if.else
 ; Shift second argument by one and store into returned register.
 ; CHECK: lsls r0, r1, #1
-; DISABLE-NEXT: pop {r4, pc}
+; DISABLE-NEXT: pop {r4}
+; DISABLE-NEXT: pop {pc}
 ;
 ; ENABLE-NEXT: bx lr
 define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
@@ -297,12 +302,14 @@
 ; ENABLE: pop {r4, lr}
 ;
 ; Duplicated epilogue.
-; DISABLE: pop {r4, pc}
+; DISABLE: pop {r4}
+; DISABLE: pop {pc}
 ;
 ; CHECK: [[ELSE_LABEL]]: @ %if.else
 ; Shift second argument by one and store into returned register.
 ; CHECK: lsls r0, r1, #1
-; DISABLE-NEXT: pop {r4, pc}
+; DISABLE-NEXT: pop {r4}
+; DISABLE-NEXT: pop {pc}
 ;
 ; ENABLE-NEXT: bx lr
 define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 {
@@ -373,12 +380,14 @@
 ; ENABLE-NEXT: pop {r4, lr}
 ;
 ; Duplicated epilogue.
-; DISABLE-NEXT: pop {r4, pc}
+; DISABLE-NEXT: pop {r4}
+; DISABLE-NEXT: pop {pc}
 ;
 ; CHECK: [[ELSE_LABEL]]: @ %if.else
 ; Shift second argument by one and store into returned register.
 ; CHECK: lsls r0, r1, #1
-; DISABLE-NEXT: pop {r4, pc}
+; DISABLE-NEXT: pop {r4}
+; DISABLE-NEXT: pop {pc}
 ;
 ; ENABLE-NEXT: bx lr
 define i32 @inlineAsm(i32 %cond, i32 %N) {
@@ -438,22 +447,25 @@
 ; CHECK-NEXT: pop {r3}
 ; CHECK-NEXT: bl
 ; CHECK-NEXT: lsls r0, r0, #3
-; CHECK-NEXT: add sp, #16
+; ENABLE-NEXT: add sp, #16
+; DISABLE:      [[ELSE_LABEL]]: @ %if.else
+; DISABLE-NEXT: lsls r0, r1, #1
+; DISABLE-NEXT: [[END_LABEL:LBB[0-9_]+]]: @ %if.end
+; DISABLE-NEXT: add sp, #16
 ;
 ; ENABLE-NEXT: pop {[[TMP]], lr}
 ;
 ; Duplicated epilogue.
-; DISABLE-NEXT: pop {[[TMP]], pc}
+; DISABLE-NEXT: pop {[[TMP]]}
+; DISABLE-NEXT: pop {pc}
 ;
-; CHECK: [[ELSE_LABEL]]: @ %if.else
+; ENABLE: [[ELSE_LABEL]]: @ %if.else
 ; Shift second argument by one and store into returned register.
-; CHECK: lsls r0, r1, #1
+; ENABLE-NEXT: lsls r0, r1, #1
 ;
 ; Epilogue code.
 ; ENABLE-NEXT: bx lr
 ;
-; DISABLE-NEXT: add sp, #16
-; DISABLE-NEXT: pop {[[TMP]], pc}
 define i32 @callVariadicFunc(i32 %cond, i32 %N) {
 entry:
   %tobool = icmp eq i32 %cond, 0
Index: test/MC/ARM/arm-thumb-cpus.s
===================================================================
--- test/MC/ARM/arm-thumb-cpus.s
+++ test/MC/ARM/arm-thumb-cpus.s
@@ -1,6 +1,15 @@
-@ RUN: not llvm-mc -show-encoding -triple=arm-eabi < %s 2>&1 \
+@ RUN: not llvm-mc -show-encoding -triple=armv2 < %s 2>&1 \
 @ RUN:  | FileCheck %s --check-prefix=CHECK-ARM-ONLY
 
+@ RUN: not llvm-mc -show-encoding -triple=armv3 < %s 2>&1 \
+@ RUN:  | FileCheck %s --check-prefix=CHECK-ARM-ONLY
+
+@ RUN: not llvm-mc -show-encoding -triple=armv4 < %s 2>&1 \
+@ RUN:  | FileCheck %s --check-prefix=CHECK-ARM-ONLY
+
+@ RUN: llvm-mc -show-encoding -triple=arm-eabi < %s 2>&1 \
+@ RUN:  | FileCheck %s --check-prefix=CHECK-ARM-THUMB
+
 @ RUN: llvm-mc -show-encoding -triple=armv4t < %s 2>&1 \
 @ RUN:  | FileCheck %s --check-prefix=CHECK-ARM-THUMB
 
Index: test/MC/ARM/crc32-thumb.s
===================================================================
--- test/MC/ARM/crc32-thumb.s
+++ test/MC/ARM/crc32-thumb.s
@@ -1,5 +1,7 @@
-@ RUN: llvm-mc -triple=thumbv8 -show-encoding < %s | FileCheck %s
+@ RUN: llvm-mc -triple=thumbv8 -mattr=+crc -show-encoding < %s | FileCheck %s
 @ RUN: not llvm-mc -triple=thumbv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V7
+@ RUN: not llvm-mc -triple=thumbv7 -mattr=+crc -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOV8
+@ RUN: not llvm-mc -triple=thumbv8 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOCRC
 @ RUN: not llvm-mc -triple=thumbv8 -mattr=-crc -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOCRC
         crc32b  r0, r1, r2
         crc32h  r0, r1, r2
@@ -11,6 +13,9 @@
 @ CHECK-V7: error: instruction requires: crc armv8
 @ CHECK-V7: error: instruction requires: crc armv8
 @ CHECK-V7: error: instruction requires: crc armv8
+@ CHECK-NOV8: error: instruction requires: armv8
+@ CHECK-NOV8: error: instruction requires: armv8
+@ CHECK-NOV8: error: instruction requires: armv8
 @ CHECK-NOCRC: error: instruction requires: crc
 @ CHECK-NOCRC: error: instruction requires: crc
 @ CHECK-NOCRC: error: instruction requires: crc
@@ -25,6 +30,9 @@
 @ CHECK-V7: error: instruction requires: crc armv8
 @ CHECK-V7: error: instruction requires: crc armv8
 @ CHECK-V7: error: instruction requires: crc armv8
+@ CHECK-NOV8: error: instruction requires: armv8
+@ CHECK-NOV8: error: instruction requires: armv8
+@ CHECK-NOV8: error: instruction requires: armv8
 @ CHECK-NOCRC: error: instruction requires: crc
 @ CHECK-NOCRC: error: instruction requires: crc
 @ CHECK-NOCRC: error: instruction requires: crc
Index: test/MC/ARM/crc32.s
===================================================================
--- test/MC/ARM/crc32.s
+++ test/MC/ARM/crc32.s
@@ -1,5 +1,7 @@
-@ RUN: llvm-mc -triple=armv8 -show-encoding < %s | FileCheck %s
+@ RUN: llvm-mc -triple=armv8 -mattr=+crc -show-encoding < %s | FileCheck %s
 @ RUN: not llvm-mc -triple=armv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V7
+@ RUN: not llvm-mc -triple=armv7 -mattr=+crc -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOV8
+@ RUN: not llvm-mc -triple=thumbv8 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOCRC
 @ RUN: not llvm-mc -triple=thumbv8 -mattr=-crc -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOCRC
         crc32b  r0, r1, r2
         crc32h  r0, r1, r2
@@ -11,6 +13,9 @@
 @ CHECK-V7: error: instruction requires: crc armv8
 @ CHECK-V7: error: instruction requires: crc armv8
 @ CHECK-V7: error: instruction requires: crc armv8
+@ CHECK-NOV8: error: instruction requires: armv8
+@ CHECK-NOV8: error: instruction requires: armv8
+@ CHECK-NOV8: error: instruction requires: armv8
 @ CHECK-NOCRC: error: instruction requires: crc
 @ CHECK-NOCRC: error: instruction requires: crc
 @ CHECK-NOCRC: error: instruction requires: crc
@@ -25,6 +30,9 @@
 @ CHECK-V7: error: instruction requires: crc armv8
 @ CHECK-V7: error: instruction requires: crc armv8
 @ CHECK-V7: error: instruction requires: crc armv8
+@ CHECK-NOV8: error: instruction requires: armv8
+@ CHECK-NOV8: error: instruction requires: armv8
+@ CHECK-NOV8: error: instruction requires: armv8
 @ CHECK-NOCRC: error: instruction requires: crc
 @ CHECK-NOCRC: error: instruction requires: crc
 @ CHECK-NOCRC: error: instruction requires: crc
Index: test/MC/ARM/eh-directive-integrated-test.s
===================================================================
--- test/MC/ARM/eh-directive-integrated-test.s
+++ test/MC/ARM/eh-directive-integrated-test.s
@@ -19,7 +19,7 @@
 @ restore the general-purpose and VFP registers.
 
 
-@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -mattr=+vfp2 -filetype=obj -o - \
 @ RUN:   | llvm-readobj -s -sd | FileCheck %s
 
 
Index: test/MC/ARM/eh-directive-section-comdat.s
===================================================================
--- test/MC/ARM/eh-directive-section-comdat.s
+++ test/MC/ARM/eh-directive-section-comdat.s
@@ -1,4 +1,4 @@
-@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -mattr=+vfp2 -filetype=obj -o - \
 @ RUN:   | llvm-readobj -s -sd -sr -t | FileCheck %s
 
 @ Check the .group section for the function in comdat section.
Index: test/MC/ARM/eh-directive-vsave.s
===================================================================
--- test/MC/ARM/eh-directive-vsave.s
+++ test/MC/ARM/eh-directive-vsave.s
@@ -1,4 +1,4 @@
-@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -filetype=obj -o - \
+@ RUN: llvm-mc %s -triple=armv7-unknown-linux-gnueabi -mattr=+vfp2 -filetype=obj -o - \
 @ RUN:   | llvm-readobj -s -sd -sr | FileCheck %s
 
 @ Check the .vsave directive
Index: test/MC/ARM/single-precision-fp.s
===================================================================
--- test/MC/ARM/single-precision-fp.s
+++ test/MC/ARM/single-precision-fp.s
@@ -1,4 +1,4 @@
-@ RUN: not llvm-mc < %s -triple thumbv8-unknown-unknown -show-encoding -mattr=+fp-only-sp,-neon 2> %t > %t2
+@ RUN: not llvm-mc < %s -triple thumbv8-unknown-unknown -show-encoding -mattr=+fp-armv8,+fp-only-sp,-neon 2> %t > %t2
 @ RUN:     FileCheck %s < %t --check-prefix=CHECK-ERRORS
 @ RUN:     FileCheck %s < %t2
 
Index: test/MC/ARM/vmov-vmvn-byte-replicate.s
===================================================================
--- test/MC/ARM/vmov-vmvn-byte-replicate.s
+++ test/MC/ARM/vmov-vmvn-byte-replicate.s
@@ -1,5 +1,5 @@
 @ PR18921, "vmov" part.
-@ RUN: llvm-mc -triple=armv7-linux-gnueabi -show-encoding < %s | FileCheck %s
+@ RUN: llvm-mc -triple=armv7-linux-gnueabi -mattr=+neon -show-encoding < %s | FileCheck %s
 .text
 
 @ CHECK: vmov.i8 d2, #0xff @ encoding: [0x1f,0x2e,0x87,0xf3]
Index: test/MC/Disassembler/ARM/armv8.1a.txt
===================================================================
--- test/MC/Disassembler/ARM/armv8.1a.txt
+++ test/MC/Disassembler/ARM/armv8.1a.txt
@@ -1,5 +1,5 @@
-# RUN: llvm-mc -triple armv8 -mattr=+v8.1a  --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V81a
-# RUN: not llvm-mc -triple armv8 -mattr=+v8 --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V8
+# RUN: llvm-mc -triple armv8 -mattr=+v8.1a,+neon  --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V81a
+# RUN: not llvm-mc -triple armv8 -mattr=+v8,+neon --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V8
 
 [0x54,0x0b,0x12,0xf3]
 [0x12,0x0b,0x21,0xf3]
Index: test/MC/Disassembler/ARM/crc32-thumb.txt
===================================================================
--- test/MC/Disassembler/ARM/crc32-thumb.txt
+++ test/MC/Disassembler/ARM/crc32-thumb.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumbv8 2>&1 | FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=thumbv8 -mattr=+crc 2>&1 | FileCheck %s
 
 # CHECK:  crc32b  r0, r1, r2
 # CHECK:  crc32h  r0, r1, r2
Index: test/MC/Disassembler/ARM/crc32.txt
===================================================================
--- test/MC/Disassembler/ARM/crc32.txt
+++ test/MC/Disassembler/ARM/crc32.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=armv8 2>&1 | FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=armv8 -mattr=+crc 2>&1 | FileCheck %s
 
 # CHECK:  crc32b  r0, r1, r2
 # CHECK:  crc32h  r0, r1, r2
Index: test/MC/Disassembler/ARM/invalid-FSTMX-arm.txt
===================================================================
--- test/MC/Disassembler/ARM/invalid-FSTMX-arm.txt
+++ test/MC/Disassembler/ARM/invalid-FSTMX-arm.txt
@@ -1,5 +1,5 @@
-# RUN: llvm-mc --disassemble %s -triple=armv7 2>&1 | FileCheck %s -check-prefix=CHECK-WARN
-# RUN: llvm-mc --disassemble %s -triple=armv7 2>&1 | FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=armv7 -mattr=+vfp2 2>&1 | FileCheck %s -check-prefix=CHECK-WARN
+# RUN: llvm-mc --disassemble %s -triple=armv7 -mattr=+vfp2 2>&1 | FileCheck %s
 
 # offset=1
 # CHECK-WARN: potentially undefined
Index: test/MC/Disassembler/ARM/neont-VLD-reencoding.txt
===================================================================
--- test/MC/Disassembler/ARM/neont-VLD-reencoding.txt
+++ test/MC/Disassembler/ARM/neont-VLD-reencoding.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple thumbv7 -mattr=+neon -show-encoding -disassemble < %s | FileCheck %s
 
 0xa0 0xf9 0x00 0x00
 0xa0 0xf9 0x20 0x00
Index: test/MC/Disassembler/ARM/neont-VST-reencoding.txt
===================================================================
--- test/MC/Disassembler/ARM/neont-VST-reencoding.txt
+++ test/MC/Disassembler/ARM/neont-VST-reencoding.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple thumbv7 -mattr=+neon -show-encoding -disassemble < %s | FileCheck %s
 
 0x80 0xf9 0x00 0x00
 0x81 0xf9 0x21 0x10
Index: test/MC/Disassembler/ARM/thumb-v8.1a.txt
===================================================================
--- test/MC/Disassembler/ARM/thumb-v8.1a.txt
+++ test/MC/Disassembler/ARM/thumb-v8.1a.txt
@@ -1,5 +1,5 @@
-# RUN: llvm-mc -triple thumbv8 -mattr=+v8.1a  --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V81a
-# RUN: not llvm-mc -triple thumbv8 -mattr=+v8 --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V8
+# RUN: llvm-mc -triple thumbv8 -mattr=+v8.1a,+neon  --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V81a
+# RUN: not llvm-mc -triple thumbv8 -mattr=+v8,+neon --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V8
 
 [0x11,0xff,0x12,0x0b]
 # CHECK-V81a: vqrdmlah.s16  d0, d1, d2
Index: test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
===================================================================
--- test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
+++ test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -debug-only=loop-vectorize -loop-vectorize -instcombine -enable-interleaved-mem-accesses=true  < %s 2>&1 | FileCheck %s
+; RUN: opt -mattr=+neon -S -debug-only=loop-vectorize -loop-vectorize -instcombine -enable-interleaved-mem-accesses=true  < %s 2>&1 | FileCheck %s
 ; REQUIRES: asserts
 
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"