Index: lib/Target/X86/X86.td
===================================================================
--- lib/Target/X86/X86.td
+++ lib/Target/X86/X86.td
@@ -34,8 +34,16 @@
 def FeatureX87     : SubtargetFeature<"x87","HasX87", "true",
                                       "Enable X87 float instructions">;
 
+def Feature486Insns : SubtargetFeature<"i486insns","Has486Insns", "true",
+                                       "Enable i486 instructions">;
+
+def Feature586Insns : SubtargetFeature<"i586insns","Has586Insns", "true",
+                                       "Enable i586 instructions",
+                                       [Feature486Insns]>;
+
 def FeatureCMOV    : SubtargetFeature<"cmov","HasCMov", "true",
-                                      "Enable conditional move instructions">;
+                                      "Enable conditional move instructions",
+                                      [Feature586Insns]>;
 
 def FeaturePOPCNT  : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
                                       "Support POPCNT instruction">;
@@ -264,11 +272,11 @@
 
 def : Proc<"generic",         [FeatureX87, FeatureSlowUAMem16]>;
 def : Proc<"i386",            [FeatureX87, FeatureSlowUAMem16]>;
-def : Proc<"i486",            [FeatureX87, FeatureSlowUAMem16]>;
-def : Proc<"i586",            [FeatureX87, FeatureSlowUAMem16]>;
-def : Proc<"pentium",         [FeatureX87, FeatureSlowUAMem16]>;
-def : Proc<"pentium-mmx",     [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
-def : Proc<"i686",            [FeatureX87, FeatureSlowUAMem16]>;
+def : Proc<"i486",            [FeatureX87, FeatureSlowUAMem16, Feature486Insns]>;
+def : Proc<"i586",            [FeatureX87, FeatureSlowUAMem16, Feature586Insns]>;
+def : Proc<"pentium",         [FeatureX87, FeatureSlowUAMem16, Feature586Insns]>;
+def : Proc<"pentium-mmx",     [FeatureX87, FeatureSlowUAMem16, FeatureMMX, Feature586Insns]>;
+def : Proc<"i686",            [FeatureX87, FeatureSlowUAMem16, Feature586Insns]>;
 def : Proc<"pentiumpro",      [FeatureX87, FeatureSlowUAMem16, FeatureCMOV]>;
 def : Proc<"pentium2",        [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
                                FeatureCMOV, FeatureFXSR]>;
@@ -284,7 +292,7 @@
                                FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>;
 
 // Intel Quark.
-def : Proc<"lakemont",        []>;
+def : Proc<"lakemont",        [Feature586Insns]>;
 
 // Intel Core Duo.
 def : ProcessorModel<"yonah", SandyBridgeModel,
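Note on the feature hierarchy above: Feature586Insns implies Feature486Insns,
and FeatureCMOV now implies Feature586Insns, so TableGen's recursive expansion
of implied features means that any CPU model or -mattr flag which enables cmov
transitively enables the i486- and i586-era instructions as well. Each Proc
definition therefore only has to name the newest feature level it supports.
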
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -90,9 +90,10 @@
     else
       setMaxAtomicSizeInBitsSupported(64);
   } else {
-    // FIXME: Check that we actually have cmpxchg (i486 or later)
-    // FIXME: Check that we actually have cmpxchg8b (i586 or later)
-    setMaxAtomicSizeInBitsSupported(64);
+    if (Subtarget.has586Insns())
+      setMaxAtomicSizeInBitsSupported(64); // has cmpxchg8b
+    else if (Subtarget.has486Insns())
+      setMaxAtomicSizeInBitsSupported(32); // has cmpxchg
   }
 
   // For 64-bit, since we have so many registers, use the ILP scheduler.
@@ -29851,6 +29852,10 @@
 }
 
 bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
+  // If we don't have bswap available, don't do these transforms.
+  if (!Subtarget.has486Insns())
+    return false;
+
   InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
   std::string AsmStr = IA->getAsmString();
 
@@ -29866,10 +29871,6 @@
   switch (AsmPieces.size()) {
   default: return false;
   case 1:
-    // FIXME: this should verify that we are targeting a 486 or better. If not,
-    // we will turn this bswap into something that will be lowered to logical
-    // ops instead of emitting the bswap asm. For now, we don't support 486 or
-    // lower so don't worry about this.
     // bswap $0
     if (matchAsm(AsmPieces[0], {"bswap", "$0"}) ||
         matchAsm(AsmPieces[0], {"bswapl", "$0"}) ||
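To illustrate the intended effect of the setMaxAtomicSizeInBitsSupported()
change (an illustration only; the function names below are mine, and the
authoritative coverage is the new atomic-cpus.ll test further down): on an
i486-class target, AtomicExpandPass should now turn 64-bit atomic operations
into __atomic_* libcalls, while 32-bit ones keep their native lowering.

    ; With `llc -march=x86 -mcpu=i486`:

    ; Expected to become a call to __atomic_fetch_add_8, since the 486 has
    ; no cmpxchg8b from which to build an inline 64-bit RMW loop.
    define void @add64(i64* %p) {
      %r = atomicrmw add i64* %p, i64 1 seq_cst
      ret void
    }

    ; Expected to stay inline (a lock-prefixed ALU op), since the 486 does
    ; have 32-bit cmpxchg and xadd.
    define void @add32(i32* %p) {
      %r = atomicrmw add i32* %p, i32 1 seq_cst
      ret void
    }

The new ExpandInlineAsm() guard addresses the FIXME it deletes: without bswap
in the instruction set, rewriting asm("bswap $0") into llvm.bswap would end up
lowered as rotates rather than the instruction the user explicitly asked for,
so the transform is simply skipped on pre-486 subtargets.
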
Index: lib/Target/X86/X86InstrCompiler.td
===================================================================
--- lib/Target/X86/X86InstrCompiler.td
+++ lib/Target/X86/X86InstrCompiler.td
@@ -725,24 +725,28 @@
   let Defs = [AL, EFLAGS], Uses = [AL] in
   def NAME#8  : I<Opc8, Form, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$swap),
                   !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),
-                  [(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK;
+                  [(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK,
+                  Requires<[Has486Insns]>;
   let Defs = [AX, EFLAGS], Uses = [AX] in
   def NAME#16 : I<Opc, Form, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$swap),
                   !strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"),
-                  [(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize16, LOCK;
+                  [(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize16, LOCK,
+                  Requires<[Has486Insns]>;
   let Defs = [EAX, EFLAGS], Uses = [EAX] in
   def NAME#32 : I<Opc, Form, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$swap),
                   !strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"),
-                  [(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, OpSize32, LOCK;
+                  [(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, OpSize32, LOCK,
+                  Requires<[Has486Insns]>;
   let Defs = [RAX, EFLAGS], Uses = [RAX] in
   def NAME#64 : RI<Opc, Form, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$swap),
                    !strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"),
-                   [(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK;
+                   [(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK,
+                   Requires<[In64BitMode]>;
 }
 }
 
 let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
-    SchedRW = [WriteALULd, WriteRMW] in {
+    Predicates = [Has586Insns], SchedRW = [WriteALULd, WriteRMW] in {
 defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b",
                                 X86cas8, i64mem,
                                 IIC_CMPX_LOCK_8B>;
@@ -815,28 +819,28 @@
                     !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
                     [(set GR8:$dst,
                           (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
-                    itin8>;
+                    itin8>, Requires<[Has486Insns]>;
    def NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),
                    (ins GR16:$val, i16mem:$ptr),
                    !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
                    [(set GR16:$dst,
                          (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
-                   itin>, OpSize16;
+                   itin>, OpSize16, Requires<[Has486Insns]>;
    def NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),
                    (ins GR32:$val, i32mem:$ptr),
                    !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
                    [(set GR32:$dst,
                          (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
-                   itin>, OpSize32;
+                   itin>, OpSize32, Requires<[Has486Insns]>;
    def NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
                     (ins GR64:$val, i64mem:$ptr),
                     !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
                     [(set GR64:$dst,
                           (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
-                    itin>;
+                    itin>, Requires<[In64BitMode]>;
 }
 }
@@ -1950,3 +1954,13 @@
 let Predicates = [HasMOVBE] in {
   def : Pat<(bswap GR16:$src), (ROL16ri GR16:$src, (i8 8))>;
 }
+
+// On a 386, we expand bswap to 3 rotates after register selection.
+let Predicates = [No486Insns],
+    Constraints = "$src = $dst", Defs = [EFLAGS],
+    isPseudo = 1 in {
+def PSEUDO_BSWAP32r : I<0, Pseudo,
+                        (outs GR32:$dst), (ins GR32:$src),
+                        "bswap\t$dst",
+                        [(set GR32:$dst, (bswap GR32:$src))]>;
+}
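A worked example of the three-rotate expansion that PSEUDO_BSWAP32r stands for
(my own trace; the matching CHECK lines are in the bswap.ll update below).
Starting from %eax = 0xAABBCCDD:

    rorw $8,  %ax     ; eax = 0xAABBDDCC  (swap the low two bytes)
    rorl $16, %eax    ; eax = 0xDDCCAABB  (swap the 16-bit halves)
    rorw $8,  %ax     ; eax = 0xDDCCBBAA  (swap the new low two bytes)

which equals bswap(0xAABBCCDD) = 0xDDCCBBAA, as required. So plain IR such as
this (the function name is mine):

    define i32 @swap32(i32 %x) {
      %r = call i32 @llvm.bswap.i32(i32 %x)
      ret i32 %r
    }
    declare i32 @llvm.bswap.i32(i32)

should select the pseudo on a pre-486 subtarget and be rewritten into the
rotates by expandPostRAPseudo() in the X86InstrInfo.cpp hunk that follows.
The pseudo declares Defs = [EFLAGS] because the rotates clobber flags that a
real bswap leaves untouched, and it is 32-bit only: an i64 bswap is already
split into two i32 halves by type legalization, so it simply becomes two such
sequences (see @Y in bswap.ll below).
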
Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -5468,6 +5468,20 @@
   MIB.addReg(Reg, RegState::Kill).addImm(1).addReg(0).addImm(0).addReg(0);
 }
 
+static bool ExpandPSEUDO_BSWAP32r(MachineInstr *MI,
+                                  const TargetInstrInfo &TII) {
+  MachineBasicBlock *BB = MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+  unsigned Reg = MI->getOperand(0).getReg();
+  unsigned Reg16 = getX86SubSuperRegister(Reg, 16);
+  BuildMI(*BB, MI, DL, TII.get(X86::ROR16ri), Reg16).addReg(Reg16).addImm(8);
+  BuildMI(*BB, MI, DL, TII.get(X86::ROR32ri), Reg).addReg(Reg).addImm(16);
+  BuildMI(*BB, MI, DL, TII.get(X86::ROR16ri), Reg16).addReg(Reg16).addImm(8);
+
+  MI->eraseFromParent(); // The pseudo is gone now.
+  return true;
+}
+
 bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
   bool HasAVX = Subtarget.hasAVX();
   MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
@@ -5527,6 +5541,8 @@
   case TargetOpcode::LOAD_STACK_GUARD:
     expandLoadStackGuard(MIB, *this);
     return true;
+  case X86::PSEUDO_BSWAP32r:
+    return ExpandPSEUDO_BSWAP32r(MIB, *this);
   }
   return false;
 }
Index: lib/Target/X86/X86InstrInfo.td
===================================================================
--- lib/Target/X86/X86InstrInfo.td
+++ lib/Target/X86/X86InstrInfo.td
@@ -767,6 +767,9 @@
 // X86 Instruction Predicate Definitions.
 def TruePredicate : Predicate<"true">;
 
+def Has486Insns  : Predicate<"Subtarget->has486Insns()">;
+def No486Insns   : Predicate<"!Subtarget->has486Insns()">;
+def Has586Insns  : Predicate<"Subtarget->has586Insns()">;
 def HasCMov      : Predicate<"Subtarget->hasCMov()">;
 def NoCMov       : Predicate<"!Subtarget->hasCMov()">;
 
@@ -1237,11 +1240,13 @@
 let Constraints = "$src = $dst", SchedRW = [WriteALU] in {
 def BSWAP32r : I<0xC8, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
                  "bswap{l}\t$dst",
-                 [(set GR32:$dst, (bswap GR32:$src))], IIC_BSWAP>, OpSize32, TB;
+                 [(set GR32:$dst, (bswap GR32:$src))], IIC_BSWAP>, OpSize32, TB,
+                 Requires<[Has486Insns]>;
 
 def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
                   "bswap{q}\t$dst",
-                  [(set GR64:$dst, (bswap GR64:$src))], IIC_BSWAP>, TB;
+                  [(set GR64:$dst, (bswap GR64:$src))], IIC_BSWAP>, TB,
+                  Requires<[Has486Insns]>;
 } // Constraints = "$src = $dst", SchedRW
 
 // Bit scan instructions.
@@ -1881,65 +1886,70 @@
 let SchedRW = [WriteALU] in {
 def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
-                "xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB;
+                "xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB,
+                Requires<[Has486Insns]>;
 def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
                  "xadd{w}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB,
-                 OpSize16;
+                 OpSize16, Requires<[Has486Insns]>;
 def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
                  "xadd{l}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB,
-                 OpSize32;
+                 OpSize32, Requires<[Has486Insns]>;
 def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
-                  "xadd{q}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB;
+                  "xadd{q}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB,
+                  Requires<[In64BitMode]>;
 } // SchedRW
 
 let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
 def XADD8rm  : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
-                 "xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB;
+                 "xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB,
+                 Requires<[Has486Insns]>;
 def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
                  "xadd{w}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB,
-                 OpSize16;
+                 OpSize16, Requires<[Has486Insns]>;
 def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                  "xadd{l}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB,
-                 OpSize32;
+                 OpSize32, Requires<[Has486Insns]>;
 def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                  "xadd{q}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB;
+                  "xadd{q}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB,
+                  Requires<[In64BitMode]>;
 }
 
 let SchedRW = [WriteALU] in {
 def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
                    "cmpxchg{b}\t{$src, $dst|$dst, $src}", [],
-                   IIC_CMPXCHG_REG8>, TB;
+                   IIC_CMPXCHG_REG8>, TB, Requires<[Has486Insns]>;
 def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
                     "cmpxchg{w}\t{$src, $dst|$dst, $src}", [],
-                    IIC_CMPXCHG_REG>, TB, OpSize16;
+                    IIC_CMPXCHG_REG>, TB, OpSize16, Requires<[Has486Insns]>;
 def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
                     "cmpxchg{l}\t{$src, $dst|$dst, $src}", [],
-                    IIC_CMPXCHG_REG>, TB, OpSize32;
+                    IIC_CMPXCHG_REG>, TB, OpSize32, Requires<[Has486Insns]>;
 def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
                      "cmpxchg{q}\t{$src, $dst|$dst, $src}", [],
-                     IIC_CMPXCHG_REG>, TB;
+                     IIC_CMPXCHG_REG>, TB, Requires<[In64BitMode]>;
 } // SchedRW
 
 let SchedRW = [WriteALULd, WriteRMW] in {
 let mayLoad = 1, mayStore = 1 in {
 def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
                    "cmpxchg{b}\t{$src, $dst|$dst, $src}", [],
-                   IIC_CMPXCHG_MEM8>, TB;
+                   IIC_CMPXCHG_MEM8>, TB, Requires<[Has486Insns]>;
 def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
                     "cmpxchg{w}\t{$src, $dst|$dst, $src}", [],
-                    IIC_CMPXCHG_MEM>, TB, OpSize16;
+                    IIC_CMPXCHG_MEM>, TB, OpSize16, Requires<[Has486Insns]>;
 def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                     "cmpxchg{l}\t{$src, $dst|$dst, $src}", [],
-                    IIC_CMPXCHG_MEM>, TB, OpSize32;
+                    IIC_CMPXCHG_MEM>, TB, OpSize32, Requires<[Has486Insns]>;
 def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
                      "cmpxchg{q}\t{$src, $dst|$dst, $src}", [],
-                     IIC_CMPXCHG_MEM>, TB;
+                     IIC_CMPXCHG_MEM>, TB, Requires<[In64BitMode]>;
 }
 
 let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
 def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
-                  "cmpxchg8b\t$dst", [], IIC_CMPXCHG_8B>, TB;
+                  "cmpxchg8b\t$dst", [], IIC_CMPXCHG_8B>, TB,
+                  Requires<[Has586Insns]>;
 
 let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
 def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
Index: lib/Target/X86/X86Subtarget.h
===================================================================
--- lib/Target/X86/X86Subtarget.h
+++ lib/Target/X86/X86Subtarget.h
@@ -73,6 +73,12 @@
   /// True if the processor supports X87 instructions.
   bool HasX87;
 
+  /// Target has the instructions added with i486.
+  bool Has486Insns;
+
+  /// Target has the instructions added with i586.
+  bool Has586Insns;
+
   /// True if this processor has conditional move instructions
   /// (generally pentium pro+).
   bool HasCMov;
@@ -374,6 +380,8 @@
   void setPICStyle(PICStyles::Style Style)  { PICStyle = Style; }
 
   bool hasX87() const { return HasX87; }
+  bool has486Insns() const { return Has486Insns; }
+  bool has586Insns() const { return Has586Insns; }
   bool hasCMov() const { return HasCMov; }
   bool hasSSE1() const { return X86SSELevel >= SSE1; }
   bool hasSSE2() const { return X86SSELevel >= SSE2; }
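One assumption worth making explicit, since the corresponding X86Subtarget.cpp
hunk is not shown in this excerpt: like HasX87 and the other feature booleans,
the new Has486Insns and Has586Insns members presumably need to be initialized
to false in X86Subtarget's initializeEnvironment(), so that subtargets which
enable neither feature (e.g. i386) reliably report both as absent.
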
Index: test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
===================================================================
--- test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
+++ test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin | FileCheck %s
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin -mcpu=i686 | FileCheck %s
 ; PR8297
 ;
 ; On i386, i64 cmpxchg is lowered during legalize types to extract the
Index: test/CodeGen/X86/atomic-cpus.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/atomic-cpus.ll
@@ -0,0 +1,115 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck --check-prefix=X64_CX16 --check-prefix=CX8_64 --check-prefix=CX4 --check-prefix=CHECK %s
+; RUN: llc < %s -march=x86-64 -mcpu=x86-64 | FileCheck --check-prefix=X64_NOCX16 --check-prefix=CX8_64 --check-prefix=CX4 --check-prefix=CHECK %s
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=-cx16 | FileCheck --check-prefix=X64_NOCX16 --check-prefix=CX8_64 --check-prefix=CX4 --check-prefix=CHECK %s
+; RUN: llc < %s -march=x86 -mcpu=i586 | FileCheck --check-prefix=X32_NOCX16 --check-prefix=CX8_32 --check-prefix=CX4 --check-prefix=CHECK %s
+; RUN: llc < %s -march=x86 -mcpu=i486 | FileCheck --check-prefix=X32_NOCX16 --check-prefix=NOCX8 --check-prefix=CX4 --check-prefix=CHECK %s
+; RUN: llc < %s -march=x86 -mcpu=i386 | FileCheck --check-prefix=X32_NOCX16 --check-prefix=NOCX8 --check-prefix=NOCX4 --check-prefix=CHECK %s
+
+;; This test checks that various versions of the x86 do, or do not,
+;; support native atomic instructions of different sizes.
+
+define void @test_i128(i128* %a) nounwind {
+; CHECK-LABEL: test_i128:
+entry:
+; X64_NOCX16: __atomic_compare_exchange_16
+; X32_NOCX16: __atomic_compare_exchange{{$}}
+; X64_CX16: cmpxchg16b
+  %0 = cmpxchg i128* %a, i128 1, i128 1 seq_cst seq_cst
+; X64_NOCX16: __atomic_exchange_16
+; X32_NOCX16: __atomic_exchange{{$}}
+; X64_CX16: cmpxchg16b
+  %1 = atomicrmw xchg i128* %a, i128 1 seq_cst
+; X64_NOCX16: __atomic_fetch_add_16
+; X32_NOCX16: __atomic_compare_exchange{{$}}
+; X64_CX16: cmpxchg16b
+  %2 = atomicrmw add i128* %a, i128 1 seq_cst
+; X64_NOCX16: __atomic_fetch_sub_16
+; X32_NOCX16: __atomic_compare_exchange{{$}}
+; X64_CX16: cmpxchg16b
+  %3 = atomicrmw sub i128* %a, i128 1 seq_cst
+; X64_NOCX16: __atomic_fetch_and_16
+; X32_NOCX16: __atomic_compare_exchange{{$}}
+; X64_CX16: cmpxchg16b
+  %4 = atomicrmw and i128* %a, i128 1 seq_cst
+; X64_NOCX16: __atomic_fetch_nand_16
+; X32_NOCX16: __atomic_compare_exchange{{$}}
+; X64_CX16: cmpxchg16b
+  %5 = atomicrmw nand i128* %a, i128 1 seq_cst
+; X64_NOCX16: __atomic_fetch_or_16
+; X32_NOCX16: __atomic_compare_exchange{{$}}
+; X64_CX16: cmpxchg16b
+  %6 = atomicrmw or i128* %a, i128 1 seq_cst
+; X64_NOCX16: __atomic_fetch_xor_16
+; X32_NOCX16: __atomic_compare_exchange{{$}}
+; X64_CX16: cmpxchg16b
+  %7 = atomicrmw xor i128* %a, i128 1 seq_cst
+  ret void
+}
+
+define void @test_i64(i64* %a) nounwind {
+; CHECK-LABEL: test_i64:
+entry:
+; NOCX8: __atomic_compare_exchange_8
+; CX8_64: cmpxchgq
+; CX8_32: cmpxchg8b
+  %0 = cmpxchg i64* %a, i64 1, i64 1 seq_cst seq_cst
+; NOCX8: __atomic_exchange_8
+; CX8_64: xchgq
+; CX8_32: cmpxchg8b
+  %1 = atomicrmw xchg i64* %a, i64 1 seq_cst
+; NOCX8: __atomic_fetch_add_8
+; CX8_64: lock incq
+; CX8_32: cmpxchg8b
+  %2 = atomicrmw add i64* %a, i64 1 seq_cst
+; NOCX8: __atomic_fetch_sub_8
+; CX8_64: lock decq
+; CX8_32: cmpxchg8b
+  %3 = atomicrmw sub i64* %a, i64 1 seq_cst
+; NOCX8: __atomic_fetch_and_8
+; CX8_64: lock andq
+; CX8_32: cmpxchg8b
+  %4 = atomicrmw and i64* %a, i64 1 seq_cst
+; NOCX8: __atomic_fetch_nand_8
+; CX8_64: cmpxchgq
+; CX8_32: cmpxchg8b
+  %5 = atomicrmw nand i64* %a, i64 1 seq_cst
+; NOCX8: __atomic_fetch_or_8
+; CX8_64: lock orq
+; CX8_32: cmpxchg8b
+  %6 = atomicrmw or i64* %a, i64 1 seq_cst
+; NOCX8: __atomic_fetch_xor_8
+; CX8_64: lock xorq
+; CX8_32: cmpxchg8b
+  %7 = atomicrmw xor i64* %a, i64 1 seq_cst
+  ret void
+}
+
+define void @test_i32(i32* %a) nounwind {
+; CHECK-LABEL: test_i32:
+entry:
+; NOCX4: __atomic_compare_exchange_4
+; CX4: lock cmpxchgl
+  %0 = cmpxchg i32* %a, i32 1, i32 1 seq_cst seq_cst
+; NOCX4: __atomic_exchange_4
+; CX4: xchgl
+  %1 = atomicrmw xchg i32* %a, i32 1 seq_cst
+; NOCX4: __atomic_fetch_add_4
+; CX4: lock incl
+  %2 = atomicrmw add i32* %a, i32 1 seq_cst
+; NOCX4: __atomic_fetch_sub_4
+; CX4: lock decl
+  %3 = atomicrmw sub i32* %a, i32 1 seq_cst
+; NOCX4: __atomic_fetch_and_4
+; CX4: lock andl
+  %4 = atomicrmw and i32* %a, i32 1 seq_cst
+; NOCX4: __atomic_fetch_nand_4
+; CX4: lock cmpxchgl
+  %5 = atomicrmw nand i32* %a, i32 1 seq_cst
+; NOCX4: __atomic_fetch_or_4
+; CX4: lock orl
+  %6 = atomicrmw or i32* %a, i32 1 seq_cst
+; NOCX4: __atomic_fetch_xor_4
+; CX4: lock xorl
+  %7 = atomicrmw xor i32* %a, i32 1 seq_cst
+  ret void
+}
Index: test/CodeGen/X86/atomic-flags.ll
===================================================================
--- test/CodeGen/X86/atomic-flags.ll
+++ test/CodeGen/X86/atomic-flags.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mcpu=i686 -verify-machineinstrs | FileCheck %s
 
 ; Make sure that flags are properly preserved despite atomic optimizations.
 
Index: test/CodeGen/X86/atomic-pointer.ll
===================================================================
--- test/CodeGen/X86/atomic-pointer.ll
+++ test/CodeGen/X86/atomic-pointer.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-none-linux -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=i686-none-linux -mcpu=i686 -verify-machineinstrs | FileCheck %s
 
 define i32* @test_atomic_ptr_load(i32** %a0) {
 ; CHECK: test_atomic_ptr_load
Index: test/CodeGen/X86/atomic_mi.ll
===================================================================
--- test/CodeGen/X86/atomic_mi.ll
+++ test/CodeGen/X86/atomic_mi.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X32
+; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs -mcpu=i686 | FileCheck %s --check-prefix X32
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=slow-incdec -verify-machineinstrs | FileCheck %s --check-prefix SLOW_INC
 
 ; This file checks that atomic (non-seq_cst) stores of immediate values are
Index: test/CodeGen/X86/bswap.ll
===================================================================
--- test/CodeGen/X86/bswap.ll
+++ test/CodeGen/X86/bswap.ll
@@ -1,7 +1,8 @@
 ; bswap should be constant folded when it is passed a constant argument
 
-; RUN: llc < %s -march=x86 -mcpu=i686 | FileCheck %s
-; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK64
+; RUN: llc < %s -march=x86 -mcpu=i386 | FileCheck --check-prefix=CHECK386 --check-prefix=CHECK %s
+; RUN: llc < %s -march=x86 -mcpu=i486 | FileCheck --check-prefix=CHECK486 --check-prefix=CHECKBSW --check-prefix=CHECK %s
+; RUN: llc < %s -march=x86-64 | FileCheck --check-prefix=CHECK64 --check-prefix=CHECKBSW --check-prefix=CHECK %s
 
 declare i16 @llvm.bswap.i16(i16)
 
@@ -11,30 +12,31 @@
 
 define i16 @W(i16 %A) {
 ; CHECK-LABEL: W:
-; CHECK: rolw $8, %ax
-
-; CHECK64-LABEL: W:
-; CHECK64: rolw $8, %
+; CHECK: rolw $8, %
  %Z = call i16 @llvm.bswap.i16( i16 %A )         ; <i16> [#uses=1]
  ret i16 %Z
 }
 
 define i32 @X(i32 %A) {
 ; CHECK-LABEL: X:
-; CHECK: bswapl %eax
-
-; CHECK64-LABEL: X:
-; CHECK64: bswapl %
+; CHECK386: rorw $8, %ax
+; CHECK386: rorl $16, %eax
+; CHECK386: rorw $8, %ax
+; CHECKBSW: bswapl %
  %Z = call i32 @llvm.bswap.i32( i32 %A )         ; <i32> [#uses=1]
  ret i32 %Z
 }
 
 define i64 @Y(i64 %A) {
 ; CHECK-LABEL: Y:
-; CHECK: bswapl %eax
-; CHECK: bswapl %edx
-
-; CHECK64-LABEL: Y:
+; CHECK386: rorw $8, %ax
+; CHECK386: rorl $16, %eax
+; CHECK386: rorw $8, %ax
+; CHECK386: rorw $8, %dx
+; CHECK386: rorl $16, %edx
+; CHECK386: rorw $8, %dx
+; CHECK486: bswapl %eax
+; CHECK486: bswapl %edx
 ; CHECK64: bswapq %
  %Z = call i64 @llvm.bswap.i64( i64 %A )         ; <i64> [#uses=1]
  ret i64 %Z
@@ -44,12 +46,12 @@
 define i32 @test1(i32 %a) nounwind readnone {
 entry:
 ; CHECK-LABEL: test1:
-; CHECK: bswapl [[REG:%.*]]
-; CHECK: shrl $16, [[REG]]
-
-; CHECK64-LABEL: test1:
-; CHECK64: bswapl [[REG:%.*]]
-; CHECK64: shrl $16, [[REG]]
+; CHECK386: rorw $8, %[[REG:.*]]
+; CHECK386: rorl $16, %e[[REG]]
+; CHECK386: rorw $8, %[[REG]]
+; CHECK386: shrl $16, %e[[REG]]
+; CHECKBSW: bswapl [[REG:%.*]]
+; CHECKBSW: shrl $16, [[REG]]
  %and = lshr i32 %a, 8
  %shr3 = and i32 %and, 255
  %and2 = shl i32 %a, 8
@@ -61,12 +63,12 @@
 define i32 @test2(i32 %a) nounwind readnone {
 entry:
 ; CHECK-LABEL: test2:
-; CHECK: bswapl [[REG:%.*]]
-; CHECK: sarl $16, [[REG]]
-
-; CHECK64-LABEL: test2:
-; CHECK64: bswapl [[REG:%.*]]
-; CHECK64: sarl $16, [[REG]]
+; CHECK386: rorw $8, %[[REG:.*]]
+; CHECK386: rorl $16, %e[[REG]]
+; CHECK386: rorw $8, %[[REG]]
+; CHECK386: sarl $16, %e[[REG]]
+; CHECKBSW: bswapl [[REG:%.*]]
+; CHECKBSW: sarl $16, [[REG]]
  %and = lshr i32 %a, 8
  %shr4 = and i32 %and, 255
  %and2 = shl i32 %a, 8
@@ -86,11 +88,8 @@
 define i64 @not_bswap() {
 ; CHECK-LABEL: not_bswap:
 ; CHECK-NOT: bswapl
+; CHECK-NOT: bswapq
 ; CHECK: ret
-
-; CHECK64-LABEL: not_bswap:
-; CHECK64-NOT: bswapq
-; CHECK64: ret
  %init = load i16, i16* @var16
  %big = zext i16 %init to i64
 
@@ -109,12 +108,8 @@
 define i64 @not_useful_bswap() {
 ; CHECK-LABEL: not_useful_bswap:
 ; CHECK-NOT: bswapl
+; CHECK-NOT: bswapq
 ; CHECK: ret
-
-; CHECK64-LABEL: not_useful_bswap:
-; CHECK64-NOT: bswapq
-; CHECK64: ret
-
  %init = load i8, i8* @var8
  %big = zext i8 %init to i64
 
@@ -131,11 +126,13 @@
 
 define i64 @finally_useful_bswap() {
 ; CHECK-LABEL: finally_useful_bswap:
-; CHECK: bswapl [[REG:%.*]]
-; CHECK: shrl $16, [[REG]]
-; CHECK: ret
-
-; CHECK64-LABEL: finally_useful_bswap:
+; CHECK386: rorw $8, %[[REG:.*]]
+; CHECK386: rorl $16, %e[[REG]]
+; CHECK386: rorw $8, %[[REG]]
+; CHECK386: shrl $16, %e[[REG]]
+; CHECK486: bswapl [[REG:%.*]]
+; CHECK486: shrl $16, [[REG]]
+; CHECK486: ret
 ; CHECK64: bswapq [[REG:%.*]]
 ; CHECK64: shrq $48, [[REG]]
 ; CHECK64: ret
Index: test/CodeGen/X86/cmpxchg-clobber-flags.ll
===================================================================
--- test/CodeGen/X86/cmpxchg-clobber-flags.ll
+++ test/CodeGen/X86/cmpxchg-clobber-flags.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s -check-prefix=i386
-; RUN: llc -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=i386f
+; RUN: llc -mtriple=i386-linux-gnu -mcpu=i686 %s -o - | FileCheck %s -check-prefix=i386
+; RUN: llc -mtriple=i386-linux-gnu -mcpu=i686 -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=i386f
 ; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s -check-prefix=x8664
 ; RUN: llc -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664
 
Index: test/CodeGen/X86/nocx16.ll
===================================================================
--- test/CodeGen/X86/nocx16.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=-cx16 | FileCheck %s
-define void @test(i128* %a) nounwind {
-entry:
-; CHECK: __atomic_compare_exchange_16
-  %0 = cmpxchg i128* %a, i128 1, i128 1 seq_cst seq_cst
-; CHECK: __atomic_exchange_16
-  %1 = atomicrmw xchg i128* %a, i128 1 seq_cst
-; CHECK: __atomic_fetch_add_16
-  %2 = atomicrmw add i128* %a, i128 1 seq_cst
-; CHECK: __atomic_fetch_sub_16
-  %3 = atomicrmw sub i128* %a, i128 1 seq_cst
-; CHECK: __atomic_fetch_and_16
-  %4 = atomicrmw and i128* %a, i128 1 seq_cst
-; CHECK: __atomic_fetch_nand_16
-  %5 = atomicrmw nand i128* %a, i128 1 seq_cst
-; CHECK: __atomic_fetch_or_16
-  %6 = atomicrmw or i128* %a, i128 1 seq_cst
-; CHECK: __atomic_fetch_xor_16
-  %7 = atomicrmw xor i128* %a, i128 1 seq_cst
-  ret void
-}
Index: test/CodeGen/X86/peephole-na-phys-copy-folding.ll
===================================================================
--- test/CodeGen/X86/peephole-na-phys-copy-folding.ll
+++ test/CodeGen/X86/peephole-na-phys-copy-folding.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s
+; RUN: llc -mtriple=i686-linux-gnu -mcpu=i686 %s -o - | FileCheck %s
 ; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf %s -o - | FileCheck %s
 
 ; TODO: Reenable verify-machineinstrs once the if (!AXDead) // FIXME in
Index: test/Transforms/AtomicExpand/X86/expand-atomic-rmw-initial-load.ll
===================================================================
--- test/Transforms/AtomicExpand/X86/expand-atomic-rmw-initial-load.ll
+++ test/Transforms/AtomicExpand/X86/expand-atomic-rmw-initial-load.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S %s -atomic-expand -mtriple=i686-linux-gnu | FileCheck %s
+; RUN: opt -S %s -atomic-expand -mtriple=i686-linux-gnu -mcpu=i686 | FileCheck %s
 
 ; This file tests the function `llvm::expandAtomicRMWToCmpXchg`.
 ; It isn't technically target specific, but is exposed through a pass that is.