Index: lib/Target/X86/X86.td
===================================================================
--- lib/Target/X86/X86.td
+++ lib/Target/X86/X86.td
@@ -34,8 +34,16 @@
 def FeatureX87     : SubtargetFeature<"x87","HasX87", "true",
                                       "Enable X87 float instructions">;
 
+def Feature486Insns : SubtargetFeature<"i486insns","Has486Insns", "true",
+                                       "Enable i486 instructions">;
+
+def Feature586Insns : SubtargetFeature<"i586insns","Has586Insns", "true",
+                                       "Enable i586 instructions",
+                                       [Feature486Insns]>;
+
 def FeatureCMOV    : SubtargetFeature<"cmov","HasCMov", "true",
-                                      "Enable conditional move instructions">;
+                                      "Enable conditional move instructions",
+                                      [Feature586Insns]>;
 
 def FeaturePOPCNT  : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
                                       "Support POPCNT instruction">;
@@ -264,11 +272,11 @@
 
 def : Proc<"generic",         [FeatureX87, FeatureSlowUAMem16]>;
 def : Proc<"i386",            [FeatureX87, FeatureSlowUAMem16]>;
-def : Proc<"i486",            [FeatureX87, FeatureSlowUAMem16]>;
-def : Proc<"i586",            [FeatureX87, FeatureSlowUAMem16]>;
-def : Proc<"pentium",         [FeatureX87, FeatureSlowUAMem16]>;
-def : Proc<"pentium-mmx",     [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
-def : Proc<"i686",            [FeatureX87, FeatureSlowUAMem16]>;
+def : Proc<"i486",            [FeatureX87, FeatureSlowUAMem16, Feature486Insns]>;
+def : Proc<"i586",            [FeatureX87, FeatureSlowUAMem16, Feature586Insns]>;
+def : Proc<"pentium",         [FeatureX87, FeatureSlowUAMem16, Feature586Insns]>;
+def : Proc<"pentium-mmx",     [FeatureX87, FeatureSlowUAMem16, FeatureMMX, Feature586Insns]>;
+def : Proc<"i686",            [FeatureX87, FeatureSlowUAMem16, Feature586Insns]>;
 def : Proc<"pentiumpro",      [FeatureX87, FeatureSlowUAMem16, FeatureCMOV]>;
 def : Proc<"pentium2",        [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
                                FeatureCMOV, FeatureFXSR]>;
@@ -284,7 +292,7 @@
                                FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>;
 
 // Intel Quark.
-def : Proc<"lakemont",        []>;
+def : Proc<"lakemont",        [Feature586Insns]>;
 
 // Intel Core Duo.
 def : ProcessorModel<"yonah", SandyBridgeModel,
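Note on the feature hierarchy above: Feature586Insns implies Feature486Insns,
and FeatureCMOV now implies Feature586Insns, so TableGen's recursive expansion
of implied features means that any CPU model or -mattr flag which enables cmov
transitively enables the i486- and i586-era instructions as well. Each Proc
definition therefore only has to name the newest feature level it supports.
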
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -90,9 +90,10 @@
     else
       setMaxAtomicSizeInBitsSupported(64);
   } else {
-    // FIXME: Check that we actually have cmpxchg (i486 or later)
-    // FIXME: Check that we actually have cmpxchg8b (i586 or later)
-    setMaxAtomicSizeInBitsSupported(64);
+    if (Subtarget.has586Insns())
+      setMaxAtomicSizeInBitsSupported(64); // has cmpxchg8b
+    else if (Subtarget.has486Insns())
+      setMaxAtomicSizeInBitsSupported(32); // has cmpxchg
   }
 
   // For 64-bit, since we have so many registers, use the ILP scheduler.
@@ -29851,6 +29852,10 @@
 }
 
 bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
+  // If we don't have bswap available, don't do these transforms.
+  if (!Subtarget.has486Insns())
+    return false;
+
   InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
   std::string AsmStr = IA->getAsmString();
 
@@ -29866,10 +29871,6 @@
   switch (AsmPieces.size()) {
   default: return false;
   case 1:
-    // FIXME: this should verify that we are targeting a 486 or better. If not,
-    // we will turn this bswap into something that will be lowered to logical
-    // ops instead of emitting the bswap asm. For now, we don't support 486 or
-    // lower so don't worry about this.
     // bswap $0
     if (matchAsm(AsmPieces[0], {"bswap", "$0"}) ||
         matchAsm(AsmPieces[0], {"bswapl", "$0"}) ||
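To illustrate the intended effect of the setMaxAtomicSizeInBitsSupported()
change (an illustration only; the function names below are mine, and the
authoritative coverage is the new atomic-cpus.ll test further down): on an
i486-class target, AtomicExpandPass should now turn 64-bit atomic operations
into __atomic_* libcalls, while 32-bit ones keep their native lowering.

    ; With `llc -march=x86 -mcpu=i486`:

    ; Expected to become a call to __atomic_fetch_add_8, since the 486 has
    ; no cmpxchg8b from which to build an inline 64-bit RMW loop.
    define void @add64(i64* %p) {
      %r = atomicrmw add i64* %p, i64 1 seq_cst
      ret void
    }

    ; Expected to stay inline (a lock-prefixed ALU op), since the 486 does
    ; have 32-bit cmpxchg and xadd.
    define void @add32(i32* %p) {
      %r = atomicrmw add i32* %p, i32 1 seq_cst
      ret void
    }

The new ExpandInlineAsm() guard addresses the FIXME it deletes: without bswap
in the instruction set, rewriting asm("bswap $0") into llvm.bswap would end up
lowered as rotates rather than the instruction the user explicitly asked for,
so the transform is simply skipped on pre-486 subtargets.
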
Index: lib/Target/X86/X86InstrCompiler.td
===================================================================
--- lib/Target/X86/X86InstrCompiler.td
+++ lib/Target/X86/X86InstrCompiler.td
@@ -725,24 +725,28 @@
   let Defs = [AL, EFLAGS], Uses = [AL] in
   def NAME#8  : I<Opc8, Form, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$swap),
                   !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),
-                  [(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK;
+                  [(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK,
+                  Requires<[Has486Insns]>;
   let Defs = [AX, EFLAGS], Uses = [AX] in
   def NAME#16 : I<Opc, Form, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$swap),
                   !strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"),
-                  [(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize16, LOCK;
+                  [(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize16, LOCK,
+                  Requires<[Has486Insns]>;
   let Defs = [EAX, EFLAGS], Uses = [EAX] in
   def NAME#32 : I<Opc, Form, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$swap),
                   !strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"),
-                  [(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, OpSize32, LOCK;
+                  [(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, OpSize32, LOCK,
+                  Requires<[Has486Insns]>;
   let Defs = [RAX, EFLAGS], Uses = [RAX] in
   def NAME#64 : RI<Opc, Form, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$swap),
                    !strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"),
-                   [(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK;
+                   [(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK,
+                   Requires<[In64BitMode]>;
 }
 }
 
 let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
-    SchedRW = [WriteALULd, WriteRMW] in {
+    Predicates = [Has586Insns], SchedRW = [WriteALULd, WriteRMW] in {
 defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b",
                                 X86cas8, i64mem,
                                 IIC_CMPX_LOCK_8B>;
@@ -815,28 +819,28 @@
                     !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
                     [(set GR8:$dst,
                           (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
-                    itin8>;
+                    itin8>, Requires<[Has486Insns]>;
    def NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),
                    (ins GR16:$val, i16mem:$ptr),
                    !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
                    [(set GR16:$dst,
                          (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
-                   itin>, OpSize16;
+                   itin>, OpSize16, Requires<[Has486Insns]>;
    def NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),
                    (ins GR32:$val, i32mem:$ptr),
                    !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
                    [(set GR32:$dst,
                          (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
-                   itin>, OpSize32;
+                   itin>, OpSize32, Requires<[Has486Insns]>;
    def NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
                     (ins GR64:$val, i64mem:$ptr),
                     !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
                     [(set GR64:$dst,
                           (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
-                    itin>;
+                    itin>, Requires<[In64BitMode]>;
 }
 }
@@ -1950,3 +1954,13 @@
 let Predicates = [HasMOVBE] in {
   def : Pat<(bswap GR16:$src), (ROL16ri GR16:$src, (i8 8))>;
 }
+
+// On a 386, we expand bswap to 3 rotates after register selection.
+let Predicates = [No486Insns],
+    Constraints = "$src = $dst", Defs = [EFLAGS],
+    isPseudo = 1 in {
+def PSEUDO_BSWAP32r : I<0, Pseudo,
+                        (outs GR32:$dst), (ins GR32:$src),
+                        "bswap\t$dst",
+                        [(set GR32:$dst, (bswap GR32:$src))]>;
+}
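A worked example of the three-rotate expansion that PSEUDO_BSWAP32r stands for
(my own trace; the matching CHECK lines are in the bswap.ll update below).
Starting from %eax = 0xAABBCCDD:

    rorw $8,  %ax     ; eax = 0xAABBDDCC  (swap the low two bytes)
    rorl $16, %eax    ; eax = 0xDDCCAABB  (swap the 16-bit halves)
    rorw $8,  %ax     ; eax = 0xDDCCBBAA  (swap the new low two bytes)

which equals bswap(0xAABBCCDD) = 0xDDCCBBAA, as required. So plain IR such as
this (the function name is mine):

    define i32 @swap32(i32 %x) {
      %r = call i32 @llvm.bswap.i32(i32 %x)
      ret i32 %r
    }
    declare i32 @llvm.bswap.i32(i32)

should select the pseudo on a pre-486 subtarget and be rewritten into the
rotates by expandPostRAPseudo() in the X86InstrInfo.cpp hunk that follows.
The pseudo declares Defs = [EFLAGS] because the rotates clobber flags that a
real bswap leaves untouched, and it is 32-bit only: an i64 bswap is already
split into two i32 halves by type legalization, so it simply becomes two such
sequences (see @Y in bswap.ll below).
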
Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -5468,6 +5468,20 @@
   MIB.addReg(Reg, RegState::Kill).addImm(1).addReg(0).addImm(0).addReg(0);
 }
 
+static bool ExpandPSEUDO_BSWAP32r(MachineInstr *MI,
+                                  const TargetInstrInfo &TII) {
+  MachineBasicBlock *BB = MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+  unsigned Reg = MI->getOperand(0).getReg();
+  unsigned Reg16 = getX86SubSuperRegister(Reg, 16);
+  BuildMI(*BB, MI, DL, TII.get(X86::ROR16ri), Reg16).addReg(Reg16).addImm(8);
+  BuildMI(*BB, MI, DL, TII.get(X86::ROR32ri), Reg).addReg(Reg).addImm(16);
+  BuildMI(*BB, MI, DL, TII.get(X86::ROR16ri), Reg16).addReg(Reg16).addImm(8);
+
+  MI->eraseFromParent(); // The pseudo is gone now.
+  return true;
+}
+
 bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
   bool HasAVX = Subtarget.hasAVX();
   MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
@@ -5527,6 +5541,8 @@
   case TargetOpcode::LOAD_STACK_GUARD:
     expandLoadStackGuard(MIB, *this);
     return true;
+  case X86::PSEUDO_BSWAP32r:
+    return ExpandPSEUDO_BSWAP32r(MIB, *this);
   }
   return false;
 }
Index: lib/Target/X86/X86InstrInfo.td
===================================================================
--- lib/Target/X86/X86InstrInfo.td
+++ lib/Target/X86/X86InstrInfo.td
@@ -767,6 +767,9 @@
 // X86 Instruction Predicate Definitions.
 def TruePredicate : Predicate<"true">;
 
+def Has486Insns  : Predicate<"Subtarget->has486Insns()">;
+def No486Insns   : Predicate<"!Subtarget->has486Insns()">;
+def Has586Insns  : Predicate<"Subtarget->has586Insns()">;
 def HasCMov      : Predicate<"Subtarget->hasCMov()">;
 def NoCMov       : Predicate<"!Subtarget->hasCMov()">;
 
@@ -1237,11 +1240,13 @@
 let Constraints = "$src = $dst", SchedRW = [WriteALU] in {
 def BSWAP32r : I<0xC8, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
                  "bswap{l}\t$dst",
-                 [(set GR32:$dst, (bswap GR32:$src))], IIC_BSWAP>, OpSize32, TB;
+                 [(set GR32:$dst, (bswap GR32:$src))], IIC_BSWAP>, OpSize32, TB,
+                 Requires<[Has486Insns]>;
 
 def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
                   "bswap{q}\t$dst",
-                  [(set GR64:$dst, (bswap GR64:$src))], IIC_BSWAP>, TB;
+                  [(set GR64:$dst, (bswap GR64:$src))], IIC_BSWAP>, TB,
+                  Requires<[Has486Insns]>;
 } // Constraints = "$src = $dst", SchedRW
 
 // Bit scan instructions.
@@ -1881,65 +1886,70 @@
 let SchedRW = [WriteALU] in {
 def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
-                "xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB;
+                "xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB,
+                Requires<[Has486Insns]>;
 def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
                  "xadd{w}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB,
-                 OpSize16;
+                 OpSize16, Requires<[Has486Insns]>;
 def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
                  "xadd{l}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB,
-                 OpSize32;
+                 OpSize32, Requires<[Has486Insns]>;
 def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
-                  "xadd{q}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB;
+                  "xadd{q}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB,
+                  Requires<[In64BitMode]>;
 } // SchedRW
 
 let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
 def XADD8rm  : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
-                 "xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB;
+                 "xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB,
+                 Requires<[Has486Insns]>;
 def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
                  "xadd{w}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB,
-                 OpSize16;
+                 OpSize16, Requires<[Has486Insns]>;
 def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                  "xadd{l}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB,
-                 OpSize32;
+                 OpSize32, Requires<[Has486Insns]>;
 def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                  "xadd{q}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB;
+                  "xadd{q}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB,
+                  Requires<[In64BitMode]>;
 }
 
 let SchedRW = [WriteALU] in {
 def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
                    "cmpxchg{b}\t{$src, $dst|$dst, $src}", [],
-                   IIC_CMPXCHG_REG8>, TB;
+                   IIC_CMPXCHG_REG8>, TB, Requires<[Has486Insns]>;
 def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
                     "cmpxchg{w}\t{$src, $dst|$dst, $src}", [],
-                    IIC_CMPXCHG_REG>, TB, OpSize16;
+                    IIC_CMPXCHG_REG>, TB, OpSize16, Requires<[Has486Insns]>;
 def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
                     "cmpxchg{l}\t{$src, $dst|$dst, $src}", [],
-                    IIC_CMPXCHG_REG>, TB, OpSize32;
+                    IIC_CMPXCHG_REG>, TB, OpSize32, Requires<[Has486Insns]>;
 def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
                      "cmpxchg{q}\t{$src, $dst|$dst, $src}", [],
-                     IIC_CMPXCHG_REG>, TB;
+                     IIC_CMPXCHG_REG>, TB, Requires<[In64BitMode]>;
 } // SchedRW
 
 let SchedRW = [WriteALULd, WriteRMW] in {
 let mayLoad = 1, mayStore = 1 in {
 def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
                    "cmpxchg{b}\t{$src, $dst|$dst, $src}", [],
-                   IIC_CMPXCHG_MEM8>, TB;
+                   IIC_CMPXCHG_MEM8>, TB, Requires<[Has486Insns]>;
 def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
                     "cmpxchg{w}\t{$src, $dst|$dst, $src}", [],
-                    IIC_CMPXCHG_MEM>, TB, OpSize16;
+                    IIC_CMPXCHG_MEM>, TB, OpSize16, Requires<[Has486Insns]>;
 def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                     "cmpxchg{l}\t{$src, $dst|$dst, $src}", [],
-                    IIC_CMPXCHG_MEM>, TB, OpSize32;
+                    IIC_CMPXCHG_MEM>, TB, OpSize32, Requires<[Has486Insns]>;
 def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
                      "cmpxchg{q}\t{$src, $dst|$dst, $src}", [],
-                     IIC_CMPXCHG_MEM>, TB;
+                     IIC_CMPXCHG_MEM>, TB, Requires<[In64BitMode]>;
 }
 
 let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
 def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
-                  "cmpxchg8b\t$dst", [], IIC_CMPXCHG_8B>, TB;
+                  "cmpxchg8b\t$dst", [], IIC_CMPXCHG_8B>, TB,
+                  Requires<[Has586Insns]>;
 
 let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
 def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
Index: lib/Target/X86/X86Subtarget.h
===================================================================
--- lib/Target/X86/X86Subtarget.h
+++ lib/Target/X86/X86Subtarget.h
@@ -73,6 +73,12 @@
   /// True if the processor supports X87 instructions.
   bool HasX87;
 
+  /// Target has the instructions added with i486.
+  bool Has486Insns;
+
+  /// Target has the instructions added with i586.
+  bool Has586Insns;
+
   /// True if this processor has conditional move instructions
   /// (generally pentium pro+).
   bool HasCMov;
@@ -374,6 +380,8 @@
   void setPICStyle(PICStyles::Style Style)  { PICStyle = Style; }
 
   bool hasX87() const { return HasX87; }
+  bool has486Insns() const { return Has486Insns; }
+  bool has586Insns() const { return Has586Insns; }
   bool hasCMov() const { return HasCMov; }
   bool hasSSE1() const { return X86SSELevel >= SSE1; }
   bool hasSSE2() const { return X86SSELevel >= SSE2; }
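One assumption worth making explicit, since the corresponding X86Subtarget.cpp
hunk is not shown in this excerpt: like HasX87 and the other feature booleans,
the new Has486Insns and Has586Insns members presumably need to be initialized
to false in X86Subtarget's initializeEnvironment(), so that subtargets which
enable neither feature (e.g. i386) reliably report both as absent.
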
Index: test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
===================================================================
--- test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
+++ test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin | FileCheck %s
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin -mcpu=i686 | FileCheck %s
 ; PR8297
 ;
 ; On i386, i64 cmpxchg is lowered during legalize types to extract the
Index: test/CodeGen/X86/atomic-cpus.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/atomic-cpus.ll
@@ -0,0 +1,115 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck --check-prefix=X64_CX16 --check-prefix=CX8_64 --check-prefix=CX4 --check-prefix=CHECK %s
+; RUN: llc < %s -march=x86-64 -mcpu=x86-64 | FileCheck --check-prefix=X64_NOCX16 --check-prefix=CX8_64 --check-prefix=CX4 --check-prefix=CHECK %s
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=-cx16 | FileCheck --check-prefix=X64_NOCX16 --check-prefix=CX8_64 --check-prefix=CX4 --check-prefix=CHECK %s
+; RUN: llc < %s -march=x86 -mcpu=i586 | FileCheck --check-prefix=X32_NOCX16 --check-prefix=CX8_32 --check-prefix=CX4 --check-prefix=CHECK %s
+; RUN: llc < %s -march=x86 -mcpu=i486 | FileCheck --check-prefix=X32_NOCX16 --check-prefix=NOCX8 --check-prefix=CX4 --check-prefix=CHECK %s
+; RUN: llc < %s -march=x86 -mcpu=i386 | FileCheck --check-prefix=X32_NOCX16 --check-prefix=NOCX8 --check-prefix=NOCX4 --check-prefix=CHECK %s
+
+;; This test checks that various versions of the x86 do, or do not,
+;; support native atomic instructions of different sizes.
+
+define void @test_i128(i128* %a) nounwind {
+; CHECK-LABEL: test_i128:
+entry:
+; X64_NOCX16: __atomic_compare_exchange_16
+; X32_NOCX16: __atomic_compare_exchange{{$}}
+; X64_CX16: cmpxchg16b
+  %0 = cmpxchg i128* %a, i128 1, i128 1 seq_cst seq_cst
+; X64_NOCX16: __atomic_exchange_16
+; X32_NOCX16: __atomic_exchange{{$}}
+; X64_CX16: cmpxchg16b
+  %1 = atomicrmw xchg i128* %a, i128 1 seq_cst
+; X64_NOCX16: __atomic_fetch_add_16
+; X32_NOCX16: __atomic_compare_exchange{{$}}
+; X64_CX16: cmpxchg16b
+  %2 = atomicrmw add i128* %a, i128 1 seq_cst
+; X64_NOCX16: __atomic_fetch_sub_16
+; X32_NOCX16: __atomic_compare_exchange{{$}}
+; X64_CX16: cmpxchg16b
+  %3 = atomicrmw sub i128* %a, i128 1 seq_cst
+; X64_NOCX16: __atomic_fetch_and_16
+; X32_NOCX16: __atomic_compare_exchange{{$}}
+; X64_CX16: cmpxchg16b
+  %4 = atomicrmw and i128* %a, i128 1 seq_cst
+; X64_NOCX16: __atomic_fetch_nand_16
+; X32_NOCX16: __atomic_compare_exchange{{$}}
+; X64_CX16: cmpxchg16b
+  %5 = atomicrmw nand i128* %a, i128 1 seq_cst
+; X64_NOCX16: __atomic_fetch_or_16
+; X32_NOCX16: __atomic_compare_exchange{{$}}
+; X64_CX16: cmpxchg16b
+  %6 = atomicrmw or i128* %a, i128 1 seq_cst
+; X64_NOCX16: __atomic_fetch_xor_16
+; X32_NOCX16: __atomic_compare_exchange{{$}}
+; X64_CX16: cmpxchg16b
+  %7 = atomicrmw xor i128* %a, i128 1 seq_cst
+  ret void
+}
+
+define void @test_i64(i64* %a) nounwind {
+; CHECK-LABEL: test_i64:
+entry:
+; NOCX8: __atomic_compare_exchange_8
+; CX8_64: cmpxchgq
+; CX8_32: cmpxchg8b
+  %0 = cmpxchg i64* %a, i64 1, i64 1 seq_cst seq_cst
+; NOCX8: __atomic_exchange_8
+; CX8_64: xchgq
+; CX8_32: cmpxchg8b
+  %1 = atomicrmw xchg i64* %a, i64 1 seq_cst
+; NOCX8: __atomic_fetch_add_8
+; CX8_64: lock incq
+; CX8_32: cmpxchg8b
+  %2 = atomicrmw add i64* %a, i64 1 seq_cst
+; NOCX8: __atomic_fetch_sub_8
+; CX8_64: lock decq
+; CX8_32: cmpxchg8b
+  %3 = atomicrmw sub i64* %a, i64 1 seq_cst
+; NOCX8: __atomic_fetch_and_8
+; CX8_64: lock andq
+; CX8_32: cmpxchg8b
+  %4 = atomicrmw and i64* %a, i64 1 seq_cst
+; NOCX8: __atomic_fetch_nand_8
+; CX8_64: cmpxchgq
+; CX8_32: cmpxchg8b
+  %5 = atomicrmw nand i64* %a, i64 1 seq_cst
+; NOCX8: __atomic_fetch_or_8
+; CX8_64: lock orq
+; CX8_32: cmpxchg8b
+  %6 = atomicrmw or i64* %a, i64 1 seq_cst
+; NOCX8: __atomic_fetch_xor_8
+; CX8_64: lock xorq
+; CX8_32: cmpxchg8b
+  %7 = atomicrmw xor i64* %a, i64 1 seq_cst
+  ret void
+}
+
+define void @test_i32(i32* %a) nounwind {
+; CHECK-LABEL: test_i32:
+entry:
+; NOCX4: __atomic_compare_exchange_4
+; CX4: lock cmpxchgl
+  %0 = cmpxchg i32* %a, i32 1, i32 1 seq_cst seq_cst
+; NOCX4: __atomic_exchange_4
+; CX4: xchgl
+  %1 = atomicrmw xchg i32* %a, i32 1 seq_cst
+; NOCX4: __atomic_fetch_add_4
+; CX4: lock incl
+  %2 = atomicrmw add i32* %a, i32 1 seq_cst
+; NOCX4: __atomic_fetch_sub_4
+; CX4: lock decl
+  %3 = atomicrmw sub i32* %a, i32 1 seq_cst
+; NOCX4: __atomic_fetch_and_4
+; CX4: lock andl
+  %4 = atomicrmw and i32* %a, i32 1 seq_cst
+; NOCX4: __atomic_fetch_nand_4
+; CX4: lock cmpxchgl
+  %5 = atomicrmw nand i32* %a, i32 1 seq_cst
+; NOCX4: __atomic_fetch_or_4
+; CX4: lock orl
+  %6 = atomicrmw or i32* %a, i32 1 seq_cst
+; NOCX4: __atomic_fetch_xor_4
+; CX4: lock xorl
+  %7 = atomicrmw xor i32* %a, i32 1 seq_cst
+  ret void
+}
Index: test/CodeGen/X86/atomic-flags.ll
===================================================================
--- test/CodeGen/X86/atomic-flags.ll
+++ test/CodeGen/X86/atomic-flags.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mcpu=i686 -verify-machineinstrs | FileCheck %s
 
 ; Make sure that flags are properly preserved despite atomic optimizations.
 
Index: test/CodeGen/X86/atomic-pointer.ll
===================================================================
--- test/CodeGen/X86/atomic-pointer.ll
+++ test/CodeGen/X86/atomic-pointer.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-none-linux -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=i686-none-linux -mcpu=i686 -verify-machineinstrs | FileCheck %s
 
 define i32* @test_atomic_ptr_load(i32** %a0) {
 ; CHECK: test_atomic_ptr_load
Index: test/CodeGen/X86/atomic_mi.ll
===================================================================
--- test/CodeGen/X86/atomic_mi.ll
+++ test/CodeGen/X86/atomic_mi.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X32
+; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs -mcpu=i686 | FileCheck %s --check-prefix X32
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=slow-incdec -verify-machineinstrs | FileCheck %s --check-prefix SLOW_INC
 
 ; This file checks that atomic (non-seq_cst) stores of immediate values are
Index: test/CodeGen/X86/bswap.ll
===================================================================
--- test/CodeGen/X86/bswap.ll
+++ test/CodeGen/X86/bswap.ll
@@ -1,7 +1,8 @@
 ; bswap should be constant folded when it is passed a constant argument
 
-; RUN: llc < %s -march=x86 -mcpu=i686 | FileCheck %s
-; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK64
+; RUN: llc < %s -march=x86 -mcpu=i386 | FileCheck --check-prefix=CHECK386 --check-prefix=CHECK %s
+; RUN: llc < %s -march=x86 -mcpu=i486 | FileCheck --check-prefix=CHECK486 --check-prefix=CHECKBSW --check-prefix=CHECK %s
+; RUN: llc < %s -march=x86-64 | FileCheck --check-prefix=CHECK64 --check-prefix=CHECKBSW --check-prefix=CHECK %s
 
 declare i16 @llvm.bswap.i16(i16)
 
@@ -11,30 +12,31 @@
 
 define i16 @W(i16 %A) {
 ; CHECK-LABEL: W:
-; CHECK: rolw $8, %ax
-
-; CHECK64-LABEL: W:
-; CHECK64: rolw $8, %
+; CHECK: rolw $8, %
  %Z = call i16 @llvm.bswap.i16( i16 %A )         ; <i16> [#uses=1]
  ret i16 %Z
 }
 
 define i32 @X(i32 %A) {
 ; CHECK-LABEL: X:
-; CHECK: bswapl %eax
-
-; CHECK64-LABEL: X:
-; CHECK64: bswapl %
+; CHECK386: rorw $8, %ax
+; CHECK386: rorl $16, %eax
+; CHECK386: rorw $8, %ax
+; CHECKBSW: bswapl %
  %Z = call i32 @llvm.bswap.i32( i32 %A )         ; <i32> [#uses=1]
  ret i32 %Z
 }
 
 define i64 @Y(i64 %A) {
 ; CHECK-LABEL: Y:
-; CHECK: bswapl %eax
-; CHECK: bswapl %edx
-
-; CHECK64-LABEL: Y:
+; CHECK386: rorw $8, %ax
+; CHECK386: rorl $16, %eax
+; CHECK386: rorw $8, %ax
+; CHECK386: rorw $8, %dx
+; CHECK386: rorl $16, %edx
+; CHECK386: rorw $8, %dx
+; CHECK486: bswapl %eax
+; CHECK486: bswapl %edx
 ; CHECK64: bswapq %
  %Z = call i64 @llvm.bswap.i64( i64 %A )         ; <i64> [#uses=1]
  ret i64 %Z
@@ -44,12 +46,12 @@
 define i32 @test1(i32 %a) nounwind readnone {
 entry:
 ; CHECK-LABEL: test1:
-; CHECK: bswapl [[REG:%.*]]
-; CHECK: shrl $16, [[REG]]
-
-; CHECK64-LABEL: test1:
-; CHECK64: bswapl [[REG:%.*]]
-; CHECK64: shrl $16, [[REG]]
+; CHECK386: rorw $8, %[[REG:.*]]
+; CHECK386: rorl $16, %e[[REG]]
+; CHECK386: rorw $8, %[[REG]]
+; CHECK386: shrl $16, %e[[REG]]
+; CHECKBSW: bswapl [[REG:%.*]]
+; CHECKBSW: shrl $16, [[REG]]
  %and = lshr i32 %a, 8
  %shr3 = and i32 %and, 255
  %and2 = shl i32 %a, 8
@@ -61,12 +63,12 @@
 define i32 @test2(i32 %a) nounwind readnone {
 entry:
 ; CHECK-LABEL: test2:
-; CHECK: bswapl [[REG:%.*]]
-; CHECK: sarl $16, [[REG]]
-
-; CHECK64-LABEL: test2:
-; CHECK64: bswapl [[REG:%.*]]
-; CHECK64: sarl $16, [[REG]]
+; CHECK386: rorw $8, %[[REG:.*]]
+; CHECK386: rorl $16, %e[[REG]]
+; CHECK386: rorw $8, %[[REG]]
+; CHECK386: sarl $16, %e[[REG]]
+; CHECKBSW: bswapl [[REG:%.*]]
+; CHECKBSW: sarl $16, [[REG]]
  %and = lshr i32 %a, 8
  %shr4 = and i32 %and, 255
  %and2 = shl i32 %a, 8
@@ -86,11 +88,8 @@
 define i64 @not_bswap() {
 ; CHECK-LABEL: not_bswap:
 ; CHECK-NOT: bswapl
+; CHECK-NOT: bswapq
 ; CHECK: ret
-
-; CHECK64-LABEL: not_bswap:
-; CHECK64-NOT: bswapq
-; CHECK64: ret
  %init = load i16, i16* @var16
  %big = zext i16 %init to i64
 
@@ -109,12 +108,8 @@
 define i64 @not_useful_bswap() {
 ; CHECK-LABEL: not_useful_bswap:
 ; CHECK-NOT: bswapl
+; CHECK-NOT: bswapq
 ; CHECK: ret
-
-; CHECK64-LABEL: not_useful_bswap:
-; CHECK64-NOT: bswapq
-; CHECK64: ret
-
  %init = load i8, i8* @var8
  %big = zext i8 %init to i64
 
@@ -131,11 +126,13 @@
 
 define i64 @finally_useful_bswap() {
 ; CHECK-LABEL: finally_useful_bswap:
-; CHECK: bswapl [[REG:%.*]]
-; CHECK: shrl $16, [[REG]]
-; CHECK: ret
-
-; CHECK64-LABEL: finally_useful_bswap:
+; CHECK386: rorw $8, %[[REG:.*]]
+; CHECK386: rorl $16, %e[[REG]]
+; CHECK386: rorw $8, %[[REG]]
+; CHECK386: shrl $16, %e[[REG]]
+; CHECK486: bswapl [[REG:%.*]]
+; CHECK486: shrl $16, [[REG]]
+; CHECK486: ret
 ; CHECK64: bswapq [[REG:%.*]]
 ; CHECK64: shrq $48, [[REG]]
 ; CHECK64: ret
Index: test/CodeGen/X86/cmpxchg-clobber-flags.ll
===================================================================
--- test/CodeGen/X86/cmpxchg-clobber-flags.ll
+++ test/CodeGen/X86/cmpxchg-clobber-flags.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s -check-prefix=i386
-; RUN: llc -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=i386f
+; RUN: llc -mtriple=i386-linux-gnu -mcpu=i686 %s -o - | FileCheck %s -check-prefix=i386
+; RUN: llc -mtriple=i386-linux-gnu -mcpu=i686 -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=i386f
 ; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s -check-prefix=x8664
 ; RUN: llc -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664
 
Index: test/CodeGen/X86/nocx16.ll
===================================================================
--- test/CodeGen/X86/nocx16.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=-cx16 | FileCheck %s
-define void @test(i128* %a) nounwind {
-entry:
-; CHECK: __atomic_compare_exchange_16
-  %0 = cmpxchg i128* %a, i128 1, i128 1 seq_cst seq_cst
-; CHECK: __atomic_exchange_16
-  %1 = atomicrmw xchg i128* %a, i128 1 seq_cst
-; CHECK: __atomic_fetch_add_16
-  %2 = atomicrmw add i128* %a, i128 1 seq_cst
-; CHECK: __atomic_fetch_sub_16
-  %3 = atomicrmw sub i128* %a, i128 1 seq_cst
-; CHECK: __atomic_fetch_and_16
-  %4 = atomicrmw and i128* %a, i128 1 seq_cst
-; CHECK: __atomic_fetch_nand_16
-  %5 = atomicrmw nand i128* %a, i128 1 seq_cst
-; CHECK: __atomic_fetch_or_16
-  %6 = atomicrmw or i128* %a, i128 1 seq_cst
-; CHECK: __atomic_fetch_xor_16
-  %7 = atomicrmw xor i128* %a, i128 1 seq_cst
-  ret void
-}
Index: test/CodeGen/X86/peephole-na-phys-copy-folding.ll
===================================================================
--- test/CodeGen/X86/peephole-na-phys-copy-folding.ll
+++ test/CodeGen/X86/peephole-na-phys-copy-folding.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s
+; RUN: llc -mtriple=i686-linux-gnu -mcpu=i686 %s -o - | FileCheck %s
 ; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf %s -o - | FileCheck %s
 
 ; TODO: Reenable verify-machineinstrs once the if (!AXDead) // FIXME in
Index: test/Transforms/AtomicExpand/X86/expand-atomic-rmw-initial-load.ll
===================================================================
--- test/Transforms/AtomicExpand/X86/expand-atomic-rmw-initial-load.ll
+++ test/Transforms/AtomicExpand/X86/expand-atomic-rmw-initial-load.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S %s -atomic-expand -mtriple=i686-linux-gnu | FileCheck %s
+; RUN: opt -S %s -atomic-expand -mtriple=i686-linux-gnu -mcpu=i686 | FileCheck %s
 
 ; This file tests the function `llvm::expandAtomicRMWToCmpXchg`.
 ; It isn't technically target specific, but is exposed through a pass that is.