diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -39,8 +39,8 @@ def FeatureCMOV : SubtargetFeature<"cmov","HasCMOV", "true", "Enable conditional move instructions">; -def FeatureCMPXCHG8B : SubtargetFeature<"cx8", "HasCMPXCHG8B", "true", - "Support CMPXCHG8B instructions">; +def FeatureCX8 : SubtargetFeature<"cx8", "HasCX8", "true", + "Support CMPXCHG8B instructions">; def FeatureCRC32 : SubtargetFeature<"crc32", "HasCRC32", "true", "Enable SSE 4.2 CRC32 instruction">; @@ -100,9 +100,9 @@ // is used to enforce that only 64-bit capable CPUs are used in 64-bit mode. def FeatureX86_64 : SubtargetFeature<"64bit", "HasX86_64", "true", "Support 64-bit instructions">; -def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCMPXCHG16B", "true", - "64-bit with cmpxchg16b", - [FeatureCMPXCHG8B]>; +def FeatureCX16 : SubtargetFeature<"cx16", "HasCX16", "true", + "64-bit with cmpxchg16b", + [FeatureCX8]>; def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", "Support SSE 4a instructions", [FeatureSSE3]>; @@ -636,11 +636,11 @@ def ProcessorFeatures { // x86-64 and x86-64-v[234] list X86_64V1Features = [ - FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE2, + FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureX86_64, ]; list X86_64V2Features = !listconcat(X86_64V1Features, [ - FeatureCMPXCHG16B, FeatureLAHFSAHF64, FeatureCRC32, FeaturePOPCNT, + FeatureCX16, FeatureLAHFSAHF64, FeatureCRC32, FeaturePOPCNT, FeatureSSE42 ]); list X86_64V3Features = !listconcat(X86_64V2Features, [ @@ -872,14 +872,14 @@ // Atom list AtomFeatures = [FeatureX87, - FeatureCMPXCHG8B, + FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSSE3, FeatureFXSR, FeatureNOPL, FeatureX86_64, - FeatureCMPXCHG16B, + FeatureCX16, FeatureMOVBE, FeatureLAHFSAHF64]; list AtomTuning = [ProcIntelAtom, @@ -978,13 +978,13 @@ // Knights Landing list KNLFeatures = [FeatureX87, - FeatureCMPXCHG8B, + FeatureCX8, FeatureCMOV, FeatureMMX, FeatureFXSR, FeatureNOPL, FeatureX86_64, - FeatureCMPXCHG16B, + FeatureCX16, FeatureCRC32, FeaturePOPCNT, FeaturePCLMUL, @@ -1022,12 +1022,12 @@ // Barcelona list BarcelonaFeatures = [FeatureX87, - FeatureCMPXCHG8B, + FeatureCX8, FeatureSSE4A, Feature3DNowA, FeatureFXSR, FeatureNOPL, - FeatureCMPXCHG16B, + FeatureCX16, FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT, @@ -1041,7 +1041,7 @@ // Bobcat list BtVer1Features = [FeatureX87, - FeatureCMPXCHG8B, + FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSSE3, @@ -1049,7 +1049,7 @@ FeatureFXSR, FeatureNOPL, FeatureX86_64, - FeatureCMPXCHG16B, + FeatureCX16, FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT, @@ -1085,11 +1085,11 @@ // Bulldozer list BdVer1Features = [FeatureX87, - FeatureCMPXCHG8B, + FeatureCX8, FeatureCMOV, FeatureXOP, FeatureX86_64, - FeatureCMPXCHG16B, + FeatureCX16, FeatureAES, FeatureCRC32, FeaturePRFCHW, @@ -1149,7 +1149,7 @@ FeatureCLZERO, FeatureCMOV, FeatureX86_64, - FeatureCMPXCHG16B, + FeatureCX16, FeatureCRC32, FeatureF16C, FeatureFMA, @@ -1226,7 +1226,7 @@ // most common X86 processors. The tunings might be changed over time. It is // recommended to use "x86-64" in lit tests for consistency. def : ProcModel<"generic", SandyBridgeModel, - [FeatureX87, FeatureCMPXCHG8B, FeatureX86_64], + [FeatureX87, FeatureCX8, FeatureX86_64], [TuningSlow3OpsLEA, TuningSlowDivide64, TuningMacroFusion, @@ -1238,25 +1238,25 @@ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; def : Proc<"i486", [FeatureX87], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; -def : Proc<"i586", [FeatureX87, FeatureCMPXCHG8B], +def : Proc<"i586", [FeatureX87, FeatureCX8], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; -def : Proc<"pentium", [FeatureX87, FeatureCMPXCHG8B], +def : Proc<"pentium", [FeatureX87, FeatureCX8], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; -def : Proc<"pentium-mmx", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX], +def : Proc<"pentium-mmx", [FeatureX87, FeatureCX8, FeatureMMX], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; -def : Proc<"i686", [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV], +def : Proc<"i686", [FeatureX87, FeatureCX8, FeatureCMOV], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; -def : Proc<"pentiumpro", [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, +def : Proc<"pentiumpro", [FeatureX87, FeatureCX8, FeatureCMOV, FeatureNOPL], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; -def : Proc<"pentium2", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureCMOV, +def : Proc<"pentium2", [FeatureX87, FeatureCX8, FeatureMMX, FeatureCMOV, FeatureFXSR, FeatureNOPL], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; foreach P = ["pentium3", "pentium3m"] in { - def : Proc; } @@ -1272,42 +1272,42 @@ // changes slightly. def : ProcModel<"pentium-m", GenericPostRAModel, - [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2, + [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; foreach P = ["pentium4", "pentium4m"] in { def : ProcModel; } // Intel Quark. -def : Proc<"lakemont", [FeatureCMPXCHG8B], +def : Proc<"lakemont", [FeatureCX8], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; // Intel Core Duo. def : ProcModel<"yonah", SandyBridgeModel, - [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3, + [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL, FeatureCMOV], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; // NetBurst. def : ProcModel<"prescott", GenericPostRAModel, - [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3, + [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL, FeatureCMOV], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; def : ProcModel<"nocona", GenericPostRAModel, [ FeatureX87, - FeatureCMPXCHG8B, + FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL, FeatureX86_64, - FeatureCMPXCHG16B, + FeatureCX16, ], [ TuningSlowUAMem16, @@ -1317,14 +1317,14 @@ // Intel Core 2 Solo/Duo. def : ProcModel<"core2", SandyBridgeModel, [ FeatureX87, - FeatureCMPXCHG8B, + FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSSE3, FeatureFXSR, FeatureNOPL, FeatureX86_64, - FeatureCMPXCHG16B, + FeatureCX16, FeatureLAHFSAHF64 ], [ @@ -1334,14 +1334,14 @@ ]>; def : ProcModel<"penryn", SandyBridgeModel, [ FeatureX87, - FeatureCMPXCHG8B, + FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE41, FeatureFXSR, FeatureNOPL, FeatureX86_64, - FeatureCMPXCHG16B, + FeatureCX16, FeatureLAHFSAHF64 ], [ @@ -1431,35 +1431,35 @@ // AMD CPUs. -def : Proc<"k6", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX], +def : Proc<"k6", [FeatureX87, FeatureCX8, FeatureMMX], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; -def : Proc<"k6-2", [FeatureX87, FeatureCMPXCHG8B, Feature3DNow], +def : Proc<"k6-2", [FeatureX87, FeatureCX8, Feature3DNow], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; -def : Proc<"k6-3", [FeatureX87, FeatureCMPXCHG8B, Feature3DNow], +def : Proc<"k6-3", [FeatureX87, FeatureCX8, Feature3DNow], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; foreach P = ["athlon", "athlon-tbird"] in { - def : Proc; } foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in { - def : Proc; } foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in { - def : Proc; } foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in { - def : Proc; @@ -1497,7 +1497,7 @@ def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features, ProcessorFeatures.ZN3Tuning>; -def : Proc<"geode", [FeatureX87, FeatureCMPXCHG8B, Feature3DNowA], +def : Proc<"geode", [FeatureX87, FeatureCX8, Feature3DNowA], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; def : Proc<"winchip-c6", [FeatureX87, FeatureMMX], @@ -1506,7 +1506,7 @@ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; def : Proc<"c3", [FeatureX87, Feature3DNow], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; -def : Proc<"c3-2", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, +def : Proc<"c3-2", [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE1, FeatureFXSR, FeatureCMOV], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -170,7 +170,7 @@ // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b. // FIXME: Should we be limiting the atomic size on other configs? Default is // 1024. - if (!Subtarget.hasCMPXCHG8B()) + if (!Subtarget.canUseCMPXCHG8B()) setMaxAtomicSizeInBitsSupported(32); // Set up the register classes. @@ -516,9 +516,8 @@ if (!Subtarget.is64Bit()) setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); - if (Subtarget.hasCMPXCHG16B()) { + if (Subtarget.canUseCMPXCHG16B()) setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom); - } // FIXME - use subtarget debug flags if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() && @@ -30362,9 +30361,9 @@ unsigned OpWidth = MemType->getPrimitiveSizeInBits(); if (OpWidth == 64) - return Subtarget.hasCMPXCHG8B() && !Subtarget.is64Bit(); + return Subtarget.canUseCMPXCHG8B() && !Subtarget.is64Bit(); if (OpWidth == 128) - return Subtarget.hasCMPXCHG16B(); + return Subtarget.canUseCMPXCHG16B(); return false; } @@ -32607,7 +32606,7 @@ EVT T = N->getValueType(0); assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair"); bool Regs64bit = T == MVT::i128; - assert((!Regs64bit || Subtarget.hasCMPXCHG16B()) && + assert((!Regs64bit || Subtarget.canUseCMPXCHG16B()) && "64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS requires CMPXCHG16B"); MVT HalfT = Regs64bit ? MVT::i64 : MVT::i32; SDValue cpInL, cpInH; diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -895,7 +895,7 @@ } let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX], - Predicates = [HasCmpxchg8b], SchedRW = [WriteCMPXCHGRMW], + Predicates = [HasCX8], SchedRW = [WriteCMPXCHGRMW], isCodeGenOnly = 1, usesCustomInserter = 1 in { def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr), "cmpxchg8b\t$ptr", @@ -903,7 +903,7 @@ } let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX], - Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW], + Predicates = [HasCX16,In64BitMode], SchedRW = [WriteCMPXCHGRMW], isCodeGenOnly = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0 in { def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr), "cmpxchg16b\t$ptr", @@ -930,7 +930,7 @@ // the instruction and we are sure we will have a valid register to restore // the value of RBX. let Defs = [RAX, RDX, RBX, EFLAGS], Uses = [RAX, RCX, RDX], - Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW], + Predicates = [HasCX16,In64BitMode], SchedRW = [WriteCMPXCHGRMW], isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0, Constraints = "$rbx_save = $dst" in { @@ -942,7 +942,7 @@ // Pseudo instruction that doesn't read/write RBX. Will be turned into either // LCMPXCHG16B_SAVE_RBX or LCMPXCHG16B via a custom inserter. let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RCX, RDX], - Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW], + Predicates = [HasCX16,In64BitMode], SchedRW = [WriteCMPXCHGRMW], isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0, usesCustomInserter = 1 in { diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -981,8 +981,8 @@ def HasRDPID : Predicate<"Subtarget->hasRDPID()">; def HasWAITPKG : Predicate<"Subtarget->hasWAITPKG()">; def HasINVPCID : Predicate<"Subtarget->hasINVPCID()">; -def HasCmpxchg8b : Predicate<"Subtarget->hasCMPXCHG8B()">; -def HasCmpxchg16b: Predicate<"Subtarget->hasCMPXCHG16B()">; +def HasCX8 : Predicate<"Subtarget->hasCX8()">; +def HasCX16 : Predicate<"Subtarget->hasCX16()">; def HasPCONFIG : Predicate<"Subtarget->hasPCONFIG()">; def HasENQCMD : Predicate<"Subtarget->hasENQCMD()">; def HasKL : Predicate<"Subtarget->hasKL()">; @@ -2229,13 +2229,13 @@ let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), - "cmpxchg8b\t$dst", []>, TB, Requires<[HasCmpxchg8b]>; + "cmpxchg8b\t$dst", []>, TB, Requires<[HasCX8]>; let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in // NOTE: In64BitMode check needed for the AssemblerPredicate. def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst), "cmpxchg16b\t$dst", []>, - TB, Requires<[HasCmpxchg16b,In64BitMode]>; + TB, Requires<[HasCX16,In64BitMode]>; } // SchedRW, mayLoad, mayStore, hasSideEffects diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -76,7 +76,7 @@ bool HasX87 = false; /// True if the processor supports CMPXCHG8B. - bool HasCMPXCHG8B = false; + bool HasCX8 = false; /// True if this processor has NOPL instruction /// (generally pentium pro+). @@ -227,7 +227,7 @@ /// True if this processor has the CMPXCHG16B instruction; /// this is true for most x86-64 chips, but not the first AMD chips. - bool HasCMPXCHG16B = false; + bool HasCX16 = false; /// True if the LEA instruction should be used for adjusting /// the stack pointer. This is an optimization for Intel Atom processors. @@ -632,7 +632,13 @@ void setPICStyle(PICStyles::Style Style) { PICStyle = Style; } bool hasX87() const { return HasX87; } - bool hasCMPXCHG8B() const { return HasCMPXCHG8B; } + bool hasCX8() const { return HasCX8; } + bool hasCX16() const { return HasCX16; } + bool canUseCMPXCHG8B() const { return hasCX8(); } + bool canUseCMPXCHG16B() const { + // CX16 is just the CPUID bit, instruction requires 64-bit mode too. + return hasCX16() && is64Bit(); + } bool hasNOPL() const { return HasNOPL; } // SSE codegen depends on cmovs, and all SSE1+ processors support them. // All 64-bit processors support cmov. @@ -712,7 +718,6 @@ bool isUnalignedMem16Slow() const { return IsUnalignedMem16Slow; } bool isUnalignedMem32Slow() const { return IsUnalignedMem32Slow; } bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; } - bool hasCMPXCHG16B() const { return HasCMPXCHG16B && is64Bit(); } bool useLeaForSP() const { return UseLeaForSP; } bool hasPOPCNTFalseDeps() const { return HasPOPCNTFalseDeps; } bool hasLZCNTFalseDeps() const { return HasLZCNTFalseDeps; } diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -42,7 +42,7 @@ // These features don't have any intrinsics or ABI effect. X86::FeatureNOPL, - X86::FeatureCMPXCHG16B, + X86::FeatureCX16, X86::FeatureLAHFSAHF64, // Some older targets can be setup to fold unaligned loads.