Index: lib/Target/X86/X86.td =================================================================== --- lib/Target/X86/X86.td +++ lib/Target/X86/X86.td @@ -296,29 +296,54 @@ include "X86Schedule.td" +def ProcIntelOthers : SubtargetFeature<"generic", "X86ProcFamily", + "Others", "Intel Generic processors">; def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom", "Intel Atom processors">; def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM", "Intel Silvermont processors">; def ProcIntelGLM : SubtargetFeature<"glm", "X86ProcFamily", "IntelGLM", "Intel Goldmont processors">; +def ProcIntelHSW : SubtargetFeature<"haswell", "X86ProcFamily", + "IntelHaswell", "Intel Haswell processors">; +def ProcIntelBDW : SubtargetFeature<"broadwell", "X86ProcFamily", + "IntelBroadwell", "Intel Broadwell processors">; +def ProcIntelSKL : SubtargetFeature<"skylake", "X86ProcFamily", + "IntelSkylake", "Intel Skylake processors">; +def ProcIntelKNL : SubtargetFeature<"knl", "X86ProcFamily", + "IntelKNL", "Intel Knights Landing processors">; +def ProcIntelSKX : SubtargetFeature<"skx", "X86ProcFamily", + "IntelSKX", "Intel Skylake Server processors">; +def ProcIntelCNL : SubtargetFeature<"cannonlake", "X86ProcFamily", + "IntelCannonlake", "Intel Cannonlake processors">; class Proc Features> : ProcessorModel; -def : Proc<"generic", [FeatureX87, FeatureSlowUAMem16]>; -def : Proc<"i386", [FeatureX87, FeatureSlowUAMem16]>; -def : Proc<"i486", [FeatureX87, FeatureSlowUAMem16]>; -def : Proc<"i586", [FeatureX87, FeatureSlowUAMem16]>; -def : Proc<"pentium", [FeatureX87, FeatureSlowUAMem16]>; -def : Proc<"pentium-mmx", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; -def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16]>; -def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV]>; -def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, +def : Proc<"generic", [ProcIntelOthers, FeatureX87, + FeatureSlowUAMem16]>; +def : Proc<"i386", 
[ProcIntelOthers, FeatureX87, + FeatureSlowUAMem16]>; +def : Proc<"i486", [ProcIntelOthers, FeatureX87, + FeatureSlowUAMem16]>; +def : Proc<"i586", [ProcIntelOthers, FeatureX87, + FeatureSlowUAMem16]>; +def : Proc<"pentium", [ProcIntelOthers, FeatureX87, + FeatureSlowUAMem16]>; +def : Proc<"pentium-mmx", [ProcIntelOthers, FeatureX87, + FeatureSlowUAMem16, FeatureMMX]>; +def : Proc<"i686", [ProcIntelOthers, FeatureX87, + FeatureSlowUAMem16]>; +def : Proc<"pentiumpro", [ProcIntelOthers, FeatureX87, + FeatureSlowUAMem16, FeatureCMOV]>; +def : Proc<"pentium2", [ProcIntelOthers, FeatureX87, + FeatureSlowUAMem16, FeatureMMX, FeatureCMOV, FeatureFXSR]>; -def : Proc<"pentium3", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, +def : Proc<"pentium3", [ProcIntelOthers, FeatureX87, + FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, FeatureFXSR]>; -def : Proc<"pentium3m", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, +def : Proc<"pentium3m", [ProcIntelOthers, FeatureX87, + FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, FeatureFXSR, FeatureSlowBTMem]>; // Enable the PostRAScheduler for SSE2 and SSE3 class cpus. @@ -332,30 +357,36 @@ // changes slightly. def : ProcessorModel<"pentium-m", GenericPostRAModel, - [FeatureX87, FeatureSlowUAMem16, FeatureMMX, + [ProcIntelOthers, FeatureX87, + FeatureSlowUAMem16, FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>; def : ProcessorModel<"pentium4", GenericPostRAModel, - [FeatureX87, FeatureSlowUAMem16, FeatureMMX, + [ProcIntelOthers, FeatureX87, + FeatureSlowUAMem16, FeatureMMX, FeatureSSE2, FeatureFXSR]>; def : ProcessorModel<"pentium4m", GenericPostRAModel, - [FeatureX87, FeatureSlowUAMem16, FeatureMMX, + [ProcIntelOthers, FeatureX87, + FeatureSlowUAMem16, FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>; // Intel Quark. -def : Proc<"lakemont", []>; +def : Proc<"lakemont", [ProcIntelOthers]>; // Intel Core Duo. 
def : ProcessorModel<"yonah", SandyBridgeModel, - [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, + [ProcIntelOthers, FeatureX87, + FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureSlowBTMem]>; // NetBurst. def : ProcessorModel<"prescott", GenericPostRAModel, - [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, + [ProcIntelOthers, FeatureX87, + FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureSlowBTMem]>; def : ProcessorModel<"nocona", GenericPostRAModel, [ + ProcIntelOthers, FeatureX87, FeatureSlowUAMem16, FeatureMMX, @@ -367,6 +398,7 @@ // Intel Core 2 Solo/Duo. def : ProcessorModel<"core2", SandyBridgeModel, [ + ProcIntelOthers, FeatureX87, FeatureSlowUAMem16, FeatureMMX, @@ -377,6 +409,7 @@ FeatureLAHFSAHF ]>; def : ProcessorModel<"penryn", SandyBridgeModel, [ + ProcIntelOthers, FeatureX87, FeatureSlowUAMem16, FeatureMMX, @@ -463,6 +496,7 @@ // "Arrandale" along with corei3 and corei5 class NehalemProc : ProcessorModel : ProcessorModel : ProcModel; @@ -537,6 +573,7 @@ class IvyBridgeProc : ProcModel; @@ -555,11 +592,14 @@ ]>; class HaswellProc : ProcModel; + HSWFeatures.Value, [ + ProcIntelHSW + ]>; def : HaswellProc<"haswell">; def : HaswellProc<"core-avx2">; // Legacy alias. def BDWFeatures : ProcessorFeatures; @@ -579,12 +619,15 @@ // FIXME: define SKL model class SkylakeClientProc : ProcModel; + SKLFeatures.Value, [ + ProcIntelSKL + ]>; def : SkylakeClientProc<"skylake">; // FIXME: define KNL model class KnightsLandingProc : ProcModel : ProcModel; + SKXFeatures.Value, [ + ProcIntelSKX + ]>; def : SkylakeServerProc<"skylake-avx512">; def : SkylakeServerProc<"skx">; // Legacy alias. @@ -624,7 +669,9 @@ ]>; class CannonlakeProc : ProcModel; + CNLFeatures.Value, [ + ProcIntelCNL + ]>; def : CannonlakeProc<"cannonlake">; // AMD CPUs. 
Index: lib/Target/X86/X86Subtarget.h =================================================================== --- lib/Target/X86/X86Subtarget.h +++ lib/Target/X86/X86Subtarget.h @@ -58,7 +58,16 @@ }; enum X86ProcFamilyEnum { - Others, IntelAtom, IntelSLM, IntelGLM + Others, + IntelAtom, + IntelSLM, + IntelGLM, + IntelHaswell, + IntelBroadwell, + IntelSkylake, + IntelKNL, + IntelSKX, + IntelCannonlake }; /// X86 processor family: Intel Atom, and others @@ -332,6 +341,10 @@ /// True if compiling for 16-bit, false for 32-bit or 64-bit. bool In16BitMode; + /// Contains the overhead of gather/scatter instructions + int GatherOverhead; + int ScatterOverhead; + X86SelectionDAGInfo TSInfo; // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which // X86TargetLowering needs. @@ -477,6 +490,8 @@ bool isPMULLDSlow() const { return IsPMULLDSlow; } bool isUnalignedMem16Slow() const { return IsUAMem16Slow; } bool isUnalignedMem32Slow() const { return IsUAMem32Slow; } + int getGatherOverhead() const { return GatherOverhead; } + int getScatterOverhead() const { return ScatterOverhead; } bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; } bool hasCmpxchg16b() const { return HasCmpxchg16b; } bool useLeaForSP() const { return UseLeaForSP; } @@ -509,6 +524,9 @@ bool isXRaySupported() const override { return is64Bit(); } + X86ProcFamilyEnum getProcFamily() const { return X86ProcFamily; } + + /// TODO: to be removed later and replaced with suitable properties bool isAtom() const { return X86ProcFamily == IntelAtom; } bool isSLM() const { return X86ProcFamily == IntelSLM; } bool useSoftFloat() const { return UseSoftFloat; } Index: lib/Target/X86/X86Subtarget.cpp =================================================================== --- lib/Target/X86/X86Subtarget.cpp +++ lib/Target/X86/X86Subtarget.cpp @@ -263,6 +263,24 @@ else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() || isTargetKFreeBSD() || In64BitMode) stackAlignment = 16; + + 
switch(X86ProcFamily) { + case IntelSkylake: + GatherOverhead = 2; + ScatterOverhead = 1024; // not relevant for AVX2 + break; + case IntelSKX: + GatherOverhead = 2; + ScatterOverhead = 2; + break; + default: + // Currently picking high overheads for other targets in order not to be selected + // TODO: need to get uArch overheads for hsw/bdw + // FIXME: giving 1024 as a max int because it may overflow in the CM calculation causing a + // wrong decision or negative values, maybe need to move to FP? + GatherOverhead = 1024; + ScatterOverhead = 1024; + } } void X86Subtarget::initializeEnvironment() {