diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -707,25 +707,78 @@
 //===----------------------------------------------------------------------===//
 
 def ProcessorFeatures {
-  // x86-64 and x86-64-v[234]
+  // x86-64 micro-architecture levels: x86-64 and x86-64-v[234]
+  // Feature lists extend the previous level via !listconcat; each tuning list
+  // is written out in full and is used by the matching ProcModel definition.
   list<SubtargetFeature> X86_64V1Features = [
     FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE2,
     FeatureFXSR, FeatureNOPL, FeatureX86_64,
   ];
+  list<SubtargetFeature> X86_64V1Tuning = [
+    TuningMacroFusion,
+    TuningSlow3OpsLEA,
+    TuningSlowDivide64,
+    TuningSlowIncDec,
+    TuningInsertVZEROUPPER
+  ];
+
   list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
     FeatureCX16, FeatureLAHFSAHF64, FeatureCRC32, FeaturePOPCNT,
     FeatureSSE42
   ]);
+  list<SubtargetFeature> X86_64V2Tuning = [
+    TuningMacroFusion,
+    TuningSlow3OpsLEA,
+    TuningSlowDivide64,
+    TuningSlowUAMem32,
+    TuningFastScalarFSQRT,
+    TuningFastSHLDRotate,
+    TuningFast15ByteNOP,
+    TuningPOPCNTFalseDeps,
+    TuningInsertVZEROUPPER
+  ];
+
   list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
     FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
     FeatureMOVBE, FeatureXSAVE
   ]);
+  list<SubtargetFeature> X86_64V3Tuning = [
+    TuningMacroFusion,
+    TuningSlow3OpsLEA,
+    TuningSlowDivide64,
+    TuningFastScalarFSQRT,
+    TuningFastSHLDRotate,
+    TuningFast15ByteNOP,
+    TuningFastVariableCrossLaneShuffle,
+    TuningFastVariablePerLaneShuffle,
+    TuningPOPCNTFalseDeps,
+    TuningLZCNTFalseDeps,
+    TuningInsertVZEROUPPER,
+    TuningAllowLight256Bit
+  ];
+
   list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
     FeatureBWI,
     FeatureCDI,
     FeatureDQI,
     FeatureVLX,
   ]);
+  list<SubtargetFeature> X86_64V4Tuning = [
+    TuningMacroFusion,
+    TuningSlow3OpsLEA,
+    TuningSlowDivide64,
+    TuningFastScalarFSQRT,
+    TuningFastVectorFSQRT,
+    TuningFastSHLDRotate,
+    TuningFast15ByteNOP,
+    TuningFastVariableCrossLaneShuffle,
+    TuningFastVariablePerLaneShuffle,
+    TuningPrefer256Bit,
+    TuningFastGather,
+    TuningPOPCNTFalseDeps,
+    TuningInsertVZEROUPPER,
+    TuningAllowLight256Bit
+  ];
 
   // Nehalem
   list<SubtargetFeature> NHMFeatures = X86_64V2Features;
@@ -1654,23 +1707,16 @@
 // knobs which need to be tuned differently for AMD chips, we might consider
 // forming a common base for them.
 def : ProcModel<"x86-64", SandyBridgeModel, ProcessorFeatures.X86_64V1Features,
-[
-  TuningSlow3OpsLEA,
-  TuningSlowDivide64,
-  TuningSlowIncDec,
-  TuningMacroFusion,
-  TuningInsertVZEROUPPER
-]>;
-
-// x86-64 micro-architecture levels.
+                ProcessorFeatures.X86_64V1Tuning>;
+// Close to Sandybridge.
 def : ProcModel<"x86-64-v2", SandyBridgeModel, ProcessorFeatures.X86_64V2Features,
-                ProcessorFeatures.SNBTuning>;
+                ProcessorFeatures.X86_64V2Tuning>;
 // Close to Haswell.
 def : ProcModel<"x86-64-v3", HaswellModel, ProcessorFeatures.X86_64V3Features,
-                ProcessorFeatures.HSWTuning>;
+                ProcessorFeatures.X86_64V3Tuning>;
 // Close to the AVX-512 level implemented by Xeon Scalable Processors.
 def : ProcModel<"x86-64-v4", SkylakeServerModel, ProcessorFeatures.X86_64V4Features,
-                ProcessorFeatures.SKXTuning>;
+                ProcessorFeatures.X86_64V4Tuning>;
 
 //===----------------------------------------------------------------------===//
 // Calling Conventions