Index: llvm/lib/Target/X86/X86.td =================================================================== --- llvm/lib/Target/X86/X86.td +++ llvm/lib/Target/X86/X86.td @@ -547,57 +547,49 @@ def ProcessorFeatures { // Nehalem - list NHMInheritableFeatures = [FeatureX87, - FeatureCMPXCHG8B, - FeatureCMOV, - FeatureMMX, - FeatureSSE42, - FeatureFXSR, - FeatureNOPL, - Feature64Bit, - FeatureCMPXCHG16B, - FeaturePOPCNT, - FeatureLAHFSAHF, - FeatureMacroFusion, - FeatureInsertVZEROUPPER]; - list NHMSpecificFeatures = []; - list NHMFeatures = - !listconcat(NHMInheritableFeatures, NHMSpecificFeatures); + list NHMFeatures = [FeatureX87, + FeatureCMPXCHG8B, + FeatureCMOV, + FeatureMMX, + FeatureSSE42, + FeatureFXSR, + FeatureNOPL, + Feature64Bit, + FeatureCMPXCHG16B, + FeaturePOPCNT, + FeatureLAHFSAHF]; + list NHMTuning = [FeatureMacroFusion, + FeatureInsertVZEROUPPER]; // Westmere list WSMAdditionalFeatures = [FeaturePCLMUL]; - list WSMSpecificFeatures = []; - list WSMInheritableFeatures = - !listconcat(NHMInheritableFeatures, WSMAdditionalFeatures); + list WSMTuning = NHMTuning; list WSMFeatures = - !listconcat(WSMInheritableFeatures, WSMSpecificFeatures); + !listconcat(NHMFeatures, WSMAdditionalFeatures); // Sandybridge list SNBAdditionalFeatures = [FeatureAVX, - FeatureSlowDivide64, FeatureXSAVE, - FeatureXSAVEOPT, - FeatureSlow3OpsLEA, - FeatureFastScalarFSQRT, - FeatureFastSHLDRotate, - FeatureFast15ByteNOP]; - list SNBSpecificFeatures = [FeatureSlowUAMem32, - FeaturePOPCNTFalseDeps]; - list SNBInheritableFeatures = - !listconcat(WSMInheritableFeatures, SNBAdditionalFeatures); + FeatureXSAVEOPT]; + list SNBTuning = [FeatureMacroFusion, + FeatureSlow3OpsLEA, + FeatureSlowDivide64, + FeatureSlowUAMem32, + FeatureFastScalarFSQRT, + FeatureFastSHLDRotate, + FeatureFast15ByteNOP, + FeaturePOPCNTFalseDeps, + FeatureInsertVZEROUPPER]; list SNBFeatures = - !listconcat(SNBInheritableFeatures, SNBSpecificFeatures); + !listconcat(WSMFeatures, SNBAdditionalFeatures); // Ivybridge list IVBAdditionalFeatures = [FeatureRDRAND, FeatureF16C, FeatureFSGSBase]; - list IVBSpecificFeatures = [FeatureSlowUAMem32, - FeaturePOPCNTFalseDeps]; - list IVBInheritableFeatures = - !listconcat(SNBInheritableFeatures, IVBAdditionalFeatures); + list IVBTuning = SNBTuning; list IVBFeatures = - !listconcat(IVBInheritableFeatures, IVBSpecificFeatures); + !listconcat(SNBFeatures, IVBAdditionalFeatures); // Haswell list HSWAdditionalFeatures = [FeatureAVX2, @@ -607,77 +599,89 @@ FeatureFMA, FeatureINVPCID, FeatureLZCNT, - FeatureMOVBE, - FeatureFastVariableShuffle]; - list HSWSpecificFeatures = [FeaturePOPCNTFalseDeps, - FeatureLZCNTFalseDeps]; - list HSWInheritableFeatures = - !listconcat(IVBInheritableFeatures, HSWAdditionalFeatures); + FeatureMOVBE]; + list HSWTuning = [FeatureMacroFusion, + FeatureSlow3OpsLEA, + FeatureSlowDivide64, + FeatureFastScalarFSQRT, + FeatureFastSHLDRotate, + FeatureFast15ByteNOP, + FeatureFastVariableShuffle, + FeaturePOPCNTFalseDeps, + FeatureLZCNTFalseDeps, + FeatureInsertVZEROUPPER]; list HSWFeatures = - !listconcat(HSWInheritableFeatures, HSWSpecificFeatures); + !listconcat(IVBFeatures, HSWAdditionalFeatures); // Broadwell list BDWAdditionalFeatures = [FeatureADX, FeatureRDSEED, FeaturePRFCHW]; - list BDWSpecificFeatures = [FeaturePOPCNTFalseDeps, - FeatureLZCNTFalseDeps]; - list BDWInheritableFeatures = - !listconcat(HSWInheritableFeatures, BDWAdditionalFeatures); + list BDWTuning = HSWTuning; list BDWFeatures = - !listconcat(BDWInheritableFeatures, BDWSpecificFeatures); + !listconcat(HSWFeatures, BDWAdditionalFeatures); // Skylake list SKLAdditionalFeatures = [FeatureAES, FeatureXSAVEC, FeatureXSAVES, FeatureCLFLUSHOPT, - FeatureFastVectorFSQRT]; - list SKLSpecificFeatures = [FeatureHasFastGather, - FeaturePOPCNTFalseDeps, - FeatureSGX]; - list SKLInheritableFeatures = - !listconcat(BDWInheritableFeatures, SKLAdditionalFeatures); + FeatureSGX]; + list SKLTuning = [FeatureHasFastGather, + FeatureMacroFusion, + FeatureSlow3OpsLEA, + FeatureSlowDivide64, + FeatureFastScalarFSQRT, + FeatureFastVectorFSQRT, + FeatureFastSHLDRotate, + FeatureFast15ByteNOP, + FeatureFastVariableShuffle, + FeaturePOPCNTFalseDeps, + FeatureInsertVZEROUPPER]; list SKLFeatures = - !listconcat(SKLInheritableFeatures, SKLSpecificFeatures); + !listconcat(BDWFeatures, SKLAdditionalFeatures); // Skylake-AVX512 - list SKXAdditionalFeatures = [FeatureAVX512, - FeaturePrefer256Bit, + list SKXAdditionalFeatures = [FeatureAES, + FeatureXSAVEC, + FeatureXSAVES, + FeatureCLFLUSHOPT, + FeatureAVX512, FeatureCDI, FeatureDQI, FeatureBWI, FeatureVLX, FeaturePKU, FeatureCLWB]; - list SKXSpecificFeatures = [FeatureHasFastGather, - FeaturePOPCNTFalseDeps]; - list SKXInheritableFeatures = - !listconcat(SKLInheritableFeatures, SKXAdditionalFeatures); + list SKXTuning = [FeatureHasFastGather, + FeatureMacroFusion, + FeatureSlow3OpsLEA, + FeatureSlowDivide64, + FeatureFastScalarFSQRT, + FeatureFastVectorFSQRT, + FeatureFastSHLDRotate, + FeatureFast15ByteNOP, + FeatureFastVariableShuffle, + FeaturePrefer256Bit, + FeaturePOPCNTFalseDeps, + FeatureInsertVZEROUPPER]; list SKXFeatures = - !listconcat(SKXInheritableFeatures, SKXSpecificFeatures); + !listconcat(BDWFeatures, SKXAdditionalFeatures); // Cascadelake list CLXAdditionalFeatures = [FeatureVNNI]; - list CLXSpecificFeatures = [FeatureHasFastGather, - FeaturePOPCNTFalseDeps]; - list CLXInheritableFeatures = - !listconcat(SKXInheritableFeatures, CLXAdditionalFeatures); + list CLXTuning = SKXTuning; list CLXFeatures = - !listconcat(CLXInheritableFeatures, CLXSpecificFeatures); + !listconcat(SKXFeatures, CLXAdditionalFeatures); // Cooperlake list CPXAdditionalFeatures = [FeatureBF16]; - list CPXSpecificFeatures = [FeatureHasFastGather, - FeaturePOPCNTFalseDeps]; - list CPXInheritableFeatures = - !listconcat(CLXInheritableFeatures, CPXAdditionalFeatures); + list CPXTuning = SKXTuning; list CPXFeatures = - !listconcat(CPXInheritableFeatures, CPXSpecificFeatures); + !listconcat(CLXFeatures, CPXAdditionalFeatures); // Cannonlake list CNLAdditionalFeatures = [FeatureAVX512, - FeaturePrefer256Bit, FeatureCDI, FeatureDQI, FeatureBWI, @@ -685,13 +689,20 @@ FeaturePKU, FeatureVBMI, FeatureIFMA, - FeatureSHA, - FeatureSGX]; - list CNLSpecificFeatures = [FeatureHasFastGather]; - list CNLInheritableFeatures = - !listconcat(SKLInheritableFeatures, CNLAdditionalFeatures); + FeatureSHA]; + list CNLTuning = [FeatureHasFastGather, + FeatureMacroFusion, + FeatureSlow3OpsLEA, + FeatureSlowDivide64, + FeatureFastScalarFSQRT, + FeatureFastVectorFSQRT, + FeatureFastSHLDRotate, + FeatureFast15ByteNOP, + FeatureFastVariableShuffle, + FeaturePrefer256Bit, + FeatureInsertVZEROUPPER]; list CNLFeatures = - !listconcat(CNLInheritableFeatures, CNLSpecificFeatures); + !listconcat(SKLFeatures, CNLAdditionalFeatures); // Icelake list ICLAdditionalFeatures = [FeatureBITALG, @@ -703,71 +714,65 @@ FeatureGFNI, FeatureCLWB, FeatureRDPID]; - list ICLSpecificFeatures = [FeatureHasFastGather]; - list ICLInheritableFeatures = - !listconcat(CNLInheritableFeatures, ICLAdditionalFeatures); + list ICLTuning = CNLTuning; list ICLFeatures = - !listconcat(ICLInheritableFeatures, ICLSpecificFeatures); + !listconcat(CNLFeatures, ICLAdditionalFeatures); // Icelake Server - list ICXSpecificFeatures = [FeaturePCONFIG, - FeatureWBNOINVD, - FeatureHasFastGather]; + list ICXAdditionalFeatures = [FeaturePCONFIG, + FeatureWBNOINVD]; + list ICXTuning = CNLTuning; list ICXFeatures = - !listconcat(ICLInheritableFeatures, ICXSpecificFeatures); + !listconcat(ICLFeatures, ICXAdditionalFeatures); //Tigerlake list TGLAdditionalFeatures = [FeatureVP2INTERSECT, FeatureMOVDIRI, FeatureMOVDIR64B, FeatureSHSTK]; - list TGLSpecificFeatures = [FeatureHasFastGather]; - list TGLInheritableFeatures = - !listconcat(TGLAdditionalFeatures ,TGLSpecificFeatures); + list TGLTuning = CNLTuning; list TGLFeatures = - !listconcat(ICLFeatures, TGLInheritableFeatures ); + !listconcat(ICLFeatures, TGLAdditionalFeatures ); // Atom - list AtomInheritableFeatures = [FeatureX87, - FeatureCMPXCHG8B, - FeatureCMOV, - FeatureMMX, - FeatureSSSE3, - FeatureFXSR, - FeatureNOPL, - Feature64Bit, - FeatureCMPXCHG16B, - FeatureMOVBE, - FeatureSlowTwoMemOps, - FeatureLAHFSAHF, - FeatureInsertVZEROUPPER]; - list AtomSpecificFeatures = [ProcIntelAtom, - FeatureSlowUAMem16, - FeatureLEAForSP, - FeatureSlowDivide32, - FeatureSlowDivide64, - FeatureLEAUsesAG, - FeaturePadShortFunctions]; - list AtomFeatures = - !listconcat(AtomInheritableFeatures, AtomSpecificFeatures); + list AtomFeatures = [FeatureX87, + FeatureCMPXCHG8B, + FeatureCMOV, + FeatureMMX, + FeatureSSSE3, + FeatureFXSR, + FeatureNOPL, + Feature64Bit, + FeatureCMPXCHG16B, + FeatureMOVBE, + FeatureLAHFSAHF]; + list AtomTuning = [ProcIntelAtom, + FeatureSlowUAMem16, + FeatureLEAForSP, + FeatureSlowDivide32, + FeatureSlowDivide64, + FeatureSlowTwoMemOps, + FeatureLEAUsesAG, + FeaturePadShortFunctions, + FeatureInsertVZEROUPPER]; // Silvermont list SLMAdditionalFeatures = [FeatureSSE42, FeaturePOPCNT, FeaturePCLMUL, FeaturePRFCHW, - FeatureSlowLEA, - FeatureSlowIncDec, FeatureRDRAND]; - list SLMSpecificFeatures = [ProcIntelSLM, - FeatureSlowDivide64, - FeatureSlowPMULLD, - FeatureFast7ByteNOP, - FeaturePOPCNTFalseDeps]; - list SLMInheritableFeatures = - !listconcat(AtomInheritableFeatures, SLMAdditionalFeatures); + list SLMTuning = [ProcIntelSLM, + FeatureSlowTwoMemOps, + FeatureSlowLEA, + FeatureSlowIncDec, + FeatureSlowDivide64, + FeatureSlowPMULLD, + FeatureFast7ByteNOP, + FeaturePOPCNTFalseDeps, + FeatureInsertVZEROUPPER]; list SLMFeatures = - !listconcat(SLMInheritableFeatures, SLMSpecificFeatures); + !listconcat(AtomFeatures, SLMAdditionalFeatures); // Goldmont list GLMAdditionalFeatures = [FeatureAES, @@ -779,31 +784,33 @@ FeatureXSAVES, FeatureCLFLUSHOPT, FeatureFSGSBase]; - list GLMSpecificFeatures = [FeatureUseGLMDivSqrtCosts, - FeaturePOPCNTFalseDeps]; - list GLMInheritableFeatures = - !listconcat(SLMInheritableFeatures, GLMAdditionalFeatures); + list GLMTuning = [FeatureUseGLMDivSqrtCosts, + FeatureSlowTwoMemOps, + FeatureSlowLEA, + FeatureSlowIncDec, + FeaturePOPCNTFalseDeps, + FeatureInsertVZEROUPPER]; list GLMFeatures = - !listconcat(GLMInheritableFeatures, GLMSpecificFeatures); + !listconcat(SLMFeatures, GLMAdditionalFeatures); // Goldmont Plus list GLPAdditionalFeatures = [FeaturePTWRITE, FeatureRDPID, FeatureSGX]; - list GLPSpecificFeatures = [FeatureUseGLMDivSqrtCosts]; - list GLPInheritableFeatures = - !listconcat(GLMInheritableFeatures, GLPAdditionalFeatures); + list GLPTuning = [FeatureUseGLMDivSqrtCosts, + FeatureSlowTwoMemOps, + FeatureSlowLEA, + FeatureSlowIncDec, + FeatureInsertVZEROUPPER]; list GLPFeatures = - !listconcat(GLPInheritableFeatures, GLPSpecificFeatures); + !listconcat(GLMFeatures, GLPAdditionalFeatures); // Tremont list TRMAdditionalFeatures = [FeatureCLWB, FeatureGFNI]; - list TRMSpecificFeatures = [FeatureUseGLMDivSqrtCosts]; - list TRMInheritableFeatures = - !listconcat(GLPInheritableFeatures, TRMAdditionalFeatures); + list TRMTuning = GLPTuning; list TRMFeatures = - !listconcat(TRMInheritableFeatures, TRMSpecificFeatures); + !listconcat(GLPFeatures, TRMAdditionalFeatures); // Knights Landing list KNLFeatures = [FeatureX87, @@ -815,13 +822,10 @@ Feature64Bit, FeatureCMPXCHG16B, FeaturePOPCNT, - FeatureSlowDivide64, FeaturePCLMUL, FeatureXSAVE, FeatureXSAVEOPT, FeatureLAHFSAHF, - FeatureSlow3OpsLEA, - FeatureSlowIncDec, FeatureAES, FeatureRDRAND, FeatureF16C, @@ -838,56 +842,56 @@ FeatureBMI, FeatureBMI2, FeatureFMA, - FeaturePRFCHW, - FeaturePreferMaskRegisters, - FeatureSlowTwoMemOps, - FeatureHasFastGather, - FeatureSlowPMADDWD]; + FeaturePRFCHW]; + list KNLTuning = [FeatureSlowDivide64, + FeatureSlow3OpsLEA, + FeatureSlowIncDec, + FeatureSlowTwoMemOps, + FeaturePreferMaskRegisters, + FeatureHasFastGather, + FeatureSlowPMADDWD]; // TODO Add AVX5124FMAPS/AVX5124VNNIW features list KNMFeatures = !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]); // Barcelona - list BarcelonaInheritableFeatures = [FeatureX87, - FeatureCMPXCHG8B, - FeatureSSE4A, - Feature3DNowA, - FeatureFXSR, - FeatureNOPL, - FeatureCMPXCHG16B, - FeaturePRFCHW, - FeatureLZCNT, - FeaturePOPCNT, - FeatureSlowSHLD, - FeatureLAHFSAHF, - FeatureCMOV, - Feature64Bit, - FeatureFastScalarShiftMasks, - FeatureInsertVZEROUPPER]; - list BarcelonaFeatures = BarcelonaInheritableFeatures; + list BarcelonaFeatures = [FeatureX87, + FeatureCMPXCHG8B, + FeatureSSE4A, + Feature3DNowA, + FeatureFXSR, + FeatureNOPL, + FeatureCMPXCHG16B, + FeaturePRFCHW, + FeatureLZCNT, + FeaturePOPCNT, + FeatureLAHFSAHF, + FeatureCMOV, + Feature64Bit]; + list BarcelonaTuning = [FeatureFastScalarShiftMasks, + FeatureSlowSHLD, + FeatureInsertVZEROUPPER]; // Bobcat - list BtVer1InheritableFeatures = [FeatureX87, - FeatureCMPXCHG8B, - FeatureCMOV, - FeatureMMX, - FeatureSSSE3, - FeatureSSE4A, - FeatureFXSR, - FeatureNOPL, - Feature64Bit, - FeatureCMPXCHG16B, - FeaturePRFCHW, - FeatureLZCNT, - FeaturePOPCNT, - FeatureSlowSHLD, - FeatureLAHFSAHF, - FeatureFast15ByteNOP, - FeatureFastScalarShiftMasks, - FeatureFastVectorShiftMasks]; - list BtVer1SpecificFeatures = [FeatureInsertVZEROUPPER]; - list BtVer1Features = - !listconcat(BtVer1InheritableFeatures, BtVer1SpecificFeatures); + list BtVer1Features = [FeatureX87, + FeatureCMPXCHG8B, + FeatureCMOV, + FeatureMMX, + FeatureSSSE3, + FeatureSSE4A, + FeatureFXSR, + FeatureNOPL, + Feature64Bit, + FeatureCMPXCHG16B, + FeaturePRFCHW, + FeatureLZCNT, + FeaturePOPCNT, + FeatureLAHFSAHF]; + list BtVer1Tuning = [FeatureFast15ByteNOP, + FeatureFastScalarShiftMasks, + FeatureFastVectorShiftMasks, + FeatureSlowSHLD, + FeatureInsertVZEROUPPER]; // Jaguar list BtVer2AdditionalFeatures = [FeatureAVX, @@ -898,38 +902,39 @@ FeatureMOVBE, FeatureXSAVE, FeatureXSAVEOPT]; - list BtVer2SpecificFeatures = [FeatureFastLZCNT, - FeatureFastBEXTR, - FeatureFastHorizontalOps]; - list BtVer2InheritableFeatures = - !listconcat(BtVer1InheritableFeatures, BtVer2AdditionalFeatures); + list BtVer2Tuning = [FeatureFastLZCNT, + FeatureFastBEXTR, + FeatureFastHorizontalOps, + FeatureFast15ByteNOP, + FeatureFastScalarShiftMasks, + FeatureFastVectorShiftMasks, + FeatureSlowSHLD]; list BtVer2Features = - !listconcat(BtVer2InheritableFeatures, BtVer2SpecificFeatures); + !listconcat(BtVer1Features, BtVer2AdditionalFeatures); // Bulldozer - list BdVer1InheritableFeatures = [FeatureX87, - FeatureCMPXCHG8B, - FeatureCMOV, - FeatureXOP, - Feature64Bit, - FeatureCMPXCHG16B, - FeatureAES, - FeaturePRFCHW, - FeaturePCLMUL, - FeatureMMX, - FeatureFXSR, - FeatureNOPL, - FeatureLZCNT, - FeaturePOPCNT, - FeatureXSAVE, - FeatureLWP, - FeatureSlowSHLD, - FeatureLAHFSAHF, - FeatureFast11ByteNOP, - FeatureFastScalarShiftMasks, - FeatureBranchFusion, - FeatureInsertVZEROUPPER]; - list BdVer1Features = BdVer1InheritableFeatures; + list BdVer1Features = [FeatureX87, + FeatureCMPXCHG8B, + FeatureCMOV, + FeatureXOP, + Feature64Bit, + FeatureCMPXCHG16B, + FeatureAES, + FeaturePRFCHW, + FeaturePCLMUL, + FeatureMMX, + FeatureFXSR, + FeatureNOPL, + FeatureLZCNT, + FeaturePOPCNT, + FeatureXSAVE, + FeatureLWP, + FeatureLAHFSAHF]; + list BdVer1Tuning = [FeatureSlowSHLD, + FeatureFast11ByteNOP, + FeatureFastScalarShiftMasks, + FeatureBranchFusion, + FeatureInsertVZEROUPPER]; // PileDriver list BdVer2AdditionalFeatures = [FeatureF16C, @@ -937,16 +942,16 @@ FeatureTBM, FeatureFMA, FeatureFastBEXTR]; - list BdVer2InheritableFeatures = - !listconcat(BdVer1InheritableFeatures, BdVer2AdditionalFeatures); - list BdVer2Features = BdVer2InheritableFeatures; + list BdVer2Tuning = BdVer1Tuning; + list BdVer2Features = + !listconcat(BdVer1Features, BdVer2AdditionalFeatures); // Steamroller list BdVer3AdditionalFeatures = [FeatureXSAVEOPT, FeatureFSGSBase]; - list BdVer3InheritableFeatures = - !listconcat(BdVer2InheritableFeatures, BdVer3AdditionalFeatures); - list BdVer3Features = BdVer3InheritableFeatures; + list BdVer3Tuning = BdVer2Tuning; + list BdVer3Features = + !listconcat(BdVer2Features, BdVer3AdditionalFeatures); // Excavator list BdVer4AdditionalFeatures = [FeatureAVX2, @@ -954,9 +959,9 @@ FeatureMOVBE, FeatureRDRAND, FeatureMWAITX]; - list BdVer4InheritableFeatures = - !listconcat(BdVer3InheritableFeatures, BdVer4AdditionalFeatures); - list BdVer4Features = BdVer4InheritableFeatures; + list BdVer4Tuning = BdVer3Tuning; + list BdVer4Features = + !listconcat(BdVer3Features, BdVer4AdditionalFeatures); // AMD Zen Processors common ISAs @@ -975,13 +980,8 @@ FeatureFSGSBase, FeatureFXSR, FeatureNOPL, - FeatureFastLZCNT, FeatureLAHFSAHF, FeatureLZCNT, - FeatureFastBEXTR, - FeatureFast15ByteNOP, - FeatureBranchFusion, - FeatureFastScalarShiftMasks, FeatureMMX, FeatureMOVBE, FeatureMWAITX, @@ -992,16 +992,22 @@ FeatureRDSEED, FeatureSHA, FeatureSSE4A, - FeatureSlowSHLD, - FeatureInsertVZEROUPPER, FeatureX87, FeatureXSAVE, FeatureXSAVEC, FeatureXSAVEOPT, FeatureXSAVES]; + list ZNTuning = [FeatureFastLZCNT, + FeatureFastBEXTR, + FeatureFast15ByteNOP, + FeatureBranchFusion, + FeatureFastScalarShiftMasks, + FeatureSlowSHLD, + FeatureInsertVZEROUPPER]; list ZN2AdditionalFeatures = [FeatureCLWB, FeatureRDPID, FeatureWBNOINVD]; + list ZN2Tuning = ZNTuning; list ZN2Features = !listconcat(ZNFeatures, ZN2AdditionalFeatures); } @@ -1010,42 +1016,47 @@ // X86 processors supported. //===----------------------------------------------------------------------===// -class Proc Features> - : ProcessorModel; +class Proc Features, + list TuneFeatures> + : ProcessorModel; + +class ProcModel Features, + list TuneFeatures> + : ProcessorModel; // NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled // if i386/i486 is specifically requested. // NOTE: 64Bit is here as "generic" is the default llc CPU. The X86Subtarget // constructor checks that any CPU used in 64-bit mode has Feature64Bit enabled. // It has no effect on code generation. -def : Proc<"generic", [FeatureX87, FeatureSlowUAMem16, - FeatureCMPXCHG8B, Feature64Bit, - FeatureInsertVZEROUPPER]>; -def : Proc<"i386", [FeatureX87, FeatureSlowUAMem16, - FeatureInsertVZEROUPPER]>; -def : Proc<"i486", [FeatureX87, FeatureSlowUAMem16, - FeatureInsertVZEROUPPER]>; -def : Proc<"i586", [FeatureX87, FeatureSlowUAMem16, - FeatureCMPXCHG8B, FeatureInsertVZEROUPPER]>; -def : Proc<"pentium", [FeatureX87, FeatureSlowUAMem16, - FeatureCMPXCHG8B, FeatureInsertVZEROUPPER]>; -def : Proc<"pentium-mmx", [FeatureX87, FeatureSlowUAMem16, - FeatureCMPXCHG8B, FeatureMMX, - FeatureInsertVZEROUPPER]>; - -def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, - FeatureCMOV, FeatureInsertVZEROUPPER]>; -def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, - FeatureCMOV, FeatureNOPL, FeatureInsertVZEROUPPER]>; - -def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, - FeatureMMX, FeatureCMOV, FeatureFXSR, - FeatureNOPL, FeatureInsertVZEROUPPER]>; +def : Proc<"generic", [FeatureX87, FeatureCMPXCHG8B, Feature64Bit], + [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; +def : Proc<"i386", [FeatureX87], + [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; +def : Proc<"i486", [FeatureX87], + [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; +def : Proc<"i586", [FeatureX87, FeatureCMPXCHG8B], + [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; +def : Proc<"pentium", [FeatureX87, FeatureCMPXCHG8B], + [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; +def : Proc<"pentium-mmx", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX], + [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; + +def : Proc<"i686", [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV], + [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; +def : Proc<"pentiumpro", [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, + FeatureNOPL], + [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; + +def : Proc<"pentium2", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureCMOV, + FeatureFXSR, FeatureNOPL], + [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; foreach P = ["pentium3", "pentium3m"] in { - def : Proc; + def : Proc; } // Enable the PostRAScheduler for SSE2 and SSE3 class cpus. @@ -1058,46 +1069,45 @@ // measure to avoid performance surprises, in case clang's default cpu // changes slightly. -def : ProcessorModel<"pentium-m", GenericPostRAModel, - [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, - FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL, - FeatureCMOV, FeatureInsertVZEROUPPER]>; +def : ProcModel<"pentium-m", GenericPostRAModel, + [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2, + FeatureFXSR, FeatureNOPL, FeatureCMOV], + [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; foreach P = ["pentium4", "pentium4m"] in { -// def : ProcessorModel; +// def : ProcModel; +// [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; // Since 'pentium4' is the default 32-bit CPU on Linux and Windows, // give it more modern tunings. // FIXME: This wouldn't be needed if we supported mtune. - def : ProcessorModel; + def : ProcModel; } // Intel Quark. -def : Proc<"lakemont", [FeatureSlowUAMem16, FeatureCMPXCHG8B, - FeatureInsertVZEROUPPER]>; +def : Proc<"lakemont", [FeatureCMPXCHG8B], + [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; // Intel Core Duo. -def : ProcessorModel<"yonah", SandyBridgeModel, - [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, - FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL, - FeatureCMOV, FeatureInsertVZEROUPPER]>; +def : ProcModel<"yonah", SandyBridgeModel, + [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3, + FeatureFXSR, FeatureNOPL, FeatureCMOV], + [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; // NetBurst. -def : ProcessorModel<"prescott", GenericPostRAModel, - [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, - FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL, - FeatureCMOV, FeatureInsertVZEROUPPER]>; -def : ProcessorModel<"nocona", GenericPostRAModel, [ +def : ProcModel<"prescott", GenericPostRAModel, + [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3, + FeatureFXSR, FeatureNOPL, FeatureCMOV], + [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; +def : ProcModel<"nocona", GenericPostRAModel, [ FeatureX87, - FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, @@ -1106,13 +1116,15 @@ FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B, +], +[ + FeatureSlowUAMem16, FeatureInsertVZEROUPPER ]>; // Intel Core 2 Solo/Duo. -def : ProcessorModel<"core2", SandyBridgeModel, [ +def : ProcModel<"core2", SandyBridgeModel, [ FeatureX87, - FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, @@ -1121,13 +1133,15 @@ FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B, - FeatureLAHFSAHF, + FeatureLAHFSAHF +], +[ FeatureMacroFusion, + FeatureSlowUAMem16, FeatureInsertVZEROUPPER ]>; -def : ProcessorModel<"penryn", SandyBridgeModel, [ +def : ProcModel<"penryn", SandyBridgeModel, [ FeatureX87, - FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, @@ -1136,140 +1150,165 @@ FeatureNOPL, Feature64Bit, FeatureCMPXCHG16B, - FeatureLAHFSAHF, + FeatureLAHFSAHF +], +[ FeatureMacroFusion, + FeatureSlowUAMem16, FeatureInsertVZEROUPPER ]>; // Atom CPUs. foreach P = ["bonnell", "atom"] in { - def : ProcessorModel; + def : ProcModel; } foreach P = ["silvermont", "slm"] in { - def : ProcessorModel; + def : ProcModel; } -def : ProcessorModel<"goldmont", SLMModel, ProcessorFeatures.GLMFeatures>; -def : ProcessorModel<"goldmont-plus", SLMModel, ProcessorFeatures.GLPFeatures>; -def : ProcessorModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures>; +def : ProcModel<"goldmont", SLMModel, ProcessorFeatures.GLMFeatures, + ProcessorFeatures.GLMTuning>; +def : ProcModel<"goldmont-plus", SLMModel, ProcessorFeatures.GLPFeatures, + ProcessorFeatures.GLPTuning>; +def : ProcModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures, + ProcessorFeatures.TRMTuning>; // "Arrandale" along with corei3 and corei5 foreach P = ["nehalem", "corei7"] in { - def : ProcessorModel; + def : ProcModel; } // Westmere is the corei3/i5/i7 path from nehalem to sandybridge -def : ProcessorModel<"westmere", SandyBridgeModel, - ProcessorFeatures.WSMFeatures>; +def : ProcModel<"westmere", SandyBridgeModel, ProcessorFeatures.WSMFeatures, + ProcessorFeatures.WSMTuning>; foreach P = ["sandybridge", "corei7-avx"] in { - def : ProcessorModel; + def : ProcModel; } foreach P = ["ivybridge", "core-avx-i"] in { - def : ProcessorModel; + def : ProcModel; } foreach P = ["haswell", "core-avx2"] in { - def : ProcessorModel; + def : ProcModel; } -def : ProcessorModel<"broadwell", BroadwellModel, - ProcessorFeatures.BDWFeatures>; +def : ProcModel<"broadwell", BroadwellModel, ProcessorFeatures.BDWFeatures, + ProcessorFeatures.BDWTuning>; -def : ProcessorModel<"skylake", SkylakeClientModel, - ProcessorFeatures.SKLFeatures>; +def : ProcModel<"skylake", SkylakeClientModel, ProcessorFeatures.SKLFeatures, + ProcessorFeatures.SKLTuning>; // FIXME: define KNL scheduler model -def : ProcessorModel<"knl", HaswellModel, ProcessorFeatures.KNLFeatures>; -def : ProcessorModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures>; +def : ProcModel<"knl", HaswellModel, ProcessorFeatures.KNLFeatures, + ProcessorFeatures.KNLTuning>; +def : ProcModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures, + ProcessorFeatures.KNLTuning>; foreach P = ["skylake-avx512", "skx"] in { - def : ProcessorModel; + def : ProcModel; } -def : ProcessorModel<"cascadelake", SkylakeServerModel, - ProcessorFeatures.CLXFeatures>; -def : ProcessorModel<"cooperlake", SkylakeServerModel, - ProcessorFeatures.CPXFeatures>; -def : ProcessorModel<"cannonlake", SkylakeServerModel, - ProcessorFeatures.CNLFeatures>; -def : ProcessorModel<"icelake-client", SkylakeServerModel, - ProcessorFeatures.ICLFeatures>; -def : ProcessorModel<"icelake-server", SkylakeServerModel, - ProcessorFeatures.ICXFeatures>; -def : ProcessorModel<"tigerlake", SkylakeServerModel, - ProcessorFeatures.TGLFeatures>; +def : ProcModel<"cascadelake", SkylakeServerModel, + ProcessorFeatures.CLXFeatures, ProcessorFeatures.CLXTuning>; +def : ProcModel<"cooperlake", SkylakeServerModel, + ProcessorFeatures.CPXFeatures, ProcessorFeatures.CPXTuning>; +def : ProcModel<"cannonlake", SkylakeServerModel, + ProcessorFeatures.CNLFeatures, ProcessorFeatures.CNLTuning>; +def : ProcModel<"icelake-client", SkylakeServerModel, + ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>; +def : ProcModel<"icelake-server", SkylakeServerModel, + ProcessorFeatures.ICXFeatures, ProcessorFeatures.ICXTuning>; +def : ProcModel<"tigerlake", SkylakeServerModel, + ProcessorFeatures.TGLFeatures, ProcessorFeatures.TGLTuning>; // AMD CPUs. -def : Proc<"k6", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, - FeatureMMX, FeatureInsertVZEROUPPER]>; -def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, - Feature3DNow, FeatureInsertVZEROUPPER]>; -def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, - Feature3DNow, FeatureInsertVZEROUPPER]>; +def : Proc<"k6", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX], + [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; +def : Proc<"k6-2", [FeatureX87, FeatureCMPXCHG8B, Feature3DNow], + [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; +def : Proc<"k6-3", [FeatureX87, FeatureCMPXCHG8B, Feature3DNow], + [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; foreach P = ["athlon", "athlon-tbird"] in { - def : Proc; + def : Proc; } foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in { - def : Proc; + def : Proc; } foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in { - def : Proc; + def : Proc; } foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in { - def : Proc; + def : Proc; } foreach P = ["amdfam10", "barcelona"] in { - def : Proc; + def : Proc; } // Bobcat -def : Proc<"btver1", ProcessorFeatures.BtVer1Features>; +def : Proc<"btver1", ProcessorFeatures.BtVer1Features, + ProcessorFeatures.BtVer1Tuning>; // Jaguar -def : ProcessorModel<"btver2", BtVer2Model, ProcessorFeatures.BtVer2Features>; +def : ProcModel<"btver2", BtVer2Model, ProcessorFeatures.BtVer2Features, + ProcessorFeatures.BtVer2Tuning>; // Bulldozer -def : ProcessorModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features>; +def : ProcModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features, + ProcessorFeatures.BdVer1Tuning>; // Piledriver -def : ProcessorModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features>; +def : ProcModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features, + ProcessorFeatures.BdVer2Tuning>; // Steamroller -def : Proc<"bdver3", ProcessorFeatures.BdVer3Features>; +def : Proc<"bdver3", ProcessorFeatures.BdVer3Features, + ProcessorFeatures.BdVer3Tuning>; // Excavator -def : Proc<"bdver4", ProcessorFeatures.BdVer4Features>; - -def : ProcessorModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures>; -def : ProcessorModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features>; - -def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, - Feature3DNowA, FeatureInsertVZEROUPPER]>; - -def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, - FeatureInsertVZEROUPPER]>; -def : Proc<"winchip2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow, - FeatureInsertVZEROUPPER]>; -def : Proc<"c3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow, - FeatureInsertVZEROUPPER]>; -def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, - FeatureMMX, FeatureSSE1, FeatureFXSR, - FeatureCMOV, FeatureInsertVZEROUPPER]>; +def : Proc<"bdver4", ProcessorFeatures.BdVer4Features, + ProcessorFeatures.BdVer4Tuning>; + +def : ProcModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures, + ProcessorFeatures.ZNTuning>; +def : ProcModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features, + ProcessorFeatures.ZN2Tuning>; + +def : Proc<"geode", [FeatureX87, FeatureCMPXCHG8B, Feature3DNowA], + [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; + +def : Proc<"winchip-c6", [FeatureX87, FeatureMMX], + [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; +def : Proc<"winchip2", [FeatureX87, Feature3DNow], + [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; +def : Proc<"c3", [FeatureX87, Feature3DNow], + [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; +def : Proc<"c3-2", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, + FeatureSSE1, FeatureFXSR, FeatureCMOV], + [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; // We also provide a generic 64-bit specific x86 processor model which tries to // be good for modern chips without enabling instruction set encodings past the @@ -1281,7 +1320,7 @@ // covers a huge swath of x86 processors. If there are specific scheduling // knobs which need to be tuned differently for AMD chips, we might consider // forming a common base for them. -def : ProcessorModel<"x86-64", SandyBridgeModel, [ +def : ProcModel<"x86-64", SandyBridgeModel, [ FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, @@ -1290,6 +1329,8 @@ FeatureFXSR, FeatureNOPL, Feature64Bit, +], +[ FeatureSlow3OpsLEA, FeatureSlowDivide64, FeatureSlowIncDec,