Index: lib/Target/X86/X86.td
===================================================================
--- lib/Target/X86/X86.td
+++ lib/Target/X86/X86.td
@@ -167,9 +167,12 @@
                                       "Support SMAP instructions">;
 def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                                      "Use LEA for adjusting the stack pointer">;
-def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
-                                     "HasSlowDivide", "true",
-                                     "Use small divide for positive values less than 256">;
+def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
+                                     "HasSlowDivide32", "true",
+                                     "Use 8-bit divide for positive values less than 256">;
+def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw",
+                                     "HasSlowDivide64", "true",
+                                     "Use 16-bit divide for positive values less than 65536">;
 def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
                                      "PadShortFunctions", "true",
                                      "Pad short functions">;
@@ -230,7 +233,7 @@
 def : ProcessorModel<"atom", AtomModel,
                      [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
                       FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
-                      FeatureSlowDivide,
+                      FeatureSlowDivide32, FeatureSlowDivide64,
                       FeatureCallRegIndirect,
                       FeatureLEAUsesAG,
                       FeaturePadShortFunctions]>;
@@ -240,6 +243,7 @@
                                        FeatureSSE42, FeatureCMPXCHG16B,
                                        FeatureMOVBE, FeaturePOPCNT,
                                        FeaturePCLMUL, FeatureAES,
+                                       FeatureSlowDivide64,
                                        FeatureCallRegIndirect,
                                        FeaturePRFCHW,
                                        FeatureSlowLEA, FeatureSlowIncDec,
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -261,9 +261,10 @@
   setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());

   // Bypass expensive divides on Atom when compiling with O2
-  if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) {
-    addBypassSlowDiv(32, 8);
-    if (Subtarget->is64Bit())
+  if (TM.getOptLevel() >= CodeGenOpt::Default) {
+    if (Subtarget->hasSlowDivide32())
+      addBypassSlowDiv(32, 8);
+    if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
       addBypassSlowDiv(64, 16);
   }

Index: lib/Target/X86/X86Subtarget.h
===================================================================
--- lib/Target/X86/X86Subtarget.h
+++ lib/Target/X86/X86Subtarget.h
@@ -171,9 +171,13 @@
   /// the stack pointer. This is an optimization for Intel Atom processors.
   bool UseLeaForSP;

-  /// HasSlowDivide - True if smaller divides are significantly faster than
-  /// full divides and should be used when possible.
-  bool HasSlowDivide;
+  /// HasSlowDivide32 - True if 8-bit divisions are significantly faster than
+  /// 32-bit divisions and should be used when possible.
+  bool HasSlowDivide32;
+
+  /// HasSlowDivide64 - True if 16-bit divisions are significantly faster than
+  /// 64-bit divisions and should be used when possible.
+  bool HasSlowDivide64;

   /// PadShortFunctions - True if the short functions should be padded to prevent
   /// a stall when returning too early.
@@ -363,7 +367,8 @@
   bool hasVectorUAMem() const { return HasVectorUAMem; }
   bool hasCmpxchg16b() const { return HasCmpxchg16b; }
   bool useLeaForSP() const { return UseLeaForSP; }
-  bool hasSlowDivide() const { return HasSlowDivide; }
+  bool hasSlowDivide32() const { return HasSlowDivide32; }
+  bool hasSlowDivide64() const { return HasSlowDivide64; }
   bool padShortFunctions() const { return PadShortFunctions; }
   bool callRegIndirect() const { return CallRegIndirect; }
   bool LEAusesAG() const { return LEAUsesAG; }
Index: lib/Target/X86/X86Subtarget.cpp
===================================================================
--- lib/Target/X86/X86Subtarget.cpp
+++ lib/Target/X86/X86Subtarget.cpp
@@ -272,7 +272,8 @@
   HasVectorUAMem = false;
   HasCmpxchg16b = false;
   UseLeaForSP = false;
-  HasSlowDivide = false;
+  HasSlowDivide32 = false;
+  HasSlowDivide64 = false;
   PadShortFunctions = false;
   CallRegIndirect = false;
   LEAUsesAG = false;
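
Note on the transformation this enables: addBypassSlowDiv(SlowBitWidth, FastBitWidth) asks the CodeGenPrepare-time slow-division bypass to guard each division of the wider type with a runtime operand-width check and fall back to the narrow hardware divide when both operands fit. The C++ below is only a sketch of the emitted control flow, not the pass's actual IR, and bypassed_udiv64 is a hypothetical name used for illustration:

#include <cstdint>

// Sketch of the control flow produced around a 64-bit udiv when
// addBypassSlowDiv(64, 16) is in effect (illustrative, not LLVM code).
uint64_t bypassed_udiv64(uint64_t a, uint64_t b) {
  // Fast path: no bits set above bit 15 in either operand, so a 16-bit
  // hardware divide produces the same quotient as the 64-bit one.
  if (((a | b) >> 16) == 0)
    return static_cast<uint16_t>(a) / static_cast<uint16_t>(b);
  // Slow path: the full 64-bit divide, which is microcoded and
  // high-latency on Atom and Silvermont.
  return a / b;
}

The guard itself costs a compare and a branch, which is why the patch keeps it behind CodeGenOpt::Default and gates it per subtarget: FeatureSlowDivide32 (Atom) bypasses 32-bit divides through 8-bit ones, while FeatureSlowDivide64 (Atom and Silvermont) bypasses 64-bit divides through 16-bit ones.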