Index: llvm/trunk/lib/Target/X86/X86.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86.td
+++ llvm/trunk/lib/Target/X86/X86.td
@@ -167,9 +167,12 @@
                                      "Support SMAP instructions">;
 def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                                      "Use LEA for adjusting the stack pointer">;
-def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
-                                     "HasSlowDivide", "true",
-                                     "Use small divide for positive values less than 256">;
+def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
+                                     "HasSlowDivide32", "true",
+                                     "Use 8-bit divide for positive values less than 256">;
+def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw",
+                                     "HasSlowDivide64", "true",
+                                     "Use 16-bit divide for positive values less than 65536">;
 def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
                                      "PadShortFunctions", "true",
                                      "Pad short functions">;
@@ -234,7 +237,7 @@
 def : ProcessorModel<"atom", AtomModel,
                      [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
                       FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
-                      FeatureSlowDivide,
+                      FeatureSlowDivide32, FeatureSlowDivide64,
                       FeatureCallRegIndirect,
                       FeatureLEAUsesAG,
                       FeaturePadShortFunctions]>;
@@ -244,6 +247,7 @@
                       FeatureSSE42, FeatureCMPXCHG16B, FeatureMOVBE,
                       FeaturePOPCNT, FeaturePCLMUL, FeatureAES,
+                      FeatureSlowDivide64,
                       FeatureCallRegIndirect,
                       FeaturePRFCHW, FeatureSlowLEA, FeatureSlowIncDec,
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -249,9 +249,10 @@
   setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
 
   // Bypass expensive divides on Atom when compiling with O2
-  if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) {
-    addBypassSlowDiv(32, 8);
-    if (Subtarget->is64Bit())
+  if (TM.getOptLevel() >= CodeGenOpt::Default) {
+    if (Subtarget->hasSlowDivide32())
+      addBypassSlowDiv(32, 8);
+    if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
       addBypassSlowDiv(64, 16);
   }
 
Index: llvm/trunk/lib/Target/X86/X86Subtarget.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h
@@ -171,9 +171,13 @@
   /// the stack pointer. This is an optimization for Intel Atom processors.
   bool UseLeaForSP;
 
-  /// HasSlowDivide - True if smaller divides are significantly faster than
-  /// full divides and should be used when possible.
-  bool HasSlowDivide;
+  /// HasSlowDivide32 - True if 8-bit divisions are significantly faster than
+  /// 32-bit divisions and should be used when possible.
+  bool HasSlowDivide32;
+
+  /// HasSlowDivide64 - True if 16-bit divides are significantly faster than
+  /// 64-bit divisions and should be used when possible.
+  bool HasSlowDivide64;
 
   /// PadShortFunctions - True if the short functions should be padded to prevent
   /// a stall when returning too early.
@@ -373,7 +377,8 @@
   bool hasVectorUAMem() const { return HasVectorUAMem; }
   bool hasCmpxchg16b() const { return HasCmpxchg16b; }
   bool useLeaForSP() const { return UseLeaForSP; }
-  bool hasSlowDivide() const { return HasSlowDivide; }
+  bool hasSlowDivide32() const { return HasSlowDivide32; }
+  bool hasSlowDivide64() const { return HasSlowDivide64; }
   bool padShortFunctions() const { return PadShortFunctions; }
   bool callRegIndirect() const { return CallRegIndirect; }
   bool LEAusesAG() const { return LEAUsesAG; }
Index: llvm/trunk/lib/Target/X86/X86Subtarget.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.cpp
+++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp
@@ -267,7 +267,8 @@
   HasVectorUAMem = false;
   HasCmpxchg16b = false;
   UseLeaForSP = false;
-  HasSlowDivide = false;
+  HasSlowDivide32 = false;
+  HasSlowDivide64 = false;
   PadShortFunctions = false;
   CallRegIndirect = false;
   LEAUsesAG = false;
Index: llvm/trunk/test/CodeGen/X86/slow-div.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/slow-div.ll
+++ llvm/trunk/test/CodeGen/X86/slow-div.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivl-to-divb < %s | FileCheck -check-prefix=DIV32 %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivq-to-divw < %s | FileCheck -check-prefix=DIV64 %s
+
+define i32 @div32(i32 %a, i32 %b) {
+entry:
+; DIV32-LABEL: div32:
+; DIV32: orl %{{.*}}, [[REG:%[a-z]+]]
+; DIV32: testl $-256, [[REG]]
+; DIV32: divb
+; DIV64-LABEL: div32:
+; DIV64-NOT: divb
+  %div = sdiv i32 %a, %b
+  ret i32 %div
+}
+
+define i64 @div64(i64 %a, i64 %b) {
+entry:
+; DIV32-LABEL: div64:
+; DIV32-NOT: divw
+; DIV64-LABEL: div64:
+; DIV64: orq %{{.*}}, [[REG:%[a-z]+]]
+; DIV64: testq $-65536, [[REG]]
+; DIV64: divw
+  %div = sdiv i64 %a, %b
+  ret i64 %div
+}
+