Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -226,7 +226,11 @@ /// Return true if integer divide is usually cheaper than a sequence of /// several shifts, adds, and multiplies for this target. - bool isIntDivCheap() const { return IntDivIsCheap; } + /// The definition of "cheaper" may depend on whether we're optimizing + /// for speed or for size. + virtual bool isIntDivCheap(EVT VT, bool OptSize) const { + return IntDivIsCheap; + } /// Return true if sqrt(x) is as cheap or cheaper than 1 / rsqrt(x) bool isFsqrtCheap() const { @@ -243,7 +247,11 @@ } /// Return true if pow2 sdiv is cheaper than a chain of sra/srl/add/sra. - bool isPow2SDivCheap() const { return Pow2SDivIsCheap; } + /// The definition of "cheaper" may depend on whether we're optimizing + /// for speed or for size. + virtual bool isPow2SDivCheap(EVT VT, bool OptSize) const { + return Pow2SDivIsCheap; + } /// Return true if Flow Control is an expensive operation that should be /// avoided. @@ -1764,9 +1772,7 @@ bool HasExtractBitsInsn; /// Tells the code generator not to expand integer divides by constants into a - /// sequence of muls, adds, and shifts. This is a hack until a real cost - /// model is in place. If we ever optimize for size, this will be set to true - /// unconditionally. + /// sequence of muls, adds, and shifts. bool IntDivIsCheap; // Don't expand fsqrt with an approximation based on the inverse sqrt. 
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2176,6 +2176,7 @@ N0, N1); } + bool MinSize = DAG.getMachineFunction().getFunction()->optForMinSize(); // fold (sdiv X, pow2) -> simple ops after legalize // FIXME: We check for the exact bit here because the generic lowering gives // better results in that case. The target-specific lowering should learn how @@ -2186,7 +2187,7 @@ (-N1C->getAPIntValue()).isPowerOf2())) { // If dividing by powers of two is cheap, then don't perform the following // fold. - if (TLI.isPow2SDivCheap()) + if (TLI.isPow2SDivCheap(N->getValueType(0), MinSize)) return SDValue(); // Target-specific implementation of sdiv x, pow2. @@ -2226,7 +2227,7 @@ // If integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. - if (N1C && !TLI.isIntDivCheap()) + if (N1C && !TLI.isIntDivCheap(N->getValueType(0), MinSize)) if (SDValue Op = BuildSDIV(N)) return Op; @@ -2280,8 +2281,10 @@ } } } + // fold (udiv x, c) -> alternate - if (N1C && !TLI.isIntDivCheap()) + bool MinSize = DAG.getMachineFunction().getFunction()->optForMinSize(); + if (N1C && !TLI.isIntDivCheap(N->getValueType(0), MinSize)) if (SDValue Op = BuildUDIV(N)) return Op; Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -902,6 +902,9 @@ /// \brief Customize the preferred legalization strategy for certain types. 
LegalizeTypeAction getPreferredVectorAction(EVT VT) const override; + bool isPow2SDivCheap(EVT VT, bool OptSize) const override; + bool isIntDivCheap(EVT VT, bool OptSize) const override; + protected: std::pair<const TargetRegisterClass *, uint8_t> findRepresentativeClass(const TargetRegisterInfo *TRI, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -26422,3 +26422,18 @@ bool X86TargetLowering::isTargetFTOL() const { return Subtarget->isTargetKnownWindowsMSVC() && !Subtarget->is64Bit(); } + +bool X86TargetLowering::isIntDivCheap(EVT VT, bool OptSize) const { + // Integer division on x86 is expensive. However, when aggressively optimizing + // for code size, we prefer to leave the div as is, as it is usually smaller + // than the alternative sequence. + // The exception to this is vector divs. Since we don't have vector integer + // division, leaving the div as-is is a loss even in terms of size, because it + // will have to be scalarized, while the alternative code sequence can be + // performed in vector form. 
+ return OptSize && !VT.isVector(); +} + +bool X86TargetLowering::isPow2SDivCheap(EVT VT, bool OptSize) const { + return isIntDivCheap(VT, OptSize); +} Index: test/CodeGen/X86/divide-by-constant.ll =================================================================== --- test/CodeGen/X86/divide-by-constant.ll +++ test/CodeGen/X86/divide-by-constant.ll @@ -94,3 +94,35 @@ ; CHECK: shrl $11 ; CHECK: ret } + +define i32 @testsize1(i32 %x) minsize nounwind { +entry: + %div = sdiv i32 %x, 32 + ret i32 %div +; CHECK-LABEL: testsize1: +; CHECK: divl +} + +define i32 @testsize2(i32 %x) minsize nounwind { +entry: + %div = sdiv i32 %x, 33 + ret i32 %div +; CHECK-LABEL: testsize2: +; CHECK: divl +} + +define i32 @testsize3(i32 %x) minsize nounwind { +entry: + %div = udiv i32 %x, 32 + ret i32 %div +; CHECK-LABEL: testsize3: +; CHECK: shrl +} + +define i32 @testsize4(i32 %x) minsize nounwind { +entry: + %div = udiv i32 %x, 33 + ret i32 %div +; CHECK-LABEL: testsize4: +; CHECK: divl +} Index: test/CodeGen/X86/vec_sdiv_to_shift.ll =================================================================== --- test/CodeGen/X86/vec_sdiv_to_shift.ll +++ test/CodeGen/X86/vec_sdiv_to_shift.ll @@ -13,6 +13,19 @@ ret <8 x i16> %0 } +define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize { +entry: +; CHECK: sdiv_vec8x16_minsize +; CHECK: psraw $15 +; CHECK: vpsrlw $11 +; CHECK: vpaddw +; CHECK: vpsraw $5 +; CHECK: ret + %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32> + ret <8 x i16> %0 +} + + define <4 x i32> @sdiv_zero(<4 x i32> %var) { entry: ; CHECK: sdiv_zero