Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -226,7 +226,11 @@ /// Return true if integer divide is usually cheaper than a sequence of /// several shifts, adds, and multiplies for this target. - bool isIntDivCheap() const { return IntDivIsCheap; } + /// The definition of "cheaper" may depend on whether we're optimizing + /// for speed or for size. + virtual bool isIntDivCheap(EVT VT, bool OptSize) const { + return false; + } /// Return true if sqrt(x) is as cheap or cheaper than 1 / rsqrt(x) bool isFsqrtCheap() const { @@ -242,9 +246,6 @@ return BypassSlowDivWidths; } - /// Return true if pow2 sdiv is cheaper than a chain of sra/srl/add/sra. - bool isPow2SDivCheap() const { return Pow2SDivIsCheap; } - /// Return true if Flow Control is an expensive operation that should be /// avoided. bool isJumpExpensive() const { return JumpIsExpensive; } @@ -1252,11 +1253,6 @@ /// control. void setJumpIsExpensive(bool isExpensive = true); - /// Tells the code generator that integer divide is expensive, and if - /// possible, should be replaced by an alternate sequence of instructions not - /// containing an integer divide. - void setIntDivIsCheap(bool isCheap = true) { IntDivIsCheap = isCheap; } - /// Tells the code generator that fsqrt is cheap, and should not be replaced /// with an alternative sequence of instructions. void setFsqrtIsCheap(bool isCheap = true) { FsqrtIsCheap = isCheap; } @@ -1272,10 +1268,6 @@ BypassSlowDivWidths[SlowBitWidth] = FastBitWidth; } - /// Tells the code generator that it shouldn't generate sra/srl/add/sra for a - /// signed divide by power of two; let the target handle it. - void setPow2SDivIsCheap(bool isCheap = true) { Pow2SDivIsCheap = isCheap; } - /// Add the specified register class as an available regclass for the /// specified value type. 
This indicates the selector can handle values of /// that class natively. @@ -1763,12 +1755,6 @@ /// combined with "shift" to BitExtract instructions. bool HasExtractBitsInsn; - /// Tells the code generator not to expand integer divides by constants into a - /// sequence of muls, adds, and shifts. This is a hack until a real cost - /// model is in place. If we ever optimize for size, this will be set to true - /// unconditionally. - bool IntDivIsCheap; - // Don't expand fsqrt with an approximation based on the inverse sqrt. bool FsqrtIsCheap; @@ -1778,10 +1764,6 @@ /// div/rem when the operands are positive and less than 256. DenseMap<unsigned int, unsigned int> BypassSlowDivWidths; - /// Tells the code generator that it shouldn't generate sra/srl/add/sra for a - /// signed divide by power of two; let the target handle it. - bool Pow2SDivIsCheap; - /// Tells the code generator that it shouldn't generate extra flow control /// instructions and should attempt to combine flow control instructions via /// predication. Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2176,6 +2176,7 @@ N0, N1); } + bool MinSize = DAG.getMachineFunction().getFunction()->optForMinSize(); // fold (sdiv X, pow2) -> simple ops after legalize // FIXME: We check for the exact bit here because the generic lowering gives // better results in that case. The target-specific lowering should learn how @@ -2184,9 +2185,8 @@ !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() && (N1C->getAPIntValue().isPowerOf2() || (-N1C->getAPIntValue()).isPowerOf2())) { - // If dividing by powers of two is cheap, then don't perform the following - // fold. - if (TLI.isPow2SDivCheap()) + // If integer division is cheap, then don't perform the following fold. + if (TLI.isIntDivCheap(N->getValueType(0), MinSize)) return SDValue(); // Target-specific implementation of sdiv x, pow2. 
@@ -2226,7 +2226,7 @@ // If integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. - if (N1C && !TLI.isIntDivCheap()) + if (N1C && !TLI.isIntDivCheap(N->getValueType(0), MinSize)) if (SDValue Op = BuildSDIV(N)) return Op; @@ -2280,8 +2280,10 @@ } } } + // fold (udiv x, c) -> alternate - if (N1C && !TLI.isIntDivCheap()) + bool MinSize = DAG.getMachineFunction().getFunction()->optForMinSize(); + if (N1C && !TLI.isIntDivCheap(N->getValueType(0), MinSize)) if (SDValue Op = BuildUDIV(N)) return Op; Index: lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- lib/CodeGen/TargetLoweringBase.cpp +++ lib/CodeGen/TargetLoweringBase.cpp @@ -758,9 +758,7 @@ SelectIsExpensive = false; HasMultipleConditionRegisters = false; HasExtractBitsInsn = false; - IntDivIsCheap = false; FsqrtIsCheap = false; - Pow2SDivIsCheap = false; JumpIsExpensive = JumpIsExpensiveOverride; PredictableSelectIsExpensive = false; MaskAndBranchFoldingIsLegal = false; Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -429,10 +429,6 @@ setSelectIsExpensive(false); PredictableSelectIsExpensive = false; - // There are no integer divide instructions, and these expand to a pretty - // large sequence of instructions. - setIntDivIsCheap(false); - setPow2SDivIsCheap(false); setFsqrtIsCheap(true); // FIXME: Need to really handle these. 
Index: lib/Target/MSP430/MSP430ISelLowering.cpp =================================================================== --- lib/Target/MSP430/MSP430ISelLowering.cpp +++ lib/Target/MSP430/MSP430ISelLowering.cpp @@ -69,10 +69,6 @@ computeRegisterProperties(STI.getRegisterInfo()); // Provide all sorts of operation actions - - // Division is expensive - setIntDivIsCheap(false); - setStackPointerRegisterToSaveRestore(MSP430::SP); setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -902,6 +902,8 @@ /// \brief Customize the preferred legalization strategy for certain types. LegalizeTypeAction getPreferredVectorAction(EVT VT) const override; + bool isIntDivCheap(EVT VT, bool OptSize) const override; + protected: std::pair<const TargetRegisterClass *, uint8_t> findRepresentativeClass(const TargetRegisterInfo *TRI, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -26422,3 +26422,14 @@ bool X86TargetLowering::isTargetFTOL() const { return Subtarget->isTargetKnownWindowsMSVC() && !Subtarget->is64Bit(); } + +bool X86TargetLowering::isIntDivCheap(EVT VT, bool OptSize) const { + // Integer division on x86 is expensive. However, when aggressively optimizing + // for code size, we prefer to leave the div as is, as it is usually smaller + // than the alternative sequence. + // The exception to this is vector divs. Since we don't have vector integer + // division, leaving the div as-is is a loss even in terms of size, because it + // will have to be scalarized, while the alternative code sequence can be + // performed in vector form. 
+ return OptSize && !VT.isVector(); +} Index: lib/Target/XCore/XCoreISelLowering.cpp =================================================================== --- lib/Target/XCore/XCoreISelLowering.cpp +++ lib/Target/XCore/XCoreISelLowering.cpp @@ -79,9 +79,6 @@ // Compute derived properties from the register classes computeRegisterProperties(Subtarget.getRegisterInfo()); - // Division is expensive - setIntDivIsCheap(false); - setStackPointerRegisterToSaveRestore(XCore::SP); setSchedulingPreference(Sched::Source); Index: test/CodeGen/X86/divide-by-constant.ll =================================================================== --- test/CodeGen/X86/divide-by-constant.ll +++ test/CodeGen/X86/divide-by-constant.ll @@ -94,3 +94,35 @@ ; CHECK: shrl $11 ; CHECK: ret } + +define i32 @testsize1(i32 %x) minsize nounwind { +entry: + %div = sdiv i32 %x, 32 + ret i32 %div +; CHECK-LABEL: testsize1: +; CHECK: divl +} + +define i32 @testsize2(i32 %x) minsize nounwind { +entry: + %div = sdiv i32 %x, 33 + ret i32 %div +; CHECK-LABEL: testsize2: +; CHECK: divl +} + +define i32 @testsize3(i32 %x) minsize nounwind { +entry: + %div = udiv i32 %x, 32 + ret i32 %div +; CHECK-LABEL: testsize3: +; CHECK: shrl +} + +define i32 @testsize4(i32 %x) minsize nounwind { +entry: + %div = udiv i32 %x, 33 + ret i32 %div +; CHECK-LABEL: testsize4: +; CHECK: divl +} Index: test/CodeGen/X86/vec_sdiv_to_shift.ll =================================================================== --- test/CodeGen/X86/vec_sdiv_to_shift.ll +++ test/CodeGen/X86/vec_sdiv_to_shift.ll @@ -13,6 +13,19 @@ ret <8 x i16> %0 } +define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize { +entry: +; CHECK: sdiv_vec8x16_minsize +; CHECK: psraw $15 +; CHECK: vpsrlw $11 +; CHECK: vpaddw +; CHECK: vpsraw $5 +; CHECK: ret + %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32> + ret <8 x i16> %0 +} + + define <4 x i32> @sdiv_zero(<4 x i32> %var) { entry: ; CHECK: sdiv_zero