Index: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h @@ -414,6 +414,16 @@ Type *Ty) const; int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) const; + + /// \brief Return the expected cost for the given integer when optimising + /// for size. This is different than the other integer immediate cost + /// functions in that it is subtarget agnostic. This is useful when you e.g. + /// target one ISA such as Aarch32 but smaller encodings could be possible + /// with another such as Thumb. This return value is used as a penalty when + /// the total costs for a constant is calculated (the bigger the cost, the + /// more beneficial constant hoisting is). + int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, + Type *Ty) const; /// @} /// \name Vector Target Information @@ -665,6 +675,8 @@ virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0; virtual bool haveFastSqrt(Type *Ty) = 0; virtual int getFPOpCost(Type *Ty) = 0; + virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, + Type *Ty) = 0; virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0; virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) = 0; @@ -841,6 +853,10 @@ int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); } + int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, + Type *Ty) override { + return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty); + } int getIntImmCost(const APInt &Imm, Type *Ty) override { return Impl.getIntImmCost(Imm, Ty); } Index: llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -257,6 +257,11 @@ unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; } + int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty) { + return 0; + } + unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; } unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Index: llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h +++ llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h @@ -134,6 +134,9 @@ void collectConstantCandidates(Function &Fn); void findAndMakeBaseConstant(ConstCandVecType::iterator S, ConstCandVecType::iterator E); + unsigned maximizeConstantsInRange(ConstCandVecType::iterator S, + ConstCandVecType::iterator E, + ConstCandVecType::iterator &MaxCostItr); void findBaseConstants(); void emitBaseConstants(Instruction *Base, Constant *Offset, const consthoist::ConstantUser &ConstUser); Index: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp =================================================================== --- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp +++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp @@ -209,6 +209,14 @@ return Cost; } +int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, + const APInt &Imm, + Type *Ty) const { + int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { int Cost = TTIImpl->getIntImmCost(Imm, Ty); assert(Cost >= 0 && "TTI should not produce negative costs!"); Index: llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.h =================================================================== --- llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.h +++ llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.h @@ -64,6 +64,9 @@ /// \name Scalar TTI Implementations /// @{ + int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty); + using BaseT::getIntImmCost; int getIntImmCost(const APInt &Imm, Type *Ty); Index: llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -47,6 +47,17 @@ return 3; } + +// Constants smaller than 256 fit in the immediate field of +// Thumb1 instructions so we return a zero cost and 1 otherwise. +int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty) { + if (Imm.isNonNegative() && Imm.getLimitedValue() < 256) + return 0; + + return 1; +} + int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) { // Division by a constant can be turned into multiplication, but only if we Index: llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp +++ llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -285,18 +285,109 @@ collectConstantCandidates(ConstCandMap, &Inst); } -/// \brief Find the base constant within the given range and rebase all other -/// constants with respect to the base constant. -void ConstantHoistingPass::findAndMakeBaseConstant( - ConstCandVecType::iterator S, ConstCandVecType::iterator E) { - auto MaxCostItr = S; +// This helper function is necessary to deal with values that have different +// bit widths (APInt Operator- does not like that). If the value cannot be +// represented in uint64 we return an "empty" APInt. This is then interpreted +// as the value is not in range. +static llvm::Optional calculateOffsetDiff(APInt V1, APInt V2) +{ + llvm::Optional Res = None; + unsigned BW = V1.getBitWidth() > V2.getBitWidth() ? + V1.getBitWidth() : V2.getBitWidth(); + uint64_t LimVal1 = V1.getLimitedValue(); + uint64_t LimVal2 = V2.getLimitedValue(); + + if (LimVal1 == ~0ULL || LimVal2 == ~0ULL) + return Res; + + uint64_t Diff = LimVal1 - LimVal2; + return APInt(BW, Diff, true); +} + +// From a list of constants, one needs to picked as the base and the other +// constants will be transformed into an offset from that base constant. The +// question is which we can pick best? For example, consider these constants +// and their number of uses: +// +// Constants| 2 | 4 | 12 | 42 | +// NumUses | 3 | 2 | 8 | 7 | +// +// Selecting constant 12 because it has the most uses will generate negative +// offsets for constants 2 and 4 (i.e. -10 and -8 respectively). If negative +// offsets lead to less optimal code generation, then there might be better +// solutions. Suppose immediates in the range of 0..35 are most optimally +// supported by the architecture, then selecting constant 2 is most optimal +// because this will generate offsets: 0, 2, 10, 40. Offsets 0, 2 and 10 are in +// range 0..35, and thus 3 + 2 + 8 = 13 uses are in range. Selecting 12 would +// have only 8 uses in range, so choosing 2 as a base is more optimal. Thus, in +// selecting the base constant the range of the offsets is a very important +// factor too that we take into account here. This algorithm calculates a total +// costs for selecting a constant as the base and substract the costs if +// immediates are out of range. It has quadratic complexity, so we call this +// function only when we're optimising for size and there are less than 100 +// constants, we fall back to the straightforward algorithm otherwise +// which does not do all the offset calculations. +unsigned +ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S, + ConstCandVecType::iterator E, + ConstCandVecType::iterator &MaxCostItr) { unsigned NumUses = 0; - // Use the constant that has the maximum cost as base constant. + + if(!Entry->getParent()->optForSize() || std::distance(S,E) > 100) { + for (auto ConstCand = S; ConstCand != E; ++ConstCand) { + NumUses += ConstCand->Uses.size(); + if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost) + MaxCostItr = ConstCand; + } + return NumUses; + } + + DEBUG(dbgs() << "== Maximize constants in range ==\n"); + int MaxCost = -1; for (auto ConstCand = S; ConstCand != E; ++ConstCand) { + auto Value = ConstCand->ConstInt->getValue(); + Type *Ty = ConstCand->ConstInt->getType(); + int Cost = 0; NumUses += ConstCand->Uses.size(); - if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost) + DEBUG(dbgs() << "= Constant: " << ConstCand->ConstInt->getValue() << "\n"); + + for (auto User : ConstCand->Uses) { + unsigned Opcode = User.Inst->getOpcode(); + unsigned OpndIdx = User.OpndIdx; + Cost += TTI->getIntImmCost(Opcode, OpndIdx, Value, Ty); + DEBUG(dbgs() << "Cost: " << Cost << "\n"); + + for (auto C2 = S; C2 != E; ++C2) { + llvm::Optional Diff = calculateOffsetDiff( + C2->ConstInt->getValue(), + ConstCand->ConstInt->getValue()); + if (Diff) { + const int ImmCosts = + TTI->getIntImmCodeSizeCost(Opcode, OpndIdx, Diff.getValue(), Ty); + Cost -= ImmCosts; + DEBUG(dbgs() << "Offset " << Diff.getValue() << " " + << "has penalty: " << ImmCosts << "\n" + << "Adjusted cost: " << Cost << "\n"); + } + } + } + DEBUG(dbgs() << "Cumulative cost: " << Cost << "\n"); + if (Cost > MaxCost) { + MaxCost = Cost; MaxCostItr = ConstCand; + DEBUG(dbgs() << "New candidate: " << MaxCostItr->ConstInt->getValue() + << "\n"); + } } + return NumUses; +} + +/// \brief Find the base constant within the given range and rebase all other +/// constants with respect to the base constant. +void ConstantHoistingPass::findAndMakeBaseConstant( + ConstCandVecType::iterator S, ConstCandVecType::iterator E) { + auto MaxCostItr = S; + unsigned NumUses = maximizeConstantsInRange(S, E, MaxCostItr); // Don't hoist constants that have only one use. if (NumUses <= 1) Index: llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll =================================================================== --- llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll +++ llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll @@ -0,0 +1,42 @@ +; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -S < %s | FileCheck %s + +; There are different candidates here for the base constant: 1073876992 and +; 1073876996. But we don't want to see the latter because it results in +; negative offsets. + +define void @foo() #0 { +entry: +; CHECK-LABEL: @foo +; CHECK-NOT: [[CONST1:%const_mat[0-9]*]] = add i32 %const, -4 + %0 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096 + %or = or i32 %0, 1 + store volatile i32 %or, i32* inttoptr (i32 1073876992 to i32*), align 4096 + %1 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4 + %and = and i32 %1, -117506048 + store volatile i32 %and, i32* inttoptr (i32 1073876996 to i32*), align 4 + %2 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096 + %and1 = and i32 %2, -17367041 + store volatile i32 %and1, i32* inttoptr (i32 1073876996 to i32*), align 4096 + %3 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096 + %and2 = and i32 %3, -262145 + store volatile i32 %and2, i32* inttoptr (i32 1073876992 to i32*), align 4096 + %4 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4 + %and3 = and i32 %4, -8323073 + store volatile i32 %and3, i32* inttoptr (i32 1073876996 to i32*), align 4 + store volatile i32 10420224, i32* inttoptr (i32 1073877000 to i32*), align 8 + %5 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4096 + %or4 = or i32 %5, 65536 + store volatile i32 %or4, i32* inttoptr (i32 1073876996 to i32*), align 4096 + %6 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192 + %or6.i.i = or i32 %6, 16 + store volatile i32 %or6.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192 + %7 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192 + %and7.i.i = and i32 %7, -4 + store volatile i32 %and7.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192 + %8 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192 + %or8.i.i = or i32 %8, 2 + store volatile i32 %or8.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192 + ret void +} + +attributes #0 = { minsize norecurse nounwind optsize readnone uwtable }