Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -409,6 +409,15 @@ Type *Ty) const; int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) const; + + /// \brief Return the expected cost for the given integer when optimising + /// for size. This is different than the other integer immediate cost + /// functions in that it is subtarget agnostic. This is useful when you e.g. + /// target one ISA such as Aarch32 but smaller encodings could be possible + /// with another such as Thumb. This return value is used as a penalty when + /// the total costs for a constant is calculated (the bigger the cost, the + /// more beneficial constant hoisting is). + int getIntImmCodeSizeCost(const APInt &Imm) const; /// @} /// \name Vector Target Information @@ -656,6 +665,7 @@ virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0; virtual bool haveFastSqrt(Type *Ty) = 0; virtual int getFPOpCost(Type *Ty) = 0; + virtual int getIntImmCodeSizeCost(const APInt &Imm) = 0; virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0; virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) = 0; @@ -827,6 +837,9 @@ int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); } + int getIntImmCodeSizeCost(const APInt &Imm) override { + return Impl.getIntImmCodeSizeCost(Imm); + } int getIntImmCost(const APInt &Imm, Type *Ty) override { return Impl.getIntImmCost(Imm, Ty); } Index: include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- include/llvm/Analysis/TargetTransformInfoImpl.h +++ include/llvm/Analysis/TargetTransformInfoImpl.h @@ -252,6 +252,8 @@ unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; } + int getIntImmCodeSizeCost(const APInt &Imm) { return 0; } + unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; } unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Index: include/llvm/Transforms/Scalar/ConstantHoisting.h =================================================================== --- include/llvm/Transforms/Scalar/ConstantHoisting.h +++ include/llvm/Transforms/Scalar/ConstantHoisting.h @@ -134,6 +134,9 @@ void collectConstantCandidates(Function &Fn); void findAndMakeBaseConstant(ConstCandVecType::iterator S, ConstCandVecType::iterator E); + unsigned maximizeConstantsInRange(ConstCandVecType::iterator S, + ConstCandVecType::iterator E, + ConstCandVecType::iterator &MaxCostItr); void findBaseConstants(); void emitBaseConstants(Instruction *Base, Constant *Offset, const consthoist::ConstantUser &ConstUser); Index: lib/Analysis/TargetTransformInfo.cpp =================================================================== --- lib/Analysis/TargetTransformInfo.cpp +++ lib/Analysis/TargetTransformInfo.cpp @@ -196,6 +196,12 @@ return Cost; } +int TargetTransformInfo::getIntImmCodeSizeCost(const APInt &Imm) const { + int Cost = TTIImpl->getIntImmCodeSizeCost(Imm); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { int Cost = TTIImpl->getIntImmCost(Imm, Ty); assert(Cost >= 0 && "TTI should not produce negative costs!"); Index: lib/Target/ARM/ARMTargetTransformInfo.h =================================================================== --- lib/Target/ARM/ARMTargetTransformInfo.h +++ lib/Target/ARM/ARMTargetTransformInfo.h @@ -64,6 +64,8 @@ /// \name Scalar TTI Implementations /// @{ + int getIntImmCodeSizeCost(const APInt &Imm); + using BaseT::getIntImmCost; int getIntImmCost(const APInt &Imm, Type *Ty); Index: lib/Target/ARM/ARMTargetTransformInfo.cpp =================================================================== --- lib/Target/ARM/ARMTargetTransformInfo.cpp +++ lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -47,6 +47,16 @@ return 3; } + +// Constants smaller than 256 fit in the immediate field of +// Thumb1 instructions so we return a zero cost and 1 otherwise. +int ARMTTIImpl::getIntImmCodeSizeCost(const APInt &Imm) { + if (Imm.isNonNegative() && Imm.getLimitedValue() < 256) + return 0; + + return 1; +} + int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) { // Division by a constant can be turned into multiplication, but only if we Index: lib/Transforms/Scalar/ConstantHoisting.cpp =================================================================== --- lib/Transforms/Scalar/ConstantHoisting.cpp +++ lib/Transforms/Scalar/ConstantHoisting.cpp @@ -285,17 +285,104 @@ collectConstantCandidates(ConstCandMap, &Inst); } +// This helper function is necessary to deal with values that have differend +// bit widths (APInt Operator- does not like that). If the value cannot be +// represented in uint64 we return an "empty" APInt. This is then interpreted +// as the value is not in range. +static llvm::Optional calculateOffsetDiff(APInt V1, APInt V2) +{ + llvm::Optional Res = None; + unsigned BW = V1.getBitWidth() > V2.getBitWidth() ? + V1.getBitWidth() : V2.getBitWidth(); + uint64_t LimVal1 = V1.getLimitedValue(); + uint64_t LimVal2 = V2.getLimitedValue(); + + if (LimVal1 == ~0ULL || LimVal2 == ~0ULL) + return Res; + + uint64_t Diff = LimVal1 - LimVal2; + return APInt(BW, Diff, true); +} + +// From a list of constants, one needs to picked as the base and the other +// constants will be transformed into an offset from that base constant. The +// question is which we can pick best? For example, consider these constants +// and their number of uses: +// +// Constants| 2 | 4 | 12 | 42 | +// NumUses | 3 | 2 | 8 | 7 | +// +// Selecting constant 12 because it has the most uses will generate negative +// offsets for constants 2 and 4 (i.e. -10 and -8 respectively). If negative +// offsets lead to less optimal code generation, then there might be better +// solutions. Suppose immediates in the range of 0..35 are most optimally +// supported by the architecture, then selecting constant 2 is most optimal +// because this will generate offsets: 0, 2, 10, 40. Offsets 0, 2 and 10 are in +// range 0..35, and thus 3 + 2 + 8 = 13 uses are in range. Selecting 12 would +// have only 8 uses in range, so choosing 2 as a base is more optimal. Thus, in +// selecting the base constant the range of the offsets is a very important +// factor too that we take into account here. This algorithm calculates a total +// costs for selecting a constant as the base and substract the costs if +// immediates are out of range. It has quadratic complexity, so we call this +// function only when we're optimising for size and there are less than 100 +// constants, we fall back to the old algorithm otherwise. +unsigned +ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S, + ConstCandVecType::iterator E, + ConstCandVecType::iterator &MaxCostItr) { + unsigned NumUses = 0; + + DEBUG(dbgs() << "== Maximize constants in range ==\n"); + int MaxCost = -1; + for (auto ConstCand = S; ConstCand != E; ++ConstCand) { + int Cost = 0; + NumUses += ConstCand->Uses.size(); + DEBUG(dbgs() << "= Constant: " << ConstCand->ConstInt->getValue() << "\n"); + for (auto User = ConstCand->Uses.begin(); User != ConstCand->Uses.end(); ++User) { + auto Value = ConstCand->ConstInt->getValue(); + Cost += TTI->getIntImmCost(User->Inst->getOpcode(), User->OpndIdx, + Value, ConstCand->ConstInt->getType()); + DEBUG(dbgs() << "Cost: " << Cost << "\n"); + for (auto C2 = S; C2 != E; ++C2) { + llvm::Optional Diff = calculateOffsetDiff( + C2->ConstInt->getValue(), + ConstCand->ConstInt->getValue()); + if (Diff) { + const int ImmCosts = TTI->getIntImmCodeSizeCost(Diff.getValue()); + Cost -= ImmCosts; + DEBUG(dbgs() << "Offset " << Diff.getValue() << " " + << "has penalty: " << ImmCosts << "\n" + << "Adjusted cost: " << Cost << "\n"); + } + } + } + DEBUG(dbgs() << "Cumulative cost: " << Cost << "\n"); + if (Cost > MaxCost) { + MaxCost = Cost; + MaxCostItr = ConstCand; + DEBUG(dbgs() << "New candidate: " << MaxCostItr->ConstInt->getValue() << "\n"); + } + } + return NumUses; +} + /// \brief Find the base constant within the given range and rebase all other /// constants with respect to the base constant. void ConstantHoistingPass::findAndMakeBaseConstant( ConstCandVecType::iterator S, ConstCandVecType::iterator E) { auto MaxCostItr = S; unsigned NumUses = 0; + // Use the constant that has the maximum cost as base constant. - for (auto ConstCand = S; ConstCand != E; ++ConstCand) { - NumUses += ConstCand->Uses.size(); - if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost) - MaxCostItr = ConstCand; + if(Entry->getParent()->optForSize() && std::distance(S,E) < 100) { + NumUses = maximizeConstantsInRange(S, E, MaxCostItr); + } else { + for (auto ConstCand = S; ConstCand != E; ++ConstCand) { + NumUses += ConstCand->Uses.size(); + if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost) { + MaxCostItr = ConstCand; + } + } } // Don't hoist constants that have only one use.