Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -402,6 +402,8 @@ /// immediate of the specified type. int getIntImmCost(const APInt &Imm, Type *Ty) const; + bool isImmediateInRangeForLoad(const APInt &Imm) const; + /// \brief Return the expected cost of materialization for the given integer /// immediate of the specified type for a given instruction. The cost can be /// zero if the immediate can be folded into the specified instruction. @@ -652,6 +654,7 @@ virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0; virtual bool haveFastSqrt(Type *Ty) = 0; virtual int getFPOpCost(Type *Ty) = 0; + virtual bool isImmediateInRangeForLoad(const APInt &Imm) = 0; virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0; virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) = 0; @@ -822,6 +825,10 @@ int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); } + bool isImmediateInRangeForLoad(const APInt &Imm) override { + return Impl.isImmediateInRangeForLoad(Imm); + } + int getIntImmCost(const APInt &Imm, Type *Ty) override { return Impl.getIntImmCost(Imm, Ty); } Index: include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- include/llvm/Analysis/TargetTransformInfoImpl.h +++ include/llvm/Analysis/TargetTransformInfoImpl.h @@ -254,6 +254,8 @@ unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; } + bool isImmediateInRangeForLoad(const APInt &Imm) { return true; } + unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) { return TTI::TCC_Free; Index: lib/Analysis/TargetTransformInfo.cpp =================================================================== --- lib/Analysis/TargetTransformInfo.cpp +++ lib/Analysis/TargetTransformInfo.cpp @@ -202,6 +202,10 @@ return Cost; } +bool TargetTransformInfo::isImmediateInRangeForLoad(const APInt &Imm) const { + return TTIImpl->isImmediateInRangeForLoad(Imm); +} + int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const { int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty); Index: lib/Target/ARM/ARMTargetTransformInfo.h =================================================================== --- lib/Target/ARM/ARMTargetTransformInfo.h +++ lib/Target/ARM/ARMTargetTransformInfo.h @@ -64,6 +64,8 @@ /// \name Scalar TTI Implementations /// @{ + bool isImmediateInRangeForLoad(const APInt &Imm); + using BaseT::getIntImmCost; int getIntImmCost(const APInt &Imm, Type *Ty); Index: lib/Target/ARM/ARMTargetTransformInfo.cpp =================================================================== --- lib/Target/ARM/ARMTargetTransformInfo.cpp +++ lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -47,6 +47,13 @@ return 3; } +bool ARMTTIImpl::isImmediateInRangeForLoad(const APInt &Imm) { + if (Imm.isNonNegative() && Imm.getLimitedValue() < 256) + return true; + + return false; +} + int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) { // Division by a constant can be turned into multiplication, but only if we Index: lib/Transforms/Scalar/ConstantHoisting.cpp =================================================================== --- lib/Transforms/Scalar/ConstantHoisting.cpp +++ lib/Transforms/Scalar/ConstantHoisting.cpp @@ -164,6 +164,12 @@ void collectConstantCandidates(Function &Fn); void findAndMakeBaseConstant(ConstCandVecType::iterator S, ConstCandVecType::iterator E); + + bool CandidatesHaveUses(ConstCandVecType::iterator S, ConstCandVecType::iterator E); + llvm::Optional calcOffsetDiff(APInt V1, APInt V2); + ConstCandVecType::iterator maximizeConstantsInRange(ConstCandVecType::iterator S, + ConstCandVecType::iterator E); + void printOffsetRange(ConstCandVecType::iterator S, ConstCandVecType::iterator E); void findBaseConstants(); void emitBaseConstants(Instruction *Base, Constant *Offset, const ConstantUser &ConstUser); @@ -382,32 +388,162 @@ collectConstantCandidates(ConstCandMap, &Inst); } +void ConstantHoisting::printOffsetRange(ConstCandVecType::iterator P1, ConstCandVecType::iterator P2) { + DEBUG(dbgs() << "Range: ["); + DEBUG(dbgs() << P1->ConstInt->getValue()); + DEBUG(dbgs() << ", "); + DEBUG(dbgs() << P2->ConstInt->getValue()); + DEBUG(dbgs() << "]\n"); +} + +// This helper function is necessary to deal with values that have differend bit widths +// (APInt Operator- does not like that). If the value cannot be represented +// in uint64 we return an "empty" APInt. This is then interpreted as the value is not +// in range. +llvm::Optional ConstantHoisting::calcOffsetDiff(APInt V1, APInt V2) +{ + llvm::Optional Res; + unsigned BW = V1.getBitWidth() > V2.getBitWidth() ? V1.getBitWidth() : V2.getBitWidth(); + uint64_t LimVal1 = V1.getLimitedValue(); + uint64_t LimVal2 = V2.getLimitedValue(); + + DEBUG(dbgs() << "Diffing " << V1 << " and " << V2 << "\n"); + if (LimVal1 == ~0ULL || LimVal2 == ~0ULL) { + return Res; + } + + uint64_t Diff = LimVal1 - LimVal2; + return APInt(BW, Diff, true); +} + +// From the list of constants, we want to select the best possible candidate that serves as +// the base constant. For example: +// +// Constants| 2 | 4 | 12 | 42 | +// NumUses | 3 | 2 | 8 | 7 | +// +// Selecting constant 12 because it has the most uses will generate negative offsets +// for constants 2 and 4 (i.e. -10 and -8 respectively). If negative offsets lead to less +// optimal code generation, then there might be better solutions. Suppose immediates in the +// range of 0..35 are most optimally supported by the architecture, then selecting constant +// 2 is most optimal because this will generate offsets: 0 2 10 40. Offsets 0, 2 and 10 +// are in range 0..35, and thus 3 + 2 + 8 = 13 uses are in range. Selecting 12 would have +// only 8 uses in range, so choosing 2 as a base is more optimal. +// Thus, in selecting the base constant, there are two important factors: i) the number of +// uses, and ii) the range of the offsets. +// +// A straight forward implementation would evaluate all options, i.e. compare each element +// with each other element, take the difference and if this difference is in range, then the +// number of uses of are added to a gain. The constant with the biggest gain is selected +// as the base. This straight forward implement however is O(N^2). Here we implement a more +// efficient algorithm that works in linear time. It uses two pointers P1 and P2 that move +// like a sliding window throught the array to maximise the constants that are in the +// supported range. This works because array ConstCandVec is sorted. +// +std::vector::iterator +ConstantHoisting::maximizeConstantsInRange(ConstCandVecType::iterator S, + ConstCandVecType::iterator E) { + auto BestOffset = S; + auto P1 = S; + auto P2 = S; + unsigned MaxGain = 0; + unsigned Gain = 0; + bool P2WrappedAround = false; + llvm::Optional Diff; + + while(1) { + DEBUG(dbgs() << "--Advancing P2\n"); + DEBUG(dbgs() << "P1 start value: " << P1->ConstInt->getValue() << "\n"); + DEBUG(dbgs() << "P2 start value: " << P2->ConstInt->getValue() << "\n"); + + Diff = calcOffsetDiff(P2->ConstInt->getValue(), P1->ConstInt->getValue()); + while (Diff && TTI->isImmediateInRangeForLoad(Diff.getValue())) { + printOffsetRange(P1, P2); + if (P1 == P2 && P2WrappedAround) { + return BestOffset; + } + + if (P1 == P2) { + Gain = 0; + } + + Gain += P2->Uses.size(); + if (Gain > MaxGain) { + MaxGain = Gain; + BestOffset = P1; + } + DEBUG(dbgs() << "Gain = " << Gain << " after increment\n"); + DEBUG(dbgs() << "MaxGain = " << MaxGain << "\n"); + + P2++; + if (P2 == ConstCandVec.end()) { + DEBUG(dbgs() << "P2 is at the end, resetting to beginning\n"); + P2 = ConstCandVec.begin(); + P2WrappedAround = true; + } + Diff = calcOffsetDiff(P2->ConstInt->getValue(), P1->ConstInt->getValue()); + } + printOffsetRange(P1, P2); + DEBUG(dbgs() << "--Advancing P2: done, it is now out of range\n"); + DEBUG(dbgs() << "--Advancing P1\n"); + + do { + printOffsetRange(P1, P2); + Gain -= P1->Uses.size(); + DEBUG(dbgs() << "Gain = " << Gain << " (decr)\n"); + P1++; + if (P1 == ConstCandVec.end()) { + DEBUG(dbgs() << "P1 is at the end, all done!\n"); + return BestOffset; + } + printOffsetRange(P1, P2); + + Diff = calcOffsetDiff(P2->ConstInt->getValue(), P1->ConstInt->getValue()); + } while(P1 < P2 && Diff && !TTI->isImmediateInRangeForLoad(Diff.getValue())); + DEBUG(dbgs() << "--Advancing P1: done, imm is in range or P1 == P2\n"); + } + DEBUG(dbgs() << "MaxGain = " << MaxGain << "\n"); + return BestOffset; +} + +// This is a check to see if there is any work to do; we don't want +// to rebase constants when the candidates all together have none or 1 use. +bool ConstantHoisting::CandidatesHaveUses(ConstCandVecType::iterator S, + ConstCandVecType::iterator E) { + unsigned NumUses = 0; + for (auto i = S; i != E; ++i) { + NumUses += i->Uses.size(); + if (NumUses > 1) + return true; + } + return false; +} + /// \brief Find the base constant within the given range and rebase all other /// constants with respect to the base constant. void ConstantHoisting::findAndMakeBaseConstant(ConstCandVecType::iterator S, ConstCandVecType::iterator E) { - auto MaxCostItr = S; - unsigned NumUses = 0; - // Use the constant that has the maximum cost as base constant. - for (auto ConstCand = S; ConstCand != E; ++ConstCand) { - NumUses += ConstCand->Uses.size(); - if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost) - MaxCostItr = ConstCand; - } - - // Don't hoist constants that have only one use. - if (NumUses <= 1) + auto BestBaseConstantItr = S; + if (!CandidatesHaveUses(S, E)) return; + BestBaseConstantItr = maximizeConstantsInRange(S, E); + ConstantInfo ConstInfo; - ConstInfo.BaseConstant = MaxCostItr->ConstInt; + ConstInfo.BaseConstant = BestBaseConstantItr->ConstInt; Type *Ty = ConstInfo.BaseConstant->getType(); + DEBUG(dbgs() << "\nRebasing constants\n"); // Rebase the constants with respect to the base constant. for (auto ConstCand = S; ConstCand != E; ++ConstCand) { APInt Diff = ConstCand->ConstInt->getValue() - ConstInfo.BaseConstant->getValue(); Constant *Offset = Diff == 0 ? nullptr : ConstantInt::get(Ty, Diff); + if (Offset) + DEBUG(dbgs() << "offset = " << *Offset << "\n"); + else + DEBUG(dbgs() << "offset = 0\n"); + ConstInfo.RebasedConstants.push_back( RebasedConstantInfo(std::move(ConstCand->Uses), Offset)); } @@ -565,8 +701,9 @@ // Emit materialization code for all rebased constants. for (auto const &RCI : ConstInfo.RebasedConstants) { NumConstantsRebased++; - for (auto const &U : RCI.Uses) + for (auto const &U : RCI.Uses) { emitBaseConstants(Base, RCI.Offset, U); + } } // Use the same debug location as the last user of the constant. Index: test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll =================================================================== --- test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll +++ test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll @@ -0,0 +1,32 @@ +; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -S < %s | FileCheck %s + +; There are different candidates here for the base constant: 1073876992 and +; 1073876996. But we don't want to see the latter because it results in +; negative offsets. + +define void @foo() { +entry: +; CHECK-LABEL: @foo +; CHECK: [[CONST1:%const[0-9]*]] = bitcast i32 1073876992 to i32 + %0 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096 + %or = or i32 %0, 1 + store volatile i32 %or, i32* inttoptr (i32 1073876992 to i32*), align 4096 + %1 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4 + %and = and i32 %1, -117506048 + store volatile i32 %and, i32* inttoptr (i32 1073876996 to i32*), align 4 +; CHECK: [[CONSTMAT1:%const_mat[0-9]*]] = add i32 [[CONST1]], 4 + %2 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096 + %and1 = and i32 %2, -17367041 + store volatile i32 %and1, i32* inttoptr (i32 1073876992 to i32*), align 4096 + %3 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096 + %and2 = and i32 %3, -262145 + store volatile i32 %and2, i32* inttoptr (i32 1073876992 to i32*), align 4096 + %4 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4 + %and3 = and i32 %4, -8323073 + store volatile i32 %and3, i32* inttoptr (i32 1073876996 to i32*), align 4 + store volatile i32 10420224, i32* inttoptr (i32 1073877000 to i32*), align 8 + %5 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096 + %or4 = or i32 %5, 65536 + store volatile i32 %or4, i32* inttoptr (i32 1073876992 to i32*), align 4096 + ret void +} Index: test/Transforms/ConstantHoisting/PowerPC/masks.ll =================================================================== --- test/Transforms/ConstantHoisting/PowerPC/masks.ll +++ test/Transforms/ConstantHoisting/PowerPC/masks.ll @@ -38,20 +38,24 @@ define i32 @test2() nounwind { entry: ; CHECK-LABEL: @test2 -; CHECK: bitcast i32 65531 to i32 +; CHECK: [[CONST:%const]] = bitcast i32 32773 to i32 +; CHECK: [[CONSTMAT:%const_mat[0-9]*]] = add i32 [[CONST]], 32758 +; CHECK: and i32 undef, [[CONSTMAT]] %conv121 = and i32 undef, 65531 br i1 undef, label %if.then152, label %if.end167 if.then152: +; CHECK: [[CONSTMAT2:%const_mat[0-9]*]] = add i32 %const, 32758 +; CHECK: %conv153 = and i32 undef, [[CONSTMAT2]] %conv153 = and i32 undef, 65531 br i1 undef, label %if.end167, label %end2 if.end167: -; CHECK: add i32 {{.*}}, -32758 %shl161 = shl nuw nsw i32 %conv121, 15 %0 = load i8, i8* undef, align 1 %conv169 = zext i8 %0 to i32 %shl170 = shl nuw nsw i32 %conv169, 7 +; CHECK: and i32 %shl161, [[CONST]] %shl161.masked = and i32 %shl161, 32773 %conv174 = or i32 %shl170, %shl161.masked %cmp178 = icmp ugt i32 %conv174, 32767 Index: test/Transforms/ConstantHoisting/X86/phi.ll =================================================================== --- test/Transforms/ConstantHoisting/X86/phi.ll +++ test/Transforms/ConstantHoisting/X86/phi.ll @@ -20,7 +20,11 @@ ; CHECK-LABEL: @test1 ; CHECK: if.end: -; CHECK: %2 = inttoptr i64 %const to i8* +; OLDHECK: %2 = inttoptr i64 %const to i8* +; CHECK: [[CONSTMAT:%const_mat[0-9]*]] = add i64 %const, 1 +; CHECK: %1 = inttoptr i64 [[CONSTMAT]] to i8* +; CHECK: [[CONSTMAT2:%const_mat[0-9]*]] = add i64 %const, 1 +; CHECK: %2 = inttoptr i64 [[CONSTMAT2]] to i8* ; CHECK-NEXT: br ; CHECK: return: ; CHECK-NEXT: %retval.0 = phi i8* [ null, %entry ], [ %2, %if.end ] @@ -41,8 +45,7 @@ ; CHECK-LABEL: @test2 ; CHECK: return: -; CHECK-NEXT: %const_mat = add i64 %const, -1 -; CHECK-NEXT: inttoptr i64 %const_mat to i64* +; CHECK-NEXT: inttoptr i64 %const to i64* } declare void @foo(i8*)