Index: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1015,10 +1015,17 @@ /// This class is used to measure and compare candidate formulae. class Cost { + const Loop *L = nullptr; + ScalarEvolution *SE = nullptr; + DominatorTree *DT = nullptr; + const TargetTransformInfo *TTI = nullptr; TargetTransformInfo::LSRCost C; public: - Cost() { + Cost() = delete; + Cost(const Loop *L, ScalarEvolution &SE, DominatorTree &DT, + const TargetTransformInfo &TTI) : + L(L), SE(&SE), DT(&DT), TTI(&TTI) { C.Insns = 0; C.NumRegs = 0; C.AddRecCost = 0; @@ -1029,7 +1036,7 @@ C.ScaleCost = 0; } - bool isLess(Cost &Other, const TargetTransformInfo &TTI); + bool isLess(Cost &Other); void Lose(); @@ -1048,12 +1055,9 @@ return C.NumRegs == ~0u; } - void RateFormula(const TargetTransformInfo &TTI, - const Formula &F, + void RateFormula(const Formula &F, SmallPtrSetImpl &Regs, const DenseSet &VisitedRegs, - const Loop *L, - ScalarEvolution &SE, DominatorTree &DT, const LSRUse &LU, SmallPtrSetImpl *LoserRegs = nullptr); @@ -1062,16 +1066,10 @@ private: void RateRegister(const Formula &F, const SCEV *Reg, - SmallPtrSetImpl &Regs, - const Loop *L, - ScalarEvolution &SE, DominatorTree &DT, - const TargetTransformInfo &TTI); + SmallPtrSetImpl &Regs); void RatePrimaryRegister(const Formula &F, const SCEV *Reg, SmallPtrSetImpl &Regs, - const Loop *L, - ScalarEvolution &SE, DominatorTree &DT, - SmallPtrSetImpl *LoserRegs, - const TargetTransformInfo &TTI); + SmallPtrSetImpl *LoserRegs); }; /// An operand value in an instruction which is to be replaced with some @@ -1237,17 +1235,14 @@ /// Tally up interesting quantities from the given register. void Cost::RateRegister(const Formula &F, const SCEV *Reg, - SmallPtrSetImpl &Regs, - const Loop *L, - ScalarEvolution &SE, DominatorTree &DT, - const TargetTransformInfo &TTI) { + SmallPtrSetImpl &Regs) { if (const SCEVAddRecExpr *AR = dyn_cast(Reg)) { // If this is an addrec for another loop, it should be an invariant // with respect to L since L is the innermost loop (at least // for now LSR only handles innermost loops). if (AR->getLoop() != L) { // If the AddRec exists, consider it's register free and leave it alone. - if (isExistingPhi(AR, SE)) + if (isExistingPhi(AR, *SE)) return; // It is bad to allow LSR for current loop to add induction variables @@ -1263,23 +1258,23 @@ } unsigned LoopCost = 1; - if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) || - TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) { + if (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) || + TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) { // If the step size matches the base offset, we could use pre-indexed // addressing. - if (TTI.shouldFavorBackedgeIndex(L)) { - if (auto *Step = dyn_cast(AR->getStepRecurrence(SE))) + if (TTI->shouldFavorBackedgeIndex(L)) { + if (auto *Step = dyn_cast(AR->getStepRecurrence(*SE))) if (Step->getAPInt() == F.BaseOffset) LoopCost = 0; } - if (TTI.shouldFavorPostInc()) { - const SCEV *LoopStep = AR->getStepRecurrence(SE); + if (TTI->shouldFavorPostInc()) { + const SCEV *LoopStep = AR->getStepRecurrence(*SE); if (isa(LoopStep)) { const SCEV *LoopStart = AR->getStart(); if (!isa(LoopStart) && - SE.isLoopInvariant(LoopStart, L)) + SE->isLoopInvariant(LoopStart, L)) LoopCost = 0; } } @@ -1290,7 +1285,7 @@ // TODO: The non-affine case isn't precisely modeled here. if (!AR->isAffine() || !isa(AR->getOperand(1))) { if (!Regs.count(AR->getOperand(1))) { - RateRegister(F, AR->getOperand(1), Regs, L, SE, DT, TTI); + RateRegister(F, AR->getOperand(1), Regs); if (isLoser()) return; } @@ -1303,7 +1298,7 @@ C.SetupCost += getSetupCost(Reg); C.NumIVMuls += isa(Reg) && - SE.hasComputableLoopEvolution(Reg, L); + SE->hasComputableLoopEvolution(Reg, L); } /// Record this register in the set. If we haven't seen it before, rate @@ -1311,27 +1306,21 @@ /// one of those regs an instant loser. void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg, SmallPtrSetImpl &Regs, - const Loop *L, - ScalarEvolution &SE, DominatorTree &DT, - SmallPtrSetImpl *LoserRegs, - const TargetTransformInfo &TTI) { + SmallPtrSetImpl *LoserRegs) { if (LoserRegs && LoserRegs->count(Reg)) { Lose(); return; } if (Regs.insert(Reg).second) { - RateRegister(F, Reg, Regs, L, SE, DT, TTI); + RateRegister(F, Reg, Regs); if (LoserRegs && isLoser()) LoserRegs->insert(Reg); } } -void Cost::RateFormula(const TargetTransformInfo &TTI, - const Formula &F, +void Cost::RateFormula(const Formula &F, SmallPtrSetImpl &Regs, const DenseSet &VisitedRegs, - const Loop *L, - ScalarEvolution &SE, DominatorTree &DT, const LSRUse &LU, SmallPtrSetImpl *LoserRegs) { assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula"); @@ -1344,7 +1333,7 @@ Lose(); return; } - RatePrimaryRegister(F, ScaledReg, Regs, L, SE, DT, LoserRegs, TTI); + RatePrimaryRegister(F, ScaledReg, Regs, LoserRegs); if (isLoser()) return; } @@ -1353,7 +1342,7 @@ Lose(); return; } - RatePrimaryRegister(F, BaseReg, Regs, L, SE, DT, LoserRegs, TTI); + RatePrimaryRegister(F, BaseReg, Regs, LoserRegs); if (isLoser()) return; } @@ -1364,11 +1353,11 @@ // Do not count the base and a possible second register if the target // allows to fold 2 registers. C.NumBaseAdds += - NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F))); + NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(*TTI, LU, F))); C.NumBaseAdds += (F.UnfoldedOffset != 0); // Accumulate non-free scaling amounts. - C.ScaleCost += getScalingFactorCost(TTI, LU, F, *L); + C.ScaleCost += getScalingFactorCost(*TTI, LU, F, *L); // Tally up the non-zero immediates. for (const LSRFixup &Fixup : LU.Fixups) { @@ -1383,7 +1372,7 @@ // Check with target if this offset with this instruction is // specifically not supported. if (LU.Kind == LSRUse::Address && Offset != 0 && - !isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV, + !isAMCompletelyFolded(*TTI, LSRUse::Address, LU.AccessTy, F.BaseGV, Offset, F.HasBaseReg, F.Scale, Fixup.UserInst)) C.NumBaseAdds++; } @@ -1396,7 +1385,7 @@ // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as // additional instruction (at least fill). - unsigned TTIRegNum = TTI.getNumberOfRegisters(false) - 1; + unsigned TTIRegNum = TTI->getNumberOfRegisters(false) - 1; if (C.NumRegs > TTIRegNum) { // Cost already exceeded TTIRegNum, then only newly added register can add // new instructions. @@ -1416,7 +1405,8 @@ // // For {-10, +, 1}: // i = i + 1; - if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() && !TTI.canMacroFuseCmp()) + if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() && + !TTI->canMacroFuseCmp()) C.Insns++; // Each new AddRec adds 1 instruction to calculation. C.Insns += (C.AddRecCost - PrevAddRecCost); @@ -1440,11 +1430,11 @@ } /// Choose the lower cost. -bool Cost::isLess(Cost &Other, const TargetTransformInfo &TTI) { +bool Cost::isLess(Cost &Other) { if (InsnsCost.getNumOccurrences() > 0 && InsnsCost && C.Insns != Other.C.Insns) return C.Insns < Other.C.Insns; - return TTI.isLSRCostLess(C, Other.C); + return TTI->isLSRCostLess(C, Other.C); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -4327,9 +4317,9 @@ // avoids the need to recompute this information across formulae using the // same bad AddRec. Passing LoserRegs is also essential unless we remove // the corresponding bad register from the Regs set. - Cost CostF; + Cost CostF(L, SE, DT, TTI); Regs.clear(); - CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, SE, DT, LU, &LoserRegs); + CostF.RateFormula(F, Regs, VisitedRegs, LU, &LoserRegs); if (CostF.isLoser()) { // During initial formula generation, undesirable formulae are generated // by uses within other loops that have some non-trivial address mode or @@ -4360,10 +4350,10 @@ Formula &Best = LU.Formulae[P.first->second]; - Cost CostBest; + Cost CostBest(L, SE, DT, TTI); Regs.clear(); - CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, SE, DT, LU); - if (CostF.isLess(CostBest, TTI)) + CostBest.RateFormula(Best, Regs, VisitedRegs, LU); + if (CostF.isLess(CostBest)) std::swap(F, Best); LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); dbgs() << "\n" @@ -4611,12 +4601,13 @@ // If the new register numbers are the same, choose the Formula with // less Cost. - Cost CostFA, CostFB; + Cost CostFA(L, SE, DT, TTI); + Cost CostFB(L, SE, DT, TTI); Regs.clear(); - CostFA.RateFormula(TTI, FA, Regs, VisitedRegs, L, SE, DT, LU); + CostFA.RateFormula(FA, Regs, VisitedRegs, LU); Regs.clear(); - CostFB.RateFormula(TTI, FB, Regs, VisitedRegs, L, SE, DT, LU); - return CostFA.isLess(CostFB, TTI); + CostFB.RateFormula(FB, Regs, VisitedRegs, LU); + return CostFA.isLess(CostFB); }; bool Any = false; @@ -4902,7 +4893,7 @@ ReqRegs.insert(S); SmallPtrSet NewRegs; - Cost NewCost; + Cost NewCost(L, SE, DT, TTI); for (const Formula &F : LU.Formulae) { // Ignore formulae which may not be ideal in terms of register reuse of // ReqRegs. The formula should use all required registers before @@ -4926,8 +4917,8 @@ // the current best, prune the search at that point. NewCost = CurCost; NewRegs = CurRegs; - NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, SE, DT, LU); - if (NewCost.isLess(SolutionCost, TTI)) { + NewCost.RateFormula(F, NewRegs, VisitedRegs, LU); + if (NewCost.isLess(SolutionCost)) { Workspace.push_back(&F); if (Workspace.size() != Uses.size()) { SolveRecurse(Solution, SolutionCost, Workspace, NewCost, @@ -4936,9 +4927,9 @@ VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]); } else { LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs()); - dbgs() << ".\n Regs:"; for (const SCEV *S - : NewRegs) dbgs() - << ' ' << *S; + dbgs() << ".\nRegs:\n"; + for (const SCEV *S : NewRegs) dbgs() + << "- " << *S << "\n"; dbgs() << '\n'); SolutionCost = NewCost; @@ -4953,9 +4944,9 @@ /// vector. void LSRInstance::Solve(SmallVectorImpl &Solution) const { SmallVector Workspace; - Cost SolutionCost; + Cost SolutionCost(L, SE, DT, TTI); SolutionCost.Lose(); - Cost CurCost; + Cost CurCost(L, SE, DT, TTI); SmallPtrSet CurRegs; DenseSet VisitedRegs; Workspace.reserve(Uses.size());