Index: llvm/lib/Transforms/Scalar/LoopInterchange.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/LoopCacheAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopNestAnalysis.h" #include "llvm/Analysis/LoopPass.h" @@ -358,8 +359,10 @@ : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {} /// Check if the loop interchange is profitable. - bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId, - CharMatrix &DepMatrix); + bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop, + unsigned InnerLoopId, unsigned OuterLoopId, + CharMatrix &DepMatrix, + const DenseMap &CostMap); private: int getInstrOrderCost(); @@ -410,13 +413,15 @@ LoopInfo *LI = nullptr; DependenceInfo *DI = nullptr; DominatorTree *DT = nullptr; + std::unique_ptr CC = nullptr; /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI, - DominatorTree *DT, OptimizationRemarkEmitter *ORE) - : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {} + DominatorTree *DT, std::unique_ptr &CC, + OptimizationRemarkEmitter *ORE) + : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC)), ORE(ORE) {} bool run(Loop *L) { if (L->getParentLoop()) @@ -499,6 +504,11 @@ } unsigned SelecLoopId = selectLoopForInterchange(LoopList); + const auto &LoopCosts = CC->getLoopCosts(); + DenseMap CostMap; + for (unsigned i = 0; i < LoopCosts.size(); i++) { + CostMap[LoopCosts[i].first] = i; + } // We try to achieve the globally optimal memory access for the loopnest, // and do interchange based on a bubble-sort fasion. We start from // the innermost loop, move it outwards to the best possible position @@ -507,7 +517,7 @@ bool ChangedPerIter = false; for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) { bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1, - DependencyMatrix); + DependencyMatrix, CostMap); if (!Interchanged) continue; // Loops interchanged, update LoopList accordingly. @@ -531,7 +541,8 @@ bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId, unsigned OuterLoopId, - std::vector> &DependencyMatrix) { + std::vector> &DependencyMatrix, + const DenseMap &CostMap) { LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << "\n"); LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE); @@ -541,7 +552,8 @@ } LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n"); LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE); - if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) { + if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId, + DependencyMatrix, CostMap)) { LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n"); return false; } @@ -1135,9 +1147,10 @@ return !DepMatrix.empty(); } -bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, - unsigned OuterLoopId, - CharMatrix &DepMatrix) { +bool LoopInterchangeProfitability::isProfitable( + const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId, + unsigned OuterLoopId, CharMatrix &DepMatrix, + const DenseMap &CostMap) { // TODO: Add better profitability checks. // e.g // 1) Construct dependency matrix and move the one with no loop carried dep @@ -1146,6 +1159,16 @@ // This is rough cost estimation algorithm. It counts the good and bad order // of induction variables in the instruction and allows reordering if number // of bad orders is more than good. + if (CostMap.find(InnerLoop) != CostMap.end() && + CostMap.find(OuterLoop) != CostMap.end()) { + unsigned IndexInner = CostMap.find(InnerLoop)->second; + unsigned IndexOuter = CostMap.find(OuterLoop)->second; + LLVM_DEBUG(dbgs() << "IndexInner = " << IndexInner + << " IndexOuter = " << IndexOuter << "\n"); + if (IndexInner < IndexOuter) + return true; + } + int Cost = getInstrOrderCost(); LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n"); if (Cost < -LoopInterchangeCostThreshold) @@ -1709,8 +1732,8 @@ auto *DI = &getAnalysis().getDI(); auto *DT = &getAnalysis().getDomTree(); auto *ORE = &getAnalysis().getORE(); - - return LoopInterchange(SE, LI, DI, DT, ORE).run(L); + std::unique_ptr CC = nullptr; + return LoopInterchange(SE, LI, DI, DT, CC, ORE).run(L); } }; } // namespace @@ -1737,8 +1760,10 @@ Function &F = *LN.getParent(); DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI); + std::unique_ptr CC = + CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI); OptimizationRemarkEmitter ORE(&F); - if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(LN)) + if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN)) return PreservedAnalyses::all(); return getLoopPassPreservedAnalyses(); }