diff --git a/llvm/include/llvm/Analysis/LoopNestAnalysis.h b/llvm/include/llvm/Analysis/LoopNestAnalysis.h --- a/llvm/include/llvm/Analysis/LoopNestAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopNestAnalysis.h @@ -139,6 +139,8 @@ return all_of(Loops, [](const Loop *L) { return L->isRotatedForm(); }); } + BasicBlock *getHeader() const { return Loops.front()->getHeader(); } + StringRef getName() const { return Loops.front()->getName(); } protected: diff --git a/llvm/include/llvm/Transforms/Scalar/LoopInterchange.h b/llvm/include/llvm/Transforms/Scalar/LoopInterchange.h --- a/llvm/include/llvm/Transforms/Scalar/LoopInterchange.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopInterchange.h @@ -15,7 +15,7 @@ namespace llvm { struct LoopInterchangePass : public PassInfoMixin { - PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); }; diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -258,16 +258,16 @@ // if the direction matrix, after the same permutation is applied to its // columns, has no ">" direction as the leftmost non-"=" direction in any row. static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, - unsigned InnerLoopId, - unsigned OuterLoopId) { + unsigned InnerLoopId) { unsigned NumRows = DepMatrix.size(); // For each row check if it is valid to interchange. for (unsigned Row = 0; Row < NumRows; ++Row) { char InnerDep = DepMatrix[Row][InnerLoopId]; - char OuterDep = DepMatrix[Row][OuterLoopId]; + char OuterDep = DepMatrix[Row][InnerLoopId - 1]; if (InnerDep == '*' || OuterDep == '*') return false; - if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep)) + if (!validDepInterchange(DepMatrix, Row, InnerLoopId - 1, InnerDep, + OuterDep)) return false; } return true; @@ -332,8 +332,7 @@ : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {} /// Check if the loops can be interchanged. - bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, - CharMatrix &DepMatrix); + bool canInterchangeLoops(unsigned InnerLoopId, CharMatrix &DepMatrix); /// Check if the loop structure is understood. We do not handle triangular /// loops for now. @@ -379,8 +378,7 @@ : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {} /// Check if the loop interchange is profitable. - bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId, - CharMatrix &DepMatrix); + bool isProfitable(unsigned InnerLoopId, CharMatrix &DepMatrix); private: int getInstrOrderCost(); @@ -449,7 +447,15 @@ return processLoopList(populateWorklist(*L)); } - bool isComputableLoopNest(LoopVector LoopList) { + bool run(LoopNest &LN) { + const auto &LoopList = LN.getLoops(); + for (unsigned I = 1; I < LoopList.size(); ++I) + if (LoopList[I]->getParentLoop() != LoopList[I - 1]) + return false; + return processLoopList(LoopList); + } + + bool isComputableLoopNest(ArrayRef LoopList) { for (Loop *L : LoopList) { const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L); if (isa(ExitCountOuter)) { @@ -468,13 +474,13 @@ return true; } - unsigned selectLoopForInterchange(const LoopVector &LoopList) { + unsigned selectLoopForInterchange(ArrayRef LoopList) { // TODO: Add a better heuristic to select the loop to be interchanged based // on the dependence matrix. Currently we select the innermost loop. return LoopList.size() - 1; } - bool processLoopList(LoopVector LoopList) { + bool processLoopList(ArrayRef LoopList) { bool Changed = false; unsigned LoopNestDepth = LoopList.size(); if (LoopNestDepth < 2) { @@ -515,14 +521,12 @@ unsigned SelecLoopId = selectLoopForInterchange(LoopList); // Move the selected loop outwards to the best possible position. + Loop *LoopToBeInterchanged = LoopList[SelecLoopId]; for (unsigned i = SelecLoopId; i > 0; i--) { - bool Interchanged = - processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix); + bool Interchanged = processLoop(LoopToBeInterchanged, LoopList[i - 1], i, + LoopNestExit, DependencyMatrix); if (!Interchanged) return Changed; - // Loops interchanged reflect the same in LoopList - std::swap(LoopList[i - 1], LoopList[i]); - // Update the DependencyMatrix interChangeDependencies(DependencyMatrix, i, i - 1); #ifdef DUMP_DEP_MATRICIES @@ -534,22 +538,19 @@ return Changed; } - bool processLoop(LoopVector LoopList, unsigned InnerLoopId, - unsigned OuterLoopId, BasicBlock *LoopNestExit, + bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId, + BasicBlock *LoopNestExit, std::vector> &DependencyMatrix) { - LLVM_DEBUG(dbgs() << "Processing Inner Loop Id = " << InnerLoopId - << " and OuterLoopId = " << OuterLoopId << "\n"); - Loop *InnerLoop = LoopList[InnerLoopId]; - Loop *OuterLoop = LoopList[OuterLoopId]; - + LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId + << " and OuterLoopId = " << InnerLoopId - 1 << "\n"); LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE); - if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) { + if (!LIL.canInterchangeLoops(InnerLoopId, DependencyMatrix)) { LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n"); return false; } LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n"); LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE); - if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) { + if (!LIP.isProfitable(InnerLoopId, DependencyMatrix)) { LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n"); return false; } @@ -952,11 +953,10 @@ } bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, - unsigned OuterLoopId, CharMatrix &DepMatrix) { - if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) { + if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId)) { LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId - << " and OuterLoopId = " << OuterLoopId + << " and OuterLoopId = " << InnerLoopId - 1 << " due to dependence\n"); ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence", @@ -1087,7 +1087,6 @@ } static bool isProfitableForVectorization(unsigned InnerLoopId, - unsigned OuterLoopId, CharMatrix &DepMatrix) { // TODO: Improve this heuristic to catch more cases. // If the inner loop is loop independent or doesn't carry any dependency it is @@ -1096,7 +1095,7 @@ if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I') return false; // TODO: We need to improve this heuristic. - if (Row[OuterLoopId] != '=') + if (Row[InnerLoopId - 1] != '=') return false; } // If outer loop has dependence and inner loop is loop independent then it is @@ -1106,7 +1105,6 @@ } bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, - unsigned OuterLoopId, CharMatrix &DepMatrix) { // TODO: Add better profitability checks. // e.g @@ -1123,7 +1121,7 @@ // It is not profitable as per current cache profitability model. But check if // we can move this loop outside to improve parallelism. - if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix)) + if (isProfitableForVectorization(InnerLoopId, DepMatrix)) return true; ORE->emit([&]() { @@ -1682,14 +1680,15 @@ return new LoopInterchangeLegacyPass(); } -PreservedAnalyses LoopInterchangePass::run(Loop &L, LoopAnalysisManager &AM, +PreservedAnalyses LoopInterchangePass::run(LoopNest &LN, + LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U) { - Function &F = *L.getHeader()->getParent(); + Function &F = *LN.getHeader()->getParent(); DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI); OptimizationRemarkEmitter ORE(&F); - if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(&L)) + if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(LN)) return PreservedAnalyses::all(); return getLoopPassPreservedAnalyses(); } diff --git a/llvm/test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll b/llvm/test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll --- a/llvm/test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll +++ b/llvm/test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll @@ -21,10 +21,10 @@ ;; fn2(T[k]); ;; } -; CHECK: Processing Inner Loop Id = 2 and OuterLoopId = 1 +; CHECK: Processing InnerLoopId = 2 and OuterLoopId = 1 ; CHECK: Loops interchanged. -; CHECK: Processing Inner Loop Id = 1 and OuterLoopId = 0 +; CHECK: Processing InnerLoopId = 1 and OuterLoopId = 0 ; CHECK: Not interchanging loops. Cannot prove legality. @T = internal global [100 x double] zeroinitializer, align 4 diff --git a/llvm/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll b/llvm/test/Transforms/LoopInterchange/interchanged-loop-nest-3.ll copy from llvm/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll copy to llvm/test/Transforms/LoopInterchange/interchanged-loop-nest-3.ll --- a/llvm/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll +++ b/llvm/test/Transforms/LoopInterchange/interchanged-loop-nest-3.ll @@ -11,13 +11,13 @@ ;; for(int i=0;i<100;i++) ;; for(int j=0;j<100;j++) ;; for(int k=0;k<100;k++) -;; D[i][k][j] = D[i][k][j]+t; +;; D[k][j][i] = D[k][j][i]+t; -; CHECK: Processing Inner Loop Id = 2 and OuterLoopId = 1 +; CHECK: Processing InnerLoopId = 2 and OuterLoopId = 1 ; CHECK: Loops interchanged. -; CHECK: Processing Inner Loop Id = 1 and OuterLoopId = 0 -; CHECK: Interchanging loops not profitable. +; CHECK: Processing InnerLoopId = 1 and OuterLoopId = 0 +; CHECK: Loops interchanged. define void @interchange_08(i32 %t){ entry: @@ -33,7 +33,7 @@ for.body6: ; preds = %for.body6, %for.cond4.preheader %k.026 = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ] - %arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* @D, i32 0, i32 %i.028, i32 %k.026, i32 %j.027 + %arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* @D, i32 0, i32 %k.026, i32 %j.027, i32 %i.028 %0 = load i32, i32* %arrayidx8 %add = add nsw i32 %0, %t store i32 %add, i32* %arrayidx8 diff --git a/llvm/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll b/llvm/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll --- a/llvm/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll +++ b/llvm/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll @@ -13,10 +13,10 @@ ;; for(int k=0;k<100;k++) ;; D[i][k][j] = D[i][k][j]+t; -; CHECK: Processing Inner Loop Id = 2 and OuterLoopId = 1 +; CHECK: Processing InnerLoopId = 2 and OuterLoopId = 1 ; CHECK: Loops interchanged. -; CHECK: Processing Inner Loop Id = 1 and OuterLoopId = 0 +; CHECK: Processing InnerLoopId = 1 and OuterLoopId = 0 ; CHECK: Interchanging loops not profitable. define void @interchange_08(i32 %t){