Index: llvm/lib/Transforms/Scalar/LoopInterchange.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -338,7 +338,8 @@ bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix, - const DenseMap &CostMap); + const DenseMap &CostMap, + std::unique_ptr& CC); private: int getInstrOrderCost(); @@ -539,7 +540,7 @@ LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n"); LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE); if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId, - DependencyMatrix, CostMap)) { + DependencyMatrix, CostMap, CC)) { LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n"); return false; } @@ -1113,18 +1114,19 @@ static bool isProfitableForVectorization(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) { - // TODO: Improve this heuristic to catch more cases. - // If the inner loop is loop independent or doesn't carry any dependency it is - // profitable to move this to outer position. for (auto &Row : DepMatrix) { - if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I') - return false; - // TODO: We need to improve this heuristic. + // If the inner loop is loop independent or doesn't carry any dependency it is + // not profitable to move this to outer position, since we are likely able to do + // inner loop vectorization already. + if (Row[InnerLoopId] == 'I' || Row[InnerLoopId] == '=') + return false; + // If the outer loop is not loop independent it is not profitable to move this + // to inner position, since doing so would not enable inner loop parallelism. if (Row[OuterLoopId] != '=') return false; } - // If outer loop has dependence and inner loop is loop independent then it is - // profitable to interchange to enable parallelism. + // If inner loop has dependence and outer loop is loop independent then it is + // profitable to interchange to enable inner loop parallelism. // If there are no dependences, interchanging will not improve anything. return !DepMatrix.empty(); } @@ -1132,7 +1134,8 @@ bool LoopInterchangeProfitability::isProfitable( const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix, - const DenseMap &CostMap) { + const DenseMap &CostMap, + std::unique_ptr& CC) { // TODO: Remove the legacy cost model. // This is the new cost model returned from loop cache analysis. @@ -1157,11 +1160,14 @@ return true; } - // It is not profitable as per current cache profitability model. But check if - // we can move this loop outside to improve parallelism. - if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix)) - return true; - + // To prevent endless interchange, only check whether it is profitable for vectorization + // when InnerLoop and OuterLoop has the same locality, or loop cache analysis failed to + // analyze the loopnest (e.g., due to delinearization issues). + if (CostMap.find(InnerLoop) == CostMap.end() || CostMap.find(OuterLoop) == CostMap.end() || + (CC && CC->getLoopCost(*InnerLoop) == CC->getLoopCost(*OuterLoop))) { + if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix)) + return true; + } ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable", InnerLoop->getStartLoc(),