Index: llvm/lib/Transforms/Scalar/LoopInterchange.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -338,7 +338,8 @@
   bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
                     unsigned InnerLoopId, unsigned OuterLoopId,
                     CharMatrix &DepMatrix,
-                    const DenseMap<const Loop *, unsigned> &CostMap);
+                    const DenseMap<const Loop *, unsigned> &CostMap,
+                    std::unique_ptr<CacheCost>& CC);
 
 private:
   int getInstrOrderCost();
@@ -539,7 +540,7 @@
     LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
     LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
     if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
-                          DependencyMatrix, CostMap)) {
+                          DependencyMatrix, CostMap, CC)) {
       LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
       return false;
     }
@@ -1113,18 +1114,19 @@
 static bool isProfitableForVectorization(unsigned InnerLoopId,
                                          unsigned OuterLoopId,
                                          CharMatrix &DepMatrix) {
-  // TODO: Improve this heuristic to catch more cases.
-  // If the inner loop is loop independent or doesn't carry any dependency it is
-  // profitable to move this to outer position.
   for (auto &Row : DepMatrix) {
-    if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I')
-      return false;
-    // TODO: We need to improve this heuristic.
+    // If the inner loop is loop independent or doesn't carry any dependency it is
+    // not profitable to move this to outer position, since we are likely able to do
+    // inner loop vectorization already.
+    if (Row[InnerLoopId] == 'I' || Row[InnerLoopId] == '=')
+     return false;
+    // If the outer loop is not loop independent it is not profitable to move this
+    // to inner position, since doing so would not enable inner loop parallelism.
     if (Row[OuterLoopId] != '=')
       return false;
   }
-  // If outer loop has dependence and inner loop is loop independent then it is
-  // profitable to interchange to enable parallelism.
+  // If inner loop has dependence and outer loop is loop independent then it is
+  // profitable to interchange to enable inner loop parallelism.
   // If there are no dependences, interchanging will not improve anything.
   return !DepMatrix.empty();
 }
@@ -1132,7 +1134,8 @@
 bool LoopInterchangeProfitability::isProfitable(
     const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
     unsigned OuterLoopId, CharMatrix &DepMatrix,
-    const DenseMap<const Loop *, unsigned> &CostMap) {
+    const DenseMap<const Loop *, unsigned> &CostMap,
+    std::unique_ptr<CacheCost>& CC) {
   // TODO: Remove the legacy cost model.
 
   // This is the new cost model returned from loop cache analysis.
@@ -1157,11 +1160,14 @@
       return true;
   }
 
-  // It is not profitable as per current cache profitability model. But check if
-  // we can move this loop outside to improve parallelism.
-  if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
-    return true;
-
+  // To prevent endless interchange, only check whether it is profitable for vectorization
+  // when InnerLoop and OuterLoop has the same locality, or loop cache analysis failed to
+  // analyze the loopnest (e.g., due to delinearization issues).
+  if (CostMap.find(InnerLoop) == CostMap.end() || CostMap.find(OuterLoop) == CostMap.end() ||
+     (CC && CC->getLoopCost(*InnerLoop) == CC->getLoopCost(*OuterLoop))) {
+    if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
+      return true;
+  }
   ORE->emit([&]() {
     return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
                                     InnerLoop->getStartLoc(),