Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -1414,9 +1414,10 @@ /// @brief Build Schedule and ScopStmts. /// /// @param R The current region traversed. + /// @param LoopStack Stack to remember currently traversed loops. /// @param LoopSchedules Map from loops to their schedule and progress. void buildSchedule( - Region *R, + Region *R, SmallVectorImpl<Loop *> &LoopStack, DenseMap<Loop *, std::pair<isl_schedule *, unsigned>> &LoopSchedules); /// @brief Collect all memory access relations of a given type. Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -2675,8 +2675,10 @@ DenseMap<Loop *, std::pair<isl_schedule *, unsigned>> LoopSchedules; Loop *L = getLoopSurroundingRegion(R, LI); + SmallVector<Loop *, 8> LoopStack; + LoopStack.push_back(L); LoopSchedules[L]; - buildSchedule(&R, LoopSchedules); + buildSchedule(&R, LoopStack, LoopSchedules); Schedule = LoopSchedules[L].first; if (isl_set_is_empty(AssumedContext)) @@ -3420,7 +3422,7 @@ } void Scop::buildSchedule( - Region *R, + Region *R, SmallVectorImpl<Loop *> &LoopStack, DenseMap<Loop *, std::pair<isl_schedule *, unsigned>> &LoopSchedules) { if (SD.isNonAffineSubRegion(R, &getRegion())) { @@ -3434,21 +3436,49 @@ return; } + Loop *OuterScopLoop = getLoopSurroundingRegion(getRegion(), LI); + + std::deque<RegionNode *> WaitingRegionNodes; + bool LastRNWaiting = false; + ReversePostOrderTraversal<Region *> RTraversal(R); - for (auto *RN : RTraversal) { + auto RNIt = RTraversal.begin(); + auto RNEnd = RTraversal.end(); + + while (RNIt != RNEnd || WaitingRegionNodes.size() != 0) { + RegionNode *RN; + + if ((LastRNWaiting && RNIt != RNEnd) || WaitingRegionNodes.size() == 0) { + assert(RNIt != RNEnd); + RN = *RNIt++; + LastRNWaiting = false; + } else { + RN = WaitingRegionNodes.front(); + WaitingRegionNodes.pop_front(); + } + + Loop *L = getRegionNodeLoop(RN, LI); + if (!getRegion().contains(L)) + L = OuterScopLoop; + + Loop *LastLoop = LoopStack.back(); + if (LastLoop != L) { + if (!LastLoop->contains(L)) { + WaitingRegionNodes.push_back(RN); + LastRNWaiting = true; + continue; + } + LoopStack.push_back(L); + } if (RN->isSubRegion()) { Region *SubRegion = RN->getNodeAs<Region>(); if (!SD.isNonAffineSubRegion(SubRegion, &getRegion())) { - buildSchedule(SubRegion, LoopSchedules); + buildSchedule(SubRegion, LoopStack, LoopSchedules); continue; } } - Loop *L = getRegionNodeLoop(RN, LI); - if (!getRegion().contains(L)) - L = getLoopSurroundingRegion(getRegion(), LI); - int LD = getRelativeLoopDepth(L); auto &LSchedulePair = LoopSchedules[L]; LSchedulePair.second += getNumBlocksInRegionNode(RN); @@ -3469,6 +3499,9 @@ if (auto *MUPA = mapToDimension(LDomain, LD + 1)) LSchedule = isl_schedule_insert_partial_schedule(LSchedule, MUPA); + assert(LoopStack.back() == L); + LoopStack.pop_back(); + auto *PL = L->getParentLoop(); // Either we have a proper loop and we also build a schedule for the Index: test/ScopInfo/wrong_schedule.ll =================================================================== --- /dev/null +++ test/ScopInfo/wrong_schedule.ll @@ -0,0 +1,67 @@ +; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s +; +; CHECK: { Stmt_land_rhs[i0] -> [0, 0] }; +; CHECK: { Stmt_land_rhs_for_cond_53_loopexit_crit_edge[] -> [1, 0] }; + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Function Attrs: nounwind uwtable +define void @vorbis_staticbook_unpack() #0 { +entry: + br label %if.end.6 + +if.end.6: ; preds = %entry + switch i32 undef, label %return [ + i32 0, label %sw.epilog.loopexit68 + i32 1, label %land.rhs.lr.ph + ] + +for.cond.67.for.cond.53.loopexit_crit_edge: ; preds = %for.body.73 + br label %sw.epilog + +land.rhs.for.cond.53.loopexit_crit_edge: ; preds = %land.rhs + %i.380.lcssa = phi i64 [ %i.380, %land.rhs ] + br label %sw.epilog + +land.rhs.lr.ph: ; preds = %if.end.6 + br label %land.rhs + +land.rhs: ; preds = %for.body.73, %land.rhs.lr.ph + %i.380 = phi i64 [ 0, %land.rhs.lr.ph ], [ 0, %for.body.73 ] + br i1 false, label %for.body.73, label %land.rhs.for.cond.53.loopexit_crit_edge + +for.body.73: ; preds = %land.rhs + br i1 false, label %land.rhs, label %for.cond.67.for.cond.53.loopexit_crit_edge + +sw.epilog.loopexit68: ; preds = %if.end.6 + unreachable + +sw.epilog: ; preds = %land.rhs.for.cond.53.loopexit_crit_edge, %for.cond.67.for.cond.53.loopexit_crit_edge + %i.3.lcssa = phi i64 [ undef, %for.cond.67.for.cond.53.loopexit_crit_edge ], [ %i.380.lcssa, %land.rhs.for.cond.53.loopexit_crit_edge ] + switch i32 undef, label %_eofout [ + i32 0, label %return + i32 1, label %sw.bb.85 + i32 2, label %sw.bb.85 + ] + +sw.bb.85: ; preds = %sw.epilog, %sw.epilog + switch i32 undef, label %for.body.110.lr.ph [ + i32 1, label %sw.bb.94 + i32 2, label %sw.bb.97 + ] + +sw.bb.94: ; preds = %sw.bb.85 + unreachable + +sw.bb.97: ; preds = %sw.bb.85 + unreachable + +for.body.110.lr.ph: ; preds = %sw.bb.85 + unreachable + +_eofout: ; preds = %sw.epilog + br label %return + +return: ; preds = %_eofout, %sw.epilog, %if.end.6 + ret void +}