Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -1410,10 +1410,33 @@ /// @brief Build Schedule for the region @p R. /// + /// Schedule generation traverses the SCoP region recursively in reverse + /// post-order (wrt. sub-regions). Whenever a loop header is encountered, the + /// corresponding loop is added to the @p LoopStack. This is needed since we + /// do not want to visit any sub-region or block outside the last loop on the + /// @p LoopStack. If the reverse post-order traversal yields a sub-region or + /// block that is outside the last loop on the @p LoopStack we queue it and + /// traverse it after all loops have been poped from the @p LoopStack that do + /// not contain this sub-region or block. This allows to process encountered + /// loops first completely, thus to generate a schedule for the whole loop + /// before a schedule for some part "after" the loop is generated. A loop is + /// processed if all its blocks have been visited. To this end, the number of + /// blocks visited per Loop is kept as the second component of the @p + /// LoopSchedules values. The first component is the actual schedule for the + /// mapped loop. Every time a loop is traversed completely, its schedule is + /// inserted into the schedule of the parent loop (according to the @p + /// LoopSchedules). After the whole SCoP region is traversed the @p + /// LoopSchedules map contains only one valid mapping, namely for the loop + /// surrounding the SCoP (or nullptr if none). + /// + /// Note that "nullptr" is mapped too, if the SCoP is not surrounded by any + /// loop that is not contained in the SCoP. + /// /// @param R The current region traversed. + /// @param LoopStack Stack to remember currently traversed loops. /// @param LoopSchedules Map from loops to their schedule and progress. void buildSchedule( - Region *R, + Region *R, SmallVectorImpl &LoopStack, DenseMap> &LoopSchedules); /// @brief Collect all memory access relations of a given type. Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -3439,31 +3439,72 @@ // For general SCoPs invoke the recursive schedule generation. DenseMap> LoopSchedules; + SmallVector LoopStack; Loop *L = getLoopSurroundingRegion(getRegion(), LI); + LoopStack.push_back(L); LoopSchedules[L]; - buildSchedule(&getRegion(), LoopSchedules); + buildSchedule(&getRegion(), LoopStack, LoopSchedules); Schedule = LoopSchedules[L].first; + assert(LoopStack.size() == 1 && LoopStack.back() == L); } void Scop::buildSchedule( - Region *R, + Region *R, SmallVectorImpl &LoopStack, DenseMap> &LoopSchedules) { + Loop *OuterScopLoop = getLoopSurroundingRegion(getRegion(), LI); + + // Iterate over the region @p R in reverse post-order but queue + // sub-regions/blocks iff they are not part of the last encountered but not + // completely traversed loop. The variable LastRNWaiting is a flag to indicate + // that we queued the last sub-region/block from the reverse post-order + // iterator. If it is set we have to explore the next sub-region/block from + // the iterator (if any) to guarantee progress. If it is not set we first try + // the next queded sub-region/blocks. + // + // Note that in most cases we will not queue any nodes. + + std::deque WaitingRegionNodes; + bool LastRNWaiting = false; + ReversePostOrderTraversal RTraversal(R); - for (auto *RN : RTraversal) { + auto RNIt = RTraversal.begin(); + auto RNEnd = RTraversal.end(); + + while (RNIt != RNEnd || WaitingRegionNodes.size() != 0) { + RegionNode *RN; + + if ((LastRNWaiting && RNIt != RNEnd) || WaitingRegionNodes.size() == 0) { + assert(RNIt != RNEnd); + RN = *RNIt++; + LastRNWaiting = false; + } else { + RN = WaitingRegionNodes.front(); + WaitingRegionNodes.pop_front(); + } + + Loop *L = getRegionNodeLoop(RN, LI); + if (!getRegion().contains(L)) + L = OuterScopLoop; + + Loop *LastLoop = LoopStack.back(); + if (LastLoop != L) { + if (!LastLoop->contains(L)) { + WaitingRegionNodes.push_back(RN); + LastRNWaiting = true; + continue; + } + LoopStack.push_back(L); + } if (RN->isSubRegion()) { Region *SubRegion = RN->getNodeAs(); if (!SD.isNonAffineSubRegion(SubRegion, &getRegion())) { - buildSchedule(SubRegion, LoopSchedules); + buildSchedule(SubRegion, LoopStack, LoopSchedules); continue; } } - Loop *L = getRegionNodeLoop(RN, LI); - if (!getRegion().contains(L)) - L = getLoopSurroundingRegion(getRegion(), LI); - int LD = getRelativeLoopDepth(L); auto &LSchedulePair = LoopSchedules[L]; LSchedulePair.second += getNumBlocksInRegionNode(RN); @@ -3479,6 +3520,9 @@ isl_schedule *LSchedule = LSchedulePair.first; unsigned NumVisited = LSchedulePair.second; while (L && NumVisited == L->getNumBlocks()) { + assert(LoopStack.back() == L); + LoopStack.pop_back(); + auto *PL = L->getParentLoop(); // Either we have a proper loop and we also build a schedule for the Index: test/ScopInfo/wrong_schedule.ll =================================================================== --- /dev/null +++ test/ScopInfo/wrong_schedule.ll @@ -0,0 +1,67 @@ +; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s +; +; CHECK: { Stmt_land_rhs[i0] -> [0, 0] }; +; CHECK: { Stmt_land_rhs_for_cond_53_loopexit_crit_edge[] -> [1, 0] }; + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Function Attrs: nounwind uwtable +define void @vorbis_staticbook_unpack() #0 { +entry: + br label %if.end.6 + +if.end.6: ; preds = %entry + switch i32 undef, label %return [ + i32 0, label %sw.epilog.loopexit68 + i32 1, label %land.rhs.lr.ph + ] + +for.cond.67.for.cond.53.loopexit_crit_edge: ; preds = %for.body.73 + br label %sw.epilog + +land.rhs.for.cond.53.loopexit_crit_edge: ; preds = %land.rhs + %i.380.lcssa = phi i64 [ %i.380, %land.rhs ] + br label %sw.epilog + +land.rhs.lr.ph: ; preds = %if.end.6 + br label %land.rhs + +land.rhs: ; preds = %for.body.73, %land.rhs.lr.ph + %i.380 = phi i64 [ 0, %land.rhs.lr.ph ], [ 0, %for.body.73 ] + br i1 false, label %for.body.73, label %land.rhs.for.cond.53.loopexit_crit_edge + +for.body.73: ; preds = %land.rhs + br i1 false, label %land.rhs, label %for.cond.67.for.cond.53.loopexit_crit_edge + +sw.epilog.loopexit68: ; preds = %if.end.6 + unreachable + +sw.epilog: ; preds = %land.rhs.for.cond.53.loopexit_crit_edge, %for.cond.67.for.cond.53.loopexit_crit_edge + %i.3.lcssa = phi i64 [ undef, %for.cond.67.for.cond.53.loopexit_crit_edge ], [ %i.380.lcssa, %land.rhs.for.cond.53.loopexit_crit_edge ] + switch i32 undef, label %_eofout [ + i32 0, label %return + i32 1, label %sw.bb.85 + i32 2, label %sw.bb.85 + ] + +sw.bb.85: ; preds = %sw.epilog, %sw.epilog + switch i32 undef, label %for.body.110.lr.ph [ + i32 1, label %sw.bb.94 + i32 2, label %sw.bb.97 + ] + +sw.bb.94: ; preds = %sw.bb.85 + unreachable + +sw.bb.97: ; preds = %sw.bb.85 + unreachable + +for.body.110.lr.ph: ; preds = %sw.bb.85 + unreachable + +_eofout: ; preds = %sw.epilog + br label %return + +return: ; preds = %_eofout, %sw.epilog, %if.end.6 + ret void +}