Index: polly/trunk/include/polly/ScopInfo.h =================================================================== --- polly/trunk/include/polly/ScopInfo.h +++ polly/trunk/include/polly/ScopInfo.h @@ -1474,17 +1474,62 @@ /// the dimensionality of the underlying ScopArrayInfo object. void updateAccessDimensionality(); - /// @brief Build Schedule for the SCoP region. - /// + /// @brief Construct the schedule of this SCoP. void buildSchedule(); - /// @brief Build Schedule for the region @p RN. - /// - /// @param RN The current region traversed. - /// @param LoopSchedules Map from loops to their schedule and progress. - void buildSchedule( - RegionNode *RN, - DenseMap> &LoopSchedules); + /// @brief A loop stack element to keep track of per-loop information during + /// schedule construction. + typedef struct LoopStackElement { + // The loop for which we keep information. + Loop *L; + + // The (possibly incomplete) schedule for this loop. + isl_schedule *Schedule; + + // The number of basic blocks in the current loop, for which a schedule has + // already been constructed. + unsigned NumBlocksProcessed; + + LoopStackElement(Loop *L, __isl_give isl_schedule *S, + unsigned NumBlocksProcessed) + : L(L), Schedule(S), NumBlocksProcessed(NumBlocksProcessed){}; + } LoopStackElementTy; + + /// @brief The loop stack used for schedule construction. + /// + /// The loop stack keeps track of schedule information for a set of nested + /// loops as well as an (optional) 'nullptr' loop that models the outermost + /// schedule dimension. The loops in a loop stack always have a parent-child + /// relation where the loop at position n is the parent of the loop at + /// position n + 1. + typedef SmallVector LoopStackTy; + + /// @brief Construct schedule information for a given Region and add the + /// derived information to @p LoopStack. 
+ ///
+ /// Given a Region we derive schedule information for all RegionNodes
+ /// contained in this region ensuring that the assigned execution times
+ /// correctly model the existing control flow relations.
+ ///
+ /// @param R The region to process.
+ /// @param LoopStack A stack of loops that are currently under
+ /// construction.
+ void buildSchedule(Region *R, LoopStackTy &LoopStack);
+
+ /// @brief Build Schedule for the region node @p RN and add the derived
+ /// information to @p LoopStack.
+ ///
+ /// In case @p RN is a BasicBlock or a non-affine Region, we construct the
+ /// schedule for this @p RN and also finalize loop schedules in case the
+ /// current @p RN completes the loop.
+ ///
+ /// In case @p RN is a Region that is not non-affine, we delegate the
+ /// construction to buildSchedule(Region *R, ...).
+ ///
+ /// @param RN The RegionNode to be traversed.
+ /// @param LoopStack A stack of loops that are currently under
+ /// construction.
+ void buildSchedule(RegionNode *RN, LoopStackTy &LoopStack); /// @brief Collect all memory access relations of a given type. 
/// Index: polly/trunk/lib/Analysis/ScopInfo.cpp =================================================================== --- polly/trunk/lib/Analysis/ScopInfo.cpp +++ polly/trunk/lib/Analysis/ScopInfo.cpp @@ -3444,61 +3444,129 @@ } void Scop::buildSchedule() { - DenseMap> LoopSchedules; Loop *L = getLoopSurroundingRegion(getRegion(), LI); - LoopSchedules[L]; - buildSchedule(getRegion().getNode(), LoopSchedules); - Schedule = LoopSchedules[L].first; + LoopStackTy LoopStack({LoopStackElementTy(L, nullptr, 0)}); + buildSchedule(getRegion().getNode(), LoopStack); + assert(LoopStack.size() == 1 && LoopStack.back().L == L); + Schedule = LoopStack[0].Schedule; } -void Scop::buildSchedule( - RegionNode *RN, - DenseMap> &LoopSchedules) { +/// To generate a schedule for the elements in a Region we traverse the Region +/// in reverse-post-order and add the contained RegionNodes in traversal order +/// to the schedule of the loop that is currently at the top of the LoopStack. +/// For loop-free code, this results in a correct sequential ordering. +/// +/// Example: +/// bb1(0) +/// / \. +/// bb2(1) bb3(2) +/// \ / \. +/// bb4(3) bb5(4) +/// \ / +/// bb6(5) +/// +/// Including loops requires additional processing. Whenever a loop header is +/// encountered, the corresponding loop is added to the @p LoopStack. Starting +/// from an empty schedule, we first process all RegionNodes that are within +/// this loop and complete the sequential schedule at this loop-level before +/// processing any other nodes. To implement this +/// loop-nodes-first-processing, the reverse post-order traversal is +/// insufficient. Hence, we additionally check if the traversal yields +/// sub-regions or blocks that are outside the last loop on the @p LoopStack. +/// These region-nodes are then queued and only traversed after all the nodes +/// within the current loop have been processed. 
+void Scop::buildSchedule(Region *R, LoopStackTy &LoopStack) { + Loop *OuterScopLoop = getLoopSurroundingRegion(getRegion(), LI); + + ReversePostOrderTraversal RTraversal(R); + std::deque WorkList(RTraversal.begin(), RTraversal.end()); + std::deque DelayList; + bool LastRNWaiting = false; + + // Iterate over the region @p R in reverse post-order but queue + // sub-regions/blocks iff they are not part of the last encountered but not + // completely traversed loop. The variable LastRNWaiting is a flag to indicate + // that we queued the last sub-region/block from the reverse post-order + // iterator. If it is set we have to explore the next sub-region/block from + // the iterator (if any) to guarantee progress. If it is not set we first try + // the next queued sub-region/blocks. + while (!WorkList.empty() || !DelayList.empty()) { + RegionNode *RN; + + if ((LastRNWaiting && !WorkList.empty()) || DelayList.size() == 0) { + RN = WorkList.front(); + WorkList.pop_front(); + LastRNWaiting = false; + } else { + RN = DelayList.front(); + DelayList.pop_front(); + } + + Loop *L = getRegionNodeLoop(RN, LI); + if (!getRegion().contains(L)) + L = OuterScopLoop; + + Loop *LastLoop = LoopStack.back().L; + if (LastLoop != L) { + if (!LastLoop->contains(L)) { + LastRNWaiting = true; + DelayList.push_back(RN); + continue; + } + LoopStack.push_back({L, nullptr, 0}); + } + buildSchedule(RN, LoopStack); + } + + return; +} + +void Scop::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) { if (RN->isSubRegion()) { auto *LocalRegion = RN->getNodeAs(); if (!SD.isNonAffineSubRegion(LocalRegion, &getRegion())) { - ReversePostOrderTraversal RTraversal(LocalRegion); - for (auto *Child : RTraversal) - buildSchedule(Child, LoopSchedules); + buildSchedule(LocalRegion, LoopStack); return; } } - Loop *L = getRegionNodeLoop(RN, LI); - if (!getRegion().contains(L)) - L = getLoopSurroundingRegion(getRegion(), LI); - - int LD = getRelativeLoopDepth(L); - auto &LSchedulePair = LoopSchedules[L]; - 
LSchedulePair.second += getNumBlocksInRegionNode(RN); + auto &LoopData = LoopStack.back(); + LoopData.NumBlocksProcessed += getNumBlocksInRegionNode(RN); if (auto *Stmt = getStmtForRegionNode(RN)) { auto *UDomain = isl_union_set_from_set(Stmt->getDomain()); auto *StmtSchedule = isl_schedule_from_domain(UDomain); - LSchedulePair.first = combineInSequence(LSchedulePair.first, StmtSchedule); + LoopData.Schedule = combineInSequence(LoopData.Schedule, StmtSchedule); } - isl_schedule *LSchedule = LSchedulePair.first; - unsigned NumVisited = LSchedulePair.second; - while (L && NumVisited == L->getNumBlocks()) { - auto *PL = L->getParentLoop(); - - auto &PSchedulePair = LoopSchedules[PL]; - - if (LSchedule) { - auto *LDomain = isl_schedule_get_domain(LSchedule); - auto *MUPA = mapToDimension(LDomain, LD + 1); - LSchedule = isl_schedule_insert_partial_schedule(LSchedule, MUPA); - PSchedulePair.first = combineInSequence(PSchedulePair.first, LSchedule); + // Check if we just processed the last node in this loop. If we did, finalize + // the loop by: + // + // - adding new schedule dimensions + // - folding the resulting schedule into the parent loop schedule + // - dropping the loop schedule from the LoopStack. + // + // Then continue to check surrounding loops, which might also have been + // completed by this node. 
+ while (LoopData.L && + LoopData.NumBlocksProcessed == LoopData.L->getNumBlocks()) { + auto Schedule = LoopData.Schedule; + auto NumBlocksProcessed = LoopData.NumBlocksProcessed; + + LoopStack.pop_back(); + auto &NextLoopData = LoopStack.back(); + + if (Schedule) { + auto *Domain = isl_schedule_get_domain(Schedule); + auto *MUPA = mapToDimension(Domain, LoopStack.size()); + Schedule = isl_schedule_insert_partial_schedule(Schedule, MUPA); + NextLoopData.Schedule = + combineInSequence(NextLoopData.Schedule, Schedule); } - PSchedulePair.second += NumVisited; - - L = PL; - LD--; - NumVisited = PSchedulePair.second; - LSchedule = PSchedulePair.first; + NextLoopData.NumBlocksProcessed += NumBlocksProcessed; + LoopData = NextLoopData; } } Index: polly/trunk/test/ScopInfo/schedule-const-post-dominator-walk-2.ll =================================================================== --- polly/trunk/test/ScopInfo/schedule-const-post-dominator-walk-2.ll +++ polly/trunk/test/ScopInfo/schedule-const-post-dominator-walk-2.ll @@ -0,0 +1,42 @@ +; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s + +; CHECK: Stmt_loopA[i0] -> [0, 0, 0] +; CHECK-DAG: Stmt_loopB[i0] -> [0, 0, 1] +; CHECK-DAG: Stmt_bbB[] -> [1, 0, 0] +; CHECK-DAG: Stmt_bbA[] -> [2, 0, 0] +; CHECK-DAG: Stmt_bbMerge[] -> [3, 0, 0] + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @hoge(i64 %p0, i64 %p1, i64 %p2, i64 %p3, float* %A) { +entry: + br label %loopA + +loopA: + %tmp4 = phi i64 [ 0, %entry ], [ 0, %loopB] + store float 42.0, float* %A + %cmp0 = icmp sle i64 %p0, 100 + br i1 %cmp0, label %loopB, label %bbB + +loopB: + store float 42.0, float* %A + %cmp1 = icmp sle i64 %p1, 100 + br i1 %cmp1, label %loopA, label %bbA + +bbA: + store float 42.0, float* %A + %cmpbbA = icmp sle i64 %p2, 50 + br i1 %cmpbbA, label %bbMerge, label %exit + +bbB: + store float 42.0, float* %A + %cmpbbB= icmp sle i64 %p3, 200 + br i1 %cmpbbB, label %exit, label %bbMerge + +bbMerge: + store float 42.0, 
float* %A + br label %exit + +exit: + ret void +} Index: polly/trunk/test/ScopInfo/schedule-const-post-dominator-walk.ll =================================================================== --- polly/trunk/test/ScopInfo/schedule-const-post-dominator-walk.ll +++ polly/trunk/test/ScopInfo/schedule-const-post-dominator-walk.ll @@ -0,0 +1,34 @@ +; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s + +; CHECK: { Stmt_bb3[i0] -> [0, 0] }; +; CHECK: { Stmt_bb2[] -> [1, 0] }; + +; Verify that we generate the correct schedule. In older versions of Polly, +; we generated an incorrect schedule: +; +; { Stmt_bb3[i0] -> [1, 0]; Stmt_bb2[] -> [0, 0] } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @hoge() { +bb: + br label %bb3 + +bb1: ; preds = %bb5 + br label %bb6 + +bb2: ; preds = %bb3 + %tmp = phi i64 [ %tmp4, %bb3 ] + br label %bb6 + +bb3: ; preds = %bb5, %bb + %tmp4 = phi i64 [ 0, %bb ], [ 0, %bb5 ] + br i1 false, label %bb5, label %bb2 + +bb5: ; preds = %bb3 + br i1 false, label %bb3, label %bb1 + +bb6: ; preds = %bb2, %bb1 + %tmp2 = phi i64 [ %tmp, %bb2 ], [ undef, %bb1 ] + ret void +}