Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -1421,6 +1421,53 @@ void init(AliasAnalysis &AA, AssumptionCache &AC, ScopDetection &SD, DominatorTree &DT, LoopInfo &LI); + /// @brief Propagate domains that are known due to graph properties. + /// + /// As a CFG is mostly structured we use the graph properties to propagate + /// domains without the need to compute all path conditions. In particular, if + /// a block A dominates a block B and B post-dominates A we know that the + /// domain of B is a superset of the domain of A. As we do not have + /// post-dominator information available here we use the less precise region + /// information. Given a region R, we know that the exit is always executed if + /// the entry was executed, thus the domain of the exit is a superset of the + /// domain of the entry. In case the exit can only be reached from within the + /// region the domains are in fact equal. This function will use this property + /// to avoid the generation of condition constraints that determine when a + /// branch is taken. If @p BB is a region entry block we will propagate its + /// domain to the region exit block. Additionally, we put the region exit + /// block in the @p FinishedExitBlocks set so we can later skip edges from + /// within the region to that block. + /// + /// @param BB The block for which the domain is currently propagated. + /// @param BBLoop The innermost affine loop surrounding @p BB. + /// @param FinishedExitBlocks Set of region exits the domain was set for. + /// @param SD The ScopDetection analysis for the current function. + /// @param LI The LoopInfo for the current function. + /// + void propagateDomainConstraintsToRegionExit( + BasicBlock *BB, Loop *BBLoop, + SmallPtrSetImpl &FinishedExitBlocks, ScopDetection &SD, + LoopInfo &LI); + + /// @brief Compute the union of predecessor domains for @p BB. + /// + /// To compute the union of all domains of predecessors of @p BB this + /// function applies similar reasoning on the CFG structure as described for + /// @see propagateDomainConstraintsToRegionExit + /// + /// @param BB The block for which the predecessor domains are collected. + /// @param Domain The domain under which BB is executed. + /// @param SD The ScopDetection analysis for the current function. + /// @param DT The DominatorTree for the current function. + /// @param LI The LoopInfo for the current function. + /// + /// @returns The domain under which @p BB is executed. + __isl_give isl_set *getPredecessorDomainConstraints(BasicBlock *BB, + isl_set *Domain, + ScopDetection &SD, + DominatorTree &DT, + LoopInfo &LI); + /// @brief Add loop carried constraints to the header block of the loop @p L. /// /// @param L The loop to process. Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -2179,6 +2179,94 @@ return true; } +static Loop * +getFirstNonBoxedLoopFor(BasicBlock *BB, LoopInfo &LI, + const ScopDetection::BoxedLoopsSetTy &BoxedLoops) { + auto *L = LI.getLoopFor(BB); + while (BoxedLoops.count(L)) + L = L->getParentLoop(); + return L; +} + +/// @brief Adjust the dimensions of @p Dom that was constructed for @p OldL +/// to be compatible to domains constructed for loop @p NewL. +/// +/// This function assumes @p NewL and @p OldL are equal or there is a CFG +/// edge from @p OldL to @p NewL. +static __isl_give isl_set *adjustDomainDimensions(Scop &S, + __isl_take isl_set *Dom, + Loop *OldL, Loop *NewL) { + + // If the loops are the same there is nothing to do. + if (NewL == OldL) + return Dom; + + int OldDepth = S.getRelativeLoopDepth(OldL); + int NewDepth = S.getRelativeLoopDepth(NewL); + // If both loops are non-affine loops there is nothing to do. + if (OldDepth == -1 && NewDepth == -1) + return Dom; + + // Distinguish three cases: + // 1) The depth is the same but the loops are not. + // => One loop was left one was entered. + // 2) The depth increased from OldL to NewL. + // => One loop was entered, none was left. + // 3) The depth decreased from OldL to NewL. + // => Loops were left were difference of the depths defines how many. + if (OldDepth == NewDepth) { + Dom = isl_set_project_out(Dom, isl_dim_set, NewDepth, 1); + Dom = isl_set_add_dims(Dom, isl_dim_set, 1); + Dom = addDomainDimId(Dom, NewDepth, NewL); + } else if (OldDepth < NewDepth) { + assert(OldDepth + 1 == NewDepth); + assert(NewL->getParentLoop() == OldL); + Dom = isl_set_add_dims(Dom, isl_dim_set, 1); + Dom = addDomainDimId(Dom, NewDepth, NewL); + } else { + assert(OldDepth > NewDepth); + int Diff = OldDepth - NewDepth; + int NumDim = isl_set_n_dim(Dom); + assert(NumDim >= Diff); + Dom = isl_set_project_out(Dom, isl_dim_set, NumDim - Diff, Diff); + } + + return Dom; +} + +void Scop::propagateDomainConstraintsToRegionExit( + BasicBlock *BB, Loop *BBLoop, + SmallPtrSetImpl &FinishedExitBlocks, ScopDetection &SD, + LoopInfo &LI) { + + // Check if the block @p BB is the entry of a region. If so we propagate it's + // domain to the exit block of the region. Otherwise we are done. + auto *RI = R.getRegionInfo(); + auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr; + if (!BBReg || BBReg->getEntry() != BB) + return; + + auto *Domain = DomainMap[BB]; + assert(Domain && "Cannot propagate a nullptr"); + + auto *ExitBB = BBReg->getExit(); + auto &BoxedLoops = *SD.getBoxedLoops(&getRegion()); + auto *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, BoxedLoops); + + // Since the dimensions of @p BB and @p ExitBB might be differen we have to + // adjust the domain before we can propagate it. + auto *AdjustedDomain = + adjustDomainDimensions(*this, isl_set_copy(Domain), BBLoop, ExitBBLoop); + auto *&ExitDomain = DomainMap[ExitBB]; + + // If the exit domain is not yet created we set it otherwise we "add" the + // current domain. + ExitDomain = + ExitDomain ? isl_set_union(AdjustedDomain, ExitDomain) : AdjustedDomain; + + FinishedExitBlocks.insert(ExitBB); +} + bool Scop::buildDomainsWithBranchConstraints(Region *R, ScopDetection &SD, DominatorTree &DT, LoopInfo &LI) { auto &BoxedLoops = *SD.getBoxedLoops(&getRegion()); @@ -2194,6 +2282,7 @@ // As we are only interested in non-loop carried constraints here we can // simply skip loop back edges. + SmallPtrSet FinishedExitBlocks; ReversePostOrderTraversal RTraversal(R); for (auto *RN : RTraversal) { @@ -2221,8 +2310,22 @@ if (!Domain) continue; - Loop *BBLoop = getRegionNodeLoop(RN, LI); - int BBLoopDepth = getRelativeLoopDepth(BBLoop); + auto *BBLoop = getRegionNodeLoop(RN, LI); + // Propagate the domain from BB directly to blocks that have a superset + // domain, at the moment only region exit nodes of regions that start in BB. + propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks, SD, + LI); + + // If all successors of BB have been set a domain though the propagation + // above we do not need to build condition sets but can just skip this + // block. However, it is important to note that this is a local property + // with regards to the region @p R. To this end FinishedExitBlocks is a + // local variable. + auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) { + return FinishedExitBlocks.count(SuccBB); + }; + if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit)) + continue; // Build the condition sets for the successor nodes of the current region // node. If it is a non-affine subregion we will always execute the single @@ -2243,40 +2346,21 @@ isl_set *CondSet = ConditionSets[u]; BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u); + // If we propagates the domain of some block to "SuccBB" we do not have to + // adjust the domain. + if (FinishedExitBlocks.count(SuccBB)) { + isl_set_free(CondSet); + continue; + } + // Skip back edges. if (DT.dominates(SuccBB, BB)) { isl_set_free(CondSet); continue; } - // Do not adjust the number of dimensions if we enter a boxed loop or are - // in a non-affine subregion or if the surrounding loop stays the same. - Loop *SuccBBLoop = LI.getLoopFor(SuccBB); - while (BoxedLoops.count(SuccBBLoop)) - SuccBBLoop = SuccBBLoop->getParentLoop(); - - if (BBLoop != SuccBBLoop) { - - // Check if the edge to SuccBB is a loop entry or exit edge. If so - // adjust the dimensionality accordingly. Lastly, if we leave a loop - // and enter a new one we need to drop the old constraints. - int SuccBBLoopDepth = getRelativeLoopDepth(SuccBBLoop); - unsigned LoopDepthDiff = std::abs(BBLoopDepth - SuccBBLoopDepth); - if (BBLoopDepth > SuccBBLoopDepth) { - CondSet = isl_set_project_out(CondSet, isl_dim_set, - isl_set_n_dim(CondSet) - LoopDepthDiff, - LoopDepthDiff); - } else if (SuccBBLoopDepth > BBLoopDepth) { - assert(LoopDepthDiff == 1); - CondSet = isl_set_add_dims(CondSet, isl_dim_set, 1); - CondSet = addDomainDimId(CondSet, SuccBBLoopDepth, SuccBBLoop); - } else if (BBLoopDepth >= 0) { - assert(LoopDepthDiff <= 1); - CondSet = isl_set_project_out(CondSet, isl_dim_set, BBLoopDepth, 1); - CondSet = isl_set_add_dims(CondSet, isl_dim_set, 1); - CondSet = addDomainDimId(CondSet, SuccBBLoopDepth, SuccBBLoop); - } - } + auto *SuccBBLoop = getFirstNonBoxedLoopFor(SuccBB, LI, BoxedLoops); + CondSet = adjustDomainDimensions(*this, CondSet, BBLoop, SuccBBLoop); // Set the domain for the successor or merge it with an existing domain in // case there are multiple paths (without loop back edges) to the @@ -2321,6 +2405,65 @@ return getDomainForBlock(R->getEntry(), DomainMap, RI); } +isl_set *Scop::getPredecessorDomainConstraints(BasicBlock *BB, isl_set *Domain, + ScopDetection &SD, + DominatorTree &DT, + LoopInfo &LI) { + // If @p BB is the ScopEntry we are done + if (R.getEntry() == BB) + return isl_set_universe(isl_set_get_space(Domain)); + + // The set of boxed loops (loops in non-affine subregions) for this SCoP. + auto &BoxedLoops = *SD.getBoxedLoops(&getRegion()); + + // The region info of this function. + auto &RI = *R.getRegionInfo(); + + auto *BBLoop = getFirstNonBoxedLoopFor(BB, LI, BoxedLoops); + + // A domain to collect all predecessor domains, thus all conditions under + // which the block is executed. To this end we start with the empty domain. + isl_set *PredDom = isl_set_empty(isl_set_get_space(Domain)); + + // Set of regions of which the entry block domain has been propagated to BB. + // all predecessor inside any of the regions can be skipped. + SmallSet PropagatedRegions; + + for (auto *PredBB : predecessors(BB)) { + // Skip backedges + if (DT.dominates(BB, PredBB)) + continue; + + // If the predecessor is in a region we used for propagation we can skip it. + auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); }; + if (std::any_of(PropagatedRegions.begin(), PropagatedRegions.end(), + PredBBInRegion)) { + continue; + } + + // Check if there is a valid region we can use for propagation, thus look + // for a region that contains the predecessor and has @p BB as exit block. + auto *PredR = RI.getRegionFor(PredBB); + while (PredR->getExit() != BB && !PredR->contains(BB)) + PredR->getParent(); + + // If a valid region for propagation was found use the entry of that region + // for propagation, otherwise the PredBB directly. + if (PredR->getExit() == BB) { + PredBB = PredR->getEntry(); + PropagatedRegions.insert(PredR); + } + + auto *PredBBDom = getDomainForBlock(PredBB, DomainMap, RI); + auto *PredBBLoop = getFirstNonBoxedLoopFor(PredBB, LI, BoxedLoops); + PredBBDom = adjustDomainDimensions(*this, PredBBDom, PredBBLoop, BBLoop); + + PredDom = isl_set_union(PredDom, PredBBDom); + } + + return PredDom; +} + void Scop::propagateDomainConstraints(Region *R, ScopDetection &SD, DominatorTree &DT, LoopInfo &LI) { // Iterate over the region R and propagate the domain constrains from the @@ -2332,9 +2475,6 @@ // predecessors have been visited before a block or non-affine subregion is // visited. - // The set of boxed loops (loops in non-affine subregions) for this SCoP. - auto &BoxedLoops = *SD.getBoxedLoops(&getRegion()); - ReversePostOrderTraversal RTraversal(R); for (auto *RN : RTraversal) { @@ -2362,59 +2502,12 @@ continue; } - Loop *BBLoop = getRegionNodeLoop(RN, LI); - int BBLoopDepth = getRelativeLoopDepth(BBLoop); - - isl_set *PredDom = isl_set_empty(isl_set_get_space(Domain)); - for (auto *PredBB : predecessors(BB)) { - - // Skip backedges - if (DT.dominates(BB, PredBB)) - continue; - - isl_set *PredBBDom = nullptr; - - // Handle the SCoP entry block with its outside predecessors. - if (!getRegion().contains(PredBB)) - PredBBDom = isl_set_universe(isl_set_get_space(PredDom)); - - if (!PredBBDom) { - // Determine the loop depth of the predecessor and adjust its domain to - // the domain of the current block. This can mean we have to: - // o) Drop a dimension if this block is the exit of a loop, not the - // header of a new loop and the predecessor was part of the loop. - // o) Add an unconstrainted new dimension if this block is the header - // of a loop and the predecessor is not part of it. - // o) Drop the information about the innermost loop dimension when the - // predecessor and the current block are surrounded by different - // loops in the same depth. - PredBBDom = getDomainForBlock(PredBB, DomainMap, *R->getRegionInfo()); - Loop *PredBBLoop = LI.getLoopFor(PredBB); - while (BoxedLoops.count(PredBBLoop)) - PredBBLoop = PredBBLoop->getParentLoop(); - - int PredBBLoopDepth = getRelativeLoopDepth(PredBBLoop); - unsigned LoopDepthDiff = std::abs(BBLoopDepth - PredBBLoopDepth); - if (BBLoopDepth < PredBBLoopDepth) - PredBBDom = isl_set_project_out( - PredBBDom, isl_dim_set, isl_set_n_dim(PredBBDom) - LoopDepthDiff, - LoopDepthDiff); - else if (PredBBLoopDepth < BBLoopDepth) { - assert(LoopDepthDiff == 1); - PredBBDom = isl_set_add_dims(PredBBDom, isl_dim_set, 1); - } else if (BBLoop != PredBBLoop && BBLoopDepth >= 0) { - assert(LoopDepthDiff <= 1); - PredBBDom = isl_set_drop_constraints_involving_dims( - PredBBDom, isl_dim_set, BBLoopDepth, 1); - } - } - - PredDom = isl_set_union(PredDom, PredBBDom); - } - // Under the union of all predecessor conditions we can reach this block. + auto *PredDom = getPredecessorDomainConstraints(BB, Domain, SD, DT, LI); Domain = isl_set_coalesce(isl_set_intersect(Domain, PredDom)); + Domain = isl_set_align_params(Domain, getParamSpace()); + Loop *BBLoop = getRegionNodeLoop(RN, LI); if (BBLoop && BBLoop->getHeader() == BB && getRegion().contains(BBLoop)) addLoopBoundsToHeaderDomain(BBLoop, LI); Index: test/ScopInfo/complex-successor-structure-2.ll =================================================================== --- test/ScopInfo/complex-successor-structure-2.ll +++ test/ScopInfo/complex-successor-structure-2.ll @@ -1,12 +1,16 @@ ; RUN: opt %loadPolly -pass-remarks-analysis="polly-scops" -polly-scops \ ; RUN: < %s 2>&1 | FileCheck %s -; We build scops from a region of for.body->B13 having successor nodes -; of following form and check that the domain construction does not take a huge -; amount of time. +; We build a scop for the region for.body->B13. The CFG is of the following +; form and the branch conditions are build from "smax" SCEVs. However, in +; contrast to complex-success-structure.ll the smax constraints do not grow +; anymore after B4. This will keep the condition construction bounded. +; Since we propagate the domains from one B(X) to the B(X+1) we can also keep +; the domains simple. We will bail anyway due to invalid required invariant +; loads. +; +; CHECK-NOT: Low complexity assumption ; -; CHECK: Low complexity assumption: { : 1 = 0 } - ; | ; for.body <--+ ; | | @@ -451,7 +455,7 @@ B5: ; preds = %A5, %B4 %48 = phi i16 [ %add84.4, %A5 ], [ %47, %B4 ] - %add84.5 = add i16 %48, 128 + %add84.5 = add i16 %46, 128 %arrayidx74.6 = getelementptr inbounds i16, i16* %Output, i32 6 %49 = load i16, i16* %arrayidx74.6, align 2 %cmp77.6 = icmp slt i16 %49, %add84.5 @@ -463,7 +467,7 @@ B6: ; preds = %A6, %B5 %50 = phi i16 [ %add84.5, %A6 ], [ %49, %B5 ] - %add84.6 = add i16 %50, 128 + %add84.6 = add i16 %46, 128 %arrayidx74.7 = getelementptr inbounds i16, i16* %Output, i32 7 %51 = load i16, i16* %arrayidx74.7, align 2 %cmp77.7 = icmp slt i16 %51, %add84.6 @@ -475,7 +479,7 @@ B7: ; preds = %A7, %B6 %52 = phi i16 [ %add84.6, %A7 ], [ %51, %B6 ] - %add84.7 = add i16 %52, 128 + %add84.7 = add i16 %46, 128 %arrayidx74.8 = getelementptr inbounds i16, i16* %Output, i32 8 %53 = load i16, i16* %arrayidx74.8, align 2 %cmp77.8 = icmp slt i16 %53, %add84.7 @@ -487,7 +491,7 @@ B8: ; preds = %A8, %B7 %54 = phi i16 [ %add84.7, %A8 ], [ %53, %B7 ] - %add84.8 = add i16 %54, 128 + %add84.8 = add i16 %46, 128 %cmp77.9 = icmp slt i16 %.reload, %add84.8 br i1 %cmp77.9, label %A9, label %B9 @@ -498,7 +502,7 @@ B9: ; preds = %A9, %B8 %55 = phi i16 [ %add84.8, %A9 ], [ %.reload, %B8 ] - %add84.9 = add i16 %55, 128 + %add84.9 = add i16 %46, 128 %cmp77.10 = icmp slt i16 %.reload151, %add84.9 br i1 %cmp77.10, label %A10, label %B10 @@ -509,7 +513,7 @@ B10: ; preds = %A10, %B9 %56 = phi i16 [ %add84.9, %A10 ], [ %.reload151, %B9 ] - %add84.10 = add i16 %56, 128 + %add84.10 = add i16 %46, 128 %cmp77.11 = icmp slt i16 %.reload153, %add84.10 br i1 %cmp77.11, label %A11, label %B11 @@ -520,7 +524,7 @@ B11: ; preds = %A11, %B10 %57 = phi i16 [ %add84.10, %A11 ], [ %.reload153, %B10 ] - %add84.11 = add i16 %57, 128 + %add84.11 = add i16 %46, 128 %cmp77.12 = icmp slt i16 %.reload155, %add84.11 br i1 %cmp77.12, label %A12, label %B13 Index: test/ScopInfo/complex-successor-structure-3.ll =================================================================== --- /dev/null +++ test/ScopInfo/complex-successor-structure-3.ll @@ -0,0 +1,365 @@ +; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s +; +; Check that propagation of domains from A(X) to A(X+1) will keep the +; domains small and concise. +; +; CHECK: Assumed Context: +; CHECK-NEXT: [tmp5, tmp, tmp8, tmp11, tmp14, tmp17, tmp20, tmp23, tmp26, p_9, p_10, p_11, p_12] -> { : } +; CHECK-NEXT: Invalid Context: +; CHECK-NEXT: [tmp5, tmp, tmp8, tmp11, tmp14, tmp17, tmp20, tmp23, tmp26, p_9, p_10, p_11, p_12] -> { : 1 = 0 } +; +; CHECK: Stmt_FINAL +; CHECK-NEXT: Domain := +; CHECK-NEXT: [tmp5, tmp, tmp8, tmp11, tmp14, tmp17, tmp20, tmp23, tmp26, p_9, p_10, p_11, p_12] -> { Stmt_FINAL[] }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [tmp5, tmp, tmp8, tmp11, tmp14, tmp17, tmp20, tmp23, tmp26, p_9, p_10, p_11, p_12] -> { Stmt_FINAL[] -> [22] }; +; +; +; void f(short *restrict In, int *restrict Out) { +; int InV, V, Idx; +; Idx = 0; +; V = 999; +; +; A0: +; InV = In[Idx++]; +; if (InV < V + 42) { +; B0: +; V = V + 42; +; Out[V]++; +; } else { +; C0: +; V = InV; +; Out[V]--; +; } +; +; A1: +; InV = In[Idx++]; +; if (InV < V + 42) { +; B1: +; V = V + 42; +; Out[V]++; +; } else { +; C1: +; V = InV; +; Out[V]--; +; } +; V = 999; +; +; A2: +; InV = In[Idx++]; +; if (InV < V + 42) { +; B2: +; V = V + 42; +; Out[V]++; +; } else { +; C2: +; V = InV; +; Out[V]--; +; } +; +; A3: +; InV = In[Idx++]; +; if (InV < V + 42) { +; B3: +; V = V + 42; +; Out[V]++; +; } else { +; C3: +; V = InV; +; Out[V]--; +; } +; V = 999; +; +; A4: +; InV = In[Idx++]; +; if (InV < V + 42) { +; B4: +; V = V + 42; +; Out[V]++; +; } else { +; C4: +; V = InV; +; Out[V]--; +; } +; +; A5: +; InV = In[Idx++]; +; if (InV < V + 42) { +; B5: +; V = V + 42; +; Out[V]++; +; } else { +; C5: +; V = InV; +; Out[V]--; +; } +; V = 999; +; +; A6: +; InV = In[Idx++]; +; if (InV < V + 42) { +; B6: +; V = V + 42; +; Out[V]++; +; } else { +; C6: +; V = InV; +; Out[V]--; +; } +; +; A7: +; InV = In[Idx++]; +; if (InV < V + 42) { +; B7: +; V = V + 42; +; Out[V]++; +; } else { +; C7: +; V = InV; +; Out[V]--; +; } +; V = 999; +; +; A8: +; InV = In[Idx++]; +; if (InV < V + 42) { +; B8: +; V = V + 42; +; Out[V]++; +; } else { +; C8: +; V = InV; +; Out[V]--; +; } +; FINAL: +; Out[V]++; +; +; ScopExit: +; return; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i16* noalias %In, i32* noalias %Out) { +entry: + %tmp = load i16, i16* %In, align 2 + %conv = sext i16 %tmp to i32 + %cmp = icmp slt i16 %tmp, 1041 + br i1 %cmp, label %B0, label %C0 + +B0: ; preds = %entry + %arrayidx4 = getelementptr inbounds i32, i32* %Out, i64 1041 + %tmp3 = load i32, i32* %arrayidx4, align 4 + %inc5 = add nsw i32 %tmp3, 1 + store i32 %inc5, i32* %arrayidx4, align 4 + br label %A1 + +C0: ; preds = %entry + %idxprom6 = sext i16 %tmp to i64 + %arrayidx7 = getelementptr inbounds i32, i32* %Out, i64 %idxprom6 + %tmp4 = load i32, i32* %arrayidx7, align 4 + %dec = add nsw i32 %tmp4, -1 + store i32 %dec, i32* %arrayidx7, align 4 + br label %A1 + +A1: ; preds = %B0, %C0 + %V.0 = phi i32 [ 1041, %B0 ], [ %conv, %C0 ] + %arrayidx10 = getelementptr inbounds i16, i16* %In, i64 1 + %tmp5 = load i16, i16* %arrayidx10, align 2 + %conv11 = sext i16 %tmp5 to i32 + %add12 = add nsw i32 %V.0, 42 + %cmp13 = icmp slt i32 %conv11, %add12 + br i1 %cmp13, label %B1, label %C1 + +B1: ; preds = %A1 + %add16 = add nsw i32 %V.0, 42 + %idxprom17 = sext i32 %add16 to i64 + %arrayidx18 = getelementptr inbounds i32, i32* %Out, i64 %idxprom17 + %tmp6 = load i32, i32* %arrayidx18, align 4 + %inc19 = add nsw i32 %tmp6, 1 + store i32 %inc19, i32* %arrayidx18, align 4 + br label %A2 + +C1: ; preds = %A1 + %idxprom21 = sext i16 %tmp5 to i64 + %arrayidx22 = getelementptr inbounds i32, i32* %Out, i64 %idxprom21 + %tmp7 = load i32, i32* %arrayidx22, align 4 + %dec23 = add nsw i32 %tmp7, -1 + store i32 %dec23, i32* %arrayidx22, align 4 + br label %A2 + +A2: ; preds = %B1, %C1 + %arrayidx27 = getelementptr inbounds i16, i16* %In, i64 2 + %tmp8 = load i16, i16* %arrayidx27, align 2 + %conv28 = sext i16 %tmp8 to i32 + %cmp30 = icmp slt i16 %tmp8, 1041 + br i1 %cmp30, label %B2, label %C2 + +B2: ; preds = %A2 + %arrayidx35 = getelementptr inbounds i32, i32* %Out, i64 1041 + %tmp9 = load i32, i32* %arrayidx35, align 4 + %inc36 = add nsw i32 %tmp9, 1 + store i32 %inc36, i32* %arrayidx35, align 4 + br label %A3 + +C2: ; preds = %A2 + %idxprom38 = sext i16 %tmp8 to i64 + %arrayidx39 = getelementptr inbounds i32, i32* %Out, i64 %idxprom38 + %tmp10 = load i32, i32* %arrayidx39, align 4 + %dec40 = add nsw i32 %tmp10, -1 + store i32 %dec40, i32* %arrayidx39, align 4 + br label %A3 + +A3: ; preds = %B2, %C2 + %V.1 = phi i32 [ 1041, %B2 ], [ %conv28, %C2 ] + %arrayidx44 = getelementptr inbounds i16, i16* %In, i64 3 + %tmp11 = load i16, i16* %arrayidx44, align 2 + %conv45 = sext i16 %tmp11 to i32 + %add46 = add nsw i32 %V.1, 42 + %cmp47 = icmp slt i32 %conv45, %add46 + br i1 %cmp47, label %B3, label %C3 + +B3: ; preds = %A3 + %add50 = add nsw i32 %V.1, 42 + %idxprom51 = sext i32 %add50 to i64 + %arrayidx52 = getelementptr inbounds i32, i32* %Out, i64 %idxprom51 + %tmp12 = load i32, i32* %arrayidx52, align 4 + %inc53 = add nsw i32 %tmp12, 1 + store i32 %inc53, i32* %arrayidx52, align 4 + br label %A4 + +C3: ; preds = %A3 + %idxprom55 = sext i16 %tmp11 to i64 + %arrayidx56 = getelementptr inbounds i32, i32* %Out, i64 %idxprom55 + %tmp13 = load i32, i32* %arrayidx56, align 4 + %dec57 = add nsw i32 %tmp13, -1 + store i32 %dec57, i32* %arrayidx56, align 4 + br label %A4 + +A4: ; preds = %B3, %C3 + %arrayidx61 = getelementptr inbounds i16, i16* %In, i64 4 + %tmp14 = load i16, i16* %arrayidx61, align 2 + %conv62 = sext i16 %tmp14 to i32 + %cmp64 = icmp slt i16 %tmp14, 1041 + br i1 %cmp64, label %B4, label %C4 + +B4: ; preds = %A4 + %arrayidx69 = getelementptr inbounds i32, i32* %Out, i64 1041 + %tmp15 = load i32, i32* %arrayidx69, align 4 + %inc70 = add nsw i32 %tmp15, 1 + store i32 %inc70, i32* %arrayidx69, align 4 + br label %A5 + +C4: ; preds = %A4 + %idxprom72 = sext i16 %tmp14 to i64 + %arrayidx73 = getelementptr inbounds i32, i32* %Out, i64 %idxprom72 + %tmp16 = load i32, i32* %arrayidx73, align 4 + %dec74 = add nsw i32 %tmp16, -1 + store i32 %dec74, i32* %arrayidx73, align 4 + %phitmp = add nsw i32 %conv62, 42 + br label %A5 + +A5: ; preds = %B4, %C4 + %V.2 = phi i32 [ 1083, %B4 ], [ %phitmp, %C4 ] + %arrayidx78 = getelementptr inbounds i16, i16* %In, i64 5 + %tmp17 = load i16, i16* %arrayidx78, align 2 + %conv79 = sext i16 %tmp17 to i32 + %cmp81 = icmp slt i32 %conv79, %V.2 + br i1 %cmp81, label %B5, label %C5 + +B5: ; preds = %A5 + %idxprom85 = sext i32 %V.2 to i64 + %arrayidx86 = getelementptr inbounds i32, i32* %Out, i64 %idxprom85 + %tmp18 = load i32, i32* %arrayidx86, align 4 + %inc87 = add nsw i32 %tmp18, 1 + store i32 %inc87, i32* %arrayidx86, align 4 + br label %A6 + +C5: ; preds = %A5 + %idxprom89 = sext i16 %tmp17 to i64 + %arrayidx90 = getelementptr inbounds i32, i32* %Out, i64 %idxprom89 + %tmp19 = load i32, i32* %arrayidx90, align 4 + %dec91 = add nsw i32 %tmp19, -1 + store i32 %dec91, i32* %arrayidx90, align 4 + br label %A6 + +A6: ; preds = %B5, %C5 + %arrayidx95 = getelementptr inbounds i16, i16* %In, i64 6 + %tmp20 = load i16, i16* %arrayidx95, align 2 + %conv96 = sext i16 %tmp20 to i32 + %cmp98 = icmp slt i16 %tmp20, 1041 + br i1 %cmp98, label %B6, label %C6 + +B6: ; preds = %A6 + %arrayidx103 = getelementptr inbounds i32, i32* %Out, i64 1041 + %tmp21 = load i32, i32* %arrayidx103, align 4 + %inc104 = add nsw i32 %tmp21, 1 + store i32 %inc104, i32* %arrayidx103, align 4 + br label %A7 + +C6: ; preds = %A6 + %idxprom106 = sext i16 %tmp20 to i64 + %arrayidx107 = getelementptr inbounds i32, i32* %Out, i64 %idxprom106 + %tmp22 = load i32, i32* %arrayidx107, align 4 + %dec108 = add nsw i32 %tmp22, -1 + store i32 %dec108, i32* %arrayidx107, align 4 + %phitmp1 = add nsw i32 %conv96, 42 + br label %A7 + +A7: ; preds = %B6, %C6 + %V.3 = phi i32 [ 1083, %B6 ], [ %phitmp1, %C6 ] + %arrayidx112 = getelementptr inbounds i16, i16* %In, i64 7 + %tmp23 = load i16, i16* %arrayidx112, align 2 + %conv113 = sext i16 %tmp23 to i32 + %cmp115 = icmp slt i32 %conv113, %V.3 + br i1 %cmp115, label %B7, label %C7 + +B7: ; preds = %A7 + %idxprom119 = sext i32 %V.3 to i64 + %arrayidx120 = getelementptr inbounds i32, i32* %Out, i64 %idxprom119 + %tmp24 = load i32, i32* %arrayidx120, align 4 + %inc121 = add nsw i32 %tmp24, 1 + store i32 %inc121, i32* %arrayidx120, align 4 + br label %A8 + +C7: ; preds = %A7 + %idxprom123 = sext i16 %tmp23 to i64 + %arrayidx124 = getelementptr inbounds i32, i32* %Out, i64 %idxprom123 + %tmp25 = load i32, i32* %arrayidx124, align 4 + %dec125 = add nsw i32 %tmp25, -1 + store i32 %dec125, i32* %arrayidx124, align 4 + br label %A8 + +A8: ; preds = %B7, %C7 + %arrayidx129 = getelementptr inbounds i16, i16* %In, i64 8 + %tmp26 = load i16, i16* %arrayidx129, align 2 + %cmp132 = icmp slt i16 %tmp26, 1041 + br i1 %cmp132, label %B8, label %C8 + +B8: ; preds = %A8 + %arrayidx137 = getelementptr inbounds i32, i32* %Out, i64 1041 + %tmp27 = load i32, i32* %arrayidx137, align 4 + %inc138 = add nsw i32 %tmp27, 1 + store i32 %inc138, i32* %arrayidx137, align 4 + br label %FINAL + +C8: ; preds = %A8 + %idxprom140 = sext i16 %tmp26 to i64 + %arrayidx141 = getelementptr inbounds i32, i32* %Out, i64 %idxprom140 + %tmp28 = load i32, i32* %arrayidx141, align 4 + %dec142 = add nsw i32 %tmp28, -1 + store i32 %dec142, i32* %arrayidx141, align 4 + %phitmp2 = sext i16 %tmp26 to i64 + br label %FINAL + +FINAL: ; preds = %C8, %B8 + %V.4 = phi i64 [ 1041, %B8 ], [ %phitmp2, %C8 ] + %arrayidx145 = getelementptr inbounds i32, i32* %Out, i64 %V.4 + %tmp29 = load i32, i32* %arrayidx145, align 4 + %inc146 = add nsw i32 %tmp29, 1 + store i32 %inc146, i32* %arrayidx145, align 4 + br label %ScopExit + +ScopExit: + ret void +} Index: test/ScopInfo/complex-successor-structure.ll =================================================================== --- test/ScopInfo/complex-successor-structure.ll +++ test/ScopInfo/complex-successor-structure.ll @@ -1,9 +1,12 @@ ; RUN: opt %loadPolly -pass-remarks-analysis="polly-scops" -polly-scops \ ; RUN: < %s 2>&1 | FileCheck %s -; We build scops from a region of for.body->B13 having successor nodes -; of following form and check that the domain construction does not take a huge -; amount of time. +; We build a scop from the region for.body->B13. The CFG has is of the +; following form. The test checks that the condition construction does not take +; a huge amount of time. While we can propagate the domain constraints from +; B(X) to B(X+1) the conditions in B(X+1) will exponentially grow the number +; of needed constraints (it is basically the condition of B(X) + one smax), +; thus we should bail out at some point. ; ; CHECK: Low complexity assumption: { : 1 = 0 } Index: test/ScopInfo/intra_and_inter_bb_scalar_dep.ll =================================================================== --- test/ScopInfo/intra_and_inter_bb_scalar_dep.ll +++ test/ScopInfo/intra_and_inter_bb_scalar_dep.ll @@ -15,7 +15,7 @@ ; CHECK: Invariant Accesses: { ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init_ptr[0] }; -; CHECK-NEXT: Execution Context: [N] -> { : N < 0 or N > 0 } +; CHECK-NEXT: Execution Context: [N] -> { : N > 0 } ; CHECK-NEXT: } ; ; CHECK: Statements { Index: test/ScopInfo/non-affine-region-with-loop-2.ll =================================================================== --- test/ScopInfo/non-affine-region-with-loop-2.ll +++ test/ScopInfo/non-affine-region-with-loop-2.ll @@ -8,7 +8,7 @@ ; CHECK: [indvar] -> { Stmt_loop3[i0] -> [0, 0] : indvar >= 101 or indvar <= 99 }; ; CHECK: Stmt_loop2__TO__loop ; CHECK: Domain := -; CHECK: [indvar] -> { Stmt_loop2__TO__loop[] : indvar <= 99 or indvar >= 101 }; +; CHECK: [indvar] -> { Stmt_loop2__TO__loop[] : indvar >= 101 or indvar <= 99 }; ; CHECK: Schedule := ; CHECK: [indvar] -> { Stmt_loop2__TO__loop[] -> [1, 0] : indvar >= 101 or indvar <= 99 }; ; Index: test/ScopInfo/non_affine_region_1.ll =================================================================== --- test/ScopInfo/non_affine_region_1.ll +++ test/ScopInfo/non_affine_region_1.ll @@ -18,9 +18,6 @@ ; } ; } ; -; TODO: We build a complicated representation of Stmt_bb10__TO__bb18's domain that will also complicate the schedule. -; Once the domain is simple this test should fail and this TODO can be removed. - ; CHECK: Statements { ; CHECK-NEXT: Stmt_bb3 ; CHECK-NEXT: Domain := @@ -38,16 +35,16 @@ ; CHECK-NEXT: [b] -> { Stmt_bb7[i0] -> MemRef_x_1__phi[] }; ; CHECK-NEXT: Stmt_bb8 ; CHECK-NEXT: Domain := -; CHECK-NEXT: [b] -> { Stmt_bb8[0] : b = 0 }; +; CHECK-NEXT: [b] -> { Stmt_bb8[i0] : i0 >= b and 0 <= i0 <= 1023 and 2i0 <= b }; ; CHECK-NEXT: Schedule := ; CHECK-NEXT: [b] -> { Stmt_bb8[i0] -> [0, 0] }; ; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] ; CHECK-NEXT: [b] -> { Stmt_bb8[i0] -> MemRef_x_1__phi[] }; ; CHECK-NEXT: Stmt_bb10__TO__bb18 ; CHECK-NEXT: Domain := -; CHECK-NEXT: [b] -> { Stmt_bb10__TO__bb18[i0] : 0 <= i0 <= 1023 and (i0 < b or (i0 >= b and 2i0 > b)); Stmt_bb10__TO__bb18[0] : b = 0 }; +; CHECK-NEXT: [b] -> { Stmt_bb10__TO__bb18[i0] : 0 <= i0 <= 1023 }; ; CHECK-NEXT: Schedule := -; CHECK-NEXT: [b] -> { Stmt_bb10__TO__bb18[i0] -> [i0, 3] : i0 < b or (i0 >= b and 2i0 > b); Stmt_bb10__TO__bb18[0] -> [0, 3] : b = 0 }; +; CHECK-NEXT: [b] -> { Stmt_bb10__TO__bb18[i0] -> [i0, 3] } ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] ; CHECK-NEXT: [b] -> { Stmt_bb10__TO__bb18[i0] -> MemRef_x_1__phi[] }; ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] Index: test/ScopInfo/phi_scalar_simple_1.ll =================================================================== --- test/ScopInfo/phi_scalar_simple_1.ll +++ test/ScopInfo/phi_scalar_simple_1.ll @@ -17,16 +17,16 @@ ; CHECK: Statements { ; CHECK-NEXT: Stmt_for_cond ; CHECK-NEXT: Domain := -; CHECK-NEXT: [N] -> { Stmt_for_cond[i0] : N >= 2 and 0 <= i0 < N; Stmt_for_cond[0] : N <= 1 }; +; CHECK-NEXT: [N] -> { Stmt_for_cond[i0] : 0 <= i0 < N; Stmt_for_cond[0] : N <= 0 }; ; CHECK-NEXT: Schedule := -; CHECK-NEXT: [N] -> { Stmt_for_cond[i0] -> [i0, 0, 0, 0] : N >= 2 and i0 < N; Stmt_for_cond[0] -> [0, 0, 0, 0] : N <= 1 }; +; CHECK-NEXT: [N] -> { Stmt_for_cond[i0] -> [i0, 0, 0, 0] : i0 < N; Stmt_for_cond[0] -> [0, 0, 0, 0] : N <= 0 }; ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] ; CHECK-NEXT: [N] -> { Stmt_for_cond[i0] -> MemRef_x_addr_0__phi[] }; ; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] ; CHECK-NEXT: [N] -> { Stmt_for_cond[i0] -> MemRef_x_addr_0[] }; ; CHECK-NEXT: Stmt_for_body ; CHECK-NEXT: Domain := -; CHECK-NEXT: [N] -> { Stmt_for_body[i0] : N >= 2 and 0 <= i0 <= -2 + N }; +; CHECK-NEXT: [N] -> { Stmt_for_body[i0] : 0 <= i0 <= -2 + N }; ; CHECK-NEXT: Schedule := ; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> [i0, 1, 0, 0] }; ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] Index: test/ScopInfo/pointer-used-as-base-pointer-and-scalar-read.ll =================================================================== --- test/ScopInfo/pointer-used-as-base-pointer-and-scalar-read.ll +++ test/ScopInfo/pointer-used-as-base-pointer-and-scalar-read.ll @@ -37,7 +37,7 @@ ; CHECK-NEXT: [p] -> { Stmt_else[i0] -> MemRef_x__phi[] }; ; CHECK-NEXT: Stmt_bb8 ; CHECK-NEXT: Domain := -; CHECK-NEXT: [p] -> { Stmt_bb8[i0] : 0 <= i0 <= 999 and (p >= 33 or p <= 32) }; +; CHECK-NEXT: [p] -> { Stmt_bb8[i0] : 0 <= i0 <= 999 }; ; CHECK-NEXT: Schedule := ; CHECK-NEXT: [p] -> { Stmt_bb8[i0] -> [i0, 2] }; ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] Index: test/ScopInfo/remarks.ll =================================================================== --- test/ScopInfo/remarks.ll +++ test/ScopInfo/remarks.ll @@ -1,9 +1,9 @@ ; RUN: opt %loadPolly -pass-remarks-analysis="polly-scops" -polly-scops -disable-output < %s 2>&1 | FileCheck %s ; ; CHECK: remark: test/ScopInfo/remarks.c:4:7: SCoP begins here. -; CHECK: remark: test/ScopInfo/remarks.c:8:5: Finite loop restriction: [M, N] -> { : N > 0 and (M <= -2 or M = -1) } -; CHECK: remark: test/ScopInfo/remarks.c:13:7: No-error restriction: [M, N, Debug] -> { : M >= 0 and N > 0 and (Debug < 0 or Debug > 0) } -; CHECK: remark: test/ScopInfo/remarks.c:9:7: Inbounds assumption: [M, N, Debug] -> { : M <= 100 or (M > 0 and N <= 0) } +; CHECK: remark: test/ScopInfo/remarks.c:8:5: Finite loop restriction: [N, M] -> { : N > 0 and M < 0 } +; CHECK: remark: test/ScopInfo/remarks.c:13:7: No-error restriction: [N, M, Debug] -> { : N > 0 and M >= 0 and (Debug < 0 or Debug > 0) } +; CHECK: remark: test/ScopInfo/remarks.c:9:7: Inbounds assumption: [N, M] -> { : N <= 0 or (N > 0 and M <= 100) } ; CHECK: remark: :0:0: No-overflows restriction: [N, M, Debug] -> { : M <= -2147483649 - N or M >= 2147483648 - N } ; CHECK: remark: test/ScopInfo/remarks.c:9:18: Possibly aliasing pointer, use restrict keyword. ; CHECK: remark: test/ScopInfo/remarks.c:9:33: Possibly aliasing pointer, use restrict keyword.