Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -1317,6 +1317,14 @@ /// @brief A map from basic blocks to their domains. DenseMap DomainMap; + /// @brief A map from basic blocks to the error context reaching them. + /// + /// The error context describes all parameter configurations under which + /// the block would be executed but the control would pass an error block. + /// This information is not contained in the domain and only implicit in the + /// AssumedContext/InvalidContext. + DenseMap ErrorDomainCtxMap; + /// Constraints on parameters. isl_set *Context; @@ -1494,14 +1502,20 @@ void propagateDomainConstraints(Region *R, ScopDetection &SD, DominatorTree &DT, LoopInfo &LI); - /// @brief Remove domains of error blocks/regions (and blocks dominated by - /// them). + /// @brief Propagate of error block domain contexts through @p R. /// + /// This method will propagate error block domain contexts through @p R + /// and thereby fill the ErrorDomainCtxMap map. Additionally, the domains + /// of error statements and those only reachable via error statements will + /// be replaced by an empty set. Later those will be removed completely from + /// the SCoP. + /// + /// @param R The currently traversed region. /// @param SD The ScopDetection analysis for the current function. /// @param DT The DominatorTree for the current function. /// @param LI The LoopInfo for the current function. - void removeErrorBlockDomains(ScopDetection &SD, DominatorTree &DT, - LoopInfo &LI); + void propagateErrorConstraints(Region *R, ScopDetection &SD, + DominatorTree &DT, LoopInfo &LI); /// @brief Compute the domain for each basic block in @p R. /// @@ -1537,6 +1551,9 @@ /// @see isIgnored() void simplifySCoP(bool RemoveIgnoredStmts, DominatorTree &DT, LoopInfo &LI); + /// @brief Return the error context under which @p Stmt is reached. + __isl_give isl_set *getErrorCtxReachingStmt(ScopStmt &Stmt); + /// @brief Create equivalence classes for required invariant accesses. /// /// These classes will consolidate multiple required invariant loads from the Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -2118,39 +2118,6 @@ return isl_set_copy(DomainMap[BB]); } -void Scop::removeErrorBlockDomains(ScopDetection &SD, DominatorTree &DT, - LoopInfo &LI) { - auto removeDomains = [this, &DT](BasicBlock *Start) { - auto *BBNode = DT.getNode(Start); - for (auto *ErrorChild : depth_first(BBNode)) { - auto *ErrorChildBlock = ErrorChild->getBlock(); - auto *CurrentDomain = DomainMap[ErrorChildBlock]; - auto *Empty = isl_set_empty(isl_set_get_space(CurrentDomain)); - DomainMap[ErrorChildBlock] = Empty; - isl_set_free(CurrentDomain); - } - }; - - SmallVector Todo = {&R}; - - while (!Todo.empty()) { - auto *SubRegion = Todo.back(); - Todo.pop_back(); - - if (!SD.isNonAffineSubRegion(SubRegion, &getRegion())) { - for (auto &Child : *SubRegion) - Todo.push_back(Child.get()); - continue; - } - if (containsErrorBlock(SubRegion->getNode(), getRegion(), LI, DT)) - removeDomains(SubRegion->getEntry()); - } - - for (auto *BB : R.blocks()) - if (isErrorBlock(*BB, R, LI, DT)) - removeDomains(BB); -} - bool Scop::buildDomains(Region *R, ScopDetection &SD, DominatorTree &DT, LoopInfo &LI) { @@ -2178,12 +2145,17 @@ // Error blocks and blocks dominated by them have been assumed to never be // executed. Representing them in the Scop does not add any value. In fact, // it is likely to cause issues during construction of the ScopStmts. The - // contents of error blocks have not been verfied to be expressible and + // contents of error blocks have not been verified to be expressible and // will cause problems when building up a ScopStmt for them. // Furthermore, basic blocks dominated by error blocks may reference // instructions in the error block which, if the error block is not modeled, - // can themselves not be constructed properly. - removeErrorBlockDomains(SD, DT, LI); + // can themselves not be constructed properly. To this end we will replace + // the domains of error blocks and those only reachable via error blocks + // with an empty set. Additionally, we will record for each block under which + // parameter combination it would be reached via an error block in the + // ErrorDomainCtxMap map. This information is needed during load hoisting. + propagateErrorConstraints(R, SD, DT, LI); + return true; } @@ -2246,6 +2218,65 @@ return Dom; } +void Scop::propagateErrorConstraints(Region *R, ScopDetection &SD, + DominatorTree &DT, LoopInfo &LI) { + + ReversePostOrderTraversal RTraversal(R); + for (auto *RN : RTraversal) { + + // Recurse for affine subregions but go on for basic blocks and non-affine + // subregions. + if (RN->isSubRegion()) { + Region *SubRegion = RN->getNodeAs(); + if (!SD.isNonAffineSubRegion(SubRegion, &getRegion())) { + propagateErrorConstraints(SubRegion, SD, DT, LI); + continue; + } + } + + bool ContainsErrorBlock = containsErrorBlock(RN, getRegion(), LI, DT); + BasicBlock *BB = getRegionNodeBasicBlock(RN); + isl_set *&Domain = DomainMap[BB]; + assert(Domain && "Cannot propagate a nullptr"); + + auto *&ErrorCtx = ErrorDomainCtxMap[BB]; + auto *DomainCtx = isl_set_params(isl_set_copy(Domain)); + bool IsErrorBlock = ContainsErrorBlock || + (ErrorCtx && isl_set_is_subset(DomainCtx, ErrorCtx)); + + if (IsErrorBlock) { + ErrorCtx = ErrorCtx ? isl_set_union(ErrorCtx, DomainCtx) : DomainCtx; + auto *EmptyDom = isl_set_empty(isl_set_get_space(Domain)); + isl_set_free(Domain); + Domain = EmptyDom; + } else { + isl_set_free(DomainCtx); + } + + if (!ErrorCtx) + continue; + + auto *TI = BB->getTerminator(); + unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors(); + for (unsigned u = 0; u < NumSuccs; u++) { + auto *SuccBB = getRegionNodeSuccessor(RN, TI, u); + auto *&SuccErrorCtx = ErrorDomainCtxMap[SuccBB]; + auto *CurErrorCtx = isl_set_copy(ErrorCtx); + SuccErrorCtx = + SuccErrorCtx ? isl_set_union(SuccErrorCtx, CurErrorCtx) : CurErrorCtx; + SuccErrorCtx = isl_set_coalesce(SuccErrorCtx); + + // Check if the maximal number of domain conjuncts was reached. + // In case this happens we will bail. + if (isl_set_n_basic_set(SuccErrorCtx) < MaxConjunctsInDomain) + continue; + + invalidate(COMPLEXITY, TI->getDebugLoc()); + return; + } + } +} + void Scop::propagateDomainConstraintsToRegionExit( BasicBlock *BB, Loop *BBLoop, SmallPtrSetImpl &FinishedExitBlocks, ScopDetection &SD, @@ -2932,6 +2963,8 @@ for (auto It : DomainMap) isl_set_free(It.second); + for (auto It : ErrorDomainCtxMap) + isl_set_free(It.second); // Free the alias groups for (MinMaxVectorPairTy &MinMaxAccessPair : MinMaxAliasGroups) { @@ -3044,10 +3077,18 @@ return nullptr; } +isl_set *Scop::getErrorCtxReachingStmt(ScopStmt &Stmt) { + auto *BB = Stmt.getEntryBlock(); + return isl_set_copy(ErrorDomainCtxMap.lookup(BB)); +} + void Scop::addInvariantLoads(ScopStmt &Stmt, MemoryAccessList &InvMAs) { - // Get the context under which the statement is executed. + // Get the context under which the statement is executed but remove error + // block domains that reach this statement. isl_set *DomainCtx = isl_set_params(Stmt.getDomain()); + if (auto *ErrorCtx = getErrorCtxReachingStmt(Stmt)) + DomainCtx = isl_set_subtract(DomainCtx, ErrorCtx); DomainCtx = isl_set_remove_redundancies(DomainCtx); DomainCtx = isl_set_detect_equalities(DomainCtx); DomainCtx = isl_set_coalesce(DomainCtx); Index: test/ScopInfo/error-blocks-1.ll =================================================================== --- /dev/null +++ test/ScopInfo/error-blocks-1.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: Context: +; CHECK-NEXT: [N] -> { : -2147483648 <= N <= 2147483647 } +; CHECK-NEXT: Assumed Context: +; CHECK-NEXT: [N] -> { : } +; CHECK-NEXT: Invalid Context: +; CHECK-NEXT: [N] -> { : N >= 514 } +; +; CHECK: Statements { +; CHECK-NEXT: Stmt_if_end +; CHECK-NEXT: Domain := +; CHECK-NEXT: [N] -> { Stmt_if_end[i0] : 0 <= i0 < N }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [N] -> { Stmt_if_end[i0] -> [i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: +] [Scalar: 0] +; CHECK-NEXT: [N] -> { Stmt_if_end[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: +] [Scalar: 0] +; CHECK-NEXT: [N] -> { Stmt_if_end[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: } +; +; void f(); +; void g(int *A, int N) { +; for (int i = 0; i < N; i++) { +; if (i > 512) { +; f(); +; A[i]++; +; } +; A[i]++; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @g(i32* %A, i32 %N) { +entry: + %tmp = sext i32 %N to i64 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %cmp = icmp slt i64 %indvars.iv, %tmp + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %cmp1 = icmp sgt i64 %indvars.iv, 512 + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %for.body + call void (...) @f() #2 + %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp12 = load i32, i32* %arrayidx2, align 4 + %inc2 = add nsw i32 %tmp12, 1 + store i32 %inc2, i32* %arrayidx2, align 4 + br label %if.end + +if.end: ; preds = %if.then, %for.body + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp1 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %tmp1, 1 + store i32 %inc, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %if.end + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +declare void @f(...) Index: test/ScopInfo/error-blocks-2.ll =================================================================== --- /dev/null +++ test/ScopInfo/error-blocks-2.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: Invariant Accesses: { +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N, valid_val] -> { Stmt_for_body[i0] -> MemRef_valid[0] }; +; CHECK-NEXT: Execution Context: [N, valid_val] -> { : N > 0 } +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N, valid_val] -> { Stmt_S[i0] -> MemRef_ptr_addr[0] }; +; CHECK-NEXT: Execution Context: [N, valid_val] -> { : N > 0 and (valid_val < 0 or valid_val > 0) } +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N, valid_val] -> { Stmt_S[i0] -> MemRef_tmp2[0] }; +; CHECK-NEXT: Execution Context: [N, valid_val] -> { : N > 0 and (valid_val < 0 or valid_val > 0) } +; CHECK-NEXT: } +; CHECK-NEXT: Context: +; CHECK-NEXT: [N, valid_val] -> { : -2147483648 <= N <= 2147483647 and -2147483648 <= valid_val <= 2147483647 } +; CHECK-NEXT: Assumed Context: +; CHECK-NEXT: [N, valid_val] -> { : } +; CHECK-NEXT: Invalid Context: +; CHECK-NEXT: [N, valid_val] -> { : valid_val = 0 and N > 0 } +; +; CHECK: Statements { +; CHECK-NEXT: Stmt_S +; CHECK-NEXT: Domain := +; CHECK-NEXT: [N, valid_val] -> { Stmt_S[i0] : 0 <= i0 < N }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [N, valid_val] -> { Stmt_S[i0] -> [i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N, valid_val] -> { Stmt_S[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32 %N, i32* noalias %valid, i32* noalias %ptr, i32* noalias %A) { +entry: + %ptr.addr = alloca i32*, align 8 + store i32* %ptr, i32** %ptr.addr, align 8 + %tmp = sext i32 %N to i64 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %cmp = icmp slt i64 %indvars.iv, %tmp + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %valid_val = load i32, i32* %valid, align 4 + %cmp1 = icmp eq i32 %valid_val, 0 + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %for.body + call void @doSth(i32** nonnull %ptr.addr) + br label %if.end + +if.end: ; preds = %if.then, %for.body + br label %S + +S: ; preds = %if.end + %tmp2 = load i32*, i32** %ptr.addr, align 8 + %tmp3 = load i32, i32* %tmp2, align 4 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %tmp3, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %S + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +declare void @doSth(i32**) Index: test/ScopInfo/error-blocks-3.ll =================================================================== --- /dev/null +++ test/ScopInfo/error-blocks-3.ll @@ -0,0 +1,80 @@ +; RUN: opt %loadPolly -analyze -polly-scops -polly-detect-keep-going -polly-allow-nonaffine < %s | FileCheck %s +; +; TODO: FIXME: Investigate why "-polly-detect-keep-going" is needed to detect +; this SCoP. That flag should not make a difference. +; +; CHECK: Context: +; CHECK-NEXT: [N] -> { : -2147483648 <= N <= 2147483647 } +; CHECK-NEXT: Assumed Context: +; CHECK-NEXT: [N] -> { : } +; CHECK-NEXT: Invalid Context: +; CHECK-NEXT: [N] -> { : N >= 514 } +; +; CHECK: Statements { +; CHECK-NEXT: Stmt_if_end3 +; CHECK-NEXT: Domain := +; CHECK-NEXT: [N] -> { Stmt_if_end3[i0] : 0 <= i0 < N }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [N] -> { Stmt_if_end3[i0] -> [i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: +] [Scalar: 0] +; CHECK-NEXT: [N] -> { Stmt_if_end3[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: +] [Scalar: 0] +; CHECK-NEXT: [N] -> { Stmt_if_end3[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: } +; +; int f(); +; void g(int *A, int N) { +; for (int i = 0; i < N; i++) { +; if (i > 512) { +; int v = f(); +; S: +; A[v]++; +; } +; A[i]++; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @g(i32* %A, i32 %N) { +entry: + %tmp = sext i32 %N to i64 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %cmp = icmp slt i64 %indvars.iv, %tmp + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %cmp1 = icmp sgt i64 %indvars.iv, 512 + br i1 %cmp1, label %if.then, label %if.end3 + +if.then: ; preds = %for.body + %call = call i32 (...) @f() + br label %S + +S: ; preds = %if.then + %idxprom = sext i32 %call to i64 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom + %tmp1 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %tmp1, 1 + store i32 %inc, i32* %arrayidx, align 4 + br label %if.end3 + +if.end3: ; preds = %if.end, %for.body + %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp2 = load i32, i32* %arrayidx5, align 4 + %inc6 = add nsw i32 %tmp2, 1 + store i32 %inc6, i32* %arrayidx5, align 4 + br label %for.inc + +for.inc: ; preds = %if.end3, %if.then2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +declare i32 @f(...)