Index: polly/trunk/include/polly/ScopInfo.h =================================================================== --- polly/trunk/include/polly/ScopInfo.h +++ polly/trunk/include/polly/ScopInfo.h @@ -1317,6 +1317,14 @@ /// @brief A map from basic blocks to their domains. DenseMap DomainMap; + /// @brief A map from basic blocks to the error context reaching them. + /// + /// The error context describes all parameter configurations under which + /// the block would be executed but the control would pass an error block. + /// This information is not contained in the domain and only implicit in the + /// AssumedContext/InvalidContext. + DenseMap ErrorDomainCtxMap; + /// Constraints on parameters. isl_set *Context; @@ -1494,14 +1502,20 @@ void propagateDomainConstraints(Region *R, ScopDetection &SD, DominatorTree &DT, LoopInfo &LI); - /// @brief Remove domains of error blocks/regions (and blocks dominated by - /// them). + /// @brief Propagate of error block domain contexts through @p R. /// + /// This method will propagate error block domain contexts through @p R + /// and thereby fill the ErrorDomainCtxMap map. Additionally, the domains + /// of error statements and those only reachable via error statements will + /// be replaced by an empty set. Later those will be removed completely from + /// the SCoP. + /// + /// @param R The currently traversed region. /// @param SD The ScopDetection analysis for the current function. /// @param DT The DominatorTree for the current function. /// @param LI The LoopInfo for the current function. - void removeErrorBlockDomains(ScopDetection &SD, DominatorTree &DT, - LoopInfo &LI); + void propagateErrorConstraints(Region *R, ScopDetection &SD, + DominatorTree &DT, LoopInfo &LI); /// @brief Compute the domain for each basic block in @p R. /// @@ -1537,6 +1551,9 @@ /// @see isIgnored() void simplifySCoP(bool RemoveIgnoredStmts, DominatorTree &DT, LoopInfo &LI); + /// @brief Return the error context under which @p Stmt is reached. + __isl_give isl_set *getErrorCtxReachingStmt(ScopStmt &Stmt); + /// @brief Create equivalence classes for required invariant accesses. /// /// These classes will consolidate multiple required invariant loads from the Index: polly/trunk/lib/Analysis/ScopInfo.cpp =================================================================== --- polly/trunk/lib/Analysis/ScopInfo.cpp +++ polly/trunk/lib/Analysis/ScopInfo.cpp @@ -2118,39 +2118,6 @@ return isl_set_copy(DomainMap[BB]); } -void Scop::removeErrorBlockDomains(ScopDetection &SD, DominatorTree &DT, - LoopInfo &LI) { - auto removeDomains = [this, &DT](BasicBlock *Start) { - auto *BBNode = DT.getNode(Start); - for (auto *ErrorChild : depth_first(BBNode)) { - auto *ErrorChildBlock = ErrorChild->getBlock(); - auto *CurrentDomain = DomainMap[ErrorChildBlock]; - auto *Empty = isl_set_empty(isl_set_get_space(CurrentDomain)); - DomainMap[ErrorChildBlock] = Empty; - isl_set_free(CurrentDomain); - } - }; - - SmallVector Todo = {&R}; - - while (!Todo.empty()) { - auto *SubRegion = Todo.back(); - Todo.pop_back(); - - if (!SD.isNonAffineSubRegion(SubRegion, &getRegion())) { - for (auto &Child : *SubRegion) - Todo.push_back(Child.get()); - continue; - } - if (containsErrorBlock(SubRegion->getNode(), getRegion(), LI, DT)) - removeDomains(SubRegion->getEntry()); - } - - for (auto *BB : R.blocks()) - if (isErrorBlock(*BB, R, LI, DT)) - removeDomains(BB); -} - bool Scop::buildDomains(Region *R, ScopDetection &SD, DominatorTree &DT, LoopInfo &LI) { @@ -2178,12 +2145,17 @@ // Error blocks and blocks dominated by them have been assumed to never be // executed. Representing them in the Scop does not add any value. In fact, // it is likely to cause issues during construction of the ScopStmts. The - // contents of error blocks have not been verfied to be expressible and + // contents of error blocks have not been verified to be expressible and // will cause problems when building up a ScopStmt for them. // Furthermore, basic blocks dominated by error blocks may reference // instructions in the error block which, if the error block is not modeled, - // can themselves not be constructed properly. - removeErrorBlockDomains(SD, DT, LI); + // can themselves not be constructed properly. To this end we will replace + // the domains of error blocks and those only reachable via error blocks + // with an empty set. Additionally, we will record for each block under which + // parameter combination it would be reached via an error block in the + // ErrorDomainCtxMap map. This information is needed during load hoisting. + propagateErrorConstraints(R, SD, DT, LI); + return true; } @@ -2246,6 +2218,65 @@ return Dom; } +void Scop::propagateErrorConstraints(Region *R, ScopDetection &SD, + DominatorTree &DT, LoopInfo &LI) { + + ReversePostOrderTraversal RTraversal(R); + for (auto *RN : RTraversal) { + + // Recurse for affine subregions but go on for basic blocks and non-affine + // subregions. + if (RN->isSubRegion()) { + Region *SubRegion = RN->getNodeAs(); + if (!SD.isNonAffineSubRegion(SubRegion, &getRegion())) { + propagateErrorConstraints(SubRegion, SD, DT, LI); + continue; + } + } + + bool ContainsErrorBlock = containsErrorBlock(RN, getRegion(), LI, DT); + BasicBlock *BB = getRegionNodeBasicBlock(RN); + isl_set *&Domain = DomainMap[BB]; + assert(Domain && "Cannot propagate a nullptr"); + + auto *&ErrorCtx = ErrorDomainCtxMap[BB]; + auto *DomainCtx = isl_set_params(isl_set_copy(Domain)); + bool IsErrorBlock = ContainsErrorBlock || + (ErrorCtx && isl_set_is_subset(DomainCtx, ErrorCtx)); + + if (IsErrorBlock) { + ErrorCtx = ErrorCtx ? isl_set_union(ErrorCtx, DomainCtx) : DomainCtx; + auto *EmptyDom = isl_set_empty(isl_set_get_space(Domain)); + isl_set_free(Domain); + Domain = EmptyDom; + } else { + isl_set_free(DomainCtx); + } + + if (!ErrorCtx) + continue; + + auto *TI = BB->getTerminator(); + unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors(); + for (unsigned u = 0; u < NumSuccs; u++) { + auto *SuccBB = getRegionNodeSuccessor(RN, TI, u); + auto *&SuccErrorCtx = ErrorDomainCtxMap[SuccBB]; + auto *CurErrorCtx = isl_set_copy(ErrorCtx); + SuccErrorCtx = + SuccErrorCtx ? isl_set_union(SuccErrorCtx, CurErrorCtx) : CurErrorCtx; + SuccErrorCtx = isl_set_coalesce(SuccErrorCtx); + + // Check if the maximal number of domain conjuncts was reached. + // In case this happens we will bail. + if (isl_set_n_basic_set(SuccErrorCtx) < MaxConjunctsInDomain) + continue; + + invalidate(COMPLEXITY, TI->getDebugLoc()); + return; + } + } +} + void Scop::propagateDomainConstraintsToRegionExit( BasicBlock *BB, Loop *BBLoop, SmallPtrSetImpl &FinishedExitBlocks, ScopDetection &SD, @@ -2932,6 +2963,8 @@ for (auto It : DomainMap) isl_set_free(It.second); + for (auto It : ErrorDomainCtxMap) + isl_set_free(It.second); // Free the alias groups for (MinMaxVectorPairTy &MinMaxAccessPair : MinMaxAliasGroups) { @@ -3044,10 +3077,18 @@ return nullptr; } +isl_set *Scop::getErrorCtxReachingStmt(ScopStmt &Stmt) { + auto *BB = Stmt.getEntryBlock(); + return isl_set_copy(ErrorDomainCtxMap.lookup(BB)); +} + void Scop::addInvariantLoads(ScopStmt &Stmt, MemoryAccessList &InvMAs) { - // Get the context under which the statement is executed. + // Get the context under which the statement is executed but remove the error + // context under which this statement is reached. isl_set *DomainCtx = isl_set_params(Stmt.getDomain()); + if (auto *ErrorCtx = getErrorCtxReachingStmt(Stmt)) + DomainCtx = isl_set_subtract(DomainCtx, ErrorCtx); DomainCtx = isl_set_remove_redundancies(DomainCtx); DomainCtx = isl_set_detect_equalities(DomainCtx); DomainCtx = isl_set_coalesce(DomainCtx); Index: polly/trunk/test/ScopInfo/error-blocks-2.ll =================================================================== --- polly/trunk/test/ScopInfo/error-blocks-2.ll +++ polly/trunk/test/ScopInfo/error-blocks-2.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: Invariant Accesses: { +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N, valid_val] -> { Stmt_for_body[i0] -> MemRef_valid[0] }; +; CHECK-NEXT: Execution Context: [N, valid_val] -> { : N > 0 } +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N, valid_val] -> { Stmt_S[i0] -> MemRef_ptr_addr[0] }; +; CHECK-NEXT: Execution Context: [N, valid_val] -> { : N > 0 and (valid_val < 0 or valid_val > 0) } +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N, valid_val] -> { Stmt_S[i0] -> MemRef_tmp2[0] }; +; CHECK-NEXT: Execution Context: [N, valid_val] -> { : N > 0 and (valid_val < 0 or valid_val > 0) } +; CHECK-NEXT: } +; CHECK-NEXT: Context: +; CHECK-NEXT: [N, valid_val] -> { : -2147483648 <= N <= 2147483647 and -2147483648 <= valid_val <= 2147483647 } +; CHECK-NEXT: Assumed Context: +; CHECK-NEXT: [N, valid_val] -> { : } +; CHECK-NEXT: Invalid Context: +; CHECK-NEXT: [N, valid_val] -> { : valid_val = 0 and N > 0 } +; +; CHECK: Statements { +; CHECK-NEXT: Stmt_S +; CHECK-NEXT: Domain := +; CHECK-NEXT: [N, valid_val] -> { Stmt_S[i0] : 0 <= i0 < N }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [N, valid_val] -> { Stmt_S[i0] -> [i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N, valid_val] -> { Stmt_S[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32 %N, i32* noalias %valid, i32* noalias %ptr, i32* noalias %A) { +entry: + %ptr.addr = alloca i32*, align 8 + store i32* %ptr, i32** %ptr.addr, align 8 + %tmp = sext i32 %N to i64 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %cmp = icmp slt i64 %indvars.iv, %tmp + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %valid_val = load i32, i32* %valid, align 4 + %cmp1 = icmp eq i32 %valid_val, 0 + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %for.body + call void @doSth(i32** nonnull %ptr.addr) + br label %if.end + +if.end: ; preds = %if.then, %for.body + br label %S + +S: ; preds = %if.end + %tmp2 = load i32*, i32** %ptr.addr, align 8 + %tmp3 = load i32, i32* %tmp2, align 4 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %tmp3, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %S + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +declare void @doSth(i32**)