Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -1187,7 +1187,7 @@ __isl_take isl_map *TargetRel, __isl_take isl_set *Domain); /// Initialize members after all MemoryAccesses have been added. - void init(LoopInfo &LI); + void init(LoopInfo &LI, DenseMap &DomainMap); private: /// Polyhedral description @@ -1286,7 +1286,7 @@ /// Build the statement. //@{ - void buildDomain(); + void buildDomain(DenseMap &DomainMap); /// Fill NestLoops with loops surrounding this statement. void collectSurroundingLoops(); @@ -1748,9 +1748,6 @@ /// vector comprises only of a single statement. DenseMap> StmtMap; - /// A map from basic blocks to their domains. - DenseMap DomainMap; - /// Constraints on parameters. isl_set *Context; @@ -1939,11 +1936,13 @@ /// @param BBLoop The innermost affine loop surrounding @p BB. /// @param FinishedExitBlocks Set of region exits the domain was set for. /// @param LI The LoopInfo for the current function. + /// @param DomainMap BB to Domain map for the BB of current region. /// @param InvalidDomainMap BB to InvalidDomain map for the BB of current /// region. void propagateDomainConstraintsToRegionExit( BasicBlock *BB, Loop *BBLoop, SmallPtrSetImpl &FinishedExitBlocks, LoopInfo &LI, + DenseMap &DomainMap, DenseMap &InvalidDomainMap); /// Compute the union of predecessor domains for @p BB. @@ -1956,22 +1955,25 @@ /// @param Domain The domain under which BB is executed. /// @param DT The DominatorTree for the current function. /// @param LI The LoopInfo for the current function. + /// @param DomainMap BB to Domain map for the BB of current region. /// /// @returns The domain under which @p BB is executed. __isl_give isl_set * getPredecessorDomainConstraints(BasicBlock *BB, __isl_keep isl_set *Domain, - DominatorTree &DT, LoopInfo &LI); + DominatorTree &DT, LoopInfo &LI, + DenseMap &DomainMap); /// Add loop carried constraints to the header block of the loop @p L. /// /// @param L The loop to process. /// @param LI The LoopInfo for the current function. + /// @param DomainMap BB to Domain map for the BB of current region. /// @param InvalidDomainMap BB to InvalidDomain map for the BB of current /// region. /// /// @returns True if there was no problem and false otherwise. bool addLoopBoundsToHeaderDomain( - Loop *L, LoopInfo &LI, + Loop *L, LoopInfo &LI, DenseMap &DomainMap, DenseMap &InvalidDomainMap); /// Compute the branching constraints for each basic block in @p R. @@ -1980,12 +1982,14 @@ /// for. /// @param DT The DominatorTree for the current function. /// @param LI The LoopInfo for the current function. + /// @param DomainMap BB to Domain map for the BB of current region. /// @param InvalidDomainMap BB to InvalidDomain map for the BB of current /// region. /// /// @returns True if there was no problem and false otherwise. bool buildDomainsWithBranchConstraints( Region *R, DominatorTree &DT, LoopInfo &LI, + DenseMap &DomainMap, DenseMap &InvalidDomainMap); /// Propagate the domain constraints through the region @p R. @@ -1994,12 +1998,14 @@ /// for. /// @param DT The DominatorTree for the current function. /// @param LI The LoopInfo for the current function. + /// @param DomainMap BB to Domain map for the BB of current region. /// @param InvalidDomainMap BB to InvalidDomain map for the BB of current /// region. /// /// @returns True if there was no problem and false otherwise. bool propagateDomainConstraints( Region *R, DominatorTree &DT, LoopInfo &LI, + DenseMap &DomainMap, DenseMap &InvalidDomainMap); /// Propagate invalid domains of statements through @p R. @@ -2012,12 +2018,14 @@ /// @param R The currently traversed region. /// @param DT The DominatorTree for the current function. /// @param LI The LoopInfo for the current function. + /// @param DomainMap BB to Domain map for the BB of current region. /// @param InvalidDomainMap BB to InvalidDomain map for the BB of current /// region. // /// @returns True if there was no problem and false otherwise. bool propagateInvalidStmtDomains( Region *R, DominatorTree &DT, LoopInfo &LI, + DenseMap &DomainMap, DenseMap &InvalidDomainMap); /// Compute the domain for each basic block in @p R. @@ -2025,11 +2033,13 @@ /// @param R The region we currently traverse. /// @param DT The DominatorTree for the current function. /// @param LI The LoopInfo for the current function. + /// @param DomainMap BB to Domain map for the BB of current region. /// @param InvalidDomainMap BB to InvalidDomain map for the BB of current /// region. /// /// @returns True if there was no problem and false otherwise. bool buildDomains(Region *R, DominatorTree &DT, LoopInfo &LI, + DenseMap &DomainMap, DenseMap &InvalidDomainMap); /// Add parameter constraints to @p C that imply a non-empty domain. @@ -2139,6 +2149,7 @@ /// Add user provided parameter constraints to context (source code). void addUserAssumptions(AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, + DenseMap &DomainMap, DenseMap &InvalidDomainMap); /// Add user provided parameter constraints to context (command line). @@ -2246,7 +2257,7 @@ /// Removes all statements where the entry block of the statement does not /// have a corresponding domain in the domain map. - void removeStmtNotInDomainMap(); + void removeStmtNotInDomainMap(DenseMap &DomainMap); /// Mark arrays that have memory accesses with FortranArrayDescriptor. void markFortranArrays(); @@ -2594,7 +2605,7 @@ BasicBlock *BB = nullptr); /// Add all recorded assumptions to the assumed context. - void addRecordedAssumptions(); + void addRecordedAssumptions(DenseMap &DomainMap); /// Mark the scop as invalid. /// @@ -2871,12 +2882,17 @@ /// Return the domain of @p Stmt. /// /// @param Stmt The statement for which the conditions should be returned. - __isl_give isl_set *getDomainConditions(const ScopStmt *Stmt) const; + __isl_give isl_set * + getDomainConditions(DenseMap &DomainMap, + const ScopStmt *Stmt) const; /// Return the domain of @p BB. /// - /// @param BB The block for which the conditions should be returned. - __isl_give isl_set *getDomainConditions(BasicBlock *BB) const; + /// @param DomainMap BB to Domain map for the BB of current region. + /// @param BB The block for which the conditions should be returned. + __isl_give isl_set * + getDomainConditions(DenseMap &DomainMap, + BasicBlock *BB) const; /// Get a union set containing the iteration domains of all statements. __isl_give isl_union_set *getDomains() const; Index: lib/Analysis/ScopBuilder.cpp =================================================================== --- lib/Analysis/ScopBuilder.cpp +++ lib/Analysis/ScopBuilder.cpp @@ -955,13 +955,16 @@ scop->buildInvariantEquivalenceClasses(); + /// A map from basic blocks to their domains. + DenseMap DomainMap; + /// A map from basic blocks to their invalid domains. DenseMap InvalidDomainMap; - if (!scop->buildDomains(&R, DT, LI, InvalidDomainMap)) + if (!scop->buildDomains(&R, DT, LI, DomainMap, InvalidDomainMap)) return; - scop->addUserAssumptions(AC, DT, LI, InvalidDomainMap); + scop->addUserAssumptions(AC, DT, LI, DomainMap, InvalidDomainMap); // Initialize the invalid domain. for (ScopStmt &Stmt : scop->Stmts) @@ -974,14 +977,14 @@ // Remove empty statements. // Exit early in case there are no executable statements left in this scop. - scop->removeStmtNotInDomainMap(); + scop->removeStmtNotInDomainMap(DomainMap); scop->simplifySCoP(false); if (scop->isEmpty()) return; // The ScopStmts now have enough information to initialize themselves. for (ScopStmt &Stmt : *scop) - Stmt.init(LI); + Stmt.init(LI, DomainMap); // Check early for a feasible runtime context. if (!scop->hasFeasibleRuntimeContext()) @@ -1004,7 +1007,7 @@ // After the context was fully constructed, thus all our knowledge about // the parameters is in there, we add all recorded assumptions to the // assumed/invalid context. - scop->addRecordedAssumptions(); + scop->addRecordedAssumptions(DomainMap); scop->simplifyContexts(); if (!scop->buildAliasChecks(AA)) Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -1692,10 +1692,10 @@ ConditionSets); } -void ScopStmt::buildDomain() { +void ScopStmt::buildDomain(DenseMap &DomainMap) { isl_id *Id = isl_id_alloc(getIslCtx(), getBaseName(), this); - Domain = getParent()->getDomainConditions(this); + Domain = getParent()->getDomainConditions(DomainMap, this); Domain = isl_set_set_tuple_id(Domain, Id); } @@ -1745,10 +1745,10 @@ addAccess(Access); } -void ScopStmt::init(LoopInfo &LI) { +void ScopStmt::init(LoopInfo &LI, DenseMap &DomainMap) { assert(!Domain && "init must be called only once"); - buildDomain(); + buildDomain(DomainMap); collectSurroundingLoops(); buildAccessRelations(); @@ -2228,7 +2228,9 @@ void Scop::addUserAssumptions( AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, + DenseMap &DomainMap, DenseMap &InvalidDomainMap) { + for (auto &Assumption : AC.assumptions()) { auto *CI = dyn_cast_or_null(Assumption); if (!CI || CI->getNumArgOperands() != 1) @@ -2278,7 +2280,8 @@ AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]); } - // Project out newly introduced parameters as they are not otherwise useful. + // Project out newly introduced parameters as they are not otherwise + // useful. if (!NewParams.empty()) { for (unsigned u = 0; u < isl_set_n_param(AssumptionCtx); u++) { auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u); @@ -2383,20 +2386,21 @@ } } -// We use the outermost dimension to generate GPU transfers for Fortran arrays -// even when the array bounds are not known statically. To do so, we need the -// outermost dimension information. We add this into the context so that the -// outermost dimension is available during codegen. -// We currently do not care about dimensions other than the outermost -// dimension since it doesn't affect transfers. +// We use the outermost dimension to generate GPU transfers for Fortran +// arrays even when the array bounds are not known statically. To do so, we +// need the outermost dimension information. We add this into the context so +// that the outermost dimension is available during codegen. We currently do +// not care about dimensions other than the outermost dimension since it +// doesn't affect transfers. static isl_set *addFortranArrayOutermostDimParams(__isl_give isl_set *Context, Scop::array_range Arrays) { std::vector OutermostSizeIds; for (auto Array : Arrays) { - // To check if an array is a Fortran array, we check if it has a isl_pw_aff - // for its outermost dimension. Fortran arrays will have this since the - // outermost dimension size can be picked up from their runtime description. + // To check if an array is a Fortran array, we check if it has a + // isl_pw_aff for its outermost dimension. Fortran arrays will have this + // since the outermost dimension size can be picked up from their + // runtime description. // TODO: actually need to check if it has a FAD, but for now this works. if (Array->getNumberOfDimensions() > 0) { isl_pw_aff *PwAff = Array->getDimensionSizePw(0).release(); @@ -2456,11 +2460,11 @@ simplifyAssumptionContext(__isl_take isl_set *AssumptionContext, const Scop &S) { // If we have modeled all blocks in the SCoP that have side effects we can - // simplify the context with the constraints that are needed for anything to - // be executed at all. However, if we have error blocks in the SCoP we already - // assumed some parameter combinations cannot occur and removed them from the - // domains, thus we cannot use the remaining domain to simplify the - // assumptions. + // simplify the context with the constraints that are needed for anything + // to be executed at all. However, if we have error blocks in the SCoP we + // already assumed some parameter combinations cannot occur and removed + // them from the domains, thus we cannot use the remaining domain to + // simplify the assumptions. if (!S.hasErrorBlock()) { isl_set *DomainParameters = isl_union_set_params(S.getDomains()); AssumptionContext = @@ -2483,11 +2487,11 @@ // WARNING: This only holds if the assumptions we have taken do not reduce // the set of statement instances that are executed. Otherwise we // may run into a case where the iteration domains suggest that - // for a certain set of parameter constraints no code is executed, - // but in the original program some computation would have been - // performed. In such a case, modifying the run-time conditions and - // possibly influencing the run-time check may cause certain scops - // to not be executed. + // for a certain set of parameter constraints no code is + // executed, but in the original program some computation would + // have been performed. In such a case, modifying the run-time + // conditions and possibly influencing the run-time check may + // cause certain scops to not be executed. // // Example: // @@ -2498,8 +2502,9 @@ // A[i+p][j] = 1.0; // // we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as - // otherwise we would access out of bound data. Now, knowing that code is - // only executed for the case m >= 0, it is sufficient to assume p >= 0. + // otherwise we would access out of bound data. Now, knowing that code + // is only executed for the case m >= 0, it is sufficient to assume p >= + // 0. AssumedContext = simplifyAssumptionContext(AssumedContext, *this); InvalidContext = isl_set_align_params(InvalidContext, getParamSpace()); } @@ -2626,11 +2631,12 @@ BasicBlock *BB = RN->getNodeAs(); Loop *L = LI.getLoopFor(BB); - // Unreachable statements are not considered to belong to a LLVM loop, as - // they are not part of an actual loop in the control flow graph. - // Nevertheless, we handle certain unreachable statements that are common - // when modeling run-time bounds checks as being part of the loop to be - // able to model them and to later eliminate the run-time bounds checks. + // Unreachable statements are not considered to belong to a LLVM loop, + // as they are not part of an actual loop in the control flow graph. + // Nevertheless, we handle certain unreachable statements that are + // common when modeling run-time bounds checks as being part of the loop + // to be able to model them and to later eliminate the run-time bounds + // checks. // // Specifically, for basic blocks that terminate in an unreachable and // where the immediate predecessor is part of a loop, we assume these @@ -2706,11 +2712,15 @@ return isl_set_set_dim_id(Domain, isl_dim_set, Dim, DimId); } -__isl_give isl_set *Scop::getDomainConditions(const ScopStmt *Stmt) const { - return getDomainConditions(Stmt->getEntryBlock()); +__isl_give isl_set * +Scop::getDomainConditions(DenseMap &DomainMap, + const ScopStmt *Stmt) const { + return getDomainConditions(DomainMap, Stmt->getEntryBlock()); } -__isl_give isl_set *Scop::getDomainConditions(BasicBlock *BB) const { +__isl_give isl_set * +Scop::getDomainConditions(DenseMap &DomainMap, + BasicBlock *BB) const { auto DIt = DomainMap.find(BB); if (DIt != DomainMap.end()) return DIt->getSecond().copy(); @@ -2719,10 +2729,11 @@ auto *BBR = RI.getRegionFor(BB); while (BBR->getEntry() == BB) BBR = BBR->getParent(); - return getDomainConditions(BBR->getEntry()); + return getDomainConditions(DomainMap, BBR->getEntry()); } bool Scop::buildDomains(Region *R, DominatorTree &DT, LoopInfo &LI, + DenseMap &DomainMap, DenseMap &InvalidDomainMap) { bool IsOnlyNonAffineRegion = isNonAffineSubRegion(R); @@ -2742,25 +2753,27 @@ if (IsOnlyNonAffineRegion) return !containsErrorBlock(R->getNode(), *R, LI, DT); - if (!buildDomainsWithBranchConstraints(R, DT, LI, InvalidDomainMap)) + if (!buildDomainsWithBranchConstraints(R, DT, LI, DomainMap, + InvalidDomainMap)) return false; - if (!propagateDomainConstraints(R, DT, LI, InvalidDomainMap)) + if (!propagateDomainConstraints(R, DT, LI, DomainMap, InvalidDomainMap)) return false; // Error blocks and blocks dominated by them have been assumed to never be - // executed. Representing them in the Scop does not add any value. In fact, - // it is likely to cause issues during construction of the ScopStmts. The - // contents of error blocks have not been verified to be expressible and - // will cause problems when building up a ScopStmt for them. - // Furthermore, basic blocks dominated by error blocks may reference - // instructions in the error block which, if the error block is not modeled, - // can themselves not be constructed properly. To this end we will replace - // the domains of error blocks and those only reachable via error blocks - // with an empty set. Additionally, we will record for each block under which - // parameter combination it would be reached via an error block in its - // InvalidDomain. This information is needed during load hoisting. - if (!propagateInvalidStmtDomains(R, DT, LI, InvalidDomainMap)) + // executed. Representing them in the Scop does not add any value. In + // fact, it is likely to cause issues during construction of the + // ScopStmts. The contents of error blocks have not been verified to be + // expressible and will cause problems when building up a ScopStmt for + // them. Furthermore, basic blocks dominated by error blocks may reference + // instructions in the error block which, if the error block is not + // modeled, can themselves not be constructed properly. To this end we + // will replace the domains of error blocks and those only reachable via + // error blocks with an empty set. Additionally, we will record for each + // block under which parameter combination it would be reached via an + // error block in its InvalidDomain. This information is needed during + // load hoisting. + if (!propagateInvalidStmtDomains(R, DT, LI, DomainMap, InvalidDomainMap)) return false; return true; @@ -2818,17 +2831,19 @@ bool Scop::propagateInvalidStmtDomains( Region *R, DominatorTree &DT, LoopInfo &LI, + DenseMap &DomainMap, DenseMap &InvalidDomainMap) { ReversePostOrderTraversal RTraversal(R); for (auto *RN : RTraversal) { - // Recurse for affine subregions but go on for basic blocks and non-affine - // subregions. + // Recurse for affine subregions but go on for basic blocks and + // non-affine subregions. if (RN->isSubRegion()) { Region *SubRegion = RN->getNodeAs(); if (!isNonAffineSubRegion(SubRegion)) { - propagateInvalidStmtDomains(SubRegion, DT, LI, InvalidDomainMap); + propagateInvalidStmtDomains(SubRegion, DT, LI, DomainMap, + InvalidDomainMap); continue; } } @@ -2903,18 +2918,19 @@ void Scop::propagateDomainConstraintsToRegionExit( BasicBlock *BB, Loop *BBLoop, SmallPtrSetImpl &FinishedExitBlocks, LoopInfo &LI, + DenseMap &DomainMap, DenseMap &InvalidDomainMap) { - // Check if the block @p BB is the entry of a region. If so we propagate it's - // domain to the exit block of the region. Otherwise we are done. + // Check if the block @p BB is the entry of a region. If so we propagate + // it's domain to the exit block of the region. Otherwise we are done. auto *RI = R.getRegionInfo(); auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr; auto *ExitBB = BBReg ? BBReg->getExit() : nullptr; if (!BBReg || BBReg->getEntry() != BB || !contains(ExitBB)) return; - // Do not propagate the domain if there is a loop backedge inside the region - // that would prevent the exit block from being executed. + // Do not propagate the domain if there is a loop backedge inside the + // region that would prevent the exit block from being executed. auto *L = BBLoop; while (L && contains(L)) { SmallVector LatchBBs; @@ -2930,8 +2946,8 @@ Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, getBoxedLoops()); - // Since the dimensions of @p BB and @p ExitBB might be different we have to - // adjust the domain before we can propagate it. + // Since the dimensions of @p BB and @p ExitBB might be different we have + // to adjust the domain before we can propagate it. isl::set AdjustedDomain = isl::manage( adjustDomainDimensions(*this, Domain.copy(), BBLoop, ExitBBLoop)); isl::set &ExitDomain = DomainMap[ExitBB]; @@ -2948,29 +2964,30 @@ bool Scop::buildDomainsWithBranchConstraints( Region *R, DominatorTree &DT, LoopInfo &LI, + DenseMap &DomainMap, DenseMap &InvalidDomainMap) { // To create the domain for each block in R we iterate over all blocks and - // subregions in R and propagate the conditions under which the current region - // element is executed. To this end we iterate in reverse post order over R as - // it ensures that we first visit all predecessors of a region node (either a - // basic block or a subregion) before we visit the region node itself. - // Initially, only the domain for the SCoP region entry block is set and from - // there we propagate the current domain to all successors, however we add the - // condition that the successor is actually executed next. - // As we are only interested in non-loop carried constraints here we can - // simply skip loop back edges. + // subregions in R and propagate the conditions under which the current + // region element is executed. To this end we iterate in reverse post + // order over R as it ensures that we first visit all predecessors of a + // region node (either a basic block or a subregion) before we visit the + // region node itself. Initially, only the domain for the SCoP region + // entry block is set and from there we propagate the current domain to + // all successors, however we add the condition that the successor is + // actually executed next. As we are only interested in non-loop carried + // constraints here we can simply skip loop back edges. SmallPtrSet FinishedExitBlocks; ReversePostOrderTraversal RTraversal(R); for (auto *RN : RTraversal) { - // Recurse for affine subregions but go on for basic blocks and non-affine - // subregions. + // Recurse for affine subregions but go on for basic blocks and + // non-affine subregions. if (RN->isSubRegion()) { Region *SubRegion = RN->getNodeAs(); if (!isNonAffineSubRegion(SubRegion)) { - if (!buildDomainsWithBranchConstraints(SubRegion, DT, LI, + if (!buildDomainsWithBranchConstraints(SubRegion, DT, LI, DomainMap, InvalidDomainMap)) return false; continue; @@ -2993,25 +3010,27 @@ auto *BBLoop = getRegionNodeLoop(RN, LI); // Propagate the domain from BB directly to blocks that have a superset - // domain, at the moment only region exit nodes of regions that start in BB. + // domain, at the moment only region exit nodes of regions that start in + // BB. propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks, LI, - InvalidDomainMap); + DomainMap, InvalidDomainMap); - // If all successors of BB have been set a domain through the propagation - // above we do not need to build condition sets but can just skip this - // block. However, it is important to note that this is a local property - // with regards to the region @p R. To this end FinishedExitBlocks is a - // local variable. + // If all successors of BB have been set a domain through the + // propagation above we do not need to build condition sets but can just + // skip this block. However, it is important to note that this is a + // local property with regards to the region @p R. To this end + // FinishedExitBlocks is a local variable. auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) { return FinishedExitBlocks.count(SuccBB); }; if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit)) continue; - // Build the condition sets for the successor nodes of the current region - // node. If it is a non-affine subregion we will always execute the single - // exit node, hence the single entry node domain is the condition set. For - // basic blocks we use the helper function buildConditionSets. + // Build the condition sets for the successor nodes of the current + // region node. If it is a non-affine subregion we will always execute + // the single exit node, hence the single entry node domain is the + // condition set. For basic blocks we use the helper function + // buildConditionSets. SmallVector ConditionSets; if (RN->isSubRegion()) ConditionSets.push_back(Domain.copy()); @@ -3020,9 +3039,9 @@ return false; // Now iterate over the successors and set their initial domain based on - // their condition set. We skip back edges here and have to be careful when - // we leave a loop not to keep constraints over a dimension that doesn't - // exist anymore. + // their condition set. We skip back edges here and have to be careful + // when we leave a loop not to keep constraints over a dimension that + // doesn't exist anymore. assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size()); for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) { isl::set CondSet = isl::manage(ConditionSets[u]); @@ -3032,8 +3051,8 @@ if (!contains(SuccBB)) continue; - // If we propagate the domain of some block to "SuccBB" we do not have to - // adjust the domain. + // If we propagate the domain of some block to "SuccBB" we do not have + // to adjust the domain. if (FinishedExitBlocks.count(SuccBB)) continue; @@ -3046,9 +3065,9 @@ CondSet = isl::manage( adjustDomainDimensions(*this, CondSet.copy(), BBLoop, SuccBBLoop)); - // Set the domain for the successor or merge it with an existing domain in - // case there are multiple paths (without loop back edges) to the - // successor block. + // Set the domain for the successor or merge it with an existing + // domain in case there are multiple paths (without loop back edges) + // to the successor block. isl::set &SuccDomain = DomainMap[SuccBB]; if (SuccDomain) { @@ -3076,10 +3095,10 @@ return true; } -__isl_give isl_set * -Scop::getPredecessorDomainConstraints(BasicBlock *BB, - __isl_keep isl_set *Domain, - DominatorTree &DT, LoopInfo &LI) { +__isl_give isl_set *Scop::getPredecessorDomainConstraints( + BasicBlock *BB, __isl_keep isl_set *Domain, DominatorTree &DT, LoopInfo &LI, + DenseMap &DomainMap) { + // If @p BB is the ScopEntry we are done if (R.getEntry() == BB) return isl_set_universe(isl_set_get_space(Domain)); @@ -3090,11 +3109,12 @@ Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, getBoxedLoops()); // A domain to collect all predecessor domains, thus all conditions under - // which the block is executed. To this end we start with the empty domain. + // which the block is executed. To this end we start with the empty + // domain. isl_set *PredDom = isl_set_empty(isl_set_get_space(Domain)); - // Set of regions of which the entry block domain has been propagated to BB. - // all predecessors inside any of the regions can be skipped. + // Set of regions of which the entry block domain has been propagated to + // BB. all predecessors inside any of the regions can be skipped. SmallSet PropagatedRegions; for (auto *PredBB : predecessors(BB)) { @@ -3102,27 +3122,29 @@ if (DT.dominates(BB, PredBB)) continue; - // If the predecessor is in a region we used for propagation we can skip it. + // If the predecessor is in a region we used for propagation we can skip + // it. auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); }; if (std::any_of(PropagatedRegions.begin(), PropagatedRegions.end(), PredBBInRegion)) { continue; } - // Check if there is a valid region we can use for propagation, thus look - // for a region that contains the predecessor and has @p BB as exit block. + // Check if there is a valid region we can use for propagation, thus + // look for a region that contains the predecessor and has @p BB as exit + // block. auto *PredR = RI.getRegionFor(PredBB); while (PredR->getExit() != BB && !PredR->contains(BB)) PredR->getParent(); - // If a valid region for propagation was found use the entry of that region - // for propagation, otherwise the PredBB directly. + // If a valid region for propagation was found use the entry of that + // region for propagation, otherwise the PredBB directly. if (PredR->getExit() == BB) { PredBB = PredR->getEntry(); PropagatedRegions.insert(PredR); } - auto *PredBBDom = getDomainConditions(PredBB); + auto *PredBBDom = getDomainConditions(DomainMap, PredBB); Loop *PredBBLoop = getFirstNonBoxedLoopFor(PredBB, LI, getBoxedLoops()); PredBBDom = adjustDomainDimensions(*this, PredBBDom, PredBBLoop, BBLoop); @@ -3135,25 +3157,27 @@ bool Scop::propagateDomainConstraints( Region *R, DominatorTree &DT, LoopInfo &LI, + DenseMap &DomainMap, DenseMap &InvalidDomainMap) { // Iterate over the region R and propagate the domain constrains from the // predecessors to the current node. In contrast to the - // buildDomainsWithBranchConstraints function, this one will pull the domain - // information from the predecessors instead of pushing it to the successors. - // Additionally, we assume the domains to be already present in the domain - // map here. However, we iterate again in reverse post order so we know all - // predecessors have been visited before a block or non-affine subregion is - // visited. + // buildDomainsWithBranchConstraints function, this one will pull the + // domain information from the predecessors instead of pushing it to the + // successors. Additionally, we assume the domains to be already present + // in the domain map here. However, we iterate again in reverse post order + // so we know all predecessors have been visited before a block or + // non-affine subregion is visited. ReversePostOrderTraversal RTraversal(R); for (auto *RN : RTraversal) { - // Recurse for affine subregions but go on for basic blocks and non-affine - // subregions. + // Recurse for affine subregions but go on for basic blocks and + // non-affine subregions. if (RN->isSubRegion()) { Region *SubRegion = RN->getNodeAs(); if (!isNonAffineSubRegion(SubRegion)) { - if (!propagateDomainConstraints(SubRegion, DT, LI, InvalidDomainMap)) + if (!propagateDomainConstraints(SubRegion, DT, LI, DomainMap, + InvalidDomainMap)) return false; continue; } @@ -3163,15 +3187,16 @@ isl::set &Domain = DomainMap[BB]; assert(Domain); - // Under the union of all predecessor conditions we can reach this block. - isl::set PredDom = - isl::manage(getPredecessorDomainConstraints(BB, Domain.get(), DT, LI)); + // Under the union of all predecessor conditions we can reach this + // block. + isl::set PredDom = isl::manage( + getPredecessorDomainConstraints(BB, Domain.get(), DT, LI, DomainMap)); Domain = Domain.intersect(PredDom).coalesce(); Domain = Domain.align_params(isl::manage(getParamSpace())); Loop *BBLoop = getRegionNodeLoop(RN, LI); if (BBLoop && BBLoop->getHeader() == BB && contains(BBLoop)) - if (!addLoopBoundsToHeaderDomain(BBLoop, LI, InvalidDomainMap)) + if (!addLoopBoundsToHeaderDomain(BBLoop, LI, DomainMap, InvalidDomainMap)) return false; } @@ -3202,7 +3227,9 @@ } bool Scop::addLoopBoundsToHeaderDomain( - Loop *L, LoopInfo &LI, DenseMap &InvalidDomainMap) { + Loop *L, LoopInfo &LI, DenseMap &DomainMap, + DenseMap &InvalidDomainMap) { + int LoopDepth = getRelativeLoopDepth(L); assert(LoopDepth >= 0 && "Loop in region should have at least depth one"); @@ -3270,9 +3297,9 @@ auto Parts = partitionSetParts(HeaderBBDom.copy(), LoopDepth); HeaderBBDom = isl::manage(Parts.second); - // Check if there is a tagged AddRec for this loop and if so do not add - // the bounded assumptions to the context as they are already implied by the - // tag. + // Check if there is a tagged AddRec for this loop and if so do not + // add the bounded assumptions to the context as they are already implied + // by the tag. if (Affinator.hasNSWAddRecForLoop(L)) { isl_set_free(Parts.first); return true; @@ -3317,8 +3344,8 @@ return true; if (buildAliasGroups(AA)) { - // Aliasing assumptions do not go through addAssumption but we still want to - // collect statistics so we do it here explicitly. + // Aliasing assumptions do not go through addAssumption but we still + // want to collect statistics so we do it here explicitly. if (MinMaxAliasGroups.size()) AssumptionsAliasing++; return true; @@ -3410,7 +3437,8 @@ bool Scop::buildAliasGroups(AliasAnalysis &AA) { // To create sound alias checks we perform the following steps: // o) We partition each group into read only and non read only accesses. - // o) For each group with more than one base pointer we then compute minimal + // o) For each group with more than one base pointer we then compute + // minimal // and maximal accesses to each array of a group in read only and non // read only partitions separately. AliasGroupVectorTy AliasGroups; @@ -3463,8 +3491,8 @@ } } - // If there are no read-only pointers, and less than two read-write pointers, - // no alias check is needed. + // If there are no read-only pointers, and less than two read-write + // pointers, no alias check is needed. if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1) return true; @@ -3743,8 +3771,8 @@ for (const auto &IAClass : InvariantEquivClasses) isl_set_free(IAClass.ExecutionContext); - // Explicitly release all Scop objects and the underlying isl objects before - // we release the isl context. + // Explicitly release all Scop objects and the underlying isl objects + // before we release the isl context. Stmts.clear(); ScopArrayInfoSet.clear(); ScopArrayInfoMap.clear(); @@ -3753,8 +3781,8 @@ } void Scop::updateAccessDimensionality() { - // Check all array accesses for each base pointer and find a (virtual) element - // size for the base pointer that divides all access functions. + // Check all array accesses for each base pointer and find a (virtual) + // element size for the base pointer that divides all access functions. for (ScopStmt &Stmt : *this) for (MemoryAccess *Access : Stmt) { if (!Access->isArrayKind()) @@ -3809,9 +3837,10 @@ } } -void Scop::removeStmtNotInDomainMap() { - auto ShouldDelete = [this](ScopStmt &Stmt) -> bool { - return !this->DomainMap.lookup(Stmt.getEntryBlock()); +void Scop::removeStmtNotInDomainMap( + DenseMap &DomainMap) { + auto ShouldDelete = [&DomainMap](ScopStmt &Stmt) -> bool { + return !DomainMap.lookup(Stmt.getEntryBlock()); }; removeStmts(ShouldDelete); } @@ -3875,20 +3904,21 @@ LInst->getAlignment(), DL)) return false; - // If the location might be overwritten we do not hoist it unconditionally. + // If the location might be overwritten we do not hoist it + // unconditionally. // // TODO: This is probably to conservative. if (!NonHoistableCtxIsEmpty) return false; - // If a dereferenceable load is in a statement that is modeled precisely we - // can hoist it. + // If a dereferenceable load is in a statement that is modeled precisely + // we can hoist it. if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty) return true; - // Even if the statement is not modeled precisely we can hoist the load if it - // does not involve any parameters that might have been specialized by the - // statement domain. + // Even if the statement is not modeled precisely we can hoist the load if + // it does not involve any parameters that might have been specialized by + // the statement domain. for (unsigned u = 0, e = MA->getNumSubscripts(); u < e; u++) if (!isa(MA->getSubscript(u))) return false; @@ -3903,8 +3933,8 @@ auto *StmtInvalidCtx = Stmt.getInvalidContext(); bool StmtInvalidCtxIsEmpty = isl_set_is_empty(StmtInvalidCtx); - // Get the context under which the statement is executed but remove the error - // context under which this statement is reached. + // Get the context under which the statement is executed but remove the + // error context under which this statement is reached. isl_set *DomainCtx = isl_set_params(Stmt.getDomain()); DomainCtx = isl_set_subtract(DomainCtx, StmtInvalidCtx); @@ -3917,11 +3947,11 @@ return; } - // Project out all parameters that relate to loads in the statement. Otherwise - // we could have cyclic dependences on the constraints under which the - // hoisted loads are executed and we could not determine an order in which to - // pre-load them. This happens because not only lower bounds are part of the - // domain but also upper bounds. + // Project out all parameters that relate to loads in the statement. + // Otherwise we could have cyclic dependences on the constraints under + // which the hoisted loads are executed and we could not determine an + // order in which to pre-load them. This happens because not only lower + // bounds are part of the domain but also upper bounds. for (auto &InvMA : InvMAs) { auto *MA = InvMA.MA; Instruction *AccInst = MA->getAccessInstruction(); @@ -3976,11 +4006,11 @@ if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType) continue; - // If the pointer and the type is equal check if the access function wrt. - // to the domain is equal too. It can happen that the domain fixes - // parameter values and these can be different for distinct part of the - // SCoP. If this happens we cannot consolidate the loads but need to - // create a new invariant load equivalence class. + // If the pointer and the type is equal check if the access function + // wrt. to the domain is equal too. It can happen that the domain + // fixes parameter values and these can be different for distinct part + // of the SCoP. If this happens we cannot consolidate the loads but + // need to create a new invariant load equivalence class. auto &MAs = IAClass.InvariantAccesses; if (!MAs.empty()) { auto *LastMA = MAs.front(); @@ -4055,8 +4085,8 @@ // TODO: Loads that are not loop carried, hence are in a statement with // zero iterators, are by construction invariant, though we // currently "hoist" them anyway. This is necessary because we allow - // them to be treated as parameters (e.g., in conditions) and our code - // generation would otherwise use the old value. + // them to be treated as parameters (e.g., in conditions) and our + // code generation would otherwise use the old value. auto &Stmt = *Access->getStatement(); BasicBlock *BB = Stmt.getEntryBlock(); @@ -4175,8 +4205,8 @@ return false; } -/// Replace the base pointer arrays in all memory accesses referencing @p Old, -/// with a reference to @p New. +/// Replace the base pointer arrays in all memory accesses referencing @p +/// Old, with a reference to @p New. static void replaceBasePtrArrays(Scop *S, const ScopArrayInfo *Old, const ScopArrayInfo *New) { for (ScopStmt &Stmt : *S) @@ -4209,9 +4239,9 @@ continue; // we currently do not canonicalize arrays where some accesses are - // hoisted as invariant loads. If we would, we need to update the access - // function of the invariant loads as well. However, as this is not a - // very common situation, we leave this for now to avoid further + // hoisted as invariant loads. If we would, we need to update the + // access function of the invariant loads as well. However, as this is + // not a very common situation, we leave this for now to avoid further // complexity increases. if (isUsedForIndirectHoistedLoad(this, BasePtrSAI)) continue; @@ -4237,8 +4267,8 @@ ScopArrayInfoSet.insert(SAI.get()); } else { SAI->updateElementType(ElementType); - // In case of mismatching array sizes, we bail out by setting the run-time - // context to false. + // In case of mismatching array sizes, we bail out by setting the + // run-time context to false. if (!SAI->updateSizes(Sizes)) invalidate(DELINEARIZATION, DebugLoc()); } @@ -4500,7 +4530,8 @@ RecordedAssumptions.push_back({Kind, Sign, Set, Loc, BB}); } -void Scop::addRecordedAssumptions() { +void Scop::addRecordedAssumptions(DenseMap &DomainMap) { + while (!RecordedAssumptions.empty()) { const Assumption &AS = RecordedAssumptions.pop_back_val(); @@ -4510,7 +4541,7 @@ } // If the domain was deleted the assumptions are void. - isl_set *Dom = getDomainConditions(AS.BB); + isl_set *Dom = getDomainConditions(DomainMap, AS.BB); if (!Dom) { isl_set_free(AS.Set); continue; @@ -4523,8 +4554,8 @@ // _ _____ // Dom => S <==> A v B <==> A - B // - // To avoid the complement we will register A - B as a restriction not an - // assumption. + // To avoid the complement we will register A - B as a restriction not + // an assumption. isl_set *S = AS.Set; if (AS.Sign == AS_RESTRICTION) S = isl_set_params(isl_set_intersect(S, Dom)); @@ -4896,10 +4927,11 @@ Schedule = LoopStack[0].Schedule; } -/// To generate a schedule for the elements in a Region we traverse the Region -/// in reverse-post-order and add the contained RegionNodes in traversal order -/// to the schedule of the loop that is currently at the top of the LoopStack. -/// For loop-free codes, this results in a correct sequential ordering. +/// To generate a schedule for the elements in a Region we traverse the +/// Region in reverse-post-order and add the contained RegionNodes in +/// traversal order to the schedule of the loop that is currently at the top +/// of the LoopStack. For loop-free codes, this results in a correct +/// sequential ordering. /// /// Example: /// bb1(0) @@ -4910,16 +4942,16 @@ /// \ / /// bb6(5) /// -/// Including loops requires additional processing. Whenever a loop header is -/// encountered, the corresponding loop is added to the @p LoopStack. Starting -/// from an empty schedule, we first process all RegionNodes that are within -/// this loop and complete the sequential schedule at this loop-level before -/// processing about any other nodes. To implement this +/// Including loops requires additional processing. Whenever a loop header +/// is encountered, the corresponding loop is added to the @p LoopStack. +/// Starting from an empty schedule, we first process all RegionNodes that +/// are within this loop and complete the sequential schedule at this +/// loop-level before processing about any other nodes. To implement this /// loop-nodes-first-processing, the reverse post-order traversal is /// insufficient. Hence, we additionally check if the traversal yields -/// sub-regions or blocks that are outside the last loop on the @p LoopStack. -/// These region-nodes are then queue and only traverse after the all nodes -/// within the current loop have been processed. +/// sub-regions or blocks that are outside the last loop on the @p +/// LoopStack. These region-nodes are then queue and only traverse after the +/// all nodes within the current loop have been processed. void Scop::buildSchedule(Region *R, LoopStackTy &LoopStack, LoopInfo &LI) { Loop *OuterScopLoop = getLoopSurroundingScop(*this, LI); @@ -4929,12 +4961,12 @@ bool LastRNWaiting = false; // Iterate over the region @p R in reverse post-order but queue - // sub-regions/blocks iff they are not part of the last encountered but not - // completely traversed loop. The variable LastRNWaiting is a flag to indicate - // that we queued the last sub-region/block from the reverse post-order - // iterator. If it is set we have to explore the next sub-region/block from - // the iterator (if any) to guarantee progress. If it is not set we first try - // the next queued sub-region/blocks. + // sub-regions/blocks iff they are not part of the last encountered but + // not completely traversed loop. The variable LastRNWaiting is a flag to + // indicate that we queued the last sub-region/block from the reverse + // post-order iterator. If it is set we have to explore the next + // sub-region/block from the iterator (if any) to guarantee progress. If + // it is not set we first try the next queued sub-region/blocks. while (!WorkList.empty() || !DelayList.empty()) { RegionNode *RN; @@ -4985,8 +5017,8 @@ LoopData.Schedule = combineInSequence(LoopData.Schedule, StmtSchedule); } - // Check if we just processed the last node in this loop. If we did, finalize - // the loop by: + // Check if we just processed the last node in this loop. If we did, + // finalize the loop by: // // - adding new schedule dimensions // - folding the resulting schedule into the parent loop schedule @@ -5143,9 +5175,9 @@ if (!contains(UserBB)) return true; - // When the SCoP region exit needs to be simplified, PHIs in the region exit - // move to a new basic block such that its incoming blocks are not in the - // SCoP anymore. + // When the SCoP region exit needs to be simplified, PHIs in the region + // exit move to a new basic block such that its incoming blocks are not + // in the SCoP anymore. if (hasSingleExitEdge() && isa(Use.getUser()) && isExit(cast(Use.getUser())->getParent())) return true; Index: lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- lib/CodeGen/IslNodeBuilder.cpp +++ lib/CodeGen/IslNodeBuilder.cpp @@ -990,7 +990,7 @@ } else if (S.getStmtFor(Inst)) { IsDead = false; } else { - auto *Domain = S.getDomainConditions(Inst->getParent()); + auto *Domain = S.getStmtFor(Inst)->getDomain(); IsDead = isl_set_is_empty(Domain); isl_set_free(Domain); }