Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -491,6 +491,8 @@ const Region &CurRegion); __isl_give isl_set *addLoopBoundsToDomain(__isl_take isl_set *Domain, TempScop &tempScop); + __isl_give isl_set *addLoopTripCountToDomain(__isl_take isl_set *Domain, + const Loop *L); __isl_give isl_set *buildDomain(TempScop &tempScop, const Region &CurRegion); void buildSchedule(SmallVectorImpl &ScheduleVec); Index: lib/Analysis/ScopDetection.cpp =================================================================== --- lib/Analysis/ScopDetection.cpp +++ lib/Analysis/ScopDetection.cpp @@ -171,6 +171,11 @@ cl::location(PollyModelPHINodes), cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::cat(PollyCategory)); +static cl::opt AllowNonSCEVBackedgeTakenCount( + "polly-allow-non-scev-backedge-taken-count", + cl::desc("Allow loops even if SCEV cannot provide a trip count"), + cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); + bool polly::PollyModelPHINodes = false; bool polly::PollyTrackFailures = false; bool polly::PollyDelinearize = false; @@ -693,6 +698,22 @@ } bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const { + + // Use ISL to compute the loop trip count instead of ScalarEvolution. We can + // currently handle non-affine bounds if the following conditions are met. + // + // 1) The loop has only one back edge that is an affine conditional branch. + // 2) The exit branch is not unsigned. + if (AllowNonSCEVBackedgeTakenCount && L->getNumBackEdges() == 1) { + auto *ExitingBlock = L->getExitingBlock(); + if (ExitingBlock) + if (auto *Branch = dyn_cast(ExitingBlock->getTerminator())) + if (Branch->isConditional()) + if (auto *Compare = dyn_cast(Branch->getCondition())) + if (!Compare->isUnsigned() || AllowUnsigned) + return true; + } + // Is the loop count affine? const SCEV *LoopCount = SE->getBackedgeTakenCount(L); if (isAffineExpr(&Context.CurRegion, LoopCount, *SE)) { @@ -944,7 +965,8 @@ // Check if there was at least one non-overapproximated loop in the region or // we allow regions without loops. - if (!DetectRegionsWithoutLoops && !Context.hasAffineLoops) + if (!DetectRegionsWithoutLoops && !Context.hasAffineLoops && + !AllowNonSCEVBackedgeTakenCount) invalid(Context, /*Assert=*/true, &CurRegion); DEBUG(dbgs() << "OK\n"); Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -74,6 +74,12 @@ cl::desc("The maximal number of arrays to compare in each alias group."), cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory)); +static int getLoopDepth(const Scop *S, const Loop *L) { + Loop *outerLoop = S->getRegion().outermostLoopInRegion(const_cast(L)); + assert(outerLoop && "Scop does not contain this loop"); + return L->getLoopDepth() - outerLoop->getLoopDepth(); +} + /// Translate a 'const SCEV *' expression in an isl_pw_aff. struct SCEVAffinator : public SCEVVisitor { public: @@ -90,7 +96,6 @@ const Scop *S; SCEVAffinator(const ScopStmt *Stmt); - int getLoopDepth(const Loop *L); __isl_give isl_pw_aff *visit(const SCEV *Expr); __isl_give isl_pw_aff *visitConstant(const SCEVConstant *Expr); @@ -226,7 +231,7 @@ isl_space *Space = isl_space_set_alloc(Ctx, 0, NbLoopSpaces); isl_local_space *LocalSpace = isl_local_space_from_space(Space); - int loopDimension = getLoopDepth(Expr->getLoop()); + int loopDimension = getLoopDepth(S, Expr->getLoop()); isl_aff *LAff = isl_aff_set_coefficient_si( isl_aff_zero_on_domain(LocalSpace), isl_dim_in, loopDimension, 1); @@ -297,12 +302,6 @@ "Unknowns SCEV was neither parameter nor a valid instruction."); } -int SCEVAffinator::getLoopDepth(const Loop *L) { - Loop *outerLoop = S->getRegion().outermostLoopInRegion(const_cast(L)); - assert(outerLoop && "Scop does not contain this loop"); - return L->getLoopDepth() - outerLoop->getLoopDepth(); -} - /// @brief Add the bounds of @p Range to the set @p S for dimension @p dim. static __isl_give isl_set *addRangeBoundsToSet(__isl_take isl_set *S, const ConstantRange &Range, @@ -739,6 +738,12 @@ isl_space *Space = isl_space_map_from_set(setDomain); isl_map *Map = isl_map_universe(isl_space_copy(Space)); isl_local_space *MapLocalSpace = isl_local_space_from_space(Space); + + if (isl_map_dim(Map, isl_dim_in) == 0) { + isl_local_space_free(MapLocalSpace); + return Map; + } + unsigned lastDimension = isl_map_dim(Map, isl_dim_in) - 1; // Set all but the last dimension to be equal for the input and output @@ -903,6 +908,53 @@ Schedule = isl_map_align_params(Schedule, Parent.getParamSpace()); } +__isl_give isl_set * +ScopStmt::addLoopTripCountToDomain(__isl_take isl_set *Domain, const Loop *L) { + + unsigned loopDimension = getLoopDepth(getParent(), L); + BasicBlock *CurBB = getBasicBlock(); + ScalarEvolution *SE = getParent()->getSE(); + isl_space *DomSpace = isl_set_get_space(Domain); + + isl_space *MapSpace = isl_space_map_from_set(isl_space_copy(DomSpace)); + isl_multi_aff *LoopMAff = isl_multi_aff_identity(MapSpace); + isl_aff *LoopAff = isl_multi_aff_get_aff(LoopMAff, loopDimension); + LoopAff = isl_aff_add_constant_si(LoopAff, 1); + LoopMAff = isl_multi_aff_set_aff(LoopMAff, loopDimension, LoopAff); + isl_map *TranslationMap = isl_map_from_multi_aff(LoopMAff); + + // We can currently use ISL to compute the trip count only under certain + // conditoins. These are enforeced in ScopDetection. + auto *ExitingBB = L->getExitingBlock(); + assert(ExitingBB && "Loop has more than one exiting block"); + auto *Term = dyn_cast(ExitingBB->getTerminator()); + assert(Term && Term->isConditional() && "Terminator is not conditional"); + auto *Cond = dyn_cast(Term->getCondition()); + assert(Cond && !Cond->isUnsigned() && "Condition is not signed"); + + auto *LHS = SE->getSCEVAtScope(Cond->getOperand(0), L); + auto *RHS = SE->getSCEVAtScope(Cond->getOperand(1), L); + auto Pred = Cond->getPredicate(); + if (!L->contains(Term->getSuccessor(0))) + Pred = ICmpInst::getInversePredicate(Pred); + Comparison Comp(LHS, RHS, Pred); + + isl_set *CondSet = buildConditionSet(Comp); + isl_map *ForwardMap = isl_map_lex_le(isl_space_copy(DomSpace)); + for (unsigned i = 0; i < isl_set_n_dim(Domain); i++) + if (i != loopDimension) + ForwardMap = isl_map_equate(ForwardMap, isl_dim_in, i, isl_dim_out, i); + ForwardMap = isl_map_apply_range(ForwardMap, isl_map_copy(TranslationMap)); + isl_set *CondDom = isl_set_subtract(isl_set_copy(Domain), CondSet); + isl_set *ForwardCond = isl_set_apply(CondDom, ForwardMap); + Domain = isl_set_subtract(Domain, ForwardCond); + + isl_map_free(TranslationMap); + isl_space_free(DomSpace); + + return Domain; +} + __isl_give isl_set *ScopStmt::buildConditionSet(const Comparison &Comp) { isl_pw_aff *L = SCEVAffinator::getPwAff(this, Comp.getLHS()); isl_pw_aff *R = SCEVAffinator::getPwAff(this, Comp.getRHS()); @@ -954,9 +1006,15 @@ // IV <= LatchExecutions. const Loop *L = getLoopForDimension(i); const SCEV *LatchExecutions = SE->getBackedgeTakenCount(L); - isl_pw_aff *UpperBound = SCEVAffinator::getPwAff(this, LatchExecutions); - isl_set *UpperBoundSet = isl_pw_aff_le_set(IV, UpperBound); - Domain = isl_set_intersect(Domain, UpperBoundSet); + if (!isa(LatchExecutions)) { + isl_pw_aff *UpperBound = SCEVAffinator::getPwAff(this, LatchExecutions); + isl_set *UpperBoundSet = isl_pw_aff_le_set(IV, UpperBound); + Domain = isl_set_intersect(Domain, UpperBoundSet); + } else { + // In case SCEV cannot provide a loop trip count we compute it with ISL + Domain = addLoopTripCountToDomain(Domain, L); + isl_pw_aff_free(IV); + } } isl_local_space_free(LocalSpace); Index: test/ScopInfo/isl_trip_count.ll =================================================================== --- /dev/null +++ test/ScopInfo/isl_trip_count.ll @@ -0,0 +1,38 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-allow-non-scev-backedge-taken-count -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: [M, N] -> { Stmt_while_body[i0] : 4i0 <= -M + N and i0 >= 1; Stmt_while_body[0] } +; +; void f(int *A, int N, int M) { +; int i = 0; +; while (M <= N) { +; A[i++] = 1; +; M += 4; +; } +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + +define void @f(i32* nocapture %A, i32 %N, i32 %M) { +entry: + %cmp3 = icmp sgt i32 %M, %N + br i1 %cmp3, label %while.end, label %while.body.preheader + +while.body.preheader: + br label %while.body + +while.body: + %i.05 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ] + %M.addr.04 = phi i32 [ %add, %while.body ], [ %M, %while.body.preheader ] + %inc = add nuw nsw i32 %i.05, 1 + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.05 + store i32 1, i32* %arrayidx, align 4 + %add = add nsw i32 %M.addr.04, 4 + %cmp = icmp sgt i32 %add, %N + br i1 %cmp, label %while.end.loopexit, label %while.body + +while.end.loopexit: + br label %while.end + +while.end: + ret void +}