Index: include/polly/CodeGen/CodeGeneration.h =================================================================== --- include/polly/CodeGen/CodeGeneration.h +++ include/polly/CodeGen/CodeGeneration.h @@ -39,6 +39,7 @@ /// @brief Flag to turn on/off annotation of alias scopes. extern bool PollyAnnotateAliasScopes; +/// FIXME: Depraced and needs to be removed with the cloog backend. static inline int getNumberOfIterations(__isl_take isl_set *Domain) { int Dim = isl_set_dim(Domain, isl_dim_set); Index: include/polly/CodeGen/IslAst.h =================================================================== --- include/polly/CodeGen/IslAst.h +++ include/polly/CodeGen/IslAst.h @@ -52,7 +52,8 @@ IslAstUserPayload() : IsInnermost(false), IsInnermostParallel(false), IsOutermostParallel(false), IsReductionParallel(false), - MinimalDependenceDistance(nullptr), Build(nullptr) {} + MinimalDependenceDistance(nullptr), NumberOfIterations(nullptr), + Build(nullptr) {} /// @brief Cleanup all isl structs on destruction. ~IslAstUserPayload(); @@ -72,6 +73,9 @@ /// @brief The minimal dependence distance for non parallel loops. isl_pw_aff *MinimalDependenceDistance; + /// @brief The loop trip count. + isl_pw_aff *NumberOfIterations; + /// @brief The build environment at the time this node was constructed. isl_ast_build *Build; @@ -139,6 +143,12 @@ /// @brief Get the nodes build context or a nullptr if not available. static __isl_give isl_ast_build *getBuild(__isl_keep isl_ast_node *Node); + /// @brief Get the number of iterations or a nullptr if not available. + static __isl_give isl_pw_aff * + getNumberOfIterations(__isl_keep isl_ast_node *Node); + + /// @brief Get the number of iterations as int or -1 if not available. + static int getNumberOfIterationsAsInt(__isl_keep isl_ast_node *Node); ///} virtual void getAnalysisUsage(AnalysisUsage &AU) const; Index: include/polly/Support/GICHelper.h =================================================================== --- include/polly/Support/GICHelper.h +++ include/polly/Support/GICHelper.h @@ -75,6 +75,32 @@ std::string getIslCompatibleName(std::string Prefix, const llvm::Value *Val, std::string Suffix); +/// @brief Return @p Aff incremented by @p i +__isl_give isl_pw_aff *incrementPwAff(__isl_take isl_pw_aff *Aff, int i); + +/// @brief Return the only affine piece of @p PA. +__isl_give isl_aff *isl_aff_from_pw_aff(__isl_take isl_pw_aff *PA); + +/// @brief Return an overestimation of the number of elements in @p S. +/// +/// @param S The set of which we want to now a bound on the elements. +/// @param Dim The number of input dimensions we want to keep. +/// @param LexMinPtr If not null the lexmin will be returned in here. +/// @param LexMaxPtr If not null the lexmax will be returned in here. +/// +/// @returns A piecewise affine function over approximating the number of +/// elements in @p S as a function of the first @p Dim dimensions +/// of @p S. Furthermore the lexicographic minimum/maximum of @p S +/// in @p LexMinPtr and @p LexMaxPtr if they are given. +__isl_give isl_pw_multi_aff * +getNumElementsUB(__isl_take isl_set *S, int Dim, + __isl_give isl_map **LexMinPtr = nullptr, + __isl_give isl_map **LexMaxPtr = nullptr); + +/// @brief Return the number of iterations for the outermost dim of @p Schedule. +__isl_give isl_pw_aff * +getNumberOfIterationsForSchedule(__isl_take isl_union_map *Schedule); + } // end namespace polly #endif Index: lib/CodeGen/IslAst.cpp =================================================================== --- lib/CodeGen/IslAst.cpp +++ lib/CodeGen/IslAst.cpp @@ -25,6 +25,7 @@ #include "polly/LinkAllPasses.h" #include "polly/Options.h" #include "polly/ScopInfo.h" +#include "polly/Support/GICHelper.h" #include "llvm/Support/Debug.h" #include "isl/union_map.h" @@ -82,6 +83,7 @@ IslAstInfo::IslAstUserPayload::~IslAstUserPayload() { isl_ast_build_free(Build); + isl_pw_aff_free(NumberOfIterations); isl_pw_aff_free(MinimalDependenceDistance); } @@ -225,9 +227,10 @@ BuildInfo->LastForNodeId = Id; // Test for parallelism only if we are not already inside a parallel loop - if (!BuildInfo->InParallelFor) - BuildInfo->InParallelFor = Payload->IsOutermostParallel = - astScheduleDimIsParallel(Build, BuildInfo->Deps, Payload); + if (BuildInfo->Deps) + if (!BuildInfo->InParallelFor) + BuildInfo->InParallelFor = Payload->IsOutermostParallel = + astScheduleDimIsParallel(Build, BuildInfo->Deps, Payload); return Id; } @@ -255,16 +258,20 @@ // Innermost loops that are surrounded by parallel loops have not yet been // tested for parallelism. Test them here to ensure we check all innermost // loops for parallelism. - if (Payload->IsInnermost && BuildInfo->InParallelFor) { - if (Payload->IsOutermostParallel) - Payload->IsInnermostParallel = true; - else - Payload->IsInnermostParallel = - astScheduleDimIsParallel(Build, BuildInfo->Deps, Payload); - } + if (BuildInfo->Deps) + if (Payload->IsInnermost && BuildInfo->InParallelFor) { + if (Payload->IsOutermostParallel) + Payload->IsInnermostParallel = true; + else + Payload->IsInnermostParallel = + astScheduleDimIsParallel(Build, BuildInfo->Deps, Payload); + } if (Payload->IsOutermostParallel) BuildInfo->InParallelFor = false; + isl_union_map *Schedule = IslAstInfo::getSchedule(Node); + Payload->NumberOfIterations = getNumberOfIterationsForSchedule(Schedule); + isl_id_free(Id); return Node; } @@ -346,6 +353,9 @@ isl_ast_build *Build; AstBuildUserInfo BuildInfo; + if (DetectParallel || PollyVectorizerChoice != VECTORIZER_NONE) + BuildInfo.Deps = &D; + if (UseContext) Build = isl_ast_build_from_context(S->getContext()); else @@ -356,15 +366,10 @@ isl_union_map *Schedule = isl_union_map_intersect_domain(S->getSchedule(), S->getDomains()); - if (DetectParallel || PollyVectorizerChoice != VECTORIZER_NONE) { - BuildInfo.Deps = &D; - BuildInfo.InParallelFor = 0; - - Build = isl_ast_build_set_before_each_for(Build, &astBuildBeforeFor, - &BuildInfo); - Build = - isl_ast_build_set_after_each_for(Build, &astBuildAfterFor, &BuildInfo); - } + Build = + isl_ast_build_set_before_each_for(Build, &astBuildBeforeFor, &BuildInfo); + Build = + isl_ast_build_set_after_each_for(Build, &astBuildAfterFor, &BuildInfo); buildRunCondition(Build); @@ -466,6 +471,31 @@ return Payload ? Payload->Build : nullptr; } +isl_pw_aff *IslAstInfo::getNumberOfIterations(isl_ast_node *Node) { + IslAstUserPayload *Payload = getNodePayload(Node); + return Payload ? isl_pw_aff_copy(Payload->NumberOfIterations) : nullptr; +} + +int IslAstInfo::getNumberOfIterationsAsInt(isl_ast_node *Node) { + isl_pw_aff *NumItPAff = getNumberOfIterations(Node); + if (!NumItPAff || !isl_pw_aff_is_cst(NumItPAff) || + isl_pw_aff_n_piece(NumItPAff) != 1) { + isl_pw_aff_free(NumItPAff); + return -1; + } + + isl_aff *NumItAff = isl_aff_from_pw_aff(NumItPAff); + assert(isl_aff_is_cst(NumItAff) && "Assumed affine function to be constant"); + + isl_val *NumItVal = isl_aff_get_constant_val(NumItAff); + isl_aff_free(NumItAff); + + int NumItInt = isl_val_get_num_si(NumItVal); + isl_val_free(NumItVal); + + return NumItInt; +} + void IslAstInfo::printScop(raw_ostream &OS) const { isl_ast_print_options *Options; isl_ast_node *RootNode = getAst(); Index: lib/CodeGen/IslCodeGeneration.cpp =================================================================== --- lib/CodeGen/IslCodeGeneration.cpp +++ lib/CodeGen/IslCodeGeneration.cpp @@ -110,8 +110,6 @@ __isl_give isl_ast_expr *getUpperBound(__isl_keep isl_ast_node *For, CmpInst::Predicate &Predicate); - unsigned getNumberOfIterations(__isl_keep isl_ast_node *For); - void createFor(__isl_take isl_ast_node *For); void createForVector(__isl_take isl_ast_node *For, int VectorWidth); void createForSequential(__isl_take isl_ast_node *For); @@ -222,15 +220,6 @@ return UB; } -unsigned IslNodeBuilder::getNumberOfIterations(__isl_keep isl_ast_node *For) { - isl_union_map *Schedule = IslAstInfo::getSchedule(For); - isl_set *LoopDomain = isl_set_from_union_set(isl_union_map_range(Schedule)); - int NumberOfIterations = polly::getNumberOfIterations(LoopDomain); - if (NumberOfIterations == -1) - return -1; - return NumberOfIterations + 1; -} - void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User, std::vector &IVS, __isl_take isl_id *IteratorID, @@ -362,8 +351,13 @@ // If we can show that LB UB holds at least once, we can // omit the GuardBB in front of the loop. - bool UseGuardBB = - !SE.isKnownPredicate(Predicate, SE.getSCEV(ValueLB), SE.getSCEV(ValueUB)); + isl_pw_aff *NumIterations = IslAstInfo::getNumberOfIterations(For); + isl_pw_aff *Zero = isl_pw_aff_zero_on_domain( + isl_local_space_from_space(isl_pw_aff_get_domain_space(NumIterations))); + isl_set *LessOrEqualZero = isl_pw_aff_le_set(NumIterations, Zero); + bool UseGuardBB = !isl_set_is_empty(LessOrEqualZero); + isl_set_free(LessOrEqualZero); + IV = createLoop(ValueLB, ValueUB, ValueInc, Builder, P, LI, DT, ExitBlock, Predicate, &Annotator, Parallel, UseGuardBB); IDToValue[IteratorID] = IV; @@ -386,7 +380,7 @@ if (Vector && IslAstInfo::isInnermostParallel(For) && !IslAstInfo::isReductionParallel(For)) { - int VectorWidth = getNumberOfIterations(For); + int VectorWidth = IslAstInfo::getNumberOfIterationsAsInt(For); if (1 < VectorWidth && VectorWidth <= 16) { createForVector(For, VectorWidth); return; Index: lib/Support/GICHelper.cpp =================================================================== --- lib/Support/GICHelper.cpp +++ lib/Support/GICHelper.cpp @@ -11,7 +11,9 @@ // //===----------------------------------------------------------------------===// #include "polly/Support/GICHelper.h" + #include "llvm/IR/Value.h" + #include "isl/aff.h" #include "isl/map.h" #include "isl/schedule.h" @@ -152,3 +154,72 @@ makeIslCompatible(ValStr); return ValStr; } + +isl_pw_aff *polly::incrementPwAff(isl_pw_aff *Aff, int i) { + isl_aff *Zero = isl_aff_zero_on_domain( + isl_local_space_from_space(isl_pw_aff_get_domain_space(Aff))); + isl_aff *Inc = isl_aff_add_constant_si(Zero, i); + return isl_pw_aff_add(Aff, isl_pw_aff_from_aff(Inc)); +} + +isl_pw_multi_aff *polly::getNumElementsUB(isl_set *S, int Dim, + isl_map **LexMinPtr, + isl_map **LexMaxPtr) { + S = isl_set_reset_tuple_id(S); + + isl_map *Map = isl_map_from_domain_and_range(isl_set_copy(S), S); + for (int i = 0; i < Dim; i++) + Map = isl_map_equate(Map, isl_dim_in, i, isl_dim_out, i); + + isl_map *LexMax = isl_map_lexmax(isl_map_copy(Map)); + if (LexMaxPtr) + *LexMaxPtr = isl_map_copy(LexMax); + + isl_map *LexMin = isl_map_lexmin(Map); + if (LexMinPtr) + *LexMinPtr = isl_map_copy(LexMin); + + isl_map *Sub = isl_map_sum(LexMax, isl_map_neg(LexMin)); + isl_pw_multi_aff *Elements = isl_pw_multi_aff_from_map(Sub); + + // Elements is now off-by-one (to low) in each dimension. + for (unsigned u = 0, e = isl_pw_multi_aff_dim(Elements, isl_dim_out); u < e; + u++) { + isl_pw_aff *ElementsDim = isl_pw_multi_aff_get_pw_aff(Elements, u); + ElementsDim = incrementPwAff(ElementsDim, 1); + Elements = isl_pw_multi_aff_set_pw_aff(Elements, u, ElementsDim); + } + + return isl_pw_multi_aff_coalesce(Elements); +} + +static int extractAffineFromPiecewiseAffine(__isl_keep isl_set *Dom, + __isl_keep isl_aff *Aff, + void *User) { + isl_set_free(Dom); + *(isl_aff **)User = Aff; + return 0; +} + +isl_aff *polly::isl_aff_from_pw_aff(isl_pw_aff *PA) { + assert(PA && isl_pw_aff_n_piece(PA) == 1 && + "PA is null or has not exactly one piece"); + isl_aff *A = nullptr; + isl_pw_aff_foreach_piece(PA, extractAffineFromPiecewiseAffine, &A); + isl_pw_aff_free(PA); + assert(A && "Could not extract affine piece"); + return A; +} + +isl_pw_aff *polly::getNumberOfIterationsForSchedule(isl_union_map *Schedule) { + isl_union_set *Range = isl_union_map_range(Schedule); + isl_set *LoopDomain = isl_set_from_union_set(Range); + int InDim = isl_set_n_dim(LoopDomain) - 1; + isl_pw_multi_aff *NumElements = + getNumElementsUB(isl_set_copy(LoopDomain), InDim); + unsigned Dim = isl_pw_multi_aff_dim(NumElements, isl_dim_set) - 1; + isl_pw_aff *NumIterations = isl_pw_multi_aff_get_pw_aff(NumElements, Dim); + isl_pw_multi_aff_free(NumElements); + NumIterations = isl_pw_aff_gist(NumIterations, LoopDomain); + return isl_pw_aff_coalesce(NumIterations); +} Index: test/Isl/CodeGen/no_guard_after_tiling.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/no_guard_after_tiling.ll @@ -0,0 +1,59 @@ +; RUN: opt %loadPolly -polly-opt-isl -polly-codegen-isl -S < %s | FileCheck %s +; +; When we use isl to compute the loop bound we can show that even after tiling +; only one loop guards is needed. ScalarEvolution can't figure that out and +; places at least two. +; +; CHECK: br i1 %polly.{{[._a-zA-Z0-9]*}}guard +; CHECK-NOT: br i1 %polly.{{[._a-zA-Z0-9]*}}guard +; +; void jd(int *A) { +; for (int i = 0; i < 1024; i++) +; for (int j = 0; j < 1024; j++) +; A[j] = A[j - 1] + i * j; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @jd(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc6, %entry + %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc6 ], [ 0, %entry ] + %exitcond5 = icmp ne i64 %indvars.iv3, 1024 + br i1 %exitcond5, label %for.body, label %for.end8 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %tmp = add nsw i64 %indvars.iv, -1 + %arrayidx = getelementptr inbounds i32* %A, i64 %tmp + %tmp6 = load i32* %arrayidx, align 4 + %tmp7 = mul nsw i64 %indvars.iv3, %indvars.iv + %tmp8 = trunc i64 %tmp7 to i32 + %add = add nsw i32 %tmp6, %tmp8 + %arrayidx5 = getelementptr inbounds i32* %A, i64 %indvars.iv + store i32 %add, i32* %arrayidx5, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc6 + +for.inc6: ; preds = %for.end + %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1 + br label %for.cond + +for.end8: ; preds = %for.cond + ret void +}