Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -692,7 +692,9 @@ void addAssumption(__isl_take isl_set *Set); /// @brief Build all alias groups for this SCoP. - void buildAliasGroups(AliasAnalysis &AA); + /// + /// @returns True if __no__ error occurred, false otherwise. + bool buildAliasGroups(AliasAnalysis &AA); /// @brief Return all alias groups for this SCoP. const MinMaxVectorVectorTy &getAliasGroups() const { Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -64,6 +64,11 @@ cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::cat(PollyCategory)); +static cl::opt RunTimeChecksMaxParameters( + "polly-rtc-max-parameters", + cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden, + cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory)); + /// Translate a 'const SCEV *' expression in an isl_pw_aff. struct SCEVAffinator : public SCEVVisitor { public: @@ -1137,6 +1142,32 @@ isl_aff *OneAff; unsigned Pos; + // Restrict the number of parameters involved in the access as the lexmin/ + // lexmax computation will take too long if this number is hight. + // + // Experiments with a simple test case using an i7 4800MQ: + // + // #Parameters invovled | Time (in sec) + // 6 | 0.01 + // 7 | 0.04 + // 8 | 0.12 + // 9 | 0.40 + // 10 | 1.54 + // 11 | 6.78 + // 12 | 30.38 + // + if (isl_set_n_param(Set) > RunTimeChecksMaxParameters) { + unsigned InvolvedParams = 0; + for (unsigned u = 0, e = isl_set_n_param(Set); u < e; u++) + if (isl_set_involves_dims(Set, isl_dim_param, u, 1)) + InvolvedParams++; + + if (InvolvedParams > RunTimeChecksMaxParameters) { + isl_set_free(Set); + return -1; + } + } + MinPMA = isl_set_lexmin_pw_multi_aff(isl_set_copy(Set)); MaxPMA = isl_set_lexmax_pw_multi_aff(isl_set_copy(Set)); @@ -1166,7 +1197,7 @@ return isl_set_reset_tuple_id(Domain); } -void Scop::buildAliasGroups(AliasAnalysis &AA) { +bool Scop::buildAliasGroups(AliasAnalysis &AA) { // To create sound alias checks we perform the following steps: // o) Use the alias analysis and an alias set tracker to build alias sets // for all memory accesses inside the SCoP. @@ -1282,6 +1313,7 @@ AG.clear(); } + bool Valid = true; for (AliasGroupTy &AG : AliasGroups) { if (AG.empty()) continue; @@ -1298,11 +1330,21 @@ Locations = isl_union_set_intersect_params(Locations, getAssumedContext()); Locations = isl_union_set_coalesce(Locations); Locations = isl_union_set_detect_equalities(Locations); - isl_union_set_foreach_set(Locations, buildMinMaxAccess, MinMaxAccesses); + Valid = (0 == isl_union_set_foreach_set(Locations, buildMinMaxAccess, + MinMaxAccesses)); isl_union_set_free(Locations); - MinMaxAliasGroups.push_back(MinMaxAccesses); + + if (!Valid) + break; } + + // We limit the compile time internally but large RTCs might still consume a + // a lot of isl operations. This can cause other passes to fail as the quota + // is maxed out. Because of the interal limit we can reset the counter here + // without harm. + isl_ctx_reset_operations(getIslCtx()); + return Valid; } Scop::Scop(TempScop &tempScop, LoopInfo &LI, ScalarEvolution &ScalarEvolution, @@ -1644,9 +1686,28 @@ scop = new Scop(*tempScop, LI, SE, ctx); - if (PollyUseRuntimeAliasChecks) - scop->buildAliasGroups(AA); + if (!PollyUseRuntimeAliasChecks) + return false; + // If a problem occurs while building the alias groups we need to delete + // this SCoP and pretend it wasn't valid in the first place. + if (scop->buildAliasGroups(AA)) + return false; + + --ScopFound; + if (tempScop->getMaxLoopDepth() > 0) + --RichScopFound; + + DEBUG(dbgs() + << "\n\nNOTE: Run time checks for " << scop->getNameStr() + << " could not be created as the number of parameters involved is too " + "hight. The SCoP will be " + "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust the " + "maximal number of parameters but be advised that the compile time " + "might increase exponentially.\n\n"); + + delete scop; + scop = nullptr; return false; } Index: test/ScopInfo/aliasing_many_parameters_not_all_involved.ll =================================================================== --- /dev/null +++ test/ScopInfo/aliasing_many_parameters_not_all_involved.ll @@ -0,0 +1,91 @@ +; RUN: opt %loadPolly -polly-scops -polly-code-generator=isl -polly-rtc-max-parameters=8 -analyze < %s | FileCheck %s --check-prefix=MAX8 +; RUN: opt %loadPolly -polly-scops -polly-code-generator=isl -polly-rtc-max-parameters=7 -analyze < %s | FileCheck %s --check-prefix=MAX7 +; +; Check that we allow this SCoP even though it has 10 parameters involved in posisbly aliasing accesses. +; However, only 7 are involved in accesses through B, 8 through C and none in accesses through A. +; +; MAX8: Printing analysis 'Polly - Create polyhedral description of Scops' for region: 'for.cond => for.end' in function 'jd': +; MAX8-NEXT: Function: jd + +; MAX7: Printing analysis 'Polly - Create polyhedral description of Scops' for region: 'for.cond => for.end' in function 'jd': +; MAX7-NEXT: Invalid Scop! +; +; void jd(int *A, int *B, int *C, long p1, long p2, long p3, long p4, long p5, +; long p6, long p7, long p8, long p9, long p10) { +; for (int i = 0; i < 1024; i++) +; A[i] = B[p1] - B[p2] + B[-p3] - B[p4] + B[p5] - B[-p6] + B[p7] - C[p3] + +; C[-p4] - C[p5] + C[p6] - C[-p7] + C[p8] - C[p9] + C[-p10]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @jd(i32* %A, i32* %B, i32* %C, i64 %p1, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6, i64 %p7, i64 %p8, i64 %p9, i64 %p10) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32* %B, i64 %p1 + %tmp = load i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32* %B, i64 %p2 + %tmp1 = load i32* %arrayidx1, align 4 + %sub = sub nsw i32 %tmp, %tmp1 + %sub2 = sub nsw i64 0, %p3 + %arrayidx3 = getelementptr inbounds i32* %B, i64 %sub2 + %tmp2 = load i32* %arrayidx3, align 4 + %add = add nsw i32 %sub, %tmp2 + %arrayidx4 = getelementptr inbounds i32* %B, i64 %p4 + %tmp3 = load i32* %arrayidx4, align 4 + %sub5 = sub nsw i32 %add, %tmp3 + %arrayidx6 = getelementptr inbounds i32* %B, i64 %p5 + %tmp4 = load i32* %arrayidx6, align 4 + %add7 = add nsw i32 %sub5, %tmp4 + %sub8 = sub nsw i64 0, %p6 + %arrayidx9 = getelementptr inbounds i32* %B, i64 %sub8 + %tmp5 = load i32* %arrayidx9, align 4 + %sub10 = sub nsw i32 %add7, %tmp5 + %arrayidx11 = getelementptr inbounds i32* %B, i64 %p7 + %tmp6 = load i32* %arrayidx11, align 4 + %add12 = add nsw i32 %sub10, %tmp6 + %arrayidx13 = getelementptr inbounds i32* %C, i64 %p3 + %tmp7 = load i32* %arrayidx13, align 4 + %sub14 = sub nsw i32 %add12, %tmp7 + %sub15 = sub nsw i64 0, %p4 + %arrayidx16 = getelementptr inbounds i32* %C, i64 %sub15 + %tmp8 = load i32* %arrayidx16, align 4 + %add17 = add nsw i32 %sub14, %tmp8 + %arrayidx18 = getelementptr inbounds i32* %C, i64 %p5 + %tmp9 = load i32* %arrayidx18, align 4 + %sub19 = sub nsw i32 %add17, %tmp9 + %arrayidx20 = getelementptr inbounds i32* %C, i64 %p6 + %tmp10 = load i32* %arrayidx20, align 4 + %add21 = add nsw i32 %sub19, %tmp10 + %sub22 = sub nsw i64 0, %p7 + %arrayidx23 = getelementptr inbounds i32* %C, i64 %sub22 + %tmp11 = load i32* %arrayidx23, align 4 + %sub24 = sub nsw i32 %add21, %tmp11 + %arrayidx25 = getelementptr inbounds i32* %C, i64 %p8 + %tmp12 = load i32* %arrayidx25, align 4 + %add26 = add nsw i32 %sub24, %tmp12 + %arrayidx27 = getelementptr inbounds i32* %C, i64 %p9 + %tmp13 = load i32* %arrayidx27, align 4 + %sub28 = sub nsw i32 %add26, %tmp13 + %sub29 = sub nsw i64 0, %p10 + %arrayidx30 = getelementptr inbounds i32* %C, i64 %sub29 + %tmp14 = load i32* %arrayidx30, align 4 + %add31 = add nsw i32 %sub28, %tmp14 + %arrayidx32 = getelementptr inbounds i32* %A, i64 %indvars.iv + store i32 %add31, i32* %arrayidx32, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/ScopInfo/run-time-check-many-parameters.ll =================================================================== --- /dev/null +++ test/ScopInfo/run-time-check-many-parameters.ll @@ -0,0 +1,130 @@ +; RUN: opt %loadPolly -polly-scops -polly-code-generator=isl -analyze < %s | FileCheck %s +; +; A valid Scop would print the list of it's statements, we check that we do not +; see that list. +; +; CHECK-NOT: Statements +; +; FIXME: Handling this is an open problem, at the moment we just bail out. +; +; void foo(float *A, float *B, +; long p1, +; long p2, +; long p3, +; long p4, +; long p5, +; long p6, +; long p7, +; long p8, +; long p9, +; long p10, +; long p11, +; long p12) { +; for (long i = 0; i < 100; i++) { +; A[i] = +; B[i + p1] + +; B[i + p2] + +; B[i + p3] + +; B[i + p4] + +; B[i + p5] + +; B[i + p6] + +; B[i + p7] + +; B[i + p8] + +; B[i + p9] + +; B[i + p10] + +; B[i + p11] + +; B[i + p12]; +; } +; } +; +; Computing the minimal and maximal element accessed in B is very expensive. +; Expressing the minimal element itself yields a rather complex isl_pw_aff which +; looks as follows: +; { ... +; MemRef_B[(100 + p11)] : p2 <= -1 + p1 and p3 <= -1 + p1 and p4 <= -1 + p1 +; and p5 <= -1 + p1 and p6 <= -1 + p1 and +; p7 <= -1 + p1 and p8 <= -1 + p1 and p9 <= -1 + p1 +; and p10 <= -1 + p1 and p11 >= p1 and +; p12 <= -1 + p11; +; MemRef_B[(100 + p12)] : p2 <= -1 + p1 and p3 <= -1 + p1 and p4 <= -1 + p1 +; and p5 <= -1 + p1 and p6 <= -1 + p1 and +; p7 <= -1 + p1 and p8 <= -1 + p1 and p9 <= -1 + p1 +; and p10 <= -1 + p1 and p11 <= -1 + p1 and p12 >= p1; +; +; and this isl_pw_aff is then 1:1 translated into a isl ast expression. +; +; In the best case, we would create a run-time check such as: +; +; if (B[99 + max(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12)] < A[0] +; || A[99] B[min(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9])) +; + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @foo(float* %A, float* %B, i64 %p1, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6, i64 %p7, i64 %p8, i64 %p9, i64 %p10, i64 %p11, i64 %p12) #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.body + +for.body: ; preds = %entry.split, %for.body + %i.01 = phi i64 [ 0, %entry.split ], [ %tmp25, %for.body ] + %tmp = add i64 %p1, %i.01 + %arrayidx = getelementptr float* %B, i64 %tmp + %tmp2 = add i64 %p2, %i.01 + %arrayidx2 = getelementptr float* %B, i64 %tmp2 + %tmp3 = add i64 %p3, %i.01 + %arrayidx5 = getelementptr float* %B, i64 %tmp3 + %tmp4 = add i64 %p4, %i.01 + %arrayidx8 = getelementptr float* %B, i64 %tmp4 + %tmp5 = add i64 %p5, %i.01 + %arrayidx11 = getelementptr float* %B, i64 %tmp5 + %tmp6 = add i64 %p6, %i.01 + %arrayidx14 = getelementptr float* %B, i64 %tmp6 + %tmp7 = add i64 %p7, %i.01 + %arrayidx17 = getelementptr float* %B, i64 %tmp7 + %tmp8 = add i64 %p8, %i.01 + %arrayidx20 = getelementptr float* %B, i64 %tmp8 + %tmp9 = add i64 %p9, %i.01 + %arrayidx23 = getelementptr float* %B, i64 %tmp9 + %tmp10 = add i64 %p10, %i.01 + %arrayidx26 = getelementptr float* %B, i64 %tmp10 + %tmp11 = add i64 %p11, %i.01 + %arrayidx29 = getelementptr float* %B, i64 %tmp11 + %tmp12 = add i64 %p12, %i.01 + %arrayidx32 = getelementptr float* %B, i64 %tmp12 + %arrayidx34 = getelementptr float* %A, i64 %i.01 + %tmp13 = load float* %arrayidx, align 4 + %tmp14 = load float* %arrayidx2, align 4 + %add3 = fadd float %tmp13, %tmp14 + %tmp15 = load float* %arrayidx5, align 4 + %add6 = fadd float %add3, %tmp15 + %tmp16 = load float* %arrayidx8, align 4 + %add9 = fadd float %add6, %tmp16 + %tmp17 = load float* %arrayidx11, align 4 + %add12 = fadd float %add9, %tmp17 + %tmp18 = load float* %arrayidx14, align 4 + %add15 = fadd float %add12, %tmp18 + %tmp19 = load float* %arrayidx17, align 4 + %add18 = fadd float %add15, %tmp19 + %tmp20 = load float* %arrayidx20, align 4 + %add21 = fadd float %add18, %tmp20 + %tmp21 = load float* %arrayidx23, align 4 + %add24 = fadd float %add21, %tmp21 + %tmp22 = load float* %arrayidx26, align 4 + %add27 = fadd float %add24, %tmp22 + %tmp23 = load float* %arrayidx29, align 4 + %add30 = fadd float %add27, %tmp23 + %tmp24 = load float* %arrayidx32, align 4 + %add33 = fadd float %add30, %tmp24 + store float %add33, float* %arrayidx34, align 4 + %tmp25 = add nsw i64 %i.01, 1 + %exitcond = icmp ne i64 %tmp25, 100 + br i1 %exitcond, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +}