Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -411,6 +411,9 @@ std::string BaseName; + /// @brief Flag to indicate errors during the construction of the statement. + bool IsValid; + /// Build the statement. //@{ __isl_give isl_set *buildConditionSet(const Comparison &Cmp); @@ -437,6 +440,9 @@ BasicBlock &bb, SmallVectorImpl &NestLoops, SmallVectorImpl &Scatter); + /// @brief Check if the construction went without an error. + bool isValid() const { return IsValid; } + friend class Scop; public: @@ -652,6 +658,14 @@ /// @brief Simplify the assumed context. void simplifyAssumedContext(); + /// @brief Check if all statements are valid or if at least one is "corrupt". + bool statementsAreValid() const { + for (ScopStmt *Stmt : *this) + if (!Stmt->isValid()) + return false; + return true; + } + /// Build the Scop and Statement with precalculated scop information. void buildScop(TempScop &TempScop, const Region &CurRegion, // Loops in Scop containing CurRegion Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -778,6 +778,7 @@ Space = isl_set_get_space(Domain); LocalSpace = isl_local_space_from_space(Space); + ScalarEvolution *SE = getParent()->getSE(); for (int i = 0, e = getNumIterators(); i != e; ++i) { isl_aff *Zero = isl_aff_zero_on_domain(isl_local_space_copy(LocalSpace)); isl_pw_aff *IV = @@ -790,6 +791,24 @@ // IV <= LatchExecutions. const Loop *L = getLoopForDimension(i); const SCEV *LatchExecutions = tempScop.getLoopBound(L); + + // In case the numberof latch executions is constant and negative we got a + // wrap around in the number of iterations. We can still proceed when we + // extend the type of the SCEV by at least 1 bit. However, our code + // generation needs to be able to handle the type so we don't do it for + // types > i32 (as i64 is our default/fallback type in code generation). + if (auto *LatchExecutionsCst = dyn_cast(LatchExecutions)) { + if (LatchExecutionsCst->getValue()->isNegative()) { + unsigned TypeSize = LatchExecutions->getType()->getScalarSizeInBits(); + if (TypeSize > 32) { + IsValid = false; + } else { + Type *WidenedType = Type::getIntNTy(SE->getContext(), TypeSize * 2); + LatchExecutions = SE->getZeroExtendExpr(LatchExecutions, WidenedType); + } + } + } + isl_pw_aff *UpperBound = SCEVAffinator::getPwAff(this, LatchExecutions); isl_set *UpperBoundSet = isl_pw_aff_le_set(IV, UpperBound); Domain = isl_set_intersect(Domain, UpperBoundSet); @@ -842,7 +861,8 @@ ScopStmt::ScopStmt(Scop &parent, TempScop &tempScop, const Region &CurRegion, BasicBlock &bb, SmallVectorImpl &Nest, SmallVectorImpl &Scatter) - : Parent(parent), BB(&bb), IVS(Nest.size()), NestLoops(Nest.size()) { + : Parent(parent), BB(&bb), IVS(Nest.size()), NestLoops(Nest.size()), + IsValid(true) { // Setup the induction variables. for (unsigned i = 0, e = Nest.size(); i < e; ++i) { if (!SCEVCodegen) { @@ -1762,35 +1782,34 @@ return false; } - // Statistics. - ++ScopFound; - if (tempScop->getMaxLoopDepth() > 0) - ++RichScopFound; - scop = new Scop(*tempScop, LI, SE, ctx); - if (!PollyUseRuntimeAliasChecks) - return false; - - // If a problem occurs while building the alias groups we need to delete - // this SCoP and pretend it wasn't valid in the first place. - if (scop->buildAliasGroups(AA)) + // If a problem occurs while building the SCoP statements or the alias groups + // we need to delete this SCoP and pretend it wasn't valid in the first place. + bool Valid = scop->statementsAreValid(); + if (PollyUseRuntimeAliasChecks) + Valid = Valid && scop->buildAliasGroups(AA); + + if (!Valid) { + DEBUG( + dbgs() << "There was a problem during the construction of the SCoP. " + "It will not be considered for optimization. Possible " + "reasons include:\n\tRuntime alias checks were to " + "complex/contained to many parameters.\n\t\t Use " + "--polly-rtc-max-parameters=X\n\tto adjust the maximal " + "number of parameters allowed (for the price of compile " + "time).\n\n\tThe loop bound wrapped around for types <= i64"); + + delete scop; + scop = nullptr; return false; + } - --ScopFound; + // Statistics after we know the SCoP is valid. + ++ScopFound; if (tempScop->getMaxLoopDepth() > 0) - --RichScopFound; - - DEBUG(dbgs() - << "\n\nNOTE: Run time checks for " << scop->getNameStr() - << " could not be created as the number of parameters involved is too " - "high. The SCoP will be " - "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust the " - "maximal number of parameters but be advised that the compile time " - "might increase exponentially.\n\n"); - - delete scop; - scop = nullptr; + ++RichScopFound; + return false; } Index: test/Isl/single_loop_uint_max_iterations.ll =================================================================== --- test/Isl/single_loop_uint_max_iterations.ll +++ /dev/null @@ -1,72 +0,0 @@ -; RUN: opt %loadPolly -polly-cloog-scop -S -analyze < %s | FileCheck %s -; XFAIL: * - -;#include "limits.h" -;#define N 20 -; -;int main () { -; unsigned int i; -; unsigned int A[N]; -; -; A[0] = 0; -; -; __sync_synchronize(); -; -; for (i = 0; i < UINT_MAX; i++) -; A[0] = i; -; -; __sync_synchronize(); -; -; if (A[0] == UINT_MAX - 1) -; return 0; -; else -; return 1; -;} - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-unknown-linux-gnu" - -define i32 @main() nounwind { -entry: - %A = alloca [20 x i32], align 4 ; <[20 x i32]*> [#uses=3] - %arraydecay = getelementptr inbounds [20 x i32]* %A, i32 0, i32 0 ; [#uses=1] - %arrayidx = getelementptr inbounds i32* %arraydecay, i64 0 ; [#uses=1] - store i32 0, i32* %arrayidx - fence seq_cst - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] ; [#uses=3] - %exitcond = icmp ne i32 %0, -1 ; [#uses=1] - br i1 %exitcond, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %arraydecay2 = getelementptr inbounds [20 x i32]* %A, i32 0, i32 0 ; [#uses=1] - %arrayidx3 = getelementptr inbounds i32* %arraydecay2, i64 0 ; [#uses=1] - store i32 %0, i32* %arrayidx3 - br label %for.inc - -for.inc: ; preds = %for.body - %inc = add i32 %0, 1 ; [#uses=1] - br label %for.cond - -for.end: ; preds = %for.cond - fence seq_cst - %arraydecay5 = getelementptr inbounds [20 x i32]* %A, i32 0, i32 0 ; [#uses=1] - %arrayidx6 = getelementptr inbounds i32* %arraydecay5, i64 0 ; [#uses=1] - %tmp7 = load i32* %arrayidx6 ; [#uses=1] - %cmp8 = icmp eq i32 %tmp7, -2 ; [#uses=1] - br i1 %cmp8, label %if.then, label %if.else - -if.then: ; preds = %for.end - br label %return - -if.else: ; preds = %for.end - br label %return - -return: ; preds = %if.else, %if.then - %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.else ] ; [#uses=1] - ret i32 %retval.0 -} - -; CHECK:for (c2=0; Index: test/Isl/single_loop_ull_max_iterations.ll =================================================================== --- test/Isl/single_loop_ull_max_iterations.ll +++ /dev/null @@ -1,72 +0,0 @@ -; RUN: opt %loadPolly -polly-cloog-scop -S -analyze < %s | FileCheck %s -; XFAIL: * - -;#include "limits.h" -;#define N 20 -; -;int main () { -; unsigned long long i; -; unsigned long long A[N]; -; -; A[0] = 0; -; -; __sync_synchronize(); -; -; for (i = 0; i < ULLONG_MAX; i++) -; A[0] = i; -; -; __sync_synchronize(); -; -; if (A[0] == ULLONG_MAX - 1) -; return 0; -; else -; return 1; -;} - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-unknown-linux-gnu" - -define i32 @main() nounwind { -entry: - %A = alloca [20 x i64], align 8 ; <[20 x i64]*> [#uses=3] - %arraydecay = getelementptr inbounds [20 x i64]* %A, i32 0, i32 0 ; [#uses=1] - %arrayidx = getelementptr inbounds i64* %arraydecay, i64 0 ; [#uses=1] - store i64 0, i64* %arrayidx - fence seq_cst - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ] ; [#uses=3] - %exitcond = icmp ne i64 %0, -1 ; [#uses=1] - br i1 %exitcond, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %arraydecay2 = getelementptr inbounds [20 x i64]* %A, i32 0, i32 0 ; [#uses=1] - %arrayidx3 = getelementptr inbounds i64* %arraydecay2, i64 0 ; [#uses=1] - store i64 %0, i64* %arrayidx3 - br label %for.inc - -for.inc: ; preds = %for.body - %inc = add i64 %0, 1 ; [#uses=1] - br label %for.cond - -for.end: ; preds = %for.cond - fence seq_cst - %arraydecay5 = getelementptr inbounds [20 x i64]* %A, i32 0, i32 0 ; [#uses=1] - %arrayidx6 = getelementptr inbounds i64* %arraydecay5, i64 0 ; [#uses=1] - %tmp7 = load i64* %arrayidx6 ; [#uses=1] - %cmp8 = icmp eq i64 %tmp7, -2 ; [#uses=1] - br i1 %cmp8, label %if.then, label %if.else - -if.then: ; preds = %for.end - br label %return - -if.else: ; preds = %for.end - br label %return - -return: ; preds = %if.else, %if.then - %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.else ] ; [#uses=1] - ret i32 %retval.0 -} - -; CHECK:for (c2=0; Index: test/ScopInfo/negative_loop_bound_int_type.ll =================================================================== --- /dev/null +++ test/ScopInfo/negative_loop_bound_int_type.ll @@ -0,0 +1,38 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; The loop bound presented by Scalar evolution will wrap around (as it is +; signed) and we will see -1. However that would lead to an empty domain, +; thus wrong code. To fix this we promote the loop bound to i64 and proceed. +; +; CHECK: Stmt_for_body +; CHECK: Domain := +; CHECK: { Stmt_for_body[i0] : i0 >= 0 and i0 <= 4294967295 }; +; +; void jd(int *A) { +; for (int i = 0; i < (unsigned)-1; i++) +; A[i] = i; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @jd(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i32 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp eq i32 %indvars.iv, -1 + br i1 %exitcond, label %for.end, label %for.body + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32* %A, i32 %indvars.iv + store i32 %indvars.iv, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/ScopInfo/negative_loop_bound_long_type.ll =================================================================== --- /dev/null +++ test/ScopInfo/negative_loop_bound_long_type.ll @@ -0,0 +1,37 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; The loop bound presented by Scalar evolution will wrap around (as it is +; signed) and we will see -1. However that would lead to an empty domain, +; thus wrong code. To fix this we would need promote the loop bound to i128 but +; our code generation will fail, thus we bail out. +; +; CHECK-NOT: Domain +; +; void jd(long int *A) { +; for (long int i = 0; i < (long unsigned)-1; i++) +; A[i] = i; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @jd(i64* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ] + %exitcond = icmp eq i64 %i.0, -1 + br i1 %exitcond, label %for.end, label %for.body + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i64* %A, i64 %i.0 + store i64 %i.0, i64* %arrayidx, align 8 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i64 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/ScopInfo/negative_loop_bound_small_type.ll =================================================================== --- /dev/null +++ test/ScopInfo/negative_loop_bound_small_type.ll @@ -0,0 +1,47 @@ +; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: Stmt_body_load +; CHECK: Domain := +; CHECK: { Stmt_body_load[i0] : i0 >= 0 and i0 <= 127 }; +; CHECK: Stmt_body_store +; CHECK: Domain := +; CHECK: { Stmt_body_store[i0] : i0 >= 0 and i0 <= 127 }; +; +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +@A = common global [128 x i32] zeroinitializer, align 16 + +define void @test() nounwind uwtable { +preheader: + br label %header + +header: + %i = phi i7 [ 0, %preheader ], [ %i.1, %body.incr ] + br label %body.addr + +body.addr: + %tmp = zext i7 %i to i64 + %A.addr = getelementptr [128 x i32]* @A, i64 0, i64 %tmp + br label %body.load + +body.load: + %A.load = load i32* %A.addr, align 4 + br label %body.comp + +body.comp: + %A.inc = zext i7 %i to i32 + %A.val = add nsw i32 %A.load, %A.inc + br label %body.store + +body.store: + store i32 %A.val, i32* %A.addr, align 4 + br label %body.incr + +body.incr: + %i.1 = add i7 %i, 1 + %exitcond = icmp eq i7 %i.1, 0 + br i1 %exitcond, label %exit, label %header + +exit: + ret void +}