Index: polly/trunk/include/polly/Support/SCEVAffinator.h =================================================================== --- polly/trunk/include/polly/Support/SCEVAffinator.h +++ polly/trunk/include/polly/Support/SCEVAffinator.h @@ -113,6 +113,10 @@ /// @returns The isl representation @p PWAC with a posisbly adjusted domain. __isl_give PWACtx checkForWrapping(const llvm::SCEV *Expr, PWACtx PWAC) const; + /// Whether to track the value of this expression precisely, rather than + /// assuming it won't wrap. + bool computeModuloForExpr(const llvm::SCEV *Expr); + __isl_give PWACtx visit(const llvm::SCEV *E); __isl_give PWACtx visitConstant(const llvm::SCEVConstant *E); __isl_give PWACtx visitTruncateExpr(const llvm::SCEVTruncateExpr *E); Index: polly/trunk/lib/Support/SCEVAffinator.cpp =================================================================== --- polly/trunk/lib/Support/SCEVAffinator.cpp +++ polly/trunk/lib/Support/SCEVAffinator.cpp @@ -36,21 +36,9 @@ // compile time. static int const MaxDisjunctionsInPwAff = 100; -// The maximal number of bits for which a zero-extend is modeled precisely. -static unsigned const MaxZextSmallBitWidth = 7; - -// The maximal number of bits for which a truncate is modeled precisely. -static unsigned const MaxTruncateSmallBitWidth = 31; - -/// Return true if a zero-extend from @p Width bits is precisely modeled. -static bool isPreciseZeroExtend(unsigned Width) { - return Width <= MaxZextSmallBitWidth; -} - -/// Return true if a truncate from @p Width bits is precisely modeled. -static bool isPreciseTruncate(unsigned Width) { - return Width <= MaxTruncateSmallBitWidth; -} +// The maximal number of bits for which a general expression is modeled +// precisely. +static unsigned const MaxSmallBitWidth = 7; /// Add the number of basic sets in @p Domain to @p User static isl_stat addNumBasicSets(__isl_take isl_set *Domain, @@ -99,26 +87,6 @@ PWAC0.second = isl_set_union(PWAC0.second, PWAC1.second); } -/// Set the possible wrapping of @p Expr to @p Flags. -static const SCEV *setNoWrapFlags(ScalarEvolution &SE, const SCEV *Expr, - SCEV::NoWrapFlags Flags) { - auto *NAry = dyn_cast(Expr); - if (!NAry) - return Expr; - - SmallVector Ops(NAry->op_begin(), NAry->op_end()); - switch (Expr->getSCEVType()) { - case scAddExpr: - return SE.getAddExpr(Ops, Flags); - case scMulExpr: - return SE.getMulExpr(Ops, Flags); - case scAddRecExpr: - return SE.getAddRecExpr(Ops, cast(Expr)->getLoop(), Flags); - default: - return Expr; - } -} - static __isl_give isl_pw_aff *getWidthExpValOnDomain(unsigned Width, __isl_take isl_set *Dom) { auto *Ctx = isl_set_get_ctx(Dom); @@ -241,6 +209,15 @@ return false; } +bool SCEVAffinator::computeModuloForExpr(const SCEV *Expr) { + unsigned Width = TD.getTypeSizeInBits(Expr->getType()); + // We assume nsw expressions never overflow. + if (auto *NAry = dyn_cast(Expr)) + if (NAry->getNoWrapFlags() & SCEV::FlagNSW) + return false; + return Width <= MaxSmallBitWidth; +} + __isl_give PWACtx SCEVAffinator::visit(const SCEV *Expr) { auto Key = std::make_pair(Expr, BB); @@ -267,16 +244,23 @@ PWAC = getPWACtxFromPWA(isl_pw_aff_alloc(Domain, Affine)); } else { PWAC = SCEVVisitor::visit(Expr); - PWAC = checkForWrapping(Expr, PWAC); + if (computeModuloForExpr(Expr)) + PWAC.first = addModuloSemantic(PWAC.first, Expr->getType()); + else + PWAC = checkForWrapping(Expr, PWAC); } - if (!Factor->getType()->isIntegerTy(1)) + if (!Factor->getType()->isIntegerTy(1)) { combine(PWAC, visitConstant(Factor), isl_pw_aff_mul); + if (computeModuloForExpr(Key.first)) + PWAC.first = addModuloSemantic(PWAC.first, Expr->getType()); + } // For compile time reasons we need to simplify the PWAC before we cache and // return it. PWAC.first = isl_pw_aff_coalesce(PWAC.first); - PWAC = checkForWrapping(Key.first, PWAC); + if (!computeModuloForExpr(Key.first)) + PWAC = checkForWrapping(Key.first, PWAC); CachedExpressions[Key] = copyPWACtx(PWAC); return PWAC; @@ -314,12 +298,9 @@ auto OpPWAC = visit(Op); unsigned Width = TD.getTypeSizeInBits(Expr->getType()); - bool Precise = isPreciseTruncate(Width); - if (Precise) { - OpPWAC.first = addModuloSemantic(OpPWAC.first, Expr->getType()); + if (computeModuloForExpr(Expr)) return OpPWAC; - } auto *Dom = isl_pw_aff_domain(isl_pw_aff_copy(OpPWAC.first)); auto *ExpPWA = getWidthExpValOnDomain(Width - 1, Dom); @@ -380,28 +361,17 @@ // assumptions and assume the "former negative" piece will not exist. auto *Op = Expr->getOperand(); - unsigned Width = TD.getTypeSizeInBits(Op->getType()); - - bool Precise = isPreciseZeroExtend(Width); - - auto Flags = getNoWrapFlags(Op); - auto NoWrapFlags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); - bool OpCanWrap = Precise && !(Flags & SCEV::FlagNSW); - if (OpCanWrap) - Op = setNoWrapFlags(SE, Op, NoWrapFlags); - auto OpPWAC = visit(Op); - if (OpCanWrap) - OpPWAC.first = addModuloSemantic(OpPWAC.first, Op->getType()); // If the width is to big we assume the negative part does not occur. - if (!Precise) { + if (!computeModuloForExpr(Op)) { takeNonNegativeAssumption(OpPWAC); return OpPWAC; } // If the width is small build the piece for the non-negative part and // the one for the negative part and unify them. + unsigned Width = TD.getTypeSizeInBits(Op->getType()); interpretAsUnsigned(OpPWAC, Width); return OpPWAC; } Index: polly/trunk/test/ScopInfo/infeasible-rtc.ll =================================================================== --- polly/trunk/test/ScopInfo/infeasible-rtc.ll +++ polly/trunk/test/ScopInfo/infeasible-rtc.ll @@ -4,24 +4,47 @@ ; RUN: opt %loadPolly -polly-scops -analyze < %s \ ; RUN: | FileCheck %s -check-prefix=SCOPS -; DETECT: Valid Region for Scop: header => exit -; SCOPS-NOT: Region: %header---%exit +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; DETECT: Valid Region for Scop: test1.header => test1.exit +; SCOPS-NOT: Region: %test1.header---%test1.exit ; Verify that we detect this scop, but that, due to an infeasible run-time ; check, we refuse to model it. -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +define void @test(i64* %a) nounwind uwtable { +preheader: + br label %test1.header + +test1.header: + %i = phi i56 [ 0, %preheader ], [ %i.1, %test1.header ] + %tmp = zext i56 %i to i64 + %A.addr = getelementptr i64, i64* %a, i64 %tmp + %A.load = load i64, i64* %A.addr, align 4 + %A.inc = zext i56 %i to i64 + %A.val = add nsw i64 %A.load, %A.inc + store i64 %A.val, i64* %A.addr, align 4 + %i.1 = add i56 %i, 1 + %exitcond = icmp eq i56 %i.1, 0 + br i1 %exitcond, label %test1.exit, label %test1.header + +test1.exit: + ret void +} + +; Old version of the previous test; make sure we compute the trip count +; correctly. -@A = common global [128 x i32] zeroinitializer, align 16 +; SCOPS: { Stmt_header[i0] : 0 <= i0 <= 127 }; -define void @test() nounwind uwtable { +define void @test2([128 x i32]* %a) nounwind uwtable { preheader: br label %header header: %i = phi i7 [ 0, %preheader ], [ %i.1, %header ] %tmp = zext i7 %i to i64 - %A.addr = getelementptr [128 x i32], [128 x i32]* @A, i64 0, i64 %tmp + %A.addr = getelementptr [128 x i32], [128 x i32]* %a, i64 0, i64 %tmp %A.load = load i32, i32* %A.addr, align 4 %A.inc = zext i7 %i to i32 %A.val = add nsw i32 %A.load, %A.inc Index: polly/trunk/test/ScopInfo/integers.ll =================================================================== --- polly/trunk/test/ScopInfo/integers.ll +++ polly/trunk/test/ScopInfo/integers.ll @@ -111,8 +111,24 @@ store i3 %indvar, i3* %scevgep, align 8 %indvar.next = add nsw i3 %indvar, 1 %sub = sub i3 %n, 3 -; CHECK: 'bb => return' in function 'f6' -; CHECK: [n] -> { Stmt_bb[0] : n = 3 }; +; CHECK-LABEL: 'bb => return' in function 'f6' +; CHECK: Context: +; CHECK-NEXT: [n] -> { : -4 <= n <= 3 } +; CHECK-NEXT: Assumed Context: +; CHECK-NEXT: [n] -> { : } +; CHECK-NEXT: Invalid Context: +; CHECK-NEXT: [n] -> { : 1 = 0 } + +; CHECK: Statements { +; CHECK-NEXT: Stmt_bb +; CHECK-NEXT: Domain := +; CHECK-NEXT: [n] -> { Stmt_bb[i0] : i0 >= 0 and 8*floor((2 - n)/8) >= -5 - n + i0 and 8*floor((2 - n)/8) <= -2 - n }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [n] -> { Stmt_bb[i0] -> [i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_bb[i0] -> MemRef_a[i0] }; +; CHECK-NEXT:} + %exitcond = icmp eq i3 %indvar, %sub br i1 %exitcond, label %return, label %bb Index: polly/trunk/test/ScopInfo/truncate-1.ll =================================================================== --- polly/trunk/test/ScopInfo/truncate-1.ll +++ polly/trunk/test/ScopInfo/truncate-1.ll @@ -5,13 +5,14 @@ ; A[i]++; ; } ; +; FIXME: We should the truncate precisely... or just make it a separate parameter. ; CHECK: Assumed Context: ; CHECK-NEXT: [N] -> { : } ; CHECK-NEXT: Invalid Context: -; CHECK-NEXT: [N] -> { : 1 = 0 } +; CHECK-NEXT: [N] -> { : N <= -129 or N >= 128 } ; ; CHECK: Domain := -; CHECK-NEXT: [N] -> { Stmt_for_body[i0] : i0 >= 0 and 256*floor((128 + N)/256) < N - i0 }; +; CHECK-NEXT: [N] -> { Stmt_for_body[i0] : 0 <= i0 < N }; ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: polly/trunk/test/ScopInfo/truncate-2.ll =================================================================== --- polly/trunk/test/ScopInfo/truncate-2.ll +++ polly/trunk/test/ScopInfo/truncate-2.ll @@ -5,15 +5,16 @@ ; A[(char)(N)]++; ; } ; +; FIXME: We should the truncate precisely... or just make it a separate parameter. ; CHECK: Assumed Context: ; CHECK-NEXT: [N] -> { : } ; CHECK-NEXT: Invalid Context: -; CHECK-NEXT: [N] -> { : 1 = 0 } +; CHECK-NEXT: [N] -> { : N >= 128 } ; ; CHECK: ReadAccess := [Reduction Type: +] [Scalar: 0] -; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> MemRef_A[o0] : 256*floor((-N + o0)/256) = -N + o0 and -128 <= o0 <= 127 }; +; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> MemRef_A[N] }; ; CHECK-NEXT: MustWriteAccess := [Reduction Type: +] [Scalar: 0] -; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> MemRef_A[o0] : 256*floor((-N + o0)/256) = -N + o0 and -128 <= o0 <= 127 }; +; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> MemRef_A[N] }; ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: polly/trunk/test/ScopInfo/zero_ext_of_truncate.ll =================================================================== --- polly/trunk/test/ScopInfo/zero_ext_of_truncate.ll +++ polly/trunk/test/ScopInfo/zero_ext_of_truncate.ll @@ -9,13 +9,14 @@ ; } ; } ; +; FIXME: The truncated value should be a paramter. ; CHECK: Assumed Context: ; CHECK-NEXT: [N, tmp, M] -> { : } ; CHECK-NEXT: Invalid Context: -; CHECK-NEXT: [N, tmp, M] -> { : N < 0 or (N > 0 and M < 0) or (N > 0 and 256*floor((128 + tmp)/256) > tmp) } +; CHECK-NEXT: [N, tmp, M] -> { : N < 0 or (N > 0 and tmp >= 128) or (N > 0 and tmp < 0) or (N > 0 and M < 0) } ; ; CHECK: Domain := -; CHECK-NEXT: [N, tmp, M] -> { Stmt_if_then[i0] : 0 <= i0 < N and 256*floor((128 + tmp)/256) > tmp - M }; +; CHECK-NEXT: [N, tmp, M] -> { Stmt_if_then[i0] : M > tmp and 0 <= i0 < N }; ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"