Index: include/polly/Support/SCEVAffinator.h
===================================================================
--- include/polly/Support/SCEVAffinator.h
+++ include/polly/Support/SCEVAffinator.h
@@ -113,6 +113,10 @@
   /// @returns The isl representation @p PWAC with a posisbly adjusted domain.
   __isl_give PWACtx checkForWrapping(const llvm::SCEV *Expr, PWACtx PWAC) const;
 
+  /// Whether to track the value of this expression precisely, rather than
+  /// assuming it won't wrap.
+  bool isPrecise(const llvm::SCEV *Expr);
+
   __isl_give PWACtx visit(const llvm::SCEV *E);
   __isl_give PWACtx visitConstant(const llvm::SCEVConstant *E);
   __isl_give PWACtx visitTruncateExpr(const llvm::SCEVTruncateExpr *E);
Index: lib/Support/SCEVAffinator.cpp
===================================================================
--- lib/Support/SCEVAffinator.cpp
+++ lib/Support/SCEVAffinator.cpp
@@ -36,22 +36,13 @@
 // compile time.
 static int const MaxDisjunctionsInPwAff = 100;
 
-// The maximal number of bits for which a zero-extend is modeled precisely.
-static unsigned const MaxZextSmallBitWidth = 7;
+// The maximal number of bits for which a general expression is modeled
+// precisely.
+static unsigned const MaxSmallBitWidth = 7;
 
 // The maximal number of bits for which a truncate is modeled precisely.
 static unsigned const MaxTruncateSmallBitWidth = 31;
 
-/// Return true if a zero-extend from @p Width bits is precisely modeled.
-static bool isPreciseZeroExtend(unsigned Width) {
-  return Width <= MaxZextSmallBitWidth;
-}
-
-/// Return true if a truncate from @p Width bits is precisely modeled.
-static bool isPreciseTruncate(unsigned Width) {
-  return Width <= MaxTruncateSmallBitWidth;
-}
-
 /// Add the number of basic sets in @p Domain to @p User
 static isl_stat addNumBasicSets(__isl_take isl_set *Domain,
                                 __isl_take isl_aff *Aff, void *User) {
@@ -99,26 +90,6 @@
   PWAC0.second = isl_set_union(PWAC0.second, PWAC1.second);
 }
 
-/// Set the possible wrapping of @p Expr to @p Flags.
-static const SCEV *setNoWrapFlags(ScalarEvolution &SE, const SCEV *Expr,
-                                  SCEV::NoWrapFlags Flags) {
-  auto *NAry = dyn_cast<SCEVNAryExpr>(Expr);
-  if (!NAry)
-    return Expr;
-
-  SmallVector<const SCEV *, 8> Ops(NAry->op_begin(), NAry->op_end());
-  switch (Expr->getSCEVType()) {
-  case scAddExpr:
-    return SE.getAddExpr(Ops, Flags);
-  case scMulExpr:
-    return SE.getMulExpr(Ops, Flags);
-  case scAddRecExpr:
-    return SE.getAddRecExpr(Ops, cast<SCEVAddRecExpr>(Expr)->getLoop(), Flags);
-  default:
-    return Expr;
-  }
-}
-
 static __isl_give isl_pw_aff *getWidthExpValOnDomain(unsigned Width,
                                                      __isl_take isl_set *Dom) {
   auto *Ctx = isl_set_get_ctx(Dom);
@@ -241,6 +212,13 @@
   return false;
 }
 
+bool SCEVAffinator::isPrecise(const SCEV *Expr) {
+  unsigned Width = TD.getTypeSizeInBits(Expr->getType());
+  if (isa<SCEVTruncateExpr>(Expr))
+    return Width <= MaxTruncateSmallBitWidth;
+  return Width <= MaxSmallBitWidth;
+}
+
 __isl_give PWACtx SCEVAffinator::visit(const SCEV *Expr) {
 
   auto Key = std::make_pair(Expr, BB);
@@ -267,11 +245,16 @@
     PWAC = getPWACtxFromPWA(isl_pw_aff_alloc(Domain, Affine));
   } else {
     PWAC = SCEVVisitor<SCEVAffinator, PWACtx>::visit(Expr);
+    if (isPrecise(Expr))
+      PWAC.first = addModuloSemantic(PWAC.first, Expr->getType());
     PWAC = checkForWrapping(Expr, PWAC);
   }
 
-  if (!Factor->getType()->isIntegerTy(1))
+  if (!Factor->isOne()) {
     combine(PWAC, visitConstant(Factor), isl_pw_aff_mul);
+    if (isPrecise(Expr))
+      PWAC.first = addModuloSemantic(PWAC.first, Expr->getType());
+  }
 
   // For compile time reasons we need to simplify the PWAC before we cache and
   // return it.
@@ -314,12 +297,9 @@
   auto OpPWAC = visit(Op);
 
   unsigned Width = TD.getTypeSizeInBits(Expr->getType());
-  bool Precise = isPreciseTruncate(Width);
 
-  if (Precise) {
-    OpPWAC.first = addModuloSemantic(OpPWAC.first, Expr->getType());
+  if (isPrecise(Expr))
     return OpPWAC;
-  }
 
   auto *Dom = isl_pw_aff_domain(isl_pw_aff_copy(OpPWAC.first));
   auto *ExpPWA = getWidthExpValOnDomain(Width - 1, Dom);
@@ -382,13 +362,10 @@
   auto *Op = Expr->getOperand();
   unsigned Width = TD.getTypeSizeInBits(Op->getType());
 
-  bool Precise = isPreciseZeroExtend(Width);
+  bool Precise = isPrecise(Op);
 
   auto Flags = getNoWrapFlags(Op);
-  auto NoWrapFlags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
   bool OpCanWrap = Precise && !(Flags & SCEV::FlagNSW);
-  if (OpCanWrap)
-    Op = setNoWrapFlags(SE, Op, NoWrapFlags);
 
   auto OpPWAC = visit(Op);
   if (OpCanWrap)
Index: test/ScopInfo/bool-addrec.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/bool-addrec.ll
@@ -0,0 +1,50 @@
+; RUN: opt -analyze -polly-ast -polly-process-unprofitable < %s | FileCheck %s
+
+; CHECK: for (int c0 = 0; c0 <= 19999; c0 += 1) {
+; CHECK-NEXT: if (c0 % 2 == 0)
+; CHECK-NEXT: Stmt_if_then(c0);
+; CHECK-NEXT: Stmt_if_end(c0);
+; CHECK-NEXT: if (c0 % 2 == 0)
+; CHECK-NEXT: Stmt_if_then5(c0);
+; CHECK-NEXT: }
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n8:16:32-S64"
+target triple = "armv4t--linux-gnueabi"
+
+define void @f(i32* %a, i32 %x) {
+entry:
+  br label %for.body
+
+for.body: ; preds = %entry, %for.inc
+  %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %rem1 = and i32 %i.03, 1
+  %cmp1 = icmp eq i32 %rem1, 0
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %for.body
+  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.03
+  store i32 3, i32* %arrayidx, align 4
+  br label %if.end
+
+if.end: ; preds = %if.then, %for.body
+  %mul = shl nsw i32 %i.03, 1
+  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %mul
+  store i32 3, i32* %arrayidx2, align 4
+  %rem32 = and i32 %i.03, 1
+  %cmp4 = icmp eq i32 %rem32, 0
+  br i1 %cmp4, label %if.then5, label %for.inc
+
+if.then5: ; preds = %if.end
+  %mul6 = mul nsw i32 %i.03, 3
+  %arrayidx7 = getelementptr inbounds i32, i32* %a, i32 %mul6
+  store i32 3, i32* %arrayidx7, align 4
+  br label %for.inc
+
+for.inc: ; preds = %if.end, %if.then5
+  %inc = add nsw i32 %i.03, 1
+  %cmp = icmp slt i32 %inc, 20000
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.inc
+  ret void
+}
Index: test/ScopInfo/infeasible-rtc.ll
===================================================================
--- test/ScopInfo/infeasible-rtc.ll
+++ test/ScopInfo/infeasible-rtc.ll
@@ -12,22 +12,20 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
-@A = common global [128 x i32] zeroinitializer, align 16
-
-define void @test() nounwind uwtable {
+define void @test(i64* %a) nounwind uwtable {
 preheader:
   br label %header
 
 header:
-  %i = phi i7 [ 0, %preheader ], [ %i.1, %header ]
-  %tmp = zext i7 %i to i64
-  %A.addr = getelementptr [128 x i32], [128 x i32]* @A, i64 0, i64 %tmp
-  %A.load = load i32, i32* %A.addr, align 4
-  %A.inc = zext i7 %i to i32
-  %A.val = add nsw i32 %A.load, %A.inc
-  store i32 %A.val, i32* %A.addr, align 4
-  %i.1 = add i7 %i, 1
-  %exitcond = icmp eq i7 %i.1, 0
+  %i = phi i56 [ 0, %preheader ], [ %i.1, %header ]
+  %tmp = zext i56 %i to i64
+  %A.addr = getelementptr i64, i64* %a, i64 %tmp
+  %A.load = load i64, i64* %A.addr, align 4
+  %A.inc = zext i56 %i to i64
+  %A.val = add nsw i64 %A.load, %A.inc
+  store i64 %A.val, i64* %A.addr, align 4
+  %i.1 = add i56 %i, 1
+  %exitcond = icmp eq i56 %i.1, 0
   br i1 %exitcond, label %exit, label %header
 
 exit:
Index: test/ScopInfo/integers.ll
===================================================================
--- test/ScopInfo/integers.ll
+++ test/ScopInfo/integers.ll
@@ -112,7 +112,7 @@
   %indvar.next = add nsw i3 %indvar, 1
   %sub = sub i3 %n, 3
 ; CHECK: 'bb => return' in function 'f6'
-; CHECK: [n] -> { Stmt_bb[0] : n = 3 };
+; CHECK: [n] -> { Stmt_bb[i0] : i0 >= 0 and 8*floor((5 + n)/8) <= 5 + n - i0 };
   %exitcond = icmp eq i3 %indvar, %sub
   br i1 %exitcond, label %return, label %bb
 
Index: test/ScopInfo/zero_ext_of_truncate.ll
===================================================================
--- test/ScopInfo/zero_ext_of_truncate.ll
+++ test/ScopInfo/zero_ext_of_truncate.ll
@@ -12,10 +12,12 @@
 ; CHECK: Assumed Context:
 ; CHECK-NEXT: [N, tmp, M] -> { : }
 ; CHECK-NEXT: Invalid Context:
-; CHECK-NEXT: [N, tmp, M] -> { : N < 0 or (N > 0 and M < 0) or (N > 0 and 256*floor((128 + tmp)/256) > tmp) }
+; CHECK-NEXT: [N, tmp, M] -> { : N < 0 or (N > 0 and M < 0) }
 ;
 ; CHECK: Domain :=
-; CHECK-NEXT: [N, tmp, M] -> { Stmt_if_then[i0] : 0 <= i0 < N and 256*floor((128 + tmp)/256) > tmp - M };
+; CHECK-NEXT: [N, tmp, M] -> { Stmt_if_then[i0] : 0 <= i0 < N and
+; CHECK-SAME: 256*floor((128 + tmp)/256) > tmp - M and ((256*floor((128 + tmp)/256) > tmp and
+; CHECK-SAME: 256*floor((128 + tmp)/256) >= 257 + tmp - M) or 256*floor((128 + tmp)/256) <= tmp) };
 ;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 