Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -43,6 +43,7 @@ struct isl_basic_map; struct isl_id; struct isl_set; +struct isl_pw_aff; struct isl_union_set; struct isl_union_map; struct isl_space; @@ -656,6 +657,12 @@ /// @param NewDomain The new statement domain. void restrictDomain(__isl_take isl_set *NewDomain); + /// @brief Compute the isl representation for @p S. + /// + /// This will compute the isl representation for @p S and also restrict the + /// context of the SCoP accordingly if a computation in @p S could wrap. + __isl_give isl_pw_aff *getPwAff(const SCEV *S); + /// @brief Get the loop for a dimension. /// /// @param Dimension The dimension of the induction variable Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -79,19 +79,33 @@ public: /// @brief Translate a 'const SCEV *' to an isl_pw_aff. /// - /// @param Stmt The location at which the scalar evolution expression - /// is evaluated. - /// @param Expr The expression that is translated. - static __isl_give isl_pw_aff *getPwAff(ScopStmt *Stmt, const SCEV *Expr); + /// @param Stmt The location at which the scalar evolution expression + /// is evaluated. + /// @param Expr The expression that is translated. + /// @param UseModulo Flag to indicate that modulo semantic should be used. + static __isl_give isl_pw_aff *getPwAff(ScopStmt *Stmt, const SCEV *Expr, + bool UseModulo = false); private: isl_ctx *Ctx; int NbLoopSpaces; const Scop *S; - SCEVAffinator(const ScopStmt *Stmt); + /// @brief Flag to indicate that modulo semantic should be used. + const bool UseModulo; + + SCEVAffinator(const ScopStmt *Stmt, bool UseModulo); int getLoopDepth(const Loop *L); + /// @brief Compute the non-wrapping version of @p PWA for the type of @p Expr. + /// + /// @param PWA The piece-wise affine function that might wrap. + /// @param Expr The SCEV that was translated to @p PWA. + /// @param Flags The nsw/nuw flags of the operation. + __isl_give isl_pw_aff *addModuloSemantic(__isl_take isl_pw_aff *PWA, + const SCEV *Expr, + SCEV::NoWrapFlags Flags); + __isl_give isl_pw_aff *visit(const SCEV *Expr); __isl_give isl_pw_aff *visitConstant(const SCEVConstant *Expr); __isl_give isl_pw_aff *visitTruncateExpr(const SCEVTruncateExpr *Expr); @@ -109,21 +123,56 @@ friend struct SCEVVisitor; }; -SCEVAffinator::SCEVAffinator(const ScopStmt *Stmt) +SCEVAffinator::SCEVAffinator(const ScopStmt *Stmt, bool UseModulo) : Ctx(Stmt->getIslCtx()), NbLoopSpaces(Stmt->getNumIterators()), - S(Stmt->getParent()) {} + S(Stmt->getParent()), UseModulo(UseModulo) {} -__isl_give isl_pw_aff *SCEVAffinator::getPwAff(ScopStmt *Stmt, - const SCEV *Scev) { +__isl_give isl_pw_aff *SCEVAffinator::getPwAff(ScopStmt *Stmt, const SCEV *Scev, + bool UseModulo) { Scop *S = Stmt->getParent(); const Region *Reg = &S->getRegion(); S->addParams(getParamsInAffineExpr(Reg, Scev, *S->getSE())); - SCEVAffinator Affinator(Stmt); + SCEVAffinator Affinator(Stmt, UseModulo); return Affinator.visit(Scev); } +__isl_give isl_pw_aff * +SCEVAffinator::addModuloSemantic(isl_pw_aff *PWA, const SCEV *Expr, + SCEV::NoWrapFlags Flags) { + // If the SCEV flags do contain NSW (no signed wrap) then PWA already + // represents Expr in modulo semantic (it cannot overflow), thus we are done. + // Otherwise, we will compute: + // PWA = ((PWA + 2^(n-1)) mod (2 ^ n)) - 2^(n-1) + // whereas n is the number of bits of the Expr, hence: + // n = bitwidth(type(Expr)) + + if (Flags & SCEV::FlagNSW) + return PWA; + + assert(Expr->getType()->isIntegerTy() && "SCEV did not have integer type"); + + unsigned Width = Expr->getType()->getIntegerBitWidth(); + isl_ctx *Ctx = isl_pw_aff_get_ctx(PWA); + + isl_set *Domain = isl_pw_aff_domain(isl_pw_aff_copy(PWA)); + + isl_val *ModVal = isl_val_int_from_ui(Ctx, Width); + ModVal = isl_val_2exp(ModVal); + + isl_val *AddVal = isl_val_int_from_ui(Ctx, Width - 1); + AddVal = isl_val_2exp(AddVal); + + isl_pw_aff *AddPW = isl_pw_aff_val_on_domain(Domain, AddVal); + + PWA = isl_pw_aff_add(PWA, isl_pw_aff_copy(AddPW)); + PWA = isl_pw_aff_mod_val(PWA, ModVal); + PWA = isl_pw_aff_sub(PWA, AddPW); + + return PWA; +} + __isl_give isl_pw_aff *SCEVAffinator::visit(const SCEV *Expr) { // In case the scev is a valid parameter, we do not further analyze this // expression, but create a new parameter in the isl_pw_aff. This allows us @@ -189,7 +238,8 @@ Sum = isl_pw_aff_add(Sum, NextSummand); } - // TODO: Check for NSW and NUW. + if (UseModulo) + Sum = addModuloSemantic(Sum, Expr, Expr->getNoWrapFlags()); return Sum; } @@ -202,8 +252,13 @@ // only of parameters and we will stop in the visit(const SCEV *) function and // return the isl_pw_aff for that parameter. auto ConstantAndLeftOverPair = extractConstantFactor(Expr, *S->getSE()); - return isl_pw_aff_mul(visit(ConstantAndLeftOverPair.first), - visit(ConstantAndLeftOverPair.second)); + isl_pw_aff *MulPWA = isl_pw_aff_mul(visit(ConstantAndLeftOverPair.first), + visit(ConstantAndLeftOverPair.second)); + + if (UseModulo) + MulPWA = addModuloSemantic(MulPWA, Expr, Expr->getNoWrapFlags()); + + return MulPWA; } __isl_give isl_pw_aff *SCEVAffinator::visitUDivExpr(const SCEVUDivExpr *Expr) { @@ -232,8 +287,12 @@ isl_aff_zero_on_domain(LocalSpace), isl_dim_in, loopDimension, 1); isl_pw_aff *LPwAff = isl_pw_aff_from_aff(LAff); - // TODO: Do we need to check for NSW and NUW? - return isl_pw_aff_add(Start, isl_pw_aff_mul(Step, LPwAff)); + isl_pw_aff *PWA = isl_pw_aff_add(Start, isl_pw_aff_mul(Step, LPwAff)); + + if (UseModulo) + PWA = addModuloSemantic(PWA, Expr, Flags); + + return PWA; } // Translate AddRecExpr from '{start, +, inc}' into 'start + {0, +, inc}' @@ -249,7 +308,12 @@ isl_pw_aff *ZeroStartResult = visit(ZeroStartExpr); isl_pw_aff *Start = visit(Expr->getStart()); - return isl_pw_aff_add(ZeroStartResult, Start); + isl_pw_aff *PWA = isl_pw_aff_add(ZeroStartResult, Start); + + if (UseModulo) + PWA = addModuloSemantic(PWA, Expr, Flags); + + return PWA; } __isl_give isl_pw_aff *SCEVAffinator::visitSMaxExpr(const SCEVSMaxExpr *Expr) { @@ -652,8 +716,7 @@ AccessRelation = isl_map_universe(Space); for (int i = 0, Size = Access.Subscripts.size(); i < Size; ++i) { - isl_pw_aff *Affine = - SCEVAffinator::getPwAff(Statement, Access.Subscripts[i]); + isl_pw_aff *Affine = Statement->getPwAff(Access.Subscripts[i]); if (Size == 1) { // For the non delinearized arrays, divide the access function of the last @@ -825,6 +888,24 @@ isl_map *ScopStmt::getScattering() const { return isl_map_copy(Scattering); } +__isl_give isl_pw_aff *ScopStmt::getPwAff(const SCEV *S) { + isl_pw_aff *Expr = SCEVAffinator::getPwAff(this, S, false); + isl_pw_aff *ModExpr = SCEVAffinator::getPwAff(this, S, true); + + isl_set *Domain = isl_set_reset_tuple_id(getDomain()); + isl_pw_aff *DomExpr = + isl_pw_aff_intersect_domain(isl_pw_aff_copy(Expr), isl_set_copy(Domain)); + isl_pw_aff *DomModExpr = isl_pw_aff_intersect_domain(ModExpr, Domain); + + isl_set *NonWrappingDom = isl_pw_aff_ne_set(DomExpr, DomModExpr); + NonWrappingDom = isl_set_params(NonWrappingDom); + NonWrappingDom = isl_set_complement(NonWrappingDom); + + getParent()->addAssumption(NonWrappingDom); + + return Expr; +} + void ScopStmt::restrictDomain(__isl_take isl_set *NewDomain) { assert(isl_set_is_subset(NewDomain, Domain) && "New domain is not a subset of old domain!"); Index: test/DependenceInfo/sequential_loops.ll =================================================================== --- test/DependenceInfo/sequential_loops.ll +++ test/DependenceInfo/sequential_loops.ll @@ -273,7 +273,7 @@ ; VALUE: RAW dependences: ; VALUE: [p] -> { ; VALUE: Stmt_S1[i0] -> Stmt_S2[-p + i0] : -; VALUE: p <= 190 and i0 >= p and i0 <= 9 + p and i0 >= 0 and i0 <= 99 +; VALUE: p <= 190 and p >= -2305843009213693952 and i0 >= p and i0 <= 9 + p and i0 <= 99 and i0 >= 0 ; VALUE: } ; VALUE: WAR dependences: ; VALUE: [p] -> { Index: test/Isl/Ast/aliasing_parametric_simple_2.ll =================================================================== --- test/Isl/Ast/aliasing_parametric_simple_2.ll +++ test/Isl/Ast/aliasing_parametric_simple_2.ll @@ -5,7 +5,7 @@ ; A[i] = B[c - 10] + B[5]; ; } ; -; CHECK: if (1 && (&MemRef_A[1024] <= &MemRef_B[c >= 15 ? 5 : c - 10] || &MemRef_B[c <= 15 ? 6 : c - 9] <= &MemRef_A[0])) +; CHECK: if (c >= -2147483638 && (&MemRef_A[1024] <= &MemRef_B[c >= 15 ? 5 : c - 10] || &MemRef_B[c <= 15 ? 6 : c - 9] <= &MemRef_A[0])) ; CHECK: for (int c0 = 0; c0 <= 1023; c0 += 1) ; CHECK: Stmt_for_body(c0); ; CHECK: else Index: test/Isl/Ast/simple-run-time-condition.ll =================================================================== --- test/Isl/Ast/simple-run-time-condition.ll +++ test/Isl/Ast/simple-run-time-condition.ll @@ -19,7 +19,7 @@ ; cause any code to be executed are not generated. ; CHECK: if ( -; CHECK: (o >= 1 && q <= 0 && m + q >= 0) +; CHECK: (o >= 1 && n + p <= 9223372036854775808 && q <= 0 && m + q >= 0) ; CHECK: || ; CHECK; (o <= 0 && m + q >= 100 && q <= 100) ; CHECK: ) Index: test/Isl/CodeGen/aliasing_parametric_simple_2.ll =================================================================== --- test/Isl/CodeGen/aliasing_parametric_simple_2.ll +++ test/Isl/CodeGen/aliasing_parametric_simple_2.ll @@ -22,7 +22,7 @@ ; CHECK: %[[AMin:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %A, i64 0 ; CHECK: %[[BltA:[._a-zA-Z0-9]*]] = icmp ule i32* %[[BMax]], %[[AMin]] ; CHECK: %[[NoAlias:[._a-zA-Z0-9]*]] = or i1 %[[AltB]], %[[BltA]] -; CHECK: %[[RTC:[._a-zA-Z0-9]*]] = and i1 true, %[[NoAlias]] +; CHECK: %[[RTC:[._a-zA-Z0-9]*]] = and i1 %{{[0-9]*}}, %[[NoAlias]] ; CHECK: br i1 %[[RTC]], label %polly.start, label %for.cond ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Isl/CodeGen/pointer-type-expressions.ll =================================================================== --- test/Isl/CodeGen/pointer-type-expressions.ll +++ test/Isl/CodeGen/pointer-type-expressions.ll @@ -42,6 +42,6 @@ ; CHECK: Stmt_store(c0); ; CHECK: } -; CODEGEN: %0 = bitcast float* %P to i8* -; CODEGEN: %1 = icmp ule i8* %0, inttoptr (i64 -1 to i8*) +; CODEGEN: %[[BC:[0-9]*]] = bitcast float* %P to i8* +; CODEGEN: icmp ule i8* %[[BC]], inttoptr (i64 -1 to i8*) Index: test/ScopInfo/assume_gep_bounds.ll =================================================================== --- test/ScopInfo/assume_gep_bounds.ll +++ test/ScopInfo/assume_gep_bounds.ll @@ -20,6 +20,8 @@ ; CHECK-DAG: p <= 30 ; CHECK-DAG: and ; CHECK-DAG: m <= 20 +; CHECK-DAG: and +; CHECK-DAG: p <= 2305843009213694582 - 600n - 30m ; CHECK: } target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/ScopInfo/assume_gep_bounds_2.ll =================================================================== --- test/ScopInfo/assume_gep_bounds_2.ll +++ test/ScopInfo/assume_gep_bounds_2.ll @@ -17,9 +17,9 @@ ; CHECK: Assumed Context: ; CHECK-NEXT: [n, m, p] -> { : -; CHECK-DAG: (n >= 1 and m <= 20 and p <= 20) +; CHECK-DAG: (n >= 1 and m <= 2305843009213693972 - 20n and m <= 20 and p <= 20) ; CHECK-DAG: or -; CHECK-DAG: (n <= 0 and p <= 20) +; CHECK-DAG: (n <= 0 and p <= 2305843009213693972 - 20m and p <= 20 and p >= 1) ; CHECK: } target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/ScopInfo/loop_carry.ll =================================================================== --- test/ScopInfo/loop_carry.ll +++ test/ScopInfo/loop_carry.ll @@ -46,8 +46,6 @@ ret i64 0 } -; CHECK: Context: -; CHECK: [n] -> { : } ; CHECK: Statements { ; CHECK: Stmt_bb_nph ; CHECK: Domain := Index: test/ScopInfo/multidim_2d_outer_parametric_offset.ll =================================================================== --- test/ScopInfo/multidim_2d_outer_parametric_offset.ll +++ test/ScopInfo/multidim_2d_outer_parametric_offset.ll @@ -11,7 +11,7 @@ ; } ; CHECK: Assumed Context: -; CHECK: [m, p] -> { : } +; CHECK: [m, p] -> { : p <= 9223372036854775708 } ; CHECK: p0: %m ; CHECK: p1: %p ; CHECK: Statements { Index: test/ScopInfo/multidim_ivs_and_parameteric_offsets_3d.ll =================================================================== --- test/ScopInfo/multidim_ivs_and_parameteric_offsets_3d.ll +++ test/ScopInfo/multidim_ivs_and_parameteric_offsets_3d.ll @@ -15,7 +15,13 @@ ; (8 * %o)}<%for.j>,+,8}<%for.k> ; CHECK: Assumed Context: -; CHECK: [n, m, o, p, q, r] -> { : (q <= 0 and q >= 1 - m and r <= -1 and r >= 1 - o) or (r = 0 and q <= 0 and q >= -m) or (r = -o and q <= 1 and q >= 1 - m) } +; CHECK: [n, m, o, p, q, r] -> { : +; CHECK-DAG: (p <= 9223372036854775808 - n and q <= 0 and q >= 1 - m and r <= -1 and r >= 1 - o) +; CHECK-DAG: or +; CHECK-DAG: (r = 0 and p <= 9223372036854775808 - n and q <= 0 and q >= -m) +; CHECK-DAG: or +; CHECK-DAG: (r = -o and p <= 9223372036854775808 - n and q <= 1 and q >= 1 - m) +; CHECK: } ; ; CHECK: p0: %n ; CHECK: p1: %m Index: test/ScopInfo/pointer-type-expressions.ll =================================================================== --- test/ScopInfo/pointer-type-expressions.ll +++ test/ScopInfo/pointer-type-expressions.ll @@ -20,7 +20,7 @@ br i1 %brcond, label %store, label %bb.backedge store: - %scevgep = getelementptr i64, i64* %a, i64 %i + %scevgep = getelementptr inbounds i64, i64* %a, i64 %i store i64 %i, i64* %scevgep br label %bb.backedge Index: test/ScopInfo/ranged_parameter.ll =================================================================== --- test/ScopInfo/ranged_parameter.ll +++ test/ScopInfo/ranged_parameter.ll @@ -4,7 +4,11 @@ ; range metadata (see bottom of the file) are present: ; ; CHECK: Context: -; CHECK: [p_0] -> { : p_0 >= 0 and p_0 <= 255 } +; CHECK: [p_0] -> { : +; CHECK-DAG: p_0 >= 0 +; CHECK-DAG: and +; CHECK-DAG: p_0 <= 255 +; CHECK: } ; ; void jd(int *A, int *p /* in [0,256) */) { ; for (int i = 0; i < 1024; i++) Index: test/ScopInfo/simple_loop_1.ll =================================================================== --- test/ScopInfo/simple_loop_1.ll +++ test/ScopInfo/simple_loop_1.ll @@ -15,7 +15,7 @@ bb: ; preds = %bb, %entry %i = phi i64 [ 0, %entry ], [ %i.inc, %bb ] - %scevgep = getelementptr i64, i64* %a, i64 %i + %scevgep = getelementptr inbounds i64, i64* %a, i64 %i store i64 %i, i64* %scevgep %i.inc = add nsw i64 %i, 1 %exitcond = icmp eq i64 %i.inc, %N Index: test/ScopInfo/unsigned-condition.ll =================================================================== --- test/ScopInfo/unsigned-condition.ll +++ test/ScopInfo/unsigned-condition.ll @@ -20,7 +20,7 @@ br i1 %brcond, label %store, label %bb.backedge store: - %scevgep = getelementptr i64, i64* %a, i64 %i + %scevgep = getelementptr inbounds i64, i64* %a, i64 %i store i64 %i, i64* %scevgep br label %bb.backedge Index: test/ScopInfo/wraping_signed_expr_0.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_0.ll @@ -0,0 +1,71 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; void f(int *A, char N, char p) { +; for (char i = 0; i < N; i++) { +; A[i + 3] = 0; +; } +; } +; +; The wrap function has no inbounds GEP but the nowrap function has. Therefore, +; we will add the assumption that i+1 won't overflow only to the former. +; +; CHECK: Function: wrap +; CHECK: Assumed Context: +; CHECK: [N] -> { : N <= 125 } +; +; +; FIXME: This is a negative test as nowrap should not need an assumed context. +; However %tmp5 in @nowrap is translated to the SCEV <3,+,1><%bb2> +; which lacks the flags we would need to avoid runtime checks. +; +; CHECK: Function: nowrap +; CHECK: Assumed Context: +; CHECK-NOT: [N] -> { : } +; +target datalayout = "e-m:e-i8:64-f80:128-n8:16:32:64-S128" + +define void @wrap(i32* %A, i8 %N, i8 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i8 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i8 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add i8 %indvars.iv, 3 + %tmp6 = getelementptr i32, i32* %A, i8 %tmp5 + store i32 0, i32* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nsw nuw i8 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} + +define void @nowrap(i32* %A, i8 %N, i8 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i8 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i8 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add nsw nuw i8 %indvars.iv, 3 + %tmp6 = getelementptr inbounds i32, i32* %A, i8 %tmp5 + store i32 0, i32* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nsw nuw i8 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} Index: test/ScopInfo/wraping_signed_expr_1.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_1.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; void f(long *A, long N, long p) { +; for (long i = 0; i < N; i++) +; A[i + 1] = 0; +; } +; +; The wrap function has no inbounds GEP but the nowrap function has. Therefore, +; we will add the assumption that i+1 won't overflow only to the former. +; +; Note: +; 1152921504606846975 * sizeof(long) <= 2 ^ 63 - 1 +; and +; 1152921504606846976 * sizeof(long) > 2 ^ 63 - 1 +; with +; sizeof(long) == 8 +; +; CHECK: Function: wrap +; CHECK: Assumed Context: +; CHECK: [N] -> { : N <= 1152921504606846975 } +; +; CHECK: Function: nowrap +; CHECK: Assumed Context: +; CHECK: [N] -> { : } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @wrap(i64* %A, i64 %N, i64 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i64 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add nsw nuw i64 %indvars.iv, 1 + %tmp6 = getelementptr i64, i64* %A, i64 %tmp5 + store i64 0, i64* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nsw nuw i64 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} + +define void @nowrap(i64* %A, i64 %N, i64 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i64 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add nsw nuw i64 %indvars.iv, 1 + %tmp6 = getelementptr inbounds i64, i64* %A, i64 %tmp5 + store i64 0, i64* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nsw nuw i64 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} Index: test/ScopInfo/wraping_signed_expr_2.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_2.ll @@ -0,0 +1,42 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; void f(int *A, int N, int p) { +; for (int i = 0; i < N; i++) +; A[i + 30] = 0; +; } +; +; The wrap function has no inbounds GEP but the nowrap function has. Therefore, +; we will add the assumption that i+1 won't overflow only to the former. +; +; Note: 2147483618 + 30 == 2 ^ 31 +; +; CHECK: Function: wrap +; CHECK: Context: +; CHECK: [N] -> { : N <= 2147483647 and N >= -2147483648 } +; CHECK: Assumed Context: +; CHECK: [N] -> { : N <= 2147483618 } +; +target datalayout = "e-m:e-i32:64-f80:128-n8:16:32:64-S128" + +define void @wrap(i32* %A, i32 %N, i32 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i32 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i32 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add i32 %indvars.iv, 30 + %tmp6 = getelementptr i32, i32* %A, i32 %tmp5 + store i32 0, i32* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} Index: test/ScopInfo/wraping_signed_expr_3.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_3.ll @@ -0,0 +1,37 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; void f(int *A, int N, int p) { +; for (int i = 0; i < N; i++) +; A[i + p] = 0; +; } +; +; Note: 2147483648 == 2 ^ 31 +; +; CHECK: Function: wrap +; CHECK: Assumed Context: +; CHECK: [N, p] -> { : p <= 2147483648 - N } +; +target datalayout = "e-m:e-i32:64-f80:128-n8:16:32:64-S128" + +define void @wrap(i32* %A, i32 %N, i32 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i32 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i32 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add i32 %indvars.iv, %p + %tmp6 = getelementptr inbounds i32, i32* %A, i32 %tmp5 + store i32 0, i32* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} Index: test/ScopInfo/wraping_signed_expr_4.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_4.ll @@ -0,0 +1,37 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; void f(char *A, char N, char p) { +; for (char i = 0; i < N; i++) +; A[p-1] = 0; +; } +; +; CHECK: Function: wrap +; CHECK: Context: +; CHECK: [N, p] -> { : N <= 127 and N >= -128 and p <= 127 and p >= -128 } +; CHECK: Assumed Context: +; CHECK: [N, p] -> { : p >= -127 } +; +target datalayout = "e-m:e-i8:64-f80:128-n8:16:32:64-S128" + +define void @wrap(i8* %A, i8 %N, i8 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i8 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i8 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add i8 %p, -1 + %tmp6 = getelementptr i8, i8* %A, i8 %tmp5 + store i8 0, i8* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nuw nsw i8 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} Index: test/ScopInfo/wraping_signed_expr_5.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_5.ll @@ -0,0 +1,44 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; We should not generate runtime check for ((int)r1 + (int)r2) as it is known not +; to overflow. However (p + q) can, thus checks are needed. +; +; CHECK: Assumed Context: +; CHECK: [r1, r2, q, p] -> { : p <= 2147483647 - q and p >= -2147483648 - q } +; +; void wraps(int *A, int p, short q, char r1, char r2) { +; for (char i = r1; i < r2; i++) +; A[p + q] = A[(int)r1 + (int)r2]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @wraps(i32* %A, i32 %p, i16 signext %q, i8 signext %r1, i8 signext %r2) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i8 [ %r1, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i8 %i.0, %r2 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %conv3 = sext i8 %r1 to i64 + %conv4 = sext i8 %r2 to i64 + %add = add nsw i64 %conv3, %conv4 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %add + %tmp = load i32, i32* %arrayidx, align 4 + %conv5 = sext i16 %q to i32 + %add6 = add nsw i32 %conv5, %p + %idxprom7 = sext i32 %add6 to i64 + %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %idxprom7 + store i32 %tmp, i32* %arrayidx8, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add i8 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/ScopInfo/wraping_signed_expr_slow_1.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_slow_1.ll @@ -0,0 +1,81 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; This checks that the no-wraps checks will be computed fast as some example +; already showed huge slowdowns even though the inbounds and nsw flags were +; all in place. +; +; // Inspired by itrans8x8 in transform8x8.c from the ldecode benchmark. +; void fast(char *A, char N, char M) { +; for (char i = 0; i < 8; i++) { +; short index0 = (short)(i + N); +; #ifdef fast +; short index1 = (index0 * 1) + (short)M; +; #else +; short index1 = (index0 * 16) + (short)M; +; #endif +; A[index1]++; +; } +; } +; +; CHECK: Function: fast +; CHECK: Function: slow +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @fast(i8* %A, i8 %N, i8 %M) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i8 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i8 %indvars.iv, 8 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp3 = add nsw i8 %indvars.iv, %N + %tmp3ext = sext i8 %tmp3 to i16 + ;%mul = mul nsw i16 %tmp3ext, 16 + %Mext = sext i8 %M to i16 + %add2 = add nsw i16 %tmp3ext, %Mext + %arrayidx = getelementptr inbounds i8, i8* %A, i16 %add2 + %tmp4 = load i8, i8* %arrayidx, align 4 + %inc = add nsw i8 %tmp4, 1 + store i8 %inc, i8* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i8 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +define void @slow(i8* %A, i8 %N, i8 %M) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i8 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i8 %indvars.iv, 8 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp3 = add nsw i8 %indvars.iv, %N + %tmp3ext = sext i8 %tmp3 to i16 + %mul = mul nsw i16 %tmp3ext, 16 + %Mext = sext i8 %M to i16 + %add2 = add nsw i16 %mul, %Mext + %arrayidx = getelementptr inbounds i8, i8* %A, i16 %add2 + %tmp4 = load i8, i8* %arrayidx, align 4 + %inc = add nsw i8 %tmp4, 1 + store i8 %inc, i8* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i8 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/ScopInfo/wraping_signed_expr_slow_2.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_slow_2.ll @@ -0,0 +1,86 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; This checks that the no-wraps checks will be computed fast as some example +; already showed huge slowdowns even though the inbounds and nsw flags were +; all in place. +; +; // Inspired by itrans8x8 in transform8x8.c from the ldecode benchmark. +; void fast(char *A, char N, char M) { +; for (char i = 0; i < 8; i++) { +; char index0 = i + N; +; char index1 = index0 * 16; +; char index2 = index1 + M; +; A[(short)index2]++; +; } +; } +; +; void slow(char *A, char N, char M) { +; for (char i = 0; i < 8; i++) { +; char index0 = i + N; +; char index1 = index0 * 16; +; short index2 = ((short)index1) + ((short)M); +; A[index2]++; +; } +; } +; +; CHECK: Function: fast +; CHECK: Function: slow +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @fast(i8* %A, i8 %N, i8 %M) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i8 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i8 %indvars.iv, 8 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp3 = add nsw i8 %indvars.iv, %N + %mul = mul nsw i8 %tmp3, 16 + %add2 = add nsw i8 %mul, %M + %add2ext = sext i8 %add2 to i16 + %arrayidx = getelementptr inbounds i8, i8* %A, i16 %add2ext + %tmp4 = load i8, i8* %arrayidx, align 4 + %inc = add nsw i8 %tmp4, 1 + store i8 %inc, i8* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i8 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +define void @slow(i8* %A, i8 %N, i8 %M) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i8 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i8 %indvars.iv, 8 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp3 = add nsw i8 %indvars.iv, %N + %mul = mul nsw i8 %tmp3, 16 + %mulext = sext i8 %mul to i16 + %Mext = sext i8 %M to i16 + %add2 = add nsw i16 %mulext, %Mext + %arrayidx = getelementptr inbounds i8, i8* %A, i16 %add2 + %tmp4 = load i8, i8* %arrayidx, align 4 + %inc = add nsw i8 %tmp4, 1 + store i8 %inc, i8* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i8 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +}